Blame tests/trie-test.cc

Packit de3218
#include <algorithm>
Packit de3218
#include <cstring>
Packit de3218
#include <sstream>
Packit de3218
Packit de3218
#include <marisa/grimoire/trie/config.h>
Packit de3218
#include <marisa/grimoire/trie/header.h>
Packit de3218
#include <marisa/grimoire/trie/key.h>
Packit de3218
#include <marisa/grimoire/trie/range.h>
Packit de3218
#include <marisa/grimoire/trie/tail.h>
Packit de3218
#include <marisa/grimoire/trie/state.h>
Packit de3218
Packit de3218
#include "marisa-assert.h"
Packit de3218
Packit de3218
namespace {
Packit de3218
Packit de3218
void TestConfig() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::Config config;
Packit de3218
Packit de3218
  ASSERT(config.num_tries() == MARISA_DEFAULT_NUM_TRIES);
Packit de3218
  ASSERT(config.tail_mode() == MARISA_DEFAULT_TAIL);
Packit de3218
  ASSERT(config.node_order() == MARISA_DEFAULT_ORDER);
Packit de3218
  ASSERT(config.cache_level() == MARISA_DEFAULT_CACHE);
Packit de3218
Packit de3218
  config.parse(10 | MARISA_BINARY_TAIL | MARISA_LABEL_ORDER |
Packit de3218
      MARISA_TINY_CACHE);
Packit de3218
Packit de3218
  ASSERT(config.num_tries() == 10);
Packit de3218
  ASSERT(config.tail_mode() == MARISA_BINARY_TAIL);
Packit de3218
  ASSERT(config.node_order() == MARISA_LABEL_ORDER);
Packit de3218
  ASSERT(config.cache_level() == MARISA_TINY_CACHE);
Packit de3218
Packit de3218
  config.parse(0);
Packit de3218
Packit de3218
  ASSERT(config.num_tries() == MARISA_DEFAULT_NUM_TRIES);
Packit de3218
  ASSERT(config.tail_mode() == MARISA_DEFAULT_TAIL);
Packit de3218
  ASSERT(config.node_order() == MARISA_DEFAULT_ORDER);
Packit de3218
  ASSERT(config.cache_level() == MARISA_DEFAULT_CACHE);
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestHeader() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::Header header;
Packit de3218
Packit de3218
  {
Packit de3218
    marisa::grimoire::Writer writer;
Packit de3218
    writer.open("trie-test.dat");
Packit de3218
    header.write(writer);
Packit de3218
  }
Packit de3218
Packit de3218
  {
Packit de3218
    marisa::grimoire::Mapper mapper;
Packit de3218
    mapper.open("trie-test.dat");
Packit de3218
    header.map(mapper);
Packit de3218
  }
Packit de3218
Packit de3218
  {
Packit de3218
    marisa::grimoire::Reader reader;
Packit de3218
    reader.open("trie-test.dat");
Packit de3218
    header.read(reader);
Packit de3218
  }
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestKey() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::Key key;
Packit de3218
Packit de3218
  ASSERT(key.ptr() == NULL);
Packit de3218
  ASSERT(key.length() == 0);
Packit de3218
  ASSERT(key.id() == 0);
Packit de3218
  ASSERT(key.terminal() == 0);
Packit de3218
Packit de3218
  const char *str = "xyz";
Packit de3218
Packit de3218
  key.set_str(str, 3);
Packit de3218
  key.set_weight(10.0F);
Packit de3218
  key.set_id(20);
Packit de3218
Packit de3218
Packit de3218
  ASSERT(key.ptr() == str);
Packit de3218
  ASSERT(key.length() == 3);
Packit de3218
  ASSERT(key[0] == 'x');
Packit de3218
  ASSERT(key[1] == 'y');
Packit de3218
  ASSERT(key[2] == 'z');
Packit de3218
  ASSERT(key.weight() == 10.0F);
Packit de3218
  ASSERT(key.id() == 20);
Packit de3218
Packit de3218
  key.set_terminal(30);
Packit de3218
  ASSERT(key.terminal() == 30);
Packit de3218
Packit de3218
  key.substr(1, 2);
Packit de3218
Packit de3218
  ASSERT(key.ptr() == str + 1);
Packit de3218
  ASSERT(key.length() == 2);
Packit de3218
  ASSERT(key[0] == 'y');
Packit de3218
  ASSERT(key[1] == 'z');
Packit de3218
Packit de3218
  marisa::grimoire::trie::Key key2;
Packit de3218
  key2.set_str("abc", 3);
Packit de3218
Packit de3218
  ASSERT(key == key);
Packit de3218
  ASSERT(key != key2);
Packit de3218
  ASSERT(key > key2);
Packit de3218
  ASSERT(key2 < key);
Packit de3218
Packit de3218
  marisa::grimoire::trie::ReverseKey r_key;
Packit de3218
Packit de3218
  ASSERT(r_key.ptr() == NULL);
Packit de3218
  ASSERT(r_key.length() == 0);
Packit de3218
  ASSERT(r_key.id() == 0);
Packit de3218
  ASSERT(r_key.terminal() == 0);
Packit de3218
Packit de3218
  r_key.set_str(str, 3);
Packit de3218
  r_key.set_weight(100.0F);
Packit de3218
  r_key.set_id(200);
Packit de3218
Packit de3218
  ASSERT(r_key.ptr() == str);
Packit de3218
  ASSERT(r_key.length() == 3);
Packit de3218
  ASSERT(r_key[0] == 'z');
Packit de3218
  ASSERT(r_key[1] == 'y');
Packit de3218
  ASSERT(r_key[2] == 'x');
Packit de3218
  ASSERT(r_key.weight() == 100.0F);
Packit de3218
  ASSERT(r_key.id() == 200);
Packit de3218
Packit de3218
  r_key.set_terminal(300);
Packit de3218
  ASSERT(r_key.terminal() == 300);
Packit de3218
Packit de3218
  r_key.substr(1, 2);
Packit de3218
Packit de3218
  ASSERT(r_key.ptr() == str);
Packit de3218
  ASSERT(r_key.length() == 2);
Packit de3218
  ASSERT(r_key[0] == 'y');
Packit de3218
  ASSERT(r_key[1] == 'x');
Packit de3218
Packit de3218
  marisa::grimoire::trie::ReverseKey r_key2;
Packit de3218
  r_key2.set_str("abc", 3);
Packit de3218
Packit de3218
  ASSERT(r_key == r_key);
Packit de3218
  ASSERT(r_key != r_key2);
Packit de3218
  ASSERT(r_key > r_key2);
Packit de3218
  ASSERT(r_key2 < r_key);
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestRange() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::Range range;
Packit de3218
Packit de3218
  ASSERT(range.begin() == 0);
Packit de3218
  ASSERT(range.end() == 0);
Packit de3218
  ASSERT(range.key_pos() == 0);
Packit de3218
Packit de3218
  range.set_begin(1);
Packit de3218
  range.set_end(2);
Packit de3218
  range.set_key_pos(3);
Packit de3218
Packit de3218
  ASSERT(range.begin() == 1);
Packit de3218
  ASSERT(range.end() == 2);
Packit de3218
  ASSERT(range.key_pos() == 3);
Packit de3218
Packit de3218
  range = marisa::grimoire::trie::make_range(10, 20, 30);
Packit de3218
Packit de3218
  ASSERT(range.begin() == 10);
Packit de3218
  ASSERT(range.end() == 20);
Packit de3218
  ASSERT(range.key_pos() == 30);
Packit de3218
Packit de3218
  marisa::grimoire::trie::WeightedRange w_range;
Packit de3218
Packit de3218
  ASSERT(w_range.begin() == 0);
Packit de3218
  ASSERT(w_range.end() == 0);
Packit de3218
  ASSERT(w_range.key_pos() == 0);
Packit de3218
  ASSERT(w_range.weight() == 0.0F);
Packit de3218
Packit de3218
  w_range.set_begin(10);
Packit de3218
  w_range.set_end(20);
Packit de3218
  w_range.set_key_pos(30);
Packit de3218
  w_range.set_weight(40.0F);
Packit de3218
Packit de3218
  ASSERT(w_range.begin() == 10);
Packit de3218
  ASSERT(w_range.end() == 20);
Packit de3218
  ASSERT(w_range.key_pos() == 30);
Packit de3218
  ASSERT(w_range.weight() == 40.0F);
Packit de3218
Packit de3218
  marisa::grimoire::trie::WeightedRange w_range2 =
Packit de3218
      marisa::grimoire::trie::make_weighted_range(100, 200, 300, 400.0F);
Packit de3218
Packit de3218
  ASSERT(w_range2.begin() == 100);
Packit de3218
  ASSERT(w_range2.end() == 200);
Packit de3218
  ASSERT(w_range2.key_pos() == 300);
Packit de3218
  ASSERT(w_range2.weight() == 400.0F);
Packit de3218
Packit de3218
  ASSERT(w_range < w_range2);
Packit de3218
  ASSERT(w_range2 > w_range);
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestEntry() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::Entry entry;
Packit de3218
Packit de3218
  ASSERT(entry.ptr() == NULL);
Packit de3218
  ASSERT(entry.length() == 0);
Packit de3218
  ASSERT(entry.id() == 0);
Packit de3218
Packit de3218
  const char *str = "XYZ";
Packit de3218
Packit de3218
  entry.set_str(str, 3);
Packit de3218
  entry.set_id(123);
Packit de3218
Packit de3218
  ASSERT(entry.ptr() == str);
Packit de3218
  ASSERT(entry.length() == 3);
Packit de3218
  ASSERT(entry[0] == 'Z');
Packit de3218
  ASSERT(entry[1] == 'Y');
Packit de3218
  ASSERT(entry[2] == 'X');
Packit de3218
  ASSERT(entry.id() == 123);
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestTextTail() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::Tail tail;
Packit de3218
  marisa::grimoire::Vector<marisa::grimoire::trie::Entry> entries;
Packit de3218
  marisa::grimoire::Vector<marisa::UInt32> offsets;
Packit de3218
  tail.build(entries, &offsets, MARISA_TEXT_TAIL);
Packit de3218
Packit de3218
  ASSERT(tail.mode() == MARISA_TEXT_TAIL);
Packit de3218
  ASSERT(tail.size() == 0);
Packit de3218
  ASSERT(tail.empty());
Packit de3218
  ASSERT(tail.total_size() == tail.size());
Packit de3218
  ASSERT(tail.io_size() == (sizeof(marisa::UInt64) * 6));
Packit de3218
Packit de3218
  ASSERT(offsets.empty());
Packit de3218
Packit de3218
  marisa::grimoire::trie::Entry entry;
Packit de3218
  entry.set_str("X", 1);
Packit de3218
  entries.push_back(entry);
Packit de3218
Packit de3218
  tail.build(entries, &offsets, MARISA_TEXT_TAIL);
Packit de3218
Packit de3218
  ASSERT(tail.mode() == MARISA_TEXT_TAIL);
Packit de3218
  ASSERT(tail.size() == 2);
Packit de3218
  ASSERT(!tail.empty());
Packit de3218
  ASSERT(tail.total_size() == tail.size());
Packit de3218
  ASSERT(tail.io_size() == (sizeof(marisa::UInt64) * 7));
Packit de3218
Packit de3218
  ASSERT(offsets.size() == entries.size());
Packit de3218
  ASSERT(offsets[0] == 0);
Packit de3218
  ASSERT(tail[offsets[0]] == 'X');
Packit de3218
  ASSERT(tail[offsets[0] + 1] == '\0');
Packit de3218
Packit de3218
  entries.clear();
Packit de3218
  entry.set_str("abc", 3);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("bc", 2);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("abc", 3);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("c", 1);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("ABC", 3);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("AB", 2);
Packit de3218
  entries.push_back(entry);
Packit de3218
Packit de3218
  tail.build(entries, &offsets, MARISA_TEXT_TAIL);
Packit de3218
  std::sort(entries.begin(), entries.end(),
Packit de3218
      marisa::grimoire::trie::Entry::IDComparer());
Packit de3218
Packit de3218
  ASSERT(tail.size() == 11);
Packit de3218
  ASSERT(offsets.size() == entries.size());
Packit de3218
  for (std::size_t i = 0; i < entries.size(); ++i) {
Packit de3218
    const char * const ptr = &tail[offsets[i]];
Packit de3218
    ASSERT(std::strlen(ptr) == entries[i].length());
Packit de3218
    ASSERT(std::strcmp(ptr, entries[i].ptr()) == 0);
Packit de3218
  }
Packit de3218
Packit de3218
  {
Packit de3218
    marisa::grimoire::Writer writer;
Packit de3218
    writer.open("trie-test.dat");
Packit de3218
    tail.write(writer);
Packit de3218
  }
Packit de3218
Packit de3218
  tail.clear();
Packit de3218
Packit de3218
  ASSERT(tail.size() == 0);
Packit de3218
  ASSERT(tail.total_size() == tail.size());
Packit de3218
Packit de3218
  {
Packit de3218
    marisa::grimoire::Mapper mapper;
Packit de3218
    mapper.open("trie-test.dat");
Packit de3218
    tail.map(mapper);
Packit de3218
Packit de3218
    ASSERT(tail.mode() == MARISA_TEXT_TAIL);
Packit de3218
    ASSERT(tail.size() == 11);
Packit de3218
    for (std::size_t i = 0; i < entries.size(); ++i) {
Packit de3218
      const char * const ptr = &tail[offsets[i]];
Packit de3218
    ASSERT(std::strlen(ptr) == entries[i].length());
Packit de3218
    ASSERT(std::strcmp(ptr, entries[i].ptr()) == 0);
Packit de3218
    }
Packit de3218
    tail.clear();
Packit de3218
  }
Packit de3218
Packit de3218
  {
Packit de3218
    marisa::grimoire::Reader reader;
Packit de3218
    reader.open("trie-test.dat");
Packit de3218
    tail.read(reader);
Packit de3218
  }
Packit de3218
Packit de3218
  ASSERT(tail.size() == 11);
Packit de3218
  ASSERT(offsets.size() == entries.size());
Packit de3218
  for (std::size_t i = 0; i < entries.size(); ++i) {
Packit de3218
    const char * const ptr = &tail[offsets[i]];
Packit de3218
    ASSERT(std::strlen(ptr) == entries[i].length());
Packit de3218
    ASSERT(std::strcmp(ptr, entries[i].ptr()) == 0);
Packit de3218
  }
Packit de3218
Packit de3218
  {
Packit de3218
    std::stringstream stream;
Packit de3218
    marisa::grimoire::Writer writer;
Packit de3218
    writer.open(stream);
Packit de3218
    tail.write(writer);
Packit de3218
    tail.clear();
Packit de3218
    marisa::grimoire::Reader reader;
Packit de3218
    reader.open(stream);
Packit de3218
    tail.read(reader);
Packit de3218
  }
Packit de3218
Packit de3218
  ASSERT(tail.size() == 11);
Packit de3218
  ASSERT(offsets.size() == entries.size());
Packit de3218
  for (std::size_t i = 0; i < entries.size(); ++i) {
Packit de3218
    const char * const ptr = &tail[offsets[i]];
Packit de3218
    ASSERT(std::strlen(ptr) == entries[i].length());
Packit de3218
    ASSERT(std::strcmp(ptr, entries[i].ptr()) == 0);
Packit de3218
  }
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestBinaryTail() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::Tail tail;
Packit de3218
  marisa::grimoire::Vector<marisa::grimoire::trie::Entry> entries;
Packit de3218
  marisa::grimoire::Vector<marisa::UInt32> offsets;
Packit de3218
  tail.build(entries, &offsets, MARISA_BINARY_TAIL);
Packit de3218
Packit de3218
  ASSERT(tail.mode() == MARISA_TEXT_TAIL);
Packit de3218
  ASSERT(tail.size() == 0);
Packit de3218
  ASSERT(tail.empty());
Packit de3218
  ASSERT(tail.total_size() == tail.size());
Packit de3218
  ASSERT(tail.io_size() == (sizeof(marisa::UInt64) * 6));
Packit de3218
Packit de3218
  ASSERT(offsets.empty());
Packit de3218
Packit de3218
  marisa::grimoire::trie::Entry entry;
Packit de3218
  entry.set_str("X", 1);
Packit de3218
  entries.push_back(entry);
Packit de3218
Packit de3218
  tail.build(entries, &offsets, MARISA_BINARY_TAIL);
Packit de3218
Packit de3218
  ASSERT(tail.mode() == MARISA_BINARY_TAIL);
Packit de3218
  ASSERT(tail.size() == 1);
Packit de3218
  ASSERT(!tail.empty());
Packit de3218
  ASSERT(tail.total_size() == (tail.size() + sizeof(marisa::UInt64)));
Packit de3218
  ASSERT(tail.io_size() == (sizeof(marisa::UInt64) * 8));
Packit de3218
Packit de3218
  ASSERT(offsets.size() == entries.size());
Packit de3218
  ASSERT(offsets[0] == 0);
Packit de3218
Packit de3218
  const char binary_entry[] = { 'N', 'P', '\0', 'T', 'r', 'i', 'e' };
Packit de3218
  entries[0].set_str(binary_entry, sizeof(binary_entry));
Packit de3218
Packit de3218
  tail.build(entries, &offsets, MARISA_TEXT_TAIL);
Packit de3218
Packit de3218
  ASSERT(tail.mode() == MARISA_BINARY_TAIL);
Packit de3218
  ASSERT(tail.size() == entries[0].length());
Packit de3218
Packit de3218
  ASSERT(offsets.size() == entries.size());
Packit de3218
  ASSERT(offsets[0] == 0);
Packit de3218
Packit de3218
  entries.clear();
Packit de3218
  entry.set_str("abc", 3);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("bc", 2);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("abc", 3);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("c", 1);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("ABC", 3);
Packit de3218
  entries.push_back(entry);
Packit de3218
  entry.set_str("AB", 2);
Packit de3218
  entries.push_back(entry);
Packit de3218
Packit de3218
  tail.build(entries, &offsets, MARISA_BINARY_TAIL);
Packit de3218
  std::sort(entries.begin(), entries.end(),
Packit de3218
      marisa::grimoire::trie::Entry::IDComparer());
Packit de3218
Packit de3218
  ASSERT(tail.mode() == MARISA_BINARY_TAIL);
Packit de3218
  ASSERT(tail.size() == 8);
Packit de3218
  ASSERT(offsets.size() == entries.size());
Packit de3218
  for (std::size_t i = 0; i < entries.size(); ++i) {
Packit de3218
    const char * const ptr = &tail[offsets[i]];
Packit de3218
    ASSERT(std::memcmp(ptr, entries[i].ptr(), entries[i].length()) == 0);
Packit de3218
  }
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestHistory() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::History history;
Packit de3218
Packit de3218
  ASSERT(history.node_id() == 0);
Packit de3218
  ASSERT(history.louds_pos() == 0);
Packit de3218
  ASSERT(history.key_pos() == 0);
Packit de3218
  ASSERT(history.link_id() == MARISA_INVALID_LINK_ID);
Packit de3218
  ASSERT(history.key_id() == MARISA_INVALID_KEY_ID);
Packit de3218
Packit de3218
  history.set_node_id(100);
Packit de3218
  history.set_louds_pos(200);
Packit de3218
  history.set_key_pos(300);
Packit de3218
  history.set_link_id(400);
Packit de3218
  history.set_key_id(500);
Packit de3218
Packit de3218
  ASSERT(history.node_id() == 100);
Packit de3218
  ASSERT(history.louds_pos() == 200);
Packit de3218
  ASSERT(history.key_pos() == 300);
Packit de3218
  ASSERT(history.link_id() == 400);
Packit de3218
  ASSERT(history.key_id() == 500);
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestState() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::grimoire::trie::State state;
Packit de3218
Packit de3218
  ASSERT(state.key_buf().empty());
Packit de3218
  ASSERT(state.history().empty());
Packit de3218
  ASSERT(state.node_id() == 0);
Packit de3218
  ASSERT(state.query_pos() == 0);
Packit de3218
  ASSERT(state.history_pos() == 0);
Packit de3218
  ASSERT(state.status_code() == marisa::grimoire::trie::MARISA_READY_TO_ALL);
Packit de3218
Packit de3218
  state.set_node_id(10);
Packit de3218
  state.set_query_pos(100);
Packit de3218
  state.set_history_pos(1000);
Packit de3218
  state.set_status_code(
Packit de3218
      marisa::grimoire::trie::MARISA_END_OF_PREDICTIVE_SEARCH);
Packit de3218
Packit de3218
  ASSERT(state.node_id() == 10);
Packit de3218
  ASSERT(state.query_pos() == 100);
Packit de3218
  ASSERT(state.history_pos() == 1000);
Packit de3218
  ASSERT(state.status_code() ==
Packit de3218
      marisa::grimoire::trie::MARISA_END_OF_PREDICTIVE_SEARCH);
Packit de3218
Packit de3218
  state.lookup_init();
Packit de3218
  ASSERT(state.status_code() == marisa::grimoire::trie::MARISA_READY_TO_ALL);
Packit de3218
Packit de3218
  state.reverse_lookup_init();
Packit de3218
  ASSERT(state.status_code() == marisa::grimoire::trie::MARISA_READY_TO_ALL);
Packit de3218
Packit de3218
  state.common_prefix_search_init();
Packit de3218
  ASSERT(state.status_code() ==
Packit de3218
      marisa::grimoire::trie::MARISA_READY_TO_COMMON_PREFIX_SEARCH);
Packit de3218
Packit de3218
  state.predictive_search_init();
Packit de3218
  ASSERT(state.status_code() ==
Packit de3218
      marisa::grimoire::trie::MARISA_READY_TO_PREDICTIVE_SEARCH);
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
}  // namespace
Packit de3218
Packit de3218
int main() try {
Packit de3218
  TestConfig();
Packit de3218
  TestHeader();
Packit de3218
  TestKey();
Packit de3218
  TestRange();
Packit de3218
  TestEntry();
Packit de3218
  TestTextTail();
Packit de3218
  TestBinaryTail();
Packit de3218
  TestHistory();
Packit de3218
  TestState();
Packit de3218
Packit de3218
  return 0;
Packit de3218
} catch (const marisa::Exception &ex) {
Packit de3218
  std::cerr << ex.what() << std::endl;
Packit de3218
  throw;
Packit de3218
}