Blame tests/marisa-test.cc

Packit de3218
#include <cstdlib>
Packit de3218
#include <cstring>
Packit de3218
#include <ctime>
Packit de3218
#include <sstream>
Packit de3218
Packit de3218
#include <marisa.h>
Packit de3218
Packit de3218
#include "marisa-assert.h"
Packit de3218
Packit de3218
namespace {
Packit de3218
Packit de3218
void TestEmptyTrie() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::Trie trie;
Packit de3218
Packit de3218
  EXCEPT(trie.save("marisa-test.dat"), MARISA_STATE_ERROR);
Packit de3218
#ifdef _MSC_VER
Packit de3218
  EXCEPT(trie.write(::_fileno(stdout)), MARISA_STATE_ERROR);
Packit de3218
#else  // _MSC_VER
Packit de3218
  EXCEPT(trie.write(::fileno(stdout)), MARISA_STATE_ERROR);
Packit de3218
#endif  // _MSC_VER
Packit de3218
  EXCEPT(std::cout << trie, MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(marisa::fwrite(stdout, trie), MARISA_STATE_ERROR);
Packit de3218
Packit de3218
  marisa::Agent agent;
Packit de3218
Packit de3218
  EXCEPT(trie.lookup(agent), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.reverse_lookup(agent), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.common_prefix_search(agent), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.predictive_search(agent), MARISA_STATE_ERROR);
Packit de3218
Packit de3218
  EXCEPT(trie.num_tries(), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.num_keys(), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.num_nodes(), MARISA_STATE_ERROR);
Packit de3218
Packit de3218
  EXCEPT(trie.tail_mode(), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.node_order(), MARISA_STATE_ERROR);
Packit de3218
Packit de3218
  EXCEPT(trie.empty(), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.size(), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.total_size(), MARISA_STATE_ERROR);
Packit de3218
  EXCEPT(trie.io_size(), MARISA_STATE_ERROR);
Packit de3218
Packit de3218
  marisa::Keyset keyset;
Packit de3218
  trie.build(keyset);
Packit de3218
Packit de3218
  ASSERT(!trie.lookup(agent));
Packit de3218
  EXCEPT(trie.reverse_lookup(agent), MARISA_BOUND_ERROR);
Packit de3218
  ASSERT(!trie.common_prefix_search(agent));
Packit de3218
  ASSERT(!trie.predictive_search(agent));
Packit de3218
Packit de3218
  ASSERT(trie.num_tries() == 1);
Packit de3218
  ASSERT(trie.num_keys() == 0);
Packit de3218
  ASSERT(trie.num_nodes() == 1);
Packit de3218
Packit de3218
  ASSERT(trie.tail_mode() == MARISA_DEFAULT_TAIL);
Packit de3218
  ASSERT(trie.node_order() == MARISA_DEFAULT_ORDER);
Packit de3218
Packit de3218
  ASSERT(trie.empty());
Packit de3218
  ASSERT(trie.size() == 0);
Packit de3218
  ASSERT(trie.total_size() != 0);
Packit de3218
  ASSERT(trie.io_size() != 0);
Packit de3218
Packit de3218
  keyset.push_back("");
Packit de3218
  trie.build(keyset);
Packit de3218
Packit de3218
  ASSERT(trie.lookup(agent));
Packit de3218
  trie.reverse_lookup(agent);
Packit de3218
  ASSERT(trie.common_prefix_search(agent));
Packit de3218
  ASSERT(!trie.common_prefix_search(agent));
Packit de3218
  ASSERT(trie.predictive_search(agent));
Packit de3218
  ASSERT(!trie.predictive_search(agent));
Packit de3218
Packit de3218
  ASSERT(trie.num_keys() == 1);
Packit de3218
  ASSERT(trie.num_nodes() == 1);
Packit de3218
Packit de3218
  ASSERT(!trie.empty());
Packit de3218
  ASSERT(trie.size() == 1);
Packit de3218
  ASSERT(trie.total_size() != 0);
Packit de3218
  ASSERT(trie.io_size() != 0);
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void TestTinyTrie() {
Packit de3218
  TEST_START();
Packit de3218
Packit de3218
  marisa::Keyset keyset;
Packit de3218
  keyset.push_back("bach");
Packit de3218
  keyset.push_back("bet");
Packit de3218
  keyset.push_back("chat");
Packit de3218
  keyset.push_back("check");
Packit de3218
  keyset.push_back("check");
Packit de3218
Packit de3218
  marisa::Trie trie;
Packit de3218
  trie.build(keyset, 1);
Packit de3218
Packit de3218
  ASSERT(trie.num_tries() == 1);
Packit de3218
  ASSERT(trie.num_keys() == 4);
Packit de3218
  ASSERT(trie.num_nodes() == 7);
Packit de3218
Packit de3218
  ASSERT(trie.tail_mode() == MARISA_DEFAULT_TAIL);
Packit de3218
  ASSERT(trie.node_order() == MARISA_DEFAULT_ORDER);
Packit de3218
Packit de3218
  ASSERT(keyset[0].id() == 2);
Packit de3218
  ASSERT(keyset[1].id() == 3);
Packit de3218
  ASSERT(keyset[2].id() == 1);
Packit de3218
  ASSERT(keyset[3].id() == 0);
Packit de3218
  ASSERT(keyset[4].id() == 0);
Packit de3218
Packit de3218
  marisa::Agent agent;
Packit de3218
  for (std::size_t i = 0; i < keyset.size(); ++i) {
Packit de3218
    agent.set_query(keyset[i].ptr(), keyset[i].length());
Packit de3218
    ASSERT(trie.lookup(agent));
Packit de3218
    ASSERT(agent.key().id() == keyset[i].id());
Packit de3218
Packit de3218
    agent.set_query(keyset[i].id());
Packit de3218
    trie.reverse_lookup(agent);
Packit de3218
    ASSERT(agent.key().length() == keyset[i].length());
Packit de3218
    ASSERT(std::memcmp(agent.key().ptr(), keyset[i].ptr(),
Packit de3218
        agent.key().length()) == 0);
Packit de3218
  }
Packit de3218
Packit de3218
  agent.set_query("be");
Packit de3218
  ASSERT(!trie.common_prefix_search(agent));
Packit de3218
  agent.set_query("beX");
Packit de3218
  ASSERT(!trie.common_prefix_search(agent));
Packit de3218
  agent.set_query("bet");
Packit de3218
  ASSERT(trie.common_prefix_search(agent));
Packit de3218
  ASSERT(!trie.common_prefix_search(agent));
Packit de3218
  agent.set_query("betX");
Packit de3218
  ASSERT(trie.common_prefix_search(agent));
Packit de3218
  ASSERT(!trie.common_prefix_search(agent));
Packit de3218
Packit de3218
  agent.set_query("chatX");
Packit de3218
  ASSERT(!trie.predictive_search(agent));
Packit de3218
  agent.set_query("chat");
Packit de3218
  ASSERT(trie.predictive_search(agent));
Packit de3218
  ASSERT(agent.key().length() == 4);
Packit de3218
  ASSERT(!trie.predictive_search(agent));
Packit de3218
Packit de3218
  agent.set_query("cha");
Packit de3218
  ASSERT(trie.predictive_search(agent));
Packit de3218
  ASSERT(agent.key().length() == 4);
Packit de3218
  ASSERT(!trie.predictive_search(agent));
Packit de3218
Packit de3218
  agent.set_query("c");
Packit de3218
  ASSERT(trie.predictive_search(agent));
Packit de3218
  ASSERT(agent.key().length() == 5);
Packit de3218
  ASSERT(std::memcmp(agent.key().ptr(), "check", 5) == 0);
Packit de3218
  ASSERT(trie.predictive_search(agent));
Packit de3218
  ASSERT(agent.key().length() == 4);
Packit de3218
  ASSERT(std::memcmp(agent.key().ptr(), "chat", 4) == 0);
Packit de3218
  ASSERT(!trie.predictive_search(agent));
Packit de3218
Packit de3218
  agent.set_query("ch");
Packit de3218
  ASSERT(trie.predictive_search(agent));
Packit de3218
  ASSERT(agent.key().length() == 5);
Packit de3218
  ASSERT(std::memcmp(agent.key().ptr(), "check", 5) == 0);
Packit de3218
  ASSERT(trie.predictive_search(agent));
Packit de3218
  ASSERT(agent.key().length() == 4);
Packit de3218
  ASSERT(std::memcmp(agent.key().ptr(), "chat", 4) == 0);
Packit de3218
  ASSERT(!trie.predictive_search(agent));
Packit de3218
Packit de3218
  trie.build(keyset, 1 | MARISA_LABEL_ORDER);
Packit de3218
Packit de3218
  ASSERT(trie.num_tries() == 1);
Packit de3218
  ASSERT(trie.num_keys() == 4);
Packit de3218
  ASSERT(trie.num_nodes() == 7);
Packit de3218
Packit de3218
  ASSERT(trie.tail_mode() == MARISA_DEFAULT_TAIL);
Packit de3218
  ASSERT(trie.node_order() == MARISA_LABEL_ORDER);
Packit de3218
Packit de3218
  ASSERT(keyset[0].id() == 0);
Packit de3218
  ASSERT(keyset[1].id() == 1);
Packit de3218
  ASSERT(keyset[2].id() == 2);
Packit de3218
  ASSERT(keyset[3].id() == 3);
Packit de3218
  ASSERT(keyset[4].id() == 3);
Packit de3218
Packit de3218
  for (std::size_t i = 0; i < keyset.size(); ++i) {
Packit de3218
    agent.set_query(keyset[i].ptr(), keyset[i].length());
Packit de3218
    ASSERT(trie.lookup(agent));
Packit de3218
    ASSERT(agent.key().id() == keyset[i].id());
Packit de3218
Packit de3218
    agent.set_query(keyset[i].id());
Packit de3218
    trie.reverse_lookup(agent);
Packit de3218
    ASSERT(agent.key().length() == keyset[i].length());
Packit de3218
    ASSERT(std::memcmp(agent.key().ptr(), keyset[i].ptr(),
Packit de3218
        agent.key().length()) == 0);
Packit de3218
  }
Packit de3218
Packit de3218
  agent.set_query("");
Packit de3218
  for (std::size_t i = 0; i < trie.size(); ++i) {
Packit de3218
    ASSERT(trie.predictive_search(agent));
Packit de3218
    ASSERT(agent.key().id() == i);
Packit de3218
  }
Packit de3218
  ASSERT(!trie.predictive_search(agent));
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
void MakeKeyset(std::size_t num_keys, marisa::TailMode tail_mode,
Packit de3218
    marisa::Keyset *keyset) {
Packit de3218
  char key_buf[16];
Packit de3218
  for (std::size_t i = 0; i < num_keys; ++i) {
Packit de3218
    const std::size_t length = std::rand() % sizeof(key_buf);
Packit de3218
    for (std::size_t j = 0; j < length; ++j) {
Packit de3218
      key_buf[j] = (char)(std::rand() % 10);
Packit de3218
      if (tail_mode == MARISA_TEXT_TAIL) {
Packit de3218
        key_buf[j] += '0';
Packit de3218
      }
Packit de3218
    }
Packit de3218
    keyset->push_back(key_buf, length);
Packit de3218
  }
Packit de3218
}
Packit de3218
Packit de3218
// For every entry of `keyset`, checks that looking the key up in `trie`
// yields the id stored in the keyset, and that a reverse lookup of that id
// restores a key of the same length and with the same bytes.
void TestLookup(const marisa::Trie &trie, const marisa::Keyset &keyset) {
  marisa::Agent agent;
  const std::size_t num_entries = keyset.size();

  std::size_t i = 0;
  while (i < num_entries) {
    // Forward direction: key bytes -> id.
    agent.set_query(keyset[i].ptr(), keyset[i].length());
    ASSERT(trie.lookup(agent));
    ASSERT(agent.key().id() == keyset[i].id());

    // Backward direction: id -> key bytes.
    agent.set_query(keyset[i].id());
    trie.reverse_lookup(agent);
    ASSERT(agent.key().length() == keyset[i].length());
    ASSERT(std::memcmp(agent.key().ptr(), keyset[i].ptr(),
        agent.key().length()) == 0);

    ++i;
  }
}
Packit de3218
Packit de3218
// Runs a common prefix search for every entry of `keyset`.  At least one
// result must be reported, no reported id may exceed the id of the queried
// key, and the last result before the search is exhausted must be the
// queried key itself.
void TestCommonPrefixSearch(const marisa::Trie &trie,
    const marisa::Keyset &keyset) {
  marisa::Agent agent;
  const std::size_t num_entries = keyset.size();

  for (std::size_t i = 0; i < num_entries; ++i) {
    agent.set_query(keyset[i].ptr(), keyset[i].length());

    // The first call has to succeed; after that, keep checking each
    // reported key until the search returns false.
    ASSERT(trie.common_prefix_search(agent));
    do {
      ASSERT(agent.key().id() <= keyset[i].id());
    } while (trie.common_prefix_search(agent));

    // The agent still holds the final result at this point.
    ASSERT(agent.key().id() == keyset[i].id());
  }
}
Packit de3218
Packit de3218
// Runs a predictive search for every entry of `keyset`.  The first result
// must be the queried key itself; every further result must carry a
// strictly larger id.
void TestPredictiveSearch(const marisa::Trie &trie,
    const marisa::Keyset &keyset) {
  marisa::Agent agent;
  const std::size_t num_entries = keyset.size();

  for (std::size_t i = 0; i < num_entries; ++i) {
    agent.set_query(keyset[i].ptr(), keyset[i].length());

    ASSERT(trie.predictive_search(agent));
    ASSERT(agent.key().id() == keyset[i].id());

    // Drain the remaining results.
    for (;;) {
      if (!trie.predictive_search(agent)) {
        break;
      }
      ASSERT(agent.key().id() > keyset[i].id());
    }
  }
}
Packit de3218
Packit de3218
void TestTrie(int num_tries, marisa::TailMode tail_mode,
Packit de3218
    marisa::NodeOrder node_order, marisa::Keyset &keyset) {
Packit de3218
  for (std::size_t i = 0; i < keyset.size(); ++i) {
Packit de3218
    keyset[i].set_weight(1.0F);
Packit de3218
  }
Packit de3218
Packit de3218
  marisa::Trie trie;
Packit de3218
  trie.build(keyset, num_tries | tail_mode | node_order);
Packit de3218
Packit de3218
  ASSERT(trie.num_tries() == (std::size_t)num_tries);
Packit de3218
  ASSERT(trie.num_keys() <= keyset.size());
Packit de3218
Packit de3218
  ASSERT(trie.tail_mode() == tail_mode);
Packit de3218
  ASSERT(trie.node_order() == node_order);
Packit de3218
Packit de3218
  TestLookup(trie, keyset);
Packit de3218
  TestCommonPrefixSearch(trie, keyset);
Packit de3218
  TestPredictiveSearch(trie, keyset);
Packit de3218
Packit de3218
  trie.save("marisa-test.dat");
Packit de3218
Packit de3218
  trie.clear();
Packit de3218
  trie.load("marisa-test.dat");
Packit de3218
Packit de3218
  ASSERT(trie.num_tries() == (std::size_t)num_tries);
Packit de3218
  ASSERT(trie.num_keys() <= keyset.size());
Packit de3218
Packit de3218
  ASSERT(trie.tail_mode() == tail_mode);
Packit de3218
  ASSERT(trie.node_order() == node_order);
Packit de3218
Packit de3218
  TestLookup(trie, keyset);
Packit de3218
Packit de3218
  {
Packit de3218
    std::FILE *file;
Packit de3218
#ifdef _MSC_VER
Packit de3218
    ASSERT(::fopen_s(&file, "marisa-test.dat", "wb") == 0);
Packit de3218
#else  // _MSC_VER
Packit de3218
    file = std::fopen("marisa-test.dat", "wb");
Packit de3218
    ASSERT(file != NULL);
Packit de3218
#endif  // _MSC_VER
Packit de3218
    marisa::fwrite(file, trie);
Packit de3218
    std::fclose(file);
Packit de3218
    trie.clear();
Packit de3218
#ifdef _MSC_VER
Packit de3218
    ASSERT(::fopen_s(&file, "marisa-test.dat", "rb") == 0);
Packit de3218
#else  // _MSC_VER
Packit de3218
    file = std::fopen("marisa-test.dat", "rb");
Packit de3218
    ASSERT(file != NULL);
Packit de3218
#endif  // _MSC_VER
Packit de3218
    marisa::fread(file, &trie);
Packit de3218
    std::fclose(file);
Packit de3218
  }
Packit de3218
Packit de3218
  ASSERT(trie.num_tries() == (std::size_t)num_tries);
Packit de3218
  ASSERT(trie.num_keys() <= keyset.size());
Packit de3218
Packit de3218
  ASSERT(trie.tail_mode() == tail_mode);
Packit de3218
  ASSERT(trie.node_order() == node_order);
Packit de3218
Packit de3218
  TestLookup(trie, keyset);
Packit de3218
Packit de3218
  trie.clear();
Packit de3218
  trie.mmap("marisa-test.dat");
Packit de3218
Packit de3218
  ASSERT(trie.num_tries() == (std::size_t)num_tries);
Packit de3218
  ASSERT(trie.num_keys() <= keyset.size());
Packit de3218
Packit de3218
  ASSERT(trie.tail_mode() == tail_mode);
Packit de3218
  ASSERT(trie.node_order() == node_order);
Packit de3218
Packit de3218
  TestLookup(trie, keyset);
Packit de3218
Packit de3218
  {
Packit de3218
    std::stringstream stream;
Packit de3218
    stream << trie;
Packit de3218
    trie.clear();
Packit de3218
    stream >> trie;
Packit de3218
  }
Packit de3218
Packit de3218
  ASSERT(trie.num_tries() == (std::size_t)num_tries);
Packit de3218
  ASSERT(trie.num_keys() <= keyset.size());
Packit de3218
Packit de3218
  ASSERT(trie.tail_mode() == tail_mode);
Packit de3218
  ASSERT(trie.node_order() == node_order);
Packit de3218
Packit de3218
  TestLookup(trie, keyset);
Packit de3218
}
Packit de3218
Packit de3218
void TestTrie(marisa::TailMode tail_mode, marisa::NodeOrder node_order,
Packit de3218
    marisa::Keyset &keyset) {
Packit de3218
  TEST_START();
Packit de3218
  std::cout << ((tail_mode == MARISA_TEXT_TAIL) ? "TEXT" : "BINARY") << ", ";
Packit de3218
  std::cout << ((node_order == MARISA_WEIGHT_ORDER) ?
Packit de3218
      "WEIGHT" : "LABEL") << ": ";
Packit de3218
Packit de3218
  for (int i = 1; i < 5; ++i) {
Packit de3218
    TestTrie(i, tail_mode, node_order, keyset);
Packit de3218
  }
Packit de3218
Packit de3218
  TEST_END();
Packit de3218
}
Packit de3218
Packit de3218
// Generates 1000 random keys suited to `tail_mode` and runs the trie tests
// on that one keyset, first with weight order and then with label order.
void TestTrie(marisa::TailMode tail_mode) {
  const std::size_t kNumKeys = 1000;
  const marisa::NodeOrder kNodeOrders[] = {
    MARISA_WEIGHT_ORDER,
    MARISA_LABEL_ORDER
  };
  const std::size_t kNumNodeOrders =
      sizeof(kNodeOrders) / sizeof(kNodeOrders[0]);

  marisa::Keyset keyset;
  MakeKeyset(kNumKeys, tail_mode, &keyset);

  // The same keyset object is reused for both orders.
  for (std::size_t i = 0; i < kNumNodeOrders; ++i) {
    TestTrie(tail_mode, kNodeOrders[i], keyset);
  }
}
Packit de3218
Packit de3218
void TestTrie() {
Packit de3218
  TestTrie(MARISA_TEXT_TAIL);
Packit de3218
  TestTrie(MARISA_BINARY_TAIL);
Packit de3218
}
Packit de3218
Packit de3218
}  // namespace
Packit de3218
Packit de3218
int main() try {
Packit de3218
  std::srand((unsigned int)std::time(NULL));
Packit de3218
Packit de3218
  TestEmptyTrie();
Packit de3218
  TestTinyTrie();
Packit de3218
  TestTrie();
Packit de3218
Packit de3218
  return 0;
Packit de3218
} catch (const marisa::Exception &ex) {
Packit de3218
  std::cerr << ex.what() << std::endl;
Packit de3218
  throw;
Packit de3218
}