Blob Blame History Raw
/*************************************************************************************************
 * The command line utility of the word dictionary
 *                                                               Copyright (C) 2009-2012 FAL Labs
 * This file is part of Kyoto Cabinet.
 * This program is free software: you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation, either version
 * 3 of the License, or any later version.
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with this program.
 * If not, see <http://www.gnu.org/licenses/>.
 *************************************************************************************************/


#include <kcutil.h>
#include <kcpolydb.h>
#include <kcdbext.h>

namespace kc = kyotocabinet;


// enumurations
enum { ZM_DEFAULT, ZM_ZLIB, ZM_LZO, ZM_LZMA };


// constants
const size_t THREADNUM = 8;              // number of threads
const size_t AMBGRATIO = 3;              // ratio of threshold of ambiguous search
const size_t AMBGMIN = 3;                // minimum threshold of ambiguous search


// global variables
const char* g_progname;                  // program name


// function prototypes
int main(int argc, char** argv);
static void usage();
static void dberrprint(kc::BasicDB* db, const char* info);
static void utftoucs(const char* src, size_t size, uint32_t* dest, size_t* np);
static void normalizequery(const char* qbuf, size_t qsiz, std::string* dest);
static void normalizeucs(uint32_t* ary, size_t onum, size_t* np);
template<class CHARTYPE>
static size_t levdist(const CHARTYPE* abuf, size_t asiz, const CHARTYPE* bbuf, size_t bsiz);
static int32_t runimport(int argc, char** argv);
static int32_t runsearch(int argc, char** argv);
static int32_t runsuggest(int argc, char** argv);
static int32_t procimport(const char* path, const char* srcpath, int32_t zmode);
static int32_t procsearch(const char* path, const char* query, int32_t zmode, int64_t max,
                          int32_t mode, bool ts, bool pk);
static int32_t procsuggest(const char* path, const char* query, int64_t max, int32_t mode);


// structure for sorting indexed records
struct IndexedRecord {
  int64_t rank;
  std::string text;
  bool operator <(const IndexedRecord& right) const {
    if (rank != right.rank) return rank < right.rank;
    return text < right.text;
  }
};


// structure for sorting ambiguous records
struct AmbiguousRecord {
  size_t dist;
  std::string key;
  uint32_t order;
  std::string text;
  bool operator <(const AmbiguousRecord& right) const {
    if (dist != right.dist) return dist < right.dist;
    if (key != right.key) return key < right.key;
    return order < right.order;
  }
  bool less(size_t rdist, const std::string& rkey, uint32_t rorder) const {
    if (dist != rdist) return dist < rdist;
    if (key != rkey) return key < rkey;
    return order < rorder;
  }
};


// structure for sorting plain records
struct PlainRecord {
  std::string key;
  uint32_t order;
  std::string text;
  bool operator <(const PlainRecord& right) const {
    if (key != right.key) return key < right.key;
    return order < right.order;
  }
  bool less(const std::string& rkey, uint32_t rorder) const {
    if (key != rkey) return key < rkey;
    return order < rorder;
  }
};


// main routine
int main(int argc, char** argv) {
  g_progname = argv[0];
  kc::setstdiobin();
  if (argc < 2) usage();
  int32_t rv = 0;
  if (!std::strcmp(argv[1], "import")) {
    rv = runimport(argc, argv);
  } else if (!std::strcmp(argv[1], "search")) {
    rv = runsearch(argc, argv);
  } else if (!std::strcmp(argv[1], "suggest")) {
    rv = runsuggest(argc, argv);
  } else {
    usage();
  }
  return rv;
}


// print the usage and exit
static void usage() {
  std::cerr << g_progname << ": the command line utility of the word dictionary" << std::endl;
  std::cerr << std::endl;
  std::cerr << "  " << g_progname << " import [-cz|-co|-cx] path src" << std::endl;
  std::cerr << "  " << g_progname << " search [-cz|-co|-cx] [-max num] [-f|-a|-m|-r|-tm|-tr]"
      " [-ts] [-iu] [-pk] path query" << std::endl;
  std::cerr << "  " << g_progname << " suggest [-max num] [-m|-r] [-iu] path query" <<
      std::endl;
  std::cerr << std::endl;
  std::exit(1);
}


// print error message of database
static void dberrprint(kc::BasicDB* db, const char* info) {
  const kc::BasicDB::Error& err = db->error();
  std::cerr << g_progname << ": " << info << ": " << db->path().c_str() << ": " << err.code() <<
      ": " << err.name() << ": " << err.message() << std::endl;
}


// convert a UTF-8 string into a UCS-4 array
static void utftoucs(const char* src, size_t size, uint32_t* dest, size_t* np) {
  _assert_(src && dest && np);
  const unsigned char* rp = (unsigned char*)src;
  const unsigned char* ep = rp + size;
  size_t dnum = 0;
  while (rp < ep) {
    uint32_t c = *rp;
    if (c < 0x80) {
      dest[dnum++] = c;
    } else if (c < 0xe0) {
      if (c >= 0xc0 && rp + 1 < ep) {
        c = ((c & 0x1f) << 6) | (rp[1] & 0x3f);
        if (c >= 0x80) dest[dnum++] = c;
        rp++;
      }
    } else if (c < 0xf0) {
      if (rp + 2 < ep) {
        c = ((c & 0x0f) << 12) | ((rp[1] & 0x3f) << 6) | (rp[2] & 0x3f);
        if (c >= 0x800) dest[dnum++] = c;
        rp += 2;
      }
    } else if (c < 0xf8) {
      if (rp + 3 < ep) {
        c = ((c & 0x07) << 18) | ((rp[1] & 0x3f) << 12) | ((rp[2] & 0x3f) << 6) |
            (rp[3] & 0x3f);
        if (c >= 0x10000) dest[dnum++] = c;
        rp += 3;
      }
    } else if (c < 0xfc) {
      if (rp + 4 < ep) {
        c = ((c & 0x03) << 24) | ((rp[1] & 0x3f) << 18) | ((rp[2] & 0x3f) << 12) |
            ((rp[3] & 0x3f) << 6) | (rp[4] & 0x3f);
        if (c >= 0x200000) dest[dnum++] = c;
        rp += 4;
      }
    } else if (c < 0xfe) {
      if (rp + 5 < ep) {
        c = ((c & 0x01) << 30) | ((rp[1] & 0x3f) << 24) | ((rp[2] & 0x3f) << 18) |
            ((rp[3] & 0x3f) << 12) | ((rp[4] & 0x3f) << 6) | (rp[5] & 0x3f);
        if (c >= 0x4000000) dest[dnum++] = c;
        rp += 5;
      }
    }
    rp++;
  }
  *np = dnum;
}


// normalize a query
static void normalizequery(const char* qbuf, size_t qsiz, std::string* dest) {
  uint32_t ucsstack[1024];
  uint32_t* ucs = qsiz > sizeof(ucsstack) / sizeof(*ucsstack) ? new uint32_t[qsiz] : ucsstack;
  size_t unum;
  utftoucs(qbuf, qsiz, ucs, &unum);
  size_t nnum;
  normalizeucs(ucs, unum, &nnum);
  qsiz++;
  char utfstack[2048];
  char* utf = qsiz > sizeof(utfstack) ? new char[qsiz] : utfstack;
  qsiz = kc::strucstoutf(ucs, nnum, utf);
  dest->append(utf, qsiz);
  if (utf != utfstack) delete[] utf;
  if (ucs != ucsstack) delete[] ucs;
}


// normalize a USC-4 array
static void normalizeucs(uint32_t* ary, size_t onum, size_t* np) {
  bool lowmode = true;
  bool nacmode = true;
  bool spcmode = true;
  size_t nnum = 0;
  for (size_t i = 0; i < onum; i++) {
    uint32_t c = ary[i];
    if (c >= 0x10000) {
      ary[nnum++] = c;
      continue;
    }
    uint32_t high = c >> 8;
    if (high == 0x00) {
      if (c < 0x0020 || c == 0x007f) {
        // control characters
        if (spcmode) {
          ary[nnum++] = 0x0020;
        } else if (c == 0x0009 || c == 0x000a || c == 0x000d) {
          ary[nnum++] = c;
        } else {
          ary[nnum++] = 0x0020;
        }
      } else if (c == 0x00a0) {
        // no-break space
        ary[nnum++] = 0x0020;
      } else {
        // otherwise
        if (lowmode) {
          if (c < 0x007f) {
            if (c >= 0x0041 && c <= 0x005a) c += 0x20;
          } else if (c >= 0x00c0 && c <= 0x00de && c != 0x00d7) {
            c += 0x20;
          }
        }
        if (nacmode) {
          if (c >= 0x00c0 && c <= 0x00c5) {
            c = 'A';
          } else if (c == 0x00c7) {
            c = 'C';
          } if (c >= 0x00c7 && c <= 0x00cb) {
            c = 'E';
          } if (c >= 0x00cc && c <= 0x00cf) {
            c = 'I';
          } else if (c == 0x00d0) {
            c = 'D';
          } else if (c == 0x00d1) {
            c = 'N';
          } if ((c >= 0x00d2 && c <= 0x00d6) || c == 0x00d8) {
            c = 'O';
          } if (c >= 0x00d9 && c <= 0x00dc) {
            c = 'U';
          } if (c == 0x00dd || c == 0x00de) {
            c = 'Y';
          } else if (c == 0x00df) {
            c = 's';
          } else if (c >= 0x00e0 && c <= 0x00e5) {
            c = 'a';
          } else if (c == 0x00e7) {
            c = 'c';
          } if (c >= 0x00e7 && c <= 0x00eb) {
            c = 'e';
          } if (c >= 0x00ec && c <= 0x00ef) {
            c = 'i';
          } else if (c == 0x00f0) {
            c = 'd';
          } else if (c == 0x00f1) {
            c = 'n';
          } if ((c >= 0x00f2 && c <= 0x00f6) || c == 0x00f8) {
            c = 'o';
          } if (c >= 0x00f9 && c <= 0x00fc) {
            c = 'u';
          } if (c >= 0x00fd && c <= 0x00ff) {
            c = 'y';
          }
        }
        ary[nnum++] = c;
      }
    } else if (high == 0x01) {
      // latin-1 extended
      if (lowmode) {
        if (c <= 0x0137) {
          if ((c & 1) == 0) c++;
        } else if (c == 0x0138) {
          c += 0;
        } else if (c <= 0x0148) {
          if ((c & 1) == 1) c++;
        } else if (c == 0x0149) {
          c += 0;
        } else if (c <= 0x0177) {
          if ((c & 1) == 0) c++;
        } else if (c == 0x0178) {
          c = 0x00ff;
        } else if (c <= 0x017e) {
          if ((c & 1) == 1) c++;
        } else if (c == 0x017f) {
          c += 0;
        }
      }
      if (nacmode) {
        if (c == 0x00ff) {
          c = 'y';
        } else if (c <= 0x0105) {
          c = ((c & 1) == 0) ? 'A' : 'a';
        } else if (c <= 0x010d) {
          c = ((c & 1) == 0) ? 'C' : 'c';
        } else if (c <= 0x0111) {
          c = ((c & 1) == 0) ? 'D' : 'd';
        } else if (c <= 0x011b) {
          c = ((c & 1) == 0) ? 'E' : 'e';
        } else if (c <= 0x0123) {
          c = ((c & 1) == 0) ? 'G' : 'g';
        } else if (c <= 0x0127) {
          c = ((c & 1) == 0) ? 'H' : 'h';
        } else if (c <= 0x0131) {
          c = ((c & 1) == 0) ? 'I' : 'i';
        } else if (c == 0x0134) {
          c = 'J';
        } else if (c == 0x0135) {
          c = 'j';
        } else if (c == 0x0136) {
          c = 'K';
        } else if (c == 0x0137) {
          c = 'k';
        } else if (c == 0x0138) {
          c = 'k';
        } else if (c >= 0x0139 && c <= 0x0142) {
          c = ((c & 1) == 1) ? 'L' : 'l';
        } else if (c >= 0x0143 && c <= 0x0148) {
          c = ((c & 1) == 1) ? 'N' : 'n';
        } else if (c >= 0x0149 && c <= 0x014b) {
          c = ((c & 1) == 0) ? 'N' : 'n';
        } else if (c >= 0x014c && c <= 0x0151) {
          c = ((c & 1) == 0) ? 'O' : 'o';
        } else if (c >= 0x0154 && c <= 0x0159) {
          c = ((c & 1) == 0) ? 'R' : 'r';
        } else if (c >= 0x015a && c <= 0x0161) {
          c = ((c & 1) == 0) ? 'S' : 's';
        } else if (c >= 0x0162 && c <= 0x0167) {
          c = ((c & 1) == 0) ? 'T' : 't';
        } else if (c >= 0x0168 && c <= 0x0173) {
          c = ((c & 1) == 0) ? 'U' : 'u';
        } else if (c == 0x0174) {
          c = 'W';
        } else if (c == 0x0175) {
          c = 'w';
        } else if (c == 0x0176) {
          c = 'Y';
        } else if (c == 0x0177) {
          c = 'y';
        } else if (c == 0x0178) {
          c = 'Y';
        } else if (c >= 0x0179 && c <= 0x017e) {
          c = ((c & 1) == 1) ? 'Z' : 'z';
        } else if (c == 0x017f) {
          c = 's';
        }
      }
      ary[nnum++] = c;
    } else if (high == 0x03) {
      // greek
      if (lowmode) {
        if (c >= 0x0391 && c <= 0x03a9) {
          c += 0x20;
        } else if (c >= 0x03d8 && c <= 0x03ef) {
          if ((c & 1) == 0) c++;
        } else if (c == 0x0374 || c == 0x03f7 || c == 0x03fa) {
          c++;
        }
      }
      ary[nnum++] = c;
    } else if (high == 0x04) {
      // cyrillic
      if (lowmode) {
        if (c <= 0x040f) {
          c += 0x50;
        } else if (c <= 0x042f) {
          c += 0x20;
        } else if (c >= 0x0460 && c <= 0x0481) {
          if ((c & 1) == 0) c++;
        } else if (c >= 0x048a && c <= 0x04bf) {
          if ((c & 1) == 0) c++;
        } else if (c == 0x04c0) {
          c = 0x04cf;
        } else if (c >= 0x04c1 && c <= 0x04ce) {
          if ((c & 1) == 1) c++;
        } else if (c >= 0x04d0) {
          if ((c & 1) == 0) c++;
        }
      }
      ary[nnum++] = c;
    } else if (high == 0x20) {
      if (c == 0x2002) {
        // en space
        ary[nnum++] = 0x0020;
      } else if (c == 0x2003) {
        // em space
        ary[nnum++] = 0x0020;
      } else if (c == 0x2009) {
        // thin space
        ary[nnum++] = 0x0020;
      } else if (c == 0x2010) {
        // hyphen
        ary[nnum++] = 0x002d;
      } else if (c == 0x2015) {
        // fullwidth horizontal line
        ary[nnum++] = 0x002d;
      } else if (c == 0x2019) {
        // apostrophe
        ary[nnum++] = 0x0027;
      } else if (c == 0x2033) {
        // double quotes
        ary[nnum++] = 0x0022;
      } else {
        // (otherwise)
        ary[nnum++] = c;
      }
    } else if (high == 0x22) {
      if (c == 0x2212) {
        // minus sign
        ary[nnum++] = 0x002d;
      } else {
        // (otherwise)
        ary[nnum++] = c;
      }
    } else if (high == 0x30) {
      if (c == 0x3000) {
        // fullwidth space
        if (spcmode) {
          ary[nnum++] = 0x0020;
        } else {
          ary[nnum++] = c;
        }
      } else {
        // (otherwise)
        ary[nnum++] = c;
      }
    } else if (high == 0xff) {
      if (c == 0xff01) {
        // fullwidth exclamation
        ary[nnum++] = 0x0021;
      } else if (c == 0xff03) {
        // fullwidth igeta
        ary[nnum++] = 0x0023;
      } else if (c == 0xff04) {
        // fullwidth dollar
        ary[nnum++] = 0x0024;
      } else if (c == 0xff05) {
        // fullwidth parcent
        ary[nnum++] = 0x0025;
      } else if (c == 0xff06) {
        // fullwidth ampersand
        ary[nnum++] = 0x0026;
      } else if (c == 0xff0a) {
        // fullwidth asterisk
        ary[nnum++] = 0x002a;
      } else if (c == 0xff0b) {
        // fullwidth plus
        ary[nnum++] = 0x002b;
      } else if (c == 0xff0c) {
        // fullwidth comma
        ary[nnum++] = 0x002c;
      } else if (c == 0xff0e) {
        // fullwidth period
        ary[nnum++] = 0x002e;
      } else if (c == 0xff0f) {
        // fullwidth slash
        ary[nnum++] = 0x002f;
      } else if (c == 0xff1a) {
        // fullwidth colon
        ary[nnum++] = 0x003a;
      } else if (c == 0xff1b) {
        // fullwidth semicolon
        ary[nnum++] = 0x003b;
      } else if (c == 0xff1d) {
        // fullwidth equal
        ary[nnum++] = 0x003d;
      } else if (c == 0xff1f) {
        // fullwidth question
        ary[nnum++] = 0x003f;
      } else if (c == 0xff20) {
        // fullwidth atmark
        ary[nnum++] = 0x0040;
      } else if (c == 0xff3c) {
        // fullwidth backslash
        ary[nnum++] = 0x005c;
      } else if (c == 0xff3e) {
        // fullwidth circumflex
        ary[nnum++] = 0x005e;
      } else if (c == 0xff3f) {
        // fullwidth underscore
        ary[nnum++] = 0x005f;
      } else if (c == 0xff5c) {
        // fullwidth vertical line
        ary[nnum++] = 0x007c;
      } else if (c >= 0xff21 && c <= 0xff3a) {
        // fullwidth alphabets
        if (lowmode) {
          c -= 0xfee0;
          if (c >= 0x0041 && c <= 0x005a) c += 0x20;
          ary[nnum++] = c;
        } else {
          ary[nnum++] = c - 0xfee0;
        }
      } else if (c >= 0xff41 && c <= 0xff5a) {
        // fullwidth small alphabets
        ary[nnum++] = c - 0xfee0;
      } else if (c >= 0xff10 && c <= 0xff19) {
        // fullwidth numbers
        ary[nnum++] = c - 0xfee0;
      } else if (c == 0xff61) {
        // halfwidth full stop
        ary[nnum++] = 0x3002;
      } else if (c == 0xff62) {
        // halfwidth left corner
        ary[nnum++] = 0x300c;
      } else if (c == 0xff63) {
        // halfwidth right corner
        ary[nnum++] = 0x300d;
      } else if (c == 0xff64) {
        // halfwidth comma
        ary[nnum++] = 0x3001;
      } else if (c == 0xff65) {
        // halfwidth middle dot
        ary[nnum++] = 0x30fb;
      } else if (c == 0xff66) {
        // halfwidth wo
        ary[nnum++] = 0x30f2;
      } else if (c >= 0xff67 && c <= 0xff6b) {
        // halfwidth small a-o
        ary[nnum++] = (c - 0xff67) * 2 + 0x30a1;
      } else if (c >= 0xff6c && c <= 0xff6e) {
        // halfwidth small ya-yo
        ary[nnum++] = (c - 0xff6c) * 2 + 0x30e3;
      } else if (c == 0xff6f) {
        // halfwidth small tu
        ary[nnum++] = 0x30c3;
      } else if (c == 0xff70) {
        // halfwidth prolonged mark
        ary[nnum++] = 0x30fc;
      } else if (c >= 0xff71 && c <= 0xff75) {
        // halfwidth a-o
        uint32_t tc = (c - 0xff71) * 2 + 0x30a2;
        if (c == 0xff73 && i < onum - 1 && ary[i+1] == 0xff9e) {
          tc = 0x30f4;
          i++;
        }
        ary[nnum++] = tc;
      } else if (c >= 0xff76 && c <= 0xff7a) {
        // halfwidth ka-ko
        uint32_t tc = (c - 0xff76) * 2 + 0x30ab;
        if (i < onum - 1 && ary[i+1] == 0xff9e) {
          tc++;
          i++;
        }
        ary[nnum++] = tc;
      } else if (c >= 0xff7b && c <= 0xff7f) {
        // halfwidth sa-so
        uint32_t tc = (c - 0xff7b) * 2 + 0x30b5;
        if (i < onum - 1 && ary[i+1] == 0xff9e) {
          tc++;
          i++;
        }
        ary[nnum++] = tc;
      } else if (c >= 0xff80 && c <= 0xff84) {
        // halfwidth ta-to
        uint32_t tc = (c - 0xff80) * 2 + 0x30bf + (c >= 0xff82 ? 1 : 0);
        if (i < onum - 1 && ary[i+1] == 0xff9e) {
          tc++;
          i++;
        }
        ary[nnum++] = tc;
      } else if (c >= 0xff85 && c <= 0xff89) {
        // halfwidth na-no
        ary[nnum++] = c - 0xcebb;
      } else if (c >= 0xff8a && c <= 0xff8e) {
        // halfwidth ha-ho
        uint32_t tc = (c - 0xff8a) * 3 + 0x30cf;
        if (i < onum - 1) {
          if (ary[i+1] == 0xff9e) {
            tc++;
            i++;
          } else if (ary[i+1] == 0xff9f) {
            tc += 2;
            i++;
          }
        }
        ary[nnum++] = tc;
      } else if (c >= 0xff8f && c <= 0xff93) {
        // halfwidth ma-mo
        ary[nnum++] = c - 0xceb1;
      } else if (c >= 0xff94 && c <= 0xff96) {
        // halfwidth ya-yo
        ary[nnum++] = (c - 0xff94) * 2 + 0x30e4;
      } else if (c >= 0xff97 && c <= 0xff9b) {
        // halfwidth ra-ro
        ary[nnum++] = c - 0xceae;
      } else if (c == 0xff9c) {
        // halfwidth wa
        ary[nnum++] = 0x30ef;
      } else if (c == 0xff9d) {
        // halfwidth nn
        ary[nnum++] = 0x30f3;
      } else {
        // otherwise
        ary[nnum++] = c;
      }
    } else {
      // otherwise
      ary[nnum++] = c;
    }
  }
  *np = nnum;
}


// get the levenshtein distance of two arrays
template<class CHARTYPE>
static size_t levdist(const CHARTYPE* abuf, size_t asiz, const CHARTYPE* bbuf, size_t bsiz) {
  size_t dsiz = bsiz + 1;
  size_t tsiz = (asiz + 1) * dsiz;
  uint8_t tblstack[2048];
  uint8_t* tbl = tsiz > sizeof(tblstack) ? new uint8_t[tsiz] : tblstack;
  tbl[0] = 0;
  for (size_t i = 1; i <= asiz; i++) {
    tbl[i*dsiz] = i;
  }
  for (size_t i = 1; i <= bsiz; i++) {
    tbl[i] = i;
  }
  abuf--;
  bbuf--;
  for (size_t i = 1; i <= asiz; i++) {
    for (size_t j = 1; j <= bsiz; j++) {
      uint32_t ac = tbl[(i-1)*dsiz+j] + 1;
      uint32_t bc = tbl[i*dsiz+j-1] + 1;
      uint32_t cc = tbl[(i-1)*dsiz+j-1] + (abuf[i] != bbuf[j]);
      ac = ac < bc ? ac : bc;
      tbl[i*dsiz+j] = ac < cc ? ac : cc;
    }
  }
  size_t ed = tbl[asiz*dsiz+bsiz];
  if (tbl != tblstack) delete[] tbl;
  return ed;
}


// parse arguments of import command
static int32_t runimport(int argc, char** argv) {
  bool argbrk = false;
  const char* path = NULL;
  const char* srcpath = NULL;
  int32_t zmode = ZM_DEFAULT;
  for (int32_t i = 2; i < argc; i++) {
    if (!argbrk && argv[i][0] == '-') {
      if (!std::strcmp(argv[i], "--")) {
        argbrk = true;
      } else if (!std::strcmp(argv[i], "-cz")) {
        zmode = ZM_ZLIB;
      } else if (!std::strcmp(argv[i], "-co")) {
        zmode = ZM_LZO;
      } else if (!std::strcmp(argv[i], "-cx")) {
        zmode = ZM_LZMA;
      } else {
        usage();
      }
    } else if (!path) {
      argbrk = true;
      path = argv[i];
    } else if (!srcpath) {
      srcpath = argv[i];
    } else {
      usage();
    }
  }
  if (!path || !srcpath) usage();
  int32_t rv = procimport(path, srcpath, zmode);
  return rv;
}


// parse arguments of search command
static int32_t runsearch(int argc, char** argv) {
  bool argbrk = false;
  const char* path = NULL;
  const char* query = NULL;
  int32_t zmode = ZM_DEFAULT;
  int64_t max = 10;
  int32_t mode = 0;
  bool ts = false;
  bool iu = false;
  bool pk = false;
  for (int32_t i = 2; i < argc; i++) {
    if (!argbrk && argv[i][0] == '-') {
      if (!std::strcmp(argv[i], "--")) {
        argbrk = true;
      } else if (!std::strcmp(argv[i], "-cz")) {
        zmode = ZM_ZLIB;
      } else if (!std::strcmp(argv[i], "-co")) {
        zmode = ZM_LZO;
      } else if (!std::strcmp(argv[i], "-cx")) {
        zmode = ZM_LZMA;
      } else if (!std::strcmp(argv[i], "-max")) {
        if (++i >= argc) usage();
        max = kc::atoix(argv[i]);
      } else if (!std::strcmp(argv[i], "-f")) {
        mode = 'f';
      } else if (!std::strcmp(argv[i], "-a")) {
        mode = 'a';
      } else if (!std::strcmp(argv[i], "-m")) {
        mode = 'm';
      } else if (!std::strcmp(argv[i], "-r")) {
        mode = 'r';
      } else if (!std::strcmp(argv[i], "-tm")) {
        mode = 'M';
      } else if (!std::strcmp(argv[i], "-tr")) {
        mode = 'R';
      } else if (!std::strcmp(argv[i], "-ts")) {
        ts = true;
      } else if (!std::strcmp(argv[i], "-iu")) {
        iu = true;
      } else if (!std::strcmp(argv[i], "-pk")) {
        pk = true;
      } else {
        usage();
      }
    } else if (!path) {
      argbrk = true;
      path = argv[i];
    } else if (!query) {
      query = argv[i];
    } else {
      usage();
    }
  }
  if (!path || !query) usage();
  const char* qbuf;
  if (iu) {
    size_t qsiz;
    qbuf = kc::urldecode(query, &qsiz);
    query = qbuf;
  } else {
    qbuf = NULL;
  }
  int32_t rv = procsearch(path, query, zmode, max, mode, ts, pk);
  delete[] qbuf;
  return rv;
}


// parse arguments of suggest command
static int32_t runsuggest(int argc, char** argv) {
  bool argbrk = false;
  const char* path = NULL;
  const char* query = NULL;
  int64_t max = 10;
  int32_t mode = 0;
  bool iu = false;
  for (int32_t i = 2; i < argc; i++) {
    if (!argbrk && argv[i][0] == '-') {
      if (!std::strcmp(argv[i], "--")) {
        argbrk = true;
      } else if (!std::strcmp(argv[i], "-max")) {
        if (++i >= argc) usage();
        max = kc::atoix(argv[i]);
      } else if (!std::strcmp(argv[i], "-f")) {
        mode = 'f';
      } else if (!std::strcmp(argv[i], "-m")) {
        mode = 'm';
      } else if (!std::strcmp(argv[i], "-r")) {
        mode = 'r';
      } else if (!std::strcmp(argv[i], "-iu")) {
        iu = true;
      } else {
        usage();
      }
    } else if (!path) {
      argbrk = true;
      path = argv[i];
    } else if (!query) {
      query = argv[i];
    } else {
      usage();
    }
  }
  if (!path || !query) usage();
  const char* qbuf;
  if (iu) {
    size_t qsiz;
    qbuf = kc::urldecode(query, &qsiz);
    query = qbuf;
  } else {
    qbuf = NULL;
  }
  int32_t rv = procsuggest(path, query, max, mode);
  delete[] qbuf;
  return rv;
}


// perform import command
static int32_t procimport(const char* path, const char* srcpath, int32_t zmode) {
  kc::TextDB srcdb;
  if (!srcdb.open(srcpath, kc::TextDB::OREADER)) {
    dberrprint(&srcdb, "DB::open failed");
    return 1;
  }
  kc::TreeDB destdb;
  int32_t opts = kc::TreeDB::TSMALL | kc::TreeDB::TLINEAR;
  kc::Compressor* zcomp = NULL;
  if (zmode != ZM_DEFAULT) {
    opts |= kc::TreeDB::TCOMPRESS;
    switch (zmode) {
      case ZM_LZO: {
        zcomp = new kc::LZOCompressor<kc::LZO::RAW>;
        break;
      }
      case ZM_LZMA: {
        zcomp = new kc::LZMACompressor<kc::LZMA::RAW>;
        break;
      }
    }
  }
  destdb.tune_options(opts);
  if (zcomp) destdb.tune_compressor(zcomp);
  if (!destdb.open(path, kc::TreeDB::OWRITER | kc::TreeDB::OCREATE | kc::TreeDB::OTRUNCATE)) {
    dberrprint(&destdb, "DB::open failed");
    delete zcomp;
    return 1;
  }
  bool err = false;
  class MapReduceImpl : public kc::MapReduce {
   public:
    MapReduceImpl(kc::TreeDB* destdb) :
        destdb_(destdb), lock_(), err_(false), mapcnt_(0), redcnt_(0) {}
    bool error() {
      return err_;
    }
   private:
    bool map(const char* kbuf, size_t ksiz, const char* vbuf, size_t vsiz) {
      bool err = false;
      std::vector<std::string> fields;
      kc::strsplit(std::string(vbuf, vsiz), '\t', &fields);
      if (fields.size() >= 5) {
        std::string key;
        normalizequery(fields[0].data(), fields[0].size(), &key);
        std::string value;
        kc::strprintf(&value, "%s\t%s\t%s\t%s",
                      fields[1].c_str(), fields[2].c_str(),
                      fields[3].c_str(), fields[4].c_str());
        if (!emit(key.data(), key.size(), value.data(), value.size())) err = true;
      }
      int64_t cnt = mapcnt_.add(1) + 1;
      if (cnt % 10000 == 0) {
        std::string message = kc::strprintf("processed %lld entries", (long long)cnt);
        if (!log("map", message.c_str())) err = true;
      }
      return !err;
    }
    bool reduce(const char* kbuf, size_t ksiz, ValueIterator* iter) {
      bool err = false;
      std::vector<IndexedRecord> records;
      const char* vbuf;
      size_t vsiz;
      while ((vbuf = iter->next(&vsiz)) != NULL) {
        std::vector<std::string> fields;
        kc::strsplit(std::string(vbuf, vsiz), '\t', &fields);
        if (fields.size() >= 4) {
          int64_t rank = kc::atoi(fields[0].c_str());
          std::string text;
          kc::strprintf(&text, "%s\t%s\t%s",
                        fields[1].c_str(), fields[2].c_str(), fields[3].c_str());
          IndexedRecord rec = { rank, text };
          records.push_back(rec);
        }
      }
      std::sort(records.begin(), records.end());
      if (records.size() > 1000) records.resize(1000);
      std::vector<IndexedRecord>::iterator rit = records.begin();
      std::vector<IndexedRecord>::iterator ritend = records.end();
      int32_t seq = 0;
      while (rit != ritend) {
        std::string key(kbuf, ksiz);
        kc::strprintf(&key, "\t%03lld", (long long)++seq);
        if (!destdb_->set(key.data(), key.size(), rit->text.data(), rit->text.size())) {
          err = true;
          err_ = true;
        }
        rit++;
      }
      int64_t cnt = redcnt_.add(1) + 1;
      if (cnt % 10000 == 0) {
        std::string message = kc::strprintf("processed %lld entries", (long long)cnt);
        if (!log("reduce", message.c_str())) err = true;
      }
      return !err;
    }
    bool log(const char* name, const char* message) {
      kc::ScopedMutex lock(&lock_);
      std::cout << name << ": " << message << std::endl;
      return true;
    }
   private:
    kc::TreeDB* destdb_;
    kc::Mutex lock_;
    bool err_;
    kc::AtomicInt64 mapcnt_;
    kc::AtomicInt64 redcnt_;
  };
  MapReduceImpl mr(&destdb);
  mr.tune_thread(THREADNUM, THREADNUM, THREADNUM);
  if (!mr.execute(&srcdb, "", kc::MapReduce::XPARAMAP | kc::MapReduce::XPARAFLS)) {
    dberrprint(&srcdb, "MapReduce::execute failed");
    err = true;
  }
  if (mr.error()) {
    dberrprint(&srcdb, "MapReduce::execute failed");
    err = true;
  }
  if (!destdb.close()) {
    dberrprint(&destdb, "DB::close failed");
    err = true;
  }
  delete zcomp;
  if (!srcdb.close()) {
    dberrprint(&srcdb, "DB::close failed");
    err = true;
  }
  return err ? 1 : 0;
}


// perform search command
static int32_t procsearch(const char* path, const char* query, int32_t zmode, int64_t max,
                          int32_t mode, bool ts, bool pk) {
  kc::TreeDB db;
  kc::Compressor* zcomp = NULL;
  if (zmode != ZM_DEFAULT) {
    switch (zmode) {
      case ZM_LZO: {
        zcomp = new kc::LZOCompressor<kc::LZO::RAW>;
        break;
      }
      case ZM_LZMA: {
        zcomp = new kc::LZMACompressor<kc::LZMA::RAW>;
        break;
      }
    }
  }
  if (zcomp) db.tune_compressor(zcomp);
  if (!db.open(path, kc::TreeDB::OREADER)) {
    dberrprint(&db, "DB::open failed");
    delete zcomp;
    return 1;
  }
  std::string nquery;
  if (ts) {
    nquery = query;
    kc::strtolower(&nquery);
  } else {
    normalizequery(query, std::strlen(query), &nquery);
  }
  bool err = false;
  if (mode == 'a') {
    class VisitorImpl : public kc::DB::Visitor {
     public:
      VisitorImpl(const std::string& query, int64_t max, bool pk) :
          qbuf_(), qsiz_(0), max_(max), pk_(pk),
          thres_(0), minsiz_(0), maxsiz_(0), lock_(), queue_() {
        qsiz_ = query.size();
        qbuf_ = new uint32_t[qsiz_+1];
        utftoucs(query.data(), query.size(), qbuf_, &qsiz_);
        if (qsiz_ > kc::UINT8MAX) qsiz_ = kc::UINT8MAX;
        thres_ = qsiz_ / AMBGRATIO;
        if (thres_ < AMBGMIN) thres_ = AMBGMIN;
        minsiz_ = qsiz_ > thres_ ? qsiz_ - thres_ : 0;
        maxsiz_ = qsiz_ + thres_;
      }
      ~VisitorImpl() {
        delete[] qbuf_;
      }
     private:
      const char* visit_full(const char* kbuf, size_t ksiz,
                             const char* vbuf, size_t vsiz, size_t* sp) {
        size_t oksiz = ksiz;
        while (ksiz > 0 && kbuf[ksiz-1] != '\t') {
          ksiz--;
        }
        if (ksiz > 0 && kbuf[ksiz-1] == '\t') ksiz--;
        uint32_t ucsstack[kc::UINT8MAX];
        uint32_t* ucs = ksiz > sizeof(ucsstack) / sizeof(*ucsstack) ?
            new uint32_t[ksiz] : ucsstack;
        size_t usiz;
        utftoucs(kbuf, ksiz, ucs, &usiz);
        if (usiz > kc::UINT8MAX) usiz = kc::UINT8MAX;
        if (usiz < minsiz_ || usiz > maxsiz_) {
          if (ucs != ucsstack) delete[] ucs;
          return NOP;
        }
        size_t dist = levdist(ucs, usiz, qbuf_, qsiz_);
        if (dist <= thres_) {
          std::string key(kbuf, ksiz);
          uint32_t order = kc::atoin(kbuf + ksiz, oksiz - ksiz);
          kc::ScopedMutex lock(&lock_);
          if ((int64_t)queue_.size() < max_) {
            AmbiguousRecord rec = { dist, key, order, std::string(vbuf, vsiz) };
            queue_.push(rec);
          } else {
            const AmbiguousRecord& top = queue_.top();
            if (!top.less(dist, key, order)) {
              queue_.pop();
              AmbiguousRecord rec = { dist, key, order, std::string(vbuf, vsiz) };
              queue_.push(rec);
            }
          }
        }
        if (ucs != ucsstack) delete[] ucs;
        return NOP;
      }
      void visit_after() {
        std::vector<AmbiguousRecord> recs;
        while (!queue_.empty()) {
          recs.push_back(queue_.top());
          queue_.pop();
        }
        std::vector<AmbiguousRecord>::reverse_iterator rit = recs.rbegin();
        std::vector<AmbiguousRecord>::reverse_iterator ritend = recs.rend();
        while (rit != ritend) {
          if (pk_) std::cout << rit->key << "\t";
          std::cout << rit->text << "\t" << rit->dist << std::endl;
          ++rit;
        }
      }
      uint32_t* qbuf_;
      size_t qsiz_;
      int64_t max_;
      bool pk_;
      size_t thres_;
      size_t minsiz_;
      size_t maxsiz_;
      kc::Mutex lock_;
      std::priority_queue<AmbiguousRecord> queue_;
    };
    VisitorImpl visitor(nquery, max, pk);
    if (!db.scan_parallel(&visitor, THREADNUM)) {
      dberrprint(&db, "DB::scan_parallel faileda");
      err = true;
    }
  } else if (mode == 'm') {
    class VisitorImpl : public kc::DB::Visitor {
     public:
      VisitorImpl(const std::string& query, int64_t max, bool pk) :
          query_(query), max_(max), pk_(pk), lock_(), queue_() {}
     private:
      const char* visit_full(const char* kbuf, size_t ksiz,
                             const char* vbuf, size_t vsiz, size_t* sp) {
        size_t oksiz = ksiz;
        while (ksiz > 0 && kbuf[ksiz-1] != '\t') {
          ksiz--;
        }
        if (ksiz > 0 && kbuf[ksiz-1] == '\t') ksiz--;
        std::string key(kbuf, ksiz);
        if (key.find(query_) != std::string::npos) {
          uint32_t order = kc::atoin(kbuf + ksiz, oksiz - ksiz);
          kc::ScopedMutex lock(&lock_);
          if ((int64_t)queue_.size() < max_) {
            PlainRecord rec = { key, order, std::string(vbuf, vsiz) };
            queue_.push(rec);
          } else {
            const PlainRecord& top = queue_.top();
            if (!top.less(key, order)) {
              queue_.pop();
              PlainRecord rec = { key, order, std::string(vbuf, vsiz) };
              queue_.push(rec);
            }
          }
        }
        return NOP;
      }
      void visit_after() {
        std::vector<PlainRecord> recs;
        while (!queue_.empty()) {
          recs.push_back(queue_.top());
          queue_.pop();
        }
        std::vector<PlainRecord>::reverse_iterator rit = recs.rbegin();
        std::vector<PlainRecord>::reverse_iterator ritend = recs.rend();
        while (rit != ritend) {
          if (pk_) std::cout << rit->key << "\t";
          std::cout << rit->text << std::endl;
          ++rit;
        }
      }
      std::string query_;
      int64_t max_;
      bool pk_;
      kc::Mutex lock_;
      std::priority_queue<PlainRecord> queue_;
    };
    VisitorImpl visitor(nquery, max, pk);
    if (!db.scan_parallel(&visitor, THREADNUM)) {
      dberrprint(&db, "DB::scan_parallel failed");
      err = true;
    }
  } else if (mode == 'M') {
    class VisitorImpl : public kc::DB::Visitor {
     public:
      VisitorImpl(const std::string& query, int64_t max, bool ts, bool pk) :
          query_(query), max_(max), ts_(ts), pk_(pk), lock_(), queue_() {}
     private:
      const char* visit_full(const char* kbuf, size_t ksiz,
                             const char* vbuf, size_t vsiz, size_t* sp) {
        const char* rbuf = vbuf;
        size_t rsiz = vsiz;
        while (rsiz > 0 && *rbuf != '\t') {
          rbuf++;
          rsiz--;
        }
        if (rsiz > 0 && *rbuf == '\t') {
          rbuf++;
          rsiz--;
        }
        while (rsiz > 0 && *rbuf != '\t') {
          rbuf++;
          rsiz--;
        }
        if (rsiz > 0 && *rbuf == '\t') {
          rbuf++;
          rsiz--;
        }
        bool hit = false;
        if (ts_) {
          hit = kc::memimem(rbuf, rsiz, query_.data(), query_.size()) != NULL;
        } else {
          std::string value;
          normalizequery(rbuf, rsiz, &value);
          hit = value.find(query_) != std::string::npos;
        }
        if (hit) {
          size_t oksiz = ksiz;
          while (ksiz > 0 && kbuf[ksiz-1] != '\t') {
            ksiz--;
          }
          if (ksiz > 0 && kbuf[ksiz-1] == '\t') ksiz--;
          std::string key(kbuf, ksiz);
          uint32_t order = kc::atoin(kbuf + ksiz, oksiz - ksiz);
          kc::ScopedMutex lock(&lock_);
          if ((int64_t)queue_.size() < max_) {
            PlainRecord rec = { key, order, std::string(vbuf, vsiz) };
            queue_.push(rec);
          } else {
            const PlainRecord& top = queue_.top();
            if (!top.less(key, order)) {
              queue_.pop();
              PlainRecord rec = { key, order, std::string(vbuf, vsiz) };
              queue_.push(rec);
            }
          }
        }
        return NOP;
      }
      void visit_after() {
        std::vector<PlainRecord> recs;
        while (!queue_.empty()) {
          recs.push_back(queue_.top());
          queue_.pop();
        }
        std::vector<PlainRecord>::reverse_iterator rit = recs.rbegin();
        std::vector<PlainRecord>::reverse_iterator ritend = recs.rend();
        while (rit != ritend) {
          if (pk_) std::cout << rit->key << "\t";
          std::cout << rit->text << std::endl;
          ++rit;
        }
      }
      std::string query_;
      int64_t max_;
      bool ts_;
      bool pk_;
      kc::Mutex lock_;
      std::priority_queue<PlainRecord> queue_;
    };
    VisitorImpl visitor(nquery, max, ts, pk);
    if (!db.scan_parallel(&visitor, THREADNUM)) {
      dberrprint(&db, "DB::scan_parallel failed");
      err = true;
    }
  } else if (mode == 'r') {
    class VisitorImpl : public kc::DB::Visitor {
     public:
      VisitorImpl(const std::string& query, int64_t max, bool pk) :
          regex_(), max_(max), pk_(pk), lock_(), queue_() {
        regex_.compile(query, kc::Regex::MATCHONLY);
      }
     private:
      const char* visit_full(const char* kbuf, size_t ksiz,
                             const char* vbuf, size_t vsiz, size_t* sp) {
        size_t oksiz = ksiz;
        while (ksiz > 0 && kbuf[ksiz-1] != '\t') {
          ksiz--;
        }
        if (ksiz > 0 && kbuf[ksiz-1] == '\t') ksiz--;
        std::string key(kbuf, ksiz);
        if (regex_.match(key)) {
          uint32_t order = kc::atoin(kbuf + ksiz, oksiz - ksiz);
          kc::ScopedMutex lock(&lock_);
          if ((int64_t)queue_.size() < max_) {
            PlainRecord rec = { key, order, std::string(vbuf, vsiz) };
            queue_.push(rec);
          } else {
            const PlainRecord& top = queue_.top();
            if (!top.less(key, order)) {
              queue_.pop();
              PlainRecord rec = { key, order, std::string(vbuf, vsiz) };
              queue_.push(rec);
            }
          }
        }
        return NOP;
      }
      void visit_after() {
        std::vector<PlainRecord> recs;
        while (!queue_.empty()) {
          recs.push_back(queue_.top());
          queue_.pop();
        }
        std::vector<PlainRecord>::reverse_iterator rit = recs.rbegin();
        std::vector<PlainRecord>::reverse_iterator ritend = recs.rend();
        while (rit != ritend) {
          if (pk_) std::cout << rit->key << "\t";
          std::cout << rit->text << std::endl;
          ++rit;
        }
      }
      kc::Regex regex_;
      int64_t max_;
      bool pk_;
      kc::Mutex lock_;
      std::priority_queue<PlainRecord> queue_;
    };
    VisitorImpl visitor(nquery, max, pk);
    if (!db.scan_parallel(&visitor, THREADNUM)) {
      dberrprint(&db, "DB::scan_parallel failed");
      err = true;
    }
  } else if (mode == 'R') {
    class VisitorImpl : public kc::DB::Visitor {
     public:
      VisitorImpl(const std::string& query, int64_t max, bool ts, bool pk) :
          regex_(), max_(max), ts_(ts), pk_(pk), lock_(), queue_() {
        regex_.compile(query, kc::Regex::MATCHONLY);
      }
     private:
      const char* visit_full(const char* kbuf, size_t ksiz,
                             const char* vbuf, size_t vsiz, size_t* sp) {
        const char* rbuf = vbuf;
        size_t rsiz = vsiz;
        while (rsiz > 0 && *rbuf != '\t') {
          rbuf++;
          rsiz--;
        }
        if (rsiz > 0 && *rbuf == '\t') {
          rbuf++;
          rsiz--;
        }
        while (rsiz > 0 && *rbuf != '\t') {
          rbuf++;
          rsiz--;
        }
        if (rsiz > 0 && *rbuf == '\t') {
          rbuf++;
          rsiz--;
        }
        bool hit = false;
        if (ts_) {
          std::string value(rbuf, rsiz);
          kc::strtolower(&value);
          hit = regex_.match(value);
        } else {
          std::string value(rbuf, rsiz);
          normalizequery(rbuf, rsiz, &value);
          hit = regex_.match(value);
        }
        if (hit) {
          size_t oksiz = ksiz;
          while (ksiz > 0 && kbuf[ksiz-1] != '\t') {
            ksiz--;
          }
          if (ksiz > 0 && kbuf[ksiz-1] == '\t') ksiz--;
          std::string key(kbuf, ksiz);
          uint32_t order = kc::atoin(kbuf + ksiz, oksiz - ksiz);
          kc::ScopedMutex lock(&lock_);
          if ((int64_t)queue_.size() < max_) {
            PlainRecord rec = { key, order, std::string(vbuf, vsiz) };
            queue_.push(rec);
          } else {
            const PlainRecord& top = queue_.top();
            if (!top.less(key, order)) {
              queue_.pop();
              PlainRecord rec = { key, order, std::string(vbuf, vsiz) };
              queue_.push(rec);
            }
          }
        }
        return NOP;
      }
      void visit_after() {
        std::vector<PlainRecord> recs;
        while (!queue_.empty()) {
          recs.push_back(queue_.top());
          queue_.pop();
        }
        std::vector<PlainRecord>::reverse_iterator rit = recs.rbegin();
        std::vector<PlainRecord>::reverse_iterator ritend = recs.rend();
        while (rit != ritend) {
          if (pk_) std::cout << rit->key << "\t";
          std::cout << rit->text << std::endl;
          ++rit;
        }
      }
      kc::Regex regex_;
      int64_t max_;
      bool ts_;
      bool pk_;
      kc::Mutex lock_;
      std::priority_queue<PlainRecord> queue_;
    };
    VisitorImpl visitor(nquery, max, ts, pk);
    if (!db.scan_parallel(&visitor, THREADNUM)) {
      dberrprint(&db, "DB::scan_parallel failed");
      err = true;
    }
  } else {
    std::string qstr(nquery);
    if (mode == 'f') qstr.append("\t");
    kc::TreeDB::Cursor* cur = db.cursor();
    cur->jump(qstr);
    char* kbuf;
    size_t ksiz;
    const char* vbuf;
    size_t vsiz;
    while (max > 0 && (kbuf = cur->get(&ksiz, &vbuf, &vsiz, true)) != NULL) {
      if (ksiz >= qstr.size() && !std::memcmp(kbuf, qstr.data(), qstr.size())) {
        if (pk) {
          while (ksiz > 0 && kbuf[ksiz-1] != '\t') {
            ksiz--;
          }
          if (ksiz > 0 && kbuf[ksiz-1] == '\t') ksiz--;
          std::cout.write(kbuf, ksiz);
          std::cout << "\t";
        }
        std::cout.write(vbuf, vsiz);
        std::cout << std::endl;
      } else {
        max = 0;
      }
      delete[] kbuf;
      max--;
    }
    delete cur;
  }
  if (!db.close()) {
    dberrprint(&db, "DB::close failed");
    err = true;
  }
  delete zcomp;
  return err ? 1 : 0;
}


// perform suggest command
static int32_t procsuggest(const char* path, const char* query, int64_t max, int32_t mode) {
  kc::TextDB db;
  if (!db.open(path, kc::TextDB::OREADER)) {
    dberrprint(&db, "DB::open failed");
    return 1;
  }
  std::string nquery;
  normalizequery(query, std::strlen(query), &nquery);
  bool err = false;
  if (mode == 'm') {
    class VisitorImpl : public kc::DB::Visitor {
     public:
      VisitorImpl(const std::string& query, int64_t max) :
          query_(query), max_(max), lock_(), queue_() {}
     private:
      const char* visit_full(const char* kbuf, size_t ksiz,
                             const char* vbuf, size_t vsiz, size_t* sp) {
        std::string key(vbuf, vsiz);
        if (key.find(query_) != std::string::npos) {
          kc::ScopedMutex lock(&lock_);
          if ((int64_t)queue_.size() < max_) {
            PlainRecord rec = { key, 0, "" };
            queue_.push(rec);
          } else {
            const PlainRecord& top = queue_.top();
            if (!top.less(key, 0)) {
              queue_.pop();
              PlainRecord rec = { key, 0, "" };
              queue_.push(rec);
            }
          }
        }
        return NOP;
      }
      void visit_after() {
        std::vector<PlainRecord> recs;
        while (!queue_.empty()) {
          recs.push_back(queue_.top());
          queue_.pop();
        }
        std::vector<PlainRecord>::reverse_iterator rit = recs.rbegin();
        std::vector<PlainRecord>::reverse_iterator ritend = recs.rend();
        while (rit != ritend) {
          std::cout << rit->key << std::endl;
          ++rit;
        }
      }
      std::string query_;
      int64_t max_;
      kc::Mutex lock_;
      std::priority_queue<PlainRecord> queue_;
    };
    VisitorImpl visitor(nquery, max);
    if (!db.scan_parallel(&visitor, THREADNUM)) {
      dberrprint(&db, "DB::scan_parallel failed");
      err = true;
    }
  } else if (mode == 'r') {
    class VisitorImpl : public kc::DB::Visitor {
     public:
      VisitorImpl(const std::string& query, int64_t max) :
          regex_(), max_(max), lock_(), queue_() {
        regex_.compile(query, kc::Regex::MATCHONLY);
      }
     private:
      const char* visit_full(const char* kbuf, size_t ksiz,
                             const char* vbuf, size_t vsiz, size_t* sp) {
        std::string key(vbuf, vsiz);
        if (regex_.match(key)) {
          kc::ScopedMutex lock(&lock_);
          if ((int64_t)queue_.size() < max_) {
            PlainRecord rec = { key, 0, "" };
            queue_.push(rec);
          } else {
            const PlainRecord& top = queue_.top();
            if (!top.less(key, 0)) {
              queue_.pop();
              PlainRecord rec = { key, 0, "" };
              queue_.push(rec);
            }
          }
        }
        return NOP;
      }
      void visit_after() {
        std::vector<PlainRecord> recs;
        while (!queue_.empty()) {
          recs.push_back(queue_.top());
          queue_.pop();
        }
        std::vector<PlainRecord>::reverse_iterator rit = recs.rbegin();
        std::vector<PlainRecord>::reverse_iterator ritend = recs.rend();
        while (rit != ritend) {
          std::cout << rit->key << std::endl;
          ++rit;
        }
      }
      kc::Regex regex_;
      int64_t max_;
      kc::Mutex lock_;
      std::priority_queue<PlainRecord> queue_;
    };
    VisitorImpl visitor(nquery, max);
    if (!db.scan_parallel(&visitor, THREADNUM)) {
      dberrprint(&db, "DB::scan_parallel failed");
      err = true;
    }
  } else {
    class VisitorImpl : public kc::DB::Visitor {
     public:
      VisitorImpl(const std::string& query, int64_t max) :
          qbuf_(), qsiz_(0), max_(max),
          thres_(0), minsiz_(0), maxsiz_(0), lock_(), queue_() {
        qsiz_ = query.size();
        qbuf_ = new uint32_t[qsiz_+1];
        utftoucs(query.data(), query.size(), qbuf_, &qsiz_);
        if (qsiz_ > kc::UINT8MAX) qsiz_ = kc::UINT8MAX;
        thres_ = qsiz_ / AMBGRATIO;
        if (thres_ < AMBGMIN) thres_ = AMBGMIN;
        minsiz_ = qsiz_ > thres_ ? qsiz_ - thres_ : 0;
        maxsiz_ = qsiz_ + thres_;
      }
      ~VisitorImpl() {
        delete[] qbuf_;
      }
     private:
      const char* visit_full(const char* kbuf, size_t ksiz,
                             const char* vbuf, size_t vsiz, size_t* sp) {
        uint32_t ucsstack[kc::UINT8MAX];
        uint32_t* ucs = vsiz > sizeof(ucsstack) / sizeof(*ucsstack) ?
            new uint32_t[vsiz] : ucsstack;
        size_t usiz;
        utftoucs(vbuf, vsiz, ucs, &usiz);
        if (usiz > kc::UINT8MAX) usiz = kc::UINT8MAX;
        if (usiz < minsiz_ || usiz > maxsiz_) {
          if (ucs != ucsstack) delete[] ucs;
          return NOP;
        }
        size_t dist = levdist(ucs, usiz, qbuf_, qsiz_);
        if (dist <= thres_) {
          std::string key(vbuf, vsiz);
          kc::ScopedMutex lock(&lock_);
          if ((int64_t)queue_.size() < max_) {
            AmbiguousRecord rec = { dist, key, 0, "" };
            queue_.push(rec);
          } else {
            const AmbiguousRecord& top = queue_.top();
            if (!top.less(dist, key, 0)) {
              queue_.pop();
              AmbiguousRecord rec = { dist, key, 0, "" };
              queue_.push(rec);
            }
          }
        }
        if (ucs != ucsstack) delete[] ucs;
        return NOP;
      }
      void visit_after() {
        std::vector<AmbiguousRecord> recs;
        while (!queue_.empty()) {
          recs.push_back(queue_.top());
          queue_.pop();
        }
        std::vector<AmbiguousRecord>::reverse_iterator rit = recs.rbegin();
        std::vector<AmbiguousRecord>::reverse_iterator ritend = recs.rend();
        while (rit != ritend) {
          std::cout << rit->key << "\t" << rit->dist << std::endl;
          ++rit;
        }
      }
      uint32_t* qbuf_;
      size_t qsiz_;
      int64_t max_;
      size_t thres_;
      size_t minsiz_;
      size_t maxsiz_;
      kc::Mutex lock_;
      std::priority_queue<AmbiguousRecord> queue_;
    };
    VisitorImpl visitor(nquery, max);
    if (!db.scan_parallel(&visitor, THREADNUM)) {
      dberrprint(&db, "DB::scan_parallel faileda");
      err = true;
    }
  }
  if (!db.close()) {
    dberrprint(&db, "DB::close failed");
    err = true;
  }
  return err ? 1 : 0;
}



// END OF FILE