Blob Blame History Raw
/* The contents of this file are subject to the Mozilla Public License Version 
 * 1.1 (the "License"); you may not use this file except in compliance with 
 * the License. You may obtain a copy of the License at 
 * http://www.mozilla.org/MPL/
 * 
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 * 
 * The Original Code is Libvoikko: Library of natural language processing tools.
 * The Initial Developer of the Original Code is Harri Pitkänen <hatapitk@iki.fi>.
 * Portions created by the Initial Developer are Copyright (C) 2010
 * the Initial Developer. All Rights Reserved.
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *********************************************************************************/

#include "spellchecker/SpellerCache.hpp"
#include <cwchar>

namespace libvoikko { namespace spellchecker {

/* A small result cache:
 * word_length order elements wchars offset
 * 1           4     16       16     0
 * 2           5     32       64     16
 * 3           6     64       192    80
 * 4           7     128      512    272
 * 5           7     128      640    784
 * 6           7     128      768    1424
 * 7           7     128      896    2192
 * 8           7     128      1024   3088
 * 9           7     128      1152   4112
 * 10          7     128      1280   5264
 * total size      sizeof(wchar_t) * 6544
 */

static const int VOIKKO_HASH_ORDERS[]   =  {0, 3+0,  5+0,  6+0,   7+0,   7+0,    7+0,    7+0,    7+0,    7+0,    7+0};
static const int VOIKKO_CACHE_OFFSETS[] =  {0,   0, 1*16, 1*80, 1*272, 1*784, 1*1424, 1*2192, 1*3088, 1*4112, 1*5264};
static const int VOIKKO_META_OFFSETS[]  =  {0,   0, 1*16, 1*48, 1*112, 1*240,  1*368,  1*496,  1*624,  1*752,  1*880};

/** 
 * Simple string hashing algorithm
 * @param word string to hash
 * @param len length of the word
 * @param order order of the resulting hash value
 * @return integer from range [0, 2^order - 1]
 */
static int voikko_hash(const wchar_t * word, size_t len, int order) {
	int hash = 0;
	for (size_t counter = 0; counter < len; counter++) {
		hash = (hash * 37 + word[counter]) % (1 << order);
	}
	return hash;
}

SpellerCache::SpellerCache(int sizeParam) : sizeParam(sizeParam) {
	words = new wchar_t[6544 << sizeParam];
	memset(words, 0, 6544 * sizeof(wchar_t) << sizeParam);
	spellResults = new char[1008 * sizeof(char) << sizeParam];
	memset(spellResults, 0, 1008 * sizeof(char) << sizeParam);
}

SpellerCache::~SpellerCache() {
	delete[] spellResults;
	delete[] words;
}

int SpellerCache::getSizeParam() const {
	return sizeParam;
}

bool SpellerCache::isInCache(const wchar_t * word, size_t wlen) const {
	if (wlen > 10) {
		return false;
	}
	int hashCode = voikko_hash(word, wlen, VOIKKO_HASH_ORDERS[wlen] + sizeParam);
	int cacheOffset = (VOIKKO_CACHE_OFFSETS[wlen] << sizeParam) + hashCode * static_cast<int>(wlen);
	return wcsncmp(words + cacheOffset, word, wlen) == 0;
}

spellresult SpellerCache::getSpellResult(const wchar_t * word, size_t wlen) const {
	int hashCode = voikko_hash(word, wlen, VOIKKO_HASH_ORDERS[wlen] + sizeParam);
	int resultOffset = (VOIKKO_META_OFFSETS[wlen] << sizeParam) + hashCode;
	if (spellResults[resultOffset] == 'i') {
		return SPELL_CAP_FIRST;
	} else {
		return SPELL_OK;
	}
}

void SpellerCache::setSpellResult(const wchar_t * word, size_t wlen, spellresult result) {
	if (wlen > 10 || (result != SPELL_OK && result != SPELL_CAP_FIRST)) {
		return;
	}
	int hashCode = voikko_hash(word, wlen, VOIKKO_HASH_ORDERS[wlen] + sizeParam);
	int cacheOffset = (VOIKKO_CACHE_OFFSETS[wlen] << sizeParam) + hashCode * static_cast<int>(wlen);
	int resultOffset = (VOIKKO_META_OFFSETS[wlen] << sizeParam) + hashCode;
	wcsncpy(words + cacheOffset, word, wlen);
	spellResults[resultOffset] = (result == SPELL_OK) ? 'p' : 'i';
}

} }