# Libreoffice-voikko: Linguistic extension for LibreOffice
# Copyright (C) 2015 Harri Pitkänen <hatapitk@iki.fi>
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at http://mozilla.org/MPL/2.0/.
# 
# Alternatively, the contents of this file may be used under the terms of
# the GNU General Public License Version 3 or later (the "GPL"), in which
# case the provisions of the GPL are applicable instead of those above.

import logging
import os
import platform
from libvoikko import Voikko, VoikkoException
from collections import defaultdict
from threading import RLock
from com.sun.star.lang import Locale

class Bcp47ToLoMapping:
	"""Pairs one BCP 47 language tag with a LibreOffice language/region code.

	Attributes:
		bcpTag: the BCP 47 tag.
		loLanguage: language code to put into the LibreOffice locale.
		loRegion: region code to put into the LibreOffice locale (may be "").
	"""

	def __init__(self, bcpTag, loLanguage, loRegion):
		# Plain value holder: the three components are stored exactly as given.
		self.bcpTag, self.loLanguage, self.loRegion = bcpTag, loLanguage, loRegion

# Table of known BCP 47 tag -> LibreOffice (language, region) pairs.
# One BCP 47 tag may map to several LibreOffice locales; entries sharing a tag
# are grouped by VoikkoHandlePool when it builds its lookup map, and their
# relative order in this list is preserved there.
# Each (tag, language, region) triple must appear only once, otherwise the
# same locale is reported twice as supported.
BCP_TO_LO_MAPPING = [
	Bcp47ToLoMapping("af", "af", "NA"),
	Bcp47ToLoMapping("af", "af", "ZA"),
	Bcp47ToLoMapping("am", "am", "ET"),
	Bcp47ToLoMapping("ar", "ar", "AE"),
	Bcp47ToLoMapping("ar", "ar", "BH"),
	Bcp47ToLoMapping("ar", "ar", "DJ"),
	Bcp47ToLoMapping("ar", "ar", "DZ"),
	Bcp47ToLoMapping("ar", "ar", "EG"),
	Bcp47ToLoMapping("ar", "ar", "ER"),
	Bcp47ToLoMapping("ar", "ar", "IL"),
	Bcp47ToLoMapping("ar", "ar", "IQ"),
	Bcp47ToLoMapping("ar", "ar", "JO"),
	Bcp47ToLoMapping("ar", "ar", "KM"),
	Bcp47ToLoMapping("ar", "ar", "KW"),
	Bcp47ToLoMapping("ar", "ar", "LB"),
	Bcp47ToLoMapping("ar", "ar", "LY"),
	Bcp47ToLoMapping("ar", "ar", "MA"),
	Bcp47ToLoMapping("ar", "ar", "MR"),
	Bcp47ToLoMapping("ar", "ar", "OM"),
	Bcp47ToLoMapping("ar", "ar", "PS"),
	Bcp47ToLoMapping("ar", "ar", "QA"),
	Bcp47ToLoMapping("ar", "ar", "SA"),
	Bcp47ToLoMapping("ar", "ar", "SD"),
	Bcp47ToLoMapping("ar", "ar", "SO"),
	Bcp47ToLoMapping("ar", "ar", "SY"),
	Bcp47ToLoMapping("ar", "ar", "TD"),
	Bcp47ToLoMapping("ar", "ar", "TN"),
	Bcp47ToLoMapping("ar", "ar", "YE"),
	Bcp47ToLoMapping("bn", "bn", "BD"),
	Bcp47ToLoMapping("bn", "bn", "IN"),
	Bcp47ToLoMapping("ca", "ca", "AD"),
	Bcp47ToLoMapping("ca", "ca", "ES"),
	Bcp47ToLoMapping("cs", "cs", "CZ"),
	Bcp47ToLoMapping("csb", "csb", "PL"),
	Bcp47ToLoMapping("cv", "cv", "RU"),
	Bcp47ToLoMapping("cy", "cy", "GB"),
	Bcp47ToLoMapping("da", "da", "DK"),
	Bcp47ToLoMapping("de", "de", "DE"),
	Bcp47ToLoMapping("de-AT", "de", "AT"),
	Bcp47ToLoMapping("de-BE", "de", "BE"),
	Bcp47ToLoMapping("de-CH", "de", "CH"),
	Bcp47ToLoMapping("de-LU", "de", "LU"),
	Bcp47ToLoMapping("el", "el", "GR"),
	Bcp47ToLoMapping("en-US", "en", "US"),
	Bcp47ToLoMapping("en-US", "en", "ZA"),
	Bcp47ToLoMapping("eo", "eo", ""),
	Bcp47ToLoMapping("es", "es", "ES"),
	Bcp47ToLoMapping("es-MX", "es", "MX"),
	Bcp47ToLoMapping("et", "et", "EE"),
	Bcp47ToLoMapping("eu", "eu", ""),
	Bcp47ToLoMapping("fa", "fa", "IR"),
	Bcp47ToLoMapping("fi", "fi", "FI"),
	Bcp47ToLoMapping("fo", "fo", "FO"),
	Bcp47ToLoMapping("fr", "fr", "FR"),
	Bcp47ToLoMapping("fr-BE", "fr", "BE"),
	Bcp47ToLoMapping("fr-CA", "fr", "CA"),
	Bcp47ToLoMapping("fr-CH", "fr", "CH"),
	Bcp47ToLoMapping("fr-LU", "fr", "LU"),
	Bcp47ToLoMapping("fr-MC", "fr", "MC"),
	Bcp47ToLoMapping("fur", "fur", "IT"),
	Bcp47ToLoMapping("fy", "fy", "NL"),
	Bcp47ToLoMapping("ga", "ga", "IE"),
	Bcp47ToLoMapping("gd", "gd", "GB"),
	Bcp47ToLoMapping("gl", "gl", "ES"),
	Bcp47ToLoMapping("gn", "gug", "PY"),
	Bcp47ToLoMapping("gu", "gu", "IN"),
	Bcp47ToLoMapping("he", "he", "IL"),
	Bcp47ToLoMapping("hi", "hi", "IN"),
	Bcp47ToLoMapping("hr", "hr", "BA"),
	Bcp47ToLoMapping("hr", "hr", "HR"),
	Bcp47ToLoMapping("hu", "hu", "HU"),
	Bcp47ToLoMapping("ia", "ia", ""),
	Bcp47ToLoMapping("id", "id", "ID"),
	Bcp47ToLoMapping("is", "is", "IS"),
	Bcp47ToLoMapping("it", "it", "CH"),
	Bcp47ToLoMapping("it", "it", "IT"),
	Bcp47ToLoMapping("kca", "kca", "RU"),
	Bcp47ToLoMapping("kk", "kk", "KZ"),
	Bcp47ToLoMapping("kl", "kl", "GL"),
	Bcp47ToLoMapping("koi", "koi", "RU"),
	Bcp47ToLoMapping("kpv", "kpv", "RU"),
	Bcp47ToLoMapping("ku", "ku", "TR"),
	Bcp47ToLoMapping("ku", "ku", "SY"),
	Bcp47ToLoMapping("kum", "kum", "RU"),
	Bcp47ToLoMapping("ky", "ky", "CN"),
	Bcp47ToLoMapping("ky", "ky", "KG"),
	Bcp47ToLoMapping("la", "la", "VA"),
	Bcp47ToLoMapping("liv", "liv", "LV"),
	Bcp47ToLoMapping("liv", "liv", "RU"),
	Bcp47ToLoMapping("ln", "ln", "CD"),
	Bcp47ToLoMapping("lt", "lt", "LT"),
	Bcp47ToLoMapping("lv", "lv", "LV"),
	Bcp47ToLoMapping("mdf", "mdf", "RU"),
	Bcp47ToLoMapping("mhr", "mhr", "RU"),
	Bcp47ToLoMapping("mk", "mk", "MK"),
	Bcp47ToLoMapping("ml", "ml", "IN"),
	Bcp47ToLoMapping("mrj", "mrj", "RU"),
	Bcp47ToLoMapping("ms", "ms", ""),
	Bcp47ToLoMapping("ms", "ms", "BN"),
	Bcp47ToLoMapping("ms", "ms", "MY"),
	Bcp47ToLoMapping("myv", "myv", "RU"),
	Bcp47ToLoMapping("nb", "nb", "NO"),
	Bcp47ToLoMapping("ne", "ne", "IN"),
	Bcp47ToLoMapping("ne", "ne", "NP"),
	Bcp47ToLoMapping("nio", "nio", "RU"),
	Bcp47ToLoMapping("nl", "nl", "BE"),
	Bcp47ToLoMapping("nl", "nl", "NL"),
	Bcp47ToLoMapping("nn", "nn", "NO"),
	Bcp47ToLoMapping("nog", "nog", "RU"),
	Bcp47ToLoMapping("nr", "nr", "ZA"),
	Bcp47ToLoMapping("nso", "ns", "ZA"),
	Bcp47ToLoMapping("nso", "nso", "ZA"),
	Bcp47ToLoMapping("ny", "ny", "MW"),
	Bcp47ToLoMapping("oc", "oc", "FR"),
	Bcp47ToLoMapping("olo", "olo", "RU"),
	Bcp47ToLoMapping("or", "or", "IN"),
	Bcp47ToLoMapping("pa", "pa", "IN"),
	Bcp47ToLoMapping("pa", "pa", "PK"),
	Bcp47ToLoMapping("pap-BQ", "pap", "BQ"),
	Bcp47ToLoMapping("pap-CW", "pap", "CW"),
	Bcp47ToLoMapping("pjt", "pjt", "AU"),
	Bcp47ToLoMapping("pl", "pl", "PL"),
	Bcp47ToLoMapping("pt", "pt", "PT"),
	Bcp47ToLoMapping("pt-BR", "pt", "BR"),
	Bcp47ToLoMapping("qu", "qu", "BO"),
	Bcp47ToLoMapping("qu", "qu", "EC"),
	Bcp47ToLoMapping("qu", "qu", "PE"),
	Bcp47ToLoMapping("ro", "ro", "MD"),
	Bcp47ToLoMapping("ro", "ro", "RO"),
	Bcp47ToLoMapping("ru", "ru", "RU"),
	Bcp47ToLoMapping("rw", "rw", "RW"),
	Bcp47ToLoMapping("se", "se", "FI"),
	Bcp47ToLoMapping("se", "se", "NO"),
	Bcp47ToLoMapping("se", "se", "SE"),
	Bcp47ToLoMapping("sid", "sid", "ET"),
	Bcp47ToLoMapping("sjd", "sjd", "RU"),
	Bcp47ToLoMapping("sk", "sk", "SK"),
	Bcp47ToLoMapping("sl", "sl", "SI"),
	Bcp47ToLoMapping("sma", "sma", "NO"),
	Bcp47ToLoMapping("sma", "sma", "SE"),
	Bcp47ToLoMapping("smj", "smj", "NO"),
	Bcp47ToLoMapping("smj", "smj", "SE"),
	Bcp47ToLoMapping("smn", "smn", "FI"),
	Bcp47ToLoMapping("sms", "sms", "FI"),
	Bcp47ToLoMapping("ss", "ss", "ZA"),
	Bcp47ToLoMapping("st", "st", "ZA"),
	Bcp47ToLoMapping("sv", "sv", "FI"),
	Bcp47ToLoMapping("sv", "sv", "SE"),
	Bcp47ToLoMapping("sw", "sw", "KE"),
	Bcp47ToLoMapping("sw", "sw", "TZ"),
	Bcp47ToLoMapping("ta", "ta", "IN"),
	Bcp47ToLoMapping("tet", "tet", "ID"),
	Bcp47ToLoMapping("tet", "tet", "TL"),
	Bcp47ToLoMapping("tl", "tl", "PH"),
	Bcp47ToLoMapping("tn", "tn", "BW"),
	Bcp47ToLoMapping("tn", "tn", "ZA"),
	Bcp47ToLoMapping("ts", "ts", "ZA"),
	Bcp47ToLoMapping("tt", "tt", "RU"),
	Bcp47ToLoMapping("udm", "udm", "RU"),
	Bcp47ToLoMapping("uk", "uk", "UA"),
	Bcp47ToLoMapping("vep", "vep", "RU"),
	Bcp47ToLoMapping("vi", "vi", "VN"),
	Bcp47ToLoMapping("vro", "vro", "EE"),
	Bcp47ToLoMapping("xh", "xh", "ZA"),
	Bcp47ToLoMapping("yrk", "yrk", "RU"),
	# Newer block of language codes, not yet sorted and merged into the list
	# above. Entries that merely repeated a mapping from the list above
	# ("ml"/"IN" and "tl"/"PH") are intentionally not listed a second time.
	Bcp47ToLoMapping("an", "an", "ES"),
	Bcp47ToLoMapping("as", "as", "IN"),
	Bcp47ToLoMapping("as-BT", "as", "BT"),
	Bcp47ToLoMapping("ast", "ast", "ES"),
	Bcp47ToLoMapping("av", "av", "RU"),
	Bcp47ToLoMapping("av-AZ", "av", "AZ"),
	Bcp47ToLoMapping("av-GE", "av", "GE"),
	Bcp47ToLoMapping("av-KZ", "av", "KZ"),
	Bcp47ToLoMapping("azj", "azj", "AZ"),
	Bcp47ToLoMapping("azj-RU", "azj", "RU"),
	Bcp47ToLoMapping("bak", "bak", "RU"),
	Bcp47ToLoMapping("bak-KZ", "bak", "KZ"),
	Bcp47ToLoMapping("be", "be", "BY"),
	Bcp47ToLoMapping("bg", "bg", "BG"),
	Bcp47ToLoMapping("bla", "bla", "CA"),
	Bcp47ToLoMapping("bla-US", "bla", "US"),
	Bcp47ToLoMapping("br", "br", "FR"),
	Bcp47ToLoMapping("bxr", "bxr", "RU"),
	Bcp47ToLoMapping("ce", "ce", "RU"),
	Bcp47ToLoMapping("ceb", "ceb", "PH"),
	Bcp47ToLoMapping("chp", "chp", "CA"),
	Bcp47ToLoMapping("ciw", "ciw", "US"),
	Bcp47ToLoMapping("cos", "cos", "FR"),
	Bcp47ToLoMapping("crk", "crk", "CA"),
	Bcp47ToLoMapping("crk-US", "crk", "US"),
	Bcp47ToLoMapping("dsb", "dsb", "DE"),
	Bcp47ToLoMapping("eve", "eve", "RU"),
	Bcp47ToLoMapping("evn", "evn", "CN"),
	Bcp47ToLoMapping("evn-RU", "evn", "RU"),
	Bcp47ToLoMapping("fkv", "fkv", "NO"),
	Bcp47ToLoMapping("fry", "fry", "NL"),
	Bcp47ToLoMapping("glk", "glk", "IR"),
	Bcp47ToLoMapping("grn", "grn", "PY"),
	Bcp47ToLoMapping("guc", "guc", "CO"),
	Bcp47ToLoMapping("guc-VE", "guc", "VE"),
	Bcp47ToLoMapping("gv", "gv", "IM"),
	Bcp47ToLoMapping("hdn", "hdn", "CA"),
	Bcp47ToLoMapping("hdn-US", "hdn", "US"),
	Bcp47ToLoMapping("hsb", "hsb", "DE"),
	Bcp47ToLoMapping("hy", "hy", "AM"),
	Bcp47ToLoMapping("ik", "ik", "US"),
	Bcp47ToLoMapping("izh", "izh", "RU"),
	Bcp47ToLoMapping("kaa", "kaa", "UZ"),
	Bcp47ToLoMapping("kaa-RU", "kaa", "RU"),
	Bcp47ToLoMapping("kaz", "kaz", "KZ"),
	Bcp47ToLoMapping("khk", "khk", "MN"),
	Bcp47ToLoMapping("khk-KG", "khk", "KG"),
	Bcp47ToLoMapping("khk-RU", "khk", "RU"),
	Bcp47ToLoMapping("khk-CN", "khk", "CN"),
	# The ISO 3166-1 alpha-2 code of the United Kingdom is "GB", not "UK".
	Bcp47ToLoMapping("kw", "kw", "GB"),
	Bcp47ToLoMapping("ltz", "ltz", "LU"),
	Bcp47ToLoMapping("mr", "mr", "IN"),
	Bcp47ToLoMapping("mt", "mt", "MT"),
	Bcp47ToLoMapping("ndl", "ndl", "CD"),
	Bcp47ToLoMapping("quz", "quz", "PE"),
	Bcp47ToLoMapping("qve", "qve", "PE"),
	Bcp47ToLoMapping("rup", "rup", "MK"),
	Bcp47ToLoMapping("rup-GR", "rup", "GR"),
	Bcp47ToLoMapping("rup-RO", "rup", "RO"),
	Bcp47ToLoMapping("sc", "sc", "IT"),
	Bcp47ToLoMapping("sel", "sel", "RU"),
	Bcp47ToLoMapping("si", "si", "LK"),
	Bcp47ToLoMapping("sje", "sje", "SE"),
	Bcp47ToLoMapping("so", "so", "SO"),
	Bcp47ToLoMapping("sq", "sq", "AL"),
	Bcp47ToLoMapping("sto", "sto", "CA"),
	Bcp47ToLoMapping("te", "te", "IN"),
	Bcp47ToLoMapping("tg", "tg", "TJ"),
	Bcp47ToLoMapping("tg-UZ", "tg", "UZ"),
	Bcp47ToLoMapping("tk", "tk", "TM"),
	Bcp47ToLoMapping("tku", "tku", "MX"),
	Bcp47ToLoMapping("tus", "tus", "CA"),
	Bcp47ToLoMapping("tyv", "tyv", "RU"),
	Bcp47ToLoMapping("ur", "ur", "PK"),
	Bcp47ToLoMapping("uz", "uz", "UZ"),
	Bcp47ToLoMapping("uz-KG", "uz", "KG"),
	Bcp47ToLoMapping("zu", "zu", "ZA"),
	Bcp47ToLoMapping("zu-LS", "zu", "LS"),
	Bcp47ToLoMapping("zu-SZ", "zu", "SZ"),
]

class VoikkoHandlePool:
	"""Pool of libvoikko handles, one per language tag.

	Handles are opened lazily by getHandle() and cached. A language whose
	initialization failed is remembered together with its error message and
	is not retried until closeAllHandles() is called. Global boolean and
	integer options are stored here and applied to every handle, both the
	ones already open and the ones opened later.
	"""

	# Shared instance, created on first call to getInstance().
	instance = None
	# Re-entrant lock published as VoikkoHandlePool.mutex. Nothing in this
	# class acquires it; presumably callers use it to serialize access.
	mutex = RLock()

	def __init__(self):
		# Caches of supported locales, filled on first query.
		self.__supportedSpellingLocales = []
		self.__supportedHyphenationLocales = []
		self.__supportedGrammarCheckingLocales = []
		self.__installationPath = None
		# language tag -> open Voikko handle
		self.__handles = {}
		# language tag -> error message of the failed initialization
		self.__initializationErrors = {}
		self.__globalBooleanOptions = {}
		self.__globalIntegerOptions = {}
		self.__preferredGlobalVariant = None
		# BCP 47 tag -> list of Bcp47ToLoMapping, in table order
		self.__bcpToOOoMap = defaultdict(list)
		for m in BCP_TO_LO_MAPPING:
			self.__bcpToOOoMap[m.bcpTag].append(m)

	@staticmethod
	def getInstance():
		"""Returns the shared pool, creating it on first use."""
		if VoikkoHandlePool.instance is None:
			VoikkoHandlePool.instance = VoikkoHandlePool()
		return VoikkoHandlePool.instance

	def getInstallationPath(self):
		"""Returns the path given to setInstallationPath(), or None if unset."""
		return self.__installationPath

	def getDictionaryPath(self):
		"""Returns the "voikko" directory below the installation path."""
		return os.path.join(self.getInstallationPath(), "voikko")

	def __openHandleWithVariant(self, language, fullVariant):
		"""Opens a handle for fullVariant and caches it under language.

		All stored global options are applied to the new handle.
		Returns the handle, or None if libvoikko raised VoikkoException; in
		that case the error message is recorded for language.
		"""
		logging.debug("VoikkoHandlePool.__openHandleWithVariant")
		try:
			voikkoHandle = Voikko(fullVariant, self.getDictionaryPath())
			self.__handles[language] = voikkoHandle
			for booleanOpt, booleanValue in self.__globalBooleanOptions.items():
				voikkoHandle.setBooleanOption(booleanOpt, booleanValue)
			for integerOpt, integerValue in self.__globalIntegerOptions.items():
				voikkoHandle.setIntegerOption(integerOpt, integerValue)
			return voikkoHandle
		except VoikkoException as e:
			self.__initializationErrors[language] = e.args[0]
			return None

	def __openHandle(self, language):
		"""Opens a handle for language, preferring the global variant if set.

		With a preferred variant the tag "<language>-x-<variant>" is tried
		first; if that fails the plain language is tried. The error message of
		a failed variant attempt stays recorded even when the fallback works.
		"""
		if self.__preferredGlobalVariant is not None:
			languageWithVariant = language + "-x-" + self.__preferredGlobalVariant
			handle = self.__openHandleWithVariant(language, languageWithVariant)
			if handle is not None:
				return handle
		return self.__openHandleWithVariant(language, language)

	def getHandle(self, locale):
		"""Returns the handle for locale, opening it if necessary.

		For "qlt" locales the language tag is taken from locale.Variant,
		otherwise from locale.Language. Returns None if the language failed
		to initialize earlier or fails now.
		"""
		language = locale.Variant if locale.Language == "qlt" else locale.Language
		if language in self.__handles:
			return self.__handles[language]
		if language in self.__initializationErrors:
			# do not retry a language that is known to fail
			return None
		return self.__openHandle(language)

	def closeAllHandles(self):
		"""Terminates every open handle and forgets recorded failures."""
		for handle in self.__handles.values():
			handle.terminate()
		self.__handles.clear()
		self.__initializationErrors.clear()

	def setGlobalBooleanOption(self, option, value):
		"""Stores a boolean option and applies it to all open handles.

		Does nothing if the option already has the given value.
		"""
		if option in self.__globalBooleanOptions and self.__globalBooleanOptions[option] == value:
			return
		self.__globalBooleanOptions[option] = value
		for handle in self.__handles.values():
			handle.setBooleanOption(option, value)

	def setGlobalIntegerOption(self, option, value):
		"""Stores an integer option and applies it to all open handles.

		Does nothing if the option already has the given value.
		"""
		if option in self.__globalIntegerOptions and self.__globalIntegerOptions[option] == value:
			return
		self.__globalIntegerOptions[option] = value
		for handle in self.__handles.values():
			handle.setIntegerOption(option, value)

	def __addLocale(self, locales, language):
		"""Appends the LibreOffice locale(s) for a BCP 47 tag to locales.

		Tags listed in the mapping table yield one Locale per table entry.
		Unknown tags of at most three characters are used as the language
		code directly; longer unknown tags become "qlt" locales carrying the
		full tag in Variant.
		"""
		# .get() instead of indexing: a plain lookup on the defaultdict would
		# insert an empty list for every unknown tag.
		matchingMappings = self.__bcpToOOoMap.get(language, ())
		for bcpMapping in matchingMappings:
			locales.append(Locale(bcpMapping.loLanguage, bcpMapping.loRegion, ""))
		if not matchingMappings:
			if len(language) <= 3:
				# assume this is ISO 639-1 or ISO 639-3 code
				locales.append(Locale(language, "", ""))
			else:
				locales.append(Locale("qlt", "", language))

	def __getSupportedLocalesForOperation(self, localeList, localeOperation):
		"""Returns the supported locales for one operation as a tuple.

		localeList is the cache for the operation; localeOperation is called
		with the dictionary path and returns the supported language tags.
		"""
		# optimization: if we already have found some locales, don't search for more
		if len(localeList) == 0:
			for lang in localeOperation(self.getDictionaryPath()):
				self.__addLocale(localeList, lang)
		return tuple(localeList)

	def getSupportedSpellingLocales(self):
		"""Returns a tuple of locales for which spell checking is available."""
		return self.__getSupportedLocalesForOperation(self.__supportedSpellingLocales, Voikko.listSupportedSpellingLanguages)

	def getSupportedHyphenationLocales(self):
		"""Returns a tuple of locales for which hyphenation is available."""
		return self.__getSupportedLocalesForOperation(self.__supportedHyphenationLocales, Voikko.listSupportedHyphenationLanguages)

	def getSupportedGrammarLocales(self):
		"""Returns a tuple of locales for which grammar checking is available."""
		return self.__getSupportedLocalesForOperation(self.__supportedGrammarCheckingLocales, Voikko.listSupportedGrammarCheckingLanguages)

	def getInitializationStatus(self):
		"""Returns initialization status diagnostics"""
		initialized = "".join(key + " " for key in self.__handles)
		failed = "".join(key + ":'" + value + "' " for key, value in self.__initializationErrors.items())
		return "Init OK:[" + initialized + "] FAILED:[" + failed + "]"

	def setPreferredGlobalVariant(self, variant):
		"""Sets the preferred dictionary variant.

		A change closes all handles so that they are reopened with the new
		variant on next use.
		"""
		if variant != self.__preferredGlobalVariant:
			self.__preferredGlobalVariant = variant
			self.closeAllHandles()

	def setInstallationPath(self, path):
		"""Stores the installation path and points libvoikko at its library.

		The library search path is "<path>/voikko/<dir>" where <dir> is
		platform.system() and the components of platform.architecture()
		joined with "-".
		"""
		self.__installationPath = path
		searchPath = os.path.join(path, "voikko", platform.system() + "-" + "-".join(platform.architecture()))
		logging.debug("VoikkoHandlePool.setInstallationPath: library search path is %s", searchPath)
		Voikko.setLibrarySearchPath(searchPath)

	def getPreferredGlobalVariant(self):
		"""Returns the preferred dictionary variant, or None if unset."""
		return self.__preferredGlobalVariant

	def __containsLocale(self, localeToFind, locales):
		"""Tells whether localeToFind is covered by one of locales.

		A regular supported locale matches on equal Language and Country.
		A supported "qlt" locale matches if its Variant (the full tag) equals
		either the Language of localeToFind or, for a "qlt" localeToFind, its
		Variant. If nothing matches and localeToFind is a "qlt" locale whose
		tag is longer than 9 characters and ends in "-XX", the search is
		repeated without that two-character suffix.
		"""
		for locale in locales:
			if locale.Language == "qlt":
				# Language is only the "qlt" placeholder here, so it must not
				# be compared as if it were a real language code: otherwise
				# every "qlt" locale would match every other one.
				if locale.Variant == localeToFind.Language:
					return True
				if localeToFind.Language == "qlt" and locale.Variant == localeToFind.Variant:
					return True
			elif locale.Language == localeToFind.Language and locale.Country == localeToFind.Country:
				return True
		if localeToFind.Language == "qlt":
			# See if we can try again with a trimmed tag: some tags may contain extra
			# components that can be skipped while matching such as country in crk-Cans-CN
			tagToFind = localeToFind.Variant
			if len(tagToFind) > 9 and tagToFind[-3] == "-":
				loc = Locale("qlt", "", tagToFind[:-3])
				# must go through self: a bare __containsLocale would be
				# name-mangled and looked up as a (non-existent) global
				return self.__containsLocale(loc, locales)
		return False

	def supportsSpellingLocale(self, locale):
		"""Tells whether spell checking is available for locale."""
		return self.__containsLocale(locale, self.getSupportedSpellingLocales())

	def supportsHyphenationLocale(self, locale):
		"""Tells whether hyphenation is available for locale."""
		return self.__containsLocale(locale, self.getSupportedHyphenationLocales())

	def supportsGrammarLocale(self, locale):
		"""Tells whether grammar checking is available for locale."""
		return self.__containsLocale(locale, self.getSupportedGrammarLocales())