|
Packit |
2b3545 |
/* The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Libvoikko: Library of natural language processing tools.
 * The Initial Developer of the Original Code is Harri Pitkänen <hatapitk@iki.fi>.
 * Portions created by the Initial Developer are Copyright (C) 2010 - 2015
 * the Initial Developer. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *********************************************************************************/
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
#include "setup/LanguageTag.hpp"
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
using namespace std;
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
namespace libvoikko { namespace setup {
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
/**
 * Creates an empty language tag: language, script and private use
 * part are all empty strings.
 */
LanguageTag::LanguageTag()
    : language(),
      script(),
      privateUse() {
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
/**
 * Copy constructor. The new tag receives copies of the language,
 * script and private use part of languageTag.
 */
LanguageTag::LanguageTag(const LanguageTag & languageTag)
    : language(languageTag.language),
      script(languageTag.script),
      privateUse(languageTag.privateUse) {
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
/**
 * Converts the ASCII uppercase letters 'A'..'Z' in tagPart to lowercase,
 * modifying the string in place. Every other character (digits,
 * punctuation, bytes outside the ASCII letter range) is left untouched.
 *
 * The conversion is done by hand rather than through the C library case
 * functions, so the result does not depend on the active locale.
 *
 * @param tagPart the tag component to normalize
 */
static void lowercaseTagPart(std::string & tagPart) {
    // Distance between an uppercase ASCII letter and its lowercase form.
    const char caseOffset = 'a' - 'A';
    for (std::string::size_type i = 0; i < tagPart.size(); ++i) {
        const char current = tagPart[i];
        if (current >= 'A' && current <= 'Z') {
            tagPart[i] = static_cast<char>(current + caseOffset);
        }
    }
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
/**
 * Returns a reference to the stored language part of this tag.
 */
const string & LanguageTag::getLanguage() const {
    return this->language;
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
void LanguageTag::setLanguage(const string & language) {
|
|
Packit |
2b3545 |
size_t splitPos = language.find("_");
|
|
Packit |
2b3545 |
if (splitPos != string::npos) {
|
|
Packit |
2b3545 |
// if geographical area (such as FI in fi_FI) is given, discard it
|
|
Packit |
2b3545 |
this->language = language.substr(0, splitPos);
|
|
Packit |
2b3545 |
} else {
|
|
Packit |
2b3545 |
this->language = language;
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
lowercaseTagPart(this->language);
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
/**
 * Returns a reference to the stored script part of this tag.
 */
const string & LanguageTag::getScript() const {
    return this->script;
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
void LanguageTag::setScript(const string & script) {
|
|
Packit |
2b3545 |
if (script.length() == 4) {
|
|
Packit |
2b3545 |
this->script = script;
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
/**
 * Returns a reference to the stored private use part of this tag.
 */
const string & LanguageTag::getPrivateUse() const {
    return this->privateUse;
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
void LanguageTag::setPrivateUse(const string & privateUse) {
|
|
Packit |
2b3545 |
this->privateUse = privateUse;
|
|
Packit |
2b3545 |
for (size_t hyphenPos = this->privateUse.find("-"); hyphenPos != string::npos;
|
|
Packit |
2b3545 |
hyphenPos = this->privateUse.find("-")) {
|
|
Packit |
2b3545 |
this->privateUse.erase(hyphenPos, 1);
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
lowercaseTagPart(this->privateUse);
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
void LanguageTag::setLanguageAndScript(const string & languageAndScript) {
|
|
Packit |
2b3545 |
size_t splitPos = languageAndScript.find("-");
|
|
Packit |
2b3545 |
if (splitPos != string::npos) {
|
|
Packit |
2b3545 |
// TODO: the trailing part is script only if it is four letters long
|
|
Packit |
2b3545 |
// We don't support other components for now so we just accept is as script anyway.
|
|
Packit |
2b3545 |
setLanguage(languageAndScript.substr(0, splitPos));
|
|
Packit |
2b3545 |
setScript(languageAndScript.substr(splitPos + 1));
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
else {
|
|
Packit |
2b3545 |
setLanguage(languageAndScript);
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
void LanguageTag::setBcp47(const string & bcp) {
|
|
Packit |
2b3545 |
size_t splitPos = bcp.find("-x-");
|
|
Packit |
2b3545 |
if (splitPos != string::npos) {
|
|
Packit |
2b3545 |
setLanguageAndScript(bcp.substr(0, splitPos));
|
|
Packit |
2b3545 |
setPrivateUse(bcp.substr(splitPos + 3));
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
else {
|
|
Packit |
2b3545 |
setLanguageAndScript(bcp);
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
/**
 * Builds the string form of this tag.
 *
 * The result starts with the language. A non-empty script is appended
 * after "-", and a non-empty private use part is appended after "-x-".
 *
 * @return the assembled tag string
 */
string LanguageTag::toBcp47() const {
    string result(language);
    if (!script.empty()) {
        result += "-";
        result += script;
    }
    if (!privateUse.empty()) {
        result += "-x-";
        result += privateUse;
    }
    return result;
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
/**
 * Orders language tags by language first, then by script, then by the
 * private use part. Each component is compared as a string.
 *
 * @return true if l1 sorts before l2
 */
bool operator<(const LanguageTag & l1, const LanguageTag & l2) {
    const int languageOrder = l1.language.compare(l2.language);
    if (languageOrder != 0) {
        return languageOrder < 0;
    }
    const int scriptOrder = l1.script.compare(l2.script);
    if (scriptOrder != 0) {
        return scriptOrder < 0;
    }
    return l1.privateUse.compare(l2.privateUse) < 0;
}
|
|
Packit |
2b3545 |
|
|
Packit |
2b3545 |
} }
|