Blob Blame History Raw
#!@PYTHON@ -s
#
# Copyright (c) 2010-2018 Shaun McCance <shaunm@gnome.org>
#
# ITS Tool program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# ITS Tool is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with ITS Tool; if not, write to the Free Software Foundation, 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
#
from __future__ import print_function
from __future__ import unicode_literals

# Version string. "@VERSION@" is a placeholder token, presumably substituted
# when this template is processed by the build system -- TODO confirm.
VERSION="@VERSION@"
# Data directory placeholder. Document.apply_its_rules adds it to the search
# path for built-in ITS rule files when XDG_DATA_DIRS is unset.
DATADIR="@DATADIR@"

import gettext
import hashlib
import libxml2
import optparse
import os
import os.path
import re
import sys
import time
import io

# Text-type compatibility names shared by the rest of the file:
#   string_types -- tuple suitable for isinstance() checks against text
#   ustr_type    -- the text type itself
#   ustr         -- callable converting a value (optionally with an encoding)
#                   to text
PY3 = sys.version_info[0] == 3
if not PY3:
    string_types = (basestring,)
    ustr = ustr_type = unicode
else:
    string_types = (str,)
    ustr_type = str

    def ustr(s, encoding=None):
        """Return s as text, decoding with encoding when one is given."""
        if isinstance(s, str):
            return s
        if encoding:
            return str(s, encoding)
        return str(s)

# XML namespace URIs used when matching elements and attributes.
# ITS namespace (rules, param, translateRule, ... are matched against it).
NS_ITS = 'http://www.w3.org/2005/11/its'
# itstool extension namespace (match, dropRule, contextRule, credits, ...).
NS_ITST = 'http://itstool.org/extensions/'
# Not referenced in this part of the file -- presumably used further down.
NS_BLANK = 'http://itstool.org/extensions/blank/'
# XLink namespace; used for the href attribute on its:rules elements.
NS_XLINK = 'http://www.w3.org/1999/xlink'
# The reserved XML namespace.
NS_XML = 'http://www.w3.org/XML/1998/namespace'

class NoneTranslations:
    """Translations stand-in whose every lookup method returns None.

    It offers the singular and plural lookup method names of a gettext
    translations object, so it can be passed where one is expected.
    """

    def gettext(self, message):
        """Singular lookup: always None."""
        return None

    # The other singular lookups behave exactly like gettext.
    lgettext = gettext
    ugettext = gettext

    def ngettext(self, msgid1, msgid2, n):
        """Plural lookup: always None."""
        return None

    # The other plural lookups behave exactly like ngettext.
    lngettext = ngettext
    ungettext = ngettext


class MessageList (object):
    """Ordered collection of messages plus an index from XML node to message."""

    def __init__ (self):
        self._has_credits = False
        self._by_node = {}
        self._messages = []

    def add_message (self, message, node):
        """Append message; when node is not None also index the message by it."""
        self._messages.append (message)
        if node is None:
            return
        self._by_node[node] = message

    def add_credits(self):
        """Append the 'translator-credits' message, at most once per list."""
        if not self._has_credits:
            entry = Message()
            entry.set_context('_')
            entry.add_text('translator-credits')
            entry.add_comment(Comment('Put one translator per line, in the form NAME <EMAIL>, YEAR1, YEAR2'))
            self._messages.append(entry)
            self._has_credits = True

    def get_message_by_node (self, node):
        """Return the message indexed under node, or None."""
        return self._by_node.get(node)

    def get_nodes_with_messages (self):
        """Return a list of every node that has an indexed message."""
        return list(self._by_node)

    def output (self, out):
        """Write a PO template (header followed by all messages) to out.

        Messages sharing the same (context, string) pair are folded into the
        first such message: sources, markers, comments and id values are
        appended to it, preserve-space is propagated and locale filters are
        combined.
        """
        unique = []
        first_seen = {}
        for msg in self._messages:
            key = (msg.get_context(), msg.get_string())
            if key not in first_seen:
                unique.append(msg)
                first_seen[key] = msg
                continue
            target = first_seen[key]
            for source in msg.get_sources():
                target.add_source(source)
            for marker in msg.get_markers():
                target.add_marker(marker)
            for comment in msg.get_comments():
                target.add_comment(comment)
            for idvalue in msg.get_id_values():
                target.add_id_value(idvalue)
            if msg.get_preserve_space():
                target.set_preserve_space()
            if msg.get_locale_filter() is not None:
                current = target.get_locale_filter()
                if current is None:
                    target.set_locale_filter(msg.get_locale_filter())
                else:
                    # NOTE(review): this stores a string, while Message.format
                    # indexes the filter with [0] and [1] -- confirm intent.
                    target.set_locale_filter('%s, %s' % (current, msg.get_locale_filter()))

        header = (
            'msgid ""\n',
            'msgstr ""\n',
            '"Project-Id-Version: PACKAGE VERSION\\n"\n',
            '"POT-Creation-Date: %s\\n"\n' % time.strftime("%Y-%m-%d %H:%M%z"),
            '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n',
            '"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"\n',
            '"Language-Team: LANGUAGE <LL@li.org>\\n"\n',
            '"MIME-Version: 1.0\\n"\n',
            '"Content-Type: text/plain; charset=UTF-8\\n"\n',
            '"Content-Transfer-Encoding: 8bit\\n"\n',
            '\n',
        )
        for line in header:
            out.write(line)
        for msg in unique:
            out.write(msg.format())
            out.write('\n')


class Comment (object):
    """A translator comment with optional markers, rendered as '#.' PO lines."""

    def __init__ (self, text):
        self._text = ustr(text)
        assert(text is not None)
        self._markers = []

    def add_marker (self, marker):
        """Record a marker to be listed above the comment text."""
        self._markers.append(marker)

    def get_markers (self):
        """Return the list of recorded markers."""
        return self._markers

    def get_text (self):
        """Return the comment text."""
        return self._text

    def format (self):
        """Return the comment as '#. ' lines.

        Each distinct marker is listed first.  Text containing newlines is
        emitted line by line with leading empty lines dropped; single-line
        text is wrapped at spaces so lines stay under 72 characters where a
        space allows it.
        """
        pieces = []
        seen = set()
        for marker in self._markers:
            if marker in seen:
                continue
            pieces.append('#. (itstool) comment: ' + marker + '\n')
            seen.add(marker)
        body = self._text
        if '\n' not in body:
            while len(body) > 72:
                # Prefer the last space before column 72, else the first space.
                cut = body.rfind(' ', 0, 72)
                if cut == -1:
                    cut = body.find(' ')
                if cut == -1:
                    break
                pieces.append('#. %s\n' % body[:cut])
                body = body[cut+1:]
            pieces.append('#. %s\n' % body)
        else:
            started = False
            for line in body.split('\n'):
                if not started and line == '':
                    continue
                started = True
                pieces.append('#. %s\n' % line)
        return ''.join(pieces)


class Placeholder (object):
    """Pairs an XML node with its name converted to text (decoded as UTF-8)."""

    def __init__ (self, node):
        self.name = ustr(node.name, 'utf-8')
        self.node = node


class Message (object):
    """One translatable message built from XML text, inline tags and placeholders.

    The body is kept as a list of segments: text/markup strings and
    Placeholder objects.  Alongside the body the message records its context,
    source locations, node-path markers, id values, an optional locale
    filter, translator comments and whether whitespace must be preserved.
    """

    def __init__ (self):
        self._message = []
        self._empty = True
        self._ctxt = None
        self._placeholders = []
        self._sources = []
        self._markers = []
        self._id_values = []
        self._locale_filter = None
        self._comments = []
        self._preserve = False

    def __repr__(self):
        if self._empty:
            return "Empty message"
        return self.get_string()

    def _ensure_text_segment (self):
        # Guarantee the last segment is a string so markup can be appended.
        if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)):
            self._message.append('')

    def escape (self, text):
        """Return text with backslash, double quote, newline and tab escaped
        for use inside a double-quoted PO string."""
        return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t")

    def add_text (self, text):
        """Append character data, escaping '&', '<' and '>'.

        The message stops being empty once any non-whitespace text is added.
        """
        self._ensure_text_segment()
        if not isinstance(text, ustr_type):
            text = ustr(text, 'utf-8')
        self._message[-1] += text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        if re.sub(r'\s+', ' ', text).strip() != '':
            self._empty = False

    def add_entity_ref (self, name):
        """Append an entity reference '&name;' and mark the message non-empty."""
        self._message.append('&' + name + ';')
        self._empty = False

    def add_placeholder (self, node):
        """Append a Placeholder segment standing in for node."""
        holder = Placeholder(node)
        self._placeholders.append(holder)
        self._message.append(holder)

    def get_placeholder (self, name):
        """Return the Placeholder whose generated name ('<node name>-<index>',
        index starting at 1) equals name, or None if there is none."""
        for index, holder in enumerate(self._placeholders, 1):
            holdername = '%s-%i' % (holder.name, index)
            if holdername == ustr(name, 'utf-8'):
                return holder
        return None

    def add_start_tag (self, node):
        """Append the start tag of node, including its attributes.

        A self-closing tag is written when the node has no children.
        """
        self._ensure_text_segment()
        if node.ns() is not None and node.ns().name is not None:
            self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
        else:
            self._message[-1] += ('<%s' % ustr(node.name, 'utf-8'))
        for prop in xml_attr_iter(node):
            name = prop.name
            if prop.ns() is not None:
                name = prop.ns().name + ':' + name
            atval = prop.content
            if not isinstance(atval, ustr_type):
                atval = ustr(atval, 'utf-8')
            atval = atval.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
            # Append to the current text segment.  (Using "+=" on the segment
            # list itself would extend the list one character at a time.)
            self._message[-1] += " %s=\"%s\"" % (name, atval)
        if node.children is not None:
            self._message[-1] += '>'
        else:
            self._message[-1] += '/>'

    def add_end_tag (self, node):
        """Append the end tag of node; nothing is added for childless nodes,
        whose start tag was written self-closing."""
        if node.children is not None:
            self._ensure_text_segment()
            if node.ns() is not None and node.ns().name is not None:
                self._message[-1] += ('</%s:%s>' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
            else:
                self._message[-1] += ('</%s>' % ustr(node.name, 'utf-8'))

    def is_empty (self):
        """Return True while no non-whitespace text or entity was added."""
        return self._empty

    def get_context (self):
        return self._ctxt

    def set_context (self, ctxt):
        self._ctxt = ctxt

    def add_source (self, source):
        """Record a source location (emitted as a '#:' line by format)."""
        if not isinstance(source, ustr_type):
            source = ustr(source, 'utf-8')
        self._sources.append(source)

    def get_sources (self):
        return self._sources

    def add_marker (self, marker):
        """Record a node-path marker (emitted as '#. (itstool) path:')."""
        if not isinstance(marker, ustr_type):
            marker = ustr(marker, 'utf-8')
        self._markers.append(marker)

    def get_markers (self):
        return self._markers

    def add_id_value(self, id_value):
        self._id_values.append(id_value)

    def get_id_values(self):
        return self._id_values

    def add_comment (self, comment):
        """Record a comment object; None is ignored."""
        if comment is not None:
            self._comments.append(comment)

    def get_comments (self):
        return self._comments

    def get_string (self):
        """Return the message text.

        Placeholders are rendered as '<_:NAME-N/>' with N counting from 1.
        Unless preserve-space is set, runs of whitespace collapse to a
        single space and the result is stripped.
        """
        message = ''
        placeholder = 1
        for msg in self._message:
            if isinstance(msg, string_types):
                message += msg
            elif isinstance(msg, Placeholder):
                message += '<_:%s-%i/>' % (msg.name, placeholder)
                placeholder += 1
        if not self._preserve:
            message = re.sub(r'\s+', ' ', message).strip()
        return message

    def get_preserve_space (self):
        return self._preserve

    def set_preserve_space (self, preserve=True):
        self._preserve = preserve

    def get_locale_filter(self):
        return self._locale_filter

    def set_locale_filter(self, locale):
        self._locale_filter = locale

    def format (self):
        """Return the message as a PO entry with an empty msgstr.

        Order of output: path markers, id values, locale filter, comments
        (deduplicated by text, separated by '#.' lines), source lines, the
        no-wrap flag, msgctxt, msgid, msgstr.
        """
        ret = ''
        seen_markers = set()
        for marker in self._markers:
            if marker not in seen_markers:
                ret += '#. (itstool) path: ' + marker + '\n'
                seen_markers.add(marker)
        for idvalue in self._id_values:
            ret += '#. (itstool) id: ' + idvalue + '\n'
        if self._locale_filter is not None:
            # The filter is indexed as (list, type).
            ret += '#. (itstool) ' + self._locale_filter[1] + ' locale: ' + self._locale_filter[0] + '\n'
        comments = []
        commentsdict = {}
        for comment in self._comments:
            key = comment.get_text()
            if key in commentsdict:
                # Same text seen before: only carry its markers over.
                for marker in comment.get_markers():
                    commentsdict[key].add_marker(marker)
            else:
                comments.append(comment)
                commentsdict[key] = comment
        for i, comment in enumerate(comments):
            if i != 0:
                ret += '#.\n'
            ret += comment.format()
        for source in self._sources:
            ret += '#: %s\n' % source
        if self._preserve:
            ret += '#, no-wrap\n'
        if self._ctxt is not None:
            ret += 'msgctxt "%s"\n' % self._ctxt
        message = self.get_string()
        if self._preserve:
            # Multi-line form: one quoted string per source line; every line
            # but the last carries an explicit "\n".
            ret += 'msgid ""\n'
            lines = message.split('\n')
            last = len(lines) - 1
            for no, line in enumerate(lines):
                if no == last:
                    ret += '"%s"\n' % self.escape(line)
                else:
                    ret += '"%s\\n"\n' % self.escape(line)
        else:
            ret += 'msgid "%s"\n' % self.escape(message)
        ret += 'msgstr ""\n'
        return ret


def xml_child_iter (node):
    """Yield each child of node in order, following the .next links."""
    current = node.children
    while True:
        if current is None:
            return
        yield current
        # Read the link only after the consumer has resumed the generator.
        current = current.next

def xml_attr_iter (node):
    """Yield each attribute of node, starting at get_properties() and
    following the .next links."""
    current = node.get_properties()
    while True:
        if current is None:
            return
        yield current
        # Read the link only after the consumer has resumed the generator.
        current = current.next

def xml_is_ns_name (node, ns, name):
    """Return True when node is an element called name whose namespace URI
    equals ns; False otherwise."""
    if node.type != 'element':
        return False
    if not (node.name == name):
        return False
    if node.ns() is None:
        return False
    return node.ns().content == ns

def xml_get_node_path(node):
    """Return a path for node in which every element below the root carries
    an explicit [index].

    The built-in nodePath() method only adds numeric indexes when they are
    needed for disambiguation; this function always adds them.  Attribute
    steps are prefixed with '@' and namespace prefixes are kept.
    """
    own_ns = node.ns()
    if own_ns is not None and own_ns.name is not None:
        step = own_ns.name + ':' + node.name
    else:
        step = node.name
    if node.type == 'attribute':
        step = '@' + step
    step = '/' + step
    parent_is_element = node.parent.type == 'element'
    if node.type == 'element' and parent_is_element:
        # Position = 1 + number of preceding element siblings with the same
        # name and the same namespace prefix (or both without a namespace).
        index = 1
        sibling = node.previousElementSibling()
        while sibling is not None:
            if sibling.name == node.name:
                sibling_ns = sibling.ns()
                if sibling_ns is None:
                    same = node.ns() is None
                else:
                    same = node.ns() is not None and sibling_ns.name == node.ns().name
                if same:
                    index += 1
            sibling = sibling.previousElementSibling()
        step = '%s[%i]' % (step, index)
    if not parent_is_element:
        return step
    return xml_get_node_path(node.parent) + step

def xml_error_catcher(doc, error):
    """Error callback: append error, preceded by a space, to doc._xml_err."""
    addition = " %s" % error
    doc._xml_err = doc._xml_err + addition

def fix_node_ns (node, nsdefs):
    """Remove namespace declarations on node that repeat an inherited one.

    nsdefs maps prefix -> URI for the declarations in scope above node.  A
    declaration on node with the same prefix and URI is removed; any other
    declaration is added to the mapping handed down to the element children,
    which are processed recursively.  nsdefs itself is not modified.
    """
    in_scope = nsdefs.copy()
    decl = node.nsDefs()
    while decl is not None:
        # Grab the successor first: the declaration may be removed below.
        following = decl.next
        redundant = decl.name in nsdefs and nsdefs[decl.name] == decl.content
        if redundant:
            node.removeNsDef(decl.content)
        else:
            in_scope[decl.name] = decl.content
        decl = following
    for child in xml_child_iter(node):
        if child.type != 'element':
            continue
        fix_node_ns(child, in_scope)


class LocNote (object):
    """A localization note: either inline text or a reference (link).

    locnote     -- note text, or None.
    locnoteref  -- note reference, used when locnote is None.
    locnotetype -- 'alert' is kept; any other value becomes 'description'.
    space       -- when true, repr() returns the note text unmodified
                   instead of collapsing whitespace.
    """

    def __init__(self, locnote=None, locnoteref=None, locnotetype=None, space=False):
        self.locnote = locnote
        self.locnoteref = locnoteref
        self.locnotetype = locnotetype
        if self.locnotetype != 'alert':
            self.locnotetype = 'description'
        self._preserve_space=space

    def __repr__(self):
        """Return the note text, a '(itstool) link: ...' line for a
        reference, or '' when neither is set."""
        if self.locnote is not None:
            if self._preserve_space:
                return self.locnote
            else:
                # Raw string: '\s' is not a valid escape in a plain literal.
                return re.sub(r'\s+', ' ', self.locnote).strip()
        elif self.locnoteref is not None:
            return '(itstool) link: ' + re.sub(r'\s+', ' ', self.locnoteref).strip()
        return ''


class Document (object):
    def __init__ (self, filename, messages, load_dtd=False, keep_entities=False):
        """Parse an XML file and collect the ITS rule sets embedded in it.

        filename      -- path of the XML file to parse.
        messages      -- stored as self._msgs (presumably a MessageList --
                         TODO confirm against callers).
        load_dtd      -- when true, ask the parser to load the DTD subset.
        keep_entities -- when true, load the DTD and leave entity references
                         unexpanded; otherwise entities are replaced.

        Writes a message to stderr and calls sys.exit(1) when the file cannot
        be opened, when an ITS file referenced through xlink:href cannot be
        located, or when libxml2 reported errors while parsing.
        """
        # Parser errors are accumulated in self._xml_err by xml_error_catcher.
        self._xml_err = ''
        libxml2.registerErrorHandler(xml_error_catcher, self)
        try:
            ctxt = libxml2.createFileParserCtxt(filename)
        except:
            # NOTE(review): bare except -- any failure is reported as
            # "cannot open".
            sys.stderr.write('Error: cannot open XML file %s\n' % filename)
            sys.exit(1)
        ctxt.lineNumbers(1)
        self._load_dtd = load_dtd
        self._keep_entities = keep_entities
        if load_dtd:
            ctxt.loadSubset(1)
        if keep_entities:
            ctxt.loadSubset(1)
            ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
            ctxt.replaceEntities(0)
        else:
            ctxt.replaceEntities(1)
        ctxt.parseDocument()
        self._filename = filename
        self._doc = ctxt.doc()
        # Root elements of rule sets found in (or referenced from) the
        # document, in document order.
        self._localrules = []
        def pre_process (node):
            # Walk the tree depth-first: expand text XIncludes and gather
            # its:rules elements.
            for child in xml_child_iter(node):
                if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'):
                    # Only parse="text" includes are processed here.
                    if child.nsProp('parse', None) == 'text':
                        child.xincludeProcessTree()
                elif xml_is_ns_name(child, NS_ITS, 'rules'):
                    if child.hasNsProp('href', NS_XLINK):
                        # External rules: look next to the document first,
                        # then in each directory of opts.itspath.  `opts` is
                        # a module-level name defined outside this method
                        # (presumably the parsed command-line options --
                        # TODO confirm).
                        href = child.nsProp('href', NS_XLINK)
                        fileref = os.path.join(os.path.dirname(filename), href)
                        if not os.path.exists(fileref):
                            if opts.itspath is not None:
                                for pathdir in opts.itspath:
                                    fileref = os.path.join(pathdir, href)
                                    if os.path.exists(fileref):
                                        break
                        if not os.path.exists(fileref):
                            sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
                            sys.exit(1)
                        hctxt = libxml2.createFileParserCtxt(fileref)
                        hctxt.replaceEntities(1)
                        hctxt.parseDocument()
                        root = hctxt.doc().getRootElement()
                        version = None
                        if root.hasNsProp('version', None):
                            version = root.nsProp('version', None)
                        else:
                            sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
                                             os.path.basename(href))
                        # A missing version only warns; an unknown one skips
                        # the external rule set.
                        if version is not None and version not in ('1.0', '2.0'):
                            sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
                                             (os.path.basename(href), root.nsProp('version', None)))
                        else:
                            self._localrules.append(root)
                    # The its:rules element itself is also treated as a local
                    # rule set, whether or not it carried an href.  Its
                    # version comes from its own attribute or, failing that,
                    # from its:version on the document root.
                    version = None
                    if child.hasNsProp('version', None):
                        version = child.nsProp('version', None)
                    else:
                        root = child.doc.getRootElement()
                        if root.hasNsProp('version', NS_ITS):
                            version = root.nsProp('version', NS_ITS)
                        else:
                            sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
                    if version is not None and version not in ('1.0', '2.0'):
                        sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
                                         version)
                    else:
                        self._localrules.append(child)
                pre_process(child)
        pre_process(self._doc)
        try:
            self._check_errors()
        except libxml2.parserError as e:
            sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e))
            sys.exit(1)
        self._msgs = messages
        # Per-node results of applying ITS rules, keyed by node; filled in
        # by apply_its_rule.
        self._its_translate_nodes = {}
        self._its_within_text_nodes = {}
        self._its_locale_filters = {}
        self._its_id_values = {}
        self._its_loc_notes = {}
        self._its_preserve_space_nodes = {}
        self._itst_drop_nodes = {}
        self._itst_contexts = {}
        self._its_lang = {}
        self._itst_lang_attr = {}
        # (node, rule) pair set by an itst:credits rule, or None.
        self._itst_credits = None
        self._its_externals = {}

    def __del__ (self):
        self._doc.freeDoc()

    def _check_errors(self):
        if self._xml_err:
            raise libxml2.parserError(self._xml_err)

    def register_its_params(self, xpath, rules, params={}):
        """Register each its:param child of rules as an XPath variable.

        A value supplied in params under the parameter's name takes
        precedence; otherwise the element's own content is used.
        """
        for child in xml_child_iter(rules):
            if not xml_is_ns_name(child, NS_ITS, 'param'):
                continue
            name = child.nsProp('name', None)
            value = params[name] if name in params else child.getContent()
            xpath.xpathRegisterVariable(name, None, value)

    def apply_its_rule(self, rule, xpath):
        """Apply one rule element and record its effect per selected node.

        rule  -- a child of a rules element; non-element nodes are ignored.
        xpath -- XPath context used to evaluate the rule's selectors.

        Selectors are evaluated through self._try_xpath_eval (defined
        outside this method).  Results are stored in the self._its_* and
        self._itst_* dictionaries keyed by node; a later rule overwrites an
        earlier value for the same node, except localization notes, which
        accumulate in a list.
        """
        if rule.type != 'element':
            return
        if xml_is_ns_name(rule, NS_ITS, 'translateRule'):
            # node -> value of the 'translate' attribute
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._its_translate_nodes[node] = rule.nsProp('translate', None)
        elif xml_is_ns_name(rule, NS_ITS, 'withinTextRule'):
            # node -> value of the 'withinText' attribute
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._its_within_text_nodes[node] = rule.nsProp('withinText', None)
        elif xml_is_ns_name(rule, NS_ITST, 'preserveSpaceRule'):
            # itstool extension form: only preserveSpace="yes" has an effect.
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    val = rule.nsProp('preserveSpace', None)
                    if val == 'yes':
                        self._its_preserve_space_nodes[node] = 'preserve'
        elif xml_is_ns_name(rule, NS_ITS, 'preserveSpaceRule'):
            # ITS form: the 'space' attribute value is stored as is.
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._its_preserve_space_nodes[node] = rule.nsProp('space', None)
        elif xml_is_ns_name(rule, NS_ITS, 'localeFilterRule'):
            # node -> (list, type); defaults are '*' and 'include'.
            if rule.nsProp('selector', None) is not None:
                if rule.hasNsProp('localeFilterList', None):
                    lst = rule.nsProp('localeFilterList', None)
                else:
                    lst = '*'
                if rule.hasNsProp('localeFilterType', None):
                    typ = rule.nsProp('localeFilterType', None)
                else:
                    typ = 'include'
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._its_locale_filters[node] = (lst, typ)
        elif xml_is_ns_name(rule, NS_ITST, 'dropRule'):
            # node -> value of the 'drop' attribute
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._itst_drop_nodes[node] = rule.nsProp('drop', None)
        elif xml_is_ns_name(rule, NS_ITS, 'idValueRule'):
            sel = rule.nsProp('selector', None)
            idv = rule.nsProp('idValue', None)
            if sel is not None and idv is not None:
                for node in self._try_xpath_eval(xpath, sel):
                    # Evaluate idValue relative to the selected node, then
                    # restore the previous context node.
                    try:
                        oldnode = xpath.contextNode()
                    except:
                        oldnode = None
                    xpath.setContextNode(node)
                    idvalue = self._try_xpath_eval(xpath, idv)
                    if isinstance(idvalue, string_types):
                        self._its_id_values[node] = idvalue
                    else:
                        # Node-set result: use the content of the first node.
                        for val in idvalue:
                            self._its_id_values[node] = val.content
                            break
                    xpath.setContextNode(oldnode)
            pass
        elif xml_is_ns_name(rule, NS_ITST, 'contextRule'):
            # node -> literal 'context' attribute, or the result of
            # evaluating 'contextPointer' relative to the node.
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    if rule.hasNsProp('context', None):
                        self._itst_contexts[node] = rule.nsProp('context', None)
                    elif rule.hasNsProp('contextPointer', None):
                        try:
                            oldnode = xpath.contextNode()
                        except:
                            oldnode = None
                        xpath.setContextNode(node)
                        ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None))
                        if isinstance(ctxt, string_types):
                            self._itst_contexts[node] = ctxt
                        else:
                            # Node-set result: use the first node's content.
                            for ctxt in ctxt:
                                self._itst_contexts[node] = ctxt.content
                                break
                        xpath.setContextNode(oldnode)
        elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'):
            # The note comes from, in order of precedence: a locNote child
            # element, the locNoteRef attribute, or (per selected node) the
            # locNotePointer / locNoteRefPointer expression.
            locnote = None
            notetype = rule.nsProp('locNoteType', None)
            for child in xml_child_iter(rule):
                if xml_is_ns_name(child, NS_ITS, 'locNote'):
                    locnote = LocNote(locnote=child.content, locnotetype=notetype)
                    break
            if locnote is None:
                if rule.hasNsProp('locNoteRef', None):
                    locnote = LocNote(locnoteref=rule.nsProp('locNoteRef', None), locnotetype=notetype)
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    if locnote is not None:
                        self._its_loc_notes.setdefault(node, []).append(locnote)
                    else:
                        if rule.hasNsProp('locNotePointer', None):
                            sel = rule.nsProp('locNotePointer', None)
                            ref = False
                        elif rule.hasNsProp('locNoteRefPointer', None):
                            sel = rule.nsProp('locNoteRefPointer', None)
                            ref = True
                        else:
                            continue
                        try:
                            oldnode = xpath.contextNode()
                        except:
                            oldnode = None
                        xpath.setContextNode(node)
                        note = self._try_xpath_eval(xpath, sel)
                        if isinstance(note, string_types):
                            if ref:
                                nodenote = LocNote(locnoteref=note, locnotetype=notetype)
                            else:
                                nodenote = LocNote(locnote=note, locnotetype=notetype)
                            self._its_loc_notes.setdefault(node, []).append(nodenote)
                        else:
                            # Node-set result: only the first node is used.
                            for note in note:
                                if ref:
                                    nodenote = LocNote(locnoteref=note.content, locnotetype=notetype)
                                else:
                                    nodenote = LocNote(locnote=note.content, locnotetype=notetype,
                                                       space=self.get_preserve_space(note))
                                self._its_loc_notes.setdefault(node, []).append(nodenote)
                                break
                        xpath.setContextNode(oldnode)
        elif xml_is_ns_name(rule, NS_ITS, 'langRule'):
            if rule.nsProp('selector', None) is not None and rule.nsProp('langPointer', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    try:
                        oldnode = xpath.contextNode()
                    except:
                        oldnode = None
                    xpath.setContextNode(node)
                    res = self._try_xpath_eval(xpath, rule.nsProp('langPointer', None))
                    if len(res) > 0:
                        self._its_lang[node] = res[0].content
                    # We need to construct language attributes, not just read
                    # language information. Technically, langPointer could be
                    # any XPath expression. But if it looks like an attribute
                    # accessor, just use the attribute name.
                    if rule.nsProp('langPointer', None)[0] == '@':
                        self._itst_lang_attr[node] = rule.nsProp('langPointer', None)[1:]
                    xpath.setContextNode(oldnode)
        elif xml_is_ns_name(rule, NS_ITST, 'credits'):
            # Remember the first node matched by appendTo together with the
            # rule element itself.
            if rule.nsProp('appendTo', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)):
                    self._itst_credits = (node, rule)
                    break
        elif (xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule') or
              xml_is_ns_name(rule, NS_ITST, 'externalRefRule')):
            # Both forms store node -> content of the first node matched by
            # the pointer expression; only the pointer attribute name differs.
            sel = rule.nsProp('selector', None)
            if xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule'):
                ptr = rule.nsProp('externalResourceRefPointer', None)
            else:
                ptr = rule.nsProp('refPointer', None)
            if sel is not None and ptr is not None:
                for node in self._try_xpath_eval(xpath, sel):
                    try:
                        oldnode = xpath.contextNode()
                    except:
                        oldnode = None
                    xpath.setContextNode(node)
                    res = self._try_xpath_eval(xpath, ptr)
                    if len(res) > 0:
                        self._its_externals[node] = res[0].content
                    xpath.setContextNode(oldnode)

    def apply_its_rules(self, builtins, params={}):
        if builtins:
            dirs = []
            ddir = os.getenv('XDG_DATA_HOME', '')
            if ddir == '':
                ddir = os.path.join(os.path.expanduser('~'), '.local', 'share')
            dirs.append(ddir)
            ddir = os.getenv('XDG_DATA_DIRS', '')
            if ddir == '':
                if DATADIR not in ('/usr/local/share', '/usr/share'):
                    ddir += DATADIR + ':'
                ddir += '/usr/local/share:/usr/share'
            dirs.extend(ddir.split(':'))
            ddone = {}
            for ddir in dirs:
                itsdir = os.path.join(ddir, 'itstool', 'its')
                if not os.path.exists(itsdir):
                    continue
                for dfile in os.listdir(itsdir):
                    if dfile.endswith('.its'):
                        if not ddone.get(dfile, False):
                            self.apply_its_file(os.path.join(itsdir, dfile), params=params)
                            ddone[dfile] = True
        self.apply_local_its_rules(params=params)

    def apply_its_file(self, filename, params={}):
        """Apply the rules of one ITS file to this document.

        filename -- path of the ITS file; it is ignored when its root is not
                    an its:rules element or its version is neither 1.0 nor
                    2.0 (a missing version only produces a warning).
        params   -- values overriding its:param defaults, by name.

        When the file contains itst:match elements, the rules are applied
        only if at least one match selector selects something in the
        document.
        """
        def new_xpath_context(start):
            # XPath context on this document with every prefix declared on
            # start or one of its ancestors registered; the nearest
            # declaration of a prefix wins.
            ctx = self._doc.xpathNewContext()
            seen = set()
            par = start
            while par is not None:
                nsdef = par.nsDefs()
                while nsdef is not None:
                    if nsdef.name is not None and nsdef.name not in seen:
                        seen.add(nsdef.name)
                        ctx.xpathRegisterNs(nsdef.name, nsdef.content)
                    nsdef = nsdef.next
                par = par.parent
            return ctx

        doc = libxml2.parseFile(filename)
        root = doc.getRootElement()
        if not xml_is_ns_name(root, NS_ITS, 'rules'):
            return
        version = None
        if root.hasNsProp('version', None):
            version = root.nsProp('version', None)
        else:
            sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
                             os.path.basename(filename))
        if version is not None and version not in ('1.0', '2.0'):
            sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
                             (os.path.basename(filename), root.nsProp('version', None)))
            return
        # Without any itst:match element the file applies unconditionally.
        matched = True
        for match in xml_child_iter(root):
            if xml_is_ns_name(match, NS_ITST, 'match'):
                matched = False
                xpath = new_xpath_context(match)
                if match.hasNsProp('selector', None):
                    if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0:
                        matched = True
                        break
        if not matched:
            return
        for rule in xml_child_iter(root):
            # apply_its_rule ignores non-element nodes, so skip them before
            # building a context.
            if rule.type != 'element':
                continue
            # Start from the rule itself so prefixes declared on the rule
            # element are available to its selectors.
            xpath = new_xpath_context(rule)
            self.register_its_params(xpath, root, params=params)
            self.apply_its_rule(rule, xpath)

    def apply_local_its_rules(self, params={}):
        """Apply every rules element collected in ``self._localrules``.

        One XPath context is built per rules element and shared by its child
        rules.  A child rule that carries its own namespace declarations gets
        a dedicated context so those declarations are visible to it.
        """
        def register_namespaces(ctx, start):
            # Register prefixed namespaces from the outermost ancestor down
            # to ``start`` so that inner declarations are registered last.
            chain = []
            cur = start
            while cur is not None:
                chain.append(cur)
                cur = cur.parent
            for scope in reversed(chain):
                nsdef = scope.nsDefs()
                while nsdef is not None:
                    if nsdef.name is not None:
                        ctx.xpathRegisterNs(nsdef.name, nsdef.content)
                    nsdef = nsdef.next

        for rules in self._localrules:
            shared_ctx = self._doc.xpathNewContext()
            register_namespaces(shared_ctx, rules)
            self.register_its_params(shared_ctx, rules, params=params)
            for rule in xml_child_iter(rules):
                if rule.type != 'element':
                    continue
                if rule.nsDefs() is None:
                    ctx = shared_ctx
                else:
                    ctx = self._doc.xpathNewContext()
                    register_namespaces(ctx, rule)
                    self.register_its_params(ctx, rules, params=params)
                self.apply_its_rule(rule, ctx)

    def _append_credits(self, parent, node, trdata):
        """Instantiate the credits template ``node`` under ``parent``.

        ``trdata`` is a tuple ``(name, email, years)``, optionally extended
        with a single year while inside an ``itst:for-each select="years"``.
        ``itst:value-of`` elements are replaced by the selected field; any
        other node is shallow-copied and its children processed recursively.
        """
        if xml_is_ns_name(node, NS_ITST, 'for-each'):
            if node.nsProp('select', None) == 'years':
                for year in trdata[2].split(','):
                    per_year = trdata + (year.strip(),)
                    for child in xml_child_iter(node):
                        self._append_credits(parent, child, per_year)
            return

        if xml_is_ns_name(node, NS_ITST, 'value-of'):
            select = node.nsProp('select', None)
            field_index = {'name': 0, 'email': 1, 'years': 2}
            val = None
            if select in field_index:
                val = trdata[field_index[select]]
            elif select == 'year' and len(trdata) == 4:
                val = trdata[3]
            if val is None:
                return
            if not PY3:
                val = val.encode('utf-8')
            parent.addContent(val)
            return

        # Plain template node: copy it and descend into its children.
        copied = node.copyNode(2)
        parent.addChild(copied)
        for child in xml_child_iter(node):
            self._append_credits(copied, child, trdata)

    def merge_credits(self, translations, language, node):
        """Expand the translator-credits template using ``translations``.

        Does nothing when the document has no credits template
        (``self._itst_credits`` is None) or when the catalog has no real
        translation for the ``translator-credits`` message.  Each line of the
        translation shaped like ``Name <email>, years`` is fed to
        ``_append_credits`` for every child of the template; other lines are
        skipped.  ``language`` and ``node`` are accepted but not used here.
        """
        if self._itst_credits is None:
            return
        # Dear Python, please implement pgettext.
        # http://bugs.python.org/issue2504
        # Sincerely, Shaun
        trans = translations.ugettext('_\x04translator-credits')
        if trans is None or trans == 'translator-credits':
            return
        # Raw string: "\<" and "\>" are invalid escapes in a normal literal.
        regex = re.compile(r'(.*) \<(.*)\>, (.*)')
        for credit in trans.split('\n'):
            match = regex.match(credit)
            if not match:
                continue
            trdata = match.groups()
            # Use a distinct name so the ``node`` parameter is not shadowed.
            for template in xml_child_iter(self._itst_credits[1]):
                self._append_credits(self._itst_credits[0], template, trdata)

    def join_translations(self, translations, node=None, strict=False):
        """Insert translated copies of each message node for every language.

        ``translations`` maps a language code to a translations object.  When
        ``node`` is None the whole document is processed starting at the root
        element.  Nodes flagged to be dropped are removed, along with a
        preceding blank node.  For a node that carries a message, one
        translated sibling per language is added after it, tagged with
        ``xml:lang``; languages excluded by the node's locale filter are
        skipped.  ``strict`` is forwarded to ``get_translated``.
        """
        is_root = False
        if node is None:
            is_root = True
            self.generate_messages(comments=False)
            node = self._doc.getRootElement()
        if node is None or node.type != 'element':
            return
        if self.get_itst_drop(node) == 'yes':
            prev = node.prev
            node.unlinkNode()
            node.freeNode()
            if prev is not None and prev.isBlankNode():
                prev.unlinkNode()
                prev.freeNode()
            return
        msg = self._msgs.get_message_by_node(node)
        if msg is None:
            # NOTE(review): translate_attrs() is not handed ``translations``;
            # confirm which catalog it resolves when joining several languages.
            self.translate_attrs(node, node)
            # Snapshot the children: the recursion may unlink or add siblings.
            for child in list(xml_child_iter(node)):
                self.join_translations(translations, node=child, strict=strict)
        else:
            # Remember a whitespace-only text node in front of the original so
            # the same spacing can be repeated before each translated copy.
            prevnode = None
            if node.prev is not None and node.prev.type == 'text':
                if re.sub(r'\s+', '', node.prev.content) == '':
                    prevnode = node.prev
            # The filter depends only on the node, not on the language.
            locale = self.get_its_locale_filter(node)
            # Each copy is inserted right after ``node``, so walking the
            # languages in reverse leaves them in ascending order.
            for lang in sorted(translations.keys(), reverse=True):
                lmatch = match_locale_list(locale[0], lang)
                if (locale[1] == 'include' and not lmatch) or (locale[1] == 'exclude' and lmatch):
                    continue
                newnode = self.get_translated(node, translations[lang], strict=strict, lang=lang)
                if newnode != node:
                    newnode.setProp('xml:lang', lang)
                    node.addNextSibling(newnode)
                    if prevnode is not None:
                        node.addNextSibling(prevnode.copyNode(0))
        if is_root:
            # Because of the way we create nodes and rewrite the document,
            # we end up with lots of redundant namespace definitions. We
            # kill them off in one fell swoop at the end.
            fix_node_ns(node, {})
            self._check_errors()

    def merge_translations(self, translations, language, node=None, strict=False):
        """Replace translatable content under ``node`` with its translation.

        When ``node`` is None the whole document is processed from the root
        element; in that case translator credits are merged and language
        attributes are written at the end.  Nodes flagged to be dropped, or
        excluded for ``language`` by their locale filter, are removed together
        with a preceding blank node.  ``strict`` is forwarded to
        ``get_translated``.
        """
        at_root = node is None
        if at_root:
            self.generate_messages(comments=False)
            node = self._doc.getRootElement()
        if node is None or node.type != 'element':
            return

        locale = self.get_its_locale_filter(node)
        filter_list, filter_type = locale[0], locale[1]
        if filter_type == 'include':
            excluded = filter_list != '*' and not match_locale_list(filter_list, language)
        elif filter_type == 'exclude':
            excluded = match_locale_list(filter_list, language)
        else:
            excluded = False
        if self.get_itst_drop(node) == 'yes' or excluded:
            before = node.prev
            node.unlinkNode()
            node.freeNode()
            if before is not None and before.isBlankNode():
                before.unlinkNode()
                before.freeNode()
            return

        if at_root:
            self.merge_credits(translations, language, node)
        if self._msgs.get_message_by_node(node) is None:
            self.translate_attrs(node, node)
            # Snapshot the children: the recursion may unlink or replace them.
            for child in list(xml_child_iter(node)):
                self.merge_translations(translations, language, node=child, strict=strict)
        else:
            translated = self.get_translated(node, translations, strict=strict, lang=language)
            if translated != node:
                self.translate_attrs(node, translated)
                node.replaceNode(translated)
        if not at_root:
            return

        # Apply language attributes to untranslated nodes. We don't do
        # this before processing, because then these attributes would
        # be copied into the new nodes. We apply the attribute without
        # checking whether it was translated, because any that were will
        # just be floating around, unattached to a document.
        for lcnode in self._msgs.get_nodes_with_messages():
            attr = self._itst_lang_attr.get(lcnode)
            if attr is None:
                continue
            # Use the language recorded on the nearest ancestor-or-self.
            origlang = None
            scope = lcnode
            while scope is not None:
                origlang = self._its_lang.get(scope)
                if origlang is not None:
                    break
                scope = scope.parent
            if origlang is not None:
                lcnode.setProp(attr, origlang)
        # And then set the language attribute on the root node.
        if language is not None:
            root_attr = self._itst_lang_attr.get(node)
            if root_attr is not None:
                node.setProp(root_attr, language)
        # Because of the way we create nodes and rewrite the document,
        # we end up with lots of redundant namespace definitions. We
        # kill them off in one fell swoop at the end.
        fix_node_ns(node, {})
        self._check_errors()

    def translate_attrs(self, oldnode, newnode):
        """Copy translated values of translatable attributes onto ``newnode``.

        Attributes of ``oldnode`` marked translatable in
        ``self._its_translate_nodes`` are looked up in the catalog; when a
        non-empty translation comes back it is set on ``newnode`` under the
        same attribute name.
        """
        # Collect the attributes up front.  Callers pass the same node for
        # both arguments, so presumably this keeps setProp() below from
        # running while the attribute list is still being walked.
        pending = []
        for attr in xml_attr_iter(oldnode):
            if self._its_translate_nodes.get(attr, 'no') == 'yes':
                pending.append(attr)
        for attr in pending:
            source = attr.get_content()
            if not PY3:
                source = source.decode('utf-8')
            # NOTE(review): ``translations`` is neither a parameter nor a
            # local here, so it resolves to a module-level name -- confirm
            # that is intended and always bound when this runs.
            translated = translations.ugettext(source)
            if not translated:
                continue
            if not PY3:
                translated = translated.encode('utf-8')
            newnode.setProp(attr.name, translated)

    def get_translated (self, node, translations, strict=False, lang=None):
        """Return a translated copy of ``node``, or ``node`` itself.

        ``node`` is returned unchanged when it has no message, when the
        catalog has no translation for it, or when the translation cannot be
        merged and ``strict`` is false (a warning is then written to stderr).
        With ``strict`` true, merge failures are re-raised instead.

        The translation is parsed as XML inside a wrapper element named like
        ``node`` that declares every namespace in scope plus the ``_`` prefix
        bound to ``NS_BLANK``.  Elements in that namespace are placeholders
        and are swapped for the original nodes they stand for.  ``lang`` is
        used in the warning text and passed on for nested placeholders.
        """
        msg = self._msgs.get_message_by_node(node)
        if msg is None:
            return node
        msgstr = msg.get_string()
        # Dear Python, please implement pgettext.
        # http://bugs.python.org/issue2504
        # Sincerely, Shaun
        if msg.get_context() is not None:
            msgstr = msg.get_context() + '\x04' + msgstr
        trans = translations.ugettext(msgstr)
        if trans is None:
            return node

        def warn_unmerged():
            # Report a translation that could not be merged and clear the
            # pending libxml2 error text.  On Python 3 the msgid is already
            # text; encoding it there would print a b'...' repr instead.
            shown = msgstr if PY3 else msgstr.encode('utf-8')
            sys.stderr.write('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
                (lang + ' ') if lang is not None else '',
                shown))
            self._xml_err = ''

        nss = {}
        def reg_ns(node, nss):
            # Outermost ancestor first, so inner declarations override.
            if node.parent is not None:
                reg_ns(node.parent, nss)
            nsdef = node.nsDefs()
            while nsdef is not None:
                nss[nsdef.name] = nsdef.content
                nsdef = nsdef.next
        reg_ns(node, nss)
        nss['_'] = NS_BLANK
        # Start with the internal DTD subset, if the document has one.
        try:
            blurb = node.doc.intSubset().serialize('utf-8')
        except Exception:
            blurb = ''
        blurb += '<' + ustr(node.name, 'utf-8')
        for nsname in list(nss.keys()):
            if nsname is None:
                blurb += ' xmlns="%s"' % nss[nsname]
            else:
                blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname])
        blurb += '>%s</%s>' % (trans, ustr(node.name, 'utf-8'))
        if not PY3:
            blurb = blurb.encode('utf-8')
        ctxt = libxml2.createDocParserCtxt(blurb)
        if self._load_dtd:
            ctxt.loadSubset(1)
        if self._keep_entities:
            ctxt.loadSubset(1)
            ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
            ctxt.replaceEntities(0)
        else:
            ctxt.replaceEntities(1)
        ctxt.parseDocument()
        trnode = ctxt.doc().getRootElement()
        try:
            self._check_errors()
        except libxml2.parserError:
            if strict:
                raise
            warn_unmerged()
            # Release the scratch document here too, as the other
            # failure path below does.
            ctxt.doc().freeDoc()
            return node
        def scan_node(node):
            # Replace placeholder elements below ``node`` by the original
            # nodes they stand for, translating those as needed.
            children = [child for child in xml_child_iter(node)]
            for child in children:
                if child.type != 'element':
                    continue
                if child.ns() is not None and child.ns().content == NS_BLANK:
                    ph_node = msg.get_placeholder(child.name).node
                    if self.has_child_elements(ph_node):
                        self.merge_translations(translations, None, ph_node, strict=strict)
                        newnode = ph_node.copyNode(1)
                        newnode.setTreeDoc(self._doc)
                        child.replaceNode(newnode)
                    else:
                        repl = self.get_translated(ph_node, translations, strict=strict, lang=lang)
                        child.replaceNode(repl)
                scan_node(child)
        try:
            scan_node(trnode)
        except Exception:
            # Any failure while resolving placeholders means the translation
            # is unusable; interpreter-exit signals are left to propagate.
            if strict:
                raise
            warn_unmerged()
            ctxt.doc().freeDoc()
            return node
        retnode = node.copyNode(2)
        retnode.setTreeDoc(self._doc)
        for child in xml_child_iter(trnode):
            newnode = child.copyNode(1)
            newnode.setTreeDoc(self._doc)
            retnode.addChild(newnode)

        ctxt.doc().freeDoc()
        return retnode

    def generate_messages(self, comments=True):
        """Build the message list for the document.

        Adds the credits message when a credits template is present, then
        extracts messages starting from the first element child of the
        document.  ``comments`` is forwarded to ``generate_message``.
        """
        if self._itst_credits is not None:
            self._msgs.add_credits()
        first_element = next(
            (child for child in xml_child_iter(self._doc) if child.type == 'element'),
            None)
        if first_element is not None:
            self.generate_message(first_element, None, comments=comments)

    def generate_message (self, node, msg, comments=True, path=None):
        """Recursively extract translatable messages starting at ``node``.

        ``msg`` is the message currently being built, or None when ``node``
        is not inside one.  Text, CDATA and entity references are appended to
        ``msg``.  An element either starts a message of its own, is inlined
        into ``msg`` as start/end tags, or, when not translatable, becomes a
        placeholder in ``msg``.  Translatable attributes get separate
        messages.  ``comments`` controls whether localization notes are
        attached; ``path`` is the chain of inline element names used in
        comment markers.  Finished messages are added to ``self._msgs``.
        """
        if node.type in ('text', 'cdata') and msg is not None:
            msg.add_text(node.content)
            return
        if node.type == 'entity_ref':
            # An entity reference outside of any message has nothing to be
            # appended to; this used to dereference a None ``msg``.
            if msg is not None:
                msg.add_entity_ref(node.name)
            return
        if node.type != 'element':
            return
        if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
            return
        if self._itst_drop_nodes.get(node, 'no') == 'yes':
            return
        # Skip elements that are filtered out for every locale.
        locfil = self.get_its_locale_filter(node)
        if locfil == ('', 'include') or locfil == ('*', 'exclude'):
            return
        if path is None:
            path = ''
        translate = self.get_its_translate(node)
        withinText = False
        if translate == 'no':
            # Untranslatable content is represented by a placeholder in the
            # surrounding message; nothing below it joins that message.
            if msg is not None:
                msg.add_placeholder(node)
            is_unit = False
            msg = None
        else:
            is_unit = msg is None or self.is_translation_unit(node)
            if is_unit:
                # A nested unit is a placeholder in its parent message and
                # starts a fresh message of its own.
                if msg is not None:
                    msg.add_placeholder(node)
                msg = Message()
                ctxt = None
                if node.hasNsProp('context', NS_ITST):
                    ctxt = node.nsProp('context', NS_ITST)
                if ctxt is None:
                    ctxt = self._itst_contexts.get(node)
                if ctxt is not None:
                    msg.set_context(ctxt)
                idvalue = self.get_its_id_value(node)
                if idvalue is not None:
                    basename = os.path.basename(self._filename)
                    msg.add_id_value(basename + '#' + idvalue)
                if self.get_preserve_space(node):
                    msg.set_preserve_space()
                if self.get_its_locale_filter(node) != ('*', 'include'):
                    msg.set_locale_filter(self.get_its_locale_filter(node))
                msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
                msg.add_marker('%s/%s' % (ustr(node.parent.name, 'utf-8'), ustr(node.name, 'utf-8')))
            else:
                withinText = True
                msg.add_start_tag(node)

        if not withinText:
            # Add msg for translatable node attributes
            for attr in xml_attr_iter(node):
                if self._its_translate_nodes.get(attr, 'no') == 'yes':
                    attr_msg = Message()
                    if self.get_preserve_space(attr):
                        attr_msg.set_preserve_space()
                    attr_msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
                    attr_msg.add_marker('%s/%s@%s' % (node.parent.name, node.name, attr.name))
                    attr_msg.add_text(attr.content)
                    if comments:
                        for locnote in self.get_its_loc_notes(attr):
                            comment = Comment(locnote)
                            comment.add_marker ('%s/%s@%s' % (
                                    node.parent.name, node.name, attr.name))
                            attr_msg.add_comment(comment)
                    self._msgs.add_message(attr_msg, attr)

        if comments and msg is not None:
            # For a unit, climb until some node supplies notes; for inline
            # elements only the element itself is consulted.
            cnode = node
            while cnode is not None:
                hasnote = False
                for locnote in self.get_its_loc_notes(cnode, inherit=(not withinText)):
                    comment = Comment(locnote)
                    if withinText:
                        comment.add_marker('.%s/%s' % (path, cnode.name))
                    msg.add_comment(comment)
                    hasnote = True
                if hasnote or not is_unit:
                    break
                cnode = cnode.parent

        self.generate_external_resource_message(node)
        for attr in xml_attr_iter(node):
            self.generate_external_resource_message(attr)
            idvalue = self.get_its_id_value(attr)
            # ``msg`` is None under a non-translatable element; there is then
            # no message to attach the attribute's id value to.
            if idvalue is not None and msg is not None:
                basename = os.path.basename(self._filename)
                msg.add_id_value(basename + '#' + idvalue)

        if withinText:
            path = path + '/' + node.name
        for child in xml_child_iter(node):
            self.generate_message(child, msg, comments=comments, path=path)

        # NOTE(review): ``translate`` is normally 'yes' or 'no', both truthy,
        # so this outer test only filters an empty attribute value -- confirm
        # whether ``translate == 'yes'`` was intended.
        if translate:
            if is_unit and not msg.is_empty():
                self._msgs.add_message(msg, node)
            elif msg is not None:
                msg.add_end_tag(node)

    def generate_external_resource_message(self, node):
        """Add a message describing the external resource ``node`` refers to.

        Nothing happens unless ``node`` (an element or attribute) is listed in
        ``self._its_externals`` and the element it belongs to is translatable.
        The message text holds the reference and the MD5 of the referenced
        file, resolved relative to the document's directory.  If the file
        cannot be read the hash is recorded as ``__failed__``.
        """
        if node not in self._its_externals:
            return
        resref = self._its_externals[node]
        if node.type == 'element':
            translate = self.get_its_translate(node)
            marker = '%s/%s' % (node.parent.name, node.name)
        else:
            translate = self.get_its_translate(node.parent)
            marker = '%s/%s/@%s' % (node.parent.parent.name, node.parent.name, node.name)
        if translate == 'no':
            return
        msg = Message()
        try:
            fullfile = os.path.join(os.path.dirname(self._filename), resref)
            # ``with`` closes the file even when reading it fails.
            with open(fullfile, 'rb') as filefp:
                filemd5 = hashlib.md5(filefp.read()).hexdigest()
        except Exception:
            # Best effort: a missing or unreadable file still gets a message.
            filemd5 = '__failed__'
        txt = "external ref='%s' md5='%s'" % (resref, filemd5)
        msg.set_context('_')
        msg.add_text(txt)
        msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
        msg.add_marker(marker)
        msg.add_comment(Comment('This is a reference to an external file such as an image or'
                                ' video. When the file changes, the md5 hash will change to'
                                ' let you know you need to update your localized copy. The'
                                ' msgstr is not used at all. Set it to whatever you like'
                                ' once you have updated your copy of the file.'))
        self._msgs.add_message(msg, None)

    def is_translation_unit (self, node):
        """Return True unless ``node``'s within-text value is ``'yes'``."""
        within_text = self.get_its_within_text(node)
        return within_text != 'yes'

    def has_child_elements(self, node):
        """Return the number of direct children of ``node`` that are elements.

        Callers use the result for its truth value.
        """
        count = 0
        for child in xml_child_iter(node):
            if child.type == 'element':
                count += 1
        return count

    def get_preserve_space (self, node):
        """Tell whether whitespace is to be preserved for ``node``.

        Walks from ``node`` up through its element ancestors.  The first node
        whose ``getSpacePreserve()`` is 1 yields True; the first node found in
        ``self._its_preserve_space_nodes`` decides by whether its value is
        ``'preserve'``.  False is returned when neither is found.
        """
        rules = self._its_preserve_space_nodes
        current = node
        while True:
            if current.type not in ('attribute', 'element'):
                return False
            if current.getSpacePreserve() == 1:
                return True
            if current in rules:
                return rules[current] == 'preserve'
            current = current.parent

    def get_its_translate(self, node):
        """Return the effective ITS translate value for ``node``.

        An ``its:translate`` attribute wins, then a plain ``translate``
        attribute on an ``its:span``, then a value recorded in
        ``self._its_translate_nodes``.  Without any of these an attribute is
        ``'no'``, an element takes the value of its parent element, and an
        element without a parent element is ``'yes'``.
        """
        current = node
        while True:
            if current.hasNsProp('translate', NS_ITS):
                val = current.nsProp('translate', NS_ITS)
            elif xml_is_ns_name(current, NS_ITS, 'span') and current.hasNsProp('translate', None):
                val = current.nsProp('translate', None)
            elif current in self._its_translate_nodes:
                val = self._its_translate_nodes[current]
            else:
                val = None
            if val is not None:
                return val
            if current.type == 'attribute':
                return 'no'
            if current.parent.type != 'element':
                return 'yes'
            # Nothing set here: take the value from the parent element.
            current = current.parent

    def get_its_within_text(self, node):
        """Return the within-text value of ``node``: 'yes', 'nested' or 'no'.

        A local ``its:withinText`` attribute (or plain ``withinText`` on an
        ``its:span``) is used when present, with any unrecognized value read
        as ``'no'``.  Otherwise the value recorded in
        ``self._its_within_text_nodes`` is returned, defaulting to ``'no'``.
        """
        if node.hasNsProp('withinText', NS_ITS):
            attr_ns = NS_ITS
        elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('withinText', None):
            attr_ns = None
        else:
            return self._its_within_text_nodes.get(node, 'no')
        local = node.nsProp('withinText', attr_ns)
        return local if local in ('yes', 'nested') else 'no'

    def get_its_locale_filter(self, node):
        """Return the effective locale filter for ``node`` as ``(list, type)``.

        Local ``its:localeFilterList`` / ``its:localeFilterType`` attributes
        win, then the plain attributes on an ``its:span``, then a value
        recorded in ``self._its_locale_filters``, then the filter of the
        parent element.  A missing list reads as ``'*'`` and a missing type
        as ``'include'``; the overall default is ``('*', 'include')``.
        """
        def local_filter(ns):
            # Read the two attributes in namespace ``ns``; None if neither
            # of them is present on the node.
            has_list = node.hasNsProp('localeFilterList', ns)
            has_type = node.hasNsProp('localeFilterType', ns)
            if not (has_list or has_type):
                return None
            lst = node.nsProp('localeFilterList', ns) if has_list else '*'
            typ = node.nsProp('localeFilterType', ns) if has_type else 'include'
            return (lst, typ)

        found = local_filter(NS_ITS)
        if found is None and xml_is_ns_name(node, NS_ITS, 'span'):
            found = local_filter(None)
        if found is not None:
            return found
        if node in self._its_locale_filters:
            return self._its_locale_filters[node]
        if node.parent.type == 'element':
            return self.get_its_locale_filter(node.parent)
        return ('*', 'include')

    def get_itst_drop(self, node):
        """Return 'yes' if ``node`` is flagged to be dropped, else 'no'.

        The flag is either a local ``itst:drop="yes"`` attribute or a 'yes'
        entry for the node in ``self._itst_drop_nodes``.
        """
        flagged_locally = (node.hasNsProp('drop', NS_ITST)
                           and node.nsProp('drop', NS_ITST) == 'yes')
        if flagged_locally or self._itst_drop_nodes.get(node, 'no') == 'yes':
            return 'yes'
        return 'no'

    def get_its_id_value(self, node):
        """Return the id value of ``node``, or None when it has none.

        An ``xml:id`` attribute takes precedence over a value recorded in
        ``self._its_id_values``.
        """
        if not node.hasNsProp('id', NS_XML):
            return self._its_id_values.get(node, None)
        return node.nsProp('id', NS_XML)

    def get_its_loc_notes(self, node, inherit=True):
        """Return the list of localization notes that apply to ``node``.

        A note given by local ITS attributes comes first, followed by the
        notes recorded for the node in ``self._its_loc_notes`` in reverse
        order.  When nothing is found and ``inherit`` is true, a
        non-attribute node takes the notes of its parent element.
        """
        def has_any(ns):
            return (node.hasNsProp('locNote', ns)
                    or node.hasNsProp('locNoteRef', ns)
                    or node.hasNsProp('locNoteType', ns))

        # ITS-namespaced attributes apply to any node; the plain spellings
        # are only looked at on its:span when no namespaced one is present.
        if has_any(NS_ITS):
            attr_ns, has_local = NS_ITS, True
        elif xml_is_ns_name(node, NS_ITS, 'span') and has_any(None):
            attr_ns, has_local = None, True
        else:
            attr_ns, has_local = None, False

        notes = []
        if has_local:
            notetype = node.nsProp('locNoteType', attr_ns)
            if node.hasNsProp('locNote', attr_ns):
                notes.append(LocNote(locnote=node.nsProp('locNote', attr_ns),
                                     locnotetype=notetype))
            elif node.hasNsProp('locNoteRef', attr_ns):
                notes.append(LocNote(locnoteref=node.nsProp('locNoteRef', attr_ns),
                                     locnotetype=notetype))
        notes.extend(reversed(self._its_loc_notes.get(node, [])))

        if len(notes) > 0 or not inherit:
            return notes
        if node.type == 'attribute' or node.parent is None:
            return notes
        if node.parent.type != 'element':
            return notes
        return self.get_its_loc_notes(node.parent)

    def output_test_data(self, category, out, node=None):
        """Write the computed ITS data of ``category`` for ``node`` to ``out``.

        One CRLF-terminated line is written per node: the node path and, when
        there is a value to report, a tab followed by the value.  Attributes
        are then reported in sorted order, followed by child elements.
        ``node`` defaults to the document's root element.  An unrecognized
        ``category`` writes an error to stderr and exits with status 1.
        """
        if node is None:
            node = self._doc.getRootElement()

        def describe():
            # Return the value column for ``node``; '' when nothing to report.
            if category == 'translate':
                return 'translate="%s"' % self.get_its_translate(node)
            if category == 'withinText':
                if node.type == 'attribute':
                    return ''
                return 'withinText="%s"' % self.get_its_within_text(node)
            if category == 'localeFilter':
                return 'localeFilterList="%s"\tlocaleFilterType="%s"' % self.get_its_locale_filter(node)
            if category == 'locNote':
                notes = self.get_its_loc_notes(node)
                if len(notes) > 0:
                    first = notes[0]
                    if first.locnote is not None:
                        return 'locNote="%s"\tlocNoteType="%s"' % (ustr(first), first.locnotetype)
                    if first.locnoteref is not None:
                        return 'locNoteRef="%s"\tlocNoteType="%s"' % (first.locnoteref, first.locnotetype)
                return ''
            if category == 'externalResourceRef':
                ref = self._its_externals.get(node, '')
                if ref != '':
                    return 'externalResourceRef="%s"' % ref
                return ''
            if category == 'idValue':
                idvalue = self.get_its_id_value(node)
                if idvalue is not None:
                    return 'idValue="%s"' % idvalue
                return ''
            if category == 'preserveSpace':
                if self.get_preserve_space(node):
                    return 'space="preserve"'
                return 'space="default"'
            sys.stderr.write('Error: Unrecognized category %s\n' % category)
            sys.exit(1)

        compval = describe()
        if compval != '':
            out.write('%s\t%s\r\n' % (xml_get_node_path(node), compval))
        else:
            out.write('%s\r\n' % (xml_get_node_path(node)))
        for attr in sorted(xml_attr_iter(node), key=ustr):
            self.output_test_data(category, out, attr)
        for child in xml_child_iter(node):
            if child.type != 'element':
                continue
            self.output_test_data(category, out, child)

    @staticmethod
    def _try_xpath_eval (xpath, expr):
        """Evaluate ``expr`` in the context ``xpath``.

        Returns the evaluation result.  If evaluation raises, a warning
        naming the expression is written to stderr and an empty list is
        returned.
        """
        try:
            return xpath.xpathEval(expr)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # not be reported as an invalid XPath expression.
            sys.stderr.write('Warning: Invalid XPath: %s\n' % expr)
            return []

def match_locale_list(extranges, locale):
    """Return True if ``locale`` matches any range in ``extranges``.

    ``extranges`` is a comma-separated list of language ranges; surrounding
    whitespace of each entry is ignored.  An empty or blank list matches
    nothing.
    """
    if not extranges.strip():
        return False
    return any(match_locale(part.strip(), locale) for part in extranges.split(','))

def match_locale(extrange, locale):
    """Return True if ``locale`` matches the extended language range.

    Implements extended filtering for extended language ranges as defined
    by RFC4647, part of BCP47.  Subtags are compared case-insensitively and
    ``*`` in the range matches any subtag.
    http://tools.ietf.org/html/rfc4647#section-3.3.2
    """
    rangelist = [x.lower() for x in extrange.split('-')]
    localelist = [x.lower() for x in locale.split('-')]
    # Step 2: the first subtags must match.  Both lists then move on to
    # their second subtag, also when the range starts with a wildcard;
    # otherwise "*-de" would match the language "de" itself.
    if rangelist[0] not in ('*', localelist[0]):
        return False
    rangei = localei = 1
    # Step 3: consume the remaining range subtags.
    while rangei < len(rangelist):
        wanted = rangelist[rangei]
        if wanted == '*':
            # A wildcard here matches zero or more subtags of the tag.
            rangei += 1
            continue
        if localei >= len(localelist):
            return False
        if wanted == localelist[localei]:
            rangei += 1
            localei += 1
            continue
        # A singleton (such as the private-use "x") may not be skipped.
        if len(localelist[localei]) == 1:
            return False
        localei += 1
    return True

# Raw string: "\." is an invalid escape sequence in a normal string literal.
# Groups: 1 language, 2 "_TERRITORY", 3 "@modifier", 4 ".codeset".
_locale_pattern = re.compile(r'([a-zA-Z0-9-]+)(_[A-Za-z0-9]+)?(@[A-Za-z0-9]+)?(\.[A-Za-z0-9]+)?')
# Modifiers that name a script, mapped to the subtag emitted for them.
_locale_scripts = {
    '@cyrillic': 'Cyrl',
    '@devanagari': 'Deva',
    '@latin': 'Latn',
    '@shaw': 'Shaw',
}
def convert_locale (locale):
    """Convert a POSIX-style locale name to a BCP47-style language tag.

    The language is lowercased, a known script modifier becomes a script
    subtag, the territory is uppercased, and any other modifier except
    ``@euro`` is appended in lowercase.  The codeset is discarded.  A string
    the pattern does not match is returned unchanged.
    """
    # Automatically convert POSIX-style locales to BCP47
    match = _locale_pattern.match(locale)
    if match is None:
        return locale
    ret = match.group(1).lower()
    variant = match.group(3)
    script = _locale_scripts.get(variant)
    if script is not None:
        # The script goes between language and territory.
        ret += '-' + script
        variant = None
    if match.group(2) is not None:
        ret += '-' + match.group(2)[1:].upper()
    if variant is not None and variant != '@euro':
        ret += '-' + variant[1:].lower()
    return ret


def _binary_stdout():
    # Serialized XML is written as bytes. On Python 3 sys.stdout only accepts
    # str, so use its underlying binary buffer when one is available.
    return getattr(sys.stdout, 'buffer', sys.stdout)


def _load_translations(filename):
    # Load the MO file at filename into a GNUTranslations object, closing the
    # file afterwards. Prints an error and exits with status 1 on failure.
    try:
        with open(filename, 'rb') as mofile:
            translations = gettext.GNUTranslations(mofile)
    except Exception:
        sys.stderr.write('Error: cannot open mo file %s\n' % filename)
        sys.exit(1)
    if PY3:
        translations.ugettext = translations.gettext
    translations.add_fallback(NoneTranslations())
    return translations


def _apply_its(doc, opts, params):
    # Apply the built-in ITS rules (unless disabled) and then every ITS file
    # given on the command line to doc.
    doc.apply_its_rules(not(opts.nobuiltins), params=params)
    if opts.itsfile is not None:
        for itsfile in opts.itsfile:
            doc.apply_its_file(itsfile, params=params)


def _serialize_document(doc):
    # Serialize doc to UTF-8 encoded bytes.
    serialized = doc._doc.serialize('utf-8')
    if PY3 and not isinstance(serialized, bytes):
        # For some reason, under py3, our serialized data is returned as a
        # str. Let's encode it to bytes.
        serialized = serialized.encode('utf-8')
    return serialized


# Command-line entry point. Three modes, selected by the options:
#   (default)  extract messages from XML files into a PO file
#   -m MOFILE  merge one MO file into each XML file
#   -j XMLFILE join several MO files into one multilingual XML file
if __name__ == '__main__':
    options = optparse.OptionParser()
    options.set_usage('\n  itstool [OPTIONS] [XMLFILES]\n' +
                      '  itstool -m <MOFILE> [OPTIONS] [XMLFILES]\n' +
                      '  itstool -j <XMLFILE> [OPTIONS] [MOFILES]')
    options.add_option('-i', '--its',
                       action='append',
                       dest='itsfile',
                       metavar='ITS',
                       help='Load the ITS rules in the file ITS (can specify multiple times)')
    options.add_option('-l', '--lang',
                       dest='lang',
                       default=None,
                       metavar='LANGUAGE',
                       help='Explicitly set the language code for output file')
    options.add_option('-j', '--join',
                       dest='join',
                       metavar='FILE',
                       help='Join multiple MO files with the XML file FILE and output XML file')
    options.add_option('-m', '--merge',
                       dest='merge',
                       metavar='FILE',
                       help='Merge from a PO or MO file FILE and output XML files')
    options.add_option('-n', '--no-builtins',
                       action='store_true',
                       dest='nobuiltins',
                       default=False,
                       help='Do not apply the built-in ITS rules')
    options.add_option('-o', '--output',
                       dest='output',
                       default=None,
                       metavar='OUT',
                       help='Output PO files to file OUT or XML files in directory OUT')
    options.add_option('--path',
                       action='append',
                       dest='itspath',
                       default=None,
                       metavar='PATHS',
                       help='Extra path where ITS files may be found (can specify multiple times)')
    options.add_option('-s', '--strict',
                       action='store_true',
                       dest='strict',
                       default=False,
                       help='Exit with error when PO files contain broken XML')
    options.add_option('-d', '--load-dtd',
                       action='store_true',
                       dest='load_dtd',
                       default=False,
                       help='Load external DTDs used by input XML')
    options.add_option('-k', '--keep-entities',
                       action='store_true',
                       dest='keep_entities',
                       default=False,
                       help='Keep entity reference unexpanded')
    options.add_option('-p', '--param',
                       action='append',
                       dest='params',
                       default=[],
                       nargs=2,
                       metavar='NAME VALUE',
                       help='Define the ITS parameter NAME to the value VALUE (can specify multiple times)')
    options.add_option('-t', '--test',
                       dest='test',
                       default=None,
                       metavar='CATEGORY',
                       help='Generate conformance test output for CATEGORY')
    options.add_option('-v', '--version',
                       action='store_true',
                       dest='version',
                       default=False,
                       help='Print itstool version and exit')
    # sys.argv is passed whole, so args[0] is the program name and the
    # positional file arguments start at args[1].
    (opts, args) = options.parse_args(sys.argv)

    if opts.version:
        print('itstool %s' % VERSION)
        sys.exit(0)

    # Each -p occurrence contributes one (NAME, VALUE) pair; later ones win.
    params = dict(opts.params)

    if opts.merge is None and opts.join is None:
        # Extraction mode: collect messages from every XML file.
        messages = MessageList()
        doc = None
        for filename in args[1:]:
            doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities)
            _apply_its(doc, opts, params)
            if opts.test is None:
                doc.generate_messages()
        if opts.test is not None and doc is None:
            # Test output is produced from a document; without one there is
            # nothing to report on.
            sys.stderr.write('Error: No XML file specified for test output\n')
            sys.exit(1)
        opened_out = not (opts.output is None or opts.output == '-')
        if opened_out:
            try:
                out = io.open(opts.output, 'wt', encoding='utf-8')
            except (IOError, OSError):
                sys.stderr.write('Error: Cannot write to file %s\n' % opts.output)
                sys.exit(1)
        else:
            out = sys.stdout
        try:
            if opts.test is not None:
                # Only the last document processed is reported on.
                doc.output_test_data(opts.test, out)
            else:
                messages.output(out)
            out.flush()
        finally:
            if opened_out:
                out.close()
    elif opts.merge is not None:
        # Merge mode: apply one MO file to each XML file.
        translations = _load_translations(opts.merge)
        if opts.lang is None:
            # Derive the language from the MO file name, e.g. "de_DE.mo".
            opts.lang = convert_locale(os.path.splitext(os.path.basename(opts.merge))[0])
        # out is either a directory name (a string) into which one file per
        # input is written, or a single already-open binary stream.
        opened_out = False
        if opts.output is None:
            out = './'
        elif os.path.isdir(opts.output):
            out = opts.output
        elif len(args) == 2:
            if opts.output == '-':
                out = _binary_stdout()
            else:
                out = open(opts.output, 'wb')
                opened_out = True
        else:
            sys.stderr.write('Error: Non-directory output for multiple files\n')
            sys.exit(1)
        try:
            for filename in args[1:]:
                messages = MessageList()
                doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities)
                _apply_its(doc, opts, params)
                try:
                    doc.merge_translations(translations, opts.lang, strict=opts.strict)
                except Exception as e:
                    sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e))
                    sys.exit(1)
                serialized = _serialize_document(doc)
                if isinstance(out, string_types):
                    with open(os.path.join(out, os.path.basename(filename)), 'wb') as fout:
                        fout.write(serialized)
                else:
                    out.write(serialized)
                    out.flush()
        finally:
            if opened_out:
                out.close()
    elif opts.join is not None:
        # Join mode: combine several MO files into one multilingual XML file.
        translations = {}
        for filename in args[1:]:
            lang = convert_locale(os.path.splitext(os.path.basename(filename))[0])
            translations[lang] = _load_translations(filename)
        opened_out = opts.output is not None
        if opts.output is None:
            out = _binary_stdout()
        elif os.path.isdir(opts.output):
            # Name the result after the XML file being joined, not after
            # whichever MO file happened to be processed last.
            out = open(os.path.join(opts.output, os.path.basename(opts.join)), 'wb')
        else:
            out = open(opts.output, 'wb')
        try:
            messages = MessageList()
            doc = Document(opts.join, messages)
            _apply_its(doc, opts, params)
            doc.join_translations(translations, strict=opts.strict)
            out.write(_serialize_document(doc))
            out.flush()
        finally:
            if opened_out:
                out.close()