Tree - source-git/python-lxml - CentOS Git server

source-git / python-lxml

Files

Commit: d9acb67c9b615a0ad85ff8316fa45a8f0c5dd043
Blob Blame History Raw
# cython: binding=True
# cython: auto_pickle=False

"""
The ``lxml.objectify`` module implements a Python object API for XML.
It is based on `lxml.etree`.
"""

from __future__ import absolute_import

cimport cython

from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup
from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode
from lxml.includes.tree cimport const_xmlChar, _xcstr
from lxml cimport python
from lxml.includes cimport tree

cimport lxml.includes.etreepublic as cetree
cimport libc.string as cstring_h   # not to be confused with stdlib 'string'
from libc.string cimport const_char

__all__ = [u'BoolElement', u'DataElement', u'E', u'Element', u'ElementMaker',
           u'FloatElement', u'IntElement', u'LongElement', u'NoneElement',
           u'NumberElement', u'ObjectPath', u'ObjectifiedDataElement',
           u'ObjectifiedElement', u'ObjectifyElementClassLookup',
           u'PYTYPE_ATTRIBUTE', u'PyType', u'StringElement', u'SubElement',
           u'XML', u'annotate', u'deannotate', u'dump', u'enable_recursive_str',
           u'fromstring', u'getRegisteredTypes', u'makeparser', u'parse',
           u'pyannotate', u'pytypename', u'set_default_parser',
           u'set_pytype_attribute_tag', u'xsiannotate']

cdef object etree
from lxml import etree
# initialize C-API of lxml.etree
import_lxml__etree()

__version__ = etree.__version__

cdef object re
import re

# Exception types grouped under one name.
# NOTE(review): not referenced in the part of the file shown here; presumably
# used by the type-guessing code further down -- confirm.
cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError)
# Bound ``match`` of a regex for names that start with ``__`` and end with
# ``__``; ObjectifiedElement.__getattr__ uses it to recognise special names.
cdef object is_special_method = re.compile(u'__.*__$').match


# Duplicated from apihelpers.pxi, since dependencies obstruct
# including apihelpers.pxi.
cdef strrepr(s):
    """Return *s* in a form that can be used as the result of a __repr__
    method, e.g. _Element.__repr__().

    On Python 2 the string is encoded with the 'unicode-escape' codec; on
    any other version it is handed back unchanged.
    """
    if python.IS_PYTHON2:
        return s.encode('unicode-escape')
    return s


cdef object _typename(object t):
    """Return the type name of *t* without any leading dotted prefix.

    The name reported by ``python._fqtypename()`` is cut after its last '.'
    (if it contains one) and converted to a unicode string.
    """
    cdef const_char* c_name = python._fqtypename(t)
    c_last_dot = cstring_h.strrchr(c_name, c'.')
    if c_last_dot is not NULL:
        # continue with the part that follows the last dot
        c_name = c_last_dot + 1
    return pyunicode(<const_xmlChar*>c_name)


# namespace/name for "pytype" hint attribute
# Each value is kept in three forms that set_pytype_attribute_tag() fills in
# together: the decoded text, its UTF-8 bytes and a C pointer taken from
# those bytes.
cdef object PYTYPE_NAMESPACE
cdef bytes PYTYPE_NAMESPACE_UTF8
cdef const_xmlChar* _PYTYPE_NAMESPACE

cdef object PYTYPE_ATTRIBUTE_NAME
cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8
cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME

# Public attribute name built from namespace and name; assigned by
# set_pytype_attribute_tag().
PYTYPE_ATTRIBUTE = None

# Reserved type name: PyType.register() refuses to register a type that
# carries it.
cdef unicode TREE_PYTYPE_NAME = u"TREE"

cdef tuple _unicodeAndUtf8(s):
    """Return the pair ``(s, utf8)`` where *utf8* is *s* encoded as UTF-8."""
    utf8 = python.PyUnicode_AsUTF8String(s)
    return (s, utf8)

def set_pytype_attribute_tag(attribute_tag=None):
    u"""set_pytype_attribute_tag(attribute_tag=None)
    Change name and namespace of the XML attribute that holds Python type
    information.

    Do not use this unless you know what you are doing.

    Calling the function without an argument restores the default.

    Default: "{http://codespeak.net/lxml/objectify/pytype}pytype"
    """
    global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME
    global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8
    global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8
    if attribute_tag is not None:
        # split the user supplied tag into its UTF-8 encoded parts first,
        # then derive the decoded variants from them
        PYTYPE_NAMESPACE_UTF8, PYTYPE_ATTRIBUTE_NAME_UTF8 = \
            cetree.getNsTag(attribute_tag)
        PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8')
        PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8')
    else:
        # fall back to the built-in namespace and attribute name
        PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = \
            _unicodeAndUtf8(u"http://codespeak.net/lxml/objectify/pytype")
        PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = \
            _unicodeAndUtf8(u"pytype")

    # the C pointers are taken from the byte strings assigned above, so
    # they must be refreshed after those
    _PYTYPE_NAMESPACE      = PYTYPE_NAMESPACE_UTF8
    _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8
    PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName(
        _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)

set_pytype_attribute_tag()


# namespaces for XML Schema
# Each namespace is kept as decoded text, as UTF-8 bytes and as a C pointer
# obtained from those bytes via _xcstr().
cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8
XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \
    _unicodeAndUtf8(u"http://www.w3.org/2001/XMLSchema")
cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8)

cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8
XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \
    _unicodeAndUtf8(u"http://www.w3.org/2001/XMLSchema-instance")
cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8)

# "{namespace}name" forms of the schema-instance 'nil' and 'type' attributes
cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = u"{%s}nil" % XML_SCHEMA_INSTANCE_NS
cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = u"{%s}type" % XML_SCHEMA_INSTANCE_NS


################################################################################
# Element class for the main API

cdef class ObjectifiedElement(ElementBase):
    u"""Main XML Element class.

    Element children are accessed as object attributes.  Multiple children
    with the same name are available through a list index.  Example::

       >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>")
       >>> second_c2 = root.c1.c2[1]
       >>> print(second_c2.text)
       1

    Note that you cannot (and must not) instantiate this class or its
    subclasses.
    """
    def __iter__(self):
        u"""Iterate over self and all siblings with the same tag.
        """
        parent = self.getparent()
        if parent is None:
            # without a parent there are no siblings: yield only self
            return iter([self])
        return etree.ElementChildIterator(parent, tag=self.tag)

    def __str__(self):
        # __RECURSIVE_STR and _dump() are defined outside the part of the
        # file shown here; when the flag is set, the dump of the element is
        # returned, otherwise its text (or an empty string if it has none)
        if __RECURSIVE_STR:
            return _dump(self, 0)
        else:
            return textOf(self._c_node) or u''

    # pickle support for objectified Element
    def __reduce__(self):
        # pickled as the serialised XML; unpickling calls fromstring() on it
        return (fromstring, (etree.tostring(self),))

    # only a getter is defined; __setattr__ below rejects assignments to it
    property text:
        def __get__(self):
            return textOf(self._c_node)

    property __dict__:
        u"""A fake implementation for __dict__ to support dir() etc.

        Note that this only considers the first child with a given name.
        """
        def __get__(self):
            cdef _Element child
            cdef dict children
            c_ns = tree._getNs(self._c_node)
            # restrict the iteration to the element's own namespace, if any
            tag = u"{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None
            children = {}
            for child in etree.ElementChildIterator(self, tag=tag):
                # an element without namespace skips namespaced children
                if c_ns is NULL and tree._getNs(child._c_node) is not NULL:
                    continue
                name = pyunicode(child._c_node.name)
                # keep the first child seen for each local name
                if name not in children:
                    children[name] = child
            return children

    def __len__(self):
        u"""Count self and siblings with the same tag.
        """
        return _countSiblings(self._c_node)

    def countchildren(self):
        u"""countchildren(self)

        Return the number of children of this element, regardless of their
        name.
        """
        # copied from etree
        cdef Py_ssize_t c
        cdef tree.xmlNode* c_node
        c = 0
        c_node = self._c_node.children
        while c_node is not NULL:
            # only nodes accepted by tree._isElement() are counted
            if tree._isElement(c_node):
                c += 1
            c_node = c_node.next
        return c

    def getchildren(self):
        u"""getchildren(self)

        Returns a sequence of all direct children.  The elements are
        returned in document order.
        """
        cdef tree.xmlNode* c_node
        result = []
        c_node = self._c_node.children
        while c_node is not NULL:
            # wrap each child accepted by tree._isElement() in a proxy object
            if tree._isElement(c_node):
                result.append(cetree.elementFactory(self._doc, c_node))
            c_node = c_node.next
        return result

    def __getattr__(self, tag):
        u"""Return the (first) child with the given tag name.  If no namespace
        is provided, the child will be looked up in the same one as self.
        """
        # names of the form __xyz__ are never looked up as children
        # NOTE(review): 'object' does not appear to define __getattr__, so
        # this call presumably ends in an AttributeError -- confirm.
        if is_special_method(tag):
            return object.__getattr__(self, tag)
        return _lookupChildOrRaise(self, tag)

    def __setattr__(self, tag, value):
        u"""Set the value of the (first) child with the given tag name.  If no
        namespace is provided, the child will be looked up in the same one as
        self.
        """
        cdef _Element element
        # properties are looked up /after/ __setattr__, so we must emulate them
        if tag == u'text' or tag == u'pyval':
            # read-only !
            raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable"
        elif tag == u'tail':
            cetree.setTailText(self._c_node, value)
            return
        elif tag == u'tag':
            ElementBase.tag.__set__(self, value)
            return
        elif tag == u'base':
            ElementBase.base.__set__(self, value)
            return
        # any other name addresses a child: replace the first match or
        # append a new child if there is none
        tag = _buildChildTag(self, tag)
        element = _lookupChild(self, tag)
        if element is None:
            _appendValue(self, tag, value)
        else:
            _replaceElement(element, value)

    def __delattr__(self, tag):
        # removes the first child with that tag; _lookupChildOrRaise() raises
        # AttributeError if there is no such child
        child = _lookupChildOrRaise(self, tag)
        self.remove(child)

    def addattr(self, tag, value):
        u"""addattr(self, tag, value)

        Add a child value to the element.

        As opposed to append(), it sets a data value, not an element.
        """
        _appendValue(self, _buildChildTag(self, tag), value)

    def __getitem__(self, key):
        u"""Return a sibling, counting from the first child of the parent.  The
        method behaves like both a dict and a sequence.

        * If argument is an integer, returns the sibling at that position.

        * If argument is a string, does the same as getattr().  This can be
          used to provide namespaces for element lookup, or to look up
          children with special names (``text`` etc.).

        * If argument is a slice object, returns the matching slice.
        """
        cdef tree.xmlNode* c_self_node
        cdef tree.xmlNode* c_parent
        cdef tree.xmlNode* c_node
        cdef Py_ssize_t c_index
        if python._isString(key):
            return _lookupChildOrRaise(self, key)
        elif isinstance(key, slice):
            # slices are taken from the list of self and its same-tag siblings
            return list(self)[key]
        # normal item access
        c_index = key   # raises TypeError if necessary
        c_self_node = self._c_node
        c_parent = c_self_node.parent
        if c_parent is NULL:
            # an element without parent only has itself at index 0
            if c_index == 0:
                return self
            else:
                raise IndexError, unicode(key)
        # negative indices are searched from the last child of the parent,
        # all others from its first child
        if c_index < 0:
            c_node = c_parent.last
        else:
            c_node = c_parent.children
        c_node = _findFollowingSibling(
            c_node, tree._getNs(c_self_node), c_self_node.name, c_index)
        if c_node is NULL:
            raise IndexError, unicode(key)
        return elementFactory(self._doc, c_node)

    def __setitem__(self, key, value):
        u"""Set the value of a sibling, counting from the first child of the
        parent.  Implements key assignment, item assignment and slice
        assignment.

        * If argument is an integer, sets the sibling at that position.

        * If argument is a string, does the same as setattr().  This is used
          to provide namespaces for element lookup.

        * If argument is a sequence (list, tuple, etc.), assign the contained
          items to the siblings.
        """
        cdef _Element element
        cdef tree.xmlNode* c_node
        if python._isString(key):
            # string keys address a child: replace the first match or append
            key = _buildChildTag(self, key)
            element = _lookupChild(self, key)
            if element is None:
                _appendValue(self, key, value)
            else:
                _replaceElement(element, value)
            return

        if self._c_node.parent is NULL:
            # the 'root[i] = ...' case
            raise TypeError, u"assignment to root element is invalid"

        if isinstance(key, slice):
            # slice assignment
            _setSlice(key, self, value)
        else:
            # normal index assignment
            # negative indices are searched from the last child of the
            # parent, all others from its first child
            if key < 0:
                c_node = self._c_node.parent.last
            else:
                c_node = self._c_node.parent.children
            c_node = _findFollowingSibling(
                c_node, tree._getNs(self._c_node), self._c_node.name, key)
            if c_node is NULL:
                raise IndexError, unicode(key)
            element = elementFactory(self._doc, c_node)
            _replaceElement(element, value)

    def __delitem__(self, key):
        # deletes same-tag siblings by index or slice; not available on an
        # element without parent
        parent = self.getparent()
        if parent is None:
            raise TypeError, u"deleting items not supported by root element"
        if isinstance(key, slice):
            # slice deletion
            del_items = list(self)[key]
            remove = parent.remove
            for el in del_items:
                remove(el)
        else:
            # normal index deletion
            sibling = self.__getitem__(key)
            parent.remove(sibling)

    def descendantpaths(self, prefix=None):
        u"""descendantpaths(self, prefix=None)

        Returns a list of object path expressions for all descendants.
        """
        # a non-string prefix is taken as a sequence of path segments and
        # joined with '.'
        if prefix is not None and not python._isString(prefix):
            prefix = u'.'.join(prefix)
        return _build_descendant_paths(self._c_node, prefix)


cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
    """Tell whether the node has the given name and namespace.

    The name is compared by pointer.  A NULL *c_href* accepts any
    namespace; an empty *c_href* only matches nodes without a namespace.
    """
    if c_node.name != c_name:
        return False
    if c_href is NULL:
        return True
    c_ns = tree._getNs(c_node)
    if c_ns is not NULL:
        return tree.xmlStrcmp(c_ns, c_href) == 0
    # the node has no namespace: only an empty href matches
    return c_href[0] == c'\0'


cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node):
    """Count the start node plus all of its siblings that are element nodes
    with the same name and namespace.
    """
    cdef tree.xmlNode* c_sibling
    cdef Py_ssize_t total = 1  # the start node itself
    c_name = c_start_node.name
    c_ns = tree._getNs(c_start_node)

    # siblings that follow the start node
    c_sibling = c_start_node.next
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE:
            if _tagMatches(c_sibling, c_ns, c_name):
                total += 1
        c_sibling = c_sibling.next

    # siblings that precede the start node
    c_sibling = c_start_node.prev
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE:
            if _tagMatches(c_sibling, c_ns, c_name):
                total += 1
        c_sibling = c_sibling.prev
    return total

cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node,
                                         const_xmlChar* href, const_xmlChar* name,
                                         Py_ssize_t index):
    """Starting at *c_node*, return the matching element at position *index*.

    For ``index >= 0`` the search moves forward and returns the match
    number ``index`` (counting from 0).  For a negative index it moves
    backwards, with -1 selecting the first match found.  Returns NULL if
    there are not enough matching elements.
    """
    cdef tree.xmlNode* (*advance)(tree.xmlNode*)
    if index < 0:
        # map -1, -2, ... to 0, 1, ... and walk backwards
        index = -1 - index
        advance = cetree.previousElement
    else:
        advance = cetree.nextElement
    while c_node is not NULL:
        if c_node.type == tree.XML_ELEMENT_NODE and \
               _tagMatches(c_node, href, name):
            if index == 0:
                return c_node
            index -= 1
        c_node = advance(c_node)
    return NULL

cdef object _lookupChild(_Element parent, tag):
    """Return the first child of *parent* that matches *tag*, or None if
    there is no such child.
    """
    cdef tree.xmlNode* c_result
    cdef tree.xmlNode* c_node
    c_node = parent._c_node
    ns, tag = cetree.getNsTagWithEmptyNs(tag)
    # look the name up in the dictionary of the document; the pointer found
    # there is what _tagMatches() later compares node names against
    c_tag = tree.xmlDictExists(
        c_node.doc.dict, _xcstr(tag), python.PyBytes_GET_SIZE(tag))
    if c_tag is NULL:
        return None # not in the hash map => not in the tree
    if ns is None:
        # either inherit ns from parent or use empty (i.e. no) namespace
        c_href = tree._getNs(c_node) or <const_xmlChar*>''
    else:
        c_href = _xcstr(ns)
    # index 0: the first matching child, searching forward
    c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0)
    if c_result is NULL:
        return None
    return elementFactory(parent._doc, c_result)

cdef object _lookupChildOrRaise(_Element parent, tag):
    """Like _lookupChild(), but raise AttributeError if no child matches."""
    child = _lookupChild(parent, tag)
    if child is not None:
        return child
    raise AttributeError, u"no such child: " + _buildChildTag(parent, tag)

cdef object _buildChildTag(_Element parent, tag):
    """Build the namespaced name of a child from *tag*.

    If *tag* carries no namespace, the namespace of *parent* is used.
    """
    cdef const_xmlChar* c_href
    ns, tag = cetree.getNsTag(tag)
    c_tag = _xcstr(tag)
    if ns is None:
        # no explicit namespace given: take the one of the parent
        c_href = tree._getNs(parent._c_node)
    else:
        c_href = _xcstr(ns)
    return cetree.namespacedNameFromNsName(c_href, c_tag)

cdef _replaceElement(_Element element, value):
    """Replace *element* in its parent by an element built from *value*.

    An Element value is deep-copied and given the tag of *element*; a list
    or tuple is assigned to the complete slice of *element*; any other
    value becomes the content of a new element with the same tag.
    """
    cdef _Element replacement
    if isinstance(value, _Element):
        # work on a deep copy that lives in the document of the target
        replacement = cetree.deepcopyNodeToDocument(
            element._doc, (<_Element>value)._c_node)
        replacement.tag = element.tag
    else:
        if isinstance(value, (list, tuple)):
            element[:] = value
            return
        replacement = element.makeelement(element.tag)
        _setElementValue(replacement, value)
    parent = element.getparent()
    parent.replace(element, replacement)

cdef _appendValue(_Element parent, tag, value):
    """Append *value* to *parent* as a new child named *tag*.

    An Element value is deep-copied and renamed, the items of a list or
    tuple are appended one after the other, and any other value becomes the
    content of a newly created element.
    """
    cdef _Element child
    if isinstance(value, _Element):
        # work on a deep copy that lives in the document of the parent
        child = cetree.deepcopyNodeToDocument(
            parent._doc, (<_Element>value)._c_node)
        child.tag = tag
    elif isinstance(value, (list, tuple)):
        for entry in value:
            _appendValue(parent, tag, entry)
        return
    else:
        child = cetree.makeElement(
            tag, parent._doc, None, None, None, None, None)
        _setElementValue(child, value)
    cetree.appendChildToElement(parent, child)

cdef _setElementValue(_Element element, value):
    """Store a Python value in *element* and update its type hints.

    An Element value replaces *element* instead.  None sets the
    schema-instance 'nil' attribute to "true".  Any other value removes
    that attribute, is converted to text unless it already is a string, and
    the pytype attribute is set if a PyType is registered under the type
    name of the value, or removed otherwise.
    """
    if isinstance(value, _Element):
        _replaceElement(element, value)
        return
    if value is None:
        cetree.setAttributeValue(
            element, XML_SCHEMA_INSTANCE_NIL_ATTR, u"true")
    else:
        cetree.delAttributeFromNsName(
            element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
        is_text = python._isString(value)
        pytype_name = u"str" if is_text else _typename(value)
        py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
        if not is_text:
            # strings are stored as they are, everything else is converted
            if py_type is None:
                value = unicode(value)
            else:
                value = py_type.stringify(value)
        if py_type is None:
            cetree.delAttributeFromNsName(
                element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
    cetree.setNodeText(element._c_node, value)

cdef _setSlice(sliceobject, _Element target, items):
    """Implement slice assignment on *target* and its same-tag siblings.

    Each entry of *items* becomes an element with the tag of *target*.  The
    new elements replace the siblings selected by *sliceobject* pairwise;
    surplus old siblings are removed and surplus new elements are inserted.
    Raises ValueError for a step of 0, and for a step other than 1 if the
    number of new items differs from the number of selected siblings.
    """
    cdef _Element parent
    cdef tree.xmlNode* c_node
    cdef Py_ssize_t c_step, c_start, pos
    # collect existing slice
    if (<slice>sliceobject).step is None:
        c_step = 1
    else:
        c_step = (<slice>sliceobject).step
    if c_step == 0:
        raise ValueError, u"Invalid slice"
    cdef list del_items = target[sliceobject]

    # collect new values
    new_items = []
    tag = target.tag
    for item in items:
        if isinstance(item, _Element):
            # deep copy the new element
            new_element = cetree.deepcopyNodeToDocument(
                target._doc, (<_Element>item)._c_node)
            new_element.tag = tag
        else:
            # plain values become the content of a fresh element
            new_element = cetree.makeElement(
                tag, target._doc, None, None, None, None, None)
            _setElementValue(new_element, item)
        new_items.append(new_element)

    # sanity check - raise what a list would raise
    if c_step != 1 and len(del_items) != len(new_items):
        raise ValueError, \
            f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}"

    # replace existing items
    pos = 0
    parent = target.getparent()
    replace = parent.replace
    while pos < len(new_items) and pos < len(del_items):
        replace(del_items[pos], new_items[pos])
        pos += 1
    # remove leftover items
    if pos < len(del_items):
        remove = parent.remove
        while pos < len(del_items):
            remove(del_items[pos])
            pos += 1
    # append remaining new items
    if pos < len(new_items):
        # the sanity check above guarantees (step == 1)
        if pos > 0:
            # continue right after the last element that was put in place
            item = new_items[pos-1]
        else:
            # nothing was replaced (the slice selected no siblings): find the
            # sibling at position start-1 to insert after
            # NOTE(review): 'start' may be None here (e.g. for el[:0] = ...);
            # it is compared with 0 and decremented below -- confirm how
            # that case behaves.
            if (<slice>sliceobject).start > 0:
                c_node = parent._c_node.children
            else:
                c_node = parent._c_node.last
            c_node = _findFollowingSibling(
                c_node, tree._getNs(target._c_node), target._c_node.name,
                (<slice>sliceobject).start - 1)
            if c_node is NULL:
                # no such sibling: append the new items to the parent
                while pos < len(new_items):
                    cetree.appendChildToElement(parent, new_items[pos])
                    pos += 1
                return
            item = cetree.elementFactory(parent._doc, c_node)
        # chain the new elements: each one is added right after the previous
        while pos < len(new_items):
            add = item.addnext
            item = new_items[pos]
            add(item)
            pos += 1

################################################################################
# Data type support in subclasses

cdef class ObjectifiedDataElement(ObjectifiedElement):
    u"""Common base class of all Elements that represent a data value.

    Subclasses are expected to override the 'pyval' property and, where
    needed, the __str__ method.
    """
    property pyval:
        def __get__(self):
            # the base class simply exposes the element text
            text = textOf(self._c_node)
            return text

    def __str__(self):
        text = textOf(self._c_node)
        return text if text else ''

    def __repr__(self):
        text = textOf(self._c_node)
        return strrepr(text if text else '')

    def _setText(self, s):
        u"""Set the text content of the element.

        For use in subclasses only. Don't use unless you know what you are
        doing.
        """
        cetree.setNodeText(self._c_node, s)

cdef class NumberElement(ObjectifiedDataElement):
    # Data element whose text is converted by the function stored in
    # '_parse_value'.  Conversions, hashing and comparisons use the parsed
    # value of the element; the arithmetic operators pass both operands
    # through _numericValueOf() before applying the operator.
    cdef object _parse_value

    def _setValueParser(self, function):
        u"""Set the function that parses the Python value from a string.

        Do not use this unless you know what you are doing.
        """
        self._parse_value = function

    property pyval:
        def __get__(self):
            value = _parseNumber(self)
            return value

    # --- conversions -----------------------------------------------------

    def __int__(self):
        value = _parseNumber(self)
        return int(value)

    def __long__(self):
        value = _parseNumber(self)
        return long(value)

    def __float__(self):
        value = _parseNumber(self)
        return float(value)

    def __complex__(self):
        value = _parseNumber(self)
        return complex(value)

    def __str__(self):
        value = _parseNumber(self)
        return unicode(value)

    def __repr__(self):
        value = _parseNumber(self)
        return repr(value)

    def __oct__(self):
        value = _parseNumber(self)
        return oct(value)

    def __hex__(self):
        value = _parseNumber(self)
        return hex(value)

    # --- comparison and hashing ------------------------------------------

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        value = _parseNumber(self)
        return hash(value)

    # --- binary arithmetic -----------------------------------------------

    def __add__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left + right

    def __sub__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left - right

    def __mul__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left * right

    def __div__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left / right

    def __truediv__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left / right

    def __mod__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left % right

    def __pow__(self, other, modulo):
        base = _numericValueOf(self)
        exponent = _numericValueOf(other)
        if modulo is not None:
            return pow(base, exponent, modulo)
        return base ** exponent

    # --- unary operators -------------------------------------------------

    def __neg__(self):
        value = _numericValueOf(self)
        return - value

    def __pos__(self):
        value = _numericValueOf(self)
        return + value

    def __abs__(self):
        value = _numericValueOf(self)
        return abs(value)

    def __nonzero__(self):
        value = _numericValueOf(self)
        return bool(value)

    def __invert__(self):
        value = _numericValueOf(self)
        return ~ value

    # --- bit operations --------------------------------------------------

    def __lshift__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left << right

    def __rshift__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left >> right

    def __and__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left & right

    def __or__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left | right

    def __xor__(self, other):
        left = _numericValueOf(self)
        right = _numericValueOf(other)
        return left ^ right

cdef class IntElement(NumberElement):
    # Number element whose text is parsed with int().
    def _init(self):
        # install the parser that _parseNumber() applies to the element text
        # NOTE(review): presumably called by lxml.etree when the element
        # proxy is created -- confirm.
        self._parse_value = int

cdef class LongElement(NumberElement):
    # Number element whose text is parsed with long().
    def _init(self):
        # install the parser that _parseNumber() applies to the element text
        # NOTE(review): presumably called by lxml.etree when the element
        # proxy is created -- confirm.
        self._parse_value = long

cdef class FloatElement(NumberElement):
    # Number element whose text is parsed with float().
    def _init(self):
        # install the parser that _parseNumber() applies to the element text
        # NOTE(review): presumably called by lxml.etree when the element
        # proxy is created -- confirm.
        self._parse_value = float

cdef class StringElement(ObjectifiedDataElement):
    u"""String data class.

    Note that this class does *not* support the sequence protocol of strings:
    len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported.
    Instead, use the .text attribute to get a 'real' string.

    An element without text content is treated as the empty string by
    ``pyval``, ``repr()``, ``hash()`` and the ``+``, ``*`` and ``%``
    operators.
    """
    property pyval:
        def __get__(self):
            return textOf(self._c_node) or u''

    def __repr__(self):
        return repr(textOf(self._c_node) or u'')

    def strlen(self):
        u"""strlen(self)

        Return the length of the text content, 0 if there is none.
        """
        text = textOf(self._c_node)
        if text is None:
            return 0
        else:
            return len(text)

    def __nonzero__(self):
        return bool(textOf(self._c_node))

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        return hash(textOf(self._c_node) or u'')

    def __add__(self, other):
        # _strValueOf() maps missing text and None to u'' and never returns
        # None, so both operands can be concatenated directly
        return _strValueOf(self) + _strValueOf(other)

    def __mul__(self, other):
        # Either operand may be the StringElement.  Missing text counts as
        # the empty string (as in pyval and __add__) instead of failing with
        # a TypeError about NoneType.
        if isinstance(self, StringElement):
            return (textOf((<StringElement>self)._c_node) or u'') * _numericValueOf(other)
        elif isinstance(other, StringElement):
            return _numericValueOf(self) * (textOf((<StringElement>other)._c_node) or u'')
        else:
            raise TypeError, u"invalid types for * operator"

    def __mod__(self, other):
        # string formatting with the element text as the format string
        return _strValueOf(self) % other

    # The conversions below pass the raw text on, so they raise whatever the
    # target type raises for unsuitable input.

    def __int__(self):
        return int(textOf(self._c_node))

    def __long__(self):
        return long(textOf(self._c_node))

    def __float__(self):
        return float(textOf(self._c_node))

    def __complex__(self):
        return complex(textOf(self._c_node))

cdef class NoneElement(ObjectifiedDataElement):
    # Data element that stands in for None: it prints, hashes and compares
    # like None and its 'pyval' is None.
    def __str__(self):
        return u"None"

    def __repr__(self):
        return "None"

    def __nonzero__(self):
        return False

    def __richcmp__(self, other, int op):
        # substitute None for whichever operand is the NoneElement and let
        # Python compare the result
        if other is None or self is None:
            left = right = None
        elif isinstance(self, NoneElement):
            left, right = None, other
        else:
            left, right = self, None
        return python.PyObject_RichCompare(left, right, op)

    def __hash__(self):
        return hash(None)

    property pyval:
        def __get__(self):
            return None

cdef class BoolElement(IntElement):
    u"""Boolean type based on string values: 'true' or 'false'.

    Note that this inherits from IntElement to mimic the behaviour of
    Python's bool type.
    """
    def _init(self):
        self._parse_value = __parseBool

    def __nonzero__(self):
        text = textOf(self._c_node)
        return __parseBool(text)

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        value = __parseBool(textOf(self._c_node))
        return hash(value)

    def __str__(self):
        value = __parseBool(textOf(self._c_node))
        return unicode(value)

    def __repr__(self):
        value = __parseBool(textOf(self._c_node))
        return repr(value)

    property pyval:
        def __get__(self):
            text = textOf(self._c_node)
            return __parseBool(text)

def __checkBool(s):
    # Type check for booleans: raises ValueError if 's' is None or is not
    # one of the values accepted by __parseBoolAsInt(); returns nothing.
    cdef int value
    if s is None:
        raise ValueError
    value = __parseBoolAsInt(s)
    if value == -1:
        raise ValueError

cpdef bint __parseBool(s) except -1:
    # Convert a boolean string to a truth value.  None counts as False;
    # anything __parseBoolAsInt() does not accept raises ValueError.
    cdef int parsed
    if s is not None:
        parsed = __parseBoolAsInt(s)
        if parsed == -1:
            raise ValueError, f"Invalid boolean value: '{s}'"
        return parsed
    return False

cdef inline int __parseBoolAsInt(text) except -2:
    # Map 'false'/'0' to 0 and 'true'/'1' to 1; -1 signals any other value.
    if text == 'false':
        return 0
    if text == 'true':
        return 1
    if text == '0':
        return 0
    if text == '1':
        return 1
    return -1

cdef object _parseNumber(NumberElement element):
    """Apply the value parser of *element* to its text content."""
    parse = element._parse_value
    return parse(textOf(element._c_node))

cdef object _strValueOf(obj):
    """Return a string value for *obj*.

    Strings are returned as they are, Elements contribute their text (or an
    empty string if they have none), None becomes an empty string and
    everything else is converted with unicode().
    """
    if python._isString(obj):
        result = obj
    elif isinstance(obj, _Element):
        text = textOf((<_Element>obj)._c_node)
        result = text if text else u''
    elif obj is None:
        result = u''
    else:
        result = unicode(obj)
    return result

cdef object _numericValueOf(obj):
    """Return the value of *obj* to use in arithmetic.

    NumberElements are parsed, other objects contribute their 'pyval'
    attribute if they have one, and are returned unchanged otherwise.
    """
    if isinstance(obj, NumberElement):
        return _parseNumber(<NumberElement>obj)
    # not always numeric, but Python will raise the right exception
    return getattr(obj, 'pyval', obj)

cdef _richcmpPyvals(left, right, int op):
    """Compare two objects by their 'pyval' attributes, using the object
    itself for an operand that has no such attribute.
    """
    left_value = getattr(left, 'pyval', left)
    right_value = getattr(right, 'pyval', right)
    return python.PyObject_RichCompare(left_value, right_value, op)


################################################################################
# Python type registry

cdef class PyType:
    u"""PyType(self, name, type_check, type_class, stringify=None)
    User defined type.

    A named type consists of a type check function, a type class that
    inherits from ObjectifiedDataElement and an optional "stringification"
    function.  The type check receives a string and must raise ValueError
    or TypeError if it cannot handle the string value.  It may be None, in
    which case the type is not considered for type guessing.  For
    registered named types, the 'stringify' function (or unicode() if None)
    converts a Python object with type name 'name' to the string
    representation stored in the XML tree.

    Example::

        PyType('int', int, MyIntClass).register()

    Note that the order in which types are registered matters.  The first
    matching type will be used.
    """
    cdef readonly object name
    cdef readonly object type_check
    cdef readonly object stringify
    cdef object _type
    cdef list _schema_types

    def __init__(self, name, type_check, type_class, stringify=None):
        # the name is stored as unicode, byte strings must be plain ASCII
        if isinstance(name, bytes):
            name = (<bytes>name).decode('ascii')
        elif not isinstance(name, unicode):
            raise TypeError, u"Type name must be a string"
        if type_check is not None and not callable(type_check):
            raise TypeError, u"Type check function must be callable (or None)"
        # only the tree type may use a class that is not a data class
        if name != TREE_PYTYPE_NAME and \
               not issubclass(type_class, ObjectifiedDataElement):
            raise TypeError, \
                u"Data classes must inherit from ObjectifiedDataElement"
        self.name  = name
        self._type = type_class
        self.type_check = type_check
        self.stringify = unicode if stringify is None else stringify
        self._schema_types = []

    def __repr__(self):
        return "PyType(%s, %s)" % (self.name, self._type.__name__)

    def register(self, before=None, after=None):
        u"""register(self, before=None, after=None)

        Register the type.

        The keyword arguments 'before' and 'after' take a sequence of type
        names that must appear before/after the new type in the type list.
        Names that are not currently known are simply ignored.  Raises
        ValueError if the dependencies cannot be fulfilled.
        """
        if self.name == TREE_PYTYPE_NAME:
            raise ValueError, u"Cannot register tree type"
        checker = self.type_check
        if checker is not None:
            # an entry that uses the same check function gets replaced
            for known in _TYPE_CHECKS:
                if known[0] is checker:
                    _TYPE_CHECKS.remove(known)
                    break
            entry = (checker, self)
            first_pos = 0
            last_pos = -1
            if before or after:
                if before is None:
                    before = ()
                elif after is None:
                    after = ()
                # last_pos: position of the first type named in 'before'
                # first_pos: position right behind the last one in 'after'
                for pos, (check, pytype) in enumerate(_TYPE_CHECKS):
                    if last_pos == -1 and pytype.name in before:
                        last_pos = pos
                    if pytype.name in after:
                        first_pos = pos+1
            if last_pos != -1:
                if first_pos > last_pos:
                    raise ValueError, u"inconsistent before/after dependencies"
                _TYPE_CHECKS.insert(last_pos, entry)
            else:
                _TYPE_CHECKS.append(entry)

        _PYTYPE_DICT[self.name] = self
        for xs_type in self._schema_types:
            _SCHEMA_TYPE_DICT[xs_type] = self

    def unregister(self):
        u"unregister(self)"
        if _PYTYPE_DICT.get(self.name) is self:
            del _PYTYPE_DICT[self.name]
        # collect the schema type names first, the dict changes on deletion
        stale = [xs_type for xs_type, pytype in _SCHEMA_TYPE_DICT.items()
                 if pytype is self]
        for xs_type in stale:
            del _SCHEMA_TYPE_DICT[xs_type]
        if self.type_check is not None:
            try:
                _TYPE_CHECKS.remove( (self.type_check, self) )
            except ValueError:
                # the type was not in the list of type checks
                pass

    property xmlSchemaTypes:
        u"""The list of XML Schema datatypes this Python type maps to.

        Note that this must be set before registering the type!
        """
        def __get__(self):
            return self._schema_types
        def __set__(self, types):
            self._schema_types = [unicode(xs_type) for xs_type in types]


# Registries maintained by PyType.register() / PyType.unregister():
# type name -> PyType
cdef dict _PYTYPE_DICT = {}
# XML Schema type name -> PyType
cdef dict _SCHEMA_TYPE_DICT = {}
# ordered list of (type_check, PyType) pairs
cdef list _TYPE_CHECKS = []

def __lower_bool(b):
    # stringify a truth value as the lower-case literal "true" / "false"
    if b:
        return u"true"
    return u"false"

cdef _pytypename(obj):
    # every kind of string is reported as 'str', anything else by the
    # name that _typename() determines for it
    if python._isString(obj):
        return u"str"
    return _typename(obj)

def pytypename(obj):
    u"""pytypename(obj)

    Find the name of the corresponding PyType for a Python object.

    String objects are always reported as 'str'.
    """
    return u"str" if python._isString(obj) else _typename(obj)

cdef _registerPyTypes():
    # Register the built-in PyTypes.  Each table entry holds the PyType
    # constructor arguments and the XML Schema type names to assign (or
    # None to leave them unset).  Entries are registered in table order.
    builtin_types = (
        ((u'int', int, IntElement),
         (u"integer", u"int", u"short", u"byte", u"unsignedShort",
          u"unsignedByte", u"nonPositiveInteger",
          u"negativeInteger", u"long", u"nonNegativeInteger",
          u"unsignedLong", u"unsignedInt", u"positiveInteger",)),

        # 'long' type just for backwards compatibility
        ((u'long', None, IntElement), None),

        ((u'float', float, FloatElement, repr),
         (u"double", u"float")),

        ((u'bool', __checkBool, BoolElement, __lower_bool),
         (u"boolean",)),

        ((u'str', None, StringElement),
         (u"string", u"normalizedString", u"token", u"language",
          u"Name", u"NCName", u"ID", u"IDREF", u"ENTITY",
          u"NMTOKEN", )),

        # since lxml 2.0
        ((u'NoneType', None, NoneElement), None),

        # backwards compatibility
        ((u'none', None, NoneElement), None),
    )
    for ctor_args, schema_types in builtin_types:
        pytype = PyType(*ctor_args)
        if schema_types is not None:
            pytype.xmlSchemaTypes = schema_types
        pytype.register()

# non-registered PyType for inner tree elements
cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement)

# populate the type registries with the built-in types at import time
_registerPyTypes()

def getRegisteredTypes():
    u"""getRegisteredTypes()

    Returns a list of the currently registered PyType objects.

    Types that have a type check come first, in the order in which their
    checks are tried, followed by the remaining registered types.  Each
    type name appears only once.

    To add a new type, retrieve this list and call unregister() for all
    entries.  Then add the new type at a suitable position (possibly replacing
    an existing one) and call register() for all entries.

    This is necessary if the new type interferes with the type check functions
    of existing ones (normally only int/float/bool) and must be tried before
    other types.  To add a type that is not yet parsable by the current type
    check functions, you can simply register() it, which will append it to the
    end of the type list.
    """
    cdef list ordered = []
    cdef set seen_names = set()
    sources = (
        [check_entry[1] for check_entry in _TYPE_CHECKS],
        _PYTYPE_DICT.values(),
    )
    for source in sources:
        for pytype in source:
            type_name = pytype.name
            if type_name in seen_names:
                continue
            seen_names.add(type_name)
            ordered.append(pytype)
    return ordered

cdef PyType _guessPyType(value, PyType defaulttype):
    # Return the PyType of the first registered type check that accepts
    # 'value', 'defaulttype' if none does, or None for a None value.
    if value is None:
        return None
    for accepts, candidate in _TYPE_CHECKS:
        try:
            accepts(value)
        except IGNORABLE_ERRORS:
            # could not be parsed as the specified type => try the next one
            continue
        else:
            return <PyType>candidate
    return defaulttype

cdef object _guessElementClass(tree.xmlNode* c_node):
    # Determine the element class from the text content of the node:
    # None for a node without text value or if no type check matches,
    # StringElement for empty text, otherwise the class of the first
    # PyType whose type check accepts the text.
    text = textOf(c_node)
    if text is None:
        return None
    if text == '':
        return StringElement

    for accepts, candidate in _TYPE_CHECKS:
        try:
            accepts(text)
        except IGNORABLE_ERRORS:
            continue
        else:
            return (<PyType>candidate)._type
    return None

################################################################################
# adapted ElementMaker supports registered PyTypes

# Callable that builds one element with a fixed tag.  Instances are created
# and configured by ElementMaker._build_element_maker().
@cython.final
@cython.internal
cdef class _ObjectifyElementMakerCaller:
    # tag of the elements created by this caller
    cdef object _tag
    # namespace mapping passed on to the element creation
    cdef object _nsmap
    # optional element factory; None means that _makeElement() is used
    cdef object _element_factory
    # whether to set the pytype attribute on created data elements
    cdef bint _annotate

    def __call__(self, *children, **attrib):
        u"__call__(self, *children, **attrib)"
        cdef _ObjectifyElementMakerCaller elementMaker
        cdef _Element element
        cdef _Element childElement
        cdef bint has_children
        cdef bint has_string_value
        if self._element_factory is None:
            element = _makeElement(self._tag, None, attrib, self._nsmap)
        else:
            element = self._element_factory(self._tag, attrib, self._nsmap)

        # name of the Python type of the last non-string data value added
        pytype_name = None
        has_children = False
        has_string_value = False
        for child in children:
            if child is None:
                # None only has an effect as the sole child: it sets the
                # nil attribute of the element to "true"
                if len(children) == 1:
                    cetree.setAttributeValue(
                        element, XML_SCHEMA_INSTANCE_NIL_ATTR, u"true")
            elif python._isString(child):
                _add_text(element, child)
                has_string_value = True
            elif isinstance(child, _Element):
                cetree.appendChildToElement(element, <_Element>child)
                has_children = True
            elif isinstance(child, _ObjectifyElementMakerCaller):
                # an uncalled element maker (e.g. 'M.br') adds an element
                # with its tag as child, without passing text or attributes
                elementMaker = <_ObjectifyElementMakerCaller>child
                if elementMaker._element_factory is None:
                    cetree.makeSubElement(element, elementMaker._tag,
                                          None, None, None, None)
                else:
                    childElement = elementMaker._element_factory(
                        elementMaker._tag)
                    cetree.appendChildToElement(element, childElement)
                has_children = True
            elif isinstance(child, dict):
                # a dict provides additional attributes; values are
                # stringified by their registered PyType if there is one
                for name, value in child.items():
                    # keyword arguments in attrib take precedence
                    if name in attrib:
                        continue
                    pytype = _PYTYPE_DICT.get(_typename(value))
                    if pytype is not None:
                        value = (<PyType>pytype).stringify(value)
                    elif not python._isString(value):
                        value = unicode(value)
                    cetree.setAttributeValue(element, name, value)
            else:
                # any other value is added as text
                if pytype_name is not None:
                    # concatenation always makes the result a string
                    has_string_value = True
                pytype_name = _typename(child)
                pytype = _PYTYPE_DICT.get(_typename(child))
                if pytype is not None:
                    _add_text(element, (<PyType>pytype).stringify(child))
                else:
                    # no registered PyType => treat the value as a string
                    has_string_value = True
                    child = unicode(child)
                    _add_text(element, child)

        # only elements without child elements get a pytype annotation
        if self._annotate and not has_children:
            if has_string_value:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, u"str")
            elif pytype_name is not None:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)

        return element

cdef _add_text(_Element elem, text):
    # Append text to the tree in construction.  While the element has no
    # child yet, the text extends the element text; afterwards it extends
    # the tail text of the child found by findChildBackwards(..., 0).
    cdef tree.xmlNode* c_last_child = cetree.findChildBackwards(elem._c_node, 0)
    if c_last_child is NULL:
        existing = cetree.textOf(elem._c_node)
        if existing is not None:
            text = existing + text
        cetree.setNodeText(elem._c_node, text)
    else:
        existing = cetree.tailOf(c_last_child)
        if existing is not None:
            text = existing + text
        cetree.setTailText(c_last_child, text)

cdef class ElementMaker:
    u"""ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)

    An ElementMaker that can be used for constructing trees.

    Example::

      >>> M = ElementMaker(annotate=False)
      >>> attributes = {'class': 'par'}
      >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) )

      >>> from lxml.etree import tostring
      >>> print(tostring(html, method='html').decode('ascii'))
      <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html>

    To create tags that are not valid Python identifiers, call the factory
    directly and pass the tag name as first argument::

      >>> root = M('tricky-tag', 'some text')
      >>> print(root.tag)
      tricky-tag
      >>> print(root.text)
      some text

    Note that this module has a predefined ElementMaker instance called ``E``.
    """
    # optional user provided element factory (None: use the default one)
    cdef object _makeelement
    # namespace in "{uri}" notation that is prepended to plain tag names
    cdef object _namespace
    # namespace mapping passed to the created elements
    cdef object _nsmap
    # whether the created data elements get a pytype annotation
    cdef bint _annotate
    # element makers that were already built, keyed by tag name
    cdef dict _cache
    def __init__(self, *, namespace=None, nsmap=None, annotate=True,
                 makeelement=None):
        if nsmap is None:
            nsmap = _DEFAULT_NSMAP if annotate else {}
        self._nsmap = nsmap
        self._namespace = None if namespace is None else u"{%s}" % namespace
        self._annotate = annotate
        if makeelement is not None and not callable(makeelement):
            raise TypeError(
                f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}")
        self._makeelement = makeelement
        self._cache = {}

    @cython.final
    cdef _build_element_maker(self, tag, bint caching):
        # Create the callable that builds elements named 'tag' and
        # optionally remember it for later lookups.
        cdef _ObjectifyElementMakerCaller element_maker
        element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller)
        if self._namespace is not None and tag[0] != u"{":
            element_maker._tag = self._namespace + tag
        else:
            element_maker._tag = tag
        element_maker._nsmap = self._nsmap
        element_maker._annotate = self._annotate
        element_maker._element_factory = self._makeelement
        if caching:
            # keep the cache from growing without bounds
            if len(self._cache) > 200:
                self._cache.clear()
            self._cache[tag] = element_maker
        return element_maker

    def __getattr__(self, tag):
        element_maker = self._cache.get(tag)
        if element_maker is None:
            if is_special_method(tag):
                # Special method names are never turned into element
                # makers.  Report them as missing attributes of this object
                # ('object' has no '__getattr__' to delegate to).
                raise AttributeError(
                    f"'{type(self).__name__}' object has no attribute '{tag}'")
            return self._build_element_maker(tag, caching=True)
        return element_maker

    def __call__(self, tag, *args, **kwargs):
        element_maker = self._cache.get(tag)
        if element_maker is None:
            # special method names are usable as tags here, but are kept
            # out of the cache that __getattr__ reads from
            element_maker = self._build_element_maker(
                tag, caching=not is_special_method(tag))
        return element_maker(*args, **kwargs)

################################################################################
# Recursive element dumping

# module-wide switch that is set by enable_recursive_str()
cdef bint __RECURSIVE_STR = 0 # default: off

def enable_recursive_str(on=True):
    u"""enable_recursive_str(on=True)

    Enable a recursively generated tree representation for str(element),
    based on objectify.dump(element).

    Pass a false value to switch it off again.  The setting is stored in a
    module-level flag.
    """
    global __RECURSIVE_STR
    __RECURSIVE_STR = on

def dump(_Element element not None):
    u"""dump(_Element element not None)

    Return a recursively generated string representation of an element.
    """
    # start at indentation level 0, i.e. with the given element as root
    return _dump(element, 0)

cdef object _dump(_Element element, int indent):
    # Build the text dump of 'element' and its descendants, one line per
    # element and attribute, indented by four spaces per tree level.
    pad = u"    " * indent
    if isinstance(element, ObjectifiedDataElement):
        shown = repr(element)
    else:
        shown = textOf(element._c_node)
        if shown is not None:
            # whitespace-only text is displayed like missing text
            shown = repr(shown) if shown.strip() else None
    parts = [f"{pad}{element.tag} = {shown} [{_typename(element)}]\n"]

    xsi_prefix    = u"{%s}" % XML_SCHEMA_INSTANCE_NS
    pytype_prefix = u"{%s}" % PYTYPE_NAMESPACE
    for attr_name, attr_value in cetree.iterattributes(element, 3):
        if u'{' in attr_name:
            if attr_name == PYTYPE_ATTRIBUTE:
                if attr_value == TREE_PYTYPE_NAME:
                    # the tree pytype annotation is not displayed
                    continue
                attr_name = attr_name.replace(pytype_prefix, u'py:')
            attr_name = attr_name.replace(xsi_prefix, u'xsi:')
        parts.append(f"{pad}  * {attr_name} = {attr_value!r}\n")

    for child in element.iterchildren():
        parts.append(_dump(child, indent + 1))

    text = u"".join(parts)
    if indent == 0:
        return text[:-1] # strip last '\n'
    return text


################################################################################
# Pickle support for objectified ElementTree

def __unpickleElementTree(data):
    # rebuild an ElementTree from serialised data using objectify's
    # fromstring()
    return etree.ElementTree(fromstring(data))

cdef _setupPickle(elementTreeReduceFunction):
    # Register the given reduce function and __unpickleElementTree with
    # copyreg for etree._ElementTree.
    if python.IS_PYTHON2:
        # the module is called 'copy_reg' in Python 2
        import copy_reg as copyreg
    else:
        import copyreg
    copyreg.pickle(etree._ElementTree,
                   elementTreeReduceFunction, __unpickleElementTree)

def pickleReduceElementTree(obj):
    # reduce to the unpickle function plus the serialised tree as its
    # only argument
    return (__unpickleElementTree, (etree.tostring(obj),))

# register the pickle support, then remove the reduce function from the
# module namespace
_setupPickle(pickleReduceElementTree)
del pickleReduceElementTree

################################################################################
# Element class lookup

cdef class ObjectifyElementClassLookup(ElementClassLookup):
    u"""ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)
    Element class lookup method that uses the objectify classes.
    """
    # class used for empty data elements
    cdef object empty_data_class
    # class used for inner tree elements
    cdef object tree_class
    def __init__(self, tree_class=None, empty_data_class=None):
        u"""Lookup mechanism for objectify.

        The default Element classes can be replaced by passing subclasses of
        ObjectifiedElement and ObjectifiedDataElement as keyword arguments.
        'tree_class' defines inner tree classes (defaults to
        ObjectifiedElement), 'empty_data_class' defines the default class for
        empty data elements (defaults to StringElement).
        """
        self._lookup_function = _lookupElementClass
        self.tree_class = (
            ObjectifiedElement if tree_class is None else tree_class)
        self.empty_data_class = (
            StringElement if empty_data_class is None else empty_data_class)

# Lookup function installed by ObjectifyElementClassLookup.  It selects the
# element class for a node by trying, in this order: child elements, the
# xsi:nil attribute, the pytype attribute, the xsi:type attribute and the
# text content.  'state' is the ObjectifyElementClassLookup instance.
cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node):
    cdef ObjectifyElementClassLookup lookup
    lookup = <ObjectifyElementClassLookup>state
    # if element has children => no data class
    if cetree.hasChild(c_node):
        return lookup.tree_class

    # if element is defined as xsi:nil, return NoneElement class
    if u"true" == cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil"):
        return NoneElement

    # check for Python type hint
    value = cetree.attributeValueFromNsName(
        c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    if value is not None:
        if value == TREE_PYTYPE_NAME:
            return lookup.tree_class
        py_type = <PyType>_PYTYPE_DICT.get(value)
        if py_type is not None:
            return py_type._type
        # unknown 'pyval' => try to figure it out ourself, just go on

    # check for XML Schema type hint
    value = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")

    if value is not None:
        schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value)
        if schema_type is None and u':' in value:
            # retry without the part before the first colon
            prefix, value = value.split(u':', 1)
            schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value)
        if schema_type is not None:
            return schema_type._type

    # otherwise determine class based on text content type
    el_class = _guessElementClass(c_node)
    if el_class is not None:
        return el_class

    # if element is a root node => default to tree node
    if c_node.parent is NULL or not tree._isElement(c_node.parent):
        return lookup.tree_class

    return lookup.empty_data_class


################################################################################
# Type annotations

cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
    # Return 'pytype' if its type check accepts the text of the node,
    # None otherwise (or if no type was given).
    if pytype is None:
        return None
    text = textOf(c_node)
    try:
        pytype.type_check(text)
    except IGNORABLE_ERRORS:
        # could not be parsed as the specified type => ignore
        return None
    return pytype

def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
             empty_pytype=None):
    u"""pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)

    Recursively annotates the elements of an XML tree with 'pytype'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'pytype'
    attributes will be ignored and replaced.  Otherwise (the default), they
    will be checked and only replaced if they no longer fit the current text
    value.

    Setting the keyword argument ``ignore_xsi`` to True makes the function
    additionally ignore existing ``xsi:type`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'str', for example, to make string values the default.
    """
    cdef _Element  element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # annotate pytype only (xsi off, pytype on); 'ignore_old' refers to the
    # existing pytype attributes here
    _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)

def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
                empty_type=None):
    u"""xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'xsi:type'
    attributes will be ignored and replaced.  Otherwise (the default), they
    will be checked and only replaced if they no longer fit the current text
    value.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    Setting the keyword argument ``ignore_pytype`` to True makes the function
    additionally ignore existing ``pytype`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.
    """
    cdef _Element  element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # annotate xsi:type only (xsi on, pytype off); 'ignore_old' refers to
    # the existing xsi:type attributes here
    _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)

def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
             empty_pytype=None, empty_type=None, annotate_xsi=0,
             annotate_pytype=1):
    u"""annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    and/or 'py:pytype' attributes.

    If the 'ignore_old' keyword argument is True (the default), current
    'py:pytype' attributes will be ignored for the type annotation.  Set it
    to False if you want to reuse existing 'py:pytype' information (iff
    appropriate for the element text value).

    If the 'ignore_xsi' keyword argument is False (the default), existing
    'xsi:type' attributes will be used for the type annotation, if they fit
    the element text values.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    The default 'py:pytype' annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  Pass 'str', for example, to make
    string values the default.

    The default 'xsi:type' annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.

    The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
    (default: 1) control which kind(s) of annotation to use.
    """
    cdef _Element root = cetree.rootNodeOrRaise(element_or_tree)
    _annotate(root, annotate_xsi, annotate_pytype,
              ignore_xsi, ignore_old,
              empty_type, empty_pytype)


# Shared implementation of annotate(), pyannotate() and xsiannotate():
# resolves the default type for empty elements once and then calls
# _annotate_element() for the element nodes visited by the tree loop below.
cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
               bint ignore_xsi, bint ignore_pytype,
               empty_type_name, empty_pytype_name):
    cdef _Document doc
    cdef tree.xmlNode* c_node
    cdef PyType empty_pytype, StrType, NoneType

    # nothing to annotate => nothing to do
    if not annotate_xsi and not annotate_pytype:
        return

    # Look up the PyType for empty elements.  A given XML Schema type name
    # takes precedence over a given pytype name; byte string names are
    # decoded as ASCII first.
    if empty_type_name is not None:
        if isinstance(empty_type_name, bytes):
            empty_type_name = (<bytes>empty_type_name).decode("ascii")
        empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name)
    elif empty_pytype_name is not None:
        if isinstance(empty_pytype_name, bytes):
            empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii")
        empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name)
    else:
        empty_pytype = None

    # look up the frequently needed types once instead of once per element
    StrType  = <PyType>_PYTYPE_DICT.get(u'str')
    NoneType = <PyType>_PYTYPE_DICT.get(u'NoneType')

    doc = element._doc
    c_node = element._c_node
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        _annotate_element(c_node, doc, annotate_xsi, annotate_pytype,
                          ignore_xsi, ignore_pytype,
                          empty_type_name, empty_pytype, StrType, NoneType)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)

# Annotate a single element node with 'xsi:type' and/or 'py:pytype'.
#
# The type is determined from, in this order: an xsi:nil="true" attribute,
# an existing xsi:type attribute (unless ignore_xsi), an existing pytype
# attribute (unless ignore_pytype), the text content of childless elements,
# and finally the defaults for empty elements.  Returns 0 on success and -1
# with an exception set on errors.
cdef int _annotate_element(tree.xmlNode* c_node, _Document doc,
                           bint annotate_xsi, bint annotate_pytype,
                           bint ignore_xsi, bint ignore_pytype,
                           empty_type_name, PyType empty_pytype,
                           PyType StrType, PyType NoneType) except -1:
    cdef tree.xmlNs*   c_ns
    cdef PyType pytype = None
    typename = None
    istree = 0

    # if element is defined as xsi:nil, represent it as None
    if cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true":
        pytype = NoneType

    if pytype is None and not ignore_xsi:
        # check that old xsi type value is valid
        typename = cetree.attributeValueFromNsName(
            c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        if typename is not None:
            pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is None and u':' in typename:
                # retry without the part before the first colon
                prefix, typename = typename.split(u':', 1)
                pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is not None and pytype is not StrType:
                # StrType does not have a typecheck but is the default
                # anyway, so just accept it if given as type
                # information
                pytype = _check_type(c_node, pytype)
                if pytype is None:
                    typename = None

    if pytype is None and not ignore_pytype:
        # check that old pytype value is valid
        old_pytypename = cetree.attributeValueFromNsName(
            c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        if old_pytypename is not None:
            if old_pytypename == TREE_PYTYPE_NAME:
                if not cetree.hasChild(c_node):
                    # only case where we should keep it,
                    # everything else is clear enough
                    pytype = TREE_PYTYPE
            else:
                if old_pytypename == 'none':
                    # transition from lxml 1.x
                    old_pytypename = "NoneType"
                pytype = <PyType>_PYTYPE_DICT.get(old_pytypename)
                if pytype is not None and pytype is not StrType:
                    # StrType does not have a typecheck but is the
                    # default anyway, so just accept it if given as
                    # type information
                    pytype = _check_type(c_node, pytype)

    if pytype is None:
        # try to guess type
        if not cetree.hasChild(c_node):
            # element has no children => data class
            pytype = _guessPyType(textOf(c_node), StrType)
        else:
            istree = 1

    if pytype is None:
        # use default type for empty elements
        if cetree.hasText(c_node):
            pytype = StrType
        else:
            pytype = empty_pytype
            if typename is None:
                typename = empty_type_name

    if pytype is not None:
        if typename is None:
            if not istree:
                if pytype._schema_types:
                    # pytype->xsi:type is a 1:n mapping
                    # simply take the first
                    typename = pytype._schema_types[0]
        elif typename not in pytype._schema_types:
            # The xsi type name found so far does not belong to the
            # resolved PyType: replace it by the first schema type of the
            # PyType.  A PyType without any schema type (e.g. the tree
            # pytype) has no xsi:type equivalent, so drop the name then
            # instead of failing on the empty list.
            if pytype._schema_types:
                typename = pytype._schema_types[0]
            else:
                typename = None

    if annotate_xsi:
        if typename is None or istree:
            cetree.delAttributeFromNsName(
                c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        else:
            # update or create attribute
            typename_utf8 = cetree.utf8(typename)
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd')
            if c_ns is not NULL:
                # make the prefix of the type name agree with the prefix
                # of the namespace that was found or built for this node
                if b':' in typename_utf8:
                    prefix, name = typename_utf8.split(b':', 1)
                    if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0':
                        typename_utf8 = name
                    elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0:
                        typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name
                elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0':
                    typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
            tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8))

    if annotate_pytype:
        if pytype is None:
            # delete attribute if it exists
            cetree.delAttributeFromNsName(
                c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            # update or create attribute
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py')
            pytype_name = cetree.utf8(pytype.name)
            tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME,
                              _xcstr(pytype_name))
            if pytype is NoneType:
                # None values are additionally marked as xsi:nil="true"
                c_ns = cetree.findOrBuildNodeNsPrefix(
                    doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
                tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true")

    return 0

# module-level references to the etree functions used by deannotate()
cdef object _strip_attributes = etree.strip_attributes
cdef object _cleanup_namespaces = etree.cleanup_namespaces

def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
               bint xsi_nil=False, bint cleanup_namespaces=False):
    u"""deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False)

    Recursively de-annotate the elements of an XML tree by removing 'py:pytype'
    and/or 'xsi:type' attributes and/or 'xsi:nil' attributes.

    If the 'pytype' keyword argument is True (the default), 'py:pytype'
    attributes will be removed. If the 'xsi' keyword argument is True (the
    default), 'xsi:type' attributes will be removed.
    If the 'xsi_nil' keyword argument is True (default: False), 'xsi:nil'
    attributes will be removed.

    Note that this does not touch the namespace declarations by
    default.  If you want to remove unused namespace declarations from
    the tree, pass the option ``cleanup_namespaces=True``.
    """
    cdef list attribute_names
    # each option switches the removal of one attribute on or off
    selection = (
        (pytype, PYTYPE_ATTRIBUTE),
        (xsi, XML_SCHEMA_INSTANCE_TYPE_ATTR),
        (xsi_nil, XML_SCHEMA_INSTANCE_NIL_ATTR),
    )
    attribute_names = [attr_name for enabled, attr_name in selection
                       if enabled]

    _strip_attributes(element_or_tree, *attribute_names)
    if cleanup_namespaces:
        _cleanup_namespaces(element_or_tree)

################################################################################
# Module level parser setup

# the original default parser: removes blank text and looks up element
# classes through ObjectifyElementClassLookup
cdef object __DEFAULT_PARSER
__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True)
__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() )

# the parser currently in use; replaced by set_default_parser()
cdef object objectify_parser
objectify_parser = __DEFAULT_PARSER

def set_default_parser(new_parser = None):
    u"""set_default_parser(new_parser = None)

    Replace the default parser used by objectify's Element() and
    fromstring() functions.

    The new parser must be an etree.XMLParser, otherwise a TypeError is
    raised.

    Call without arguments to reset to the original parser.
    """
    global objectify_parser
    if new_parser is None:
        # reset to the parser that was set up at import time
        objectify_parser = __DEFAULT_PARSER
        return
    if not isinstance(new_parser, etree.XMLParser):
        raise TypeError(u"parser must inherit from lxml.etree.XMLParser")
    objectify_parser = new_parser

def makeparser(**kw):
    u"""makeparser(remove_blank_text=True, **kw)

    Create a new XML parser for objectify trees.

    You can pass all keyword arguments that are supported by
    ``etree.XMLParser()``.  Note that this parser defaults to removing
    blank text.  You can disable this by passing the
    ``remove_blank_text`` boolean keyword option yourself.
    """
    # only applies when the caller did not pass the option explicitly
    kw.setdefault('remove_blank_text', True)
    new_parser = etree.XMLParser(**kw)
    new_parser.set_element_class_lookup( ObjectifyElementClassLookup() )
    return new_parser

# Create a new element through the C-API of lxml.etree, passing the
# currently configured objectify parser.
cdef _Element _makeElement(tag, text, attrib, nsmap):
    return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap)

################################################################################
# Module level factory functions

# module-level reference to etree.fromstring(), used by fromstring() and XML()
cdef object _fromstring
_fromstring = etree.fromstring

# re-exported unchanged from lxml.etree
SubElement = etree.SubElement

def fromstring(xml, parser=None, *, base_url=None):
    u"""fromstring(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree fromstring() function
    that uses the objectify parser.

    You can pass a different parser as second argument.

    The ``base_url`` keyword argument allows to set the original base URL of
    the document to support relative Paths when looking up external entities
    (DTD, XInclude, ...).
    """
    # fall back to the current default objectify parser
    used_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, used_parser, base_url=base_url)

def XML(xml, parser=None, *, base_url=None):
    u"""XML(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree XML() literal factory
    that uses the objectify parser.

    You can pass a different parser as second argument.

    The ``base_url`` keyword argument allows to set the original base URL of
    the document to support relative Paths when looking up external entities
    (DTD, XInclude, ...).
    """
    # fall back to the current default objectify parser
    used_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, used_parser, base_url=base_url)

# module-level reference to etree.parse(), used by parse()
cdef object _parse
_parse = etree.parse

def parse(f, parser=None, *, base_url=None):
    u"""parse(f, parser=None, base_url=None)

    Parse a file or file-like object with the objectify parser.

    You can pass a different parser as second argument.

    The ``base_url`` keyword allows setting a URL for the document
    when parsing from a file-like object.  This is needed when looking
    up external entities (DTD, XInclude, ...) with relative paths.
    """
    # fall back to the current default objectify parser
    used_parser = objectify_parser if parser is None else parser
    return _parse(f, used_parser, base_url=base_url)

# prefix => namespace mapping that is used when no nsmap is passed to
# Element(), DataElement() or an annotating ElementMaker
cdef dict _DEFAULT_NSMAP = {
    "py"  : PYTYPE_NAMESPACE,
    "xsi" : XML_SCHEMA_INSTANCE_NS,
    "xsd" : XML_SCHEMA_NS
}

# the predefined ElementMaker instance of this module (default settings)
E = ElementMaker()

def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
    u"""Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)

    Objectify specific version of the lxml.etree Element() factory that
    always creates a structural (tree) element.

    Attributes are taken from the ``attrib`` mapping and from additional
    keyword arguments; keyword arguments override entries of ``attrib``.
    The mapping passed as ``attrib`` is copied and never modified.

    ``_pytype`` defaults to the tree element type name and is stored in the
    py:pytype attribute of the new element.  ``nsmap`` defaults to the
    module's default namespace mapping.

    NOTE: requires parser based element class lookup activated in lxml.etree!
    """
    if attrib is not None:
        # Always build a private dict.  The py:pytype attribute is added
        # below, and writing it into the caller's own mapping would leak
        # that attribute into the caller's data.
        merged = dict(attrib)
        merged.update(_attributes)
        _attributes = merged
    if _pytype is None:
        _pytype = TREE_PYTYPE_NAME
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    _attributes[PYTYPE_ATTRIBUTE] = _pytype
    return _makeElement(_tag, None, _attributes, nsmap)

def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
                **_attributes):
    u"""DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)

    Create a new element from a Python value and XML attributes taken from
    keyword arguments or a dictionary passed as second argument.

    Automatically adds a 'pytype' attribute for the Python type of the value,
    if the type can be identified.  If '_pytype' or '_xsi' are among the
    keyword arguments, they will be used instead.

    If the _value argument is an ObjectifiedDataElement instance, its py:pytype,
    xsi:type and other attributes and nsmap are reused unless they are redefined
    in attrib and/or keyword arguments.

    The mapping passed as ``attrib`` is copied before any type attributes are
    added to it, so the caller's mapping is never modified.

    Raises ValueError if ``_xsi`` carries a prefix that the effective nsmap
    does not bind to the XML Schema namespace, or if an unprefixed ``_xsi`` is
    used with a custom nsmap that does not contain that namespace.
    """
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    if attrib:
        # Always build a private dict.  xsi:type, xsi:nil and py:pytype may
        # be written into it below, and those writes must not end up in the
        # mapping owned by the caller.
        merged = dict(attrib)
        merged.update(_attributes)
        _attributes = merged
    if (isinstance(_value, ObjectifiedElement) and _pytype is None
            and _xsi is None and not _attributes
            and nsmap is _DEFAULT_NSMAP):
        # special case: no change!
        return _value.__copy__()
    if isinstance(_value, ObjectifiedDataElement):
        # reuse existing nsmap unless redefined in nsmap parameter
        temp = _value.nsmap
        if temp:
            temp = dict(temp)
            temp.update(nsmap)
            nsmap = temp
        # reuse existing attributes unless redefined in attrib/_attributes
        temp = _value.attrib
        if temp:
            temp = dict(temp)
            temp.update(_attributes)
            _attributes = temp
        # reuse existing xsi:type or py:pytype attributes, unless provided as
        # arguments
        if _xsi is None and _pytype is None:
            _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
            _pytype = _attributes.get(PYTYPE_ATTRIBUTE)

    if _xsi is not None:
        if u':' in _xsi:
            # prefixed type name: the prefix must be bound to the XSD namespace
            prefix, name = _xsi.split(u':', 1)
            ns = nsmap.get(prefix)
            if ns != XML_SCHEMA_NS:
                raise ValueError(u"XSD types require the XSD namespace")
        elif nsmap is _DEFAULT_NSMAP:
            # unprefixed name with the default nsmap: qualify it with 'xsd'
            name = _xsi
            _xsi = u'xsd:' + _xsi
        else:
            # unprefixed name with a custom nsmap: look for a prefix that is
            # bound to the XSD namespace; an empty/None prefix leaves the
            # name unqualified
            name = _xsi
            for prefix, ns in nsmap.items():
                if ns == XML_SCHEMA_NS:
                    if prefix:
                        _xsi = prefix + u':' + _xsi
                    break
            else:
                raise ValueError(u"XSD types require the XSD namespace")
        _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi
        if _pytype is None:
            # allow using unregistered or even wrong xsi:type names
            py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi)
            if py_type is None:
                py_type = <PyType>_SCHEMA_TYPE_DICT.get(name)
            if py_type is not None:
                _pytype = py_type.name

    if _pytype is None:
        _pytype = _pytypename(_value)

    # Determine the text content of the new element.
    if _value is None and _pytype != u"str":
        _pytype = _pytype or u"NoneType"
        strval = None
    elif python._isString(_value):
        strval = _value
    elif isinstance(_value, bool):
        if _value:
            strval = u"true"
        else:
            strval = u"false"
    else:
        # use the stringifier of the registered type, if there is one
        py_type = <PyType>_PYTYPE_DICT.get(_pytype)
        stringify = unicode if py_type is None else py_type.stringify
        strval = stringify(_value)

    if _pytype is not None:
        if _pytype == u"NoneType" or _pytype == u"none":
            # None values carry no text and are flagged via the nil attribute
            strval = None
            _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = u"true"
        else:
            # check if type information from arguments is valid
            py_type = <PyType>_PYTYPE_DICT.get(_pytype)
            if py_type is not None:
                if py_type.type_check is not None:
                    py_type.type_check(strval)
                _attributes[PYTYPE_ATTRIBUTE] = _pytype

    return _makeElement(u"value", strval, _attributes, nsmap)


################################################################################
# ObjectPath

include "objectpath.pxi"
source-git / python-lxml

Source Code

Files