Blob Blame History Raw
################################################################################
# ObjectPath

ctypedef struct _ObjectPath:
    const_xmlChar* href
    const_xmlChar* name
    Py_ssize_t index


cdef object _NO_DEFAULT = object()


cdef class ObjectPath:
    u"""ObjectPath(path)
    Immutable object that represents a compiled object path.

    Example for a path: 'root.child[1].{other}child[25]'
    """
    cdef readonly object find
    cdef list _path
    cdef object _path_str
    cdef _ObjectPath*  _c_path
    cdef Py_ssize_t _path_len
    def __init__(self, path):
        if python._isString(path):
            self._path = _parse_object_path_string(path)
            self._path_str = path
        else:
            self._path = _parse_object_path_list(path)
            self._path_str = u'.'.join(path)
        self._path_len = len(self._path)
        self._c_path = _build_object_path_segments(self._path)
        self.find = self.__call__

    def __dealloc__(self):
        if self._c_path is not NULL:
            python.lxml_free(self._c_path)

    def __str__(self):
        return self._path_str

    def __call__(self, _Element root not None, *_default):
        u"""Follow the attribute path in the object structure and return the
        target attribute value.

        If it it not found, either returns a default value (if one was passed
        as second argument) or raises AttributeError.
        """
        if _default:
            if len(_default) > 1:
                raise TypeError, u"invalid number of arguments: needs one or two"
            default = _default[0]
        else:
            default = _NO_DEFAULT
        return _find_object_path(root, self._c_path, self._path_len, default)

    def hasattr(self, _Element root not None):
        u"hasattr(self, root)"
        try:
            _find_object_path(root, self._c_path, self._path_len, _NO_DEFAULT)
        except AttributeError:
            return False
        return True

    def setattr(self, _Element root not None, value):
        u"""setattr(self, root, value)

        Set the value of the target element in a subtree.

        If any of the children on the path does not exist, it is created.
        """
        _create_object_path(root, self._c_path, self._path_len, 1, value)

    def addattr(self, _Element root not None, value):
        u"""addattr(self, root, value)

        Append a value to the target element in a subtree.

        If any of the children on the path does not exist, it is created.
        """
        _create_object_path(root, self._c_path, self._path_len, 0, value)


cdef object __MATCH_PATH_SEGMENT = re.compile(
    ur"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?",
    re.U).match

cdef tuple _RELATIVE_PATH_SEGMENT = (None, None, 0)


cdef list _parse_object_path_string(_path):
    u"""Parse object path string into a (ns, name, index) list.
    """
    cdef bint has_dot
    cdef unicode path
    new_path = []
    if isinstance(_path, bytes):
        path = (<bytes>_path).decode('ascii')
    elif type(_path) is not unicode:
        path = unicode(_path)
    else:
        path = _path
    path = path.strip()
    if path == u'.':
        return [_RELATIVE_PATH_SEGMENT]
    path_pos = 0
    while path:
        match = __MATCH_PATH_SEGMENT(path, path_pos)
        if match is None:
            break

        dot, ns, name, index = match.groups()
        index = int(index) if index else 0
        has_dot = dot == u'.'
        if not new_path:
            if has_dot:
                # path '.child' => ignore root
                new_path.append(_RELATIVE_PATH_SEGMENT)
            elif index:
                raise ValueError, u"index not allowed on root node"
        elif not has_dot:
            raise ValueError, u"invalid path"
        if ns is not None:
            ns = python.PyUnicode_AsUTF8String(ns)
        name = python.PyUnicode_AsUTF8String(name)
        new_path.append( (ns, name, index) )

        path_pos = match.end()
    if not new_path or len(path) > path_pos:
        raise ValueError, u"invalid path"
    return new_path


cdef list _parse_object_path_list(path):
    u"""Parse object path sequence into a (ns, name, index) list.
    """
    new_path = []
    for item in path:
        item = item.strip()
        if not new_path and item == u'':
            # path '.child' => ignore root
            ns = name = None
            index = 0
        else:
            ns, name = cetree.getNsTag(item)
            c_name = _xcstr(name)
            index_pos = tree.xmlStrchr(c_name, c'[')
            if index_pos is NULL:
                index = 0
            else:
                index_end = tree.xmlStrchr(index_pos + 1, c']')
                if index_end is NULL:
                    raise ValueError, u"index must be enclosed in []"
                index = int(index_pos[1:index_end - index_pos])
                if not new_path and index != 0:
                    raise ValueError, u"index not allowed on root node"
                name = <bytes>c_name[:index_pos - c_name]
        new_path.append( (ns, name, index) )
    if not new_path:
        raise ValueError, u"invalid path"
    return new_path


cdef _ObjectPath* _build_object_path_segments(list path_list) except NULL:
    cdef _ObjectPath* c_path
    cdef _ObjectPath* c_path_segments
    c_path_segments = <_ObjectPath*>python.lxml_malloc(len(path_list), sizeof(_ObjectPath))
    if c_path_segments is NULL:
        raise MemoryError()
    c_path = c_path_segments
    for href, name, index in path_list:
        c_path[0].href = _xcstr(href) if href is not None else NULL
        c_path[0].name = _xcstr(name) if name is not None else NULL
        c_path[0].index = index
        c_path += 1
    return c_path_segments


cdef _find_object_path(_Element root, _ObjectPath* c_path, Py_ssize_t c_path_len, default_value):
    u"""Follow the path to find the target element.
    """
    cdef tree.xmlNode* c_node
    cdef Py_ssize_t c_index
    c_node = root._c_node
    c_name = c_path[0].name
    c_href = c_path[0].href
    if c_href is NULL or c_href[0] == c'\0':
        c_href = tree._getNs(c_node)
    if not cetree.tagMatches(c_node, c_href, c_name):
        if default_value is not _NO_DEFAULT:
            return default_value
        else:
            raise ValueError(
                f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}")

    while c_node is not NULL:
        c_path_len -= 1
        if c_path_len <= 0:
            break

        c_path += 1
        if c_path[0].href is not NULL:
            c_href = c_path[0].href # otherwise: keep parent namespace
        c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1)
        if c_name is NULL:
            c_name = c_path[0].name
            c_node = NULL
            break
        c_index = c_path[0].index
        c_node = c_node.last if c_index < 0 else c_node.children
        c_node = _findFollowingSibling(c_node, c_href, c_name, c_index)

    if c_node is not NULL:
        return cetree.elementFactory(root._doc, c_node)
    elif default_value is not _NO_DEFAULT:
        return default_value
    else:
        tag = cetree.namespacedNameFromNsName(c_href, c_name)
        raise AttributeError, f"no such child: {tag}"


cdef _create_object_path(_Element root, _ObjectPath* c_path,
                         Py_ssize_t c_path_len, int replace, value):
    u"""Follow the path to find the target element, build the missing children
    as needed and set the target element to 'value'.  If replace is true, an
    existing value is replaced, otherwise the new value is added.
    """
    cdef _Element child
    cdef tree.xmlNode* c_node
    cdef tree.xmlNode* c_child
    cdef Py_ssize_t c_index
    if c_path_len == 1:
        raise TypeError, u"cannot update root node"

    c_node = root._c_node
    c_name = c_path[0].name
    c_href = c_path[0].href
    if c_href is NULL or c_href[0] == c'\0':
        c_href = tree._getNs(c_node)
    if not cetree.tagMatches(c_node, c_href, c_name):
        raise ValueError(
            f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}")

    while c_path_len > 1:
        c_path_len -= 1
        c_path += 1
        if c_path[0].href is not NULL:
            c_href = c_path[0].href # otherwise: keep parent namespace
        c_index = c_path[0].index
        c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1)
        if c_name is NULL:
            c_name = c_path[0].name
            c_child = NULL
        else:
            c_child = c_node.last if c_index < 0 else c_node.children
            c_child = _findFollowingSibling(c_child, c_href, c_name, c_index)

        if c_child is not NULL:
            c_node = c_child
        elif c_index != 0:
            raise TypeError, u"creating indexed path attributes is not supported"
        elif c_path_len == 1:
            _appendValue(cetree.elementFactory(root._doc, c_node),
                         cetree.namespacedNameFromNsName(c_href, c_name),
                         value)
            return
        else:
            child = cetree.makeSubElement(
                cetree.elementFactory(root._doc, c_node),
                cetree.namespacedNameFromNsName(c_href, c_name),
                None, None, None, None)
            c_node = child._c_node

    # if we get here, the entire path was already there
    if replace:
        element = cetree.elementFactory(root._doc, c_node)
        _replaceElement(element, value)
    else:
        _appendValue(cetree.elementFactory(root._doc, c_node.parent),
                     cetree.namespacedName(c_node), value)


cdef list _build_descendant_paths(tree.xmlNode* c_node, prefix_string):
    u"""Returns a list of all descendant paths.
    """
    cdef list path, path_list
    tag = cetree.namespacedName(c_node)
    if prefix_string:
        if prefix_string[-1] != u'.':
            prefix_string += u'.'
        prefix_string = prefix_string + tag
    else:
        prefix_string = tag
    path = [prefix_string]
    path_list = []
    _recursive_build_descendant_paths(c_node, path, path_list)
    return path_list


cdef int _recursive_build_descendant_paths(tree.xmlNode* c_node,
                                           list path, list path_list) except -1:
    u"""Fills the list 'path_list' with all descendant paths, initial prefix
    being in the list 'path'.
    """
    cdef tree.xmlNode* c_child
    tags = {}
    path_list.append(u'.'.join(path))
    c_href = tree._getNs(c_node)
    c_child = c_node.children
    while c_child is not NULL:
        while c_child.type != tree.XML_ELEMENT_NODE:
            c_child = c_child.next
            if c_child is NULL:
                return 0
        if c_href is tree._getNs(c_child):
            tag = pyunicode(c_child.name)
        elif c_href is not NULL and tree._getNs(c_child) is NULL:
            # special case: parent has namespace, child does not
            tag = u'{}' + pyunicode(c_child.name)
        else:
            tag = cetree.namespacedName(c_child)
        count = tags.get(tag)
        if count is None:
            tags[tag] = 1
        else:
            tags[tag] = count + 1
            tag += f'[{count}]'
        path.append(tag)
        _recursive_build_descendant_paths(c_child, path, path_list)
        del path[-1]
        c_child = c_child.next
    return 0