Blob Blame History Raw
cdef object _find_id_attributes

def XMLID(text, parser=None, *, base_url=None):
    u"""XMLID(text, parser=None, base_url=None)

    Parse the text and return a tuple (root node, ID dictionary).  The root
    node is the same as returned by the XML() function.  The dictionary
    contains string-element pairs.  The dictionary keys are the values of 'id'
    attributes.  The elements referenced by the ID are stored as dictionary
    values.
    """
    cdef dict dic
    global _find_id_attributes
    if _find_id_attributes is None:
        _find_id_attributes = XPath(u'//*[string(@id)]')

    # ElementTree compatible implementation: parse and look for 'id' attributes
    root = XML(text, parser, base_url=base_url)
    dic = {}
    for elem in _find_id_attributes(root):
        dic[elem.get(u'id')] = elem
    return (root, dic)

def XMLDTDID(text, parser=None, *, base_url=None):
    u"""XMLDTDID(text, parser=None, base_url=None)

    Parse the text and return a tuple (root node, ID dictionary).  The root
    node is the same as returned by the XML() function.  The dictionary
    contains string-element pairs.  The dictionary keys are the values of ID
    attributes as defined by the DTD.  The elements referenced by the ID are
    stored as dictionary values.

    Note that you must not modify the XML tree if you use the ID dictionary.
    The results are undefined.
    """
    cdef _Element root
    root = XML(text, parser, base_url=base_url)
    # xml:id spec compatible implementation: use DTD ID attributes from libxml2
    if root._doc._c_doc.ids is NULL:
        return (root, {})
    else:
        return (root, _IDDict(root))

def parseid(source, parser=None, *, base_url=None):
    u"""parseid(source, parser=None)

    Parses the source into a tuple containing an ElementTree object and an
    ID dictionary.  If no parser is provided as second argument, the default
    parser is used.

    Note that you must not modify the XML tree if you use the ID dictionary.
    The results are undefined.
    """
    cdef _Document doc
    doc = _parseDocument(source, parser, base_url)
    return (_elementTreeFactory(doc, None), _IDDict(doc))

cdef class _IDDict:
    u"""IDDict(self, etree)
    A dictionary-like proxy class that mapps ID attributes to elements.

    The dictionary must be instantiated with the root element of a parsed XML
    document, otherwise the behaviour is undefined.  Elements and XML trees
    that were created or modified 'by hand' are not supported.
    """
    cdef _Document _doc
    cdef object _keys
    cdef object _items
    def __cinit__(self, etree):
        cdef _Document doc
        doc = _documentOrRaise(etree)
        if doc._c_doc.ids is NULL:
            raise ValueError, u"No ID dictionary available."
        self._doc = doc
        self._keys  = None
        self._items = None

    def copy(self):
        return _IDDict(self._doc)

    def __getitem__(self, id_name):
        cdef tree.xmlHashTable* c_ids
        cdef tree.xmlID* c_id
        cdef xmlAttr* c_attr
        c_ids = self._doc._c_doc.ids
        id_utf = _utf8(id_name)
        c_id = <tree.xmlID*>tree.xmlHashLookup(c_ids, _xcstr(id_utf))
        if c_id is NULL:
            raise KeyError, u"key not found."
        c_attr = c_id.attr
        if c_attr is NULL or c_attr.parent is NULL:
            raise KeyError, u"ID attribute not found."
        return _elementFactory(self._doc, c_attr.parent)

    def get(self, id_name):
        return self[id_name]

    def __contains__(self, id_name):
        cdef tree.xmlID* c_id
        id_utf = _utf8(id_name)
        c_id = <tree.xmlID*>tree.xmlHashLookup(
            self._doc._c_doc.ids, _xcstr(id_utf))
        return c_id is not NULL

    def has_key(self, id_name):
        return id_name in self

    def __repr__(self):
        return repr(dict(self))

    def keys(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return self._keys[:]

    def __iter__(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return iter(self._keys)

    def iterkeys(self):
        return self

    def __len__(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return len(self._keys)

    def items(self):
        if self._items is None:
            self._items = self._build_items()
        return self._items[:]

    def iteritems(self):
        if self._items is None:
            self._items = self._build_items()
        return iter(self._items)

    def values(self):
        cdef list values = []
        if self._items is None:
            self._items = self._build_items()
        for item in self._items:
            value = python.PyTuple_GET_ITEM(item, 1)
            python.Py_INCREF(value)
            values.append(value)
        return values

    def itervalues(self):
        return iter(self.values())

    cdef object _build_keys(self):
        keys = []
        tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                         <tree.xmlHashScanner>_collectIdHashKeys, <python.PyObject*>keys)
        return keys

    cdef object _build_items(self):
        items = []
        context = (items, self._doc)
        tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                         <tree.xmlHashScanner>_collectIdHashItemList, <python.PyObject*>context)
        return items

cdef void _collectIdHashItemList(void* payload, void* context, xmlChar* name):
    # collect elements from ID attribute hash table
    cdef list lst
    c_id = <tree.xmlID*>payload
    if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    lst, doc = <tuple>context
    element = _elementFactory(doc, c_id.attr.parent)
    lst.append( (funicode(name), element) )

cdef void _collectIdHashKeys(void* payload, void* collect_list, xmlChar* name):
    c_id = <tree.xmlID*>payload
    if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    (<list>collect_list).append(funicode(name))