Blame src/lxml/docloader.pxi

Packit Service b74dd5
# Custom resolver API
Packit Service b74dd5
Packit Service b74dd5
# Discriminator for the kind of input an _InputDocument carries.  The
# values are written out explicitly; they are the same 0..4 sequence the
# compiler would assign implicitly.
ctypedef enum _InputDocumentDataType:
    PARSER_DATA_INVALID = 0   # initial state assigned in _InputDocument.__cinit__
    PARSER_DATA_EMPTY = 1     # assigned by Resolver.resolve_empty()
    PARSER_DATA_STRING = 2    # assigned by Resolver.resolve_string()
    PARSER_DATA_FILENAME = 3  # assigned by Resolver.resolve_filename()
    PARSER_DATA_FILE = 4      # assigned by Resolver.resolve_file()
Packit Service b74dd5
Packit Service b74dd5
@cython.final
@cython.internal
cdef class _InputDocument:
    # Result object handed back by the Resolver.resolve_*() methods.
    # ``_type`` says which of the remaining fields are meaningful.

    # Kind of input; see _InputDocumentDataType.
    cdef _InputDocumentDataType _type
    # Byte string payload, filled in by Resolver.resolve_string().
    cdef bytes _data_bytes
    # Encoded filename / base URL, filled in by resolve_string(),
    # resolve_filename() and resolve_file().
    cdef object _filename
    # File-like object, filled in by Resolver.resolve_file().
    cdef object _file
    # The ``close`` flag that was passed to Resolver.resolve_file().
    cdef bint _close_file

    def __cinit__(self):
        # A new document is marked invalid until a resolve_*() method
        # assigns its real type.
        self._type = PARSER_DATA_INVALID
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
cdef class Resolver:
    u"Base class from which all custom resolvers are derived."

    def resolve(self, system_url, public_id, context):
        u"""resolve(self, system_url, public_id, context)

        Hook for subclasses: resolve the external source identified by
        ``system_url`` and ``public_id``.  ``context`` is an opaque
        object supplied by the caller.

        An overriding method returns the result of one of the
        ``resolve_*()`` methods.  This base implementation returns None.
        """
        return None

    def resolve_empty(self, context):
        u"""resolve_empty(self, context)

        Build an input document that represents empty input.

        ``context`` is the context object that was passed to
        ``resolve()``.
        """
        cdef _InputDocument input_doc = _InputDocument()
        input_doc._type = PARSER_DATA_EMPTY
        return input_doc

    def resolve_string(self, string, context, *, base_url=None):
        u"""resolve_string(self, string, context, base_url=None)

        Build an input document from a parsable string.

        ``string`` is the data (byte string or unicode string) and
        ``context`` the context object.  The source URL or filename of
        the data can be supplied through the ``base_url`` keyword
        argument.

        Raises TypeError if ``string`` is neither a byte string nor a
        unicode string.
        """
        cdef _InputDocument input_doc
        # Normalise the payload to bytes before creating the document.
        if isinstance(string, unicode):
            payload = (<unicode>string).encode('utf8')
        elif isinstance(string, bytes):
            payload = string
        else:
            raise TypeError("argument must be a byte string or unicode string")
        input_doc = _InputDocument()
        input_doc._type = PARSER_DATA_STRING
        input_doc._data_bytes = payload
        if base_url is not None:
            input_doc._filename = _encodeFilename(base_url)
        return input_doc

    def resolve_filename(self, filename, context):
        u"""resolve_filename(self, filename, context)

        Build an input document that refers to a parsable file by name.

        ``filename`` is the name of the file and ``context`` the context
        object.  A URL with an HTTP, FTP or file target can be passed
        instead of a plain filename.
        """
        cdef _InputDocument input_doc = _InputDocument()
        input_doc._type = PARSER_DATA_FILENAME
        input_doc._filename = _encodeFilename(filename)
        return input_doc

    def resolve_file(self, f, context, *, base_url=None, bint close=True):
        u"""resolve_file(self, f, context, base_url=None, close=True)

        Build an input document from an open file-like object.

        ``f`` is the open file and ``context`` the context object.  The
        base URL or filename of the file can be supplied through the
        ``base_url`` keyword argument.  With ``close`` set to True (the
        default), the file will be closed after reading.

        Raises TypeError if ``f`` has no ``read`` attribute.

        Note that using ``.resolve_filename()`` is more efficient,
        especially in threaded environments.
        """
        cdef _InputDocument input_doc
        # Probe for a ``read`` attribute to reject non-file-like objects.
        try:
            f.read
        except AttributeError:
            raise TypeError(u"Argument is not a file-like object")
        input_doc = _InputDocument()
        input_doc._type = PARSER_DATA_FILE
        input_doc._file = f
        input_doc._close_file = close
        if base_url is None:
            # No explicit base URL: derive the name from the file object.
            input_doc._filename = _getFilenameForFile(f)
        else:
            input_doc._filename = _encodeFilename(base_url)
        return input_doc
Packit Service b74dd5
Packit Service b74dd5
@cython.final
@cython.internal
cdef class _ResolverRegistry:
    # Set of registered Resolver objects plus an optional default
    # resolver that is consulted when none of them returns a result.
    cdef object _resolvers
    cdef Resolver _default_resolver

    def __cinit__(self, Resolver default_resolver=None):
        self._default_resolver = default_resolver
        self._resolvers = set()

    def add(self, Resolver resolver not None):
        u"""add(self, resolver)

        Register a resolver.

        Whenever an entity is requested, the resolver's 'resolve' method
        is called and its result is passed to the parser.  If that
        method returns None, the request is delegated to other resolvers
        or the default resolver.  Resolvers are tried in an arbitrary
        order until the first match is found.
        """
        self._resolvers.add(resolver)

    def remove(self, resolver):
        u"""remove(self, resolver)

        Unregister a resolver.  A resolver that is not registered is
        silently ignored.
        """
        self._resolvers.discard(resolver)

    cdef _ResolverRegistry _copy(self):
        # The new registry shares the default resolver and receives its
        # own copy of the resolver set.
        cdef _ResolverRegistry duplicate = _ResolverRegistry(self._default_resolver)
        duplicate._resolvers = self._resolvers.copy()
        return duplicate

    def copy(self):
        u"""copy(self)

        Return a new registry with the same default resolver and a copy
        of the registered resolvers.
        """
        return self._copy()

    def resolve(self, system_url, public_id, context):
        u"""resolve(self, system_url, public_id, context)

        Ask each registered resolver in turn and return the first result
        that is not None.  If none matches, fall back to the default
        resolver; return None if there is no default resolver.
        """
        for candidate in self._resolvers:
            document = candidate.resolve(system_url, public_id, context)
            if document is None:
                continue
            return document
        if self._default_resolver is not None:
            return self._default_resolver.resolve(system_url, public_id, context)
        return None

    def __repr__(self):
        # Same representation as the underlying resolver set.
        return repr(self._resolvers)
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
@cython.internal
cdef class _ResolverContext(_ExceptionContext):
    # Exception context that additionally holds a resolver registry and
    # a temporary store.  Both fields are assigned by
    # _initResolverContext().
    cdef _ResolverRegistry _resolvers
    cdef _TempStore _storage

    cdef int clear(self) except -1:
        # Reset the inherited state first, then empty the temporary store.
        _ExceptionContext.clear(self)
        self._storage.clear()
        return 0
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
cdef _initResolverContext(_ResolverContext context,
                          _ResolverRegistry resolvers):
    # Attach a resolver registry and a fresh temporary store to
    # ``context``.  When no registry is given, a new empty one is used.
    if resolvers is None:
        resolvers = _ResolverRegistry()
    context._resolvers = resolvers
    context._storage = _TempStore()