# XSLT from lxml.includes cimport xslt cdef class XSLTError(LxmlError): """Base class of all XSLT errors. """ cdef class XSLTParseError(XSLTError): """Error parsing a stylesheet document. """ cdef class XSLTApplyError(XSLTError): """Error running an XSL transformation. """ class XSLTSaveError(XSLTError, SerialisationError): """Error serialising an XSLT result. """ cdef class XSLTExtensionError(XSLTError): """Error registering an XSLT extension. """ # version information LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION) LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion) ################################################################################ # Where do we store what? # # xsltStylesheet->doc->_private # == _XSLTResolverContext for XSL stylesheet # # xsltTransformContext->_private # == _XSLTResolverContext for transformed document # ################################################################################ ################################################################################ # XSLT document loaders @cython.final @cython.internal cdef class _XSLTResolverContext(_ResolverContext): cdef xmlDoc* _c_style_doc cdef _BaseParser _parser cdef _XSLTResolverContext _copy(self): cdef _XSLTResolverContext context context = _XSLTResolverContext() _initXSLTResolverContext(context, self._parser) context._c_style_doc = self._c_style_doc return context cdef _initXSLTResolverContext(_XSLTResolverContext context, _BaseParser parser): _initResolverContext(context, parser.resolvers) context._parser = parser context._c_style_doc = NULL cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context, int parse_options, int* error) with gil: # call the Python document loaders cdef _XSLTResolverContext context cdef _ResolverRegistry resolvers cdef _InputDocument doc_ref cdef xmlDoc* c_doc cdef xmlDoc* c_return_doc = NULL error[0] = 0 context = <_XSLTResolverContext>c_context # shortcut if we resolve the stylesheet itself c_doc = context._c_style_doc try: if c_doc is not NULL and c_doc.URL is not NULL: if tree.xmlStrcmp(c_uri, c_doc.URL) == 0: c_return_doc = _copyDoc(c_doc, 1) return c_return_doc # 'goto', see 'finally' below # delegate to the Python resolvers resolvers = context._resolvers if tree.xmlStrncmp('string://__STRING__XSLT__/', c_uri, 26) == 0: c_uri += 26 uri = _decodeFilename(c_uri) doc_ref = resolvers.resolve(uri, None, context) if doc_ref is not None: if doc_ref._type == PARSER_DATA_STRING: c_return_doc = _parseDoc( doc_ref._data_bytes, doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_FILENAME: c_return_doc = _parseDocFromFile( doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_FILE: c_return_doc = _parseDocFromFilelike( doc_ref._file, doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_EMPTY: c_return_doc = _newXMLDoc() if c_return_doc is not NULL and c_return_doc.URL is NULL: c_return_doc.URL = tree.xmlStrdup(c_uri) except: error[0] = 1 context._store_raised() finally: return c_return_doc # and swallow any further exceptions cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context, xslt.xsltLoadType c_type) with gil: try: message = f"Cannot resolve URI {_decodeFilename(c_uri)}" if c_type == xslt.XSLT_LOAD_DOCUMENT: exception = XSLTApplyError(message) else: exception = XSLTParseError(message) (<_XSLTResolverContext>context)._store_exception(exception) except BaseException as e: (<_XSLTResolverContext>context)._store_exception(e) finally: return # and swallow any further exceptions cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict, int parse_options, void* c_ctxt, xslt.xsltLoadType c_type) nogil: # nogil => no Python objects here, may be called without thread context ! cdef xmlDoc* c_doc cdef xmlDoc* result cdef void* c_pcontext cdef int error = 0 # find resolver contexts of stylesheet and transformed doc if c_type == xslt.XSLT_LOAD_DOCUMENT: # transformation time c_pcontext = (c_ctxt)._private elif c_type == xslt.XSLT_LOAD_STYLESHEET: # include/import resolution while parsing c_pcontext = (c_ctxt).doc._private else: c_pcontext = NULL if c_pcontext is NULL: # can't call Python without context, fall back to default loader return XSLT_DOC_DEFAULT_LOADER( c_uri, c_dict, parse_options, c_ctxt, c_type) c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error) if c_doc is NULL and not error: c_doc = XSLT_DOC_DEFAULT_LOADER( c_uri, c_dict, parse_options, c_ctxt, c_type) if c_doc is NULL: _xslt_store_resolver_exception(c_uri, c_pcontext, c_type) if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET: c_doc._private = c_pcontext return c_doc cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader xslt.xsltSetLoaderFunc(_xslt_doc_loader) ################################################################################ # XSLT file/network access control cdef class XSLTAccessControl: u"""XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True) Access control for XSLT: reading/writing files, directories and network I/O. Access to a type of resource is granted or denied by passing any of the following boolean keyword arguments. All of them default to True to allow access. - read_file - write_file - create_dir - read_network - write_network For convenience, there is also a class member `DENY_ALL` that provides an XSLTAccessControl instance that is readily configured to deny everything, and a `DENY_WRITE` member that denies all write access but allows read access. See `XSLT`. """ cdef xslt.xsltSecurityPrefs* _prefs def __cinit__(self): self._prefs = xslt.xsltNewSecurityPrefs() if self._prefs is NULL: raise MemoryError() def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True, bint read_network=True, bint write_network=True): self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file) self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file) self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir) self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network) self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network) DENY_ALL = XSLTAccessControl( read_file=False, write_file=False, create_dir=False, read_network=False, write_network=False) DENY_WRITE = XSLTAccessControl( read_file=True, write_file=False, create_dir=False, read_network=True, write_network=False) def __dealloc__(self): if self._prefs is not NULL: xslt.xsltFreeSecurityPrefs(self._prefs) @cython.final cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow): cdef xslt.xsltSecurityCheck function if allow: function = xslt.xsltSecurityAllow else: function = xslt.xsltSecurityForbid xslt.xsltSetSecurityPrefs(self._prefs, option, function) @cython.final cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt): xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt) property options: u"The access control configuration as a map of options." def __get__(self): return { u'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE), u'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE), u'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY), u'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK), u'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK), } @cython.final cdef _optval(self, xslt.xsltSecurityOption option): cdef xslt.xsltSecurityCheck function function = xslt.xsltGetSecurityPrefs(self._prefs, option) if function is xslt.xsltSecurityAllow: return True elif function is xslt.xsltSecurityForbid: return False else: return None def __repr__(self): items = sorted(self.options.items()) return u"%s(%s)" % ( python._fqtypename(self).decode('UTF-8').split(u'.')[-1], u', '.join([u"%s=%r" % item for item in items])) ################################################################################ # XSLT cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf): if ns_utf is None: return 0 # libxml2 internalises the strings if ctxt has a dict return xslt.xsltRegisterExtFunction( ctxt, _xcstr(name_utf), _xcstr(ns_utf), _xpath_function_call) cdef dict EMPTY_DICT = {} @cython.final @cython.internal cdef class _XSLTContext(_BaseContext): cdef xslt.xsltTransformContext* _xsltCtxt cdef _ReadOnlyElementProxy _extension_element_proxy cdef dict _extension_elements def __cinit__(self): self._xsltCtxt = NULL self._extension_elements = EMPTY_DICT def __init__(self, namespaces, extensions, error_log, enable_regexp, build_smart_strings): if extensions is not None and extensions: for ns_name_tuple, extension in extensions.items(): if ns_name_tuple[0] is None: raise XSLTExtensionError, \ u"extensions must not have empty namespaces" if isinstance(extension, XSLTExtension): if self._extension_elements is EMPTY_DICT: self._extension_elements = {} extensions = extensions.copy() ns_utf = _utf8(ns_name_tuple[0]) name_utf = _utf8(ns_name_tuple[1]) self._extension_elements[(ns_utf, name_utf)] = extension del extensions[ns_name_tuple] _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp, build_smart_strings) cdef _BaseContext _copy(self): cdef _XSLTContext context context = <_XSLTContext>_BaseContext._copy(self) context._extension_elements = self._extension_elements return context cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): self._xsltCtxt = xsltCtxt self._set_xpath_context(xsltCtxt.xpathCtxt) self._register_context(doc) self.registerLocalFunctions(xsltCtxt, _register_xslt_function) self.registerGlobalFunctions(xsltCtxt, _register_xslt_function) _registerXSLTExtensions(xsltCtxt, self._extension_elements) cdef free_context(self): self._cleanup_context() self._release_context() if self._xsltCtxt is not NULL: xslt.xsltFreeTransformContext(self._xsltCtxt) self._xsltCtxt = NULL self._release_temp_refs() @cython.final @cython.internal @cython.freelist(8) cdef class _XSLTQuotedStringParam: u"""A wrapper class for literal XSLT string parameters that require quote escaping. """ cdef bytes strval def __cinit__(self, strval): self.strval = _utf8(strval) @cython.no_gc_clear cdef class XSLT: u"""XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None) Turn an XSL document into an XSLT object. Calling this object on a tree or Element will execute the XSLT:: transform = etree.XSLT(xsl_tree) result = transform(xml_tree) Keyword arguments of the constructor: - extensions: a dict mapping ``(namespace, name)`` pairs to extension functions or extension elements - regexp: enable exslt regular expression support in XPath (default: True) - access_control: access restrictions for network or file system (see `XSLTAccessControl`) Keyword arguments of the XSLT call: - profile_run: enable XSLT profiling (default: False) Other keyword arguments of the call are passed to the stylesheet as parameters. """ cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style cdef _XSLTResolverContext _xslt_resolver_context cdef XSLTAccessControl _access_control cdef _ErrorLog _error_log def __cinit__(self): self._c_style = NULL def __init__(self, xslt_input, *, extensions=None, regexp=True, access_control=None): cdef xslt.xsltStylesheet* c_style = NULL cdef xmlDoc* c_doc cdef _Document doc cdef _Element root_node doc = _documentOrRaise(xslt_input) root_node = _rootNodeOrRaise(xslt_input) # set access control or raise TypeError self._access_control = access_control # make a copy of the document as stylesheet parsing modifies it c_doc = _copyDocRoot(doc._c_doc, root_node._c_node) # make sure we always have a stylesheet URL if c_doc.URL is NULL: doc_url_utf = python.PyUnicode_AsASCIIString( f"string://__STRING__XSLT__/{id(self)}.xslt") c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf)) self._error_log = _ErrorLog() self._xslt_resolver_context = _XSLTResolverContext() _initXSLTResolverContext(self._xslt_resolver_context, doc._parser) # keep a copy in case we need to access the stylesheet via 'document()' self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1) c_doc._private = self._xslt_resolver_context with self._error_log: c_style = xslt.xsltParseStylesheetDoc(c_doc) if c_style is NULL or c_style.errors: tree.xmlFreeDoc(c_doc) if c_style is not NULL: xslt.xsltFreeStylesheet(c_style) self._xslt_resolver_context._raise_if_stored() # last error seems to be the most accurate here if self._error_log.last_error is not None and \ self._error_log.last_error.message: raise XSLTParseError(self._error_log.last_error.message, self._error_log) else: raise XSLTParseError( self._error_log._buildExceptionMessage( u"Cannot parse stylesheet"), self._error_log) c_doc._private = NULL # no longer used! self._c_style = c_style self._context = _XSLTContext(None, extensions, self._error_log, regexp, True) def __dealloc__(self): if self._xslt_resolver_context is not None and \ self._xslt_resolver_context._c_style_doc is not NULL: tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc) # this cleans up the doc copy as well if self._c_style is not NULL: xslt.xsltFreeStylesheet(self._c_style) property error_log: u"The log of errors and warnings of an XSLT execution." def __get__(self): return self._error_log.copy() @staticmethod def strparam(strval): u"""strparam(strval) Mark an XSLT string parameter that requires quote escaping before passing it into the transformation. Use it like this:: result = transform(doc, some_strval = XSLT.strparam( '''it's \"Monty Python's\" ...''')) Escaped string parameters can be reused without restriction. """ return _XSLTQuotedStringParam(strval) @staticmethod def set_global_max_depth(int max_depth): u"""set_global_max_depth(max_depth) The maximum traversal depth that the stylesheet engine will allow. This does not only count the template recursion depth but also takes the number of variables/parameters into account. The required setting for a run depends on both the stylesheet and the input data. Example:: XSLT.set_global_max_depth(5000) Note that this is currently a global, module-wide setting because libxslt does not support it at a per-stylesheet level. """ if max_depth < 0: raise ValueError("cannot set a maximum stylesheet traversal depth < 0") xslt.xsltMaxDepth = max_depth def apply(self, _input, *, profile_run=False, **kw): u"""apply(self, _input, profile_run=False, **kw) :deprecated: call the object, not this method.""" return self(_input, profile_run=profile_run, **kw) def tostring(self, _ElementTree result_tree): u"""tostring(self, result_tree) Save result doc to string based on stylesheet output method. :deprecated: use str(result_tree) instead. """ return str(result_tree) def __deepcopy__(self, memo): return self.__copy__() def __copy__(self): return _copyXSLT(self) def __call__(self, _input, *, profile_run=False, **kw): u"""__call__(self, _input, profile_run=False, **kw) Execute the XSL transformation on a tree or Element. Pass the ``profile_run`` option to get profile information about the XSLT. The result of the XSLT will have a property xslt_profile that holds an XML tree with profiling data. """ cdef _XSLTContext context = None cdef _XSLTResolverContext resolver_context cdef _Document input_doc cdef _Element root_node cdef _Document result_doc cdef _Document profile_doc = None cdef xmlDoc* c_profile_doc cdef xslt.xsltTransformContext* transform_ctxt cdef xmlDoc* c_result = NULL cdef xmlDoc* c_doc cdef tree.xmlDict* c_dict cdef const_char** params = NULL assert self._c_style is not NULL, "XSLT stylesheet not initialised" input_doc = _documentOrRaise(_input) root_node = _rootNodeOrRaise(_input) c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) if transform_ctxt is NULL: _destroyFakeDoc(input_doc._c_doc, c_doc) raise MemoryError() # using the stylesheet dict is safer than using a possibly # unrelated dict from the current thread. Almost all # non-input tag/attr names will come from the stylesheet # anyway. if transform_ctxt.dict is not NULL: xmlparser.xmlDictFree(transform_ctxt.dict) if kw: # parameter values are stored in the dict # => avoid unnecessarily cluttering the global dict transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict) if transform_ctxt.dict is NULL: xslt.xsltFreeTransformContext(transform_ctxt) raise MemoryError() else: transform_ctxt.dict = self._c_style.doc.dict xmlparser.xmlDictReference(transform_ctxt.dict) xslt.xsltSetCtxtParseOptions( transform_ctxt, input_doc._parser._parse_options) if profile_run: transform_ctxt.profile = 1 try: context = self._context._copy() context.register_context(transform_ctxt, input_doc) resolver_context = self._xslt_resolver_context._copy() transform_ctxt._private = resolver_context _convert_xslt_parameters(transform_ctxt, kw, ¶ms) c_result = self._run_transform( c_doc, params, context, transform_ctxt) if params is not NULL: # deallocate space for parameters python.lxml_free(params) if transform_ctxt.state != xslt.XSLT_STATE_OK: if c_result is not NULL: tree.xmlFreeDoc(c_result) c_result = NULL if transform_ctxt.profile: c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt) if c_profile_doc is not NULL: profile_doc = _documentFactory( c_profile_doc, input_doc._parser) finally: if context is not None: context.free_context() _destroyFakeDoc(input_doc._c_doc, c_doc) try: if resolver_context is not None and resolver_context._has_raised(): if c_result is not NULL: tree.xmlFreeDoc(c_result) c_result = NULL resolver_context._raise_if_stored() if context._exc._has_raised(): if c_result is not NULL: tree.xmlFreeDoc(c_result) c_result = NULL context._exc._raise_if_stored() if c_result is NULL: # last error seems to be the most accurate here error = self._error_log.last_error if error is not None and error.message: if error.line > 0: message = f"{error.message}, line {error.line}" else: message = error.message elif error is not None and error.line > 0: message = f"Error applying stylesheet, line {error.line}" else: message = u"Error applying stylesheet" raise XSLTApplyError(message, self._error_log) finally: if resolver_context is not None: resolver_context.clear() result_doc = _documentFactory(c_result, input_doc._parser) c_dict = c_result.dict xmlparser.xmlDictReference(c_dict) __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict) if c_dict is not c_result.dict or \ self._c_style.doc.dict is not c_result.dict or \ input_doc._c_doc.dict is not c_result.dict: with nogil: if c_dict is not c_result.dict: fixThreadDictNames(c_result, c_dict, c_result.dict) if self._c_style.doc.dict is not c_result.dict: fixThreadDictNames(c_result, self._c_style.doc.dict, c_result.dict) if input_doc._c_doc.dict is not c_result.dict: fixThreadDictNames(c_result, input_doc._c_doc.dict, c_result.dict) xmlparser.xmlDictFree(c_dict) return _xsltResultTreeFactory(result_doc, self, profile_doc) cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc, const_char** params, _XSLTContext context, xslt.xsltTransformContext* transform_ctxt): cdef xmlDoc* c_result xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, _receiveXSLTError) if self._access_control is not None: self._access_control._register_in_context(transform_ctxt) with self._error_log, nogil: c_result = xslt.xsltApplyStylesheetUser( self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt) return c_result cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt, dict parameters, const_char*** params_ptr): cdef Py_ssize_t i, parameter_count cdef const_char** params cdef tree.xmlDict* c_dict = transform_ctxt.dict params_ptr[0] = NULL parameter_count = len(parameters) if parameter_count == 0: return # allocate space for parameters # * 2 as we want an entry for both key and value, # and + 1 as array is NULL terminated params = python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*)) if not params: raise MemoryError() try: i = 0 for key, value in parameters.iteritems(): k = _utf8(key) if isinstance(value, _XSLTQuotedStringParam): v = (<_XSLTQuotedStringParam>value).strval xslt.xsltQuoteOneUserParam( transform_ctxt, _xcstr(k), _xcstr(v)) else: if isinstance(value, XPath): v = (value)._path else: v = _utf8(value) params[i] = tree.xmlDictLookup(c_dict, _xcstr(k), len(k)) i += 1 params[i] = tree.xmlDictLookup(c_dict, _xcstr(v), len(v)) i += 1 except: python.lxml_free(params) raise params[i] = NULL params_ptr[0] = params cdef XSLT _copyXSLT(XSLT stylesheet): cdef XSLT new_xslt cdef xmlDoc* c_doc assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised" new_xslt = XSLT.__new__(XSLT) new_xslt._access_control = stylesheet._access_control new_xslt._error_log = _ErrorLog() new_xslt._context = stylesheet._context._copy() new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy() new_xslt._xslt_resolver_context._c_style_doc = _copyDoc( stylesheet._xslt_resolver_context._c_style_doc, 1) c_doc = _copyDoc(stylesheet._c_style.doc, 1) new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc) if new_xslt._c_style is NULL: tree.xmlFreeDoc(c_doc) raise MemoryError() return new_xslt @cython.final cdef class _XSLTResultTree(_ElementTree): """The result of an XSLT evaluation. Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string, and the ``.write_output()`` method to write serialise to a file. """ cdef XSLT _xslt cdef _Document _profile cdef xmlChar* _buffer cdef Py_ssize_t _buffer_len cdef Py_ssize_t _buffer_refcnt def write_output(self, file, *, compression=0): """write_output(self, file, *, compression=0) Serialise the XSLT output to a file or file-like object. As opposed to the generic ``.write()`` method, ``.write_output()`` serialises the result as defined by the ```` tag. """ cdef _FilelikeWriter writer = None cdef _Document doc cdef int r, c_compression cdef const_xmlChar* c_encoding = NULL cdef tree.xmlOutputBuffer* c_buffer if self._context_node is not None: doc = self._context_node._doc else: doc = None if doc is None: doc = self._doc if doc is None: raise XSLTSaveError("No document to serialise") c_compression = compression or 0 if _isString(file): file_path = _encodeFilename(file) c_filename = _cstr(file_path) with nogil: r = xslt.xsltSaveResultToFilename( c_filename, doc._c_doc, self._xslt._c_style, c_compression) else: xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style) writer = _create_output_buffer(file, c_encoding, compression, &c_buffer, close=False) if writer is None: with nogil: r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style) else: r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style) if writer is not None: writer._exc_context._raise_if_stored() if r == -1: python.PyErr_SetFromErrno(XSLTSaveError) # raises cdef _saveToStringAndSize(self, xmlChar** s, int* l): cdef _Document doc cdef int r if self._context_node is not None: doc = self._context_node._doc else: doc = None if doc is None: doc = self._doc if doc is None: s[0] = NULL return with nogil: r = xslt.xsltSaveResultToString(s, l, doc._c_doc, self._xslt._c_style) if r == -1: raise MemoryError() def __str__(self): cdef xmlChar* s = NULL cdef int l = 0 if not python.IS_PYTHON2: return self.__unicode__() self._saveToStringAndSize(&s, &l) if s is NULL: return '' # we must not use 'funicode()' here as this is not always UTF-8 try: result = s[:l] finally: tree.xmlFree(s) return result def __unicode__(self): cdef xmlChar* encoding cdef xmlChar* s = NULL cdef int l = 0 self._saveToStringAndSize(&s, &l) if s is NULL: return u'' encoding = self._xslt._c_style.encoding try: if encoding is NULL: result = s[:l].decode('UTF-8') else: result = s[:l].decode(encoding) finally: tree.xmlFree(s) return _stripEncodingDeclaration(result) def __getbuffer__(self, Py_buffer* buffer, int flags): cdef int l = 0 if buffer is NULL: return if self._buffer is NULL or flags & python.PyBUF_WRITABLE: self._saveToStringAndSize(&buffer.buf, &l) buffer.len = l if self._buffer is NULL and not flags & python.PyBUF_WRITABLE: self._buffer = buffer.buf self._buffer_len = l self._buffer_refcnt = 1 else: buffer.buf = self._buffer buffer.len = self._buffer_len self._buffer_refcnt += 1 if flags & python.PyBUF_WRITABLE: buffer.readonly = 0 else: buffer.readonly = 1 if flags & python.PyBUF_FORMAT: buffer.format = "B" else: buffer.format = NULL buffer.ndim = 0 buffer.shape = NULL buffer.strides = NULL buffer.suboffsets = NULL buffer.itemsize = 1 buffer.internal = NULL if buffer.obj is not self: # set by Cython? buffer.obj = self def __releasebuffer__(self, Py_buffer* buffer): if buffer is NULL: return if buffer.buf is self._buffer: self._buffer_refcnt -= 1 if self._buffer_refcnt == 0: tree.xmlFree(self._buffer) self._buffer = NULL else: tree.xmlFree(buffer.buf) buffer.buf = NULL property xslt_profile: u"""Return an ElementTree with profiling data for the stylesheet run. """ def __get__(self): cdef object root if self._profile is None: return None root = self._profile.getroot() if root is None: return None return ElementTree(root) def __del__(self): self._profile = None cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile): cdef _XSLTResultTree result result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree) result._xslt = xslt result._profile = profile return result # functions like "output" and "write" are a potential security risk, but we # rely on the user to configure XSLTAccessControl as needed xslt.xsltRegisterAllExtras() # enable EXSLT support for XSLT xslt.exsltRegisterAll() ################################################################################ # XSLT PI support cdef object _RE_PI_HREF = re.compile(ur'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")') cdef object _FIND_PI_HREF = _RE_PI_HREF.findall cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub cdef XPath __findStylesheetByID = None cdef _findStylesheetByID(_Document doc, id): global __findStylesheetByID if __findStylesheetByID is None: __findStylesheetByID = XPath( u"//xsl:stylesheet[@xml:id = $id]", namespaces={u"xsl" : u"http://www.w3.org/1999/XSL/Transform"}) return __findStylesheetByID(doc, id=id) cdef class _XSLTProcessingInstruction(PIBase): def parseXSL(self, parser=None): u"""parseXSL(self, parser=None) Try to parse the stylesheet referenced by this PI and return an ElementTree for it. If the stylesheet is embedded in the same document (referenced via xml:id), find and return an ElementTree for the stylesheet Element. The optional ``parser`` keyword argument can be passed to specify the parser used to read from external stylesheet URLs. """ cdef _Document result_doc cdef _Element result_node cdef bytes href_utf cdef const_xmlChar* c_href cdef xmlAttr* c_attr _assertValidNode(self) if self._c_node.content is NULL: raise ValueError, u"PI lacks content" hrefs = _FIND_PI_HREF(u' ' + (self._c_node.content).decode('UTF-8')) if len(hrefs) != 1: raise ValueError, u"malformed PI attributes" hrefs = hrefs[0] href_utf = utf8(hrefs[0] or hrefs[1]) c_href = _xcstr(href_utf) if c_href[0] != c'#': # normal URL, try to parse from it c_href = tree.xmlBuildURI( c_href, tree.xmlNodeGetBase(self._c_node.doc, self._c_node)) if c_href is not NULL: try: href_utf = c_href finally: tree.xmlFree(c_href) result_doc = _parseDocumentFromURL(href_utf, parser) return _elementTreeFactory(result_doc, None) # ID reference to embedded stylesheet # try XML:ID lookup _assertValidDoc(self._doc) c_href += 1 # skip leading '#' c_attr = tree.xmlGetID(self._c_node.doc, c_href) if c_attr is not NULL and c_attr.doc is self._c_node.doc: result_node = _elementFactory(self._doc, c_attr.parent) return _elementTreeFactory(result_node._doc, result_node) # try XPath search root = _findStylesheetByID(self._doc, funicode(c_href)) if not root: raise ValueError, u"reference to non-existing embedded stylesheet" elif len(root) > 1: raise ValueError, u"ambiguous reference to embedded stylesheet" result_node = root[0] return _elementTreeFactory(result_node._doc, result_node) def set(self, key, value): u"""set(self, key, value) Supports setting the 'href' pseudo-attribute in the text of the processing instruction. """ if key != u"href": raise AttributeError, \ u"only setting the 'href' attribute is supported on XSLT-PIs" if value is None: attrib = u"" elif u'"' in value or u'>' in value: raise ValueError, u"Invalid URL, must not contain '\"' or '>'" else: attrib = f' href="{value}"' text = u' ' + self.text if _FIND_PI_HREF(text): self.text = _REPLACE_PI_HREF(attrib, text) else: self.text = text + attrib