Blame src/lxml/html/tests/test_xhtml.txt
|
Packit Service |
b74dd5 |
>>> from lxml.html import document_fromstring, fragment_fromstring, tostring
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
lxml.html has two parsers, one for HTML, one for XHTML:
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
>>> from lxml.html import HTMLParser, XHTMLParser
|
|
Packit Service |
b74dd5 |
>>> html = "<html><body><p>Hi!</p></body></html>"
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
>>> root = document_fromstring(html, parser=HTMLParser())
|
|
Packit Service |
b74dd5 |
>>> print(root.tag)
|
|
Packit Service |
b74dd5 |
html
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
>>> root = document_fromstring(html, parser=XHTMLParser())
|
|
Packit Service |
b74dd5 |
>>> print(root.tag)
|
|
Packit Service |
b74dd5 |
html
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
There are two functions for converting between HTML and XHTML:
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
>>> from lxml.html import xhtml_to_html, html_to_xhtml
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
>>> doc = document_fromstring(html, parser=HTMLParser())
|
|
Packit Service |
b74dd5 |
>>> tostring(doc)
|
|
Packit Service |
b74dd5 |
b'<html><body><p>Hi!</p></body></html>'
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
>>> html_to_xhtml(doc)
|
|
Packit Service |
b74dd5 |
>>> tostring(doc)
|
|
Packit Service |
b74dd5 |
b'<html:html xmlns:html="http://www.w3.org/1999/xhtml"><html:body><html:p>Hi!</html:p></html:body></html:html>'
|
|
Packit Service |
b74dd5 |
|
|
Packit Service |
b74dd5 |
>>> xhtml_to_html(doc)
|
|
Packit Service |
b74dd5 |
>>> tostring(doc)
|
|
Packit Service |
b74dd5 |
b'<html xmlns:html="http://www.w3.org/1999/xhtml"><body><p>Hi!</p></body></html>'
|