# -*- coding: utf-8 -*-
"""
Test cases related to DTD parsing and validation
"""
import unittest, sys, os.path
# Put the directory containing this file on the import path; the
# ``common_imports`` module imported right below is presumably located there.
this_dir = os.path.dirname(__file__)
already_on_path = this_dir in sys.path
if not already_on_path:
    sys.path.insert(0, this_dir)  # needed for Py3
from common_imports import etree, html, BytesIO, _bytes, _str
from common_imports import HelperTestCase, make_doctest, skipIf
from common_imports import fileInTestDir, fileUrlInTestDir
class ETreeDtdTestCase(HelperTestCase):
    """Tests for DTD loading, DTD based validation and DOCTYPE handling.

    Explanations inside the test methods are written as ``#`` comments
    rather than docstrings so that the test descriptions reported by
    unittest stay the method names.

    Text inside the triple-quoted literals is deliberately kept flush left:
    it is part of the documents under test.
    """

    def test_dtd(self):
        # Placeholder test without any assertions.
        pass

    def test_dtd_file(self):
        # A DTD loaded from a file path validates the parsed test document.
        tree = etree.parse(fileInTestDir("test.xml"))
        root = tree.getroot()

        dtd = etree.DTD(fileInTestDir("test.dtd"))
        self.assertTrue(dtd.validate(root))

    def test_dtd_stringio(self):
        # A DTD can be read from an in-memory byte stream.  The text is
        # converted with _bytes() first, as test_dtd_invalid_duplicate_id
        # does, because a plain io.BytesIO does not accept str on Python 3.
        root = etree.XML(_bytes("<b/>"))
        dtd = etree.DTD(BytesIO(_bytes("<!ELEMENT b EMPTY>")))
        self.assertTrue(dtd.validate(root))

    def test_dtd_parse_invalid(self):
        # Root element "b" is parsed with DTD validation against test.dtd
        # and must be rejected with an XMLSyntaxError.
        parser = etree.XMLParser(dtd_validation=True)
        xml = _bytes('<!DOCTYPE b SYSTEM "%s"><b><a/></b>' %
                     fileInTestDir("test.dtd"))
        self.assertRaises(etree.XMLSyntaxError,
                          etree.fromstring, xml, parser=parser)

    def test_dtd_parse_file_not_found(self):
        # Referencing a DTD file that does not exist must fail, and the
        # error logs must mention the missing file.
        dtd_filename = fileUrlInTestDir("__nosuch.dtd")
        parser = etree.XMLParser(dtd_validation=True)
        xml = _bytes('<!DOCTYPE b SYSTEM "%s"><b><a/></b>' % dtd_filename)
        self.assertRaises(etree.XMLSyntaxError,
                          etree.fromstring, xml, parser=parser)

        # Parse a second time to get hold of the exception object itself.
        # 'errors' stays None if no exception is raised, which makes the
        # final assertion fail.
        errors = None
        try:
            etree.fromstring(xml, parser=parser)
        except etree.XMLSyntaxError as e:
            self.assertTrue(e.error_log)
            self.assertTrue(parser.error_log)
            errors = [entry.message for entry in e.error_log
                      if dtd_filename in entry.message]
        self.assertTrue(errors)

    def test_dtd_parse_valid(self):
        # Parsing must simply succeed; the result is not inspected.
        parser = etree.XMLParser(dtd_validation=True)
        xml = ('<!DOCTYPE a SYSTEM "%s"><a><b/></a>' %
               fileUrlInTestDir("test.dtd"))
        etree.fromstring(xml, parser=parser)

    def test_dtd_parse_valid_file_url(self):
        # NOTE(review): this body is identical to test_dtd_parse_valid.
        parser = etree.XMLParser(dtd_validation=True)
        xml = ('<!DOCTYPE a SYSTEM "%s"><a><b/></a>' %
               fileUrlInTestDir("test.dtd"))
        etree.fromstring(xml, parser=parser)

    def test_dtd_parse_valid_relative(self):
        # The relative system ID "test.dtd" is used together with a
        # base_url pointing at test.xml in the test directory.
        parser = etree.XMLParser(dtd_validation=True)
        xml = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>'
        etree.fromstring(
            xml, parser=parser, base_url=fileUrlInTestDir("test.xml"))

    def test_dtd_parse_valid_relative_file_url(self):
        # NOTE(review): this body is identical to
        # test_dtd_parse_valid_relative.
        parser = etree.XMLParser(dtd_validation=True)
        xml = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>'
        etree.fromstring(
            xml, parser=parser, base_url=fileUrlInTestDir("test.xml"))

    def test_dtd_invalid(self):
        # "b" is declared EMPTY but has a child: assertValid() must raise.
        root = etree.XML("<b><a/></b>")
        dtd = etree.DTD(BytesIO(_bytes("<!ELEMENT b EMPTY>")))
        self.assertRaises(etree.DocumentInvalid, dtd.assertValid, root)

    def test_dtd_assertValid(self):
        # assertValid() must not raise for a document matching the DTD.
        root = etree.XML("<b><a/></b>")
        dtd = etree.DTD(
            BytesIO(_bytes("<!ELEMENT b (a)><!ELEMENT a EMPTY>")))
        dtd.assertValid(root)

    def test_dtd_internal(self):
        # The internal subset is available as docinfo.internalDTD and
        # validates the document it was declared in.
        root = etree.XML(_bytes('''
<!DOCTYPE b SYSTEM "none" [
<!ELEMENT b (a)>
<!ELEMENT a EMPTY>
]>
<b><a/></b>
'''))
        dtd = etree.ElementTree(root).docinfo.internalDTD
        self.assertTrue(dtd)
        dtd.assertValid(root)

    def test_dtd_internal_invalid(self):
        # "a" is declared to contain "c" but is empty in the document.
        root = etree.XML(_bytes('''
<!DOCTYPE b SYSTEM "none" [
<!ELEMENT b (a)>
<!ELEMENT a (c)>
<!ELEMENT c EMPTY>
]>
<b><a/></b>
'''))
        dtd = etree.ElementTree(root).docinfo.internalDTD
        self.assertTrue(dtd)
        self.assertFalse(dtd.validate(root))

    def test_dtd_invalid_duplicate_id(self):
        # The ID value "id1" is used twice; validation must fail and the
        # error log must mention the duplicated value.
        root = etree.XML(_bytes('''
<a><b id="id1"/><b id="id2"/><b id="id1"/></a>
'''))
        dtd = etree.DTD(BytesIO(_bytes("""
<!ELEMENT a (b*)>
<!ATTLIST b
id ID #REQUIRED
>
<!ELEMENT b EMPTY>
""")))
        self.assertFalse(dtd.validate(root))
        self.assertTrue(dtd.error_log)
        self.assertTrue([error for error in dtd.error_log
                         if 'id1' in error.message])

    def test_dtd_api_internal(self):
        # Walk the element and attribute declarations of an internal DTD.
        root = etree.XML(_bytes('''
<!DOCTYPE b SYSTEM "none" [
<!ATTLIST a
attr1 (x | y | z) "z"
attr2 CDATA #FIXED "X"
>
<!ELEMENT b (a)>
<!ELEMENT a EMPTY>
]>
<b><a/></b>
'''))
        dtd = etree.ElementTree(root).docinfo.internalDTD
        self.assertTrue(dtd)
        dtd.assertValid(root)

        seen = []
        for el in dtd.iterelements():
            if el.name == 'a':
                self.assertEqual(2, len(el.attributes()))
                for attr in el.iterattributes():
                    if attr.name == 'attr1':
                        self.assertEqual('enumeration', attr.type)
                        self.assertEqual('none', attr.default)
                        self.assertEqual('z', attr.default_value)
                        # Compare sorted, as test_internal_dtds does.
                        values = sorted(attr.values())
                        self.assertEqual(['x', 'y', 'z'], values)
                    else:
                        self.assertEqual('attr2', attr.name)
                        self.assertEqual('cdata', attr.type)
                        self.assertEqual('fixed', attr.default)
                        self.assertEqual('X', attr.default_value)
            else:
                self.assertEqual('b', el.name)
                self.assertEqual(0, len(el.attributes()))
            seen.append(el.name)

        # Element declarations are compared independent of their order.
        seen.sort()
        self.assertEqual(['a', 'b'], seen)
        self.assertEqual(2, len(dtd.elements()))

    def test_internal_dtds(self):
        # Generate internal DTDs with 2..4 element declarations
        # (el0..elN) and 0..3 enumeration attributes per element, then
        # check that the DTD API reports exactly those declarations.
        for el_count in range(2, 5):
            for attr_count in range(4):
                # Only the last literal (the document body) is %-formatted:
                # '%' binds tighter than '+'.
                root = etree.XML(_bytes('''
<!DOCTYPE el0 SYSTEM "none" [
''' + ''.join(['''
<!ATTLIST el%d
attr%d (x | y | z) "z"
>
''' % (e, a) for a in range(attr_count) for e in range(el_count)
                ]) + ''.join(['''
<!ELEMENT el%d EMPTY>
''' % e for e in range(1, el_count)
                ]) + '''
''' + '<!ELEMENT el0 (%s)>' % '|'.join([
                    'el%d' % e for e in range(1, el_count)]) + '''
]>
<el0><el1 %s /></el0>
''' % ' '.join(['attr%d="x"' % a for a in range(attr_count)])))
                dtd = etree.ElementTree(root).docinfo.internalDTD
                self.assertTrue(dtd)
                dtd.assertValid(root)

                # 'e' and 'a' start at -1 so that the count assertions
                # below also hold when an iterator yields nothing.
                e = -1
                for e, el in enumerate(dtd.iterelements()):
                    self.assertEqual(attr_count, len(el.attributes()))
                    a = -1
                    for a, attr in enumerate(el.iterattributes()):
                        self.assertEqual('enumeration', attr.type)
                        self.assertEqual('none', attr.default)
                        self.assertEqual('z', attr.default_value)
                        values = sorted(attr.values())
                        self.assertEqual(['x', 'y', 'z'], values)
                    self.assertEqual(attr_count - 1, a)
                self.assertEqual(el_count - 1, e)
                self.assertEqual(el_count, len(dtd.elements()))

    def test_dtd_broken(self):
        # A DTD with an unknown content specification fails to load.
        self.assertRaises(etree.DTDParseError, etree.DTD,
                          BytesIO(_bytes("<!ELEMENT b HONKEY>")))

    def test_parse_file_dtd(self):
        # With attribute_defaults=True the parsed elements carry the
        # "default" attribute although (presumably) test.xml does not
        # spell it out - see test_dtd_attrs for the declaration.
        parser = etree.XMLParser(attribute_defaults=True)
        tree = etree.parse(fileInTestDir('test.xml'), parser)
        root = tree.getroot()

        self.assertEqual(
            "valueA",
            root.get("default"))
        self.assertEqual(
            "valueB",
            root[0].get("default"))

    @skipIf(etree.LIBXML_VERSION == (2, 9, 0),
            "DTD loading is broken for incremental parsing in libxml2 2.9.0")
    def test_iterparse_file_dtd_start(self):
        # 'start' events: the root element is reported before its child.
        iterator = etree.iterparse(
            fileInTestDir("test.xml"), events=('start',),
            attribute_defaults=True)
        attributes = [element.get("default")
                      for event, element in iterator]
        self.assertEqual(
            ["valueA", "valueB"],
            attributes)

    @skipIf(etree.LIBXML_VERSION == (2, 9, 0),
            "DTD loading is broken for incremental parsing in libxml2 2.9.0")
    def test_iterparse_file_dtd_end(self):
        # 'end' events: the child is reported before the root element.
        iterator = etree.iterparse(
            fileInTestDir("test.xml"), events=('end',),
            attribute_defaults=True)
        attributes = [element.get("default")
                      for event, element in iterator]
        self.assertEqual(
            ["valueB", "valueA"],
            attributes)

    def test_dtd_attrs(self):
        dtd = etree.DTD(fileUrlInTestDir("test.dtd"))

        # Test DTD.system_url attribute
        self.assertTrue(dtd.system_url.endswith("test.dtd"))

        # Test elements and their attributes
        a = dtd.elements()[0]
        self.assertEqual(a.name, "a")
        self.assertEqual(a.type, "element")
        self.assertEqual(a.content.name, "b")
        self.assertEqual(a.content.type, "element")
        self.assertEqual(a.content.occur, "once")

        aattr = a.attributes()[0]
        self.assertEqual(aattr.name, "default")
        self.assertEqual(aattr.type, "enumeration")
        self.assertEqual(aattr.values(), ["valueA", "valueB"])
        self.assertEqual(aattr.default_value, "valueA")

        b = dtd.elements()[1]
        self.assertEqual(b.name, "b")
        self.assertEqual(b.type, "empty")
        self.assertEqual(b.content, None)

        # Test entities and their attributes
        c = dtd.entities()[0]
        self.assertEqual(c.name, "c")
        self.assertEqual(c.orig, "*")
        self.assertEqual(c.content, "*")

        # Test DTD.name attribute
        root = etree.XML(_bytes('''
<!DOCTYPE a SYSTEM "none" [
<!ELEMENT a EMPTY>
]>
<a/>
'''))
        dtd = etree.ElementTree(root).docinfo.internalDTD
        self.assertEqual(dtd.name, "a")

        # Test DTD.name and DTD.systemID attributes
        parser = etree.XMLParser(dtd_validation=True)
        xml = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>'
        root = etree.fromstring(xml, parser=parser,
                                base_url=fileUrlInTestDir("test.xml"))

        dtd = root.getroottree().docinfo.internalDTD
        self.assertEqual(dtd.name, "a")
        self.assertEqual(dtd.system_url, "test.dtd")

    def test_declaration_escape_quote_pid(self):
        # Standard allows quotes in systemliteral, but in that case
        # systemliteral must be escaped with single quotes.
        # See http://www.w3.org/TR/REC-xml/#sec-prolog-dtd.
        root = etree.XML('''<!DOCTYPE a PUBLIC 'foo' '"'><a/>''')
        doc = root.getroottree()
        self.assertEqual(doc.docinfo.doctype,
                         '''<!DOCTYPE a PUBLIC "foo" '"'>''')
        self.assertEqual(etree.tostring(doc),
                         _bytes('''<!DOCTYPE a PUBLIC "foo" '"'>\n<a/>'''))

    def test_declaration_quote_withoutpid(self):
        # Same as above, for a SYSTEM declaration without a public ID.
        root = etree.XML('''<!DOCTYPE a SYSTEM '"'><a/>''')
        doc = root.getroottree()
        self.assertEqual(doc.docinfo.doctype, '''<!DOCTYPE a SYSTEM '"'>''')
        self.assertEqual(etree.tostring(doc),
                         _bytes('''<!DOCTYPE a SYSTEM '"'>\n<a/>'''))

    def test_declaration_apos(self):
        # A system literal containing an apostrophe stays double-quoted.
        root = etree.XML('''<!DOCTYPE a SYSTEM "'"><a/>''')
        doc = root.getroottree()
        self.assertEqual(doc.docinfo.doctype, '''<!DOCTYPE a SYSTEM "'">''')
        self.assertEqual(etree.tostring(doc),
                         _bytes('''<!DOCTYPE a SYSTEM "'">\n<a/>'''))

    def test_ietf_decl(self):
        # docinfo.doctype reports the name in lower case ("html"), while
        # HTML serialisation reproduces the input ("HTML") unchanged.
        html_data = (
            '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">\n'
            '<html></html>')
        root = etree.HTML(html_data)
        doc = root.getroottree()
        self.assertEqual(doc.docinfo.doctype,
                         '<!DOCTYPE html PUBLIC "-//IETF//DTD HTML//EN">')
        self.assertEqual(etree.tostring(doc, method='html'),
                         _bytes(html_data))

    def test_set_decl_public(self):
        # Setting public_id and system_url produces a PUBLIC declaration.
        doc = etree.Element('test').getroottree()
        doc.docinfo.public_id = 'bar'
        doc.docinfo.system_url = 'baz'
        self.assertEqual(doc.docinfo.doctype,
                         '<!DOCTYPE test PUBLIC "bar" "baz">')
        self.assertEqual(etree.tostring(doc),
                         _bytes('<!DOCTYPE test PUBLIC "bar" "baz">\n<test/>'))

    def test_html_decl(self):
        # Slightly different to one above: when we create an html element,
        # we do not start with a blank slate.
        doc = html.Element('html').getroottree()
        doc.docinfo.public_id = 'bar'
        doc.docinfo.system_url = 'baz'
        self.assertEqual(doc.docinfo.doctype,
                         '<!DOCTYPE html PUBLIC "bar" "baz">')
        self.assertEqual(etree.tostring(doc),
                         _bytes('<!DOCTYPE html PUBLIC "bar" "baz">\n<html/>'))

    def test_clean_doctype(self):
        # docinfo.clear() removes the doctype an html element starts with.
        doc = html.Element('html').getroottree()
        self.assertNotEqual('', doc.docinfo.doctype)
        doc.docinfo.clear()
        self.assertEqual('', doc.docinfo.doctype)

    def test_set_decl_system(self):
        # Setting only system_url produces a SYSTEM declaration.
        doc = etree.Element('test').getroottree()
        doc.docinfo.system_url = 'baz'
        self.assertEqual(doc.docinfo.doctype,
                         '<!DOCTYPE test SYSTEM "baz">')
        self.assertEqual(etree.tostring(doc),
                         _bytes('<!DOCTYPE test SYSTEM "baz">\n<test/>'))

    def test_empty_decl(self):
        # Assigning None as public ID yields a bare DOCTYPE declaration.
        doc = etree.Element('test').getroottree()
        doc.docinfo.public_id = None
        self.assertEqual(doc.docinfo.doctype,
                         '<!DOCTYPE test>')
        self.assertTrue(doc.docinfo.public_id is None)
        self.assertTrue(doc.docinfo.system_url is None)
        self.assertEqual(etree.tostring(doc),
                         _bytes('<!DOCTYPE test>\n<test/>'))

    def test_invalid_decl_1(self):
        # Public IDs containing a non-ASCII character are rejected.
        docinfo = etree.Element('test').getroottree().docinfo

        # assertRaises() needs a callable, hence the setter wrapper.
        def set_public_id(value):
            docinfo.public_id = value

        self.assertRaises(ValueError, set_public_id, _str('ä'))
        self.assertRaises(ValueError, set_public_id, _str('qwerty ä asdf'))

    def test_invalid_decl_2(self):
        # System URLs containing both quote characters are rejected.
        docinfo = etree.Element('test').getroottree().docinfo

        # assertRaises() needs a callable, hence the setter wrapper.
        def set_system_url(value):
            docinfo.system_url = value

        self.assertRaises(ValueError, set_system_url, '\'"')
        self.assertRaises(ValueError, set_system_url, '"\'')
        self.assertRaises(ValueError, set_system_url, ' " \' ')

    def test_comment_before_dtd(self):
        # Comments before and after the DOCTYPE survive a round trip.
        data = '<!--comment--><!DOCTYPE test>\n<!-- --><test/>'
        doc = etree.fromstring(data).getroottree()
        self.assertEqual(etree.tostring(doc),
                         _bytes(data))
def test_suite():
    """Build the test suite of this module.

    The suite contains all tests of ``ETreeDtdTestCase`` plus the doctests
    created by ``make_doctest`` for ``../../../doc/validation.txt``.

    The test case is loaded through ``unittest.defaultTestLoader`` because
    ``unittest.makeSuite()`` was deprecated in Python 3.11 and removed in
    Python 3.13.
    """
    suite = unittest.TestSuite()
    load_tests_from = unittest.defaultTestLoader.loadTestsFromTestCase
    suite.addTests([load_tests_from(ETreeDtdTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/validation.txt')])
    return suite
if __name__ == '__main__':
    # Running this file directly only prints a hint pointing at test.py.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)