Blob Blame History Raw
# -*- coding: utf-8 -*-

"""
Test cases related to XML Schema parsing and validation
"""

import unittest, sys, os.path

this_dir = os.path.dirname(__file__)
if this_dir not in sys.path:
    sys.path.insert(0, this_dir) # needed for Py3

from common_imports import etree, BytesIO, HelperTestCase, fileInTestDir
from common_imports import doctest, make_doctest


class ETreeXMLSchemaTestCase(HelperTestCase):
    def test_xmlschema(self):
        tree_valid = self.parse('<a><b></b></a>')
        tree_invalid = self.parse('<a><c></c></a>')
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        self.assertTrue(schema.validate(tree_valid))
        self.assertFalse(schema.validate(tree_invalid))
        self.assertTrue(schema.validate(tree_valid))     # retry valid
        self.assertFalse(schema.validate(tree_invalid))  # retry invalid

    def test_xmlschema_error_log(self):
        tree_valid = self.parse('<a><b></b></a>')
        tree_invalid = self.parse('<a><c></c></a>')
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        self.assertTrue(schema.validate(tree_valid))
        self.assertFalse(schema.error_log.filter_from_errors())

        self.assertFalse(schema.validate(tree_invalid))
        self.assertTrue(schema.error_log.filter_from_errors())
        self.assertTrue(schema.error_log.filter_types(
            etree.ErrorTypes.SCHEMAV_ELEMENT_CONTENT))

        self.assertTrue(schema.validate(tree_valid))
        self.assertFalse(schema.error_log.filter_from_errors())

        self.assertFalse(schema.validate(tree_invalid))
        self.assertTrue(schema.error_log.filter_from_errors())
        self.assertTrue(schema.error_log.filter_types(
            etree.ErrorTypes.SCHEMAV_ELEMENT_CONTENT))

    def test_xmlschema_error_log_path(self):
        """We don't have a guarantee that there will always be a path
        for a _LogEntry object (or even a node for which to determina
        a path), but at least when this test was created schema validation
        errors always got a node and an XPath value. If that ever changes,
        we can modify this test to something like:
            self.assertTrue(error_path is None or tree_path == error_path)
        That way, we can at least verify that if we did get a path value
        it wasn't bogus.
        """
        tree = self.parse('<a><b>42</b><b>dada</b></a>')
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:integer" maxOccurs="2"/>
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        schema.validate(tree)
        tree_path = tree.getpath(tree.findall('b')[1])
        error_path = schema.error_log[0].path
        self.assertTrue(tree_path == error_path)

    def test_xmlschema_default_attributes(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence minOccurs="4" maxOccurs="4">
      <xsd:element name="b" type="BType" />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="BType">
    <xsd:attribute name="hardy" type="xsd:string" default="hey" />
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema, attribute_defaults=True)

        tree = self.parse('<a><b hardy="ho"/><b/><b hardy="ho"/><b/></a>')

        root = tree.getroot()
        self.assertEqual('ho', root[0].get('hardy'))
        self.assertEqual(None, root[1].get('hardy'))
        self.assertEqual('ho', root[2].get('hardy'))
        self.assertEqual(None, root[3].get('hardy'))

        self.assertTrue(schema(tree))

        root = tree.getroot()
        self.assertEqual('ho', root[0].get('hardy'))
        self.assertEqual('hey', root[1].get('hardy'))
        self.assertEqual('ho', root[2].get('hardy'))
        self.assertEqual('hey', root[3].get('hardy'))

    def test_xmlschema_parse(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        parser = etree.XMLParser(schema=schema)

        tree_valid = self.parse('<a><b></b></a>', parser=parser)
        self.assertEqual('a', tree_valid.getroot().tag)

        self.assertRaises(etree.XMLSyntaxError,
                          self.parse, '<a><c></c></a>', parser=parser)

    def test_xmlschema_parse_default_attributes(self):
        # does not work as of libxml2 2.7.3
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence minOccurs="4" maxOccurs="4">
      <xsd:element name="b" type="BType" />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="BType">
    <xsd:attribute name="hardy" type="xsd:string" default="hey" />
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        parser = etree.XMLParser(schema=schema, attribute_defaults=True)

        tree_valid = self.parse('<a><b hardy="ho"/><b/><b hardy="ho"/><b/></a>',
                                parser=parser)
        root = tree_valid.getroot()
        self.assertEqual('ho', root[0].get('hardy'))
        self.assertEqual('hey', root[1].get('hardy'))
        self.assertEqual('ho', root[2].get('hardy'))
        self.assertEqual('hey', root[3].get('hardy'))

    def test_xmlschema_parse_default_attributes_schema_config(self):
        # does not work as of libxml2 2.7.3
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence minOccurs="4" maxOccurs="4">
      <xsd:element name="b" type="BType" />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="BType">
    <xsd:attribute name="hardy" type="xsd:string" default="hey" />
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema, attribute_defaults=True)
        parser = etree.XMLParser(schema=schema)

        tree_valid = self.parse('<a><b hardy="ho"/><b/><b hardy="ho"/><b/></a>',
                                parser=parser)
        root = tree_valid.getroot()
        self.assertEqual('ho', root[0].get('hardy'))
        self.assertEqual('hey', root[1].get('hardy'))
        self.assertEqual('ho', root[2].get('hardy'))
        self.assertEqual('hey', root[3].get('hardy'))

    def test_xmlschema_parse_fixed_attributes(self):
        # does not work as of libxml2 2.7.3
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence minOccurs="3" maxOccurs="3">
      <xsd:element name="b" type="BType" />
    </xsd:sequence>
  </xsd:complexType>
  <xsd:complexType name="BType">
    <xsd:attribute name="hardy" type="xsd:string" fixed="hey" />
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        parser = etree.XMLParser(schema=schema, attribute_defaults=True)

        tree_valid = self.parse('<a><b/><b hardy="hey"/><b/></a>',
                                parser=parser)
        root = tree_valid.getroot()
        self.assertEqual('hey', root[0].get('hardy'))
        self.assertEqual('hey', root[1].get('hardy'))
        self.assertEqual('hey', root[2].get('hardy'))

    def test_xmlschema_stringio(self):
        schema_file = BytesIO('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(file=schema_file)
        parser = etree.XMLParser(schema=schema)

        tree_valid = self.parse('<a><b></b></a>', parser=parser)
        self.assertEqual('a', tree_valid.getroot().tag)

        self.assertRaises(etree.XMLSyntaxError,
                          self.parse, '<a><c></c></a>', parser=parser)

    def test_xmlschema_iterparse(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        xml = BytesIO('<a><b></b></a>')
        events = [ (event, el.tag)
                   for (event, el) in etree.iterparse(xml, schema=schema) ]

        self.assertEqual([('end', 'b'), ('end', 'a')],
                          events)

    def test_xmlschema_iterparse_incomplete(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        xml = BytesIO('<a><b></b></a>')
        event, element = next(iter(etree.iterparse(xml, schema=schema)))
        self.assertEqual('end', event)
        self.assertEqual('b', element.tag)

    def test_xmlschema_iterparse_fail(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)
        self.assertRaises(
            etree.XMLSyntaxError,
            list, etree.iterparse(BytesIO('<a><c></c></a>'), schema=schema))

    def test_xmlschema_elementtree_error(self):
        self.assertRaises(ValueError, etree.XMLSchema, etree.ElementTree())

    def test_xmlschema_comment_error(self):
        self.assertRaises(ValueError, etree.XMLSchema, etree.Comment('TEST'))

    def test_xmlschema_illegal_validation_error(self):
        schema = self.parse('''
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="xsd:string"/>
</xsd:schema>
''')
        schema = etree.XMLSchema(schema)

        root = etree.Element('a')
        root.text = 'TEST'
        self.assertTrue(schema(root))

        self.assertRaises(ValueError, schema, etree.Comment('TEST'))
        self.assertRaises(ValueError, schema, etree.PI('a', 'text'))
        self.assertRaises(ValueError, schema, etree.Entity('text'))

    def test_xmlschema_invalid_schema1(self):
        schema = self.parse('''\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        self.assertRaises(etree.XMLSchemaParseError,
                          etree.XMLSchema, schema)

    def test_xmlschema_invalid_schema2(self):
        schema = self.parse('<test/>')
        self.assertRaises(etree.XMLSchemaParseError,
                          etree.XMLSchema, schema)

    def test_xmlschema_file(self):
        # this will only work if we access the file through path or
        # file object..
        f = open(fileInTestDir('test.xsd'), 'rb')
        try:
            schema = etree.XMLSchema(file=f)
        finally:
            f.close()
        tree_valid = self.parse('<a><b></b></a>')
        self.assertTrue(schema.validate(tree_valid))

    def test_xmlschema_import_file(self):
        # this will only work if we access the file through path or
        # file object..
        schema = etree.XMLSchema(file=fileInTestDir('test_import.xsd'))
        tree_valid = self.parse(
            '<a:x xmlns:a="http://codespeak.net/lxml/schema/ns1"><b></b></a:x>')
        self.assertTrue(schema.validate(tree_valid))

    def test_xmlschema_shortcut(self):
        tree_valid = self.parse('<a><b></b></a>')
        tree_invalid = self.parse('<a><c></c></a>')
        schema = self.parse('''\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="a" type="AType"/>
  <xsd:complexType name="AType">
    <xsd:sequence>
      <xsd:element name="b" type="xsd:string" />
    </xsd:sequence>
  </xsd:complexType>
</xsd:schema>
''')
        self.assertTrue(tree_valid.xmlschema(schema))
        self.assertFalse(tree_invalid.xmlschema(schema))

    def test_create_from_partial_doc(self):
        # this used to crash because the schema part was not properly copied out
        wsdl = self.parse('''\
<wsdl:definitions
   xmlns:wsdl="http://schemas.xmlsoap.org/wsdl/"
   xmlns:xs="http://www.w3.org/2001/XMLSchema">
 <wsdl:types>
  <xs:schema>
  </xs:schema>
 </wsdl:types>
</wsdl:definitions>
        ''')
        schema_element = wsdl.find(
            "{http://schemas.xmlsoap.org/wsdl/}types/"
            "{http://www.w3.org/2001/XMLSchema}schema"
        )
        etree.XMLSchema(schema_element)
        etree.XMLSchema(schema_element)
        etree.XMLSchema(schema_element)


class ETreeXMLSchemaResolversTestCase(HelperTestCase):
    resolver_schema_int = BytesIO("""\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:etype="http://codespeak.net/lxml/test/external"
    targetNamespace="http://codespeak.net/lxml/test/internal">
        <xsd:import namespace="http://codespeak.net/lxml/test/external" schemaLocation="XXX.xsd" />
        <xsd:element name="a" type="etype:AType"/>
</xsd:schema>""")

    resolver_schema_int2 = BytesIO("""\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:etype="http://codespeak.net/lxml/test/external"
    targetNamespace="http://codespeak.net/lxml/test/internal">
        <xsd:import namespace="http://codespeak.net/lxml/test/external" schemaLocation="YYY.xsd" />
        <xsd:element name="a" type="etype:AType"/>
</xsd:schema>""")

    resolver_schema_ext = """\
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    targetNamespace="http://codespeak.net/lxml/test/external">
    <xsd:complexType name="AType">
      <xsd:sequence><xsd:element name="b" type="xsd:string" minOccurs="0" maxOccurs="unbounded" /></xsd:sequence>
    </xsd:complexType>
</xsd:schema>""" 

    class simple_resolver(etree.Resolver):
        def __init__(self, schema):
            self.schema = schema

        def resolve(self, url, id, context):
            assert url == 'XXX.xsd'
            return self.resolve_string(self.schema, context)

    # tests:

    def test_xmlschema_resolvers(self):
        # test that resolvers work with schema.
        parser = etree.XMLParser()
        parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext))
        schema_doc = etree.parse(self.resolver_schema_int, parser = parser)
        schema = etree.XMLSchema(schema_doc)

    def test_xmlschema_resolvers_root(self):
        # test that the default resolver will get called if there's no
        # specific parser resolver.
        root_resolver = self.simple_resolver(self.resolver_schema_ext)
        etree.get_default_parser().resolvers.add(root_resolver)
        schema_doc = etree.parse(self.resolver_schema_int)
        schema = etree.XMLSchema(schema_doc)
        etree.get_default_parser().resolvers.remove(root_resolver)

    def test_xmlschema_resolvers_noroot(self):
        # test that the default resolver will not get called when a
        # more specific resolver is registered.

        class res_root(etree.Resolver):
            def resolve(self, url, id, context):
                assert False
                return None

        root_resolver = res_root()
        etree.get_default_parser().resolvers.add(root_resolver)

        parser = etree.XMLParser()
        parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext))

        schema_doc = etree.parse(self.resolver_schema_int, parser = parser)
        schema = etree.XMLSchema(schema_doc)
        etree.get_default_parser().resolvers.remove(root_resolver)

    def test_xmlschema_nested_resolvers(self):
        # test that resolvers work in a nested fashion.

        resolver_schema = self.resolver_schema_ext

        class res_nested(etree.Resolver):
            def __init__(self, ext_schema):
                self.ext_schema = ext_schema

            def resolve(self, url, id, context):
                assert url == 'YYY.xsd'
                return self.resolve_string(self.ext_schema, context)

        class res(etree.Resolver):
            def __init__(self, ext_schema_1, ext_schema_2):
                self.ext_schema_1 = ext_schema_1
                self.ext_schema_2 = ext_schema_2

            def resolve(self, url, id, context):
                assert url == 'XXX.xsd'

                new_parser = etree.XMLParser()
                new_parser.resolvers.add(res_nested(self.ext_schema_2))
                new_schema_doc = etree.parse(self.ext_schema_1, parser = new_parser)
                new_schema = etree.XMLSchema(new_schema_doc)

                return self.resolve_string(resolver_schema, context)

        parser = etree.XMLParser()
        parser.resolvers.add(res(self.resolver_schema_int2, self.resolver_schema_ext))
        schema_doc = etree.parse(self.resolver_schema_int, parser = parser)
        schema = etree.XMLSchema(schema_doc)


def test_suite():
    suite = unittest.TestSuite()
    suite.addTests([unittest.makeSuite(ETreeXMLSchemaTestCase)])
    suite.addTests([unittest.makeSuite(ETreeXMLSchemaResolversTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/validation.txt')])
    return suite


if __name__ == '__main__':
    print('to test use test.py %s' % __file__)