Blame src/lxml/html/tests/test_feedparser_data.py

rpm-build d9acb6
import sys
rpm-build d9acb6
import os
rpm-build d9acb6
import re
rpm-build d9acb6
try:
rpm-build d9acb6
    from rfc822 import Message
rpm-build d9acb6
except ImportError:
rpm-build d9acb6
    # Python 3
rpm-build d9acb6
    from email import message_from_file as Message
rpm-build d9acb6
import unittest
rpm-build d9acb6
from lxml.tests.common_imports import doctest
rpm-build d9acb6
if sys.version_info >= (2,4):
rpm-build d9acb6
    from lxml.doctestcompare import LHTMLOutputChecker
rpm-build d9acb6
rpm-build d9acb6
from lxml.html.clean import clean, Cleaner
rpm-build d9acb6
rpm-build d9acb6
feed_dirs = [
rpm-build d9acb6
    os.path.join(os.path.dirname(__file__), 'feedparser-data'),
rpm-build d9acb6
    os.path.join(os.path.dirname(__file__), 'hackers-org-data'),
rpm-build d9acb6
    ]
rpm-build d9acb6
bar_re = re.compile(r"-----+")
rpm-build d9acb6
rpm-build d9acb6
class DummyInput:
    """Bare attribute holder populated from keyword arguments.

    Within this module it is built as ``DummyInput(want=...)`` and handed
    to the output checker's ``output_difference`` in place of a real
    doctest example object.
    """

    def __init__(self, **kw):
        # Expose every keyword argument as an instance attribute.
        for attr in kw:
            setattr(self, attr, kw[attr])
rpm-build d9acb6
rpm-build d9acb6
class FeedTestCase(unittest.TestCase):
rpm-build d9acb6
rpm-build d9acb6
    def __init__(self, filename):
rpm-build d9acb6
        self.filename = filename
rpm-build d9acb6
        unittest.TestCase.__init__(self)
rpm-build d9acb6
rpm-build d9acb6
    def parse(self):
rpm-build d9acb6
        f = open(self.filename, 'r')
rpm-build d9acb6
        headers = Message(f)
rpm-build d9acb6
        c = f.read()
rpm-build d9acb6
        f.close()
rpm-build d9acb6
        if not c.strip():
rpm-build d9acb6
            c = headers.get_payload()
rpm-build d9acb6
        if not headers.keys():
rpm-build d9acb6
            raise Exception(
rpm-build d9acb6
                "File %s has no headers" % self.filename)
rpm-build d9acb6
        self.description = headers['Description']
rpm-build d9acb6
        self.expect = headers.get('Expect', '')
rpm-build d9acb6
        self.ignore = headers.get('Ignore')
rpm-build d9acb6
        self.options = [
rpm-build d9acb6
            o.strip() for o in headers.get('Options', '').split(',')
rpm-build d9acb6
            if o.strip()]
rpm-build d9acb6
        parts = bar_re.split(c)
rpm-build d9acb6
        self.input = parts[0].rstrip() + '\n'
rpm-build d9acb6
        if parts[1:]:
rpm-build d9acb6
            self.expect = parts[1].rstrip() + '\n'
rpm-build d9acb6
        else:
rpm-build d9acb6
            self.expect = None
rpm-build d9acb6
rpm-build d9acb6
    def runTest(self):
rpm-build d9acb6
        self.parse()
rpm-build d9acb6
        if self.ignore:
rpm-build d9acb6
            # We've marked this test to be ignored.
rpm-build d9acb6
            return
rpm-build d9acb6
        kw = {}
rpm-build d9acb6
        for name in self.options:
rpm-build d9acb6
            if name.startswith('-'):
rpm-build d9acb6
                kw[name[1:]] = False
rpm-build d9acb6
            else:
rpm-build d9acb6
                kw[name] = True
rpm-build d9acb6
        if kw.get('clean', True):
rpm-build d9acb6
            transformed = Cleaner(**kw).clean_html(self.input)
rpm-build d9acb6
        else:
rpm-build d9acb6
            transformed = self.input
rpm-build d9acb6
        assert self.expect is not None, (
rpm-build d9acb6
            "No expected output in %s" % self.filename)
rpm-build d9acb6
        checker = LHTMLOutputChecker()
rpm-build d9acb6
        if not checker.check_output(self.expect, transformed, 0):
rpm-build d9acb6
            result = checker.output_difference(
rpm-build d9acb6
                DummyInput(want=self.expect), transformed, 0)
rpm-build d9acb6
            #result += '\noptions: %s %r' % (', '.join(self.options), kw)
rpm-build d9acb6
            #result += repr(transformed)
rpm-build d9acb6
            raise Exception("\n"+result)
rpm-build d9acb6
rpm-build d9acb6
    def shortDescription(self):
rpm-build d9acb6
        return self.filename
rpm-build d9acb6
rpm-build d9acb6
def test_suite():
    """Build a suite with one ``FeedTestCase`` per ``.data`` file.

    Every directory in ``feed_dirs`` is scanned in sorted order so the
    resulting suite is deterministic.  Each case is also executed once
    while the suite is being built; as soon as one raises, the remaining
    files of *that directory* are skipped (later directories are still
    scanned).  The failing case stays in the suite so the runner reports
    it.

    Returns:
        unittest.TestSuite: the collected cases.
    """
    suite = unittest.TestSuite()
    if sys.version_info >= (2,4):
        for feed_dir in feed_dirs:
            for fn in sorted(os.listdir(feed_dir)):
                if not fn.endswith('.data'):
                    continue
                case = FeedTestCase(os.path.join(feed_dir, fn))
                suite.addTest(case)
                # This is my lazy way of stopping on first error:
                try:
                    case.runTest()
                except Exception:
                    # Only ordinary errors end the scan; KeyboardInterrupt
                    # and SystemExit are allowed to propagate.
                    break
    return suite