# (Repository web-viewer residue, not part of the module: "Blob Blame History Raw")
import sys
import os
import re
try:
    from rfc822 import Message
except ImportError:
    # Python 3
    from email import message_from_file as Message
import unittest
from lxml.tests.common_imports import doctest
if sys.version_info >= (2,4):
    from lxml.doctestcompare import LHTMLOutputChecker

from lxml.html.clean import clean, Cleaner

# Directories scanned by test_suite() for ``.data`` fixture files; both are
# resolved relative to the directory containing this module.
feed_dirs = [
    os.path.join(os.path.dirname(__file__), subdir)
    for subdir in ('feedparser-data', 'hackers-org-data')
]
# Matches a run of five or more dashes.  FeedTestCase.parse() splits the
# fixture body on it to separate the input from the expected output.
bar_re = re.compile(r"-----+")

class DummyInput:
    """Plain attribute holder: every keyword argument becomes an attribute.

    FeedTestCase.runTest() builds one with a ``want`` attribute and hands
    it to the output checker's ``output_difference``.
    """

    def __init__(self, **kw):
        # Store all keywords as instance attributes in one step.
        self.__dict__.update(kw)

class FeedTestCase(unittest.TestCase):

    def __init__(self, filename):
        self.filename = filename
        unittest.TestCase.__init__(self)

    def parse(self):
        f = open(self.filename, 'r')
        headers = Message(f)
        c = f.read()
        f.close()
        if not c.strip():
            c = headers.get_payload()
        if not headers.keys():
            raise Exception(
                "File %s has no headers" % self.filename)
        self.description = headers['Description']
        self.expect = headers.get('Expect', '')
        self.ignore = headers.get('Ignore')
        self.options = [
            o.strip() for o in headers.get('Options', '').split(',')
            if o.strip()]
        parts = bar_re.split(c)
        self.input = parts[0].rstrip() + '\n'
        if parts[1:]:
            self.expect = parts[1].rstrip() + '\n'
        else:
            self.expect = None

    def runTest(self):
        self.parse()
        if self.ignore:
            # We've marked this test to be ignored.
            return
        kw = {}
        for name in self.options:
            if name.startswith('-'):
                kw[name[1:]] = False
            else:
                kw[name] = True
        if kw.get('clean', True):
            transformed = Cleaner(**kw).clean_html(self.input)
        else:
            transformed = self.input
        assert self.expect is not None, (
            "No expected output in %s" % self.filename)
        checker = LHTMLOutputChecker()
        if not checker.check_output(self.expect, transformed, 0):
            result = checker.output_difference(
                DummyInput(want=self.expect), transformed, 0)
            #result += '\noptions: %s %r' % (', '.join(self.options), kw)
            #result += repr(transformed)
            raise Exception("\n"+result)

    def shortDescription(self):
        return self.filename

def test_suite():
    """Build a suite with one FeedTestCase per ``.data`` file in feed_dirs.

    Every case is also executed once while the suite is being built; the
    first case that raises ends the scan of its directory (the failing case
    stays in the suite so the runner reports it).  Note that the ``break``
    only leaves the per-file loop, so later directories are still scanned.

    Returns the populated ``unittest.TestSuite``; it is empty on Python
    older than 2.4, where LHTMLOutputChecker is not imported.
    """
    suite = unittest.TestSuite()
    if sys.version_info >= (2,4):
        for feed_dir in feed_dirs:
            # os.listdir() returns names in arbitrary order; sort them so
            # the collected cases (and where collection stops) are
            # reproducible from run to run.
            for fn in sorted(os.listdir(feed_dir)):
                fn = os.path.join(feed_dir, fn)
                if not fn.endswith('.data'):
                    continue
                case = FeedTestCase(fn)
                suite.addTests([case])
                # This is my lazy way of stopping on first error:
                try:
                    case.runTest()
                except Exception:
                    # Not a bare ``except``: KeyboardInterrupt and
                    # SystemExit must still be able to abort collection.
                    break
    return suite