1 import sys
2 import os
3 import re
4 import rfc822
5 import unittest
6 from lxml.tests.common_imports import doctest
7 if sys.version_info >= (2,4):
8 from lxml.doctestcompare import LHTMLOutputChecker
9
10 from lxml.html.clean import clean, Cleaner
11
# Directory that contains this module; the fixture directories live beside it.
_data_root = os.path.dirname(__file__)

# Directories that are scanned for test fixture files.
feed_dirs = [
    os.path.join(_data_root, 'feedparser-data'),
    os.path.join(_data_root, 'hackers-org-data'),
]

# Matches a run of five or more dashes; used to split a fixture's body into
# its input part and its expected-output part.
bar_re = re.compile(r"-----+")
17
22
24
28
def parse(self):
    """Read ``self.filename`` and populate this test case from it.

    The file is read as a block of RFC 822 style headers (parsed with
    ``rfc822.Message``) followed by a body.  The body is split on runs of
    five or more dashes (``bar_re``): the text before the first run is the
    input, the text after it (up to the next run, if any) is the expected
    output.

    Attributes set on ``self``:
        description: value of the ``Description`` header.
        ignore: value of the ``Ignore`` header, or ``None`` if absent.
        options: stripped, non-empty, comma-separated entries of the
            ``Options`` header (empty list if the header is absent).
        input: first body part, right-stripped, with one trailing newline.
        expect: second body part, right-stripped, with one trailing
            newline; ``None`` when the body contains no dash separator.

    Raises:
        Exception: if the file has no headers at all.
        KeyError: if the ``Description`` header is missing.
    """
    f = open(self.filename, 'rb')
    try:
        # try/finally (not ``with``) so the handle is released even when
        # parsing fails, while staying valid on the Python 2.4 baseline.
        headers = rfc822.Message(f)
        # Message() consumed the header block; the rest is the body.
        c = f.read()
    finally:
        f.close()
    if not headers.keys():
        raise Exception(
            "File %s has no headers" % self.filename)
    self.description = headers['Description']
    # NOTE(review): this header value is always replaced further down
    # (by the second body part or by None), so ``Expect`` currently has
    # no effect -- confirm whether it was meant to be a fallback.
    self.expect = headers.get('Expect', '')
    self.ignore = headers.get('Ignore')
    self.options = [
        o.strip() for o in headers.get('Options', '').split(',')
        if o.strip()]
    parts = bar_re.split(c)
    self.input = parts[0].rstrip() + '\n'
    if parts[1:]:
        self.expect = parts[1].rstrip() + '\n'
    else:
        # No separator in the body: there is no expected output.
        self.expect = None
49
def runTest(self):
    """Parse the fixture, transform its input and compare with ``expect``.

    Each entry of ``self.options`` becomes a keyword argument: a leading
    ``-`` maps the rest of the name to ``False``, any other name maps to
    ``True``.  Unless the ``clean`` option is switched off, the input is
    passed through ``Cleaner(**options).clean_html``; otherwise it is
    compared unchanged.  Nothing is checked when ``self.ignore`` is set.

    Raises:
        AssertionError: if the fixture provides no expected output.
        Exception: if ``LHTMLOutputChecker`` reports a mismatch; the
            message is the checker's difference report.
    """
    self.parse()
    if self.ignore:
        # The fixture is flagged to be skipped.
        return

    cleaner_options = {}
    for option in self.options:
        enabled = not option.startswith('-')
        if not enabled:
            # Drop the single leading dash that marks a disabled option.
            option = option[1:]
        cleaner_options[option] = enabled

    if cleaner_options.get('clean', True):
        transformed = Cleaner(**cleaner_options).clean_html(self.input)
    else:
        transformed = self.input

    assert self.expect is not None, (
        "No expected output in %s" % self.filename)

    checker = LHTMLOutputChecker()
    if checker.check_output(self.expect, transformed, 0):
        return
    difference = checker.output_difference(
        DummyInput(want=self.expect), transformed, 0)
    raise Exception("\n" + difference)
74
77
# Build and return a unittest.TestSuite holding one FeedTestCase for each
# '.data' file found in the directories listed in feed_dirs.
# NOTE(review): the `def` header of this body and all indentation are absent
# from this listing (the leading integers are listing line numbers), so the
# nesting described in the comments below is read from syntax alone --
# confirm against the real file.
79 suite = unittest.TestSuite()
# Cases are only collected on Python 2.4 or newer.
80 if sys.version_info >= (2,4):
# NOTE(review): `dir` shadows the builtin of the same name.
81 for dir in feed_dirs:
82 for fn in os.listdir(dir):
# Turn the bare entry name into a path inside the directory being scanned.
83 fn = os.path.join(dir, fn)
84 if fn.endswith('.data'):
85 case = FeedTestCase(fn)
86 suite.addTests([case])
87
# The case is executed right here, while the suite is still being built; if it
# raises, `break` stops the enclosing loop so no further files are added.
# NOTE(review): the bare `except:` also traps KeyboardInterrupt/SystemExit and
# discards the error; `break` leaves only the innermost loop -- presumably the
# per-directory listing loop, so later directories may still be scanned.
88 try:
89 case.runTest()
90 except:
91 break
92 return suite
93