1 import os
2 import re
3 import rfc822
4 import unittest
5 from lxml.tests.common_imports import doctest
6 from lxml.doctestcompare import LHTMLOutputChecker
7
8 from lxml.html.clean import clean, Cleaner
9
# Directories holding the ``.data`` fixture files; both sit next to this
# module.
_tests_dir = os.path.dirname(__file__)
feed_dirs = [
    os.path.join(_tests_dir, 'feedparser-data'),
    os.path.join(_tests_dir, 'hackers-org-data'),
]
# A run of five or more dashes; used to split a fixture file into its input
# part and its expected-output part.
bar_re = re.compile(r"-----+")
15
20
22
26
# NOTE(review): the ``def`` line was absent from the reviewed listing; the
# name is taken from the ``self.parse()`` call site -- confirm.
def parse(self):
    """Read ``self.filename`` and split it into headers, input and expectation.

    The file is read as a block of RFC 822 style headers followed by a body.
    The body is split on runs of five or more dashes (``bar_re``): the first
    part is the input markup, the second part (if any) is the expected
    output.

    Sets on the instance:
        description: value of the ``Description`` header.
        ignore: value of the ``Ignore`` header, or ``None``.
        options: stripped, non-empty items of the comma-separated
            ``Options`` header.
        input: first body part, right-stripped, with one trailing newline.
        expect: second body part, normalised the same way, or ``None`` when
            the body contains no dash separator.

    Raises:
        Exception: if the file has no headers at all.
        KeyError: presumably, if the ``Description`` header is missing
            (it is looked up with ``headers[...]``).
    """
    source = open(self.filename, 'rb')
    try:
        # Message() consumes the header block; read() then returns the rest.
        headers = rfc822.Message(source)
        body = source.read()
    finally:
        # Close the handle even when header parsing or reading fails.
        source.close()
    if not headers.keys():
        raise Exception(
            "File %s has no headers" % self.filename)
    self.description = headers['Description']
    # Kept for compatibility, but always overwritten by the body split below.
    self.expect = headers.get('Expect', '')
    self.ignore = headers.get('Ignore')
    self.options = [
        o.strip() for o in headers.get('Options', '').split(',')
        if o.strip()]
    parts = bar_re.split(body)
    self.input = parts[0].rstrip() + '\n'
    if parts[1:]:
        self.expect = parts[1].rstrip() + '\n'
    else:
        # No separator in the file: there is no expected output.
        self.expect = None
47
# NOTE(review): the ``def`` line was absent from the reviewed listing; the
# name is inferred from the ``case.runTest()`` call site -- confirm.
def runTest(self):
    """Clean the parsed input and compare it with the expected output.

    Calls ``self.parse()`` first. If ``self.ignore`` is set the test returns
    without checking anything. Otherwise each entry of ``self.options``
    becomes a boolean keyword for ``Cleaner`` (a leading ``-`` means
    ``False``); the special ``clean`` option, when switched off, compares the
    raw input instead of the cleaned one.

    Raises:
        AssertionError: if no expected output is available, or if the
            (cleaned) input does not match it according to
            ``LHTMLOutputChecker``; the message then carries the checker's
            difference report.
    """
    self.parse()
    if self.ignore:
        # This test is marked to be skipped.
        return
    kw = {}
    for name in self.options:
        if name.startswith('-'):
            kw[name[1:]] = False
        else:
            kw[name] = True
    if kw.get('clean', True):
        transformed = Cleaner(**kw).clean_html(self.input)
    else:
        transformed = self.input
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if self.expect is None:
        raise AssertionError("No expected output in %s" % self.filename)
    checker = LHTMLOutputChecker()
    if not checker.check_output(self.expect, transformed, 0):
        result = checker.output_difference(
            DummyInput(want=self.expect), transformed, 0)
        # AssertionError (still an Exception subclass) lets unittest report
        # a mismatch as a failure rather than an error.
        raise AssertionError("\n" + result)
72
75
# NOTE(review): the ``def`` line was absent from the reviewed listing; the
# name ``test_suite`` is assumed from test-module convention -- confirm.
def test_suite():
    """Build a suite with one ``FeedTestCase`` per ``.data`` file.

    Every directory in ``feed_dirs`` is scanned. Each case is added to the
    suite and also run once immediately; when a case raises, scanning of the
    current directory stops, so the failing case is the last one added from
    that directory.

    Returns:
        unittest.TestSuite: the collected cases.
    """
    suite = unittest.TestSuite()
    for data_dir in feed_dirs:
        # os.listdir() order is arbitrary; sort so that the set of cases
        # collected before the first failure is reproducible.
        for entry in sorted(os.listdir(data_dir)):
            path = os.path.join(data_dir, entry)
            if not path.endswith('.data'):
                continue
            case = FeedTestCase(path)
            suite.addTest(case)
            try:
                case.runTest()
            except Exception:
                # Stop at the first failing case, but let KeyboardInterrupt
                # and SystemExit propagate instead of swallowing them.
                break
    return suite
90