1 """
2 This takes the feedparser tests from here:
3
4 http://feedparser.org/tests/wellformed/sanitize/
5
6 and rewrites them to be easier to handle (not using the internal model
7 of feedparser). The input format is::
8
9 <!--
10 Description: {description}
11 Expect: {expression}
12 -->
13 ...
14 <content ...>{content}</content>
15 ...
16
17 The Expect expression is checked for
18 ``entries[0]['content'][0]['value'] == {data}``.
19
20 The output format is::
21
22 Description: {description}
23 Expect: {expression} (if data couldn't be parsed)
24 Options:
25
26 {content, unescaped}
27 ----------
28 {data, unescaped, if found}
29
30 """
31
32 import re
33 import os
34 import traceback
35
# Patterns that pull the two header fields out of a feedparser test file.
_desc_re = re.compile(r'\s*Description:\s*(.*)')
_expect_re = re.compile(r'\s*Expect:\s*(.*)')
# Patterns that isolate the right-hand side of an ``Expect`` comparison, for
# entry-level (``entries[0][...]``) and feed-level (``feed[...]``) checks.
_data_expect_re = re.compile(r"entries\[0\]\['[^']+'\](?:\[0\]\['value'\])?\s*==\s*(.*)")
_feed_data_expect_re = re.compile(r"feed\['[^']+'\]\s*==\s*(.*)")

# Elements searched for the test payload, in priority order: the first tag in
# this list that matches wins, regardless of where it sits in the document.
_CONTENT_TAGS = (
    'content', 'summary', 'title', 'copyright', 'tagline', 'info',
    'subtitle', 'fullitem', 'body', 'description', 'content:encoded',
)


def _header_field(regex, name, content):
    """Return the value captured by ``regex`` in ``content`` or raise ValueError."""
    match = regex.search(content)
    if match is None:
        raise ValueError('No %s: header found in test data' % name)
    return match.group(1)


def parse_content(content):
    """Split one feedparser test file into its description, expectation and payload.

    :param content: the full text of a feedparser test file.
    :returns: a dict with the keys

        ``'Description'``
            the text following ``Description:``.
        ``'Expect'``
            the text following ``Expect:``.
        ``'data'``
            the evaluated right-hand side of the Expect comparison when it has
            one of the two recognised forms, otherwise ``None``.
        ``'content'``
            the text between the opening and closing tag of the first element
            from the payload tag list that is present.  ``&lt;`` and ``&amp;``
            are unescaped unless the element is ``<body>``.
    :raises ValueError: if the Description header, the Expect header or a
        payload element cannot be found.
    """
    desc = _header_field(_desc_re, 'Description', content)
    expect = _header_field(_expect_re, 'Expect', content)

    data = None
    for regex in (_data_expect_re, _feed_data_expect_re):
        match = regex.search(expect)
        if match:
            # NOTE(security): eval() executes whatever the test file places
            # after ``==``.  Only run this on trusted feedparser fixtures.
            # NOTE(review): the expectations are presumably plain string
            # literals, in which case ast.literal_eval would be a safer
            # drop-in -- confirm against the fixtures before switching.
            data = eval(match.group(1).strip())
            break

    for tag in _CONTENT_TAGS:
        # re.S lets the payload span several lines.
        match = re.search(r"<%s.*?>(.*)</%s>" % (tag, tag), content, re.S)
        if match:
            break
    else:
        # An explicit raise (rather than ``assert``) still fires under -O.
        raise ValueError('No content element found in test data')
    c = match.group(1)

    if tag != 'body':
        # Feed payloads are entity-escaped markup; <body> payloads are already
        # literal markup.  ``&lt;`` must be handled before ``&amp;`` so that a
        # doubly escaped ``&amp;lt;`` ends up as ``&lt;`` and not ``<``.
        c = c.replace('&lt;', '<')
        c = c.replace('&amp;', '&')

    return {
        'Description': desc,
        'Expect': expect,
        'data': data,
        'content': c,
    }
71
73 s = '''\
74 Description: %(Description)s
75 Expect: %(Expect)s
76 Options:
77
78 %(content)s
79 ''' % d
80 if d.get('data') is not None:
81 s += '----------\n%s' % d['data']
82 return s
83
100
102 for fn in os.listdir(dir):
103 fn = os.path.join(dir, fn)
104 if fn.endswith('.xml'):
105 translate_file(fn)
106
# Script entry point: hand the ``feedparser-data`` directory that sits next to
# this file to translate_all().
if __name__ == '__main__':
    import sys
    _here = os.path.dirname(__file__)
    translate_all(os.path.join(_here, 'feedparser-data'))
110