1 """
2 lxml-based doctest output comparison.
3
4 To use this you must call ``lxmldoctest.install()``, which will cause
5 doctest to use this in all subsequent calls.
6
7 This changes the way output is checked and comparisons are made for
8 XML or HTML-like content.
9
10 XML or HTML content is noticed because the example starts with ``<``
11 (it's HTML if it starts with ``<html``). You can also use the
12 ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
13
14 Some rough wildcard-like things are allowed. Whitespace is generally
15 ignored (except in attributes). In text (attributes and text in the
16 body) you can use ``...`` as a wildcard. In an example it also
17 matches any trailing tags in the element, though it does not match
18 leading tags. You may create a tag ``<any>`` or include an ``any``
19 attribute in the tag. An ``any`` tag matches any tag, while the
20 attribute matches any and all attributes.
21
When a match fails, the reformatted example and the actual output are
displayed (indented), and a rough diff-like output is given.  Anything
marked with ``-`` is in the output but wasn't supposed to be, and
similarly ``+`` means it's in the example but wasn't in the output.
26
27 You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
28 """
29
30 from lxml import etree
31 from lxml.html import document_fromstring
32 import re
33 import doctest
34 import cgi
35
36 __all__ = ['PARSE_HTML', 'PARSE_XML', 'LXMLOutputChecker',
37 'LHTMLOutputChecker', 'install', 'temp_install']
38
39 PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
40 PARSE_XML = doctest.register_optionflag('PARSE_XML')
41 NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')
42
43 OutputChecker = doctest.OutputChecker
44
46 if v is None:
47 return None
48 else:
49 return v.strip()
50
53
54
55 _repr_re = re.compile(r'^<[^>]+ (at|object) ')
56 _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
57
59
60 empty_tags = (
61 'param', 'img', 'area', 'br', 'basefont', 'input',
62 'base', 'meta', 'link', 'col')
63
66
68 alt_self = getattr(self, '_temp_override_self', None)
69 if alt_self is not None:
70 super_method = self._temp_call_super_check_output
71 self = alt_self
72 else:
73 super_method = OutputChecker.check_output
74 parser = self.get_parser(want, got, optionflags)
75 if not parser:
76 return super_method(
77 self, want, got, optionflags)
78 try:
79 want_doc = parser(want)
80 except etree.XMLSyntaxError:
81 return False
82 try:
83 got_doc = parser(got)
84 except etree.XMLSyntaxError:
85 return False
86 return self.compare_docs(want_doc, got_doc)
87
103
105 s = s.strip()
106 return (s.startswith('<')
107 and not _repr_re.search(s))
108
110 if not self.tag_compare(want.tag, got.tag):
111 return False
112 if not self.text_compare(want.text, got.text, True):
113 return False
114 if not self.text_compare(want.tail, got.tail, True):
115 return False
116 if 'any' not in want.attrib:
117 want_keys = sorted(want.attrib.keys())
118 got_keys = sorted(got.attrib.keys())
119 if want_keys != got_keys:
120 return False
121 for key in want_keys:
122 if not self.text_compare(want.attrib[key], got.attrib[key], False):
123 return False
124 if want.text != '...' or len(want):
125 want_children = list(want)
126 got_children = list(got)
127 while want_children or got_children:
128 if not want_children or not got_children:
129 return False
130 want_first = want_children.pop(0)
131 got_first = got_children.pop(0)
132 if not self.compare_docs(want_first, got_first):
133 return False
134 if not got_children and want_first.tail == '...':
135 break
136 return True
137
def text_compare(self, want, got, strip):
    """
    Return True if the text ``got`` matches the expected text ``want``.

    ``None`` (or any other false value) is treated as the empty string.
    When ``strip`` is true, both values are passed through
    ``norm_whitespace()`` and stripped before being compared.

    ``...`` in ``want`` acts as a wildcard matching any run of characters,
    including line breaks; all other characters must match literally.
    """
    want = want or ''
    got = got or ''
    if strip:
        want = norm_whitespace(want).strip()
        got = norm_whitespace(got).strip()
    # Escape the expected text so it is matched literally, then turn every
    # (escaped) ``...`` back into a wildcard.
    pattern = '^%s$' % re.escape(want).replace(r'\.\.\.', '.*')
    # DOTALL is required: without it ``.`` does not match a newline, so
    # ``...`` could never stand in for text that contains a line break.
    return re.search(pattern, got, re.DOTALL) is not None
150
152 if want == 'any':
153 return True
154 if (not isinstance(want, basestring)
155 or not isinstance(got, basestring)):
156 return want == got
157 want = want or ''
158 got = got or ''
159 if want.startswith('{...}'):
160
161 return want.split('}')[-1] == got.split('}')[-1]
162 else:
163 return want == got
164
166 want = example.want
167 parser = self.get_parser(want, got, optionflags)
168 errors = []
169 if parser is not None:
170 try:
171 want_doc = parser(want)
172 except etree.XMLSyntaxError, e:
173 errors.append('In example: %s' % e)
174 try:
175 got_doc = parser(got)
176 except etree.XMLSyntaxError, e:
177 errors.append('In actual output: %s' % e)
178 if parser is None or errors:
179 value = OutputChecker.output_difference(
180 self, example, got, optionflags)
181 if errors:
182 errors.append(value)
183 return '\n'.join(errors)
184 else:
185 return value
186 html = parser is document_fromstring
187 diff_parts = []
188 diff_parts.append('Expected:')
189 diff_parts.append(self.format_doc(want_doc, html, 2))
190 diff_parts.append('Got:')
191 diff_parts.append(self.format_doc(got_doc, html, 2))
192 diff_parts.append('Diff:')
193 diff_parts.append(self.collect_diff(want_doc, got_doc, html, 2))
194 return '\n'.join(diff_parts)
195
197 if not html:
198 return False
199 if el.tag not in self.empty_tags:
200 return False
201 if el.text or len(el):
202
203 return False
204 return True
205
240
247
258
264
266 parts = []
267 if not len(want) and not len(got):
268 parts.append(' '*indent)
269 parts.append(self.collect_diff_tag(want, got))
270 if not self.html_empty_tag(got, html):
271 parts.append(self.collect_diff_text(want.text, got.text))
272 parts.append(self.collect_diff_end_tag(want, got))
273 parts.append(self.collect_diff_text(want.tail, got.tail))
274 parts.append('\n')
275 return ''.join(parts)
276 parts.append(' '*indent)
277 parts.append(self.collect_diff_tag(want, got))
278 parts.append('\n')
279 if strip(want.text) or strip(got.text):
280 parts.append(' '*indent)
281 parts.append(self.collect_diff_text(want.text, got.text))
282 parts.append('\n')
283 want_children = list(want)
284 got_children = list(got)
285 while want_children or got_children:
286 if not want_children:
287 parts.append(self.format_doc(got_children.pop(0), html, indent+2, '-'))
288 continue
289 if not got_children:
290 parts.append(self.format_doc(want_children.pop(0), html, indent+2, '+'))
291 continue
292 parts.append(self.collect_diff(
293 want_children.pop(0), got_children.pop(0), html, indent+2))
294 parts.append(' '*indent)
295 parts.append(self.collect_diff_end_tag(want, got))
296 parts.append('\n')
297 if strip(want.tail) or strip(got.tail):
298 parts.append(' '*indent)
299 parts.append(self.collect_diff_text(want.tail, got.tail))
300 parts.append('\n')
301 return ''.join(parts)
302
def collect_diff_tag(self, want, got):
    """
    Build the opening-tag part of the diff for a pair of elements.

    ``want`` is the element from the example and ``got`` the element from
    the actual output.  The result is a string of the form
    ``<tag attr="value" ...>`` in which:

    * a tag mismatch is rendered as ``want (got: got)``;
    * attributes only present in the output are prefixed with ``-``;
    * attributes only present in the example are prefixed with ``+``;
    * attributes present on both sides are rendered through
      ``collect_diff_text()``.

    If the example's tag is ``any`` or it carries an ``any`` attribute,
    unexpected or missing attributes are not flagged.
    """
    if not self.tag_compare(want.tag, got.tag):
        tag = '%s (got: %s)' % (want.tag, got.tag)
    else:
        tag = got.tag
    accept_any_attrs = want.tag == 'any' or 'any' in want.attrib
    attrs = []
    for name, value in sorted(got.attrib.items()):
        if name in want.attrib:
            # The expected value must be passed first: collect_diff_text()
            # treats its first argument as the example (where ``...`` is a
            # wildcard) and reports the second one as "got".
            text = self.collect_diff_text(want.attrib[name], value, False)
            attrs.append('%s="%s"' % (name, text))
        elif accept_any_attrs:
            attrs.append('%s="%s"' % (name, self.format_text(value, False)))
        else:
            attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
    if not accept_any_attrs:
        for name, value in sorted(want.attrib.items()):
            if name not in got.attrib:
                attrs.append(
                    '+%s="%s"' % (name, self.format_text(value, False)))
    if attrs:
        return '<%s %s>' % (tag, ' '.join(attrs))
    return '<%s>' % tag
329
331 if want.tag != got.tag:
332 tag = '%s (got: %s)' % (want.tag, got.tag)
333 else:
334 tag = got.tag
335 return '</%s>' % tag
336
def collect_diff_text(self, want, got, strip=True):
    """
    Render a piece of text (element text, tail or attribute value) for
    the diff output.

    If ``got`` matches ``want`` according to ``text_compare()``, the
    formatted ``got`` text is returned (or ``''`` when ``got`` is empty).
    Otherwise the result is the formatted string ``want (got: got)``.
    ``strip`` is passed on to ``text_compare()`` and ``format_text()``.
    """
    if not self.text_compare(want, got, strip):
        mismatch = '%s (got: %s)' % (want, got)
        return self.format_text(mismatch, strip)
    if got:
        return self.format_text(got, strip)
    return ''
344
348
350 """
351 Install doctestcompare for all future doctests.
352
353 If html is true, then by default the HTML parser will be used;
354 otherwise the XML parser is used.
355 """
356 if html:
357 doctest.OutputChecker = LHTMLOutputChecker
358 else:
359 doctest.OutputChecker = LXMLOutputChecker
360
362 """
363 Use this *inside* a doctest to enable this checker for this
364 doctest only.
365
366 If html is true, then by default the HTML parser will be used;
367 otherwise the XML parser is used.
368 """
369 if html:
370 Checker = LHTMLOutputChecker
371 else:
372 Checker = LXMLOutputChecker
373 frame = _find_doctest_frame()
374 dt_self = frame.f_locals['self']
375 checker = Checker()
376 old_checker = dt_self._checker
377 dt_self._checker = checker
378
379
380
381
382
383
384
385
386
387 check_func = frame.f_locals['check'].im_func
388
389
390 doctest.etree = etree
391 _RestoreChecker(dt_self, old_checker, checker,
392 check_func, checker.check_output.im_func,
393 del_module)
394
def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
             del_module):
    """
    Record everything needed to undo a temporary checker installation
    and activate the override.

    ``old_checker`` is tagged with ``_temp_override_self`` (the new
    checker) and ``_temp_call_super_check_output`` (this object's
    ``call_super``); these are the attributes ``check_output`` looks up
    to redirect a call to the replacement checker.  The remaining
    arguments are stored unchanged for later use, after which
    ``install_clone()`` and ``install_dt_self()`` are invoked.
    """
    # Plain bookkeeping first.
    self.dt_self = dt_self
    self.check_func = check_func
    self.clone_func = clone_func
    self.del_module = del_module
    # Redirect the previously installed checker to the new one.
    self.checker = old_checker
    old_checker._temp_call_super_check_output = self.call_super
    old_checker._temp_override_self = new_checker
    # Activate the hooks; these rely on the attributes set above.
    self.install_clone()
    self.install_dt_self()
408 self.func_code = self.check_func.func_code
409 self.func_globals = self.check_func.func_globals
410 self.check_func.func_code = self.clone_func.func_code
412 self.check_func.func_code = self.func_code
414 self.prev_func = self.dt_self._DocTestRunner__record_outcome
415 self.dt_self._DocTestRunner__record_outcome = self
417 self.dt_self._DocTestRunner__record_outcome = self.prev_func
419 if self.del_module:
420 import sys
421 del sys.modules[self.del_module]
422 if '.' in self.del_module:
423 package, module = self.del_module.rsplit('.', 1)
424 package_mod = sys.modules[package]
425 delattr(package_mod, module)
440
442 import sys
443 frame = sys._getframe(1)
444 while frame:
445 l = frame.f_locals
446 if 'BOOM' in l:
447
448 return frame
449 frame = frame.f_back
450 raise LookupError(
451 "Could not find doctest (only use this function *inside* a doctest)")
452
453 __test__ = {
454 'basic': '''
455 >>> temp_install()
456 >>> print """<xml a="1" b="2">stuff</xml>"""
457 <xml b="2" a="1">...</xml>
458 >>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""
459 <xml xmlns="...">
460 <tag attr="..." />
461 </xml>
462 >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
463 <xml>...foo /></xml>
464 '''}
465
if __name__ == '__main__':
    # Executed as a script: run this module's doctests.  ``doctest`` is
    # already imported at module level, so no local import is needed.
    doctest.testmod()
469