1 """
2 lxml-based doctest output comparison.
3
4 Note: normally, you should just import the `lxml.usedoctest` and
5 `lxml.html.usedoctest` modules from within a doctest, instead of this
6 one::
7
8 >>> import lxml.usedoctest # for XML output
9
10 >>> import lxml.html.usedoctest # for HTML output
11
12 To use this module directly, you must call ``lxmldoctest.install()``,
13 which will cause doctest to use this in all subsequent calls.
14
15 This changes the way output is checked and comparisons are made for
16 XML or HTML-like content.
17
18 XML or HTML content is noticed because the example starts with ``<``
19 (it's HTML if it starts with ``<html``). You can also use the
20 ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
21
22 Some rough wildcard-like things are allowed. Whitespace is generally
23 ignored (except in attributes). In text (attributes and text in the
24 body) you can use ``...`` as a wildcard. In an example it also
25 matches any trailing tags in the element, though it does not match
26 leading tags. You may create a tag ``<any>`` or include an ``any``
27 attribute in the tag. An ``any`` tag matches any tag, while the
28 attribute matches any and all attributes.
29
30 When a match fails, the reformatted example and the actual output are
31 displayed (indented), and a rough diff-like output is given. Anything
32 marked with ``-`` is in the output but wasn't supposed to be, and
33 similarly ``+`` means it's in the example but wasn't in the output.
34
35 You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
36 """
37
38 from lxml import etree
39 import sys
40 import re
41 import doctest
42 import cgi
43
# Names exported by ``from <this module> import *``.
__all__ = [
    'PARSE_HTML',
    'PARSE_XML',
    'NOPARSE_MARKUP',
    'LXMLOutputChecker',
    'LHTMLOutputChecker',
    'install',
    'temp_install',
]

# ``basestring`` only exists on Python 2; fall back to the two Python 3
# string types when the name is missing.
try:
    basestring
except NameError:
    _basestring = (str, bytes)
else:
    _basestring = basestring

_IS_PYTHON_3 = sys.version_info >= (3,)

# Extra doctest option flags understood by the checkers in this module.
# The registration order is kept as-is.
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Reference to doctest's checker class as it is at import time;
# ``doctest.OutputChecker`` itself may be rebound later in this module.
OutputChecker = doctest.OutputChecker
59
61 if v is None:
62 return None
63 else:
64 return v.strip()
65
68
# Shared HTML parser: error recovery is switched off and blank text is
# removed while parsing.
_html_parser = etree.HTMLParser(remove_blank_text=True, recover=False)

# Matches text that starts like a default object repr, e.g.
# ``<Foo object ...`` or ``<foo at ...``; such text also begins with ``<``.
_repr_re = re.compile(r'^<[^>]+ (at|object) ')

# A run of two or more consecutive spaces, tabs or newlines.
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
77
79
80 empty_tags = (
81 'param', 'img', 'area', 'br', 'basefont', 'input',
82 'base', 'meta', 'link', 'col')
83
86
88 alt_self = getattr(self, '_temp_override_self', None)
89 if alt_self is not None:
90 super_method = self._temp_call_super_check_output
91 self = alt_self
92 else:
93 super_method = OutputChecker.check_output
94 parser = self.get_parser(want, got, optionflags)
95 if not parser:
96 return super_method(
97 self, want, got, optionflags)
98 try:
99 want_doc = parser(want)
100 except etree.XMLSyntaxError:
101 return False
102 try:
103 got_doc = parser(got)
104 except etree.XMLSyntaxError:
105 return False
106 return self.compare_docs(want_doc, got_doc)
107
123
125 s = s.strip()
126 return (s.startswith('<')
127 and not _repr_re.search(s))
128
130 if not self.tag_compare(want.tag, got.tag):
131 return False
132 if not self.text_compare(want.text, got.text, True):
133 return False
134 if not self.text_compare(want.tail, got.tail, True):
135 return False
136 if 'any' not in want.attrib:
137 want_keys = sorted(want.attrib.keys())
138 got_keys = sorted(got.attrib.keys())
139 if want_keys != got_keys:
140 return False
141 for key in want_keys:
142 if not self.text_compare(want.attrib[key], got.attrib[key], False):
143 return False
144 if want.text != '...' or len(want):
145 want_children = list(want)
146 got_children = list(got)
147 while want_children or got_children:
148 if not want_children or not got_children:
149 return False
150 want_first = want_children.pop(0)
151 got_first = got_children.pop(0)
152 if not self.compare_docs(want_first, got_first):
153 return False
154 if not got_children and want_first.tail == '...':
155 break
156 return True
157
def text_compare(self, want, got, strip):
    """Return True if the text ``got`` matches the expected text ``want``.

    ``None`` is treated as the empty string on either side.  Every
    literal ``...`` in ``want`` is a wildcard that matches any run of
    characters, including newlines; all other characters must match
    exactly, and the whole of ``got`` must be consumed.

    If ``strip`` is true, both texts are first passed through
    ``norm_whitespace()`` and stripped of leading/trailing whitespace.
    """
    want = want or ''
    got = got or ''
    if strip:
        want = norm_whitespace(want).strip()
        got = norm_whitespace(got).strip()
    # Escape each literal piece on its own and join the pieces with the
    # wildcard, so the result does not depend on how re.escape() happens
    # to spell an escaped dot.
    pattern = '.*'.join(re.escape(piece) for piece in want.split('...'))
    # DOTALL lets the wildcard span line breaks; \Z (unlike $) does not
    # tolerate an extra trailing newline in ``got``.
    return re.match(pattern + r'\Z', got, re.DOTALL) is not None
170
172 if want == 'any':
173 return True
174 if (not isinstance(want, _basestring)
175 or not isinstance(got, _basestring)):
176 return want == got
177 want = want or ''
178 got = got or ''
179 if want.startswith('{...}'):
180
181 return want.split('}')[-1] == got.split('}')[-1]
182 else:
183 return want == got
184
186 want = example.want
187 parser = self.get_parser(want, got, optionflags)
188 errors = []
189 if parser is not None:
190 try:
191 want_doc = parser(want)
192 except etree.XMLSyntaxError:
193 e = sys.exc_info()[1]
194 errors.append('In example: %s' % e)
195 try:
196 got_doc = parser(got)
197 except etree.XMLSyntaxError:
198 e = sys.exc_info()[1]
199 errors.append('In actual output: %s' % e)
200 if parser is None or errors:
201 value = OutputChecker.output_difference(
202 self, example, got, optionflags)
203 if errors:
204 errors.append(value)
205 return '\n'.join(errors)
206 else:
207 return value
208 html = parser is html_fromstring
209 diff_parts = []
210 diff_parts.append('Expected:')
211 diff_parts.append(self.format_doc(want_doc, html, 2))
212 diff_parts.append('Got:')
213 diff_parts.append(self.format_doc(got_doc, html, 2))
214 diff_parts.append('Diff:')
215 diff_parts.append(self.collect_diff(want_doc, got_doc, html, 2))
216 return '\n'.join(diff_parts)
217
219 if not html:
220 return False
221 if el.tag not in self.empty_tags:
222 return False
223 if el.text or len(el):
224
225 return False
226 return True
227
262
269
280
286
288 parts = []
289 if not len(want) and not len(got):
290 parts.append(' '*indent)
291 parts.append(self.collect_diff_tag(want, got))
292 if not self.html_empty_tag(got, html):
293 parts.append(self.collect_diff_text(want.text, got.text))
294 parts.append(self.collect_diff_end_tag(want, got))
295 parts.append(self.collect_diff_text(want.tail, got.tail))
296 parts.append('\n')
297 return ''.join(parts)
298 parts.append(' '*indent)
299 parts.append(self.collect_diff_tag(want, got))
300 parts.append('\n')
301 if strip(want.text) or strip(got.text):
302 parts.append(' '*indent)
303 parts.append(self.collect_diff_text(want.text, got.text))
304 parts.append('\n')
305 want_children = list(want)
306 got_children = list(got)
307 while want_children or got_children:
308 if not want_children:
309 parts.append(self.format_doc(got_children.pop(0), html, indent+2, '-'))
310 continue
311 if not got_children:
312 parts.append(self.format_doc(want_children.pop(0), html, indent+2, '+'))
313 continue
314 parts.append(self.collect_diff(
315 want_children.pop(0), got_children.pop(0), html, indent+2))
316 parts.append(' '*indent)
317 parts.append(self.collect_diff_end_tag(want, got))
318 parts.append('\n')
319 if strip(want.tail) or strip(got.tail):
320 parts.append(' '*indent)
321 parts.append(self.collect_diff_text(want.tail, got.tail))
322 parts.append('\n')
323 return ''.join(parts)
324
326 if not self.tag_compare(want.tag, got.tag):
327 tag = '%s (got: %s)' % (want.tag, got.tag)
328 else:
329 tag = got.tag
330 attrs = []
331 any = want.tag == 'any' or 'any' in want.attrib
332 for name, value in sorted(got.attrib.items()):
333 if name not in want.attrib and not any:
334 attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
335 else:
336 if name in want.attrib:
337 text = self.collect_diff_text(value, want.attrib[name], False)
338 else:
339 text = self.format_text(value, False)
340 attrs.append('%s="%s"' % (name, text))
341 if not any:
342 for name, value in sorted(want.attrib.items()):
343 if name in got.attrib:
344 continue
345 attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
346 if attrs:
347 tag = '<%s %s>' % (tag, ' '.join(attrs))
348 else:
349 tag = '<%s>' % tag
350 return tag
351
353 if want.tag != got.tag:
354 tag = '%s (got: %s)' % (want.tag, got.tag)
355 else:
356 tag = got.tag
357 return '</%s>' % tag
358
def collect_diff_text(self, want, got, strip=True):
    """Render one piece of text for the diff output.

    When ``got`` matches ``want`` (as decided by ``self.text_compare``),
    the result is ``got`` passed through ``self.format_text``, or an
    empty string if there is no text at all.  Otherwise the result is
    ``"<want> (got: <got>)"`` passed through ``self.format_text``.

    ``strip`` is forwarded unchanged to both helper methods.
    """
    if self.text_compare(want, got, strip):
        return self.format_text(got, strip) if got else ''
    # Missing text arrives as None; show it as empty rather than letting
    # the literal word "None" appear in the diff.
    text = '%s (got: %s)' % (want or '', got or '')
    return self.format_text(text, strip)
366
370
372 """
373 Install doctestcompare for all future doctests.
374
375 If html is true, then by default the HTML parser will be used;
376 otherwise the XML parser is used.
377 """
378 if html:
379 doctest.OutputChecker = LHTMLOutputChecker
380 else:
381 doctest.OutputChecker = LXMLOutputChecker
382
384 """
385 Use this *inside* a doctest to enable this checker for this
386 doctest only.
387
388 If html is true, then by default the HTML parser will be used;
389 otherwise the XML parser is used.
390 """
391 if html:
392 Checker = LHTMLOutputChecker
393 else:
394 Checker = LXMLOutputChecker
395 frame = _find_doctest_frame()
396 dt_self = frame.f_locals['self']
397 checker = Checker()
398 old_checker = dt_self._checker
399 dt_self._checker = checker
400
401
402
403
404
405
406
407
408
409 if _IS_PYTHON_3:
410 check_func = frame.f_locals['check'].__func__
411 checker_check_func = checker.check_output.__func__
412 else:
413 check_func = frame.f_locals['check'].im_func
414 checker_check_func = checker.check_output.im_func
415
416
417 doctest.etree = etree
418 _RestoreChecker(dt_self, old_checker, checker,
419 check_func, checker_check_func,
420 del_module)
421
def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
             del_module):
    """Record the pieces of a temporary install and activate it.

    ``old_checker`` is kept as ``self.checker`` and tagged with two
    private attributes: ``_temp_call_super_check_output`` (set to
    ``self.call_super``) and ``_temp_override_self`` (set to
    ``new_checker``).  ``dt_self``, ``check_func``, ``clone_func`` and
    ``del_module`` are stored unchanged under the same names.  Finally
    ``install_clone()`` and ``install_dt_self()`` are called.
    """
    self.dt_self, self.checker = dt_self, old_checker

    # ``self.checker`` is ``old_checker``, so the markers go straight
    # onto the argument.
    old_checker._temp_call_super_check_output = self.call_super
    old_checker._temp_override_self = new_checker

    self.check_func, self.clone_func = check_func, clone_func
    self.del_module = del_module

    # The install steps run only after every attribute has been set.
    self.install_clone()
    self.install_dt_self()
435 if _IS_PYTHON_3:
436 self.func_code = self.check_func.__code__
437 self.func_globals = self.check_func.__globals__
438 self.check_func.__code__ = self.clone_func.__code__
439 else:
440 self.func_code = self.check_func.func_code
441 self.func_globals = self.check_func.func_globals
442 self.check_func.func_code = self.clone_func.func_code
444 if _IS_PYTHON_3:
445 self.check_func.__code__ = self.func_code
446 else:
447 self.check_func.func_code = self.func_code
449 self.prev_func = self.dt_self._DocTestRunner__record_outcome
450 self.dt_self._DocTestRunner__record_outcome = self
452 self.dt_self._DocTestRunner__record_outcome = self.prev_func
454 if self.del_module:
455 import sys
456 del sys.modules[self.del_module]
457 if '.' in self.del_module:
458 package, module = self.del_module.rsplit('.', 1)
459 package_mod = sys.modules[package]
460 delattr(package_mod, module)
475
477 import sys
478 frame = sys._getframe(1)
479 while frame:
480 l = frame.f_locals
481 if 'BOOM' in l:
482
483 return frame
484 frame = frame.f_back
485 raise LookupError(
486 "Could not find doctest (only use this function *inside* a doctest)")
487
# Self-test picked up by ``doctest.testmod()`` through the ``__test__``
# hook.  ``print(...)`` with one parenthesised argument is valid on both
# Python 2 and Python 3, whereas the bare ``print "..."`` statement is a
# syntax error on Python 3.
__test__ = {
    'basic': '''
    >>> temp_install()
    >>> print("""<xml a="1" b="2">stuff</xml>""")
    <xml b="2" a="1">...</xml>
    >>> print("""<xml xmlns="http://example.com"><tag attr="bar" /></xml>""")
    <xml xmlns="...">
    <tag attr="..." />
    </xml>
    >>> print("""<xml>blahblahblah<foo /></xml>""") # doctest: +NOPARSE_MARKUP, +ELLIPSIS
    <xml>...foo /></xml>
    '''}

# Running the module as a script executes the self-test above.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
504