1 """
2 lxml-based doctest output comparison.
3
4 Note: normally, you should just import the `lxml.usedoctest` and
5 `lxml.html.usedoctest` modules from within a doctest, instead of this
6 one::
7
8 >>> import lxml.usedoctest # for XML output
9
10 >>> import lxml.html.usedoctest # for HTML output
11
12 To use this module directly, you must call ``lxmldoctest.install()``,
13 which will cause doctest to use this in all subsequent calls.
14
15 This changes the way output is checked and comparisons are made for
16 XML or HTML-like content.
17
18 XML or HTML content is noticed because the example starts with ``<``
19 (it's HTML if it starts with ``<html``). You can also use the
20 ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
21
22 Some rough wildcard-like things are allowed. Whitespace is generally
23 ignored (except in attributes). In text (attributes and text in the
24 body) you can use ``...`` as a wildcard. In an example it also
25 matches any trailing tags in the element, though it does not match
26 leading tags. You may create a tag ``<any>`` or include an ``any``
27 attribute in the tag. An ``any`` tag matches any tag, while the
28 attribute matches any and all attributes.
29
30 When a match fails, the reformatted example and the actual output are
31 displayed (indented), and a rough diff-like output is given. Anything
32 marked with ``-`` is in the output but wasn't supposed to be, and
33 similarly ``+`` means it's in the example but wasn't in the output.
34
35 You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
36 """
37
38 from lxml import etree
39 import re
40 import doctest
41 import cgi
42
# Names exported by ``from <this module> import *``.
43 __all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
44 'LHTMLOutputChecker', 'install', 'temp_install']
45
# Custom doctest option flags.  PARSE_HTML / PARSE_XML force a parser for an
# example; NOPARSE_MARKUP disables markup parsing for an example.
46 PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
47 PARSE_XML = doctest.register_optionflag('PARSE_XML')
48 NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')
49
# Reference to the stock doctest checker, captured at import time so the
# fallback calls in this module still reach it after install() rebinds
# ``doctest.OutputChecker``.
50 OutputChecker = doctest.OutputChecker
51
53 if v is None:
54 return None
55 else:
56 return v.strip()
57
60
# Module-level HTML parser: recover=False turns off lxml's error recovery,
# and remove_blank_text=True discards whitespace-only text nodes.
61 _html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)
62
65
66
# Matches text that opens like an object repr, e.g. ``<Foo object at 0x...>``,
# so that such output is not mistaken for markup.
67 _repr_re = re.compile(r'^<[^>]+ (at|object) ')
# Matches a run of two or more spaces, tabs or newlines.
68 _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
69
71
# Tag names consulted (as ``self.empty_tags``) when deciding whether an HTML
# element is treated as an empty tag.
72 empty_tags = (
73 'param', 'img', 'area', 'br', 'basefont', 'input',
74 'base', 'meta', 'link', 'col')
75
78
80 alt_self = getattr(self, '_temp_override_self', None)
81 if alt_self is not None:
82 super_method = self._temp_call_super_check_output
83 self = alt_self
84 else:
85 super_method = OutputChecker.check_output
86 parser = self.get_parser(want, got, optionflags)
87 if not parser:
88 return super_method(
89 self, want, got, optionflags)
90 try:
91 want_doc = parser(want)
92 except etree.XMLSyntaxError:
93 return False
94 try:
95 got_doc = parser(got)
96 except etree.XMLSyntaxError:
97 return False
98 return self.compare_docs(want_doc, got_doc)
99
115
117 s = s.strip()
118 return (s.startswith('<')
119 and not _repr_re.search(s))
120
122 if not self.tag_compare(want.tag, got.tag):
123 return False
124 if not self.text_compare(want.text, got.text, True):
125 return False
126 if not self.text_compare(want.tail, got.tail, True):
127 return False
128 if 'any' not in want.attrib:
129 want_keys = sorted(want.attrib.keys())
130 got_keys = sorted(got.attrib.keys())
131 if want_keys != got_keys:
132 return False
133 for key in want_keys:
134 if not self.text_compare(want.attrib[key], got.attrib[key], False):
135 return False
136 if want.text != '...' or len(want):
137 want_children = list(want)
138 got_children = list(got)
139 while want_children or got_children:
140 if not want_children or not got_children:
141 return False
142 want_first = want_children.pop(0)
143 got_first = got_children.pop(0)
144 if not self.compare_docs(want_first, got_first):
145 return False
146 if not got_children and want_first.tail == '...':
147 break
148 return True
149
def text_compare(self, want, got, strip):
    """
    Return True if the actual text `got` matches the expected text `want`.

    ``None`` is treated as the empty string for both arguments.  A
    literal ``...`` in `want` is a wildcard that matches any run of
    characters, including line breaks; every other character must match
    exactly.

    If `strip` is true, both strings are passed through
    ``norm_whitespace()`` and stripped before they are compared.
    """
    want = want or ''
    got = got or ''
    if strip:
        want = norm_whitespace(want).strip()
        got = norm_whitespace(got).strip()
    # Escape the whole expected text, then turn the (now escaped) ``...``
    # back into a wildcard.
    pattern = '^%s$' % re.escape(want).replace(r'\.\.\.', '.*')
    # DOTALL is required so the wildcard can span newlines; without it
    # ``...`` could never match text that contains a line break.
    return re.search(pattern, got, re.DOTALL) is not None
162
164 if want == 'any':
165 return True
166 if (not isinstance(want, basestring)
167 or not isinstance(got, basestring)):
168 return want == got
169 want = want or ''
170 got = got or ''
171 if want.startswith('{...}'):
172
173 return want.split('}')[-1] == got.split('}')[-1]
174 else:
175 return want == got
176
178 want = example.want
179 parser = self.get_parser(want, got, optionflags)
180 errors = []
181 if parser is not None:
182 try:
183 want_doc = parser(want)
184 except etree.XMLSyntaxError, e:
185 errors.append('In example: %s' % e)
186 try:
187 got_doc = parser(got)
188 except etree.XMLSyntaxError, e:
189 errors.append('In actual output: %s' % e)
190 if parser is None or errors:
191 value = OutputChecker.output_difference(
192 self, example, got, optionflags)
193 if errors:
194 errors.append(value)
195 return '\n'.join(errors)
196 else:
197 return value
198 html = parser is html_fromstring
199 diff_parts = []
200 diff_parts.append('Expected:')
201 diff_parts.append(self.format_doc(want_doc, html, 2))
202 diff_parts.append('Got:')
203 diff_parts.append(self.format_doc(got_doc, html, 2))
204 diff_parts.append('Diff:')
205 diff_parts.append(self.collect_diff(want_doc, got_doc, html, 2))
206 return '\n'.join(diff_parts)
207
209 if not html:
210 return False
211 if el.tag not in self.empty_tags:
212 return False
213 if el.text or len(el):
214
215 return False
216 return True
217
252
254 if text is None:
255 return ''
256 if strip:
257 text = text.strip()
258 return cgi.escape(text, 1)
259
270
276
278 parts = []
279 if not len(want) and not len(got):
280 parts.append(' '*indent)
281 parts.append(self.collect_diff_tag(want, got))
282 if not self.html_empty_tag(got, html):
283 parts.append(self.collect_diff_text(want.text, got.text))
284 parts.append(self.collect_diff_end_tag(want, got))
285 parts.append(self.collect_diff_text(want.tail, got.tail))
286 parts.append('\n')
287 return ''.join(parts)
288 parts.append(' '*indent)
289 parts.append(self.collect_diff_tag(want, got))
290 parts.append('\n')
291 if strip(want.text) or strip(got.text):
292 parts.append(' '*indent)
293 parts.append(self.collect_diff_text(want.text, got.text))
294 parts.append('\n')
295 want_children = list(want)
296 got_children = list(got)
297 while want_children or got_children:
298 if not want_children:
299 parts.append(self.format_doc(got_children.pop(0), html, indent+2, '-'))
300 continue
301 if not got_children:
302 parts.append(self.format_doc(want_children.pop(0), html, indent+2, '+'))
303 continue
304 parts.append(self.collect_diff(
305 want_children.pop(0), got_children.pop(0), html, indent+2))
306 parts.append(' '*indent)
307 parts.append(self.collect_diff_end_tag(want, got))
308 parts.append('\n')
309 if strip(want.tail) or strip(got.tail):
310 parts.append(' '*indent)
311 parts.append(self.collect_diff_text(want.tail, got.tail))
312 parts.append('\n')
313 return ''.join(parts)
314
def collect_diff_tag(self, want, got):
    """
    Return the opening tag of `got` as a string, annotated with the
    differences from the expected element `want`.

    A tag name that does not match is shown as ``want (got: got)``.
    Attributes only present in `got` are prefixed with ``-``, attributes
    only present in `want` are prefixed with ``+``, and attributes
    present in both are rendered through ``collect_diff_text()``.  If the
    expected element is an ``any`` tag or carries an ``any`` attribute,
    no attribute is reported as missing or unexpected.
    """
    if not self.tag_compare(want.tag, got.tag):
        tag = '%s (got: %s)' % (want.tag, got.tag)
    else:
        tag = got.tag
    attrs = []
    # Named ``match_any`` so the ``any`` builtin is not shadowed.
    match_any = want.tag == 'any' or 'any' in want.attrib
    for name, value in sorted(got.attrib.items()):
        if name not in want.attrib and not match_any:
            attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        else:
            if name in want.attrib:
                # collect_diff_text() takes (want, got): the expected
                # value must come first so that ``...`` wildcards in the
                # example are honoured and the "(got: ...)" note shows
                # the actual value.
                text = self.collect_diff_text(want.attrib[name], value, False)
            else:
                text = self.format_text(value, False)
            attrs.append('%s="%s"' % (name, text))
    if not match_any:
        for name, value in sorted(want.attrib.items()):
            if name in got.attrib:
                continue
            attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
    if attrs:
        tag = '<%s %s>' % (tag, ' '.join(attrs))
    else:
        tag = '<%s>' % tag
    return tag
341
343 if want.tag != got.tag:
344 tag = '%s (got: %s)' % (want.tag, got.tag)
345 else:
346 tag = got.tag
347 return '</%s>' % tag
348
def collect_diff_text(self, want, got, strip=True):
    """
    Return the text to show in the diff for one piece of text.

    When `got` matches `want` (as judged by ``text_compare()``), the
    formatted `got` is returned, or ``''`` if `got` is empty.  Otherwise
    the formatted string ``want (got: got)`` is returned.  `strip` is
    passed through to both ``text_compare()`` and ``format_text()``.
    """
    if not self.text_compare(want, got, strip):
        # Mismatch: show the expected text next to what was produced.
        annotated = '%s (got: %s)' % (want, got)
        return self.format_text(annotated, strip)
    if got:
        return self.format_text(got, strip)
    return ''
356
359 return html_fromstring
360
362 """
363 Install doctestcompare for all future doctests.
364
365 If html is true, then by default the HTML parser will be used;
366 otherwise the XML parser is used.
367 """
368 if html:
369 doctest.OutputChecker = LHTMLOutputChecker
370 else:
371 doctest.OutputChecker = LXMLOutputChecker
372
374 """
375 Use this *inside* a doctest to enable this checker for this
376 doctest only.
377
378 If html is true, then by default the HTML parser will be used;
379 otherwise the XML parser is used.
380 """
381 if html:
382 Checker = LHTMLOutputChecker
383 else:
384 Checker = LXMLOutputChecker
385 frame = _find_doctest_frame()
386 dt_self = frame.f_locals['self']
387 checker = Checker()
388 old_checker = dt_self._checker
389 dt_self._checker = checker
390
391
392
393
394
395
396
397
398
399 check_func = frame.f_locals['check'].im_func
400
401
402 doctest.etree = etree
403 _RestoreChecker(dt_self, old_checker, checker,
404 check_func, checker.check_output.im_func,
405 del_module)
406
def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
             del_module):
    """
    Record the objects needed to undo a temporary checker installation,
    then activate it.

    `old_checker` is kept as ``self.checker`` and receives two temporary
    attributes: ``_temp_call_super_check_output`` (bound to
    ``self.call_super``) and ``_temp_override_self`` (set to
    `new_checker`).  The remaining arguments are stored unchanged under
    attributes of the same name.  Finally ``install_clone()`` and
    ``install_dt_self()`` are called.
    """
    # Plain bookkeeping: keep every collaborator for the later uninstall.
    self.dt_self, self.checker = dt_self, old_checker
    self.check_func, self.clone_func = check_func, clone_func
    self.del_module = del_module
    # Temporary hooks stored on the previous checker instance.
    old_checker._temp_call_super_check_output = self.call_super
    old_checker._temp_override_self = new_checker
    self.install_clone()
    self.install_dt_self()
420 self.func_code = self.check_func.func_code
421 self.func_globals = self.check_func.func_globals
422 self.check_func.func_code = self.clone_func.func_code
424 self.check_func.func_code = self.func_code
426 self.prev_func = self.dt_self._DocTestRunner__record_outcome
427 self.dt_self._DocTestRunner__record_outcome = self
429 self.dt_self._DocTestRunner__record_outcome = self.prev_func
431 if self.del_module:
432 import sys
433 del sys.modules[self.del_module]
434 if '.' in self.del_module:
435 package, module = self.del_module.rsplit('.', 1)
436 package_mod = sys.modules[package]
437 delattr(package_mod, module)
439 self.uninstall_clone()
440 self.uninstall_dt_self()
441 del self.checker._temp_override_self
442 del self.checker._temp_call_super_check_output
443 result = self.prev_func(*args, **kw)
444 self.uninstall_module()
445 return result
447 self.uninstall_clone()
448 try:
449 return self.check_func(*args, **kw)
450 finally:
451 self.install_clone()
452
454 import sys
455 frame = sys._getframe(1)
456 while frame:
457 l = frame.f_locals
458 if 'BOOM' in l:
459
460 return frame
461 frame = frame.f_back
462 raise LookupError(
463 "Could not find doctest (only use this function *inside* a doctest)")
464
# Extra doctests for this module, picked up by doctest.testmod() through the
# ``__test__`` mapping.  The examples use Python 2 ``print`` statements and
# call temp_install() so that the checker is active for this test only.
465 __test__ = {
466 'basic': '''
467 >>> temp_install()
468 >>> print """<xml a="1" b="2">stuff</xml>"""
469 <xml b="2" a="1">...</xml>
470 >>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""
471 <xml xmlns="...">
472 <tag attr="..." />
473 </xml>
474 >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
475 <xml>...foo /></xml>
476 '''}
477
# Running the module as a script executes its own doctests.
478 if __name__ == '__main__':
479 import doctest
480 doctest.testmod()
481