1 """
2 lxml-based doctest output comparison.
3
4 To use this you must call ``lxmldoctest.install()``, which will cause
5 doctest to use this in all subsequent calls.
6
7 This changes the way output is checked and comparisons are made for
8 XML or HTML-like content.
9
10 XML or HTML content is noticed because the example starts with ``<``
11 (it's HTML if it starts with ``<html``). You can also use the
12 ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
13
14 Some rough wildcard-like things are allowed. Whitespace is generally
15 ignored (except in attributes). In text (attributes and text in the
16 body) you can use ``...`` as a wildcard. In an example it also
17 matches any trailing tags in the element, though it does not match
18 leading tags. You may create a tag ``<any>`` or include an ``any``
19 attribute in the tag. An ``any`` tag matches any tag, while the
20 attribute matches any and all attributes.
21
When a match fails, the reformatted example and the actual output are
displayed (indented), and a rough diff-like output is given.  Anything
marked with ``-`` is in the output but wasn't supposed to be, and
similarly ``+`` means it's in the example but wasn't in the output.
26
27 You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
28 """
29
30 from lxml import etree
31 import re
32 import doctest
33 import cgi
34
35 __all__ = ['PARSE_HTML', 'PARSE_XML', 'LXMLOutputChecker',
36 'LHTMLOutputChecker', 'install', 'temp_install']
37
38 PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
39 PARSE_XML = doctest.register_optionflag('PARSE_XML')
40 NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')
41
42 OutputChecker = doctest.OutputChecker
43
45 if v is None:
46 return None
47 else:
48 return v.strip()
49
52
53 _html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)
54
57
58
59 _repr_re = re.compile(r'^<[^>]+ (at|object) ')
60 _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
61
63
64 empty_tags = (
65 'param', 'img', 'area', 'br', 'basefont', 'input',
66 'base', 'meta', 'link', 'col')
67
70
72 alt_self = getattr(self, '_temp_override_self', None)
73 if alt_self is not None:
74 super_method = self._temp_call_super_check_output
75 self = alt_self
76 else:
77 super_method = OutputChecker.check_output
78 parser = self.get_parser(want, got, optionflags)
79 if not parser:
80 return super_method(
81 self, want, got, optionflags)
82 try:
83 want_doc = parser(want)
84 except etree.XMLSyntaxError:
85 return False
86 try:
87 got_doc = parser(got)
88 except etree.XMLSyntaxError:
89 return False
90 return self.compare_docs(want_doc, got_doc)
91
107
109 s = s.strip()
110 return (s.startswith('<')
111 and not _repr_re.search(s))
112
114 if not self.tag_compare(want.tag, got.tag):
115 return False
116 if not self.text_compare(want.text, got.text, True):
117 return False
118 if not self.text_compare(want.tail, got.tail, True):
119 return False
120 if 'any' not in want.attrib:
121 want_keys = sorted(want.attrib.keys())
122 got_keys = sorted(got.attrib.keys())
123 if want_keys != got_keys:
124 return False
125 for key in want_keys:
126 if not self.text_compare(want.attrib[key], got.attrib[key], False):
127 return False
128 if want.text != '...' or len(want):
129 want_children = list(want)
130 got_children = list(got)
131 while want_children or got_children:
132 if not want_children or not got_children:
133 return False
134 want_first = want_children.pop(0)
135 got_first = got_children.pop(0)
136 if not self.compare_docs(want_first, got_first):
137 return False
138 if not got_children and want_first.tail == '...':
139 break
140 return True
141
def text_compare(self, want, got, strip):
    """
    Return True if the text ``got`` matches the pattern ``want``.

    ``None`` is treated as the empty string on both sides.  When
    ``strip`` is true, both values are passed through
    ``norm_whitespace`` and stripped before they are compared.  Every
    ``...`` in ``want`` is a wildcard that matches any run of
    characters, including line breaks; all other characters must match
    literally.
    """
    want = want or ''
    got = got or ''
    if strip:
        want = norm_whitespace(want).strip()
        got = norm_whitespace(got).strip()
    # Escape the example so it is matched literally, then turn every
    # escaped ``...`` back into a wildcard.
    pattern = '^%s$' % re.escape(want).replace(r'\.\.\.', '.*')
    # DOTALL lets the wildcard span newlines; without it ``...`` could
    # never match text that contains a line break.
    return re.search(pattern, got, re.DOTALL) is not None
154
156 if want == 'any':
157 return True
158 if (not isinstance(want, basestring)
159 or not isinstance(got, basestring)):
160 return want == got
161 want = want or ''
162 got = got or ''
163 if want.startswith('{...}'):
164
165 return want.split('}')[-1] == got.split('}')[-1]
166 else:
167 return want == got
168
170 want = example.want
171 parser = self.get_parser(want, got, optionflags)
172 errors = []
173 if parser is not None:
174 try:
175 want_doc = parser(want)
176 except etree.XMLSyntaxError, e:
177 errors.append('In example: %s' % e)
178 try:
179 got_doc = parser(got)
180 except etree.XMLSyntaxError, e:
181 errors.append('In actual output: %s' % e)
182 if parser is None or errors:
183 value = OutputChecker.output_difference(
184 self, example, got, optionflags)
185 if errors:
186 errors.append(value)
187 return '\n'.join(errors)
188 else:
189 return value
190 html = parser is html_fromstring
191 diff_parts = []
192 diff_parts.append('Expected:')
193 diff_parts.append(self.format_doc(want_doc, html, 2))
194 diff_parts.append('Got:')
195 diff_parts.append(self.format_doc(got_doc, html, 2))
196 diff_parts.append('Diff:')
197 diff_parts.append(self.collect_diff(want_doc, got_doc, html, 2))
198 return '\n'.join(diff_parts)
199
201 if not html:
202 return False
203 if el.tag not in self.empty_tags:
204 return False
205 if el.text or len(el):
206
207 return False
208 return True
209
244
251
262
268
270 parts = []
271 if not len(want) and not len(got):
272 parts.append(' '*indent)
273 parts.append(self.collect_diff_tag(want, got))
274 if not self.html_empty_tag(got, html):
275 parts.append(self.collect_diff_text(want.text, got.text))
276 parts.append(self.collect_diff_end_tag(want, got))
277 parts.append(self.collect_diff_text(want.tail, got.tail))
278 parts.append('\n')
279 return ''.join(parts)
280 parts.append(' '*indent)
281 parts.append(self.collect_diff_tag(want, got))
282 parts.append('\n')
283 if strip(want.text) or strip(got.text):
284 parts.append(' '*indent)
285 parts.append(self.collect_diff_text(want.text, got.text))
286 parts.append('\n')
287 want_children = list(want)
288 got_children = list(got)
289 while want_children or got_children:
290 if not want_children:
291 parts.append(self.format_doc(got_children.pop(0), html, indent+2, '-'))
292 continue
293 if not got_children:
294 parts.append(self.format_doc(want_children.pop(0), html, indent+2, '+'))
295 continue
296 parts.append(self.collect_diff(
297 want_children.pop(0), got_children.pop(0), html, indent+2))
298 parts.append(' '*indent)
299 parts.append(self.collect_diff_end_tag(want, got))
300 parts.append('\n')
301 if strip(want.tail) or strip(got.tail):
302 parts.append(' '*indent)
303 parts.append(self.collect_diff_text(want.tail, got.tail))
304 parts.append('\n')
305 return ''.join(parts)
306
def collect_diff_tag(self, want, got):
    """
    Return the start tag of ``got`` annotated with its differences
    from the example element ``want``.

    A tag name that does not match is written as ``want (got: got)``.
    Attributes found only in the output are prefixed with ``-`` and
    attributes found only in the example with ``+``; attributes found
    in both are rendered through ``collect_diff_text``.  When the
    example tag is ``any`` or carries an ``any`` attribute, missing or
    extra attributes are not flagged.
    """
    if self.tag_compare(want.tag, got.tag):
        tag = got.tag
    else:
        tag = '%s (got: %s)' % (want.tag, got.tag)
    match_any_attrs = want.tag == 'any' or 'any' in want.attrib
    attrs = []
    for name, value in sorted(got.attrib.items()):
        if name in want.attrib:
            # The example value must come first: collect_diff_text(want,
            # got) treats its first argument as the pattern that may
            # contain the ``...`` wildcard.
            text = self.collect_diff_text(want.attrib[name], value, False)
            attrs.append('%s="%s"' % (name, text))
        elif match_any_attrs:
            attrs.append('%s="%s"' % (name, self.format_text(value, False)))
        else:
            attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
    if not match_any_attrs:
        for name, value in sorted(want.attrib.items()):
            if name not in got.attrib:
                attrs.append(
                    '+%s="%s"' % (name, self.format_text(value, False)))
    if attrs:
        return '<%s %s>' % (tag, ' '.join(attrs))
    return '<%s>' % tag
333
def collect_diff_end_tag(self, want, got):
    """
    Return the end tag of ``got`` annotated against the example
    element ``want``.

    Tag names are compared with ``tag_compare``, the same check used
    for the start tag, so both tags of one element agree on whether
    the name matched.  A mismatch is written as ``want (got: got)``.
    """
    if self.tag_compare(want.tag, got.tag):
        tag = got.tag
    else:
        tag = '%s (got: %s)' % (want.tag, got.tag)
    return '</%s>' % tag
340
def collect_diff_text(self, want, got, strip=True):
    """
    Render one piece of text for the diff output.

    If ``got`` satisfies ``want`` (as decided by ``text_compare``) the
    formatted ``got`` is returned, or an empty string when ``got`` is
    empty.  Otherwise the formatted string ``want (got: got)`` is
    returned.  ``strip`` is handed on to both ``text_compare`` and
    ``format_text``.
    """
    matched = self.text_compare(want, got, strip)
    if matched and not got:
        return ''
    shown = got if matched else '%s (got: %s)' % (want, got)
    return self.format_text(shown, strip)
348
352
354 """
355 Install doctestcompare for all future doctests.
356
357 If html is true, then by default the HTML parser will be used;
358 otherwise the XML parser is used.
359 """
360 if html:
361 doctest.OutputChecker = LHTMLOutputChecker
362 else:
363 doctest.OutputChecker = LXMLOutputChecker
364
366 """
367 Use this *inside* a doctest to enable this checker for this
368 doctest only.
369
370 If html is true, then by default the HTML parser will be used;
371 otherwise the XML parser is used.
372 """
373 if html:
374 Checker = LHTMLOutputChecker
375 else:
376 Checker = LXMLOutputChecker
377 frame = _find_doctest_frame()
378 dt_self = frame.f_locals['self']
379 checker = Checker()
380 old_checker = dt_self._checker
381 dt_self._checker = checker
382
383
384
385
386
387
388
389
390
391 check_func = frame.f_locals['check'].im_func
392
393
394 doctest.etree = etree
395 _RestoreChecker(dt_self, old_checker, checker,
396 check_func, checker.check_output.im_func,
397 del_module)
398
def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
             del_module):
    """
    Record everything needed to undo a temporary checker installation,
    then perform the installation.

    ``old_checker`` is tagged with ``_temp_override_self`` (the new
    checker) and ``_temp_call_super_check_output`` (this object's
    ``call_super``).  The remaining arguments are stored on the
    instance before ``install_clone()`` and ``install_dt_self()`` are
    called.
    """
    self.dt_self, self.checker = dt_self, old_checker
    self.check_func, self.clone_func = check_func, clone_func
    self.del_module = del_module
    # Tag the original checker so calls routed through it are redirected
    # to the replacement checker.
    old_checker._temp_call_super_check_output = self.call_super
    old_checker._temp_override_self = new_checker
    self.install_clone()
    self.install_dt_self()
412 self.func_code = self.check_func.func_code
413 self.func_globals = self.check_func.func_globals
414 self.check_func.func_code = self.clone_func.func_code
416 self.check_func.func_code = self.func_code
418 self.prev_func = self.dt_self._DocTestRunner__record_outcome
419 self.dt_self._DocTestRunner__record_outcome = self
421 self.dt_self._DocTestRunner__record_outcome = self.prev_func
423 if self.del_module:
424 import sys
425 del sys.modules[self.del_module]
426 if '.' in self.del_module:
427 package, module = self.del_module.rsplit('.', 1)
428 package_mod = sys.modules[package]
429 delattr(package_mod, module)
444
446 import sys
447 frame = sys._getframe(1)
448 while frame:
449 l = frame.f_locals
450 if 'BOOM' in l:
451
452 return frame
453 frame = frame.f_back
454 raise LookupError(
455 "Could not find doctest (only use this function *inside* a doctest)")
456
457 __test__ = {
458 'basic': '''
459 >>> temp_install()
460 >>> print """<xml a="1" b="2">stuff</xml>"""
461 <xml b="2" a="1">...</xml>
462 >>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""
463 <xml xmlns="...">
464 <tag attr="..." />
465 </xml>
466 >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
467 <xml>...foo /></xml>
468 '''}
469
470 if __name__ == '__main__':
471 import doctest
472 doctest.testmod()
473