1
2
3
4
5
6
7
8
9
10
11
12 import re, sys
13
def stdout():
    """Return a writable object for doctest output that accepts bytes or text.

    On Python 2 this is plain ``sys.stdout``.  On Python 3 it is a small
    wrapper whose ``write`` decodes ``bytes`` input as ISO-8859-1 before
    forwarding it to ``sys.stdout``; text input is forwarded unchanged.
    """
    if sys.version_info[0] < 3:
        return sys.stdout

    class bytes_stdout(object):
        """Minimal file-like shim: only ``write`` is provided."""

        def write(self, data):
            # sys.stdout is looked up on every call, so output follows any
            # replacement of sys.stdout (e.g. by the doctest runner).
            if isinstance(data, bytes):
                data = data.decode('ISO8859-1')
            sys.stdout.write(data)

    return bytes_stdout()
23
24 try:
25 from StringIO import StringIO as BytesIO
26 except ImportError:
27 from io import BytesIO
28
29 from lxml import etree as ElementTree
30 from lxml import _elementpath as ElementPath
31 from lxml import ElementInclude
32 ET = ElementTree
33
34
35
36
37
38
39
41 xml_data = re.sub(r'\s*xmlns:[a-z0-9]+="http://www.w3.org/2001/XInclude"', '', xml_data)
42 xml_data = xml_data.replace(' />', '/>')
43 if xml_data[-1:] == '\n':
44 xml_data = xml_data[:-1]
45 return xml_data
46
62
65
68
73
74 SAMPLE_XML = ElementTree.XML("""
75 <body>
76 <tag class='a'>text</tag>
77 <tag class='b' />
78 <section>
79 <tag class='b' id='inner'>subtext</tag>
80 </section>
81 </body>
82 """)
83
84
85
86
88 len(string)
89 for char in string:
90 if len(char) != 1:
91 print("expected one-character string, got %r" % char)
92 new_string = string + ""
93 new_string = string + " "
94 string[:0]
95
100
102 len(mapping)
103 keys = mapping.keys()
104 items = mapping.items()
105 for key in keys:
106 item = mapping[key]
107 mapping["key"] = "value"
108 if mapping["key"] != "value":
109 print("expected value string, got %r" % mapping["key"])
110
126
129
130
131
132
134 """
135 >>> from elementtree.ElementTree import *
136 >>> from elementtree.ElementInclude import *
137 >>> from elementtree.ElementPath import *
138 >>> from elementtree.HTMLTreeBuilder import *
139 >>> from elementtree.SimpleXMLWriter import *
140 >>> from elementtree.TidyTools import *
141 """
142
143
144 del sanity
145
147 """
148 >>> ElementTree.VERSION
149 '1.3a2'
150 """
151
152
153 del version
154
156 """
157 Test element tree interface.
158
159 >>> element = ElementTree.Element("tag")
160 >>> check_element(element)
161 >>> tree = ElementTree.ElementTree(element)
162 >>> check_element_tree(tree)
163 """
164
166 """
167 >>> elem = ElementTree.XML("<body><tag/></body>")
168 >>> serialize(elem)
169 '<body><tag/></body>'
170 >>> e = ElementTree.Element("tag2")
171 >>> elem.append(e)
172 >>> serialize(elem)
173 '<body><tag/><tag2/></body>'
174 >>> elem.remove(e)
175 >>> serialize(elem)
176 '<body><tag/></body>'
177 >>> elem.insert(0, e)
178 >>> serialize(elem)
179 '<body><tag2/><tag/></body>'
180 >>> elem.remove(e)
181 >>> elem.extend([e])
182 >>> serialize(elem)
183 '<body><tag/><tag2/></body>'
184 >>> elem.remove(e)
185 """
186
188 """
189 Test find methods using the elementpath fallback.
190
191 >>> CurrentElementPath = ElementTree.ElementPath
192 >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
193 >>> elem = SAMPLE_XML
194 >>> elem.find("tag").tag
195 'tag'
196 >>> ElementTree.ElementTree(elem).find("tag").tag
197 'tag'
198 >>> elem.findtext("tag")
199 'text'
200 >>> elem.findtext("tog")
201 >>> elem.findtext("tog", "default")
202 'default'
203 >>> ElementTree.ElementTree(elem).findtext("tag")
204 'text'
205 >>> summarize_list(elem.findall("tag"))
206 ['tag', 'tag']
207 >>> summarize_list(elem.findall(".//tag"))
208 ['tag', 'tag', 'tag']
209
210 Path syntax doesn't work in this case.
211
212 >>> elem.find("section/tag")
213 >>> elem.findtext("section/tag")
214 >>> elem.findall("section/tag")
215 []
216
217 >>> ElementTree.ElementPath = CurrentElementPath
218 """
219
220
221 del simplefind
222
224 """
225 Test find methods (including xpath syntax).
226
227 >>> elem = SAMPLE_XML
228 >>> elem.find("tag").tag
229 'tag'
230 >>> ElementTree.ElementTree(elem).find("tag").tag
231 'tag'
232 >>> elem.find("section/tag").tag
233 'tag'
234 >>> ElementTree.ElementTree(elem).find("section/tag").tag
235 'tag'
236 >>> elem.findtext("tag")
237 'text'
238 >>> elem.findtext("tog")
239 >>> elem.findtext("tog", "default")
240 'default'
241 >>> ElementTree.ElementTree(elem).findtext("tag")
242 'text'
243 >>> elem.findtext("section/tag")
244 'subtext'
245 >>> ElementTree.ElementTree(elem).findtext("section/tag")
246 'subtext'
247 >>> summarize_list(elem.findall("tag"))
248 ['tag', 'tag']
249 >>> summarize_list(elem.findall("*"))
250 ['tag', 'tag', 'section']
251 >>> summarize_list(elem.findall(".//tag"))
252 ['tag', 'tag', 'tag']
253 >>> summarize_list(elem.findall("section/tag"))
254 ['tag']
255 >>> summarize_list(elem.findall("section//tag"))
256 ['tag']
257 >>> summarize_list(elem.findall("section/*"))
258 ['tag']
259 >>> summarize_list(elem.findall("section//*"))
260 ['tag']
261 >>> summarize_list(elem.findall("section/.//*"))
262 ['tag']
263 >>> summarize_list(elem.findall("*/*"))
264 ['tag']
265 >>> summarize_list(elem.findall("*//*"))
266 ['tag']
267 >>> summarize_list(elem.findall("*/tag"))
268 ['tag']
269 >>> summarize_list(elem.findall("*/./tag"))
270 ['tag']
271 >>> summarize_list(elem.findall("./tag"))
272 ['tag', 'tag']
273 >>> summarize_list(elem.findall(".//tag"))
274 ['tag', 'tag', 'tag']
275 >>> summarize_list(elem.findall("././tag"))
276 ['tag', 'tag']
277 >>> summarize_list(elem.findall(".//tag[@class]"))
278 ['tag', 'tag', 'tag']
279 >>> summarize_list(elem.findall(".//tag[@class='a']"))
280 ['tag']
281 >>> summarize_list(elem.findall(".//tag[@class='b']"))
282 ['tag', 'tag']
283 >>> summarize_list(elem.findall(".//tag[@id]"))
284 ['tag']
285 >>> summarize_list(elem.findall(".//section[tag]"))
286 ['section']
287 >>> summarize_list(elem.findall(".//section[element]"))
288 []
289 >>> summarize_list(elem.findall("../tag"))
290 []
291 >>> summarize_list(elem.findall("section/../tag"))
292 ['tag', 'tag']
293 >>> summarize_list(ElementTree.ElementTree(elem).findall("./tag"))
294 ['tag', 'tag']
295
296 FIXME: ET's Path module handles this case incorrectly; this gives
297 a warning in 1.3, and the behaviour will be modified in 1.4.
298
299 >>> summarize_list(ElementTree.ElementTree(elem).findall("/tag"))
300 ['tag', 'tag']
301 """
302
304 """
305 Check bad or unsupported path expressions.
306
307 >>> elem = SAMPLE_XML
308 >>> elem.findall("/tag")
309 Traceback (most recent call last):
310 SyntaxError: cannot use absolute path on element
311
312 # this is supported in ET 1.3:
313 #>>> elem.findall("section//")
314 #Traceback (most recent call last):
315 #SyntaxError: invalid path
316 """
317
319 """
320 Test parsing from file.
321
322 >>> tree = ElementTree.parse("samples/simple.xml")
323 >>> normalize_crlf(tree)
324 >>> tree.write(stdout())
325 <root>
326 <element key="value">text</element>
327 <element>text</element>tail
328 <empty-element/>
329 </root>
330 >>> tree = ElementTree.parse("samples/simple-ns.xml")
331 >>> normalize_crlf(tree)
332 >>> tree.write(stdout())
333 <root xmlns="http://namespace/">
334 <element key="value">text</element>
335 <element>text</element>tail
336 <empty-element/>
337 </root>
338
339 ## <ns0:root xmlns:ns0="http://namespace/">
340 ## <ns0:element key="value">text</ns0:element>
341 ## <ns0:element>text</ns0:element>tail
342 ## <ns0:empty-element/>
343 ## </ns0:root>
344 """
345
347 """
348 Test HTML parsing.
349
350 >>> # p = HTMLTreeBuilder.TreeBuilder()
351 >>> p = ElementTree.HTMLParser()
352 >>> p.feed("<p><p>spam<b>egg</b></p>")
353 >>> serialize(p.close())
354 '<p>spam<b>egg</b></p>'
355 """
356
357
358 del parsehtml
359
361 r"""
362 >>> element = ElementTree.XML("<html><body>text</body></html>")
363 >>> ElementTree.ElementTree(element).write(stdout())
364 <html><body>text</body></html>
365 >>> element = ElementTree.fromstring("<html><body>text</body></html>")
366 >>> ElementTree.ElementTree(element).write(stdout())
367 <html><body>text</body></html>
368
369 ## >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
370 ## >>> element = ElementTree.fromstringlist(sequence)
371 ## >>> ElementTree.ElementTree(element).write(stdout())
372 ## <html><body>text</body></html>
373
374 >>> print(repr(ElementTree.tostring(element)).lstrip('b'))
375 '<html><body>text</body></html>'
376
377 # looks different in lxml
378 # >>> print(ElementTree.tostring(element, "ascii"))
379 # <?xml version='1.0' encoding='ascii'?>
380 # <html><body>text</body></html>
381
382 >>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
383 >>> len(ids)
384 0
385 >>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
386 >>> len(ids)
387 1
388 >>> ids["body"].tag
389 'body'
390 """
391
393 """
394 Test the xmllib-based parser.
395
396 >>> from elementtree import SimpleXMLTreeBuilder
397 >>> parser = SimpleXMLTreeBuilder.TreeBuilder()
398 >>> tree = ElementTree.parse("samples/simple.xml", parser)
399 >>> normalize_crlf(tree)
400 >>> tree.write(sys.stdout)
401 <root>
402 <element key="value">text</element>
403 <element>text</element>tail
404 <empty-element />
405 </root>
406 """
407
408
409 del simpleparsefile
410
412 """
413 Test iterparse interface.
414
415 >>> iterparse = ElementTree.iterparse
416
417 >>> context = iterparse("samples/simple.xml")
418 >>> for action, elem in context:
419 ... print("%s %s" % (action, elem.tag))
420 end element
421 end element
422 end empty-element
423 end root
424 >>> context.root.tag
425 'root'
426
427 >>> context = iterparse("samples/simple-ns.xml")
428 >>> for action, elem in context:
429 ... print("%s %s" % (action, elem.tag))
430 end {http://namespace/}element
431 end {http://namespace/}element
432 end {http://namespace/}empty-element
433 end {http://namespace/}root
434
435 >>> events = ()
436 >>> context = iterparse("samples/simple.xml", events)
437 >>> for action, elem in context:
438 ... print("%s %s" % (action, elem.tag))
439
440 >>> events = ()
441 >>> context = iterparse("samples/simple.xml", events=events)
442 >>> for action, elem in context:
443 ... print("%s %s" % (action, elem.tag))
444
445 >>> events = ("start", "end")
446 >>> context = iterparse("samples/simple.xml", events)
447 >>> for action, elem in context:
448 ... print("%s %s" % (action, elem.tag))
449 start root
450 start element
451 end element
452 start element
453 end element
454 start empty-element
455 end empty-element
456 end root
457
458 >>> events = ("start", "end", "start-ns", "end-ns")
459 >>> context = iterparse("samples/simple-ns.xml", events)
460 >>> for action, elem in context:
461 ... if action in ("start", "end"):
462 ... print("%s %s" % (action, elem.tag))
463 ... else:
464 ... print("%s %s" % (action, elem))
465 start-ns ('', 'http://namespace/')
466 start {http://namespace/}root
467 start {http://namespace/}element
468 end {http://namespace/}element
469 start {http://namespace/}element
470 end {http://namespace/}element
471 start {http://namespace/}empty-element
472 end {http://namespace/}empty-element
473 end {http://namespace/}root
474 end-ns None
475
476 """
477
479 """
480 Test the "fancy" parser.
481
482 Sanity check.
483 >>> from elementtree import XMLTreeBuilder
484 >>> parser = XMLTreeBuilder.FancyTreeBuilder()
485 >>> tree = ElementTree.parse("samples/simple.xml", parser)
486 >>> normalize_crlf(tree)
487 >>> tree.write(sys.stdout)
488 <root>
489 <element key="value">text</element>
490 <element>text</element>tail
491 <empty-element />
492 </root>
493
494 Callback check.
495 >>> class MyFancyParser(XMLTreeBuilder.FancyTreeBuilder):
496 ... def start(self, elem):
497 ... print("START %s" % elem.tag)
498 ... def end(self, elem):
499 ... print("END %s" % elem.tag)
500 >>> parser = MyFancyParser()
501 >>> tree = ElementTree.parse("samples/simple.xml", parser)
502 START root
503 START element
504 END element
505 START element
506 END element
507 START empty-element
508 END empty-element
509 END root
510 """
511
512
513 del fancyparsefile
514
516 """
517 >>> elem = ElementTree.Element("tag")
518 >>> elem.text = "text"
519 >>> serialize(elem)
520 '<tag>text</tag>'
521 >>> ElementTree.SubElement(elem, "subtag").text = "subtext"
522 >>> serialize(elem)
523 '<tag>text<subtag>subtext</subtag></tag>'
524
525 ## Test tag suppression
526 ## >>> elem.tag = None
527 ## >>> serialize(elem)
528 ## 'text<subtag>subtext</subtag>'
529 """
530
532 """
533 >>> elem = ElementTree.XML("<html><body>text</body></html>")
534 >>> print(repr(ElementTree.tostring(elem)).lstrip('b'))
535 '<html><body>text</body></html>'
536 >>> elem = ElementTree.fromstring("<html><body>text</body></html>")
537 >>> print(repr(ElementTree.tostring(elem)).lstrip('b'))
538 '<html><body>text</body></html>'
539 """
540
542 r"""
543 Test encoding issues.
544
545 >>> elem = ElementTree.Element("tag")
546 >>> elem.text = u'abc'
547 >>> serialize(elem)
548 '<tag>abc</tag>'
549 >>> serialize(elem, encoding="utf-8")
550 '<tag>abc</tag>'
551 >>> serialize(elem, encoding="us-ascii")
552 '<tag>abc</tag>'
553 >>> serialize(elem, encoding="iso-8859-1").lower()
554 "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
555
556 >>> elem.text = "<&\"\'>"
557 >>> serialize(elem)
558 '<tag><&"\'></tag>'
559 >>> serialize(elem, encoding="utf-8")
560 '<tag><&"\'></tag>'
561 >>> serialize(elem, encoding="us-ascii") # cdata characters
562 '<tag><&"\'></tag>'
563 >>> serialize(elem, encoding="iso-8859-1").lower()
564 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag><&"\'></tag>'
565
566 >>> elem.attrib["key"] = "<&\"\'>"
567 >>> elem.text = None
568 >>> serialize(elem)
569 '<tag key="<&"\'>"/>'
570 >>> serialize(elem, encoding="utf-8")
571 '<tag key="<&"\'>"/>'
572 >>> serialize(elem, encoding="us-ascii")
573 '<tag key="<&"\'>"/>'
574 >>> serialize(elem, encoding="iso-8859-1").lower()
575 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="<&"\'>"/>'
576
577 >>> elem.text = u'\xe5\xf6\xf6<>'
578 >>> elem.attrib.clear()
579 >>> serialize(elem)
580 '<tag>åöö<></tag>'
581 >>> serialize(elem, encoding="utf-8")
582 '<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>'
583 >>> serialize(elem, encoding="us-ascii")
584 '<tag>åöö<></tag>'
585 >>> serialize(elem, encoding="iso-8859-1").lower()
586 "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6<></tag>"
587
588 >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
589 >>> elem.text = None
590 >>> serialize(elem)
591 '<tag key="åöö<>"/>'
592 >>> serialize(elem, encoding="utf-8")
593 '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>"/>'
594 >>> serialize(elem, encoding="us-ascii")
595 '<tag key="åöö<>"/>'
596 >>> serialize(elem, encoding="iso-8859-1").lower()
597 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6<>"/>'
598 """
599
# On Python 3, drop the u'' prefixes from the doctest text of ``encoding``
# before doctest collects it.
if sys.version_info >= (3,):
    encoding.__doc__ = encoding.__doc__.replace("u'", "'")
602
604 r"""
605 Test serialization methods.
606
607 >>> e = ET.XML("<html><link/><script>1 < 2</script></html>")
608 >>> e.tail = "\n"
609 >>> serialize(e)
610 '<html><link /><script>1 < 2</script></html>\n'
611 >>> serialize(e, method=None)
612 '<html><link /><script>1 < 2</script></html>\n'
613 >>> serialize(e, method="xml")
614 '<html><link /><script>1 < 2</script></html>\n'
615 >>> serialize(e, method="html")
616 '<html><link><script>1 < 2</script></html>\n'
617 >>> serialize(e, method="text")
618 '1 < 2\n'
619
620 """
621
622
623 del methods
624
626 """
627 Test iterators.
628
629 >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
630 >>> summarize_list(e.iter())
631 ['html', 'body', 'i']
632 >>> summarize_list(e.find("body").iter())
633 ['body', 'i']
634 >>> "".join(e.itertext())
635 'this is a paragraph...'
636 >>> "".join(e.find("body").itertext())
637 'this is a paragraph.'
638 """
639
640 ENTITY_XML = """\
641 <!DOCTYPE points [
642 <!ENTITY % user-entities SYSTEM 'user-entities.xml'>
643 %user-entities;
644 ]>
645 <document>&entity;</document>
646 """
647
649 """
650 Test entity handling.
651
652 1) bad entities
653
654 >>> ElementTree.XML("<document>&entity;</document>")
655 Traceback (most recent call last):
656 ExpatError: undefined entity: line 1, column 10
657
658 >>> ElementTree.XML(ENTITY_XML)
659 Traceback (most recent call last):
660 ExpatError: undefined entity &entity;: line 5, column 10
661
662 (add more tests here)
663
664 """
665
666
667 del entity
668
def error(xml):
    """
    Test error handling.

    Parses *xml* with ``ET.XML`` and returns the ``ET.ParseError`` instance
    that was raised; returns None when parsing succeeds.

    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError:
        # sys.exc_value does not exist on Python 3; sys.exc_info() returns
        # the active exception on both Python 2 and Python 3.
        return sys.exc_info()[1]
685
686
687 del error
688
690 """
691 Test namespace issues.
692
693 1) xml namespace
694
695 >>> elem = ElementTree.XML("<tag xml:lang='en' />")
696 >>> serialize(elem) # 1.1
697 '<tag xml:lang="en"/>'
698
699 2) other "well-known" namespaces
700
701 >>> elem = ElementTree.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
702 >>> serialize(elem) # 2.1
703 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>'
704
705 >>> elem = ElementTree.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
706 >>> serialize(elem) # 2.2
707 '<html:html xmlns:html="http://www.w3.org/1999/xhtml"/>'
708
709 >>> elem = ElementTree.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
710 >>> serialize(elem) # 2.3
711 '<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope"/>'
712
713 3) unknown namespaces
714
715 """
716
718 """
719 Test QName handling.
720
721 1) decorated tags
722
723 >>> elem = ElementTree.Element("{uri}tag")
724 >>> serialize(elem) # 1.1
725 '<ns0:tag xmlns:ns0="uri"/>'
726 >>> elem = ElementTree.Element(ElementTree.QName("{uri}tag"))
727 >>> serialize(elem) # 1.2
728 '<ns0:tag xmlns:ns0="uri"/>'
729 >>> elem = ElementTree.Element(ElementTree.QName("uri", "tag"))
730 >>> serialize(elem) # 1.3
731 '<ns0:tag xmlns:ns0="uri"/>'
732
733 # ns/attribute order ...
734
735 ## 2) decorated attributes
736
737 ## >>> elem.clear()
738 ## >>> elem.attrib["{uri}key"] = "value"
739 ## >>> serialize(elem) # 2.1
740 ## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'
741
742 ## >>> elem.clear()
743 ## >>> elem.attrib[ElementTree.QName("{uri}key")] = "value"
744 ## >>> serialize(elem) # 2.2
745 ## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'
746
747 ## 3) decorated values are not converted by default, but the
748 ## QName wrapper can be used for values
749
750 ## >>> elem.clear()
751 ## >>> elem.attrib["{uri}key"] = "{uri}value"
752 ## >>> serialize(elem) # 3.1
753 ## '<ns0:tag ns0:key="{uri}value" xmlns:ns0="uri"/>'
754
755 ## >>> elem.clear()
756 ## >>> elem.attrib["{uri}key"] = ElementTree.QName("{uri}value")
757 ## >>> serialize(elem) # 3.2
758 ## '<ns0:tag ns0:key="ns0:value" xmlns:ns0="uri"/>'
759
760 ## >>> elem.clear()
761 ## >>> subelem = ElementTree.Element("tag")
762 ## >>> subelem.attrib["{uri1}key"] = ElementTree.QName("{uri2}value")
763 ## >>> elem.append(subelem)
764 ## >>> elem.append(subelem)
765 ## >>> serialize(elem) # 3.3
766 ## '<ns0:tag xmlns:ns0="uri"><tag ns1:key="ns2:value" xmlns:ns1="uri1" xmlns:ns2="uri2"/><tag ns1:key="ns2:value" xmlns:ns1="uri1" xmlns:ns2="uri2"/></ns0:tag>'
767
768 """
769
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Flattens the (op, tag) pairs produced by ``ElementPath.xpath_tokenizer``
    into a list, taking the operator when it is truthy and the tag otherwise.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    return [op or tag for op, tag in ElementPath.xpath_tokenizer(p)]
822
823
824
825
826 XINCLUDE = {
827 "C1.xml": """\
828 <?xml version='1.0'?>
829 <document xmlns:xi="http://www.w3.org/2001/XInclude">
830 <p>120 Mz is adequate for an average home user.</p>
831 <xi:include href="disclaimer.xml"/>
832 </document>
833 """, "disclaimer.xml": """\
834 <?xml version='1.0'?>
835 <disclaimer>
836 <p>The opinions represented herein represent those of the individual
837 and should not be interpreted as official policy endorsed by this
838 organization.</p>
839 </disclaimer>
840 """,
841 "C2.xml": """\
842 <?xml version='1.0'?>
843 <document xmlns:xi="http://www.w3.org/2001/XInclude">
844 <p>This document has been accessed
845 <xi:include href="count.txt" parse="text"/> times.</p>
846 </document>
847 """, "count.txt": "324387", "C3.xml": """\
848 <?xml version='1.0'?>
849 <document xmlns:xi="http://www.w3.org/2001/XInclude">
850 <p>The following is the source of the "data.xml" resource:</p>
851 <example><xi:include href="data.xml" parse="text"/></example>
852 </document>
853 """, "data.xml": """\
854 <?xml version='1.0'?>
855 <data>
856 <item><![CDATA[Brooks & Shields]]></item>
857 </data>
858 """,
859 "C5.xml": """\
860 <?xml version='1.0'?>
861 <div xmlns:xi="http://www.w3.org/2001/XInclude">
862 <xi:include href="example.txt" parse="text">
863 <xi:fallback>
864 <xi:include href="fallback-example.txt" parse="text">
865 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
866 </xi:include>
867 </xi:fallback>
868 </xi:include>
869 </div>
870 """,
871 "default.xml": """\
872 <?xml version='1.0'?>
873 <document xmlns:xi="http://www.w3.org/2001/XInclude">
874 <p>Example.</p>
875 <xi:include href="samples/simple.xml"/>
876 </document>
877 """}
878
879
888
890 r"""
891 Basic inclusion example (XInclude C.1)
892
893 >>> document = xinclude_loader("C1.xml")
894 >>> ElementInclude.include(document, xinclude_loader)
895 >>> print(serialize(document)) # C1
896 <document>
897 <p>120 Mz is adequate for an average home user.</p>
898 <disclaimer>
899 <p>The opinions represented herein represent those of the individual
900 and should not be interpreted as official policy endorsed by this
901 organization.</p>
902 </disclaimer>
903 </document>
904
905 Textual inclusion example (XInclude C.2)
906
907 >>> document = xinclude_loader("C2.xml")
908 >>> ElementInclude.include(document, xinclude_loader)
909 >>> print(serialize(document)) # C2
910 <document>
911 <p>This document has been accessed
912 324387 times.</p>
913 </document>
914
915 Textual inclusion of XML example (XInclude C.3)
916
917 >>> document = xinclude_loader("C3.xml")
918 >>> ElementInclude.include(document, xinclude_loader)
919 >>> print(serialize(document)) # C3
920 <document>
921 <p>The following is the source of the "data.xml" resource:</p>
922 <example><?xml version='1.0'?>
923 <data>
924 <item><![CDATA[Brooks & Shields]]></item>
925 </data>
926 </example>
927 </document>
928
929 ## Fallback example (XInclude C.5)
930 ## Note! Fallback support is not yet implemented
931
932 ## >>> document = xinclude_loader("C5.xml")
933 ## >>> ElementInclude.include(document, xinclude_loader)
934 ## Traceback (most recent call last):
935 ## IOError: resource not found
936 ## >>> # print(serialize(document)) # C5
937
938 """
939
941 """
942 >>> document = xinclude_loader("default.xml")
943 >>> ElementInclude.include(document)
944 >>> print(serialize(document)) # default
945 <document>
946 <p>Example.</p>
947 <root>
948 <element key="value">text</element>
949 <element>text</element>tail
950 <empty-element/>
951 </root>
952 </document>
953 """
954
955
956
957
959 r"""
960 >>> file = BytesIO()
961 >>> w = SimpleXMLWriter.XMLWriter(file)
962 >>> html = w.start("html")
963 >>> x = w.start("head")
964 >>> w.element("title", "my document")
965 >>> w.data("\n")
966 >>> w.element("meta", name="hello", value="goodbye")
967 >>> w.data("\n")
968 >>> w.end()
969 >>> x = w.start("body")
970 >>> w.element("h1", "this is a heading")
971 >>> w.data("\n")
972 >>> w.element("p", u"this is a paragraph")
973 >>> w.data("\n")
974 >>> w.element("p", u"reserved characters: <&>")
975 >>> w.data("\n")
976 >>> w.element("p", u"detta är också ett stycke")
977 >>> w.data("\n")
978 >>> w.close(html)
979 >>> print(file.getvalue())
980 <html><head><title>my document</title>
981 <meta name="hello" value="goodbye" />
982 </head><body><h1>this is a heading</h1>
983 <p>this is a paragraph</p>
984 <p>reserved characters: <&></p>
985 <p>detta är också ett stycke</p>
986 </body></html>
987 """
988
989
990 del xmlwriter
991
992
993
994
1025
1026
1027 del bug_xmltoolkit21
1028
1039
1050
1060
1061
1062 del bug_xmltoolkitX1
1063
1092
1093
1094 del bug_xmltoolkit39
1095
1143
1144
1145 del bug_xmltoolkit45
1146
1157
1158
1159 del bug_xmltoolkit46
1160
1169
1170
1171 del bug_xmltoolkit54
1172
1181
1182
1183 del bug_xmltoolkit55
1184
1186 """
1187 >>> parser = ET.XMLParser()
1188 >>> parser.version
1189 'Expat 2.0.0'
1190 >>> parser.feed(open("samples/simple.xml").read())
1191 >>> print(serialize(parser.close()))
1192 <root>
1193 <element key="value">text</element>
1194 <element>text</element>tail
1195 <empty-element />
1196 </root>
1197 """
1198
1199
1200 del bug_200708_version
1201
1203 r"""
1204
1205 Preserve newlines in attributes.
1206
1207 >>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
1208 >>> ET.tostring(e)
1209 '<SomeTag text="def _f(): return 3 " />'
1210 >>> ET.XML(ET.tostring(e)).get("text")
1211 'def _f():\n return 3\n'
1212 >>> ET.tostring(ET.XML(ET.tostring(e)))
1213 '<SomeTag text="def _f(): return 3 " />'
1214 """
1215
1216
1217 del bug_200708_newline
1218
1220 """
1221
1222 >>> e = ET.Element("{default}elem")
1223 >>> s = ET.SubElement(e, "{default}elem")
1224 >>> serialize(e, default_namespace="default") # 1
1225 '<elem xmlns="default"><elem /></elem>'
1226
1227 >>> e = ET.Element("{default}elem")
1228 >>> s = ET.SubElement(e, "{default}elem")
1229 >>> s = ET.SubElement(e, "{not-default}elem")
1230 >>> serialize(e, default_namespace="default") # 2
1231 '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'
1232
1233 >>> e = ET.Element("{default}elem")
1234 >>> s = ET.SubElement(e, "{default}elem")
1235 >>> s = ET.SubElement(e, "elem") # unprefixed name
1236 >>> serialize(e, default_namespace="default") # 3
1237 Traceback (most recent call last):
1238 ValueError: cannot use non-qualified names with default_namespace option
1239
1240 """
1241
1242
1243 del bug_200709_default_namespace
1244
1245
1246
if __name__ == "__main__":
    # Script entry point: run every doctest collected from the ``selftest``
    # module, print a tally, and exit with status 1 if anything failed.
    import doctest
    import selftest

    results = doctest.testmod(selftest)
    failed = results[0]
    tested = results[1]
    print("%d tests ok." % (tested - failed))
    if failed > 0:
        print("%d tests failed. Exiting with non-zero return code." % failed)
        sys.exit(1)
1254