1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 from collections import OrderedDict
13 import os.path
14 import unittest
15 import copy
16 import sys
17 import re
18 import gc
19 import operator
20 import textwrap
21 import zlib
22 import gzip
23
24 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
25 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url, tmpfile
26 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
27 from .common_imports import canonicalize, _str, _bytes
28
# Print a diagnostic banner: the tested lxml version, the Python version
# info, the libxml/libxslt versions ("used" and "compiled"), and the
# interpreter's filesystem encoding, default encoding and sys.maxunicode.
29 print("""
30 TESTED VERSION: %s""" % etree.__version__ + """
31 Python: %r""" % (sys.version_info,) + """
32 lxml.etree: %r""" % (etree.LXML_VERSION,) + """
33 libxml used: %r""" % (etree.LIBXML_VERSION,) + """
34 libxml compiled: %r""" % (etree.LIBXML_COMPILED_VERSION,) + """
35 libxslt used: %r""" % (etree.LIBXSLT_VERSION,) + """
36 libxslt compiled: %r""" % (etree.LIBXSLT_COMPILED_VERSION,) + """
37 FS encoding: %s""" % (sys.getfilesystemencoding(),) + """
38 Default encoding: %s""" % (sys.getdefaultencoding(),) + """
39 Max Unicode: %s""" % (sys.maxunicode,) + """
40 """)
41
# Text-type alias: bind ``_unicode`` to the ``unicode`` builtin where that
# name exists, and fall back to ``str`` when the lookup raises NameError.
42 try:
43 _unicode = unicode
44 except NameError:
45
46 _unicode = str
47
48
50 """Tests only for etree, not ElementTree"""
# NOTE(review): the enclosing class header is not visible in this listing.
# Rebinds the imported ``etree`` module under the same name in this scope;
# the test bodies that follow access it as ``self.etree``.
51 etree = etree
52
63
72
80
87
# A tag whose local name is empty ('{}' or '{test}') must raise ValueError,
# both in the Element() factory and when assigned to an existing el.tag.
89 Element = self.etree.Element
90 el = Element('name')
91 self.assertRaises(ValueError, Element, '{}')
92 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
93
94 self.assertRaises(ValueError, Element, '{test}')
95 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
96
# A colon inside the tag name ('p:name', '{test}p:name') must raise
# ValueError, both in Element() and when assigning el.tag.
98 Element = self.etree.Element
99 self.assertRaises(ValueError, Element, 'p:name')
100 self.assertRaises(ValueError, Element, '{test}p:name')
101
102 el = Element('name')
103 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
104
# Single or double quotes inside a tag name must raise ValueError: checked
# for plain and '{test}'-namespaced names in Element(), and for el.tag
# assignment.
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, "p'name")
108 self.assertRaises(ValueError, Element, 'p"name')
109
110 self.assertRaises(ValueError, Element, "{test}p'name")
111 self.assertRaises(ValueError, Element, '{test}p"name')
112
113 el = Element('name')
114 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
115 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
116
# Spaces in a tag name (leading/trailing, embedded, or after a '{test}'
# namespace) must raise ValueError in Element() and on el.tag assignment.
118 Element = self.etree.Element
119 self.assertRaises(ValueError, Element, ' name ')
120 self.assertRaises(ValueError, Element, 'na me')
121 self.assertRaises(ValueError, Element, '{test} name')
122
123 el = Element('name')
124 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
125
133
141
# SubElement() must reject tag names containing a single or double quote
# with ValueError, with and without a '{test}' namespace.
143 Element = self.etree.Element
144 SubElement = self.etree.SubElement
145
146 el = Element('name')
147 self.assertRaises(ValueError, SubElement, el, "p'name")
148 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
149
150 self.assertRaises(ValueError, SubElement, el, 'p"name')
151 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
152
161
170
# QName() must raise ValueError for an empty or None name, including an
# empty tag given together with a namespace.
172 QName = self.etree.QName
173 self.assertRaises(ValueError, QName, '')
174 self.assertRaises(ValueError, QName, None)
175 self.assertRaises(ValueError, QName, None, None)
176 self.assertRaises(ValueError, QName, 'test', '')
177
184
# QName() must raise ValueError for a name containing a colon, whether it is
# passed alone or as the tag after a namespace.
186 QName = self.etree.QName
187 self.assertRaises(ValueError, QName, 'p:name')
188 self.assertRaises(ValueError, QName, 'test', 'p:name')
189
# QName() must raise ValueError for names containing spaces.
191 QName = self.etree.QName
192 self.assertRaises(ValueError, QName, ' name ')
193 self.assertRaises(ValueError, QName, 'na me')
194 self.assertRaises(ValueError, QName, 'test', ' name')
195
203
205
# A QName built from (namespace, tag) and a QName built from an Element that
# carries that tag must be interchangeable: the assertions compare the
# element's tag, both QNames and their .text in every combination.
206 QName = self.etree.QName
207 qname1 = QName('http://myns', 'a')
208 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
209
210 qname2 = QName(a)
211 self.assertEqual(a.tag, qname1.text)
212 self.assertEqual(a.tag, qname1)
213 self.assertEqual(qname1.text, qname2.text)
214 self.assertEqual(qname1, qname2.text)
215 self.assertEqual(qname1.text, qname2)
216 self.assertEqual(qname1, qname2)
217
219
# Assigning a QName as element text must store the prefixed form: with the
# nsmap {'p': 'http://myns'} the text reads back as "p:a".
220 etree = self.etree
221 qname = etree.QName('http://myns', 'a')
222 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
223 a.text = qname
224
225 self.assertEqual("p:a", a.text)
226
235
242
257
263
273
285
# Attribute ordering: attributes set one at a time must be reported in
# insertion order, and Element(tag, mapping, **kwargs) must report them in
# the order assembled in ``attr_order`` below.
287 Element = self.etree.Element
288
# Keys/values are generated in descending order (attr12 .. attr5) so that
# insertion order differs from sorted order.
289 keys = ["attr%d" % i for i in range(12, 4, -1)]
290 values = ["TEST-%d" % i for i in range(12, 4, -1)]
291 items = list(zip(keys, values))
292
293 root = Element("root")
294 for key, value in items:
295 root.set(key, value)
296 self.assertEqual(keys, root.attrib.keys())
297 self.assertEqual(values, root.attrib.values())
298
# Expected order for root2: the two keyword attributes first, followed by
# the attributes copied from root (appended via ``attr_order += items``).
299 attr_order = [
300 ('attr_99', 'TOAST-1'),
301 ('attr_98', 'TOAST-2'),
302 ]
# Mapping factories to try; the identity lambda hands root.attrib itself to
# Element(). Plain dict is only included from Python 3.6 on.
303 ordered_dict_types = [OrderedDict, lambda x:x]
304 if sys.version_info >= (3, 6):
305 ordered_dict_types.append(dict)
306 else:
307
# On older interpreters the two keyword attributes are expected sorted.
308 attr_order.sort()
309 attr_order += items
310 expected_keys = [attr[0] for attr in attr_order]
311 expected_values = [attr[1] for attr in attr_order]
312 expected_items = list(zip(expected_keys, expected_values))
313
314 for dict_type in ordered_dict_types:
315 root2 = Element("root2", dict_type(root.attrib),
316 attr_99='TOAST-1', attr_98='TOAST-2')
317
318 try:
319 self.assertSequenceEqual(expected_keys, root2.attrib.keys())
320 self.assertSequenceEqual(expected_values, root2.attrib.values())
321 self.assertSequenceEqual(expected_items, root2.attrib.items())
# Prefix the failure message with the name of the mapping type under test,
# then re-raise the same exception.
322 except AssertionError as exc:
323 exc.args = ("Order of '%s': %s" % (dict_type.__name__, exc.args[0]),) + exc.args[1:]
324 raise
325
# The source element's own attribute order must be unchanged afterwards.
326 self.assertEqual(keys, root.attrib.keys())
327 self.assertEqual(values, root.attrib.values())
328
330
331
# root.set() must raise TypeError for the values 5, ``object`` and None, and
# when the value argument is omitted.
332 Element = self.etree.Element
333 root = Element("root")
334 self.assertRaises(TypeError, root.set, "newattr", 5)
335 self.assertRaises(TypeError, root.set, "newattr", object)
336 self.assertRaises(TypeError, root.set, "newattr", None)
337 self.assertRaises(TypeError, root.set, "newattr")
338
352
374
# strip_elements() removes every element with a matching tag together with
# its subtree, at any depth. Each case re-parses the same document and
# compares the serialised result.
376 XML = self.etree.XML
377 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
378
379 root = XML(xml)
380 self.etree.strip_elements(root, 'a')
381 self.assertEqual(_bytes('<test><x></x></test>'),
382 self._writeElement(root))
383
# Several tag names may be passed at once; names that match nothing
# ('X', 'Y', 'Z') leave the result unaffected.
384 root = XML(xml)
385 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
386 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
387 self._writeElement(root))
388
389 root = XML(xml)
390 self.etree.strip_elements(root, 'c')
391 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
392 self._writeElement(root))
393
# Namespace-aware strip_elements(): the document mixes text, tails and
# elements in the namespaces urn:a, urn:c and urn:x.
395 XML = self.etree.XML
396 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
397
# A plain 'a' leaves the {urn:a}a element in place and removes the
# un-namespaced <a> inside <x>, including its tail text.
398 root = XML(xml)
399 self.etree.strip_elements(root, 'a')
400 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
401 self._writeElement(root))
402
# '{urn:a}b' removes only the b in that namespace; the plain 'c' matches
# neither of the namespaced c elements.
403 root = XML(xml)
404 self.etree.strip_elements(root, '{urn:a}b', 'c')
405 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
406 self._writeElement(root))
407
# '{urn:a}*' is a wildcard over every element in the urn:a namespace.
408 root = XML(xml)
409 self.etree.strip_elements(root, '{urn:a}*', 'c')
410 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
411 self._writeElement(root))
412
# with_tail=False keeps the tail text of the removed elements ("AT", "BT").
413 root = XML(xml)
414 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
415 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
416 self._writeElement(root))
417
436
462
489
516
535
548
559
565
# A processing instruction exposes its pseudo-attributes through .get():
# quoted values are returned as written (inner whitespace and quotes kept);
# bare names without a value and unknown names give None.
567 XML = self.etree.XML
568 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
569 self.assertEqual(root[0].target, "mypi")
570 self.assertEqual(root[0].get('my'), "1")
571 self.assertEqual(root[0].get('test'), " abc ")
572 self.assertEqual(root[0].get('quotes'), "' '")
573 self.assertEqual(root[0].get('only'), None)
574 self.assertEqual(root[0].get('names'), None)
575 self.assertEqual(root[0].get('nope'), None)
576
# Same pseudo-attributes read through the PI's .attrib mapping: present keys
# return their value, while bare names and unknown names raise KeyError.
578 XML = self.etree.XML
579 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
580 self.assertEqual(root[0].target, "mypi")
581 self.assertEqual(root[0].attrib['my'], "1")
582 self.assertEqual(root[0].attrib['test'], " abc ")
583 self.assertEqual(root[0].attrib['quotes'], "' '")
584 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
585 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
586 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
587
589
# copy.deepcopy() of a ProcessingInstruction must give an independent
# object: changing the copy's text leaves the original's text untouched.
590 ProcessingInstruction = self.etree.ProcessingInstruction
591
592 a = ProcessingInstruction("PI", "ONE")
593 b = copy.deepcopy(a)
594 b.text = "ANOTHER"
595
596 self.assertEqual('ONE', a.text)
597 self.assertEqual('ANOTHER', b.text)
598
614
629
640
652
671
676
689
700
# NOTE(review): ``iterparse``, ``tostring`` and ``name`` are bound on lines
# that are not visible in this listing.
# With events=('end', 'comment') the parser must yield six events in
# document order (comments interleaved with element ends), and the root
# taken from the last event must serialise back to the input.
701 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
702 events = list(iterparse(f, events=('end', 'comment')))
703 root = events[-1][1]
704 self.assertEqual(6, len(events))
705 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
706 [ name(*item) for item in events ])
707 self.assertEqual(
708 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
709 tostring(root))
710
722
# NOTE(review): ``iterparse``, ``tostring``, ``ElementTree`` and ``name``
# are bound on lines that are not visible in this listing.
# With events=('end', 'pi') eight events are expected, including the PIs
# before and after the root element.
723 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
724 events = list(iterparse(f, events=('end', 'pi')))
# The last event is the trailing PI, so the root element is second to last.
725 root = events[-2][1]
726 self.assertEqual(8, len(events))
727 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
728 ('pid','d'), 'a', ('pie','e')],
729 [ name(*item) for item in events ])
730 self.assertEqual(
731 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
732 tostring(ElementTree(root)))
733
748
754
# iterparse(recover=True) on input with an unclosed <b>: a root must be
# available and each of a, b and c must produce exactly one 'start' and one
# 'end' event.
756 iterparse = self.etree.iterparse
757 f = BytesIO('<a><b><c/></a>')
758 it = iterparse(f, events=('start', 'end'), recover=True)
759 events = [(ev, el.tag) for ev, el in it]
760 root = it.root
761 self.assertTrue(root is not None)
762
763 self.assertEqual(1, events.count(('start', 'a')))
764 self.assertEqual(1, events.count(('end', 'a')))
765
766 self.assertEqual(1, events.count(('start', 'b')))
767 self.assertEqual(1, events.count(('end', 'b')))
768
769 self.assertEqual(1, events.count(('start', 'c')))
770 self.assertEqual(1, events.count(('end', 'c')))
771
# iterparse(recover=True) on input with several mismatched end tags: a root
# must be available, with one start/end pair for a and two start/end pairs
# each for b and c.
773 iterparse = self.etree.iterparse
774 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
775 it = iterparse(f, events=('start', 'end'), recover=True)
776 events = [(ev, el.tag) for ev, el in it]
777 root = it.root
778 self.assertTrue(root is not None)
779
780 self.assertEqual(1, events.count(('start', 'a')))
781 self.assertEqual(1, events.count(('end', 'a')))
782
783 self.assertEqual(2, events.count(('start', 'b')))
784 self.assertEqual(2, events.count(('end', 'b')))
785
786 self.assertEqual(2, events.count(('start', 'c')))
787 self.assertEqual(2, events.count(('end', 'c')))
788
# iterparse(remove_blank_text=True): collects (text, tail) for every event
# and compares against the expected list, in which every tail is None.
# NOTE(review): whitespace inside these literals may have been altered by
# the listing; verify against the original file.
790 iterparse = self.etree.iterparse
791 f = BytesIO("""
792 <a> \n \n <b> b test </b> \n
793
794 \n\t <c> \n </c> </a> \n """)
795 iterator = iterparse(f, remove_blank_text=True)
796 text = [ (element.text, element.tail)
797 for event, element in iterator ]
798 self.assertEqual(
799 [(" b test ", None), (" \n ", None), (None, None)],
800 text)
801
# iterparse(tag="b") must report only the start and end of the <b> element
# (root[0]); its child <d> and sibling <c> produce no events.
803 iterparse = self.etree.iterparse
804 f = BytesIO('<a><b><d/></b><c/></a>')
805
806 iterator = iterparse(f, tag="b", events=('start', 'end'))
807 events = list(iterator)
808 root = iterator.root
809 self.assertEqual(
810 [('start', root[0]), ('end', root[0])],
811 events)
812
# iterparse(tag="*") matches every element: four elements with start and
# end events give eight events.
814 iterparse = self.etree.iterparse
815 f = BytesIO('<a><b><d/></b><c/></a>')
816
817 iterator = iterparse(f, tag="*", events=('start', 'end'))
818 events = list(iterator)
819 self.assertEqual(
820 8,
821 len(events))
822
# iterparse() with a namespace-qualified tag filter: in a document whose
# default namespace is urn:test:1, "{urn:test:1}b" selects only <b>.
824 iterparse = self.etree.iterparse
825 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
826
827 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
828 events = list(iterator)
829 root = iterator.root
830 self.assertEqual(
831 [('start', root[0]), ('end', root[0])],
832 events)
833
# The filter "{}b" (explicitly empty namespace) matches <b> in a document
# without namespaces ...
835 iterparse = self.etree.iterparse
836 f = BytesIO('<a><b><d/></b><c/></a>')
837 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
838 events = list(iterator)
839 root = iterator.root
840 self.assertEqual(
841 [('start', root[0]), ('end', root[0])],
842 events)
843
# ... and matches nothing when the elements live in urn:test:1.
844 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
845 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
846 events = list(iterator)
847 root = iterator.root
848 self.assertEqual([], events)
849
# The wildcard "{urn:test:1}*" matches all four elements of that namespace,
# giving eight start/end events.
851 iterparse = self.etree.iterparse
852 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
853 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
854 events = list(iterator)
855 self.assertEqual(8, len(events))
856
# The wildcard "{}*" matches no element of a document in urn:test:1 ...
858 iterparse = self.etree.iterparse
859 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
860 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
861 events = list(iterator)
862 self.assertEqual([], events)
863
# ... and every element (eight events) of a document without namespaces.
864 f = BytesIO('<a><b><d/></b><c/></a>')
865 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
866 events = list(iterator)
867 self.assertEqual(8, len(events))
868
# A document encoded as ISO-8859-1 but declaring UTF-8 must raise ParseError
# once the iterparse iterator is consumed (via list()).
870 text = _str('Søk på nettet')
871 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
872 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
873 ).encode('iso-8859-1')
874
875 self.assertRaises(self.etree.ParseError,
876 list, self.etree.iterparse(BytesIO(xml_latin1)))
877
# Same mis-declared ISO-8859-1 document, but iterparse() is given
# encoding="iso-8859-1": parsing yields one event and the root's text equals
# the original text.
879 text = _str('Søk på nettet', encoding="UTF-8")
880 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
881 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
882 ).encode('iso-8859-1')
883
884 iterator = self.etree.iterparse(BytesIO(xml_latin1),
885 encoding="iso-8859-1")
886 self.assertEqual(1, len(list(iterator)))
887
888 a = iterator.root
889 self.assertEqual(a.text, text)
890
# iterparse(strip_cdata=False): the element text reads as plain 'test',
# while serialising the root still emits the CDATA section.
892 tostring = self.etree.tostring
893 f = BytesIO('<root><![CDATA[test]]></root>')
894 context = self.etree.iterparse(f, strip_cdata=False)
895 content = [ el.text for event,el in context ]
896
897 self.assertEqual(['test'], content)
898 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
899 tostring(context.root))
900
904
909
928
929
930
953
954
955
# ElementTree.parse() with a parser that has a custom target must raise
# TypeError, yet the target still receives the start and end callbacks.
957 assertEqual = self.assertEqual
958 assertFalse = self.assertFalse
959
960 events = []
# Target that records callbacks in ``events`` and checks the tag and the
# (empty) attributes it is handed; close() returns "DONE".
961 class Target(object):
962 def start(self, tag, attrib):
963 events.append("start")
964 assertFalse(attrib)
965 assertEqual("TAG", tag)
966 def end(self, tag):
967 events.append("end")
968 assertEqual("TAG", tag)
969 def close(self):
970 return "DONE"
971
972 parser = self.etree.XMLParser(target=Target())
973 tree = self.etree.ElementTree()
974
975 self.assertRaises(TypeError,
976 tree.parse, BytesIO("<TAG/>"), parser=parser)
977 self.assertEqual(["start", "end"], events)
978
980
# An exception raised inside a target callback must propagate out of the
# feed()/close() sequence.
981 events = []
# Recording target whose end() raises ValueError when </a> is reached.
982 class Target(object):
983 def start(self, tag, attrib):
984 events.append("start-" + tag)
985 def end(self, tag):
986 events.append("end-" + tag)
987 if tag == 'a':
988 raise ValueError("dead and gone")
989 def data(self, data):
990 events.append("data-" + data)
991 def close(self):
992 events.append("close")
993 return "DONE"
994
995 parser = self.etree.XMLParser(target=Target())
996
997 try:
998 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
999 done = parser.close()
1000 self.fail("error expected, but parsing succeeded")
1001 except ValueError:
1002 done = 'value error received as expected'
1003
# Nothing after "end-a" is delivered except "close": the expected list shows
# that the target's close() is still invoked after the error.
1004 self.assertEqual(["start-root", "data-A", "start-a",
1005 "data-ca", "end-a", "close"],
1006 events)
1007
1009
# An exception raised inside a target callback must propagate out of
# etree.fromstring() as well.
1010 events = []
# Recording target whose end() raises ValueError when </a> is reached.
1011 class Target(object):
1012 def start(self, tag, attrib):
1013 events.append("start-" + tag)
1014 def end(self, tag):
1015 events.append("end-" + tag)
1016 if tag == 'a':
1017 raise ValueError("dead and gone")
1018 def data(self, data):
1019 events.append("data-" + data)
1020 def close(self):
1021 events.append("close")
1022 return "DONE"
1023
1024 parser = self.etree.XMLParser(target=Target())
1025
1026 try:
1027 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
1028 parser=parser)
1029 self.fail("error expected, but parsing succeeded")
1030 except ValueError:
1031 done = 'value error received as expected'
1032
# Nothing after "end-a" is delivered except "close".
1033 self.assertEqual(["start-root", "data-A", "start-a",
1034 "data-ca", "end-a", "close"],
1035 events)
1036
1038
# Target parser created with collect_ids=False and fed in two chunks: the
# input carries xml:id attributes and comments, and all callbacks must
# arrive in document order, with close() returning the target's "DONE".
1039 events = []
# Recording target with a comment() handler; close() records nothing.
1040 class Target(object):
1041 def start(self, tag, attrib):
1042 events.append("start-" + tag)
1043 def end(self, tag):
1044 events.append("end-" + tag)
1045 def data(self, data):
1046 events.append("data-" + data)
1047 def comment(self, text):
1048 events.append("comment-" + text)
1049 def close(self):
1050 return "DONE"
1051
1052 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1053
1054 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1055 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1056 done = parser.close()
1057
1058 self.assertEqual("DONE", done)
1059 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1060 "start-sub", "end-sub", "data-B", "end-root"],
1061 events)
1062
# NOTE(review): the start of this fragment (the ``events`` list, the target
# class header and its start() method) is not visible in this listing.
# The target's comment() handler must be called for comments before, inside
# and after the root element, in document order.
1068 def end(self, tag):
1069 events.append("end-" + tag)
1070 def data(self, data):
1071 events.append("data-" + data)
1072 def comment(self, text):
1073 events.append("comment-" + text)
1074 def close(self):
1075 return "DONE"
1076
1077 parser = self.etree.XMLParser(target=Target())
1078
1079 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1080 done = parser.close()
1081
1082 self.assertEqual("DONE", done)
1083 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1084 "start-sub", "end-sub", "comment-c", "data-B",
1085 "end-root", "comment-d"],
1086 events)
1087
# The target's pi() handler must be called with (target, data) for
# processing instructions before, inside and after the root element.
1089 events = []
# Recording target with a pi() handler.
1090 class Target(object):
1091 def start(self, tag, attrib):
1092 events.append("start-" + tag)
1093 def end(self, tag):
1094 events.append("end-" + tag)
1095 def data(self, data):
1096 events.append("data-" + data)
1097 def pi(self, target, data):
1098 events.append("pi-" + target + "-" + data)
1099 def close(self):
1100 return "DONE"
1101
1102 parser = self.etree.XMLParser(target=Target())
1103
1104 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1105 done = parser.close()
1106
1107 self.assertEqual("DONE", done)
1108 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1109 "data-B", "end-root", "pi-test-c"],
1110 events)
1111
# With strip_cdata=False the content of a CDATA section must still reach the
# target's data() handler as ordinary character data ("data-ca").
1113 events = []
# Recording target for start/end/data callbacks.
1114 class Target(object):
1115 def start(self, tag, attrib):
1116 events.append("start-" + tag)
1117 def end(self, tag):
1118 events.append("end-" + tag)
1119 def data(self, data):
1120 events.append("data-" + data)
1121 def close(self):
1122 return "DONE"
1123
1124 parser = self.etree.XMLParser(target=Target(),
1125 strip_cdata=False)
1126
1127 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1128 done = parser.close()
1129
1130 self.assertEqual("DONE", done)
1131 self.assertEqual(["start-root", "data-A", "start-a",
1132 "data-ca", "end-a", "data-B", "end-root"],
1133 events)
1134
# With recover=True a mismatched closing tag (</not-root>) must not abort
# parsing: the target still sees "end-root" followed by "close".
1136 events = []
# Recording target whose close() is recorded as well.
1137 class Target(object):
1138 def start(self, tag, attrib):
1139 events.append("start-" + tag)
1140 def end(self, tag):
1141 events.append("end-" + tag)
1142 def data(self, data):
1143 events.append("data-" + data)
1144 def close(self):
1145 events.append("close")
1146 return "DONE"
1147
1148 parser = self.etree.XMLParser(target=Target(),
1149 recover=True)
1150
1151 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1152 done = parser.close()
1153
1154 self.assertEqual("DONE", done)
1155 self.assertEqual(["start-root", "data-A", "start-a",
1156 "data-ca", "end-a", "data-B",
1157 "end-root", "close"],
1158 events)
1159
1169
1179
1188
1203
1219
# iterwalk() over the root *element* with 'pi' events: only processing
# instructions inside the root are reported; the ones before and after the
# root (C0, C99) do not appear in the expected list.
1221 iterwalk = self.etree.iterwalk
1222 root = self.etree.XML(
1223 b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')
1224
1225 iterator = iterwalk(root, events=('start', 'end', 'pi'))
1226 events = list(iterator)
1227 self.assertEqual(
1228 [('start', root), ('pi', root[0]),
1229 ('start', root[1]), ('pi', root[1][0]), ('end', root[1]),
1230 ('pi', root[2]), ('start', root[3]), ('end', root[3]),
1231 ('end', root),
1232 ],
1233 events)
1234
# iterwalk() over an ElementTree wrapping the root: the processing
# instructions that precede and follow the root element are reported too
# (root.getprevious() first, root.getnext() last).
1236 iterwalk = self.etree.iterwalk
1237 root = self.etree.XML(
1238 b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')
1239
1240 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi'))
1241 events = list(iterator)
1242 self.assertEqual(
1243 [('pi', root.getprevious()),
1244 ('start', root), ('pi', root[0]),
1245 ('start', root[1]), ('pi', root[1][0]), ('end', root[1]),
1246 ('pi', root[2]), ('start', root[3]), ('end', root[3]),
1247 ('end', root), ('pi', root.getnext()),
1248 ],
1249 events)
1250
1268
1283
1293
# iterwalk() with 'start' and 'end' events must report a depth-first walk:
# each element's start, then its children, then its end.
1295 iterwalk = self.etree.iterwalk
1296 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1297
1298 iterator = iterwalk(root, events=('start','end'))
1299 events = list(iterator)
1300 self.assertEqual(
1301 [('start', root), ('start', root[0]), ('end', root[0]),
1302 ('start', root[1]), ('end', root[1]), ('end', root)],
1303 events)
1304
1314
1324
1338
1349
# iterwalk() with namespace events while the tree is modified during the
# walk: a namespaced attribute is added to every element except the root.
1351 iterwalk = self.etree.iterwalk
1352 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1353
1354 attr_name = '{testns}bla'
1355 events = []
1356 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1357 for event, elem in iterator:
1358 events.append(event)
1359 if event == 'start':
1360 if elem.tag != '{ns1}a':
1361 elem.set(attr_name, 'value')
1362
# Only the two namespace declarations present in the source document
# (ns1, ns2) show up as start-ns/end-ns pairs in the expected sequence.
1363 self.assertEqual(
1364 ['start-ns', 'start', 'start', 'start-ns', 'start',
1365 'end', 'end-ns', 'end', 'end', 'end-ns'],
1366 events)
1367
1368 self.assertEqual(
1369 None,
1370 root.get(attr_name))
1371 self.assertEqual(
1372 'value',
1373 root[0].get(attr_name))
1374
1387
# iterator.skip_subtree() suppresses events for the children of the current
# element: <c> (child of <b>) never appears in the expected list.
1389 iterwalk = self.etree.iterwalk
1390 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1391
1392 iterator = iterwalk(root, events=('start', 'end'))
1393 tags = []
1394 for event, elem in iterator:
1395 tags.append((event, elem.tag))
# Called for both the start and the end event of <b> and <e>; <e> has no
# children to skip.
1396 if elem.tag in ('b', 'e'):
1397
1398 iterator.skip_subtree()
1399
1400 self.assertEqual(
1401 [('start', 'a'),
1402 ('start', 'b'), ('end', 'b'),
1403 ('start', 'd'),
1404 ('start', 'e'), ('end', 'e'),
1405 ('end', 'd'),
1406 ('end', 'a')],
1407 tags)
1408
# skip_subtree() called on a 'start-ns' event: the element declaring that
# namespace (<b>) is still reported, but its subtree (<c> and the ns2
# declaration on it) is skipped.
1410 iterwalk = self.etree.iterwalk
1411 root = self.etree.XML(_bytes(
1412 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1413
1414 events = []
1415 iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1416 for event, elem in iterator:
# For namespace events the second item is recorded as-is: a
# (prefix, uri) tuple for start-ns and None for end-ns in the expected list.
1417 if event in ('start-ns', 'end-ns'):
1418 events.append((event, elem))
1419 if event == 'start-ns' and elem == ('', 'nsb'):
1420 events.append('skip')
1421 iterator.skip_subtree()
1422 else:
1423 events.append((event, elem.tag))
1424
1425 self.assertEqual(
1426 [('start-ns', ('', 'ns1')),
1427 ('start', '{ns1}a'),
1428 ('start-ns', ('', 'nsb')),
1429 'skip',
1430 ('start', '{nsb}b'),
1431 ('end-ns', None),
1432 ('start-ns', ('', 'ns2')),
1433 ('start', '{ns2}d'),
1434 ('start', '{ns2}e'),
1435 ('end-ns', None),
1436 ('end-ns', None)
1437 ],
1438 events)
1439
1450
1452
# itertext() must leave out the content of comments and processing
# instructions while still yielding their tail text ("CTAIL", " PITAIL ").
1453 XML = self.etree.XML
1454 root = XML(_bytes(
1455 "<root>RTEXT<a></a>ATAIL<b/><!-- COMMENT -->CTAIL<?PI PITEXT?> PITAIL </root>"
1456 ))
1457
1458 text = list(root.itertext())
1459 self.assertEqual(["RTEXT", "ATAIL", "CTAIL", " PITAIL "],
1460 text)
1461
# A custom Resolver supplies the DTD as a text string via resolve_string();
# the DTD defines &myentity; as the requested URL, so the parsed root text
# must equal that URL.
1463 parse = self.etree.parse
1464 parser = self.etree.XMLParser(dtd_validation=True)
1465 assertEqual = self.assertEqual
1466 test_url = _str("__nosuch.dtd")
1467
# Resolver that checks the requested URL and answers with an in-memory DTD.
1468 class MyResolver(self.etree.Resolver):
1469 def resolve(self, url, id, context):
1470 assertEqual(url, test_url)
1471 return self.resolve_string(
1472 _str('''<!ENTITY myentity "%s">
1473 <!ELEMENT doc ANY>''') % url, context)
1474
1475 parser.resolvers.add(MyResolver())
1476
1477 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1478 tree = parse(StringIO(xml), parser)
1479 root = tree.getroot()
1480 self.assertEqual(root.text, test_url)
1481
# Same as the text-string resolver case, but the DTD handed to
# resolve_string() is UTF-8 encoded bytes.
1483 parse = self.etree.parse
1484 parser = self.etree.XMLParser(dtd_validation=True)
1485 assertEqual = self.assertEqual
1486 test_url = _str("__nosuch.dtd")
1487
# Resolver that checks the requested URL and answers with an encoded DTD.
1488 class MyResolver(self.etree.Resolver):
1489 def resolve(self, url, id, context):
1490 assertEqual(url, test_url)
1491 return self.resolve_string(
1492 (_str('''<!ENTITY myentity "%s">
1493 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1494 context)
1495
1496 parser.resolvers.add(MyResolver())
1497
1498 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1499 tree = parse(StringIO(xml), parser)
1500 root = tree.getroot()
1501 self.assertEqual(root.text, test_url)
1502
# The resolver supplies the DTD through resolve_file() with a SillyFileLike
# object wrapping the DTD text; the entity must expand to the requested URL.
1504 parse = self.etree.parse
1505 parser = self.etree.XMLParser(dtd_validation=True)
1506 assertEqual = self.assertEqual
1507 test_url = _str("__nosuch.dtd")
1508
# Resolver that checks the requested URL and answers with a file-like DTD.
1509 class MyResolver(self.etree.Resolver):
1510 def resolve(self, url, id, context):
1511 assertEqual(url, test_url)
1512 return self.resolve_file(
1513 SillyFileLike(
1514 _str('''<!ENTITY myentity "%s">
1515 <!ELEMENT doc ANY>''') % url), context)
1516
1517 parser.resolvers.add(MyResolver())
1518
1519 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1520 tree = parse(StringIO(xml), parser)
1521 root = tree.getroot()
1522 self.assertEqual(root.text, test_url)
1523
# The resolver redirects the DTD request to the file test.dtd in the test
# directory via resolve_filename(). With attribute_defaults=True the parsed
# elements must expose the 'default' attribute values checked below
# (presumably declared as defaults in test.dtd -- confirm against that file).
1525 parse = self.etree.parse
1526 parser = self.etree.XMLParser(attribute_defaults=True)
1527 assertEqual = self.assertEqual
1528 test_url = _str("__nosuch.dtd")
1529
# Resolver that checks the requested URL and answers with a file name.
1530 class MyResolver(self.etree.Resolver):
1531 def resolve(self, url, id, context):
1532 assertEqual(url, test_url)
1533 return self.resolve_filename(
1534 fileInTestDir('test.dtd'), context)
1535
1536 parser.resolvers.add(MyResolver())
1537
1538 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1539 tree = parse(StringIO(xml), parser)
1540 root = tree.getroot()
1541 self.assertEqual(
1542 root.attrib, {'default': 'valueA'})
1543 self.assertEqual(
1544 root[0].attrib, {'default': 'valueB'})
1545
1560
# NOTE(review): the start of this fragment (``parse``, ``parser``,
# ``test_url`` and the MyResolver class) is not visible in this listing.
# Parses with an explicit base_url pointing into the test directory and
# expects the same 'default' attribute values on <a> and <b>.
1561 parser.resolvers.add(MyResolver())
1562
1563 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1564 tree = parse(StringIO(xml), parser,
1565 base_url=fileUrlInTestDir('__test.xml'))
1566 root = tree.getroot()
1567 self.assertEqual(
1568 root.attrib, {'default': 'valueA'})
1569 self.assertEqual(
1570 root[0].attrib, {'default': 'valueB'})
1571
# The resolver supplies the DTD through resolve_file() with a real file
# object opened in binary mode on test.dtd; the 'default' attribute values
# must be present on the parsed elements.
1573 parse = self.etree.parse
1574 parser = self.etree.XMLParser(attribute_defaults=True)
1575 assertEqual = self.assertEqual
1576 test_url = _str("__nosuch.dtd")
1577
# Resolver that checks the requested URL and answers with an open file.
# NOTE(review): the file object is not closed here; presumably the parser
# takes ownership of it -- confirm against the Resolver.resolve_file docs.
1578 class MyResolver(self.etree.Resolver):
1579 def resolve(self, url, id, context):
1580 assertEqual(url, test_url)
1581 return self.resolve_file(
1582 open(fileInTestDir('test.dtd'), 'rb'), context)
1583
1584 parser.resolvers.add(MyResolver())
1585
1586 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1587 tree = parse(StringIO(xml), parser)
1588 root = tree.getroot()
1589 self.assertEqual(
1590 root.attrib, {'default': 'valueA'})
1591 self.assertEqual(
1592 root[0].attrib, {'default': 'valueB'})
1593
# A resolver answering with resolve_empty() provides no DTD content, so the
# reference to &myentity; must fail with XMLSyntaxError -- while the
# resolver is still confirmed to have been consulted.
1595 parse = self.etree.parse
1596 parser = self.etree.XMLParser(load_dtd=True)
1597 assertEqual = self.assertEqual
1598 test_url = _str("__nosuch.dtd")
1599
# Mutable flag holder that the resolver can set from inside resolve().
1600 class check(object):
1601 resolved = False
1602
1603 class MyResolver(self.etree.Resolver):
1604 def resolve(self, url, id, context):
1605 assertEqual(url, test_url)
1606 check.resolved = True
1607 return self.resolve_empty(context)
1608
1609 parser.resolvers.add(MyResolver())
1610
1611 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1612 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1613 self.assertTrue(check.resolved)
1614
1621
# NOTE(review): ``parse``, ``parser`` and ``_LocalException`` are defined on
# lines that are not visible in this listing.
# An exception raised inside Resolver.resolve() must propagate out of
# parse() unchanged.
1622 class MyResolver(self.etree.Resolver):
1623 def resolve(self, url, id, context):
1624 raise _LocalException
1625
1626 parser.resolvers.add(MyResolver())
1627
1628 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1629 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1630
1647
# With resolve_entities=False, replacing root[0] by root[-1] moves <child3>
# to the front (leaving two children), and its first child must be an
# entity node whose name is 'nbsp'.
# NOTE(review): the entity definition/reference in these literals appears to
# have been decoded to whitespace by the listing; verify the exact strings
# against the original file.
1649 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1650 <root>
1651 <child1/>
1652 <child2/>
1653 <child3> </child3>
1654 </root>''')
1655
1656 parser = self.etree.XMLParser(resolve_entities=False)
1657 root = etree.fromstring(xml, parser)
1658 self.assertEqual([ el.tag for el in root ],
1659 ['child1', 'child2', 'child3'])
1660
1661 root[0] = root[-1]
1662 self.assertEqual([ el.tag for el in root ],
1663 ['child3', 'child2'])
1664 self.assertEqual(root[0][0].text, ' ')
1665 self.assertEqual(root[0][0].name, 'nbsp')
1666
1682
1700
1707
# Entity() must raise ValueError for invalid names: embedded spaces, a
# comma, a NUL character, and malformed character references ('#abc',
# '#xxyz').
1709 Entity = self.etree.Entity
1710 self.assertRaises(ValueError, Entity, 'a b c')
1711 self.assertRaises(ValueError, Entity, 'a,b')
1712 self.assertRaises(ValueError, Entity, 'a\0b')
1713 self.assertRaises(ValueError, Entity, '#abc')
1714 self.assertRaises(ValueError, Entity, '#xxyz')
1715
1728
1749
1762
1774
1783
1792
1793
1803
1812
# Appending an element to itself or to one of its own descendants must raise
# ValueError, and the tree structure must be left intact afterwards.
1814 Element = self.etree.Element
1815 SubElement = self.etree.SubElement
1816 root = Element('root')
1817 self.assertRaises(ValueError, root.append, root)
1818 child = SubElement(root, 'child')
1819 self.assertRaises(ValueError, child.append, root)
1820 child2 = SubElement(child, 'child2')
1821 self.assertRaises(ValueError, child2.append, root)
1822 self.assertRaises(ValueError, child2.append, child)
1823 self.assertEqual('child2', root[0][0].tag)
1824
1837
1850
1861
1872
1882
1892
1908
1924
1930
1945
1958
1973
1986
2001
2014
2029
2042
2043
2051
2052
2062
2063
2078
2079
2089
2090
2101
2128
2129
# etree.dump() must raise TypeError when passed None.
2131 self.assertRaises(TypeError, self.etree.dump, None)
2132
2145
2158
2179
2188
# iterchildren(reversed=True) must yield the children last-to-first.
2190 XML = self.etree.XML
2191
2192 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2193 result = []
2194 for el in root.iterchildren(reversed=True):
2195 result.append(el.tag)
2196 self.assertEqual(['three', 'two', 'one'], result)
2197
2206
2215
2224
# iterchildren(tag=[...]) accepts a list of tag names and yields the
# matching children in document order (<three/> has no text, hence None).
2226 XML = self.etree.XML
2227
2228 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2229 result = []
2230 for el in root.iterchildren(tag=['two', 'three']):
2231 result.append(el.text)
2232 self.assertEqual(['Two', 'Bla', None], result)
2233
# iterchildren() also accepts several tag names as positional arguments and
# must give the same result as the tag=[...] form.
2235 XML = self.etree.XML
2236
2237 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2238 result = []
2239 for el in root.iterchildren('two', 'three'):
2240 result.append(el.text)
2241 self.assertEqual(['Two', 'Bla', None], result)
2242
# Combining reversed=True with a tag list must yield the matching children
# in reverse document order.
2244 XML = self.etree.XML
2245
2246 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2247 result = []
2248 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2249 result.append(el.text)
2250 self.assertEqual([None, 'Bla', 'Two'], result)
2251
2272
2294
# iterancestors() with several tag names, given either as tag=(...) or as
# positional arguments; every case is asserted in both call forms.
# Tree under test: a -> (b -> d, c); iteration starts from d.
2296 Element = self.etree.Element
2297 SubElement = self.etree.SubElement
2298
2299 a = Element('a')
2300 b = SubElement(a, 'b')
2301 c = SubElement(a, 'c')
2302 d = SubElement(b, 'd')
# Ancestors are yielded nearest-first: parent b, then a.
2303 self.assertEqual(
2304 [b, a],
2305 list(d.iterancestors(tag=('a', 'b'))))
2306 self.assertEqual(
2307 [b, a],
2308 list(d.iterancestors('a', 'b')))
2309
2310 self.assertEqual(
2311 [],
2312 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2313 self.assertEqual(
2314 [],
2315 list(d.iterancestors('w', 'x', 'y', 'z')))
2316
# The starting element itself is never part of its ancestors.
2317 self.assertEqual(
2318 [],
2319 list(d.iterancestors(tag=('d', 'x'))))
2320 self.assertEqual(
2321 [],
2322 list(d.iterancestors('d', 'x')))
2323
# A '*' among the names matches every ancestor.
2324 self.assertEqual(
2325 [b, a],
2326 list(d.iterancestors(tag=('b', '*'))))
2327 self.assertEqual(
2328 [b, a],
2329 list(d.iterancestors('b', '*')))
2330
# c is a sibling of b, not an ancestor of d, so only b matches.
2331 self.assertEqual(
2332 [b],
2333 list(d.iterancestors(tag=('b', 'c'))))
2334 self.assertEqual(
2335 [b],
2336 list(d.iterancestors('b', 'c')))
2337
2354
# iterdescendants() filtered by a single tag, given positionally or as
# tag=...; the element the iteration starts from is never included.
# Tree under test: a -> (b -> d, c -> e).
2356 Element = self.etree.Element
2357 SubElement = self.etree.SubElement
2358
2359 a = Element('a')
2360 b = SubElement(a, 'b')
2361 c = SubElement(a, 'c')
2362 d = SubElement(b, 'd')
2363 e = SubElement(c, 'e')
2364
2365 self.assertEqual(
2366 [],
2367 list(a.iterdescendants('a')))
2368 self.assertEqual(
2369 [],
2370 list(a.iterdescendants(tag='a')))
2371
# After adding a nested <a> below e, that element is found from both a and c.
2372 a2 = SubElement(e, 'a')
2373 self.assertEqual(
2374 [a2],
2375 list(a.iterdescendants('a')))
2376
2377 self.assertEqual(
2378 [a2],
2379 list(c.iterdescendants('a')))
2380 self.assertEqual(
2381 [a2],
2382 list(c.iterdescendants(tag='a')))
2383
# Check iterdescendants() with several tag names, passed as a tuple via the
# ``tag`` keyword and as positional arguments; results are expected in
# document order. Tree used: a -> (b -> d, c -> e).
2385 Element = self.etree.Element
2386 SubElement = self.etree.SubElement
2387
2388 a = Element('a')
2389 b = SubElement(a, 'b')
2390 c = SubElement(a, 'c')
2391 d = SubElement(b, 'd')
2392 e = SubElement(c, 'e')
2393
2394 self.assertEqual(
2395 [b, e],
2396 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2397 self.assertEqual(
2398 [b, e],
2399 list(a.iterdescendants('a', 'b', 'e')))
2400
2401 a2 = SubElement(e, 'a')
2402 self.assertEqual(
2403 [b, a2],
2404 list(a.iterdescendants(tag=('a', 'b'))))
2405 self.assertEqual(
2406 [b, a2],
2407 list(a.iterdescendants('a', 'b')))
2408
2409 self.assertEqual(
2410 [],
2411 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2412 self.assertEqual(
2413 [],
2414 list(c.iterdescendants('x', 'y', 'z')))
2415
# A '*' entry among non-matching names selects every descendant.
2416 self.assertEqual(
2417 [b, d, c, e, a2],
2418 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2419 self.assertEqual(
2420 [b, d, c, e, a2],
2421 list(a.iterdescendants('x', 'y', 'z', '*')))
2422
2440
2457
2475
2499
# Check itersiblings() with a ``tag`` filter, for following siblings (the
# default) and for preceding siblings (``preceding=True``).
# Tree used: a -> (b -> d, c).
2501 Element = self.etree.Element
2502 SubElement = self.etree.SubElement
2503
2504 a = Element('a')
2505 b = SubElement(a, 'b')
2506 c = SubElement(a, 'c')
2507 d = SubElement(b, 'd')
2508 self.assertEqual(
2509 [],
2510 list(a.itersiblings(tag='XXX')))
2511 self.assertEqual(
2512 [c],
2513 list(b.itersiblings(tag='c')))
2514 self.assertEqual(
2515 [c],
2516 list(b.itersiblings(tag='*')))
2517 self.assertEqual(
2518 [b],
2519 list(c.itersiblings(preceding=True, tag='b')))
# The element itself is never among its own siblings.
2520 self.assertEqual(
2521 [],
2522 list(c.itersiblings(preceding=True, tag='c')))
2523
# Check itersiblings() with a tuple of tag names, for following and for
# preceding siblings. Tree used: a -> (b -> d, c, e).
2525 Element = self.etree.Element
2526 SubElement = self.etree.SubElement
2527
2528 a = Element('a')
2529 b = SubElement(a, 'b')
2530 c = SubElement(a, 'c')
2531 d = SubElement(b, 'd')
2532 e = SubElement(a, 'e')
2533 self.assertEqual(
2534 [],
2535 list(a.itersiblings(tag=('XXX', 'YYY'))))
2536 self.assertEqual(
2537 [c, e],
2538 list(b.itersiblings(tag=('c', 'd', 'e'))))
# A duplicated name in the tuple ('b' twice) does not duplicate results.
2539 self.assertEqual(
2540 [b],
2541 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
# Preceding siblings are reported nearest first.
2542 self.assertEqual(
2543 [c, b],
2544 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2545
# Check etree.parseid(): parse a document whose internal DTD declares the
# attributes ``myid`` (on h1) and ``someid`` (on p) as type ID, and verify the
# returned ID dictionary. The expected mapping holds the two DTD-declared IDs
# plus the xml:id value; "note1" (plain ``id`` attribute, not declared as ID)
# is not expected. The tree must serialise the same as one parsed with XML().
2547 parseid = self.etree.parseid
2548 XML = self.etree.XML
2549 xml_text = _bytes('''
2550 <!DOCTYPE document [
2551 <!ELEMENT document (h1,p)*>
2552 <!ELEMENT h1 (#PCDATA)>
2553 <!ATTLIST h1 myid ID #REQUIRED>
2554 <!ELEMENT p (#PCDATA)>
2555 <!ATTLIST p someid ID #REQUIRED>
2556 ]>
2557 <document>
2558 <h1 myid="chapter1">...</h1>
2559 <p id="note1" class="note">...</p>
2560 <p>Regular paragraph.</p>
2561 <p xml:id="xmlid">XML:ID paragraph.</p>
2562 <p someid="warn1" class="warning">...</p>
2563 </document>
2564 ''')
2565
2566 tree, dic = parseid(BytesIO(xml_text))
2567 root = tree.getroot()
2568 root2 = XML(xml_text)
2569 self.assertEqual(self._writeElement(root),
2570 self._writeElement(root2))
2571 expected = {
2572 "chapter1" : root[0],
2573 "xmlid" : root[3],
2574 "warn1" : root[4]
2575 }
2576 self.assertTrue("chapter1" in dic)
2577 self.assertTrue("warn1" in dic)
2578 self.assertTrue("xmlid" in dic)
2579 self._checkIDDict(dic, expected)
2580
# Check etree.XMLDTDID(): parse a byte string whose internal DTD declares
# ``myid`` and ``someid`` as ID attributes and verify the returned
# (root, id-dict) pair. The expected mapping holds the two DTD-declared IDs
# plus the xml:id value; the undeclared ``id="note1"`` is not expected.
2582 XMLDTDID = self.etree.XMLDTDID
2583 XML = self.etree.XML
2584 xml_text = _bytes('''
2585 <!DOCTYPE document [
2586 <!ELEMENT document (h1,p)*>
2587 <!ELEMENT h1 (#PCDATA)>
2588 <!ATTLIST h1 myid ID #REQUIRED>
2589 <!ELEMENT p (#PCDATA)>
2590 <!ATTLIST p someid ID #REQUIRED>
2591 ]>
2592 <document>
2593 <h1 myid="chapter1">...</h1>
2594 <p id="note1" class="note">...</p>
2595 <p>Regular paragraph.</p>
2596 <p xml:id="xmlid">XML:ID paragraph.</p>
2597 <p someid="warn1" class="warning">...</p>
2598 </document>
2599 ''')
2600
2601 root, dic = XMLDTDID(xml_text)
2602 root2 = XML(xml_text)
2603 self.assertEqual(self._writeElement(root),
2604 self._writeElement(root2))
2605 expected = {
2606 "chapter1" : root[0],
2607 "xmlid" : root[3],
2608 "warn1" : root[4]
2609 }
2610 self.assertTrue("chapter1" in dic)
2611 self.assertTrue("warn1" in dic)
2612 self.assertTrue("xmlid" in dic)
2613 self._checkIDDict(dic, expected)
2614
# Check etree.XMLDTDID() on a document without any DTD: no attribute is
# declared as ID, so the returned ID dictionary is expected to be empty.
2616 XMLDTDID = self.etree.XMLDTDID
2617 XML = self.etree.XML
2618 xml_text = _bytes('''
2619 <document>
2620 <h1 myid="chapter1">...</h1>
2621 <p id="note1" class="note">...</p>
2622 <p>Regular paragraph.</p>
2623 <p someid="warn1" class="warning">...</p>
2624 </document>
2625 ''')
2626
2627 root, dic = XMLDTDID(xml_text)
2628 root2 = XML(xml_text)
2629 self.assertEqual(self._writeElement(root),
2630 self._writeElement(root2))
2631 expected = {}
2632 self._checkIDDict(dic, expected)
2633
# Check etree.XMLDTDID() with a parser created with ``collect_ids=False``:
# although the DTD declares ID attributes, the returned dictionary is
# expected to be empty.
2635 XMLDTDID = self.etree.XMLDTDID
2636 XML = self.etree.XML
2637 xml_text = _bytes('''
2638 <!DOCTYPE document [
2639 <!ELEMENT document (h1,p)*>
2640 <!ELEMENT h1 (#PCDATA)>
2641 <!ATTLIST h1 myid ID #REQUIRED>
2642 <!ELEMENT p (#PCDATA)>
2643 <!ATTLIST p someid ID #REQUIRED>
2644 ]>
2645 <document>
2646 <h1 myid="chapter1">...</h1>
2647 <p id="note1" class="note">...</p>
2648 <p>Regular paragraph.</p>
2649 <p xml:id="xmlid">XML:ID paragraph.</p>
2650 <p someid="warn1" class="warning">...</p>
2651 </document>
2652 ''')
2653
# NOTE(review): this uses the module-level ``etree`` while the neighbouring
# code goes through ``self.etree`` -- confirm that the difference is intended.
2654 parser = etree.XMLParser(collect_ids=False)
2655 root, dic = XMLDTDID(xml_text, parser=parser)
2656 root2 = XML(xml_text)
2657 self.assertEqual(self._writeElement(root),
2658 self._writeElement(root2))
2659 self.assertFalse(dic)
2660 self._checkIDDict(dic, {})
2661
# Compare an ID dictionary ``dic`` with the ``expected`` mapping: same length,
# same sorted items and same sorted keys. The iteritems()/iterkeys() variants
# are only exercised when sys.version_info < (3,).
# (Presumably the body of the _checkIDDict(dic, expected) helper that the
# code above calls -- the header is not visible here.)
2663 self.assertEqual(len(dic),
2664 len(expected))
2665 self.assertEqual(sorted(dic.items()),
2666 sorted(expected.items()))
2667 if sys.version_info < (3,):
2668 self.assertEqual(sorted(dic.iteritems()),
2669 sorted(expected.iteritems()))
2670 self.assertEqual(sorted(dic.keys()),
2671 sorted(expected.keys()))
2672 if sys.version_info < (3,):
2673 self.assertEqual(sorted(dic.iterkeys()),
2674 sorted(expected.iterkeys()))
# The value comparisons follow a Python 2 version guard; presumably because
# the values are elements, which cannot be sorted on Python 3 -- TODO confirm.
2675 if sys.version_info < (3,):
2676 self.assertEqual(sorted(dic.values()),
2677 sorted(expected.values()))
2678 self.assertEqual(sorted(dic.itervalues()),
2679 sorted(expected.itervalues()))
2680
2687
# Check that an element created with an ``nsmap`` mapping a prefix to its
# namespace reports that prefix and serialises with the prefix declaration.
2689 etree = self.etree
2690
2691 r = {'foo': 'http://ns.infrae.com/foo'}
2692 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2693 self.assertEqual(
2694 'foo',
2695 e.prefix)
2696 self.assertEqual(
2697 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2698 self._writeElement(e))
2699
# Check that a ``None`` key in ``nsmap`` declares a default namespace: the
# element has no prefix, keeps its qualified tag, and serialises with a plain
# xmlns="..." declaration.
2701 etree = self.etree
2702
2703 r = {None: 'http://ns.infrae.com/foo'}
2704 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2705 self.assertEqual(
2706 None,
2707 e.prefix)
2708 self.assertEqual(
2709 '{http://ns.infrae.com/foo}bar',
2710 e.tag)
2711 self.assertEqual(
2712 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2713 self._writeElement(e))
2714
# Check that a default namespace and an additional, unused prefixed namespace
# from ``nsmap`` are both declared on the serialised element.
2716 etree = self.etree
2717
2718 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2719 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2720 self.assertEqual(None, e.prefix)
2721 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2722 self.assertEqual(
2723 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2724 self._writeElement(e))
2725
# Check that an attribute set with a namespace-qualified name picks up the
# prefix that ``nsmap`` declares for that namespace when serialised.
2727 etree = self.etree
2728
2729 r = {None: 'http://ns.infrae.com/foo',
2730 'hoi': 'http://ns.infrae.com/hoi'}
2731 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2732 e.set('{http://ns.infrae.com/hoi}test', 'value')
2733 self.assertEqual(
2734 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2735 self._writeElement(e))
2736
# Check namespace handling when appending an element into a tree that declares
# the same namespace as its default: per the expected output, the appended
# element is written without a prefix, while its namespaced attribute keeps
# the 'test' prefix (and the declaration that goes with it).
2738 etree = self.etree
2739
2740 root = etree.Element('{http://test/ns}root',
2741 nsmap={None: 'http://test/ns'})
2742 sub = etree.Element('{http://test/ns}sub',
2743 nsmap={'test': 'http://test/ns'})
2744
2745 sub.attrib['{http://test/ns}attr'] = 'value'
2746 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2747 self.assertEqual(
2748 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2749 etree.tostring(sub))
2750
2751 root.append(sub)
2752 self.assertEqual(
2753 _bytes('<root xmlns="http://test/ns">'
2754 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2755 '</root>'),
2756 etree.tostring(root))
2757
# Check namespace handling when appending a prefixed element with a namespaced
# attribute below a root that has no namespace: element and attribute are
# expected to keep the 'test' prefix and its declaration unchanged.
2759 etree = self.etree
2760
2761 root = etree.Element('root')
2762 sub = etree.Element('{http://test/ns}sub',
2763 nsmap={'test': 'http://test/ns'})
2764
2765 sub.attrib['{http://test/ns}attr'] = 'value'
2766 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2767 self.assertEqual(
2768 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2769 etree.tostring(sub))
2770
2771 root.append(sub)
2772 self.assertEqual(
2773 _bytes('<root>'
2774 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2775 '</root>'),
2776 etree.tostring(root))
2777
# Check that a namespaced attribute on an element that only declares the
# namespace as default gets a generated prefix ('ns0' in the expected output),
# and that this is unchanged after appending below a root without namespace.
# (Presumably because a default namespace declaration does not apply to
# attributes -- see the XML Namespaces specification.)
2779 etree = self.etree
2780
2781 root = etree.Element('root')
2782 sub = etree.Element('{http://test/ns}sub',
2783 nsmap={None: 'http://test/ns'})
2784
2785 sub.attrib['{http://test/ns}attr'] = 'value'
2786 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2787 self.assertEqual(
2788 _bytes('<sub xmlns="http://test/ns" '
2789 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2790 etree.tostring(sub))
2791
2792 root.append(sub)
2793 self.assertEqual(
2794 _bytes('<root>'
2795 '<sub xmlns="http://test/ns"'
2796 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2797 '</root>'),
2798 etree.tostring(root))
2799
# Check namespace handling when the new parent declares the namespace both
# with the 'test' prefix and as default: standalone, the attribute uses the
# generated 'ns0' prefix; after appending, the expected output shows element
# and attribute reusing the root's 'test' prefix with no local declarations.
2801 etree = self.etree
2802
2803 root = etree.Element('{http://test/ns}root',
2804 nsmap={'test': 'http://test/ns',
2805 None: 'http://test/ns'})
2806 sub = etree.Element('{http://test/ns}sub',
2807 nsmap={None: 'http://test/ns'})
2808
2809 sub.attrib['{http://test/ns}attr'] = 'value'
2810 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2811 self.assertEqual(
2812 _bytes('<sub xmlns="http://test/ns" '
2813 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2814 etree.tostring(sub))
2815
2816 root.append(sub)
2817 self.assertEqual(
2818 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2819 '<test:sub test:attr="value"/>'
2820 '</test:root>'),
2821 etree.tostring(root))
2822
# Check that a SubElement created in a namespace already declared (with a
# prefix) on its parent reuses that prefix in the serialised output.
2824 etree = self.etree
2825 r = {None: 'http://ns.infrae.com/foo',
2826 'hoi': 'http://ns.infrae.com/hoi'}
2827 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
# NOTE(review): ``tree`` is never read afterwards; it may exist only so the
# element is wrapped in an ElementTree during the test -- confirm.
2828 tree = etree.ElementTree(element=e)
2829 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2830 self.assertEqual(
2831 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2832 self._writeElement(e))
2833
# Check that appending an element to a parent sharing the same default
# namespace leaves both without a prefix and with unchanged qualified tags.
2835 etree = self.etree
2836
2837 r = {None: 'http://ns.infrae.com/foo'}
2838 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2839 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2840
2841 e1.append(e2)
2842
2843 self.assertEqual(
2844 None,
2845 e1.prefix)
2846 self.assertEqual(
2847 None,
2848 e1[0].prefix)
2849 self.assertEqual(
2850 '{http://ns.infrae.com/foo}bar',
2851 e1.tag)
2852 self.assertEqual(
2853 '{http://ns.infrae.com/foo}bar',
2854 e1[0].tag)
2855
# Check appending an element whose namespace (.../foo) differs from the
# default namespace in the shared nsmap (.../BAR): the parent stays
# unprefixed, the child must have some (non-None) prefix, and both keep their
# qualified tags.
2857 etree = self.etree
2858
2859 r = {None: 'http://ns.infrae.com/BAR'}
2860 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2861 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2862
2863 e1.append(e2)
2864
2865 self.assertEqual(
2866 None,
2867 e1.prefix)
2868 self.assertNotEqual(
2869 None,
2870 e2.prefix)
2871 self.assertEqual(
2872 '{http://ns.infrae.com/BAR}bar',
2873 e1.tag)
2874 self.assertEqual(
2875 '{http://ns.infrae.com/foo}bar',
2876 e2.tag)
2877
# Check that an element moved into another document keeps a valid namespace
# after the only reference to its original root is dropped: the tag stays
# qualified and the new root's existing 'ns' declaration is reused.
2879 ns_href = "http://a.b.c"
2880 one = self.etree.fromstring(
2881 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2882 baz = one[0][0]
2883
2884 two = self.etree.fromstring(
2885 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2886 two.append(baz)
# Drop the source tree before inspecting the moved element.
2887 del one
2888
2889 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2890 self.assertEqual(
2891 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2892 self.etree.tostring(two))
2893
2907
2924
2935
# Check cleanup_namespaces() on 100 nested <a> elements that each declare a
# prefix n0..n99, of which only n64 is used (by the innermost <n64:x/>).
# After cleanup only the 65th <a> (index 64) is expected to keep its
# declaration.
2937 xml = ('<root>' +
2938 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2939 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2940 root = self.etree.fromstring(xml)
# Round-trip check before cleanup: serialisation reproduces the input.
2941 self.assertEqual(xml, self.etree.tostring(root))
2942 self.etree.cleanup_namespaces(root)
2943 self.assertEqual(
2944 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2945 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2946 self.etree.tostring(root))
2947
# Check cleanup_namespaces() with ``top_nsmap``: the used n64 declaration is
# expected to move from the deeply nested <a> up to the root element, all
# unused n* declarations are removed, and the used 'a' prefix declared on
# <n64:x> itself stays in place.
2949 xml = ('<root>' +
2950 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2951 '<n64:x xmlns:a="A" a:attr="X"/>' +
2952 '</a>'*100 +
2953 '</root>').encode('utf8')
2954 root = self.etree.fromstring(xml)
2955 self.assertEqual(xml, self.etree.tostring(root))
2956 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2957 self.assertEqual(
2958 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2959 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2960 self.etree.tostring(root))
2961
# Check cleanup_namespaces() with ``keep_ns_prefixes``: the 'foo' prefix is
# only referenced inside text content ("foo:bar"), not by any tag or
# attribute, yet its declaration is expected to survive; the other unused
# declarations (unused1, no, unused2) are removed.
2963 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2964 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2965 '<foo>foo:bar</foo>'
2966 '</root>').encode('utf8')
2967 root = self.etree.fromstring(xml)
2968 self.assertEqual(xml, self.etree.tostring(root))
2969 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2970 self.assertEqual(
2971 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2972 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2973 b'<foo>foo:bar</foo>'
2974 b'</root>',
2975 self.etree.tostring(root))
2976
# Check cleanup_namespaces() with both ``top_nsmap`` and ``keep_ns_prefixes``:
# the kept 'foo' declaration is expected to move from <sub> to the root, while
# 'unused1' is dropped even though it is listed in ``top_nsmap`` (it is
# neither used nor in ``keep_ns_prefixes``).
2978 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2979 '<sub xmlns:foo="FOO">'
2980 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2981 '<foo>foo:bar</foo>'
2982 '</sub>'
2983 '</root>').encode('utf8')
2984 root = self.etree.fromstring(xml)
2985 self.assertEqual(xml, self.etree.tostring(root))
2986 self.etree.cleanup_namespaces(
2987 root,
2988 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
2989 keep_ns_prefixes=['foo'])
2990 self.assertEqual(
2991 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2992 b'<sub>'
2993 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2994 b'<foo>foo:bar</foo>'
2995 b'</sub>'
2996 b'</root>',
2997 self.etree.tostring(root))
2998
# Check that Element.nsmap returns the same prefix-to-namespace mapping that
# was passed to the constructor.
3000 etree = self.etree
3001
3002 r = {None: 'http://ns.infrae.com/foo',
3003 'hoi': 'http://ns.infrae.com/hoi'}
3004 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
3005 self.assertEqual(
3006 r,
3007 e.nsmap)
3008
# Check that a SubElement's nsmap is the parent's mapping overlaid with its
# own declarations (the child's default namespace overrides the parent's),
# while the parent's nsmap is unchanged.
3010 etree = self.etree
3011
# NOTE(review): the local name ``re`` shadows the ``re`` module imported at
# the top of the file for the rest of this body; harmless here, but a
# different name would be clearer.
3012 re = {None: 'http://ns.infrae.com/foo',
3013 'hoi': 'http://ns.infrae.com/hoi'}
3014 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
3015
3016 rs = {None: 'http://ns.infrae.com/honk',
3017 'top': 'http://ns.infrae.com/top'}
3018 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
3019
# Expected child mapping: parent's entries updated with the child's.
3020 r = re.copy()
3021 r.update(rs)
3022 self.assertEqual(re, e.nsmap)
3023 self.assertEqual(r, s.nsmap)
3024
# Check the nsmap of an HTML-parsed element whose tag carried an undeclared
# prefix: the element is found by its local name and the prefix is expected
# to map to None.
3026 etree = self.etree
3027 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
3028 self.assertEqual({'hha': None}, el.nsmap)
3029
# Check getiterator() with several tag names given as positional arguments.
# Results are expected in document order (start element included) regardless
# of the argument order. Tree used: a -> (b -> d, c -> (e, f)).
3031 Element = self.etree.Element
3032 SubElement = self.etree.SubElement
3033
3034 a = Element('a')
3035 b = SubElement(a, 'b')
3036 c = SubElement(a, 'c')
3037 d = SubElement(b, 'd')
3038 e = SubElement(c, 'e')
3039 f = SubElement(c, 'f')
3040
3041 self.assertEqual(
3042 [a, b],
3043 list(a.getiterator('a', 'b')))
3044 self.assertEqual(
3045 [],
3046 list(a.getiterator('x', 'y')))
3047 self.assertEqual(
3048 [a, f],
3049 list(a.getiterator('f', 'a')))
3050 self.assertEqual(
3051 [c, e, f],
3052 list(c.getiterator('c', '*', 'a')))
# Empty tuples as filters match nothing.
3053 self.assertEqual(
3054 [],
3055 list(a.getiterator( (), () )))
3056
# Check getiterator() with several tag names passed as one tuple argument;
# the expectations mirror the positional-argument form.
# Tree used: a -> (b -> d, c -> (e, f)).
3058 Element = self.etree.Element
3059 SubElement = self.etree.SubElement
3060
3061 a = Element('a')
3062 b = SubElement(a, 'b')
3063 c = SubElement(a, 'c')
3064 d = SubElement(b, 'd')
3065 e = SubElement(c, 'e')
3066 f = SubElement(c, 'f')
3067
3068 self.assertEqual(
3069 [a, b],
3070 list(a.getiterator( ('a', 'b') )))
3071 self.assertEqual(
3072 [],
3073 list(a.getiterator( ('x', 'y') )))
3074 self.assertEqual(
3075 [a, f],
3076 list(a.getiterator( ('f', 'a') )))
3077 self.assertEqual(
3078 [c, e, f],
3079 list(c.getiterator( ('c', '*', 'a') )))
# An empty tuple as filter matches nothing.
3080 self.assertEqual(
3081 [],
3082 list(a.getiterator( () )))
3083
# Check getiterator() with namespace-qualified filters: exact '{ns}name',
# namespace wildcard '{ns}*', the plain '*' wildcard, and the empty namespace
# spelled either as a bare name or as '{}name' / '{}*'.
3085 Element = self.etree.Element
3086 SubElement = self.etree.SubElement
3087
3088 a = Element('{a}a')
3089 b = SubElement(a, '{a}b')
3090 c = SubElement(a, '{a}c')
3091 d = SubElement(b, '{b}d')
3092 e = SubElement(c, '{a}e')
3093 f = SubElement(c, '{b}f')
3094 g = SubElement(c, 'g')
3095
3096 self.assertEqual(
3097 [a],
3098 list(a.getiterator('{a}a')))
3099 self.assertEqual(
3100 [],
3101 list(a.getiterator('{b}a')))
# A bare name only matches elements without a namespace.
3102 self.assertEqual(
3103 [],
3104 list(a.getiterator('a')))
3105 self.assertEqual(
3106 [a,b,d,c,e,f,g],
3107 list(a.getiterator('*')))
3108 self.assertEqual(
3109 [f],
3110 list(c.getiterator('{b}*')))
3111 self.assertEqual(
3112 [d, f],
3113 list(a.getiterator('{b}*')))
3114 self.assertEqual(
3115 [g],
3116 list(a.getiterator('g')))
3117 self.assertEqual(
3118 [g],
3119 list(a.getiterator('{}g')))
3120 self.assertEqual(
3121 [g],
3122 list(a.getiterator('{}*')))
3123
# Check getiterator() with the '{*}name' form, which matches a local name in
# any namespace (including none), and '{*}*', which is expected to return all
# elements but not the appended comment.
3125 Element = self.etree.Element
3126 Comment = self.etree.Comment
3127 SubElement = self.etree.SubElement
3128
3129 a = Element('{a}a')
3130 b = SubElement(a, '{nsA}b')
3131 c = SubElement(b, '{nsB}b')
3132 d = SubElement(a, 'b')
3133 e = SubElement(a, '{nsA}e')
3134 f = SubElement(e, '{nsB}e')
3135 g = SubElement(e, 'e')
3136 a.append(Comment('test'))
3137
3138 self.assertEqual(
3139 [b, c, d],
3140 list(a.getiterator('{*}b')))
3141 self.assertEqual(
3142 [e, f, g],
3143 list(a.getiterator('{*}e')))
3144 self.assertEqual(
3145 [a, b, c, d, e, f, g],
3146 list(a.getiterator('{*}*')))
3147
3172
3188
3205
# Check ElementTree.getelementpath(): the path is relative to the tree's root
# ('.' for the root itself), same-named siblings get a 1-based positional
# predicate, the path resolves back via find(), and elements outside the
# tree's root raise ValueError. Tree used: a -> (b, c -> (d, d)).
3207 a = etree.Element("a")
3208 b = etree.SubElement(a, "b")
3209 c = etree.SubElement(a, "c")
3210 d1 = etree.SubElement(c, "d")
3211 d2 = etree.SubElement(c, "d")
3212 c.text = d1.text = 'TEXT'
3213
3214 tree = etree.ElementTree(a)
3215 self.assertEqual('.', tree.getelementpath(a))
3216 self.assertEqual('c/d[1]', tree.getelementpath(d1))
3217 self.assertEqual('c/d[2]', tree.getelementpath(d2))
3218
3219 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3220 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3221
# Same elements, but seen from a tree rooted at c.
3222 tree = etree.ElementTree(c)
3223 self.assertEqual('.', tree.getelementpath(c))
3224 self.assertEqual('d[2]', tree.getelementpath(d2))
3225 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3226
# From a tree rooted at b, neither the parent nor other branches are reachable.
3227 tree = etree.ElementTree(b)
3228 self.assertEqual('.', tree.getelementpath(b))
3229 self.assertRaises(ValueError, tree.getelementpath, a)
3230 self.assertRaises(ValueError, tree.getelementpath, c)
3231 self.assertRaises(ValueError, tree.getelementpath, d2)
3232
# Check ElementTree.getelementpath() with namespaced tags: path steps use the
# '{ns}name' form, and the positional predicate counts only siblings with the
# same qualified name (the single {http://ns2/}d gets no index, while the two
# {http://ns1/}d siblings get [1] and [2]).
3234 a = etree.Element("{http://ns1/}a")
3235 b = etree.SubElement(a, "{http://ns1/}b")
3236 c = etree.SubElement(a, "{http://ns1/}c")
3237 d1 = etree.SubElement(c, "{http://ns1/}d")
3238 d2 = etree.SubElement(c, "{http://ns2/}d")
3239 d3 = etree.SubElement(c, "{http://ns1/}d")
3240
3241 tree = etree.ElementTree(a)
3242 self.assertEqual('.', tree.getelementpath(a))
3243 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3244 tree.getelementpath(d1))
3245 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3246 tree.getelementpath(d2))
3247 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3248 tree.getelementpath(d3))
3249
# Every generated path must resolve back to the element it was built from.
3250 self.assertEqual(a, tree.find(tree.getelementpath(a)))
3251 self.assertEqual(b, tree.find(tree.getelementpath(b)))
3252 self.assertEqual(c, tree.find(tree.getelementpath(c)))
3253 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3254 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3255 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3256
3257 tree = etree.ElementTree(c)
3258 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3259 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3260 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3261 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3262 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3263 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3264
# Elements outside the subtree rooted at b cannot be addressed.
3265 tree = etree.ElementTree(b)
3266 self.assertRaises(ValueError, tree.getelementpath, d1)
3267 self.assertRaises(ValueError, tree.getelementpath, d2)
3268
3275
3282
3291
# Check findall() with namespace-qualified steps: '{X}b' and '{X}*' each
# match the two x:b elements, and a bare 'b' matches only the three
# un-namespaced <b/> elements.
3293 XML = self.etree.XML
3294 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3295 self.assertEqual(len(root.findall(".//{X}b")), 2)
3296 self.assertEqual(len(root.findall(".//{X}*")), 2)
3297 self.assertEqual(len(root.findall(".//b")), 3)
3298
# Check findall() with the ``namespaces`` argument: the same prefix 'xx' is
# first bound to namespace X and then to Y, and the match counts must follow
# the current binding; the un-prefixed 'b' query is unaffected by the mapping.
3300 XML = self.etree.XML
3301 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3302 nsmap = {'xx': 'X'}
3303 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3304 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3305 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
# Rebinding the same prefix to another namespace must change the results.
3306 nsmap = {'xx': 'Y'}
3307 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3308 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3309 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3310
3320
3327
# Check Element.index(child[, start[, stop]]) on an element with ten
# children: plain lookup, a start offset, and start/stop ranges including
# negative values; a child outside the given range raises ValueError.
3329 etree = self.etree
3330 e = etree.Element('foo')
3331 for i in range(10):
3332 etree.SubElement(e, 'a%s' % i)
3333 for i in range(10):
3334 self.assertEqual(
3335 i,
3336 e.index(e[i]))
3337 self.assertEqual(
3338 3, e.index(e[3], 3))
3339 self.assertRaises(
3340 ValueError, e.index, e[3], 4)
3341 self.assertRaises(
3342 ValueError, e.index, e[3], 0, 2)
3343 self.assertRaises(
3344 ValueError, e.index, e[8], 0, -3)
3345 self.assertRaises(
3346 ValueError, e.index, e[8], -5, -3)
3347 self.assertEqual(
3348 8, e.index(e[8], 0, -1))
# A negative start beyond the number of children still finds the element.
3349 self.assertEqual(
3350 8, e.index(e[8], -12, -1))
3351 self.assertEqual(
3352 0, e.index(e[0], -12, -1))
3353
# Check Element.replace(old, new) when ``new`` is already a child of the same
# parent: the replacement is moved (the child count drops from 10 to 9), it
# keeps its own text and tail, and the replaced element keeps its tail.
3355 etree = self.etree
3356 e = etree.Element('foo')
3357 for i in range(10):
3358 el = etree.SubElement(e, 'a%s' % i)
3359 el.text = "text%d" % i
3360 el.tail = "tail%d" % i
3361
3362 child0 = e[0]
3363 child1 = e[1]
3364 child2 = e[2]
3365
3366 e.replace(e[0], e[1])
3367 self.assertEqual(
3368 9, len(e))
3369 self.assertEqual(
3370 child1, e[0])
3371 self.assertEqual(
3372 child1.text, "text1")
3373 self.assertEqual(
3374 child1.tail, "tail1")
3375 self.assertEqual(
3376 child0.tail, "tail0")
3377 self.assertEqual(
3378 child2, e[1])
3379
# Replace the last child with the current first one (child1 again).
3380 e.replace(e[-1], e[0])
3381 self.assertEqual(
3382 child1, e[-1])
3383 self.assertEqual(
3384 child1.text, "text1")
3385 self.assertEqual(
3386 child1.tail, "tail1")
3387 self.assertEqual(
3388 child2, e[0])
3389
# Check Element.replace(old, new) with a freshly created element: it takes
# the position of the replaced child, keeps its own text and tail, and the
# following sibling is unchanged.
3391 etree = self.etree
3392 e = etree.Element('foo')
3393 for i in range(10):
3394 etree.SubElement(e, 'a%s' % i)
3395
3396 new_element = etree.Element("test")
3397 new_element.text = "TESTTEXT"
3398 new_element.tail = "TESTTAIL"
3399 child1 = e[1]
3400 e.replace(e[0], new_element)
3401 self.assertEqual(
3402 new_element, e[0])
3403 self.assertEqual(
3404 "TESTTEXT",
3405 e[0].text)
3406 self.assertEqual(
3407 "TESTTAIL",
3408 e[0].tail)
3409 self.assertEqual(
3410 child1, e[1])
3411
# Check full-slice assignment of children: ``a[:] = [...]`` sets the children
# in the given order, and ``a[::-1] = [...]`` assigns them in reverse order.
3413 Element = self.etree.Element
3414 SubElement = self.etree.SubElement
3415
3416 a = Element('a')
3417
3418 e = Element('e')
3419 f = Element('f')
3420 g = Element('g')
3421
3422 a[:] = [e, f, g]
3423 self.assertEqual(
3424 [e, f, g],
3425 list(a))
3426
3427 a[::-1] = [e, f, g]
3428 self.assertEqual(
3429 [g, f, e],
3430 list(a))
3431
3449
3467
3485
# Check that assigning more elements than an extended slice can hold raises
# ValueError and leaves the children untouched: ``a[1::2]`` covers two of the
# four children, but three replacement elements are supplied.
3487 Element = self.etree.Element
3488 SubElement = self.etree.SubElement
# NOTE(review): this guard skips the test when the ``slice`` builtin is
# missing. ``slice`` exists in every Python version this file otherwise
# targets, so the guard looks like dead legacy code -- confirm before removing.
3489 try:
3490 slice
3491 except NameError:
3492 print("slice() not found")
3493 return
3494
3495 a = Element('a')
3496 b = SubElement(a, 'b')
3497 c = SubElement(a, 'c')
3498 d = SubElement(a, 'd')
3499 e = SubElement(a, 'e')
3500
3501 x = Element('x')
3502 y = Element('y')
3503 z = Element('z')
3504
3505 self.assertRaises(
3506 ValueError,
3507 operator.setitem, a, slice(1,None,2), [x, y, z])
3508
# The failed assignment must not have modified the children.
3509 self.assertEqual(
3510 [b, c, d, e],
3511 list(a))
3512
3525
# Check Element.sourceline for elements located beyond line 65535. The
# document pads <root> and <p> with 65536 newlines each. The expected values
# depend on the libxml2 version in use: from 2.9 on, line numbers above 65535
# are reported; with older versions the values are capped at 65535.
3527 XML = self.etree.XML
3528 root = XML(_bytes(
3529 '<?xml version="1.0"?>\n'
3530 '<root>' + '\n' * 65536 +
3531 '<p>' + '\n' * 65536 + '</p>\n' +
3532 '<br/>\n'
3533 '</root>'))
3534
3535 if self.etree.LIBXML_VERSION >= (2, 9):
3536 expected = [2, 131074, 131076]
3537 else:
3538 expected = [2, 65535, 65535]
3539
3540 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3541
3549
3558
3568
3578
3584
3592
3598
3605
3611
# Check the ``docinfo`` of a parsed document that has an XML declaration and
# a PUBLIC doctype: encoding, XML version, public ID, system URL, root name
# and the reconstructed doctype string.
3613 etree = self.etree
3614 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3615 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3616 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3617 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3618
3619 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3620
3621 tree = etree.parse(BytesIO(xml))
3622 docinfo = tree.docinfo
3623 self.assertEqual(docinfo.encoding, "ascii")
3624 self.assertEqual(docinfo.xml_version, "1.0")
3625 self.assertEqual(docinfo.public_id, pub_id)
3626 self.assertEqual(docinfo.system_url, sys_id)
3627 self.assertEqual(docinfo.root_name, 'html')
3628 self.assertEqual(docinfo.doctype, doctype_string)
3629
3645
3657
3669
3675
# Check that tostring(..., doctype=...) replaces the document's own doctype
# in the output: the expected bytes are the input with '<!DOCTYPE root>'
# swapped for the override string.
3677 etree = self.etree
3678 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3679 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3680 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3681
3682 xml = _bytes('<!DOCTYPE root>\n<root/>')
3683 tree = etree.parse(BytesIO(xml))
3684 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3685 etree.tostring(tree, doctype=doctype_string))
3686
# Check Element.base: it initially reflects the ``base_url`` given to XML()
# without creating an xml:base attribute; assigning to ``.base`` changes the
# value and makes it visible as the xml:base attribute.
3688 etree = self.etree
3689 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3690 self.assertEqual(root.base, "http://no/such/url")
3691 self.assertEqual(
3692 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3693 root.base = "https://secret/url"
3694 self.assertEqual(root.base, "https://secret/url")
3695 self.assertEqual(
3696 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3697 "https://secret/url")
3698
# Check the reverse direction of the xml:base link: setting the xml:base
# attribute via set() is reflected by Element.base.
3700 etree = self.etree
3701 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3702 self.assertEqual(root.base, "http://no/such/url")
3703 self.assertEqual(
3704 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3705 root.set('{http://www.w3.org/XML/1998/namespace}base',
3706 "https://secret/url")
3707 self.assertEqual(root.base, "https://secret/url")
3708 self.assertEqual(
3709 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3710 "https://secret/url")
3711
3717
3722
3729
3743
# Check that a string containing a NUL character is rejected with ValueError
# as element text, as tail, and as tag name (native string literals).
3745 Element = self.etree.Element
3746
3747 a = Element('a')
3748 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3749 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3750
3751 self.assertRaises(ValueError, Element, 'ha\0ho')
3752
# Check that a NUL character is rejected with ValueError as element text, as
# tail, and as tag name when the string is passed through the ``_str`` helper.
3754 Element = self.etree.Element
3755
3756 a = Element('a')
3757 self.assertRaises(ValueError, setattr, a, "text",
3758 _str('ha\0ho'))
3759 self.assertRaises(ValueError, setattr, a, "tail",
3760 _str('ha\0ho'))
3761
3762 self.assertRaises(ValueError, Element,
3763 _str('ha\0ho'))
3764
# Check that the control characters \x07 and \x02 are rejected with
# ValueError as element text, as tail, and as tag name (native string
# literals).
3766 Element = self.etree.Element
3767
3768 a = Element('a')
3769 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3770 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3771
3772 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3773 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3774
3775 self.assertRaises(ValueError, Element, 'ha\x07ho')
3776 self.assertRaises(ValueError, Element, 'ha\x02ho')
3777
# Check that the control characters \x07 and \x02 are rejected with
# ValueError as element text, as tail, and as tag name when the string is
# passed through the ``_str`` helper.
3779 Element = self.etree.Element
3780
3781 a = Element('a')
3782 self.assertRaises(ValueError, setattr, a, "text",
3783 _str('ha\x07ho'))
3784 self.assertRaises(ValueError, setattr, a, "text",
3785 _str('ha\x02ho'))
3786
3787 self.assertRaises(ValueError, setattr, a, "tail",
3788 _str('ha\x07ho'))
3789 self.assertRaises(ValueError, setattr, a, "tail",
3790 _str('ha\x02ho'))
3791
3792 self.assertRaises(ValueError, Element,
3793 _str('ha\x07ho'))
3794 self.assertRaises(ValueError, Element,
3795 _str('ha\x02ho'))
3796
# Check that the control characters \x07 and \x02 are still rejected with
# ValueError (as text, tail and tag name) when they follow a non-ASCII
# character (\u1234) in a string passed through the ``_str`` helper.
3798 Element = self.etree.Element
3799
3800 a = Element('a')
3801 self.assertRaises(ValueError, setattr, a, "text",
3802 _str('ha\u1234\x07ho'))
3803 self.assertRaises(ValueError, setattr, a, "text",
3804 _str('ha\u1234\x02ho'))
3805
3806 self.assertRaises(ValueError, setattr, a, "tail",
3807 _str('ha\u1234\x07ho'))
3808 self.assertRaises(ValueError, setattr, a, "tail",
3809 _str('ha\u1234\x02ho'))
3810
3811 self.assertRaises(ValueError, Element,
3812 _str('ha\u1234\x07ho'))
3813 self.assertRaises(ValueError, Element,
3814 _str('ha\u1234\x02ho'))
3815
3829
3834
3852
3872
# Check tostring(method='html') with respect to the element's tail: the tail
# ("\r\n" after </div>) is included by default and with ``with_tail=True``,
# and omitted with ``with_tail=False``. The "\r\n" inside <i> must be
# preserved in every case.
3874 tostring = self.etree.tostring
3875 html = self.etree.fromstring(
3876 '<html><body>'
3877 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3878 '</body></html>',
3879 parser=self.etree.HTMLParser())
3880 self.assertEqual(html.tag, 'html')
3881 div = html.find('.//div')
3882 self.assertEqual(div.tail, '\r\n')
3883 result = tostring(div, method='html')
3884 self.assertEqual(
3885 result,
3886 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3887 result = tostring(div, method='html', with_tail=True)
3888 self.assertEqual(
3889 result,
3890 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3891 result = tostring(div, method='html', with_tail=False)
3892 self.assertEqual(
3893 result,
3894 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3895
3917
# Check the ``standalone`` option of tostring(): it is absent from the XML
# declaration by default, rendered as 'yes' for True and 'no' for False, and
# re-parsing the output exposes the value as docinfo.standalone
# (None / True / False).
3919 tostring = self.etree.tostring
3920 XML = self.etree.XML
3921 ElementTree = self.etree.ElementTree
3922
3923 root = XML(_bytes("<root/>"))
3924
3925 tree = ElementTree(root)
3926 self.assertEqual(None, tree.docinfo.standalone)
3927
3928 result = tostring(root, xml_declaration=True, encoding="ASCII")
3929 self.assertEqual(result, _bytes(
3930 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3931
3932 result = tostring(root, xml_declaration=True, encoding="ASCII",
3933 standalone=True)
3934 self.assertEqual(result, _bytes(
3935 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3936
3937 tree = ElementTree(XML(result))
3938 self.assertEqual(True, tree.docinfo.standalone)
3939
3940 result = tostring(root, xml_declaration=True, encoding="ASCII",
3941 standalone=False)
3942 self.assertEqual(result, _bytes(
3943 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3944
3945 tree = ElementTree(XML(result))
3946 self.assertEqual(False, tree.docinfo.standalone)
3947
3967
# Check tostring(method="text", encoding="UTF-16"): the output is the
# concatenated text and tail content in document order (including the root's
# own tail), encoded as UTF-16, with non-ASCII characters preserved.
3969 tostring = self.etree.tostring
3970 Element = self.etree.Element
3971 SubElement = self.etree.SubElement
3972
3973 a = Element('a')
3974 a.text = "A"
3975 a.tail = "tail"
3976 b = SubElement(a, 'b')
3977 b.text = "B"
3978 b.tail = _str("Søk på nettet")
3979 c = SubElement(a, 'c')
3980 c.text = "C"
3981
3982 result = tostring(a, method="text", encoding="UTF-16")
3983
3984 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3985 result)
3986
# Check tostring(method="text") with non-ASCII content: without an explicit
# encoding the call is expected to raise UnicodeEncodeError, while
# encoding="UTF-8" returns the concatenated text/tail content as UTF-8 bytes.
3988 tostring = self.etree.tostring
3989 Element = self.etree.Element
3990 SubElement = self.etree.SubElement
3991
3992 a = Element('a')
3993 a.text = _str('Søk på nettetA')
3994 a.tail = "tail"
3995 b = SubElement(a, 'b')
3996 b.text = "B"
3997 b.tail = _str('Søk på nettetB')
3998 c = SubElement(a, 'c')
3999 c.text = "C"
4000
4001 self.assertRaises(UnicodeEncodeError,
4002 tostring, a, method="text")
4003
4004 self.assertEqual(
4005 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
4006 tostring(a, encoding="UTF-8", method="text"))
4007
4020
4036
4040
4055
4073
4086
4088 tostring = self.etree.tostring
4089 Element = self.etree.Element
4090 SubElement = self.etree.SubElement
4091
4092 a = Element('a')
4093 b = SubElement(a, 'b')
4094 c = SubElement(a, 'c')
4095 d = SubElement(c, 'd')
4096 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
4097 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
4098 self.assertEqual(_bytes('<b></b>'),
4099 canonicalize(tostring(b, encoding=_unicode)))
4100 self.assertEqual(_bytes('<c><d></d></c>'),
4101 canonicalize(tostring(c, encoding=_unicode)))
4102
4107
4122
4124 tostring = self.etree.tostring
4125 Element = self.etree.Element
4126 SubElement = self.etree.SubElement
4127
4128 a = Element('a')
4129 b = SubElement(a, 'b')
4130 c = SubElement(a, 'c')
4131
4132 result = tostring(a, encoding=_unicode)
4133 self.assertEqual(result, "<a><b/><c/></a>")
4134
4135 result = tostring(a, encoding=_unicode, pretty_print=False)
4136 self.assertEqual(result, "<a><b/><c/></a>")
4137
4138 result = tostring(a, encoding=_unicode, pretty_print=True)
4139 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
4140
4152
class SubEl(etree.ElementBase):
    """Bare ElementBase subclass that adds nothing of its own."""
4156
4157 el1 = SubEl()
4158 el2 = SubEl()
4159 self.assertEqual('SubEl', el1.tag)
4160 self.assertEqual('SubEl', el2.tag)
4161 el1.other = el2
4162 el2.other = el1
4163
4164 del el1, el2
4165 gc.collect()
4166
4167
4181
4183 root = etree.Element('parent')
4184 c1 = etree.SubElement(root, 'child1')
4185 c2 = etree.SubElement(root, 'child2')
4186
4187 root.remove(c1)
4188 root.remove(c2)
4189 c1.addnext(c2)
4190 c1.tail = 'abc'
4191 c2.tail = 'xyz'
4192 del c1
4193
4194 c2.getprevious()
4195
4196 self.assertEqual('child1', c2.getprevious().tag)
4197 self.assertEqual('abc', c2.getprevious().tail)
4198
4199
4200
4201 - def _writeElement(self, element, encoding='us-ascii', compression=0):
4212
4213
4257
4258 res_instance = res()
4259 parser = etree.XMLParser(load_dtd = True)
4260 parser.resolvers.add(res_instance)
4261
4262 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4263 parser = parser)
4264
4265 self.include(tree)
4266
4267 called = list(res_instance.called.items())
4268 called.sort()
4269 self.assertEqual(
4270 [("dtd", True), ("include", True), ("input", True)],
4271 called)
4272
4274 data = textwrap.dedent('''
4275 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4276 <foo/>
4277 <xi:include href="./test.xml" />
4278 </doc>
4279 ''')
4280
class Resolver(etree.Resolver):
    """Resolver that records every request and serves a chain of includes.

    The main input document is only noted (``None`` is returned for it).
    A URL ending in ``/test5.xml`` terminates the chain with ``<DONE/>``.
    Any other URL is answered with a copy of the enclosing ``data``
    template whose ``test.xml`` reference is renumbered from the count of
    requests recorded so far.  Each name may be requested only once.
    """
    # Class-level dict, so it is the same object for every instance.
    called = {}

    def resolve(self, url, id, context):
        seen = self.called

        if url.endswith("test_xinclude.xml"):
            # Main input document: record it and return None.
            assert not seen.get("input")
            seen["input"] = True
            return None

        if url.endswith('/test5.xml'):
            # End of the chain: hand back a document without includes.
            assert not seen.get("DONE")
            seen["DONE"] = True
            return self.resolve_string('<DONE/>', context)

        # Anything else: record the file name and serve the next link.
        _, filename = url.rsplit('/', 1)
        assert not seen.get(filename)
        seen[filename] = True
        chained = data.replace('test.xml', 'test%d.xml' % len(seen))
        return self.resolve_string(chained, context)
4300
4301 res_instance = Resolver()
4302 parser = etree.XMLParser(load_dtd=True)
4303 parser.resolvers.add(res_instance)
4304
4305 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4306 parser=parser)
4307
4308 self.include(tree)
4309
4310 called = list(res_instance.called.items())
4311 called.sort()
4312 self.assertEqual(
4313 [("DONE", True), ("input", True), ("test.xml", True),
4314 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4315 called)
4316
4317
4321
4322
4324 from lxml import ElementInclude
4325
4326 - def include(self, tree, loader=None, max_depth=None):
4328
4329 XINCLUDE = {}
4330
4331 XINCLUDE["Recursive1.xml"] = """\
4332 <?xml version='1.0'?>
4333 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4334 <p>The following is the source code of Recursive2.xml:</p>
4335 <xi:include href="Recursive2.xml"/>
4336 </document>
4337 """
4338
4339 XINCLUDE["Recursive2.xml"] = """\
4340 <?xml version='1.0'?>
4341 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4342 <p>The following is the source code of Recursive3.xml:</p>
4343 <xi:include href="Recursive3.xml"/>
4344 </document>
4345 """
4346
4347 XINCLUDE["Recursive3.xml"] = """\
4348 <?xml version='1.0'?>
4349 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4350 <p>The following is the source code of Recursive1.xml:</p>
4351 <xi:include href="Recursive1.xml"/>
4352 </document>
4353 """
4354
4355 XINCLUDE["NonRecursive1.xml"] = """\
4356 <?xml version='1.0'?>
4357 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4358 <p>The following is multiple times the source code of NonRecursive3.xml:</p>
4359 <xi:include href="NonRecursive3.xml"/>
4360 <xi:include href="NonRecursive3.xml"/>
4361 <p>The following is multiple times the source code of Leaf.xml:</p>
4362 <xi:include href="Leaf.xml"/>
4363 <xi:include href="Leaf.xml"/>
4364 <xi:include href="Leaf.xml"/>
4365 <p>One more time the source code of NonRecursive3.xml:</p>
4366 <xi:include href="NonRecursive3.xml"/>
4367 </document>
4368 """
4369
4370 XINCLUDE["NonRecursive2.xml"] = """\
4371 <?xml version='1.0'?>
4372 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4373 <p>The following is multiple times the source code of NonRecursive3.xml:</p>
4374 <xi:include href="NonRecursive3.xml"/>
4375 <xi:include href="NonRecursive3.xml"/>
4376 </document>
4377 """
4378
4379 XINCLUDE["NonRecursive3.xml"] = """\
4380 <?xml version='1.0'?>
4381 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4382 <p>The following is multiple times the source code of Leaf.xml:</p>
4383 <xi:include href="Leaf.xml"/>
4384 <xi:include href="Leaf.xml"/>
4385 </document>
4386 """
4387
4388 XINCLUDE["Leaf.xml"] = """\
4389 <?xml version='1.0'?>
4390 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4391 <p>No further includes</p>
4392 </document>
4393 """
4394
4403
4405
4406 document = self.xinclude_loader("Recursive1.xml").getroottree()
4407 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4408 self.include(document, self.xinclude_loader)
4409 self.assertEqual(str(cm.exception),
4410 "recursive include of 'Recursive2.xml' detected")
4411
4412
4413 document = self.xinclude_loader("Recursive1.xml").getroottree()
4414 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4415 self.include(document, self.xinclude_loader, max_depth=None)
4416 self.assertEqual(str(cm.exception),
4417 "recursive include of 'Recursive2.xml' detected")
4418
4419 document = self.xinclude_loader("Recursive1.xml").getroottree()
4420 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4421 self.include(document, self.xinclude_loader, max_depth=0)
4422 self.assertEqual(str(cm.exception),
4423 "maximum xinclude depth reached when including file Recursive2.xml")
4424
4425 document = self.xinclude_loader("Recursive1.xml").getroottree()
4426 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4427 self.include(document, self.xinclude_loader, max_depth=1)
4428 self.assertEqual(str(cm.exception),
4429 "maximum xinclude depth reached when including file Recursive3.xml")
4430
4431 document = self.xinclude_loader("Recursive1.xml").getroottree()
4432 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4433 self.include(document, self.xinclude_loader, max_depth=2)
4434 self.assertEqual(str(cm.exception),
4435 "maximum xinclude depth reached when including file Recursive1.xml")
4436
4437 document = self.xinclude_loader("Recursive1.xml").getroottree()
4438 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4439 self.include(document, self.xinclude_loader, max_depth=3)
4440 self.assertEqual(str(cm.exception),
4441 "recursive include of 'Recursive2.xml' detected")
4442
4456
4457
4460 tree = self.parse(_bytes('<a><b/></a>'))
4461 f = BytesIO()
4462 tree.write_c14n(f)
4463 s = f.getvalue()
4464 self.assertEqual(_bytes('<a><b></b></a>'),
4465 s)
4466
4468 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4469 f = BytesIO()
4470 tree.write_c14n(f, compression=9)
4471 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
4472 s = gzfile.read()
4473 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4474 s)
4475
4483
4492
4501
4503 tree = self.parse(
4504 b'<?xml version="1.0"?> <a> abc \n <b> btext </b> btail <c/> ctail </a> ')
4505 f = BytesIO()
4506 tree.write(f, method='c14n2')
4507 s = f.getvalue()
4508 self.assertEqual(b'<a> abc \n <b> btext </b> btail <c></c> ctail </a>',
4509 s)
4510
4511 f = BytesIO()
4512 tree.write(f, method='c14n2', strip_text=True)
4513 s = f.getvalue()
4514 self.assertEqual(b'<a>abc<b>btext</b>btail<c></c>ctail</a>',
4515 s)
4516
4534
4552
4564
4576
4588
4590 tree = self.parse(_bytes(
4591 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4592 f = BytesIO()
4593 tree.write_c14n(f)
4594 s = f.getvalue()
4595 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4596 s)
4597 f = BytesIO()
4598 tree.write_c14n(f, exclusive=False)
4599 s = f.getvalue()
4600 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4601 s)
4602 f = BytesIO()
4603 tree.write_c14n(f, exclusive=True)
4604 s = f.getvalue()
4605 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4606 s)
4607
4608 f = BytesIO()
4609 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
4610 s = f.getvalue()
4611 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
4612 s)
4613
4615 tree = self.parse(_bytes(
4616 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4617 s = etree.tostring(tree, method='c14n')
4618 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4619 s)
4620 s = etree.tostring(tree, method='c14n', exclusive=False)
4621 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4622 s)
4623 s = etree.tostring(tree, method='c14n', exclusive=True)
4624 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4625 s)
4626
4627 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4628 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
4629 s)
4630
4632 tree = self.parse(_bytes(
4633 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4634 s = etree.tostring(tree.getroot(), method='c14n')
4635 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4636 s)
4637 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
4638 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4639 s)
4640 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
4641 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4642 s)
4643
4644 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
4645 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4646 s)
4647 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
4648 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
4649 s)
4650
4651 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4652 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4653 s)
4654
4656 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
4657 tree = self.parse(_bytes(
4658 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4659
4660 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
4661 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4662 s)
4663
4664
4667 tree = self.parse(_bytes('<a><b/></a>'))
4668 f = BytesIO()
4669 tree.write(f)
4670 s = f.getvalue()
4671 self.assertEqual(_bytes('<a><b/></a>'),
4672 s)
4673
4675 tree = self.parse(_bytes('<a><b/></a>'))
4676 f = BytesIO()
4677 tree.write(f, doctype='HUHU')
4678 s = f.getvalue()
4679 self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
4680 s)
4681
4683 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4684 f = BytesIO()
4685 tree.write(f, compression=9)
4686 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
4687 s = gzfile.read()
4688 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4689 s)
4690
4692 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4693 f = BytesIO()
4694 tree.write(f, compression=9, doctype='<!DOCTYPE a>')
4695 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
4696 s = gzfile.read()
4697 self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
4698 s)
4699
4701 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4702 f = BytesIO()
4703 tree.write(f, compression=0)
4704 s0 = f.getvalue()
4705
4706 f = BytesIO()
4707 tree.write(f)
4708 self.assertEqual(f.getvalue(), s0)
4709
4710 f = BytesIO()
4711 tree.write(f, compression=1)
4712 s = f.getvalue()
4713 self.assertTrue(len(s) <= len(s0))
4714 with gzip.GzipFile(fileobj=BytesIO(s)) as gzfile:
4715 s1 = gzfile.read()
4716
4717 f = BytesIO()
4718 tree.write(f, compression=9)
4719 s = f.getvalue()
4720 self.assertTrue(len(s) <= len(s0))
4721 with gzip.GzipFile(fileobj=BytesIO(s)) as gzfile:
4722 s9 = gzfile.read()
4723
4724 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4725 s0)
4726 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4727 s1)
4728 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4729 s9)
4730
4738
4747
4755
4764
4774
4775
4777 etree = etree
4778
4800
"""Check that parser errors reach a globally installed Python logger.

This can't really be tested as long as there isn't a way to
reset the logging setup ...
"""
parse = self.etree.parse

# Every message handed to the logger is collected here for the checks below.
messages = []

class Logger(self.etree.PyErrorLog):
    """Error log that records each message instead of emitting it."""
    def log(self, entry, message, *args):
        messages.append(message)

self.etree.use_global_python_log(Logger())

# BytesIO requires bytes; a str literal raises TypeError on Python 3
# before the parser is even called.
f = BytesIO(b'<a><b></c></b></a>')
try:
    parse(f)
except SyntaxError:
    # Expected: the input is deliberately malformed (</c> closes <b>).
    pass
finally:
    # Close the buffer even if parsing fails in an unexpected way.
    f.close()

# At least one logged message must contain each of these fragments.
for fragment in ('mismatch', ':PARSER:', ':ERR_TAG_NAME_MISMATCH:', ':1:15:'):
    self.assertTrue(
        any(fragment in message for message in messages),
        fragment)
4828
4829
4831 etree = etree
4832
4836
class Target(object):
    """Parser target whose callbacks return a printable marker string."""

    def start(self, tag, attrib):
        marker = 'start(%s)' % tag
        return marker

    def end(self, tag):
        marker = 'end(%s)' % tag
        return marker

    def close(self):
        marker = 'close()'
        return marker
4845
4846 parser = self.etree.XMLPullParser(target=Target())
4847 events = parser.read_events()
4848
4849 parser.feed('<root><element>')
4850 self.assertFalse(list(events))
4851 self.assertFalse(list(events))
4852 parser.feed('</element><child>')
4853 self.assertEqual([('end', 'end(element)')], list(events))
4854 parser.feed('</child>')
4855 self.assertEqual([('end', 'end(child)')], list(events))
4856 parser.feed('</root>')
4857 self.assertEqual([('end', 'end(root)')], list(events))
4858 self.assertFalse(list(events))
4859 self.assertEqual('close()', parser.close())
4860
class Target(object):
    """Parser target that answers each callback with a descriptive string."""

    def start(self, tag, attrib):
        # The attributes are accepted but not part of the returned marker.
        opened = 'start(%s)' % tag
        return opened

    def end(self, tag):
        closed = 'end(%s)' % tag
        return closed

    def close(self):
        final = 'close()'
        return final
4869
4870 parser = self.etree.XMLPullParser(
4871 ['start', 'end'], target=Target())
4872 events = parser.read_events()
4873
4874 parser.feed('<root><element>')
4875 self.assertEqual(
4876 [('start', 'start(root)'), ('start', 'start(element)')],
4877 list(events))
4878 self.assertFalse(list(events))
4879 parser.feed('</element><child>')
4880 self.assertEqual(
4881 [('end', 'end(element)'), ('start', 'start(child)')],
4882 list(events))
4883 parser.feed('</child>')
4884 self.assertEqual(
4885 [('end', 'end(child)')],
4886 list(events))
4887 parser.feed('</root>')
4888 self.assertEqual(
4889 [('end', 'end(root)')],
4890 list(events))
4891 self.assertFalse(list(events))
4892 self.assertEqual('close()', parser.close())
4893
4895 parser = self.etree.XMLPullParser(
4896 ['start', 'end'], target=etree.TreeBuilder())
4897 events = parser.read_events()
4898
4899 parser.feed('<root><element>')
4900 self.assert_event_tags(
4901 events, [('start', 'root'), ('start', 'element')])
4902 self.assertFalse(list(events))
4903 parser.feed('</element><child>')
4904 self.assert_event_tags(
4905 events, [('end', 'element'), ('start', 'child')])
4906 parser.feed('</child>')
4907 self.assert_event_tags(
4908 events, [('end', 'child')])
4909 parser.feed('</root>')
4910 self.assert_event_tags(
4911 events, [('end', 'root')])
4912 self.assertFalse(list(events))
4913 root = parser.close()
4914 self.assertEqual('root', root.tag)
4915
class Target(etree.TreeBuilder):
    """TreeBuilder that appends '-huhu' to an element's tag when it is closed."""

    def end(self, tag):
        # Let the base builder finish the element, then rename it.
        closed = super(Target, self).end(tag)
        closed.tag = closed.tag + '-huhu'
        return closed
4922
4923 parser = self.etree.XMLPullParser(
4924 ['start', 'end'], target=Target())
4925 events = parser.read_events()
4926
4927 parser.feed('<root><element>')
4928 self.assert_event_tags(
4929 events, [('start', 'root'), ('start', 'element')])
4930 self.assertFalse(list(events))
4931 parser.feed('</element><child>')
4932 self.assert_event_tags(
4933 events, [('end', 'element-huhu'), ('start', 'child')])
4934 parser.feed('</child>')
4935 self.assert_event_tags(
4936 events, [('end', 'child-huhu')])
4937 parser.feed('</root>')
4938 self.assert_event_tags(
4939 events, [('end', 'root-huhu')])
4940 self.assertFalse(list(events))
4941 root = parser.close()
4942 self.assertEqual('root-huhu', root.tag)
4943
4944
4973
4974
if __name__ == '__main__':
    # Direct execution only prints a pointer to the test runner script.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)
4977