1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 import os.path
13 import unittest
14 import copy
15 import sys
16 import re
17 import gc
18 import operator
19 import tempfile
20 import textwrap
21 import zlib
22 import gzip
23 from contextlib import closing, contextmanager
24
25 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
26 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
27 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
28 from .common_imports import canonicalize, _str, _bytes
29
# Print the versions under test once, at import time, so that every test
# log records which lxml / libxml2 / libxslt builds produced it.
print("")
print("TESTED VERSION: %s" % etree.__version__)
print(" Python: " + repr(sys.version_info))
print(" lxml.etree: " + repr(etree.LXML_VERSION))
print(" libxml used: " + repr(etree.LIBXML_VERSION))
print(" libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION))
print(" libxslt used: " + repr(etree.LIBXSLT_VERSION))
print(" libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION))
print("")
39
40 try:
41 _unicode = unicode
42 except NameError:
43
44 _unicode = str
45
46
47 @contextmanager
48 -def tmpfile():
55
58 """Tests only for etree, not ElementTree"""
59 etree = etree
60
71
80
88
95
97 Element = self.etree.Element
98 el = Element('name')
99 self.assertRaises(ValueError, Element, '{}')
100 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
101
102 self.assertRaises(ValueError, Element, '{test}')
103 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, 'p:name')
108 self.assertRaises(ValueError, Element, '{test}p:name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
112
114 Element = self.etree.Element
115 self.assertRaises(ValueError, Element, "p'name")
116 self.assertRaises(ValueError, Element, 'p"name')
117
118 self.assertRaises(ValueError, Element, "{test}p'name")
119 self.assertRaises(ValueError, Element, '{test}p"name')
120
121 el = Element('name')
122 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
123 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
124
126 Element = self.etree.Element
127 self.assertRaises(ValueError, Element, ' name ')
128 self.assertRaises(ValueError, Element, 'na me')
129 self.assertRaises(ValueError, Element, '{test} name')
130
131 el = Element('name')
132 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
133
141
149
151 Element = self.etree.Element
152 SubElement = self.etree.SubElement
153
154 el = Element('name')
155 self.assertRaises(ValueError, SubElement, el, "p'name")
156 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
157
158 self.assertRaises(ValueError, SubElement, el, 'p"name')
159 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
160
169
178
180 QName = self.etree.QName
181 self.assertRaises(ValueError, QName, '')
182 self.assertRaises(ValueError, QName, None)
183 self.assertRaises(ValueError, QName, None, None)
184 self.assertRaises(ValueError, QName, 'test', '')
185
192
194 QName = self.etree.QName
195 self.assertRaises(ValueError, QName, 'p:name')
196 self.assertRaises(ValueError, QName, 'test', 'p:name')
197
199 QName = self.etree.QName
200 self.assertRaises(ValueError, QName, ' name ')
201 self.assertRaises(ValueError, QName, 'na me')
202 self.assertRaises(ValueError, QName, 'test', ' name')
203
211
213
214 QName = self.etree.QName
215 qname1 = QName('http://myns', 'a')
216 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
217
218 qname2 = QName(a)
219 self.assertEqual(a.tag, qname1.text)
220 self.assertEqual(a.tag, qname1)
221 self.assertEqual(qname1.text, qname2.text)
222 self.assertEqual(qname1, qname2.text)
223 self.assertEqual(qname1.text, qname2)
224 self.assertEqual(qname1, qname2)
225
227
228 etree = self.etree
229 qname = etree.QName('http://myns', 'a')
230 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
231 a.text = qname
232
233 self.assertEqual("p:a", a.text)
234
243
258
264
274
286
288 Element = self.etree.Element
289
290 keys = ["attr%d" % i for i in range(10)]
291 values = ["TEST-%d" % i for i in range(10)]
292 items = list(zip(keys, values))
293
294 root = Element("root")
295 for key, value in items:
296 root.set(key, value)
297 self.assertEqual(keys, root.attrib.keys())
298 self.assertEqual(values, root.attrib.values())
299
300 root2 = Element("root2", root.attrib,
301 attr_99='TOAST-1', attr_98='TOAST-2')
302 self.assertEqual(['attr_98', 'attr_99'] + keys,
303 root2.attrib.keys())
304 self.assertEqual(['TOAST-2', 'TOAST-1'] + values,
305 root2.attrib.values())
306
307 self.assertEqual(keys, root.attrib.keys())
308 self.assertEqual(values, root.attrib.values())
309
311
312
313 Element = self.etree.Element
314 root = Element("root")
315 self.assertRaises(TypeError, root.set, "newattr", 5)
316 self.assertRaises(TypeError, root.set, "newattr", object)
317 self.assertRaises(TypeError, root.set, "newattr", None)
318 self.assertRaises(TypeError, root.set, "newattr")
319
333
355
357 XML = self.etree.XML
358 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
359
360 root = XML(xml)
361 self.etree.strip_elements(root, 'a')
362 self.assertEqual(_bytes('<test><x></x></test>'),
363 self._writeElement(root))
364
365 root = XML(xml)
366 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
367 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
368 self._writeElement(root))
369
370 root = XML(xml)
371 self.etree.strip_elements(root, 'c')
372 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
373 self._writeElement(root))
374
376 XML = self.etree.XML
377 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
378
379 root = XML(xml)
380 self.etree.strip_elements(root, 'a')
381 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
382 self._writeElement(root))
383
384 root = XML(xml)
385 self.etree.strip_elements(root, '{urn:a}b', 'c')
386 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
387 self._writeElement(root))
388
389 root = XML(xml)
390 self.etree.strip_elements(root, '{urn:a}*', 'c')
391 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
392 self._writeElement(root))
393
394 root = XML(xml)
395 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
396 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
397 self._writeElement(root))
398
417
443
470
497
516
529
540
546
548 XML = self.etree.XML
549 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
550 self.assertEqual(root[0].target, "mypi")
551 self.assertEqual(root[0].get('my'), "1")
552 self.assertEqual(root[0].get('test'), " abc ")
553 self.assertEqual(root[0].get('quotes'), "' '")
554 self.assertEqual(root[0].get('only'), None)
555 self.assertEqual(root[0].get('names'), None)
556 self.assertEqual(root[0].get('nope'), None)
557
559 XML = self.etree.XML
560 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
561 self.assertEqual(root[0].target, "mypi")
562 self.assertEqual(root[0].attrib['my'], "1")
563 self.assertEqual(root[0].attrib['test'], " abc ")
564 self.assertEqual(root[0].attrib['quotes'], "' '")
565 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
566 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
567 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
568
570
571 ProcessingInstruction = self.etree.ProcessingInstruction
572
573 a = ProcessingInstruction("PI", "ONE")
574 b = copy.deepcopy(a)
575 b.text = "ANOTHER"
576
577 self.assertEqual('ONE', a.text)
578 self.assertEqual('ANOTHER', b.text)
579
595
610
621
633
652
657
670
681
682 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
683 events = list(iterparse(f, events=('end', 'comment')))
684 root = events[-1][1]
685 self.assertEqual(6, len(events))
686 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
687 [ name(*item) for item in events ])
688 self.assertEqual(
689 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
690 tostring(root))
691
703
704 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
705 events = list(iterparse(f, events=('end', 'pi')))
706 root = events[-2][1]
707 self.assertEqual(8, len(events))
708 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
709 ('pid','d'), 'a', ('pie','e')],
710 [ name(*item) for item in events ])
711 self.assertEqual(
712 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
713 tostring(ElementTree(root)))
714
729
735
737 iterparse = self.etree.iterparse
738 f = BytesIO('<a><b><c/></a>')
739 it = iterparse(f, events=('start', 'end'), recover=True)
740 events = [(ev, el.tag) for ev, el in it]
741 root = it.root
742 self.assertTrue(root is not None)
743
744 self.assertEqual(1, events.count(('start', 'a')))
745 self.assertEqual(1, events.count(('end', 'a')))
746
747 self.assertEqual(1, events.count(('start', 'b')))
748 self.assertEqual(1, events.count(('end', 'b')))
749
750 self.assertEqual(1, events.count(('start', 'c')))
751 self.assertEqual(1, events.count(('end', 'c')))
752
754 iterparse = self.etree.iterparse
755 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
756 it = iterparse(f, events=('start', 'end'), recover=True)
757 events = [(ev, el.tag) for ev, el in it]
758 root = it.root
759 self.assertTrue(root is not None)
760
761 self.assertEqual(1, events.count(('start', 'a')))
762 self.assertEqual(1, events.count(('end', 'a')))
763
764 self.assertEqual(2, events.count(('start', 'b')))
765 self.assertEqual(2, events.count(('end', 'b')))
766
767 self.assertEqual(2, events.count(('start', 'c')))
768 self.assertEqual(2, events.count(('end', 'c')))
769
771 iterparse = self.etree.iterparse
772 f = BytesIO("""
773 <a> \n \n <b> b test </b> \n
774
775 \n\t <c> \n </c> </a> \n """)
776 iterator = iterparse(f, remove_blank_text=True)
777 text = [ (element.text, element.tail)
778 for event, element in iterator ]
779 self.assertEqual(
780 [(" b test ", None), (" \n ", None), (None, None)],
781 text)
782
784 iterparse = self.etree.iterparse
785 f = BytesIO('<a><b><d/></b><c/></a>')
786
787 iterator = iterparse(f, tag="b", events=('start', 'end'))
788 events = list(iterator)
789 root = iterator.root
790 self.assertEqual(
791 [('start', root[0]), ('end', root[0])],
792 events)
793
795 iterparse = self.etree.iterparse
796 f = BytesIO('<a><b><d/></b><c/></a>')
797
798 iterator = iterparse(f, tag="*", events=('start', 'end'))
799 events = list(iterator)
800 self.assertEqual(
801 8,
802 len(events))
803
805 iterparse = self.etree.iterparse
806 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
807
808 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
809 events = list(iterator)
810 root = iterator.root
811 self.assertEqual(
812 [('start', root[0]), ('end', root[0])],
813 events)
814
816 iterparse = self.etree.iterparse
817 f = BytesIO('<a><b><d/></b><c/></a>')
818 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
819 events = list(iterator)
820 root = iterator.root
821 self.assertEqual(
822 [('start', root[0]), ('end', root[0])],
823 events)
824
825 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
826 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
827 events = list(iterator)
828 root = iterator.root
829 self.assertEqual([], events)
830
832 iterparse = self.etree.iterparse
833 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
834 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
835 events = list(iterator)
836 self.assertEqual(8, len(events))
837
839 iterparse = self.etree.iterparse
840 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
841 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
842 events = list(iterator)
843 self.assertEqual([], events)
844
845 f = BytesIO('<a><b><d/></b><c/></a>')
846 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
847 events = list(iterator)
848 self.assertEqual(8, len(events))
849
851 text = _str('Søk på nettet')
852 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
853 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
854 ).encode('iso-8859-1')
855
856 self.assertRaises(self.etree.ParseError,
857 list, self.etree.iterparse(BytesIO(xml_latin1)))
858
860 text = _str('Søk på nettet', encoding="UTF-8")
861 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
862 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
863 ).encode('iso-8859-1')
864
865 iterator = self.etree.iterparse(BytesIO(xml_latin1),
866 encoding="iso-8859-1")
867 self.assertEqual(1, len(list(iterator)))
868
869 a = iterator.root
870 self.assertEqual(a.text, text)
871
873 tostring = self.etree.tostring
874 f = BytesIO('<root><![CDATA[test]]></root>')
875 context = self.etree.iterparse(f, strip_cdata=False)
876 content = [ el.text for event,el in context ]
877
878 self.assertEqual(['test'], content)
879 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
880 tostring(context.root))
881
885
890
909
910
911
934
935
936
938 assertEqual = self.assertEqual
939 assertFalse = self.assertFalse
940
941 events = []
class Target(object):
    """Parser target that logs start/end events and checks each tag.

    Uses ``events``, ``assertFalse`` and ``assertEqual`` from the
    enclosing scope.
    """

    def start(self, tag, attrib):
        """Log "start"; expect no attributes and the tag "TAG"."""
        events.append("start")
        assertFalse(attrib)
        assertEqual("TAG", tag)

    def end(self, tag):
        """Log "end"; expect the tag "TAG"."""
        events.append("end")
        assertEqual("TAG", tag)

    def close(self):
        """Return the fixed marker "DONE"."""
        return "DONE"
952
953 parser = self.etree.XMLParser(target=Target())
954 tree = self.etree.ElementTree()
955
956 self.assertRaises(TypeError,
957 tree.parse, BytesIO("<TAG/>"), parser=parser)
958 self.assertEqual(["start", "end"], events)
959
961
962 events = []
class Target(object):
    """Parser target that logs every callback and fails on ``</a>``.

    Entries are appended to ``events`` from the enclosing scope.
    """

    def start(self, tag, attrib):
        """Log "start-<tag>"."""
        events.append("start-" + tag)

    def end(self, tag):
        """Log "end-<tag>", then raise ValueError when the tag is 'a'."""
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        """Log "data-<text>"."""
        events.append("data-" + data)

    def close(self):
        """Log "close" and return the marker "DONE"."""
        events.append("close")
        return "DONE"
975
976 parser = self.etree.XMLParser(target=Target())
977
978 try:
979 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
980 done = parser.close()
981 self.fail("error expected, but parsing succeeded")
982 except ValueError:
983 done = 'value error received as expected'
984
985 self.assertEqual(["start-root", "data-A", "start-a",
986 "data-ca", "end-a", "close"],
987 events)
988
990
991 events = []
class Target(object):
    """Parser target that logs every callback and fails on ``</a>``.

    Entries are appended to ``events`` from the enclosing scope.
    """

    def start(self, tag, attrib):
        """Log "start-<tag>"."""
        events.append("start-" + tag)

    def end(self, tag):
        """Log "end-<tag>", then raise ValueError when the tag is 'a'."""
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        """Log "data-<text>"."""
        events.append("data-" + data)

    def close(self):
        """Log "close" and return the marker "DONE"."""
        events.append("close")
        return "DONE"
1004
1005 parser = self.etree.XMLParser(target=Target())
1006
1007 try:
1008 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
1009 parser=parser)
1010 self.fail("error expected, but parsing succeeded")
1011 except ValueError:
1012 done = 'value error received as expected'
1013
1014 self.assertEqual(["start-root", "data-A", "start-a",
1015 "data-ca", "end-a", "close"],
1016 events)
1017
1019
1020 events = []
class Target(object):
    """Parser target that logs element, text and comment callbacks.

    Entries are appended to ``events`` from the enclosing scope.
    """

    def start(self, tag, attrib):
        """Log "start-<tag>"."""
        events.append("start-" + tag)

    def end(self, tag):
        """Log "end-<tag>"."""
        events.append("end-" + tag)

    def data(self, data):
        """Log "data-<text>"."""
        events.append("data-" + data)

    def comment(self, text):
        """Log "comment-<text>"."""
        events.append("comment-" + text)

    def close(self):
        """Return the fixed marker "DONE" (nothing is logged)."""
        return "DONE"
1032
1033 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1034
1035 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1036 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1037 done = parser.close()
1038
1039 self.assertEqual("DONE", done)
1040 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1041 "start-sub", "end-sub", "data-B", "end-root"],
1042 events)
1043
1049 def end(self, tag):
1050 events.append("end-" + tag)
1051 def data(self, data):
1052 events.append("data-" + data)
1053 def comment(self, text):
1054 events.append("comment-" + text)
1055 def close(self):
1056 return "DONE"
1057
1058 parser = self.etree.XMLParser(target=Target())
1059
1060 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1061 done = parser.close()
1062
1063 self.assertEqual("DONE", done)
1064 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1065 "start-sub", "end-sub", "comment-c", "data-B",
1066 "end-root", "comment-d"],
1067 events)
1068
1070 events = []
class Target(object):
    """Parser target that logs element, text and PI callbacks.

    Entries are appended to ``events`` from the enclosing scope.
    """

    def start(self, tag, attrib):
        """Log "start-<tag>"."""
        events.append("start-" + tag)

    def end(self, tag):
        """Log "end-<tag>"."""
        events.append("end-" + tag)

    def data(self, data):
        """Log "data-<text>"."""
        events.append("data-" + data)

    def pi(self, target, data):
        """Log "pi-<target>-<data>"."""
        events.append("pi-" + target + "-" + data)

    def close(self):
        """Return the fixed marker "DONE" (nothing is logged)."""
        return "DONE"
1082
1083 parser = self.etree.XMLParser(target=Target())
1084
1085 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1086 done = parser.close()
1087
1088 self.assertEqual("DONE", done)
1089 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1090 "data-B", "end-root", "pi-test-c"],
1091 events)
1092
1094 events = []
class Target(object):
    """Parser target that logs start, end and text callbacks.

    Entries are appended to ``events`` from the enclosing scope.
    """

    def start(self, tag, attrib):
        """Log "start-<tag>"."""
        events.append("start-" + tag)

    def end(self, tag):
        """Log "end-<tag>"."""
        events.append("end-" + tag)

    def data(self, data):
        """Log "data-<text>"."""
        events.append("data-" + data)

    def close(self):
        """Return the fixed marker "DONE" (nothing is logged)."""
        return "DONE"
1104
1105 parser = self.etree.XMLParser(target=Target(),
1106 strip_cdata=False)
1107
1108 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1109 done = parser.close()
1110
1111 self.assertEqual("DONE", done)
1112 self.assertEqual(["start-root", "data-A", "start-a",
1113 "data-ca", "end-a", "data-B", "end-root"],
1114 events)
1115
1117 events = []
class Target(object):
    """Parser target that logs start, end, text and close callbacks.

    Entries are appended to ``events`` from the enclosing scope.
    """

    def start(self, tag, attrib):
        """Log "start-<tag>"."""
        events.append("start-" + tag)

    def end(self, tag):
        """Log "end-<tag>"."""
        events.append("end-" + tag)

    def data(self, data):
        """Log "data-<text>"."""
        events.append("data-" + data)

    def close(self):
        """Log "close" and return the marker "DONE"."""
        events.append("close")
        return "DONE"
1128
1129 parser = self.etree.XMLParser(target=Target(),
1130 recover=True)
1131
1132 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1133 done = parser.close()
1134
1135 self.assertEqual("DONE", done)
1136 self.assertEqual(["start-root", "data-A", "start-a",
1137 "data-ca", "end-a", "data-B",
1138 "end-root", "close"],
1139 events)
1140
1150
1160
1169
1179
1181 iterwalk = self.etree.iterwalk
1182 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1183
1184 iterator = iterwalk(root, events=('start','end'))
1185 events = list(iterator)
1186 self.assertEqual(
1187 [('start', root), ('start', root[0]), ('end', root[0]),
1188 ('start', root[1]), ('end', root[1]), ('end', root)],
1189 events)
1190
1200
1210
1224
1235
1237 iterwalk = self.etree.iterwalk
1238 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1239
1240 attr_name = '{testns}bla'
1241 events = []
1242 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1243 for event, elem in iterator:
1244 events.append(event)
1245 if event == 'start':
1246 if elem.tag != '{ns1}a':
1247 elem.set(attr_name, 'value')
1248
1249 self.assertEqual(
1250 ['start-ns', 'start', 'start', 'start-ns', 'start',
1251 'end', 'end-ns', 'end', 'end', 'end-ns'],
1252 events)
1253
1254 self.assertEqual(
1255 None,
1256 root.get(attr_name))
1257 self.assertEqual(
1258 'value',
1259 root[0].get(attr_name))
1260
1273
1275 iterwalk = self.etree.iterwalk
1276 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1277
1278 iterator = iterwalk(root, events=('start', 'end'))
1279 tags = []
1280 for event, elem in iterator:
1281 tags.append((event, elem.tag))
1282 if elem.tag in ('b', 'e'):
1283
1284 iterator.skip_subtree()
1285
1286 self.assertEqual(
1287 [('start', 'a'),
1288 ('start', 'b'), ('end', 'b'),
1289 ('start', 'd'),
1290 ('start', 'e'), ('end', 'e'),
1291 ('end', 'd'),
1292 ('end', 'a')],
1293 tags)
1294
1296 iterwalk = self.etree.iterwalk
1297 root = self.etree.XML(_bytes(
1298 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1299
1300 events = []
1301 iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1302 for event, elem in iterator:
1303 if event in ('start-ns', 'end-ns'):
1304 events.append((event, elem))
1305 if event == 'start-ns' and elem == ('', 'nsb'):
1306 events.append('skip')
1307 iterator.skip_subtree()
1308 else:
1309 events.append((event, elem.tag))
1310
1311 self.assertEqual(
1312 [('start-ns', ('', 'ns1')),
1313 ('start', '{ns1}a'),
1314 ('start-ns', ('', 'nsb')),
1315 'skip',
1316 ('start', '{nsb}b'),
1317 ('end-ns', None),
1318 ('start-ns', ('', 'ns2')),
1319 ('start', '{ns2}d'),
1320 ('start', '{ns2}e'),
1321 ('end-ns', None),
1322 ('end-ns', None)
1323 ],
1324 events)
1325
1336
1338 parse = self.etree.parse
1339 parser = self.etree.XMLParser(dtd_validation=True)
1340 assertEqual = self.assertEqual
1341 test_url = _str("__nosuch.dtd")
1342
1343 class MyResolver(self.etree.Resolver):
1344 def resolve(self, url, id, context):
1345 assertEqual(url, test_url)
1346 return self.resolve_string(
1347 _str('''<!ENTITY myentity "%s">
1348 <!ELEMENT doc ANY>''') % url, context)
1349
1350 parser.resolvers.add(MyResolver())
1351
1352 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1353 tree = parse(StringIO(xml), parser)
1354 root = tree.getroot()
1355 self.assertEqual(root.text, test_url)
1356
1358 parse = self.etree.parse
1359 parser = self.etree.XMLParser(dtd_validation=True)
1360 assertEqual = self.assertEqual
1361 test_url = _str("__nosuch.dtd")
1362
1363 class MyResolver(self.etree.Resolver):
1364 def resolve(self, url, id, context):
1365 assertEqual(url, test_url)
1366 return self.resolve_string(
1367 (_str('''<!ENTITY myentity "%s">
1368 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1369 context)
1370
1371 parser.resolvers.add(MyResolver())
1372
1373 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1374 tree = parse(StringIO(xml), parser)
1375 root = tree.getroot()
1376 self.assertEqual(root.text, test_url)
1377
1379 parse = self.etree.parse
1380 parser = self.etree.XMLParser(dtd_validation=True)
1381 assertEqual = self.assertEqual
1382 test_url = _str("__nosuch.dtd")
1383
1384 class MyResolver(self.etree.Resolver):
1385 def resolve(self, url, id, context):
1386 assertEqual(url, test_url)
1387 return self.resolve_file(
1388 SillyFileLike(
1389 _str('''<!ENTITY myentity "%s">
1390 <!ELEMENT doc ANY>''') % url), context)
1391
1392 parser.resolvers.add(MyResolver())
1393
1394 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1395 tree = parse(StringIO(xml), parser)
1396 root = tree.getroot()
1397 self.assertEqual(root.text, test_url)
1398
1400 parse = self.etree.parse
1401 parser = self.etree.XMLParser(attribute_defaults=True)
1402 assertEqual = self.assertEqual
1403 test_url = _str("__nosuch.dtd")
1404
1405 class MyResolver(self.etree.Resolver):
1406 def resolve(self, url, id, context):
1407 assertEqual(url, test_url)
1408 return self.resolve_filename(
1409 fileInTestDir('test.dtd'), context)
1410
1411 parser.resolvers.add(MyResolver())
1412
1413 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1414 tree = parse(StringIO(xml), parser)
1415 root = tree.getroot()
1416 self.assertEqual(
1417 root.attrib, {'default': 'valueA'})
1418 self.assertEqual(
1419 root[0].attrib, {'default': 'valueB'})
1420
1435
1436 parser.resolvers.add(MyResolver())
1437
1438 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1439 tree = parse(StringIO(xml), parser,
1440 base_url=fileUrlInTestDir('__test.xml'))
1441 root = tree.getroot()
1442 self.assertEqual(
1443 root.attrib, {'default': 'valueA'})
1444 self.assertEqual(
1445 root[0].attrib, {'default': 'valueB'})
1446
1448 parse = self.etree.parse
1449 parser = self.etree.XMLParser(attribute_defaults=True)
1450 assertEqual = self.assertEqual
1451 test_url = _str("__nosuch.dtd")
1452
1453 class MyResolver(self.etree.Resolver):
1454 def resolve(self, url, id, context):
1455 assertEqual(url, test_url)
1456 return self.resolve_file(
1457 open(fileInTestDir('test.dtd'), 'rb'), context)
1458
1459 parser.resolvers.add(MyResolver())
1460
1461 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1462 tree = parse(StringIO(xml), parser)
1463 root = tree.getroot()
1464 self.assertEqual(
1465 root.attrib, {'default': 'valueA'})
1466 self.assertEqual(
1467 root[0].attrib, {'default': 'valueB'})
1468
1470 parse = self.etree.parse
1471 parser = self.etree.XMLParser(load_dtd=True)
1472 assertEqual = self.assertEqual
1473 test_url = _str("__nosuch.dtd")
1474
class check(object):
    """Mutable flag holder shared with the resolver defined after it."""

    # Starts False; the resolver's resolve() sets it to True when called.
    resolved = False
1477
1478 class MyResolver(self.etree.Resolver):
1479 def resolve(self, url, id, context):
1480 assertEqual(url, test_url)
1481 check.resolved = True
1482 return self.resolve_empty(context)
1483
1484 parser.resolvers.add(MyResolver())
1485
1486 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1487 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1488 self.assertTrue(check.resolved)
1489
1496
1497 class MyResolver(self.etree.Resolver):
1498 def resolve(self, url, id, context):
1499 raise _LocalException
1500
1501 parser.resolvers.add(MyResolver())
1502
1503 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1504 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1505
1522
1524 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1525 <root>
1526 <child1/>
1527 <child2/>
1528 <child3> </child3>
1529 </root>''')
1530
1531 parser = self.etree.XMLParser(resolve_entities=False)
1532 root = etree.fromstring(xml, parser)
1533 self.assertEqual([ el.tag for el in root ],
1534 ['child1', 'child2', 'child3'])
1535
1536 root[0] = root[-1]
1537 self.assertEqual([ el.tag for el in root ],
1538 ['child3', 'child2'])
1539 self.assertEqual(root[0][0].text, ' ')
1540 self.assertEqual(root[0][0].name, 'nbsp')
1541
1557
1575
1582
1584 Entity = self.etree.Entity
1585 self.assertRaises(ValueError, Entity, 'a b c')
1586 self.assertRaises(ValueError, Entity, 'a,b')
1587 self.assertRaises(ValueError, Entity, 'a\0b')
1588 self.assertRaises(ValueError, Entity, '#abc')
1589 self.assertRaises(ValueError, Entity, '#xxyz')
1590
1603
1624
1637
1649
1658
1667
1668
1678
1687
1689 Element = self.etree.Element
1690 SubElement = self.etree.SubElement
1691 root = Element('root')
1692 self.assertRaises(ValueError, root.append, root)
1693 child = SubElement(root, 'child')
1694 self.assertRaises(ValueError, child.append, root)
1695 child2 = SubElement(child, 'child2')
1696 self.assertRaises(ValueError, child2.append, root)
1697 self.assertRaises(ValueError, child2.append, child)
1698 self.assertEqual('child2', root[0][0].tag)
1699
1712
1725
1736
1747
1757
1767
1783
1799
1805
1820
1833
1848
1861
1876
1889
1904
1917
1918
1926
1927
1937
1938
1953
1954
1964
1965
1976
2003
2004
2006 self.assertRaises(TypeError, self.etree.dump, None)
2007
2020
2033
2054
2063
2065 XML = self.etree.XML
2066
2067 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2068 result = []
2069 for el in root.iterchildren(reversed=True):
2070 result.append(el.tag)
2071 self.assertEqual(['three', 'two', 'one'], result)
2072
2081
2090
2099
2101 XML = self.etree.XML
2102
2103 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2104 result = []
2105 for el in root.iterchildren(tag=['two', 'three']):
2106 result.append(el.text)
2107 self.assertEqual(['Two', 'Bla', None], result)
2108
2110 XML = self.etree.XML
2111
2112 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2113 result = []
2114 for el in root.iterchildren('two', 'three'):
2115 result.append(el.text)
2116 self.assertEqual(['Two', 'Bla', None], result)
2117
2119 XML = self.etree.XML
2120
2121 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2122 result = []
2123 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2124 result.append(el.text)
2125 self.assertEqual([None, 'Bla', 'Two'], result)
2126
2147
2169
2171 Element = self.etree.Element
2172 SubElement = self.etree.SubElement
2173
2174 a = Element('a')
2175 b = SubElement(a, 'b')
2176 c = SubElement(a, 'c')
2177 d = SubElement(b, 'd')
2178 self.assertEqual(
2179 [b, a],
2180 list(d.iterancestors(tag=('a', 'b'))))
2181 self.assertEqual(
2182 [b, a],
2183 list(d.iterancestors('a', 'b')))
2184
2185 self.assertEqual(
2186 [],
2187 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2188 self.assertEqual(
2189 [],
2190 list(d.iterancestors('w', 'x', 'y', 'z')))
2191
2192 self.assertEqual(
2193 [],
2194 list(d.iterancestors(tag=('d', 'x'))))
2195 self.assertEqual(
2196 [],
2197 list(d.iterancestors('d', 'x')))
2198
2199 self.assertEqual(
2200 [b, a],
2201 list(d.iterancestors(tag=('b', '*'))))
2202 self.assertEqual(
2203 [b, a],
2204 list(d.iterancestors('b', '*')))
2205
2206 self.assertEqual(
2207 [b],
2208 list(d.iterancestors(tag=('b', 'c'))))
2209 self.assertEqual(
2210 [b],
2211 list(d.iterancestors('b', 'c')))
2212
2229
2231 Element = self.etree.Element
2232 SubElement = self.etree.SubElement
2233
2234 a = Element('a')
2235 b = SubElement(a, 'b')
2236 c = SubElement(a, 'c')
2237 d = SubElement(b, 'd')
2238 e = SubElement(c, 'e')
2239
2240 self.assertEqual(
2241 [],
2242 list(a.iterdescendants('a')))
2243 self.assertEqual(
2244 [],
2245 list(a.iterdescendants(tag='a')))
2246
2247 a2 = SubElement(e, 'a')
2248 self.assertEqual(
2249 [a2],
2250 list(a.iterdescendants('a')))
2251
2252 self.assertEqual(
2253 [a2],
2254 list(c.iterdescendants('a')))
2255 self.assertEqual(
2256 [a2],
2257 list(c.iterdescendants(tag='a')))
2258
2260 Element = self.etree.Element
2261 SubElement = self.etree.SubElement
2262
2263 a = Element('a')
2264 b = SubElement(a, 'b')
2265 c = SubElement(a, 'c')
2266 d = SubElement(b, 'd')
2267 e = SubElement(c, 'e')
2268
2269 self.assertEqual(
2270 [b, e],
2271 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2272 self.assertEqual(
2273 [b, e],
2274 list(a.iterdescendants('a', 'b', 'e')))
2275
2276 a2 = SubElement(e, 'a')
2277 self.assertEqual(
2278 [b, a2],
2279 list(a.iterdescendants(tag=('a', 'b'))))
2280 self.assertEqual(
2281 [b, a2],
2282 list(a.iterdescendants('a', 'b')))
2283
2284 self.assertEqual(
2285 [],
2286 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2287 self.assertEqual(
2288 [],
2289 list(c.iterdescendants('x', 'y', 'z')))
2290
2291 self.assertEqual(
2292 [b, d, c, e, a2],
2293 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2294 self.assertEqual(
2295 [b, d, c, e, a2],
2296 list(a.iterdescendants('x', 'y', 'z', '*')))
2297
2315
2332
2350
2374
2376 Element = self.etree.Element
2377 SubElement = self.etree.SubElement
2378
2379 a = Element('a')
2380 b = SubElement(a, 'b')
2381 c = SubElement(a, 'c')
2382 d = SubElement(b, 'd')
2383 self.assertEqual(
2384 [],
2385 list(a.itersiblings(tag='XXX')))
2386 self.assertEqual(
2387 [c],
2388 list(b.itersiblings(tag='c')))
2389 self.assertEqual(
2390 [c],
2391 list(b.itersiblings(tag='*')))
2392 self.assertEqual(
2393 [b],
2394 list(c.itersiblings(preceding=True, tag='b')))
2395 self.assertEqual(
2396 [],
2397 list(c.itersiblings(preceding=True, tag='c')))
2398
2400 Element = self.etree.Element
2401 SubElement = self.etree.SubElement
2402
2403 a = Element('a')
2404 b = SubElement(a, 'b')
2405 c = SubElement(a, 'c')
2406 d = SubElement(b, 'd')
2407 e = SubElement(a, 'e')
2408 self.assertEqual(
2409 [],
2410 list(a.itersiblings(tag=('XXX', 'YYY'))))
2411 self.assertEqual(
2412 [c, e],
2413 list(b.itersiblings(tag=('c', 'd', 'e'))))
2414 self.assertEqual(
2415 [b],
2416 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2417 self.assertEqual(
2418 [c, b],
2419 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2420
2422 parseid = self.etree.parseid
2423 XML = self.etree.XML
2424 xml_text = _bytes('''
2425 <!DOCTYPE document [
2426 <!ELEMENT document (h1,p)*>
2427 <!ELEMENT h1 (#PCDATA)>
2428 <!ATTLIST h1 myid ID #REQUIRED>
2429 <!ELEMENT p (#PCDATA)>
2430 <!ATTLIST p someid ID #REQUIRED>
2431 ]>
2432 <document>
2433 <h1 myid="chapter1">...</h1>
2434 <p id="note1" class="note">...</p>
2435 <p>Regular paragraph.</p>
2436 <p xml:id="xmlid">XML:ID paragraph.</p>
2437 <p someid="warn1" class="warning">...</p>
2438 </document>
2439 ''')
2440
2441 tree, dic = parseid(BytesIO(xml_text))
2442 root = tree.getroot()
2443 root2 = XML(xml_text)
2444 self.assertEqual(self._writeElement(root),
2445 self._writeElement(root2))
2446 expected = {
2447 "chapter1" : root[0],
2448 "xmlid" : root[3],
2449 "warn1" : root[4]
2450 }
2451 self.assertTrue("chapter1" in dic)
2452 self.assertTrue("warn1" in dic)
2453 self.assertTrue("xmlid" in dic)
2454 self._checkIDDict(dic, expected)
2455
2457 XMLDTDID = self.etree.XMLDTDID
2458 XML = self.etree.XML
2459 xml_text = _bytes('''
2460 <!DOCTYPE document [
2461 <!ELEMENT document (h1,p)*>
2462 <!ELEMENT h1 (#PCDATA)>
2463 <!ATTLIST h1 myid ID #REQUIRED>
2464 <!ELEMENT p (#PCDATA)>
2465 <!ATTLIST p someid ID #REQUIRED>
2466 ]>
2467 <document>
2468 <h1 myid="chapter1">...</h1>
2469 <p id="note1" class="note">...</p>
2470 <p>Regular paragraph.</p>
2471 <p xml:id="xmlid">XML:ID paragraph.</p>
2472 <p someid="warn1" class="warning">...</p>
2473 </document>
2474 ''')
2475
2476 root, dic = XMLDTDID(xml_text)
2477 root2 = XML(xml_text)
2478 self.assertEqual(self._writeElement(root),
2479 self._writeElement(root2))
2480 expected = {
2481 "chapter1" : root[0],
2482 "xmlid" : root[3],
2483 "warn1" : root[4]
2484 }
2485 self.assertTrue("chapter1" in dic)
2486 self.assertTrue("warn1" in dic)
2487 self.assertTrue("xmlid" in dic)
2488 self._checkIDDict(dic, expected)
2489
2491 XMLDTDID = self.etree.XMLDTDID
2492 XML = self.etree.XML
2493 xml_text = _bytes('''
2494 <document>
2495 <h1 myid="chapter1">...</h1>
2496 <p id="note1" class="note">...</p>
2497 <p>Regular paragraph.</p>
2498 <p someid="warn1" class="warning">...</p>
2499 </document>
2500 ''')
2501
2502 root, dic = XMLDTDID(xml_text)
2503 root2 = XML(xml_text)
2504 self.assertEqual(self._writeElement(root),
2505 self._writeElement(root2))
2506 expected = {}
2507 self._checkIDDict(dic, expected)
2508
2510 XMLDTDID = self.etree.XMLDTDID
2511 XML = self.etree.XML
2512 xml_text = _bytes('''
2513 <!DOCTYPE document [
2514 <!ELEMENT document (h1,p)*>
2515 <!ELEMENT h1 (#PCDATA)>
2516 <!ATTLIST h1 myid ID #REQUIRED>
2517 <!ELEMENT p (#PCDATA)>
2518 <!ATTLIST p someid ID #REQUIRED>
2519 ]>
2520 <document>
2521 <h1 myid="chapter1">...</h1>
2522 <p id="note1" class="note">...</p>
2523 <p>Regular paragraph.</p>
2524 <p xml:id="xmlid">XML:ID paragraph.</p>
2525 <p someid="warn1" class="warning">...</p>
2526 </document>
2527 ''')
2528
2529 parser = etree.XMLParser(collect_ids=False)
2530 root, dic = XMLDTDID(xml_text, parser=parser)
2531 root2 = XML(xml_text)
2532 self.assertEqual(self._writeElement(root),
2533 self._writeElement(root2))
2534 self.assertFalse(dic)
2535 self._checkIDDict(dic, {})
2536
2538 self.assertEqual(len(dic),
2539 len(expected))
2540 self.assertEqual(sorted(dic.items()),
2541 sorted(expected.items()))
2542 if sys.version_info < (3,):
2543 self.assertEqual(sorted(dic.iteritems()),
2544 sorted(expected.iteritems()))
2545 self.assertEqual(sorted(dic.keys()),
2546 sorted(expected.keys()))
2547 if sys.version_info < (3,):
2548 self.assertEqual(sorted(dic.iterkeys()),
2549 sorted(expected.iterkeys()))
2550 if sys.version_info < (3,):
2551 self.assertEqual(sorted(dic.values()),
2552 sorted(expected.values()))
2553 self.assertEqual(sorted(dic.itervalues()),
2554 sorted(expected.itervalues()))
2555
2557 etree = self.etree
2558
2559 r = {'foo': 'http://ns.infrae.com/foo'}
2560 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2561 self.assertEqual(
2562 'foo',
2563 e.prefix)
2564 self.assertEqual(
2565 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2566 self._writeElement(e))
2567
2569 etree = self.etree
2570
2571 r = {None: 'http://ns.infrae.com/foo'}
2572 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2573 self.assertEqual(
2574 None,
2575 e.prefix)
2576 self.assertEqual(
2577 '{http://ns.infrae.com/foo}bar',
2578 e.tag)
2579 self.assertEqual(
2580 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2581 self._writeElement(e))
2582
2584 etree = self.etree
2585
2586 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2587 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2588 self.assertEqual(None, e.prefix)
2589 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2590 self.assertEqual(
2591 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2592 self._writeElement(e))
2593
2595 etree = self.etree
2596
2597 r = {None: 'http://ns.infrae.com/foo',
2598 'hoi': 'http://ns.infrae.com/hoi'}
2599 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2600 e.set('{http://ns.infrae.com/hoi}test', 'value')
2601 self.assertEqual(
2602 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2603 self._writeElement(e))
2604
2606 etree = self.etree
2607
2608 root = etree.Element('{http://test/ns}root',
2609 nsmap={None: 'http://test/ns'})
2610 sub = etree.Element('{http://test/ns}sub',
2611 nsmap={'test': 'http://test/ns'})
2612
2613 sub.attrib['{http://test/ns}attr'] = 'value'
2614 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2615 self.assertEqual(
2616 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2617 etree.tostring(sub))
2618
2619 root.append(sub)
2620 self.assertEqual(
2621 _bytes('<root xmlns="http://test/ns">'
2622 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2623 '</root>'),
2624 etree.tostring(root))
2625
2627 etree = self.etree
2628
2629 root = etree.Element('root')
2630 sub = etree.Element('{http://test/ns}sub',
2631 nsmap={'test': 'http://test/ns'})
2632
2633 sub.attrib['{http://test/ns}attr'] = 'value'
2634 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2635 self.assertEqual(
2636 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2637 etree.tostring(sub))
2638
2639 root.append(sub)
2640 self.assertEqual(
2641 _bytes('<root>'
2642 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2643 '</root>'),
2644 etree.tostring(root))
2645
2647 etree = self.etree
2648
2649 root = etree.Element('root')
2650 sub = etree.Element('{http://test/ns}sub',
2651 nsmap={None: 'http://test/ns'})
2652
2653 sub.attrib['{http://test/ns}attr'] = 'value'
2654 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2655 self.assertEqual(
2656 _bytes('<sub xmlns="http://test/ns" '
2657 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2658 etree.tostring(sub))
2659
2660 root.append(sub)
2661 self.assertEqual(
2662 _bytes('<root>'
2663 '<sub xmlns="http://test/ns"'
2664 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2665 '</root>'),
2666 etree.tostring(root))
2667
2669 etree = self.etree
2670
2671 root = etree.Element('{http://test/ns}root',
2672 nsmap={'test': 'http://test/ns',
2673 None: 'http://test/ns'})
2674 sub = etree.Element('{http://test/ns}sub',
2675 nsmap={None: 'http://test/ns'})
2676
2677 sub.attrib['{http://test/ns}attr'] = 'value'
2678 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2679 self.assertEqual(
2680 _bytes('<sub xmlns="http://test/ns" '
2681 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2682 etree.tostring(sub))
2683
2684 root.append(sub)
2685 self.assertEqual(
2686 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2687 '<test:sub test:attr="value"/>'
2688 '</test:root>'),
2689 etree.tostring(root))
2690
2692 etree = self.etree
2693 r = {None: 'http://ns.infrae.com/foo',
2694 'hoi': 'http://ns.infrae.com/hoi'}
2695 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2696 tree = etree.ElementTree(element=e)
2697 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2698 self.assertEqual(
2699 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2700 self._writeElement(e))
2701
2703 etree = self.etree
2704
2705 r = {None: 'http://ns.infrae.com/foo'}
2706 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2707 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2708
2709 e1.append(e2)
2710
2711 self.assertEqual(
2712 None,
2713 e1.prefix)
2714 self.assertEqual(
2715 None,
2716 e1[0].prefix)
2717 self.assertEqual(
2718 '{http://ns.infrae.com/foo}bar',
2719 e1.tag)
2720 self.assertEqual(
2721 '{http://ns.infrae.com/foo}bar',
2722 e1[0].tag)
2723
2725 etree = self.etree
2726
2727 r = {None: 'http://ns.infrae.com/BAR'}
2728 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2729 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2730
2731 e1.append(e2)
2732
2733 self.assertEqual(
2734 None,
2735 e1.prefix)
2736 self.assertNotEqual(
2737 None,
2738 e2.prefix)
2739 self.assertEqual(
2740 '{http://ns.infrae.com/BAR}bar',
2741 e1.tag)
2742 self.assertEqual(
2743 '{http://ns.infrae.com/foo}bar',
2744 e2.tag)
2745
2747 ns_href = "http://a.b.c"
2748 one = self.etree.fromstring(
2749 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2750 baz = one[0][0]
2751
2752 two = self.etree.fromstring(
2753 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2754 two.append(baz)
2755 del one
2756
2757 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2758 self.assertEqual(
2759 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2760 self.etree.tostring(two))
2761
2775
2792
2803
2805 xml = ('<root>' +
2806 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2807 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2808 root = self.etree.fromstring(xml)
2809 self.assertEqual(xml, self.etree.tostring(root))
2810 self.etree.cleanup_namespaces(root)
2811 self.assertEqual(
2812 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2813 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2814 self.etree.tostring(root))
2815
2817 xml = ('<root>' +
2818 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2819 '<n64:x xmlns:a="A" a:attr="X"/>' +
2820 '</a>'*100 +
2821 '</root>').encode('utf8')
2822 root = self.etree.fromstring(xml)
2823 self.assertEqual(xml, self.etree.tostring(root))
2824 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2825 self.assertEqual(
2826 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2827 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2828 self.etree.tostring(root))
2829
2831 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2832 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2833 '<foo>foo:bar</foo>'
2834 '</root>').encode('utf8')
2835 root = self.etree.fromstring(xml)
2836 self.assertEqual(xml, self.etree.tostring(root))
2837 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2838 self.assertEqual(
2839 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2840 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2841 b'<foo>foo:bar</foo>'
2842 b'</root>',
2843 self.etree.tostring(root))
2844
2846 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2847 '<sub xmlns:foo="FOO">'
2848 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2849 '<foo>foo:bar</foo>'
2850 '</sub>'
2851 '</root>').encode('utf8')
2852 root = self.etree.fromstring(xml)
2853 self.assertEqual(xml, self.etree.tostring(root))
2854 self.etree.cleanup_namespaces(
2855 root,
2856 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
2857 keep_ns_prefixes=['foo'])
2858 self.assertEqual(
2859 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2860 b'<sub>'
2861 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2862 b'<foo>foo:bar</foo>'
2863 b'</sub>'
2864 b'</root>',
2865 self.etree.tostring(root))
2866
2868 etree = self.etree
2869
2870 r = {None: 'http://ns.infrae.com/foo',
2871 'hoi': 'http://ns.infrae.com/hoi'}
2872 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2873 self.assertEqual(
2874 r,
2875 e.nsmap)
2876
2878 etree = self.etree
2879
2880 re = {None: 'http://ns.infrae.com/foo',
2881 'hoi': 'http://ns.infrae.com/hoi'}
2882 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2883
2884 rs = {None: 'http://ns.infrae.com/honk',
2885 'top': 'http://ns.infrae.com/top'}
2886 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2887
2888 r = re.copy()
2889 r.update(rs)
2890 self.assertEqual(re, e.nsmap)
2891 self.assertEqual(r, s.nsmap)
2892
2894 etree = self.etree
2895 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2896 self.assertEqual({'hha': None}, el.nsmap)
2897
2899 Element = self.etree.Element
2900 SubElement = self.etree.SubElement
2901
2902 a = Element('a')
2903 b = SubElement(a, 'b')
2904 c = SubElement(a, 'c')
2905 d = SubElement(b, 'd')
2906 e = SubElement(c, 'e')
2907 f = SubElement(c, 'f')
2908
2909 self.assertEqual(
2910 [a, b],
2911 list(a.getiterator('a', 'b')))
2912 self.assertEqual(
2913 [],
2914 list(a.getiterator('x', 'y')))
2915 self.assertEqual(
2916 [a, f],
2917 list(a.getiterator('f', 'a')))
2918 self.assertEqual(
2919 [c, e, f],
2920 list(c.getiterator('c', '*', 'a')))
2921 self.assertEqual(
2922 [],
2923 list(a.getiterator( (), () )))
2924
2926 Element = self.etree.Element
2927 SubElement = self.etree.SubElement
2928
2929 a = Element('a')
2930 b = SubElement(a, 'b')
2931 c = SubElement(a, 'c')
2932 d = SubElement(b, 'd')
2933 e = SubElement(c, 'e')
2934 f = SubElement(c, 'f')
2935
2936 self.assertEqual(
2937 [a, b],
2938 list(a.getiterator( ('a', 'b') )))
2939 self.assertEqual(
2940 [],
2941 list(a.getiterator( ('x', 'y') )))
2942 self.assertEqual(
2943 [a, f],
2944 list(a.getiterator( ('f', 'a') )))
2945 self.assertEqual(
2946 [c, e, f],
2947 list(c.getiterator( ('c', '*', 'a') )))
2948 self.assertEqual(
2949 [],
2950 list(a.getiterator( () )))
2951
2953 Element = self.etree.Element
2954 SubElement = self.etree.SubElement
2955
2956 a = Element('{a}a')
2957 b = SubElement(a, '{a}b')
2958 c = SubElement(a, '{a}c')
2959 d = SubElement(b, '{b}d')
2960 e = SubElement(c, '{a}e')
2961 f = SubElement(c, '{b}f')
2962 g = SubElement(c, 'g')
2963
2964 self.assertEqual(
2965 [a],
2966 list(a.getiterator('{a}a')))
2967 self.assertEqual(
2968 [],
2969 list(a.getiterator('{b}a')))
2970 self.assertEqual(
2971 [],
2972 list(a.getiterator('a')))
2973 self.assertEqual(
2974 [a,b,d,c,e,f,g],
2975 list(a.getiterator('*')))
2976 self.assertEqual(
2977 [f],
2978 list(c.getiterator('{b}*')))
2979 self.assertEqual(
2980 [d, f],
2981 list(a.getiterator('{b}*')))
2982 self.assertEqual(
2983 [g],
2984 list(a.getiterator('g')))
2985 self.assertEqual(
2986 [g],
2987 list(a.getiterator('{}g')))
2988 self.assertEqual(
2989 [g],
2990 list(a.getiterator('{}*')))
2991
2993 Element = self.etree.Element
2994 SubElement = self.etree.SubElement
2995
2996 a = Element('{a}a')
2997 b = SubElement(a, '{nsA}b')
2998 c = SubElement(b, '{nsB}b')
2999 d = SubElement(a, 'b')
3000 e = SubElement(a, '{nsA}e')
3001 f = SubElement(e, '{nsB}e')
3002 g = SubElement(e, 'e')
3003
3004 self.assertEqual(
3005 [b, c, d],
3006 list(a.getiterator('{*}b')))
3007 self.assertEqual(
3008 [e, f, g],
3009 list(a.getiterator('{*}e')))
3010 self.assertEqual(
3011 [a, b, c, d, e, f, g],
3012 list(a.getiterator('{*}*')))
3013
3038
3054
3071
3073 a = etree.Element("a")
3074 b = etree.SubElement(a, "b")
3075 c = etree.SubElement(a, "c")
3076 d1 = etree.SubElement(c, "d")
3077 d2 = etree.SubElement(c, "d")
3078 c.text = d1.text = 'TEXT'
3079
3080 tree = etree.ElementTree(a)
3081 self.assertEqual('.', tree.getelementpath(a))
3082 self.assertEqual('c/d[1]', tree.getelementpath(d1))
3083 self.assertEqual('c/d[2]', tree.getelementpath(d2))
3084
3085 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3086 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3087
3088 tree = etree.ElementTree(c)
3089 self.assertEqual('.', tree.getelementpath(c))
3090 self.assertEqual('d[2]', tree.getelementpath(d2))
3091 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3092
3093 tree = etree.ElementTree(b)
3094 self.assertEqual('.', tree.getelementpath(b))
3095 self.assertRaises(ValueError, tree.getelementpath, a)
3096 self.assertRaises(ValueError, tree.getelementpath, c)
3097 self.assertRaises(ValueError, tree.getelementpath, d2)
3098
3100 a = etree.Element("{http://ns1/}a")
3101 b = etree.SubElement(a, "{http://ns1/}b")
3102 c = etree.SubElement(a, "{http://ns1/}c")
3103 d1 = etree.SubElement(c, "{http://ns1/}d")
3104 d2 = etree.SubElement(c, "{http://ns2/}d")
3105 d3 = etree.SubElement(c, "{http://ns1/}d")
3106
3107 tree = etree.ElementTree(a)
3108 self.assertEqual('.', tree.getelementpath(a))
3109 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3110 tree.getelementpath(d1))
3111 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3112 tree.getelementpath(d2))
3113 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3114 tree.getelementpath(d3))
3115
3116 self.assertEqual(a, tree.find(tree.getelementpath(a)))
3117 self.assertEqual(b, tree.find(tree.getelementpath(b)))
3118 self.assertEqual(c, tree.find(tree.getelementpath(c)))
3119 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3120 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3121 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3122
3123 tree = etree.ElementTree(c)
3124 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3125 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3126 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3127 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3128 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3129 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3130
3131 tree = etree.ElementTree(b)
3132 self.assertRaises(ValueError, tree.getelementpath, d1)
3133 self.assertRaises(ValueError, tree.getelementpath, d2)
3134
3141
3148
3157
3159 XML = self.etree.XML
3160 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3161 self.assertEqual(len(root.findall(".//{X}b")), 2)
3162 self.assertEqual(len(root.findall(".//{X}*")), 2)
3163 self.assertEqual(len(root.findall(".//b")), 3)
3164
3166 XML = self.etree.XML
3167 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3168 nsmap = {'xx': 'X'}
3169 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3170 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3171 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3172 nsmap = {'xx': 'Y'}
3173 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3174 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3175 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3176
3178 XML = self.etree.XML
3179 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3180 nsmap = {'xx': 'X'}
3181 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3182 nsmap = {'xx': 'X', None: 'Y'}
3183 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3184 nsmap = {'xx': 'X', '': 'Y'}
3185 self.assertRaises(ValueError, root.findall, ".//xx:b", namespaces=nsmap)
3186
3193
3195 etree = self.etree
3196 e = etree.Element('foo')
3197 for i in range(10):
3198 etree.SubElement(e, 'a%s' % i)
3199 for i in range(10):
3200 self.assertEqual(
3201 i,
3202 e.index(e[i]))
3203 self.assertEqual(
3204 3, e.index(e[3], 3))
3205 self.assertRaises(
3206 ValueError, e.index, e[3], 4)
3207 self.assertRaises(
3208 ValueError, e.index, e[3], 0, 2)
3209 self.assertRaises(
3210 ValueError, e.index, e[8], 0, -3)
3211 self.assertRaises(
3212 ValueError, e.index, e[8], -5, -3)
3213 self.assertEqual(
3214 8, e.index(e[8], 0, -1))
3215 self.assertEqual(
3216 8, e.index(e[8], -12, -1))
3217 self.assertEqual(
3218 0, e.index(e[0], -12, -1))
3219
3221 etree = self.etree
3222 e = etree.Element('foo')
3223 for i in range(10):
3224 el = etree.SubElement(e, 'a%s' % i)
3225 el.text = "text%d" % i
3226 el.tail = "tail%d" % i
3227
3228 child0 = e[0]
3229 child1 = e[1]
3230 child2 = e[2]
3231
3232 e.replace(e[0], e[1])
3233 self.assertEqual(
3234 9, len(e))
3235 self.assertEqual(
3236 child1, e[0])
3237 self.assertEqual(
3238 child1.text, "text1")
3239 self.assertEqual(
3240 child1.tail, "tail1")
3241 self.assertEqual(
3242 child0.tail, "tail0")
3243 self.assertEqual(
3244 child2, e[1])
3245
3246 e.replace(e[-1], e[0])
3247 self.assertEqual(
3248 child1, e[-1])
3249 self.assertEqual(
3250 child1.text, "text1")
3251 self.assertEqual(
3252 child1.tail, "tail1")
3253 self.assertEqual(
3254 child2, e[0])
3255
3257 etree = self.etree
3258 e = etree.Element('foo')
3259 for i in range(10):
3260 etree.SubElement(e, 'a%s' % i)
3261
3262 new_element = etree.Element("test")
3263 new_element.text = "TESTTEXT"
3264 new_element.tail = "TESTTAIL"
3265 child1 = e[1]
3266 e.replace(e[0], new_element)
3267 self.assertEqual(
3268 new_element, e[0])
3269 self.assertEqual(
3270 "TESTTEXT",
3271 e[0].text)
3272 self.assertEqual(
3273 "TESTTAIL",
3274 e[0].tail)
3275 self.assertEqual(
3276 child1, e[1])
3277
3293
3311
3329
3347
3349 Element = self.etree.Element
3350 SubElement = self.etree.SubElement
3351 try:
3352 slice
3353 except NameError:
3354 print("slice() not found")
3355 return
3356
3357 a = Element('a')
3358 b = SubElement(a, 'b')
3359 c = SubElement(a, 'c')
3360 d = SubElement(a, 'd')
3361 e = SubElement(a, 'e')
3362
3363 x = Element('x')
3364 y = Element('y')
3365 z = Element('z')
3366
3367 self.assertRaises(
3368 ValueError,
3369 operator.setitem, a, slice(1,None,2), [x, y, z])
3370
3371 self.assertEqual(
3372 [b, c, d, e],
3373 list(a))
3374
3387
3389 XML = self.etree.XML
3390 root = XML(_bytes(
3391 '<?xml version="1.0"?>\n'
3392 '<root>' + '\n' * 65536 +
3393 '<p>' + '\n' * 65536 + '</p>\n' +
3394 '<br/>\n'
3395 '</root>'))
3396
3397 if self.etree.LIBXML_VERSION >= (2, 9):
3398 expected = [2, 131074, 131076]
3399 else:
3400 expected = [2, 65535, 65535]
3401
3402 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3403
3411
3420
3430
3440
3446
3454
3460
3467
3473
3475 etree = self.etree
3476 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3477 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3478 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3479 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3480
3481 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3482
3483 tree = etree.parse(BytesIO(xml))
3484 docinfo = tree.docinfo
3485 self.assertEqual(docinfo.encoding, "ascii")
3486 self.assertEqual(docinfo.xml_version, "1.0")
3487 self.assertEqual(docinfo.public_id, pub_id)
3488 self.assertEqual(docinfo.system_url, sys_id)
3489 self.assertEqual(docinfo.root_name, 'html')
3490 self.assertEqual(docinfo.doctype, doctype_string)
3491
3507
3519
3531
3537
3539 etree = self.etree
3540 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3541 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3542 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3543
3544 xml = _bytes('<!DOCTYPE root>\n<root/>')
3545 tree = etree.parse(BytesIO(xml))
3546 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3547 etree.tostring(tree, doctype=doctype_string))
3548
3550 etree = self.etree
3551 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3552 self.assertEqual(root.base, "http://no/such/url")
3553 self.assertEqual(
3554 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3555 root.base = "https://secret/url"
3556 self.assertEqual(root.base, "https://secret/url")
3557 self.assertEqual(
3558 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3559 "https://secret/url")
3560
3562 etree = self.etree
3563 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3564 self.assertEqual(root.base, "http://no/such/url")
3565 self.assertEqual(
3566 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3567 root.set('{http://www.w3.org/XML/1998/namespace}base',
3568 "https://secret/url")
3569 self.assertEqual(root.base, "https://secret/url")
3570 self.assertEqual(
3571 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3572 "https://secret/url")
3573
3579
3584
3591
3605
3607 Element = self.etree.Element
3608
3609 a = Element('a')
3610 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3611 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3612
3613 self.assertRaises(ValueError, Element, 'ha\0ho')
3614
3616 Element = self.etree.Element
3617
3618 a = Element('a')
3619 self.assertRaises(ValueError, setattr, a, "text",
3620 _str('ha\0ho'))
3621 self.assertRaises(ValueError, setattr, a, "tail",
3622 _str('ha\0ho'))
3623
3624 self.assertRaises(ValueError, Element,
3625 _str('ha\0ho'))
3626
3628 Element = self.etree.Element
3629
3630 a = Element('a')
3631 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3632 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3633
3634 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3635 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3636
3637 self.assertRaises(ValueError, Element, 'ha\x07ho')
3638 self.assertRaises(ValueError, Element, 'ha\x02ho')
3639
3641 Element = self.etree.Element
3642
3643 a = Element('a')
3644 self.assertRaises(ValueError, setattr, a, "text",
3645 _str('ha\x07ho'))
3646 self.assertRaises(ValueError, setattr, a, "text",
3647 _str('ha\x02ho'))
3648
3649 self.assertRaises(ValueError, setattr, a, "tail",
3650 _str('ha\x07ho'))
3651 self.assertRaises(ValueError, setattr, a, "tail",
3652 _str('ha\x02ho'))
3653
3654 self.assertRaises(ValueError, Element,
3655 _str('ha\x07ho'))
3656 self.assertRaises(ValueError, Element,
3657 _str('ha\x02ho'))
3658
3660 Element = self.etree.Element
3661
3662 a = Element('a')
3663 self.assertRaises(ValueError, setattr, a, "text",
3664 _str('ha\u1234\x07ho'))
3665 self.assertRaises(ValueError, setattr, a, "text",
3666 _str('ha\u1234\x02ho'))
3667
3668 self.assertRaises(ValueError, setattr, a, "tail",
3669 _str('ha\u1234\x07ho'))
3670 self.assertRaises(ValueError, setattr, a, "tail",
3671 _str('ha\u1234\x02ho'))
3672
3673 self.assertRaises(ValueError, Element,
3674 _str('ha\u1234\x07ho'))
3675 self.assertRaises(ValueError, Element,
3676 _str('ha\u1234\x02ho'))
3677
3691
3696
3714
3734
3736 tostring = self.etree.tostring
3737 html = self.etree.fromstring(
3738 '<html><body>'
3739 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3740 '</body></html>',
3741 parser=self.etree.HTMLParser())
3742 self.assertEqual(html.tag, 'html')
3743 div = html.find('.//div')
3744 self.assertEqual(div.tail, '\r\n')
3745 result = tostring(div, method='html')
3746 self.assertEqual(
3747 result,
3748 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3749 result = tostring(div, method='html', with_tail=True)
3750 self.assertEqual(
3751 result,
3752 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3753 result = tostring(div, method='html', with_tail=False)
3754 self.assertEqual(
3755 result,
3756 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3757
3779
3781 tostring = self.etree.tostring
3782 XML = self.etree.XML
3783 ElementTree = self.etree.ElementTree
3784
3785 root = XML(_bytes("<root/>"))
3786
3787 tree = ElementTree(root)
3788 self.assertEqual(None, tree.docinfo.standalone)
3789
3790 result = tostring(root, xml_declaration=True, encoding="ASCII")
3791 self.assertEqual(result, _bytes(
3792 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3793
3794 result = tostring(root, xml_declaration=True, encoding="ASCII",
3795 standalone=True)
3796 self.assertEqual(result, _bytes(
3797 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3798
3799 tree = ElementTree(XML(result))
3800 self.assertEqual(True, tree.docinfo.standalone)
3801
3802 result = tostring(root, xml_declaration=True, encoding="ASCII",
3803 standalone=False)
3804 self.assertEqual(result, _bytes(
3805 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3806
3807 tree = ElementTree(XML(result))
3808 self.assertEqual(False, tree.docinfo.standalone)
3809
3829
3831 tostring = self.etree.tostring
3832 Element = self.etree.Element
3833 SubElement = self.etree.SubElement
3834
3835 a = Element('a')
3836 a.text = "A"
3837 a.tail = "tail"
3838 b = SubElement(a, 'b')
3839 b.text = "B"
3840 b.tail = _str("Søk på nettet")
3841 c = SubElement(a, 'c')
3842 c.text = "C"
3843
3844 result = tostring(a, method="text", encoding="UTF-16")
3845
3846 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3847 result)
3848
3850 tostring = self.etree.tostring
3851 Element = self.etree.Element
3852 SubElement = self.etree.SubElement
3853
3854 a = Element('a')
3855 a.text = _str('Søk på nettetA')
3856 a.tail = "tail"
3857 b = SubElement(a, 'b')
3858 b.text = "B"
3859 b.tail = _str('Søk på nettetB')
3860 c = SubElement(a, 'c')
3861 c.text = "C"
3862
3863 self.assertRaises(UnicodeEncodeError,
3864 tostring, a, method="text")
3865
3866 self.assertEqual(
3867 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3868 tostring(a, encoding="UTF-8", method="text"))
3869
3882
3898
3902
3917
3935
3948
3950 tostring = self.etree.tostring
3951 Element = self.etree.Element
3952 SubElement = self.etree.SubElement
3953
3954 a = Element('a')
3955 b = SubElement(a, 'b')
3956 c = SubElement(a, 'c')
3957 d = SubElement(c, 'd')
3958 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3959 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3960 self.assertEqual(_bytes('<b></b>'),
3961 canonicalize(tostring(b, encoding=_unicode)))
3962 self.assertEqual(_bytes('<c><d></d></c>'),
3963 canonicalize(tostring(c, encoding=_unicode)))
3964
3969
3984
3986 tostring = self.etree.tostring
3987 Element = self.etree.Element
3988 SubElement = self.etree.SubElement
3989
3990 a = Element('a')
3991 b = SubElement(a, 'b')
3992 c = SubElement(a, 'c')
3993
3994 result = tostring(a, encoding=_unicode)
3995 self.assertEqual(result, "<a><b/><c/></a>")
3996
3997 result = tostring(a, encoding=_unicode, pretty_print=False)
3998 self.assertEqual(result, "<a><b/><c/></a>")
3999
4000 result = tostring(a, encoding=_unicode, pretty_print=True)
4001 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
4002
4014
# Bare ElementBase subclass: it adds and overrides nothing, so the
# instances created from it below exercise only what ElementBase provides.
class SubEl(etree.ElementBase):
    pass
4018
4019 el1 = SubEl()
4020 el2 = SubEl()
4021 self.assertEqual('SubEl', el1.tag)
4022 self.assertEqual('SubEl', el2.tag)
4023 el1.other = el2
4024 el2.other = el1
4025
4026 del el1, el2
4027 gc.collect()
4028
4029
4043
4045 root = etree.Element('parent')
4046 c1 = etree.SubElement(root, 'child1')
4047 c2 = etree.SubElement(root, 'child2')
4048
4049 root.remove(c1)
4050 root.remove(c2)
4051 c1.addnext(c2)
4052 c1.tail = 'abc'
4053 c2.tail = 'xyz'
4054 del c1
4055
4056 c2.getprevious()
4057
4058 self.assertEqual('child1', c2.getprevious().tag)
4059 self.assertEqual('abc', c2.getprevious().tail)
4060
4061
4062
4063 - def _writeElement(self, element, encoding='us-ascii', compression=0):
4074
4119
# Tail of an XInclude resolver test: `res` is defined in lines not visible
# here (presumably an etree.Resolver subclass that records which kinds of
# resources it was asked for in a `called` dict -- confirm against its definition).
4120 res_instance = res()
4121 parser = etree.XMLParser(load_dtd = True)
4122 parser.resolvers.add(res_instance)
4123
4124 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4125 parser = parser)
4126
4127 self.include(tree)  # include() comes from the enclosing test class (not visible here)
4128
4129 called = list(res_instance.called.items())
4130 called.sort()  # sort so the comparison does not depend on dict ordering
4131 self.assertEqual(
4132 [("dtd", True), ("include", True), ("input", True)],
4133 called)
4134
# Checks that XInclude processing calls a custom resolver recursively: every
# resolved document contains a further xi:include until '/test5.xml' is
# requested, which ends the chain with a <DONE/> document.
4136 data = textwrap.dedent('''
4137 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4138 <foo/>
4139 <xi:include href="./test.xml" />
4140 </doc>
4141 ''')
4142
4143 class Resolver(etree.Resolver):
4144 called = {}  # maps each resolved name to True; the asserts below enforce one call per name
4145
4146 def resolve(self, url, id, context):
4147 if url.endswith("test_xinclude.xml"):
4148 assert not self.called.get("input")
4149 self.called["input"] = True
4150 return None  # NOTE(review): presumably defers to the default resolution for the input file -- confirm
4151 elif url.endswith('/test5.xml'):
4152 assert not self.called.get("DONE")
4153 self.called["DONE"] = True
4154 return self.resolve_string('<DONE/>', context)  # terminates the include chain
4155 else:
4156 _, filename = url.rsplit('/', 1)
4157 assert not self.called.get(filename)
4158 self.called[filename] = True
# The next href is numbered by how many names have been recorded so far,
# which yields test2.xml, test3.xml, ... on successive calls.
4159 next_data = data.replace(
4160 'test.xml', 'test%d.xml' % len(self.called))
4161 return self.resolve_string(next_data, context)
4162
4163 res_instance = Resolver()
4164 parser = etree.XMLParser(load_dtd=True)
4165 parser.resolvers.add(res_instance)
4166
4167 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4168 parser=parser)
4169
4170 self.include(tree)  # include() comes from the enclosing test class (not visible here)
4171
4172 called = list(res_instance.called.items())
4173 called.sort()  # sort so the comparison does not depend on dict ordering
4174 self.assertEqual(
4175 [("DONE", True), ("input", True), ("test.xml", True),
4176 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4177 called)
4178
4183
4189
# Checks that write_c14n() writes the canonical form to a file-like object;
# the empty <b/> element is expected in its expanded <b></b> form.
4193 tree = self.parse(_bytes('<a><b/></a>'))
4194 f = BytesIO()
4195 tree.write_c14n(f)
4196 s = f.getvalue()
4197 self.assertEqual(_bytes('<a><b></b></a>'),
4198 s)
4199
# Checks write_c14n() with compression=9: the written bytes are read back
# through gzip.GzipFile and must decompress to the canonical document.
4201 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4202 f = BytesIO()
4203 tree.write_c14n(f, compression=9)
4204 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4205 s = gzfile.read()
4206 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4207 s)
4208
4216
4225
4243
4255
4267
# Compares inclusive and exclusive canonicalization via write_c14n() on a
# document that declares three namespaces but uses only two of them.
4269 tree = self.parse(_bytes(
4270 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4271 f = BytesIO()
4272 tree.write_c14n(f)  # default: all declarations stay on the root
4273 s = f.getvalue()
4274 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4275 s)
4276 f = BytesIO()
4277 tree.write_c14n(f, exclusive=False)  # explicit False gives the same output as the default
4278 s = f.getvalue()
4279 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4280 s)
4281 f = BytesIO()
4282 tree.write_c14n(f, exclusive=True)  # unused prefix y is dropped; z is declared where it is first used
4283 s = f.getvalue()
4284 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4285 s)
4286
4287 f = BytesIO()
4288 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])  # listed prefix z stays declared on the root
4289 s = f.getvalue()
4290 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
4291 s)
4292
# Same inclusive/exclusive comparison, but through
# etree.tostring(tree, method='c14n') on the whole tree.
4294 tree = self.parse(_bytes(
4295 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4296 s = etree.tostring(tree, method='c14n')
4297 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4298 s)
4299 s = etree.tostring(tree, method='c14n', exclusive=False)
4300 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4301 s)
4302 s = etree.tostring(tree, method='c14n', exclusive=True)
4303 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4304 s)
4305
4306 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])  # y is kept on the root although unused
4307 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
4308 s)
4309
# Same comparison on individual elements: first the root element, then its
# child <z:b>, which inherits its namespace declarations from the root.
4311 tree = self.parse(_bytes(
4312 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4313 s = etree.tostring(tree.getroot(), method='c14n')
4314 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4315 s)
4316 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
4317 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4318 s)
4319 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
4320 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4321 s)
4322
4323 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)  # child alone: all inherited declarations are emitted
4324 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4325 s)
4326 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)  # child alone: only the prefix it uses
4327 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
4328 s)
4329
4330 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])  # y is added on request
4331 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4332 s)
4333
# Exclusive C14N with three inclusive prefixes: all three declarations are
# expected to stay on the root element.
4335 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
4336 tree = self.parse(_bytes(
4337 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4338
4339 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
4340 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4341 s)
4342
# Checks that a plain write() to a file-like object reproduces the parsed
# document unchanged (the empty element stays in its short <b/> form).
4346 tree = self.parse(_bytes('<a><b/></a>'))
4347 f = BytesIO()
4348 tree.write(f)
4349 s = f.getvalue()
4350 self.assertEqual(_bytes('<a><b/></a>'),
4351 s)
4352
# Checks the doctype argument of write(): the given string is written
# verbatim, followed by a newline, before the document.
4354 tree = self.parse(_bytes('<a><b/></a>'))
4355 f = BytesIO()
4356 tree.write(f, doctype='HUHU')  # an arbitrary string; the expected output shows it is not validated
4357 s = f.getvalue()
4358 self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
4359 s)
4360
# Checks write() with compression=9: the written bytes are read back through
# gzip.GzipFile and must decompress to the original document.
4362 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4363 f = BytesIO()
4364 tree.write(f, compression=9)
4365 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4366 s = gzfile.read()
4367 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4368 s)
4369
# Checks that compression and doctype can be combined in write(): the
# doctype line must be part of the compressed stream.
4371 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4372 f = BytesIO()
4373 tree.write(f, compression=9, doctype='<!DOCTYPE a>')
4374 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4375 s = gzfile.read()
4376 self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
4377 s)
4378
# Compares write() output at compression levels 0, 1 and 9: level 0 must
# equal the default (uncompressed) output, and levels 1 and 9 must be no
# larger than it and decompress to the same document.
4380 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4381 f = BytesIO()
4382 tree.write(f, compression=0)
4383 s0 = f.getvalue()
4384
4385 f = BytesIO()
4386 tree.write(f)  # no compression argument at all
4387 self.assertEqual(f.getvalue(), s0)
4388
4389 f = BytesIO()
4390 tree.write(f, compression=1)
4391 s = f.getvalue()
4392 self.assertTrue(len(s) <= len(s0))
4393 with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
4394 s1 = gzfile.read()
4395
4396 f = BytesIO()
4397 tree.write(f, compression=9)
4398 s = f.getvalue()
4399 self.assertTrue(len(s) <= len(s0))
4400 with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
4401 s9 = gzfile.read()
4402
# s0 is the raw output; s1 and s9 are the decompressed level-1 and level-9 outputs.
4403 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4404 s0)
4405 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4406 s1)
4407 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4408 s9)
4409
4417
4426
4434
4443
4446 etree = etree  # NOTE(review): presumably a class attribute exposing the imported etree module as self.etree; the class header is not visible here
4447
4469
# Installs a PyErrorLog subclass as the global Python log, parses a malformed
# document, and checks that the parser error shows up in the collected messages.
4471 """This can't really be tested as long as there isn't a way to
4472 reset the logging setup ...
4473 """
4474 parse = self.etree.parse
4475
4476 messages = []
4477 class Logger(self.etree.PyErrorLog):
4478 def log(self, entry, message, *args):
4479 messages.append(message)  # keep only the message text; entry and extra args are ignored
4480
4481 self.etree.use_global_python_log(Logger())
4482 f = BytesIO('<a><b></c></b></a>')  # NOTE(review): a str is passed, not _bytes(...); works only if the imported BytesIO accepts text -- confirm
4483 try:
4484 parse(f)
4485 except SyntaxError:
4486 pass  # the parse failure itself is expected; only the logged messages are checked
4487 f.close()
4488
# Each check only requires that at least one collected message contains the
# substring (a non-empty list is truthy).
4489 self.assertTrue([ message for message in messages
4490 if 'mismatch' in message ])
4491 self.assertTrue([ message for message in messages
4492 if ':PARSER:' in message])
4493 self.assertTrue([ message for message in messages
4494 if ':ERR_TAG_NAME_MISMATCH:' in message ])
4495 self.assertTrue([ message for message in messages
4496 if ':1:15:' in message ])  # presumably the line:column position of the mismatched tag -- confirm
4497
4500 etree = etree  # NOTE(review): presumably a class attribute exposing the imported etree module as self.etree; the class header is not visible here
4501
4505
# Checks XMLPullParser with a custom target and no explicit event list: only
# 'end' events are reported, each carrying the return value of Target.end(),
# and parser.close() returns the value of Target.close().
4507 class Target(object):
4508 def start(self, tag, attrib):
4509 return 'start(%s)' % tag
4510 def end(self, tag):
4511 return 'end(%s)' % tag
4512 def close(self):
4513 return 'close()'
4514
4515 parser = self.etree.XMLPullParser(target=Target())
4516 events = parser.read_events()
4517
4518 parser.feed('<root><element>')  # only start tags so far, so no events are expected
4519 self.assertFalse(list(events))
4520 self.assertFalse(list(events))
4521 parser.feed('</element><child>')
4522 self.assertEqual([('end', 'end(element)')], list(events))
4523 parser.feed('</child>')
4524 self.assertEqual([('end', 'end(child)')], list(events))
4525 parser.feed('</root>')
4526 self.assertEqual([('end', 'end(root)')], list(events))
4527 self.assertFalse(list(events))  # the event iterator is drained after being read
4528 self.assertEqual('close()', parser.close())
4529
# Checks XMLPullParser with a custom target and both 'start' and 'end'
# events requested: each event carries the return value of the matching
# target method, in document order.
4531 class Target(object):
4532 def start(self, tag, attrib):
4533 return 'start(%s)' % tag
4534 def end(self, tag):
4535 return 'end(%s)' % tag
4536 def close(self):
4537 return 'close()'
4538
4539 parser = self.etree.XMLPullParser(
4540 ['start', 'end'], target=Target())
4541 events = parser.read_events()
4542
4543 parser.feed('<root><element>')
4544 self.assertEqual(
4545 [('start', 'start(root)'), ('start', 'start(element)')],
4546 list(events))
4547 self.assertFalse(list(events))  # already consumed; nothing new until more data is fed
4548 parser.feed('</element><child>')
4549 self.assertEqual(
4550 [('end', 'end(element)'), ('start', 'start(child)')],
4551 list(events))
4552 parser.feed('</child>')
4553 self.assertEqual(
4554 [('end', 'end(child)')],
4555 list(events))
4556 parser.feed('</root>')
4557 self.assertEqual(
4558 [('end', 'end(root)')],
4559 list(events))
4560 self.assertFalse(list(events))
4561 self.assertEqual('close()', parser.close())
4562
# Checks XMLPullParser with etree.TreeBuilder as target: events are compared
# by tag through self.assert_event_tags() (a helper not visible here), and
# close() returns the built root element.
4564 parser = self.etree.XMLPullParser(
4565 ['start', 'end'], target=etree.TreeBuilder())
4566 events = parser.read_events()
4567
4568 parser.feed('<root><element>')
4569 self.assert_event_tags(
4570 events, [('start', 'root'), ('start', 'element')])
4571 self.assertFalse(list(events))
4572 parser.feed('</element><child>')
4573 self.assert_event_tags(
4574 events, [('end', 'element'), ('start', 'child')])
4575 parser.feed('</child>')
4576 self.assert_event_tags(
4577 events, [('end', 'child')])
4578 parser.feed('</root>')
4579 self.assert_event_tags(
4580 events, [('end', 'root')])
4581 self.assertFalse(list(events))
4582 root = parser.close()
4583 self.assertEqual('root', root.tag)
4584
# Checks XMLPullParser with a TreeBuilder subclass that renames each element
# when it is closed: 'end' events and the final root show the renamed tag,
# while 'start' events still show the original one.
4586 class Target(etree.TreeBuilder):
4587 def end(self, tag):
4588 el = super(Target, self).end(tag)
4589 el.tag += '-huhu'  # rename the element returned by the base class
4590 return el
4591
4592 parser = self.etree.XMLPullParser(
4593 ['start', 'end'], target=Target())
4594 events = parser.read_events()
4595
4596 parser.feed('<root><element>')
4597 self.assert_event_tags(
4598 events, [('start', 'root'), ('start', 'element')])
4599 self.assertFalse(list(events))
4600 parser.feed('</element><child>')
4601 self.assert_event_tags(
4602 events, [('end', 'element-huhu'), ('start', 'child')])
4603 parser.feed('</child>')
4604 self.assert_event_tags(
4605 events, [('end', 'child-huhu')])
4606 parser.feed('</root>')
4607 self.assert_event_tags(
4608 events, [('end', 'root-huhu')])
4609 self.assertFalse(list(events))
4610 root = parser.close()
4611 self.assertEqual('root-huhu', root.tag)
4612
4642
4643
# When run directly as a script, only print a hint on how to run the tests.
4644 if __name__ == '__main__':
4645 print('to test use test.py %s' % __file__)
4646