1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 import os.path
13 import unittest
14 import copy
15 import sys
16 import re
17 import gc
18 import operator
19 import tempfile
20 import textwrap
21 import zlib
22 import gzip
23 from contextlib import closing, contextmanager
24
25 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
26 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
27 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
28 from .common_imports import canonicalize, _str, _bytes
29
# Report the versions under test once, when this module is imported.
print("")
print("TESTED VERSION: %s" % etree.__version__)
print(" Python: %r" % (sys.version_info,))
print(" lxml.etree: %r" % (etree.LXML_VERSION,))
print(" libxml used: %r" % (etree.LIBXML_VERSION,))
print(" libxml compiled: %r" % (etree.LIBXML_COMPILED_VERSION,))
print(" libxslt used: %r" % (etree.LIBXSLT_VERSION,))
print(" libxslt compiled: %r" % (etree.LIBXSLT_COMPILED_VERSION,))
print("")
39
40 try:
41 _unicode = unicode
42 except NameError:
43
44 _unicode = str
45
46
47 @contextmanager
48 -def tmpfile():
55
58 """Tests only for etree, not ElementTree"""
59 etree = etree
60
71
80
88
95
97 Element = self.etree.Element
98 el = Element('name')
99 self.assertRaises(ValueError, Element, '{}')
100 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
101
102 self.assertRaises(ValueError, Element, '{test}')
103 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, 'p:name')
108 self.assertRaises(ValueError, Element, '{test}p:name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
112
114 Element = self.etree.Element
115 self.assertRaises(ValueError, Element, "p'name")
116 self.assertRaises(ValueError, Element, 'p"name')
117
118 self.assertRaises(ValueError, Element, "{test}p'name")
119 self.assertRaises(ValueError, Element, '{test}p"name')
120
121 el = Element('name')
122 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
123 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
124
126 Element = self.etree.Element
127 self.assertRaises(ValueError, Element, ' name ')
128 self.assertRaises(ValueError, Element, 'na me')
129 self.assertRaises(ValueError, Element, '{test} name')
130
131 el = Element('name')
132 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
133
141
149
151 Element = self.etree.Element
152 SubElement = self.etree.SubElement
153
154 el = Element('name')
155 self.assertRaises(ValueError, SubElement, el, "p'name")
156 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
157
158 self.assertRaises(ValueError, SubElement, el, 'p"name')
159 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
160
169
178
180 QName = self.etree.QName
181 self.assertRaises(ValueError, QName, '')
182 self.assertRaises(ValueError, QName, 'test', '')
183
185 QName = self.etree.QName
186 self.assertRaises(ValueError, QName, 'p:name')
187 self.assertRaises(ValueError, QName, 'test', 'p:name')
188
190 QName = self.etree.QName
191 self.assertRaises(ValueError, QName, ' name ')
192 self.assertRaises(ValueError, QName, 'na me')
193 self.assertRaises(ValueError, QName, 'test', ' name')
194
202
204
205 QName = self.etree.QName
206 qname1 = QName('http://myns', 'a')
207 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
208
209 qname2 = QName(a)
210 self.assertEqual(a.tag, qname1.text)
211 self.assertEqual(qname1.text, qname2.text)
212 self.assertEqual(qname1, qname2)
213
215
216 etree = self.etree
217 qname = etree.QName('http://myns', 'a')
218 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
219 a.text = qname
220
221 self.assertEqual("p:a", a.text)
222
231
246
252
262
274
276 Element = self.etree.Element
277
278 keys = ["attr%d" % i for i in range(10)]
279 values = ["TEST-%d" % i for i in range(10)]
280 items = list(zip(keys, values))
281
282 root = Element("root")
283 for key, value in items:
284 root.set(key, value)
285 self.assertEqual(keys, root.attrib.keys())
286 self.assertEqual(values, root.attrib.values())
287
288 root2 = Element("root2", root.attrib,
289 attr_99='TOAST-1', attr_98='TOAST-2')
290 self.assertEqual(['attr_98', 'attr_99'] + keys,
291 root2.attrib.keys())
292 self.assertEqual(['TOAST-2', 'TOAST-1'] + values,
293 root2.attrib.values())
294
295 self.assertEqual(keys, root.attrib.keys())
296 self.assertEqual(values, root.attrib.values())
297
299
300
301 Element = self.etree.Element
302 root = Element("root")
303 self.assertRaises(TypeError, root.set, "newattr", 5)
304 self.assertRaises(TypeError, root.set, "newattr", object)
305 self.assertRaises(TypeError, root.set, "newattr", None)
306 self.assertRaises(TypeError, root.set, "newattr")
307
321
343
345 XML = self.etree.XML
346 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
347
348 root = XML(xml)
349 self.etree.strip_elements(root, 'a')
350 self.assertEqual(_bytes('<test><x></x></test>'),
351 self._writeElement(root))
352
353 root = XML(xml)
354 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
355 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
356 self._writeElement(root))
357
358 root = XML(xml)
359 self.etree.strip_elements(root, 'c')
360 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
361 self._writeElement(root))
362
364 XML = self.etree.XML
365 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
366
367 root = XML(xml)
368 self.etree.strip_elements(root, 'a')
369 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
370 self._writeElement(root))
371
372 root = XML(xml)
373 self.etree.strip_elements(root, '{urn:a}b', 'c')
374 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
375 self._writeElement(root))
376
377 root = XML(xml)
378 self.etree.strip_elements(root, '{urn:a}*', 'c')
379 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
380 self._writeElement(root))
381
382 root = XML(xml)
383 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
384 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
385 self._writeElement(root))
386
405
431
458
485
504
517
528
534
536 XML = self.etree.XML
537 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
538 self.assertEqual(root[0].target, "mypi")
539 self.assertEqual(root[0].get('my'), "1")
540 self.assertEqual(root[0].get('test'), " abc ")
541 self.assertEqual(root[0].get('quotes'), "' '")
542 self.assertEqual(root[0].get('only'), None)
543 self.assertEqual(root[0].get('names'), None)
544 self.assertEqual(root[0].get('nope'), None)
545
547 XML = self.etree.XML
548 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
549 self.assertEqual(root[0].target, "mypi")
550 self.assertEqual(root[0].attrib['my'], "1")
551 self.assertEqual(root[0].attrib['test'], " abc ")
552 self.assertEqual(root[0].attrib['quotes'], "' '")
553 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
554 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
555 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
556
558
559 ProcessingInstruction = self.etree.ProcessingInstruction
560
561 a = ProcessingInstruction("PI", "ONE")
562 b = copy.deepcopy(a)
563 b.text = "ANOTHER"
564
565 self.assertEqual('ONE', a.text)
566 self.assertEqual('ANOTHER', b.text)
567
583
598
609
621
640
645
658
669
670 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
671 events = list(iterparse(f, events=('end', 'comment')))
672 root = events[-1][1]
673 self.assertEqual(6, len(events))
674 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
675 [ name(*item) for item in events ])
676 self.assertEqual(
677 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
678 tostring(root))
679
691
692 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
693 events = list(iterparse(f, events=('end', 'pi')))
694 root = events[-2][1]
695 self.assertEqual(8, len(events))
696 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
697 ('pid','d'), 'a', ('pie','e')],
698 [ name(*item) for item in events ])
699 self.assertEqual(
700 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
701 tostring(ElementTree(root)))
702
717
723
725 iterparse = self.etree.iterparse
726 f = BytesIO('<a><b><c/></a>')
727 it = iterparse(f, events=('start', 'end'), recover=True)
728 events = [(ev, el.tag) for ev, el in it]
729 root = it.root
730 self.assertTrue(root is not None)
731
732 self.assertEqual(1, events.count(('start', 'a')))
733 self.assertEqual(1, events.count(('end', 'a')))
734
735 self.assertEqual(1, events.count(('start', 'b')))
736 self.assertEqual(1, events.count(('end', 'b')))
737
738 self.assertEqual(1, events.count(('start', 'c')))
739 self.assertEqual(1, events.count(('end', 'c')))
740
742 iterparse = self.etree.iterparse
743 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
744 it = iterparse(f, events=('start', 'end'), recover=True)
745 events = [(ev, el.tag) for ev, el in it]
746 root = it.root
747 self.assertTrue(root is not None)
748
749 self.assertEqual(1, events.count(('start', 'a')))
750 self.assertEqual(1, events.count(('end', 'a')))
751
752 self.assertEqual(2, events.count(('start', 'b')))
753 self.assertEqual(2, events.count(('end', 'b')))
754
755 self.assertEqual(2, events.count(('start', 'c')))
756 self.assertEqual(2, events.count(('end', 'c')))
757
759 iterparse = self.etree.iterparse
760 f = BytesIO("""
761 <a> \n \n <b> b test </b> \n
762
763 \n\t <c> \n </c> </a> \n """)
764 iterator = iterparse(f, remove_blank_text=True)
765 text = [ (element.text, element.tail)
766 for event, element in iterator ]
767 self.assertEqual(
768 [(" b test ", None), (" \n ", None), (None, None)],
769 text)
770
772 iterparse = self.etree.iterparse
773 f = BytesIO('<a><b><d/></b><c/></a>')
774
775 iterator = iterparse(f, tag="b", events=('start', 'end'))
776 events = list(iterator)
777 root = iterator.root
778 self.assertEqual(
779 [('start', root[0]), ('end', root[0])],
780 events)
781
783 iterparse = self.etree.iterparse
784 f = BytesIO('<a><b><d/></b><c/></a>')
785
786 iterator = iterparse(f, tag="*", events=('start', 'end'))
787 events = list(iterator)
788 self.assertEqual(
789 8,
790 len(events))
791
793 iterparse = self.etree.iterparse
794 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
795
796 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
797 events = list(iterator)
798 root = iterator.root
799 self.assertEqual(
800 [('start', root[0]), ('end', root[0])],
801 events)
802
804 iterparse = self.etree.iterparse
805 f = BytesIO('<a><b><d/></b><c/></a>')
806 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
807 events = list(iterator)
808 root = iterator.root
809 self.assertEqual(
810 [('start', root[0]), ('end', root[0])],
811 events)
812
813 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
814 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
815 events = list(iterator)
816 root = iterator.root
817 self.assertEqual([], events)
818
820 iterparse = self.etree.iterparse
821 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
822 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
823 events = list(iterator)
824 self.assertEqual(8, len(events))
825
827 iterparse = self.etree.iterparse
828 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
829 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
830 events = list(iterator)
831 self.assertEqual([], events)
832
833 f = BytesIO('<a><b><d/></b><c/></a>')
834 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
835 events = list(iterator)
836 self.assertEqual(8, len(events))
837
839 text = _str('Søk på nettet')
840 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
841 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
842 ).encode('iso-8859-1')
843
844 self.assertRaises(self.etree.ParseError,
845 list, self.etree.iterparse(BytesIO(xml_latin1)))
846
848 text = _str('Søk på nettet', encoding="UTF-8")
849 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
850 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
851 ).encode('iso-8859-1')
852
853 iterator = self.etree.iterparse(BytesIO(xml_latin1),
854 encoding="iso-8859-1")
855 self.assertEqual(1, len(list(iterator)))
856
857 a = iterator.root
858 self.assertEqual(a.text, text)
859
861 tostring = self.etree.tostring
862 f = BytesIO('<root><![CDATA[test]]></root>')
863 context = self.etree.iterparse(f, strip_cdata=False)
864 content = [ el.text for event,el in context ]
865
866 self.assertEqual(['test'], content)
867 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
868 tostring(context.root))
869
873
878
897
898
899
922
923
924
926 assertEqual = self.assertEqual
927 assertFalse = self.assertFalse
928
929 events = []
class Target(object):
    """Parser target that only accepts attribute-less ``TAG`` elements.

    Every start/end callback is recorded in the enclosing ``events`` list
    before its arguments are checked.
    """

    def start(self, tag, attrib):
        events.append("start")
        assertFalse(attrib)
        assertEqual("TAG", tag)

    def end(self, tag):
        events.append("end")
        assertEqual("TAG", tag)

    def close(self):
        return "DONE"
940
941 parser = self.etree.XMLParser(target=Target())
942 tree = self.etree.ElementTree()
943
944 self.assertRaises(TypeError,
945 tree.parse, BytesIO("<TAG/>"), parser=parser)
946 self.assertEqual(["start", "end"], events)
947
949
950 events = []
class Target(object):
    """Parser target that fails when the ``a`` element ends.

    All callbacks, including ``close``, are recorded in the enclosing
    ``events`` list.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        # The event is recorded first, so the failing tag is still logged.
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        events.append("data-" + data)

    def close(self):
        events.append("close")
        return "DONE"
963
964 parser = self.etree.XMLParser(target=Target())
965
966 try:
967 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
968 done = parser.close()
969 self.fail("error expected, but parsing succeeded")
970 except ValueError:
971 done = 'value error received as expected'
972
973 self.assertEqual(["start-root", "data-A", "start-a",
974 "data-ca", "end-a", "close"],
975 events)
976
978
979 events = []
class Target(object):
    """Parser target whose ``end`` callback raises for the ``a`` element.

    Start, end, data and close callbacks are all appended to the
    enclosing ``events`` list.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        # Log before raising so that "end-a" shows up in the event list.
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        events.append("data-" + data)

    def close(self):
        events.append("close")
        return "DONE"
992
993 parser = self.etree.XMLParser(target=Target())
994
995 try:
996 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
997 parser=parser)
998 self.fail("error expected, but parsing succeeded")
999 except ValueError:
1000 done = 'value error received as expected'
1001
1002 self.assertEqual(["start-root", "data-A", "start-a",
1003 "data-ca", "end-a", "close"],
1004 events)
1005
1007
1008 events = []
class Target(object):
    """Parser target that logs element, text and comment callbacks.

    Events go into the enclosing ``events`` list; ``close`` is not logged
    and simply returns ``"DONE"``.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        events.append("end-" + tag)

    def data(self, data):
        events.append("data-" + data)

    def comment(self, text):
        events.append("comment-" + text)

    def close(self):
        return "DONE"
1020
1021 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1022
1023 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1024 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1025 done = parser.close()
1026
1027 self.assertEqual("DONE", done)
1028 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1029 "start-sub", "end-sub", "data-B", "end-root"],
1030 events)
1031
1037 def end(self, tag):
1038 events.append("end-" + tag)
1039 def data(self, data):
1040 events.append("data-" + data)
1041 def comment(self, text):
1042 events.append("comment-" + text)
1043 def close(self):
1044 return "DONE"
1045
1046 parser = self.etree.XMLParser(target=Target())
1047
1048 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1049 done = parser.close()
1050
1051 self.assertEqual("DONE", done)
1052 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1053 "start-sub", "end-sub", "comment-c", "data-B",
1054 "end-root", "comment-d"],
1055 events)
1056
1058 events = []
class Target(object):
    """Parser target that logs element, text and processing-instruction
    callbacks into the enclosing ``events`` list.

    ``close`` is not logged and simply returns ``"DONE"``.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        events.append("end-" + tag)

    def data(self, data):
        events.append("data-" + data)

    def pi(self, target, data):
        events.append("pi-" + target + "-" + data)

    def close(self):
        return "DONE"
1070
1071 parser = self.etree.XMLParser(target=Target())
1072
1073 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1074 done = parser.close()
1075
1076 self.assertEqual("DONE", done)
1077 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1078 "data-B", "end-root", "pi-test-c"],
1079 events)
1080
1082 events = []
class Target(object):
    """Parser target that logs start, end and data callbacks into the
    enclosing ``events`` list.

    ``close`` is not logged and simply returns ``"DONE"``.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        events.append("end-" + tag)

    def data(self, data):
        events.append("data-" + data)

    def close(self):
        return "DONE"
1092
1093 parser = self.etree.XMLParser(target=Target(),
1094 strip_cdata=False)
1095
1096 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1097 done = parser.close()
1098
1099 self.assertEqual("DONE", done)
1100 self.assertEqual(["start-root", "data-A", "start-a",
1101 "data-ca", "end-a", "data-B", "end-root"],
1102 events)
1103
1105 events = []
class Target(object):
    """Parser target that logs start, end, data and close callbacks into
    the enclosing ``events`` list.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        events.append("end-" + tag)

    def data(self, data):
        events.append("data-" + data)

    def close(self):
        # Unlike the element callbacks, close also reports a result.
        events.append("close")
        return "DONE"
1116
1117 parser = self.etree.XMLParser(target=Target(),
1118 recover=True)
1119
1120 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1121 done = parser.close()
1122
1123 self.assertEqual("DONE", done)
1124 self.assertEqual(["start-root", "data-A", "start-a",
1125 "data-ca", "end-a", "data-B",
1126 "end-root", "close"],
1127 events)
1128
1138
1148
1157
1167
1169 iterwalk = self.etree.iterwalk
1170 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1171
1172 iterator = iterwalk(root, events=('start','end'))
1173 events = list(iterator)
1174 self.assertEqual(
1175 [('start', root), ('start', root[0]), ('end', root[0]),
1176 ('start', root[1]), ('end', root[1]), ('end', root)],
1177 events)
1178
1189
1191 iterwalk = self.etree.iterwalk
1192 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1193
1194 attr_name = '{testns}bla'
1195 events = []
1196 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1197 for event, elem in iterator:
1198 events.append(event)
1199 if event == 'start':
1200 if elem.tag != '{ns1}a':
1201 elem.set(attr_name, 'value')
1202
1203 self.assertEqual(
1204 ['start-ns', 'start', 'start', 'start-ns', 'start',
1205 'end', 'end-ns', 'end', 'end', 'end-ns'],
1206 events)
1207
1208 self.assertEqual(
1209 None,
1210 root.get(attr_name))
1211 self.assertEqual(
1212 'value',
1213 root[0].get(attr_name))
1214
1227
1229 iterwalk = self.etree.iterwalk
1230 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1231
1232 iterator = iterwalk(root, events=('start', 'end'))
1233 tags = []
1234 for event, elem in iterator:
1235 tags.append((event, elem.tag))
1236 if elem.tag in ('b', 'e'):
1237
1238 iterator.skip_subtree()
1239
1240 self.assertEqual(
1241 [('start', 'a'),
1242 ('start', 'b'), ('end', 'b'),
1243 ('start', 'd'),
1244 ('start', 'e'), ('end', 'e'),
1245 ('end', 'd'),
1246 ('end', 'a')],
1247 tags)
1248
1250 iterwalk = self.etree.iterwalk
1251 root = self.etree.XML(_bytes(
1252 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1253
1254 events = []
1255 iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1256 for event, elem in iterator:
1257 if event in ('start-ns', 'end-ns'):
1258 events.append((event, elem))
1259 if event == 'start-ns' and elem == ('', 'nsb'):
1260 events.append('skip')
1261 iterator.skip_subtree()
1262 else:
1263 events.append((event, elem.tag))
1264
1265 self.assertEqual(
1266 [('start-ns', ('', 'ns1')),
1267 ('start', '{ns1}a'),
1268 ('start-ns', ('', 'nsb')),
1269 'skip',
1270 ('start', '{nsb}b'),
1271 ('end-ns', None),
1272 ('start-ns', ('', 'ns2')),
1273 ('start', '{ns2}d'),
1274 ('start', '{ns2}e'),
1275 ('end-ns', None),
1276 ('end-ns', None)
1277 ],
1278 events)
1279
1290
1292 parse = self.etree.parse
1293 parser = self.etree.XMLParser(dtd_validation=True)
1294 assertEqual = self.assertEqual
1295 test_url = _str("__nosuch.dtd")
1296
class MyResolver(self.etree.Resolver):
    """Resolver that answers with an in-memory DTD via ``resolve_string``.

    The DTD declares ``myentity`` with the requested URL as its value and
    allows any content inside ``doc``.
    """

    def resolve(self, url, id, context):
        # Only the DTD named by the test document may be requested.
        assertEqual(url, test_url)
        dtd_text = _str('<!ENTITY myentity "%s">\n<!ELEMENT doc ANY>') % url
        return self.resolve_string(dtd_text, context)
1303
1304 parser.resolvers.add(MyResolver())
1305
1306 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1307 tree = parse(StringIO(xml), parser)
1308 root = tree.getroot()
1309 self.assertEqual(root.text, test_url)
1310
1312 parse = self.etree.parse
1313 parser = self.etree.XMLParser(dtd_validation=True)
1314 assertEqual = self.assertEqual
1315 test_url = _str("__nosuch.dtd")
1316
class MyResolver(self.etree.Resolver):
    """Resolver that answers with a UTF-8 encoded in-memory DTD.

    The DTD declares ``myentity`` with the requested URL as its value and
    allows any content inside ``doc``; it is encoded before being handed
    to ``resolve_string``.
    """

    def resolve(self, url, id, context):
        # Only the DTD named by the test document may be requested.
        assertEqual(url, test_url)
        dtd_text = _str('<!ENTITY myentity "%s">\n<!ELEMENT doc ANY>') % url
        return self.resolve_string(dtd_text.encode('utf-8'), context)
1324
1325 parser.resolvers.add(MyResolver())
1326
1327 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1328 tree = parse(StringIO(xml), parser)
1329 root = tree.getroot()
1330 self.assertEqual(root.text, test_url)
1331
1333 parse = self.etree.parse
1334 parser = self.etree.XMLParser(dtd_validation=True)
1335 assertEqual = self.assertEqual
1336 test_url = _str("__nosuch.dtd")
1337
class MyResolver(self.etree.Resolver):
    """Resolver that answers with a DTD wrapped in a ``SillyFileLike``.

    The DTD declares ``myentity`` with the requested URL as its value and
    allows any content inside ``doc``; the wrapper object is passed to
    ``resolve_file``.
    """

    def resolve(self, url, id, context):
        # Only the DTD named by the test document may be requested.
        assertEqual(url, test_url)
        dtd_text = _str('<!ENTITY myentity "%s">\n<!ELEMENT doc ANY>') % url
        return self.resolve_file(SillyFileLike(dtd_text), context)
1345
1346 parser.resolvers.add(MyResolver())
1347
1348 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1349 tree = parse(StringIO(xml), parser)
1350 root = tree.getroot()
1351 self.assertEqual(root.text, test_url)
1352
1354 parse = self.etree.parse
1355 parser = self.etree.XMLParser(attribute_defaults=True)
1356 assertEqual = self.assertEqual
1357 test_url = _str("__nosuch.dtd")
1358
class MyResolver(self.etree.Resolver):
    """Resolver that redirects the requested DTD to ``test.dtd`` in the
    test directory by file name.
    """

    def resolve(self, url, id, context):
        # Only the DTD named by the test document may be requested.
        assertEqual(url, test_url)
        dtd_path = fileInTestDir('test.dtd')
        return self.resolve_filename(dtd_path, context)
1364
1365 parser.resolvers.add(MyResolver())
1366
1367 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1368 tree = parse(StringIO(xml), parser)
1369 root = tree.getroot()
1370 self.assertEqual(
1371 root.attrib, {'default': 'valueA'})
1372 self.assertEqual(
1373 root[0].attrib, {'default': 'valueB'})
1374
1389
1390 parser.resolvers.add(MyResolver())
1391
1392 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1393 tree = parse(StringIO(xml), parser,
1394 base_url=fileUrlInTestDir('__test.xml'))
1395 root = tree.getroot()
1396 self.assertEqual(
1397 root.attrib, {'default': 'valueA'})
1398 self.assertEqual(
1399 root[0].attrib, {'default': 'valueB'})
1400
1402 parse = self.etree.parse
1403 parser = self.etree.XMLParser(attribute_defaults=True)
1404 assertEqual = self.assertEqual
1405 test_url = _str("__nosuch.dtd")
1406
class MyResolver(self.etree.Resolver):
    """Resolver that redirects the requested DTD to an open binary file
    handle on ``test.dtd`` in the test directory.
    """

    def resolve(self, url, id, context):
        # Only the DTD named by the test document may be requested.
        assertEqual(url, test_url)
        # NOTE(review): the handle is not closed in this block; presumably
        # the resolver machinery closes it after reading -- confirm.
        dtd_file = open(fileInTestDir('test.dtd'), 'rb')
        return self.resolve_file(dtd_file, context)
1412
1413 parser.resolvers.add(MyResolver())
1414
1415 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1416 tree = parse(StringIO(xml), parser)
1417 root = tree.getroot()
1418 self.assertEqual(
1419 root.attrib, {'default': 'valueA'})
1420 self.assertEqual(
1421 root[0].attrib, {'default': 'valueB'})
1422
1424 parse = self.etree.parse
1425 parser = self.etree.XMLParser(load_dtd=True)
1426 assertEqual = self.assertEqual
1427 test_url = _str("__nosuch.dtd")
1428
# Mutable flag holder: a class attribute can be set from inside a nested
# callback without needing ``nonlocal``.
class check(object):
    resolved = False
1431
class MyResolver(self.etree.Resolver):
    """Resolver that records that it was called and answers with an
    empty document via ``resolve_empty``.
    """

    def resolve(self, url, id, context):
        # Only the DTD named by the test document may be requested.
        assertEqual(url, test_url)
        # Leave a trace so the caller can verify the resolver was used.
        check.resolved = True
        return self.resolve_empty(context)
1437
1438 parser.resolvers.add(MyResolver())
1439
1440 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1441 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1442 self.assertTrue(check.resolved)
1443
1450
class MyResolver(self.etree.Resolver):
    """Resolver that fails every lookup with ``_LocalException``."""

    def resolve(self, url, id, context):
        raise _LocalException()
1454
1455 parser.resolvers.add(MyResolver())
1456
1457 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1458 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1459
1460 if etree.LIBXML_VERSION > (2,6,20):
1477
1479 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1480 <root>
1481 <child1/>
1482 <child2/>
1483 <child3> </child3>
1484 </root>''')
1485
1486 parser = self.etree.XMLParser(resolve_entities=False)
1487 root = etree.fromstring(xml, parser)
1488 self.assertEqual([ el.tag for el in root ],
1489 ['child1', 'child2', 'child3'])
1490
1491 root[0] = root[-1]
1492 self.assertEqual([ el.tag for el in root ],
1493 ['child3', 'child2'])
1494 self.assertEqual(root[0][0].text, ' ')
1495 self.assertEqual(root[0][0].name, 'nbsp')
1496
1512
1519
1521 Entity = self.etree.Entity
1522 self.assertRaises(ValueError, Entity, 'a b c')
1523 self.assertRaises(ValueError, Entity, 'a,b')
1524 self.assertRaises(ValueError, Entity, 'a\0b')
1525 self.assertRaises(ValueError, Entity, '#abc')
1526 self.assertRaises(ValueError, Entity, '#xxyz')
1527
1540
1561
1574
1586
1595
1604
1605
1615
1624
1626 Element = self.etree.Element
1627 SubElement = self.etree.SubElement
1628 root = Element('root')
1629 self.assertRaises(ValueError, root.append, root)
1630 child = SubElement(root, 'child')
1631 self.assertRaises(ValueError, child.append, root)
1632 child2 = SubElement(child, 'child2')
1633 self.assertRaises(ValueError, child2.append, root)
1634 self.assertRaises(ValueError, child2.append, child)
1635 self.assertEqual('child2', root[0][0].tag)
1636
1649
1662
1673
1684
1694
1704
1720
1736
1742
1757
1770
1785
1798
1813
1826
1841
1854
1855
1863
1864
1874
1875
1890
1891
1901
1902
1913
1940
1941
1943 self.assertRaises(TypeError, self.etree.dump, None)
1944
1957
1970
1991
2000
2002 XML = self.etree.XML
2003
2004 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2005 result = []
2006 for el in root.iterchildren(reversed=True):
2007 result.append(el.tag)
2008 self.assertEqual(['three', 'two', 'one'], result)
2009
2018
2027
2036
2038 XML = self.etree.XML
2039
2040 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2041 result = []
2042 for el in root.iterchildren(tag=['two', 'three']):
2043 result.append(el.text)
2044 self.assertEqual(['Two', 'Bla', None], result)
2045
2047 XML = self.etree.XML
2048
2049 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2050 result = []
2051 for el in root.iterchildren('two', 'three'):
2052 result.append(el.text)
2053 self.assertEqual(['Two', 'Bla', None], result)
2054
2056 XML = self.etree.XML
2057
2058 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2059 result = []
2060 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2061 result.append(el.text)
2062 self.assertEqual([None, 'Bla', 'Two'], result)
2063
2084
2106
2108 Element = self.etree.Element
2109 SubElement = self.etree.SubElement
2110
2111 a = Element('a')
2112 b = SubElement(a, 'b')
2113 c = SubElement(a, 'c')
2114 d = SubElement(b, 'd')
2115 self.assertEqual(
2116 [b, a],
2117 list(d.iterancestors(tag=('a', 'b'))))
2118 self.assertEqual(
2119 [b, a],
2120 list(d.iterancestors('a', 'b')))
2121
2122 self.assertEqual(
2123 [],
2124 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2125 self.assertEqual(
2126 [],
2127 list(d.iterancestors('w', 'x', 'y', 'z')))
2128
2129 self.assertEqual(
2130 [],
2131 list(d.iterancestors(tag=('d', 'x'))))
2132 self.assertEqual(
2133 [],
2134 list(d.iterancestors('d', 'x')))
2135
2136 self.assertEqual(
2137 [b, a],
2138 list(d.iterancestors(tag=('b', '*'))))
2139 self.assertEqual(
2140 [b, a],
2141 list(d.iterancestors('b', '*')))
2142
2143 self.assertEqual(
2144 [b],
2145 list(d.iterancestors(tag=('b', 'c'))))
2146 self.assertEqual(
2147 [b],
2148 list(d.iterancestors('b', 'c')))
2149
2166
2168 Element = self.etree.Element
2169 SubElement = self.etree.SubElement
2170
2171 a = Element('a')
2172 b = SubElement(a, 'b')
2173 c = SubElement(a, 'c')
2174 d = SubElement(b, 'd')
2175 e = SubElement(c, 'e')
2176
2177 self.assertEqual(
2178 [],
2179 list(a.iterdescendants('a')))
2180 self.assertEqual(
2181 [],
2182 list(a.iterdescendants(tag='a')))
2183
2184 a2 = SubElement(e, 'a')
2185 self.assertEqual(
2186 [a2],
2187 list(a.iterdescendants('a')))
2188
2189 self.assertEqual(
2190 [a2],
2191 list(c.iterdescendants('a')))
2192 self.assertEqual(
2193 [a2],
2194 list(c.iterdescendants(tag='a')))
2195
2197 Element = self.etree.Element
2198 SubElement = self.etree.SubElement
2199
2200 a = Element('a')
2201 b = SubElement(a, 'b')
2202 c = SubElement(a, 'c')
2203 d = SubElement(b, 'd')
2204 e = SubElement(c, 'e')
2205
2206 self.assertEqual(
2207 [b, e],
2208 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2209 self.assertEqual(
2210 [b, e],
2211 list(a.iterdescendants('a', 'b', 'e')))
2212
2213 a2 = SubElement(e, 'a')
2214 self.assertEqual(
2215 [b, a2],
2216 list(a.iterdescendants(tag=('a', 'b'))))
2217 self.assertEqual(
2218 [b, a2],
2219 list(a.iterdescendants('a', 'b')))
2220
2221 self.assertEqual(
2222 [],
2223 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2224 self.assertEqual(
2225 [],
2226 list(c.iterdescendants('x', 'y', 'z')))
2227
2228 self.assertEqual(
2229 [b, d, c, e, a2],
2230 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2231 self.assertEqual(
2232 [b, d, c, e, a2],
2233 list(a.iterdescendants('x', 'y', 'z', '*')))
2234
2252
2269
2287
2311
2313 Element = self.etree.Element
2314 SubElement = self.etree.SubElement
2315
2316 a = Element('a')
2317 b = SubElement(a, 'b')
2318 c = SubElement(a, 'c')
2319 d = SubElement(b, 'd')
2320 self.assertEqual(
2321 [],
2322 list(a.itersiblings(tag='XXX')))
2323 self.assertEqual(
2324 [c],
2325 list(b.itersiblings(tag='c')))
2326 self.assertEqual(
2327 [c],
2328 list(b.itersiblings(tag='*')))
2329 self.assertEqual(
2330 [b],
2331 list(c.itersiblings(preceding=True, tag='b')))
2332 self.assertEqual(
2333 [],
2334 list(c.itersiblings(preceding=True, tag='c')))
2335
2337 Element = self.etree.Element
2338 SubElement = self.etree.SubElement
2339
2340 a = Element('a')
2341 b = SubElement(a, 'b')
2342 c = SubElement(a, 'c')
2343 d = SubElement(b, 'd')
2344 e = SubElement(a, 'e')
2345 self.assertEqual(
2346 [],
2347 list(a.itersiblings(tag=('XXX', 'YYY'))))
2348 self.assertEqual(
2349 [c, e],
2350 list(b.itersiblings(tag=('c', 'd', 'e'))))
2351 self.assertEqual(
2352 [b],
2353 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2354 self.assertEqual(
2355 [c, b],
2356 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2357
2359 parseid = self.etree.parseid
2360 XML = self.etree.XML
2361 xml_text = _bytes('''
2362 <!DOCTYPE document [
2363 <!ELEMENT document (h1,p)*>
2364 <!ELEMENT h1 (#PCDATA)>
2365 <!ATTLIST h1 myid ID #REQUIRED>
2366 <!ELEMENT p (#PCDATA)>
2367 <!ATTLIST p someid ID #REQUIRED>
2368 ]>
2369 <document>
2370 <h1 myid="chapter1">...</h1>
2371 <p id="note1" class="note">...</p>
2372 <p>Regular paragraph.</p>
2373 <p xml:id="xmlid">XML:ID paragraph.</p>
2374 <p someid="warn1" class="warning">...</p>
2375 </document>
2376 ''')
2377
2378 tree, dic = parseid(BytesIO(xml_text))
2379 root = tree.getroot()
2380 root2 = XML(xml_text)
2381 self.assertEqual(self._writeElement(root),
2382 self._writeElement(root2))
2383 expected = {
2384 "chapter1" : root[0],
2385 "xmlid" : root[3],
2386 "warn1" : root[4]
2387 }
2388 self.assertTrue("chapter1" in dic)
2389 self.assertTrue("warn1" in dic)
2390 self.assertTrue("xmlid" in dic)
2391 self._checkIDDict(dic, expected)
2392
2394 XMLDTDID = self.etree.XMLDTDID
2395 XML = self.etree.XML
2396 xml_text = _bytes('''
2397 <!DOCTYPE document [
2398 <!ELEMENT document (h1,p)*>
2399 <!ELEMENT h1 (#PCDATA)>
2400 <!ATTLIST h1 myid ID #REQUIRED>
2401 <!ELEMENT p (#PCDATA)>
2402 <!ATTLIST p someid ID #REQUIRED>
2403 ]>
2404 <document>
2405 <h1 myid="chapter1">...</h1>
2406 <p id="note1" class="note">...</p>
2407 <p>Regular paragraph.</p>
2408 <p xml:id="xmlid">XML:ID paragraph.</p>
2409 <p someid="warn1" class="warning">...</p>
2410 </document>
2411 ''')
2412
2413 root, dic = XMLDTDID(xml_text)
2414 root2 = XML(xml_text)
2415 self.assertEqual(self._writeElement(root),
2416 self._writeElement(root2))
2417 expected = {
2418 "chapter1" : root[0],
2419 "xmlid" : root[3],
2420 "warn1" : root[4]
2421 }
2422 self.assertTrue("chapter1" in dic)
2423 self.assertTrue("warn1" in dic)
2424 self.assertTrue("xmlid" in dic)
2425 self._checkIDDict(dic, expected)
2426
2428 XMLDTDID = self.etree.XMLDTDID
2429 XML = self.etree.XML
2430 xml_text = _bytes('''
2431 <document>
2432 <h1 myid="chapter1">...</h1>
2433 <p id="note1" class="note">...</p>
2434 <p>Regular paragraph.</p>
2435 <p someid="warn1" class="warning">...</p>
2436 </document>
2437 ''')
2438
2439 root, dic = XMLDTDID(xml_text)
2440 root2 = XML(xml_text)
2441 self.assertEqual(self._writeElement(root),
2442 self._writeElement(root2))
2443 expected = {}
2444 self._checkIDDict(dic, expected)
2445
2447 XMLDTDID = self.etree.XMLDTDID
2448 XML = self.etree.XML
2449 xml_text = _bytes('''
2450 <!DOCTYPE document [
2451 <!ELEMENT document (h1,p)*>
2452 <!ELEMENT h1 (#PCDATA)>
2453 <!ATTLIST h1 myid ID #REQUIRED>
2454 <!ELEMENT p (#PCDATA)>
2455 <!ATTLIST p someid ID #REQUIRED>
2456 ]>
2457 <document>
2458 <h1 myid="chapter1">...</h1>
2459 <p id="note1" class="note">...</p>
2460 <p>Regular paragraph.</p>
2461 <p xml:id="xmlid">XML:ID paragraph.</p>
2462 <p someid="warn1" class="warning">...</p>
2463 </document>
2464 ''')
2465
2466 parser = etree.XMLParser(collect_ids=False)
2467 root, dic = XMLDTDID(xml_text, parser=parser)
2468 root2 = XML(xml_text)
2469 self.assertEqual(self._writeElement(root),
2470 self._writeElement(root2))
2471 self.assertFalse(dic)
2472 self._checkIDDict(dic, {})
2473
2475 self.assertEqual(len(dic),
2476 len(expected))
2477 self.assertEqual(sorted(dic.items()),
2478 sorted(expected.items()))
2479 if sys.version_info < (3,):
2480 self.assertEqual(sorted(dic.iteritems()),
2481 sorted(expected.iteritems()))
2482 self.assertEqual(sorted(dic.keys()),
2483 sorted(expected.keys()))
2484 if sys.version_info < (3,):
2485 self.assertEqual(sorted(dic.iterkeys()),
2486 sorted(expected.iterkeys()))
2487 if sys.version_info < (3,):
2488 self.assertEqual(sorted(dic.values()),
2489 sorted(expected.values()))
2490 self.assertEqual(sorted(dic.itervalues()),
2491 sorted(expected.itervalues()))
2492
2494 etree = self.etree
2495
2496 r = {'foo': 'http://ns.infrae.com/foo'}
2497 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2498 self.assertEqual(
2499 'foo',
2500 e.prefix)
2501 self.assertEqual(
2502 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2503 self._writeElement(e))
2504
2506 etree = self.etree
2507
2508 r = {None: 'http://ns.infrae.com/foo'}
2509 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2510 self.assertEqual(
2511 None,
2512 e.prefix)
2513 self.assertEqual(
2514 '{http://ns.infrae.com/foo}bar',
2515 e.tag)
2516 self.assertEqual(
2517 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2518 self._writeElement(e))
2519
2521 etree = self.etree
2522
2523 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2524 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2525 self.assertEqual(None, e.prefix)
2526 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2527 self.assertEqual(
2528 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2529 self._writeElement(e))
2530
2532 etree = self.etree
2533
2534 r = {None: 'http://ns.infrae.com/foo',
2535 'hoi': 'http://ns.infrae.com/hoi'}
2536 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2537 e.set('{http://ns.infrae.com/hoi}test', 'value')
2538 self.assertEqual(
2539 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2540 self._writeElement(e))
2541
2543 etree = self.etree
2544
2545 root = etree.Element('{http://test/ns}root',
2546 nsmap={None: 'http://test/ns'})
2547 sub = etree.Element('{http://test/ns}sub',
2548 nsmap={'test': 'http://test/ns'})
2549
2550 sub.attrib['{http://test/ns}attr'] = 'value'
2551 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2552 self.assertEqual(
2553 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2554 etree.tostring(sub))
2555
2556 root.append(sub)
2557 self.assertEqual(
2558 _bytes('<root xmlns="http://test/ns">'
2559 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2560 '</root>'),
2561 etree.tostring(root))
2562
2564 etree = self.etree
2565
2566 root = etree.Element('root')
2567 sub = etree.Element('{http://test/ns}sub',
2568 nsmap={'test': 'http://test/ns'})
2569
2570 sub.attrib['{http://test/ns}attr'] = 'value'
2571 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2572 self.assertEqual(
2573 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2574 etree.tostring(sub))
2575
2576 root.append(sub)
2577 self.assertEqual(
2578 _bytes('<root>'
2579 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2580 '</root>'),
2581 etree.tostring(root))
2582
2584 etree = self.etree
2585
2586 root = etree.Element('root')
2587 sub = etree.Element('{http://test/ns}sub',
2588 nsmap={None: 'http://test/ns'})
2589
2590 sub.attrib['{http://test/ns}attr'] = 'value'
2591 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2592 self.assertEqual(
2593 _bytes('<sub xmlns="http://test/ns" '
2594 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2595 etree.tostring(sub))
2596
2597 root.append(sub)
2598 self.assertEqual(
2599 _bytes('<root>'
2600 '<sub xmlns="http://test/ns"'
2601 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2602 '</root>'),
2603 etree.tostring(root))
2604
2606 etree = self.etree
2607
2608 root = etree.Element('{http://test/ns}root',
2609 nsmap={'test': 'http://test/ns',
2610 None: 'http://test/ns'})
2611 sub = etree.Element('{http://test/ns}sub',
2612 nsmap={None: 'http://test/ns'})
2613
2614 sub.attrib['{http://test/ns}attr'] = 'value'
2615 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2616 self.assertEqual(
2617 _bytes('<sub xmlns="http://test/ns" '
2618 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2619 etree.tostring(sub))
2620
2621 root.append(sub)
2622 self.assertEqual(
2623 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2624 '<test:sub test:attr="value"/>'
2625 '</test:root>'),
2626 etree.tostring(root))
2627
2629 etree = self.etree
2630 r = {None: 'http://ns.infrae.com/foo',
2631 'hoi': 'http://ns.infrae.com/hoi'}
2632 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2633 tree = etree.ElementTree(element=e)
2634 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2635 self.assertEqual(
2636 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2637 self._writeElement(e))
2638
2640 etree = self.etree
2641
2642 r = {None: 'http://ns.infrae.com/foo'}
2643 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2644 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2645
2646 e1.append(e2)
2647
2648 self.assertEqual(
2649 None,
2650 e1.prefix)
2651 self.assertEqual(
2652 None,
2653 e1[0].prefix)
2654 self.assertEqual(
2655 '{http://ns.infrae.com/foo}bar',
2656 e1.tag)
2657 self.assertEqual(
2658 '{http://ns.infrae.com/foo}bar',
2659 e1[0].tag)
2660
2662 etree = self.etree
2663
2664 r = {None: 'http://ns.infrae.com/BAR'}
2665 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2666 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2667
2668 e1.append(e2)
2669
2670 self.assertEqual(
2671 None,
2672 e1.prefix)
2673 self.assertNotEqual(
2674 None,
2675 e2.prefix)
2676 self.assertEqual(
2677 '{http://ns.infrae.com/BAR}bar',
2678 e1.tag)
2679 self.assertEqual(
2680 '{http://ns.infrae.com/foo}bar',
2681 e2.tag)
2682
2684 ns_href = "http://a.b.c"
2685 one = self.etree.fromstring(
2686 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2687 baz = one[0][0]
2688
2689 two = self.etree.fromstring(
2690 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2691 two.append(baz)
2692 del one
2693
2694 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2695 self.assertEqual(
2696 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2697 self.etree.tostring(two))
2698
2712
2729
2740
2742 xml = ('<root>' +
2743 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2744 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2745 root = self.etree.fromstring(xml)
2746 self.assertEqual(xml, self.etree.tostring(root))
2747 self.etree.cleanup_namespaces(root)
2748 self.assertEqual(
2749 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2750 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2751 self.etree.tostring(root))
2752
2754 xml = ('<root>' +
2755 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2756 '<n64:x xmlns:a="A" a:attr="X"/>' +
2757 '</a>'*100 +
2758 '</root>').encode('utf8')
2759 root = self.etree.fromstring(xml)
2760 self.assertEqual(xml, self.etree.tostring(root))
2761 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2762 self.assertEqual(
2763 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2764 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2765 self.etree.tostring(root))
2766
2768 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2769 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2770 '<foo>foo:bar</foo>'
2771 '</root>').encode('utf8')
2772 root = self.etree.fromstring(xml)
2773 self.assertEqual(xml, self.etree.tostring(root))
2774 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2775 self.assertEqual(
2776 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2777 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2778 b'<foo>foo:bar</foo>'
2779 b'</root>',
2780 self.etree.tostring(root))
2781
2783 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2784 '<sub xmlns:foo="FOO">'
2785 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2786 '<foo>foo:bar</foo>'
2787 '</sub>'
2788 '</root>').encode('utf8')
2789 root = self.etree.fromstring(xml)
2790 self.assertEqual(xml, self.etree.tostring(root))
2791 self.etree.cleanup_namespaces(
2792 root,
2793 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
2794 keep_ns_prefixes=['foo'])
2795 self.assertEqual(
2796 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2797 b'<sub>'
2798 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2799 b'<foo>foo:bar</foo>'
2800 b'</sub>'
2801 b'</root>',
2802 self.etree.tostring(root))
2803
2805 etree = self.etree
2806
2807 r = {None: 'http://ns.infrae.com/foo',
2808 'hoi': 'http://ns.infrae.com/hoi'}
2809 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2810 self.assertEqual(
2811 r,
2812 e.nsmap)
2813
2815 etree = self.etree
2816
2817 re = {None: 'http://ns.infrae.com/foo',
2818 'hoi': 'http://ns.infrae.com/hoi'}
2819 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2820
2821 rs = {None: 'http://ns.infrae.com/honk',
2822 'top': 'http://ns.infrae.com/top'}
2823 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2824
2825 r = re.copy()
2826 r.update(rs)
2827 self.assertEqual(re, e.nsmap)
2828 self.assertEqual(r, s.nsmap)
2829
2831 etree = self.etree
2832 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2833 self.assertEqual({'hha': None}, el.nsmap)
2834
2836 Element = self.etree.Element
2837 SubElement = self.etree.SubElement
2838
2839 a = Element('a')
2840 b = SubElement(a, 'b')
2841 c = SubElement(a, 'c')
2842 d = SubElement(b, 'd')
2843 e = SubElement(c, 'e')
2844 f = SubElement(c, 'f')
2845
2846 self.assertEqual(
2847 [a, b],
2848 list(a.getiterator('a', 'b')))
2849 self.assertEqual(
2850 [],
2851 list(a.getiterator('x', 'y')))
2852 self.assertEqual(
2853 [a, f],
2854 list(a.getiterator('f', 'a')))
2855 self.assertEqual(
2856 [c, e, f],
2857 list(c.getiterator('c', '*', 'a')))
2858 self.assertEqual(
2859 [],
2860 list(a.getiterator( (), () )))
2861
2863 Element = self.etree.Element
2864 SubElement = self.etree.SubElement
2865
2866 a = Element('a')
2867 b = SubElement(a, 'b')
2868 c = SubElement(a, 'c')
2869 d = SubElement(b, 'd')
2870 e = SubElement(c, 'e')
2871 f = SubElement(c, 'f')
2872
2873 self.assertEqual(
2874 [a, b],
2875 list(a.getiterator( ('a', 'b') )))
2876 self.assertEqual(
2877 [],
2878 list(a.getiterator( ('x', 'y') )))
2879 self.assertEqual(
2880 [a, f],
2881 list(a.getiterator( ('f', 'a') )))
2882 self.assertEqual(
2883 [c, e, f],
2884 list(c.getiterator( ('c', '*', 'a') )))
2885 self.assertEqual(
2886 [],
2887 list(a.getiterator( () )))
2888
2890 Element = self.etree.Element
2891 SubElement = self.etree.SubElement
2892
2893 a = Element('{a}a')
2894 b = SubElement(a, '{a}b')
2895 c = SubElement(a, '{a}c')
2896 d = SubElement(b, '{b}d')
2897 e = SubElement(c, '{a}e')
2898 f = SubElement(c, '{b}f')
2899 g = SubElement(c, 'g')
2900
2901 self.assertEqual(
2902 [a],
2903 list(a.getiterator('{a}a')))
2904 self.assertEqual(
2905 [],
2906 list(a.getiterator('{b}a')))
2907 self.assertEqual(
2908 [],
2909 list(a.getiterator('a')))
2910 self.assertEqual(
2911 [a,b,d,c,e,f,g],
2912 list(a.getiterator('*')))
2913 self.assertEqual(
2914 [f],
2915 list(c.getiterator('{b}*')))
2916 self.assertEqual(
2917 [d, f],
2918 list(a.getiterator('{b}*')))
2919 self.assertEqual(
2920 [g],
2921 list(a.getiterator('g')))
2922 self.assertEqual(
2923 [g],
2924 list(a.getiterator('{}g')))
2925 self.assertEqual(
2926 [g],
2927 list(a.getiterator('{}*')))
2928
2930 Element = self.etree.Element
2931 SubElement = self.etree.SubElement
2932
2933 a = Element('{a}a')
2934 b = SubElement(a, '{nsA}b')
2935 c = SubElement(b, '{nsB}b')
2936 d = SubElement(a, 'b')
2937 e = SubElement(a, '{nsA}e')
2938 f = SubElement(e, '{nsB}e')
2939 g = SubElement(e, 'e')
2940
2941 self.assertEqual(
2942 [b, c, d],
2943 list(a.getiterator('{*}b')))
2944 self.assertEqual(
2945 [e, f, g],
2946 list(a.getiterator('{*}e')))
2947 self.assertEqual(
2948 [a, b, c, d, e, f, g],
2949 list(a.getiterator('{*}*')))
2950
2975
2991
3008
3010 a = etree.Element("a")
3011 b = etree.SubElement(a, "b")
3012 c = etree.SubElement(a, "c")
3013 d1 = etree.SubElement(c, "d")
3014 d2 = etree.SubElement(c, "d")
3015 c.text = d1.text = 'TEXT'
3016
3017 tree = etree.ElementTree(a)
3018 self.assertEqual('.', tree.getelementpath(a))
3019 self.assertEqual('c/d[1]', tree.getelementpath(d1))
3020 self.assertEqual('c/d[2]', tree.getelementpath(d2))
3021
3022 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3023 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3024
3025 tree = etree.ElementTree(c)
3026 self.assertEqual('.', tree.getelementpath(c))
3027 self.assertEqual('d[2]', tree.getelementpath(d2))
3028 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3029
3030 tree = etree.ElementTree(b)
3031 self.assertEqual('.', tree.getelementpath(b))
3032 self.assertRaises(ValueError, tree.getelementpath, a)
3033 self.assertRaises(ValueError, tree.getelementpath, c)
3034 self.assertRaises(ValueError, tree.getelementpath, d2)
3035
3037 a = etree.Element("{http://ns1/}a")
3038 b = etree.SubElement(a, "{http://ns1/}b")
3039 c = etree.SubElement(a, "{http://ns1/}c")
3040 d1 = etree.SubElement(c, "{http://ns1/}d")
3041 d2 = etree.SubElement(c, "{http://ns2/}d")
3042 d3 = etree.SubElement(c, "{http://ns1/}d")
3043
3044 tree = etree.ElementTree(a)
3045 self.assertEqual('.', tree.getelementpath(a))
3046 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3047 tree.getelementpath(d1))
3048 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3049 tree.getelementpath(d2))
3050 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3051 tree.getelementpath(d3))
3052
3053 self.assertEqual(a, tree.find(tree.getelementpath(a)))
3054 self.assertEqual(b, tree.find(tree.getelementpath(b)))
3055 self.assertEqual(c, tree.find(tree.getelementpath(c)))
3056 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3057 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3058 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3059
3060 tree = etree.ElementTree(c)
3061 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3062 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3063 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3064 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3065 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3066 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3067
3068 tree = etree.ElementTree(b)
3069 self.assertRaises(ValueError, tree.getelementpath, d1)
3070 self.assertRaises(ValueError, tree.getelementpath, d2)
3071
3078
3085
3094
3096 XML = self.etree.XML
3097 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3098 self.assertEqual(len(root.findall(".//{X}b")), 2)
3099 self.assertEqual(len(root.findall(".//{X}*")), 2)
3100 self.assertEqual(len(root.findall(".//b")), 3)
3101
3103 XML = self.etree.XML
3104 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3105 nsmap = {'xx': 'X'}
3106 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3107 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3108 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3109 nsmap = {'xx': 'Y'}
3110 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3111 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3112 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3113
3115 XML = self.etree.XML
3116 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3117 nsmap = {'xx': 'X'}
3118 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3119 nsmap = {'xx': 'X', None: 'Y'}
3120 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3121 nsmap = {'xx': 'X', '': 'Y'}
3122 self.assertRaises(ValueError, root.findall, ".//xx:b", namespaces=nsmap)
3123
3130
3132 etree = self.etree
3133 e = etree.Element('foo')
3134 for i in range(10):
3135 etree.SubElement(e, 'a%s' % i)
3136 for i in range(10):
3137 self.assertEqual(
3138 i,
3139 e.index(e[i]))
3140 self.assertEqual(
3141 3, e.index(e[3], 3))
3142 self.assertRaises(
3143 ValueError, e.index, e[3], 4)
3144 self.assertRaises(
3145 ValueError, e.index, e[3], 0, 2)
3146 self.assertRaises(
3147 ValueError, e.index, e[8], 0, -3)
3148 self.assertRaises(
3149 ValueError, e.index, e[8], -5, -3)
3150 self.assertEqual(
3151 8, e.index(e[8], 0, -1))
3152 self.assertEqual(
3153 8, e.index(e[8], -12, -1))
3154 self.assertEqual(
3155 0, e.index(e[0], -12, -1))
3156
3158 etree = self.etree
3159 e = etree.Element('foo')
3160 for i in range(10):
3161 el = etree.SubElement(e, 'a%s' % i)
3162 el.text = "text%d" % i
3163 el.tail = "tail%d" % i
3164
3165 child0 = e[0]
3166 child1 = e[1]
3167 child2 = e[2]
3168
3169 e.replace(e[0], e[1])
3170 self.assertEqual(
3171 9, len(e))
3172 self.assertEqual(
3173 child1, e[0])
3174 self.assertEqual(
3175 child1.text, "text1")
3176 self.assertEqual(
3177 child1.tail, "tail1")
3178 self.assertEqual(
3179 child0.tail, "tail0")
3180 self.assertEqual(
3181 child2, e[1])
3182
3183 e.replace(e[-1], e[0])
3184 self.assertEqual(
3185 child1, e[-1])
3186 self.assertEqual(
3187 child1.text, "text1")
3188 self.assertEqual(
3189 child1.tail, "tail1")
3190 self.assertEqual(
3191 child2, e[0])
3192
3194 etree = self.etree
3195 e = etree.Element('foo')
3196 for i in range(10):
3197 etree.SubElement(e, 'a%s' % i)
3198
3199 new_element = etree.Element("test")
3200 new_element.text = "TESTTEXT"
3201 new_element.tail = "TESTTAIL"
3202 child1 = e[1]
3203 e.replace(e[0], new_element)
3204 self.assertEqual(
3205 new_element, e[0])
3206 self.assertEqual(
3207 "TESTTEXT",
3208 e[0].text)
3209 self.assertEqual(
3210 "TESTTAIL",
3211 e[0].tail)
3212 self.assertEqual(
3213 child1, e[1])
3214
3230
3248
3266
3284
3286 Element = self.etree.Element
3287 SubElement = self.etree.SubElement
3288 try:
3289 slice
3290 except NameError:
3291 print("slice() not found")
3292 return
3293
3294 a = Element('a')
3295 b = SubElement(a, 'b')
3296 c = SubElement(a, 'c')
3297 d = SubElement(a, 'd')
3298 e = SubElement(a, 'e')
3299
3300 x = Element('x')
3301 y = Element('y')
3302 z = Element('z')
3303
3304 self.assertRaises(
3305 ValueError,
3306 operator.setitem, a, slice(1,None,2), [x, y, z])
3307
3308 self.assertEqual(
3309 [b, c, d, e],
3310 list(a))
3311
3324
3326 XML = self.etree.XML
3327 root = XML(_bytes(
3328 '<?xml version="1.0"?>\n'
3329 '<root>' + '\n' * 65536 +
3330 '<p>' + '\n' * 65536 + '</p>\n' +
3331 '<br/>\n'
3332 '</root>'))
3333
3334 if self.etree.LIBXML_VERSION >= (2, 9):
3335 expected = [2, 131074, 131076]
3336 else:
3337 expected = [2, 65535, 65535]
3338
3339 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3340
3348
3357
3367
3377
3383
3391
3397
3404
3410
3412 etree = self.etree
3413 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3414 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3415 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3416 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3417
3418 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3419
3420 tree = etree.parse(BytesIO(xml))
3421 docinfo = tree.docinfo
3422 self.assertEqual(docinfo.encoding, "ascii")
3423 self.assertEqual(docinfo.xml_version, "1.0")
3424 self.assertEqual(docinfo.public_id, pub_id)
3425 self.assertEqual(docinfo.system_url, sys_id)
3426 self.assertEqual(docinfo.root_name, 'html')
3427 self.assertEqual(docinfo.doctype, doctype_string)
3428
3444
3456
3468
3474
3476 etree = self.etree
3477 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3478 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3479 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3480
3481 xml = _bytes('<!DOCTYPE root>\n<root/>')
3482 tree = etree.parse(BytesIO(xml))
3483 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3484 etree.tostring(tree, doctype=doctype_string))
3485
3487 etree = self.etree
3488 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3489 self.assertEqual(root.base, "http://no/such/url")
3490 self.assertEqual(
3491 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3492 root.base = "https://secret/url"
3493 self.assertEqual(root.base, "https://secret/url")
3494 self.assertEqual(
3495 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3496 "https://secret/url")
3497
3499 etree = self.etree
3500 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3501 self.assertEqual(root.base, "http://no/such/url")
3502 self.assertEqual(
3503 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3504 root.set('{http://www.w3.org/XML/1998/namespace}base',
3505 "https://secret/url")
3506 self.assertEqual(root.base, "https://secret/url")
3507 self.assertEqual(
3508 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3509 "https://secret/url")
3510
3516
3521
3528
3542
3544 Element = self.etree.Element
3545
3546 a = Element('a')
3547 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3548 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3549
3550 self.assertRaises(ValueError, Element, 'ha\0ho')
3551
3553 Element = self.etree.Element
3554
3555 a = Element('a')
3556 self.assertRaises(ValueError, setattr, a, "text",
3557 _str('ha\0ho'))
3558 self.assertRaises(ValueError, setattr, a, "tail",
3559 _str('ha\0ho'))
3560
3561 self.assertRaises(ValueError, Element,
3562 _str('ha\0ho'))
3563
3565 Element = self.etree.Element
3566
3567 a = Element('a')
3568 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3569 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3570
3571 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3572 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3573
3574 self.assertRaises(ValueError, Element, 'ha\x07ho')
3575 self.assertRaises(ValueError, Element, 'ha\x02ho')
3576
3578 Element = self.etree.Element
3579
3580 a = Element('a')
3581 self.assertRaises(ValueError, setattr, a, "text",
3582 _str('ha\x07ho'))
3583 self.assertRaises(ValueError, setattr, a, "text",
3584 _str('ha\x02ho'))
3585
3586 self.assertRaises(ValueError, setattr, a, "tail",
3587 _str('ha\x07ho'))
3588 self.assertRaises(ValueError, setattr, a, "tail",
3589 _str('ha\x02ho'))
3590
3591 self.assertRaises(ValueError, Element,
3592 _str('ha\x07ho'))
3593 self.assertRaises(ValueError, Element,
3594 _str('ha\x02ho'))
3595
3597 Element = self.etree.Element
3598
3599 a = Element('a')
3600 self.assertRaises(ValueError, setattr, a, "text",
3601 _str('ha\u1234\x07ho'))
3602 self.assertRaises(ValueError, setattr, a, "text",
3603 _str('ha\u1234\x02ho'))
3604
3605 self.assertRaises(ValueError, setattr, a, "tail",
3606 _str('ha\u1234\x07ho'))
3607 self.assertRaises(ValueError, setattr, a, "tail",
3608 _str('ha\u1234\x02ho'))
3609
3610 self.assertRaises(ValueError, Element,
3611 _str('ha\u1234\x07ho'))
3612 self.assertRaises(ValueError, Element,
3613 _str('ha\u1234\x02ho'))
3614
3628
3633
3651
3671
3673 tostring = self.etree.tostring
3674 html = self.etree.fromstring(
3675 '<html><body>'
3676 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3677 '</body></html>',
3678 parser=self.etree.HTMLParser())
3679 self.assertEqual(html.tag, 'html')
3680 div = html.find('.//div')
3681 self.assertEqual(div.tail, '\r\n')
3682 result = tostring(div, method='html')
3683 self.assertEqual(
3684 result,
3685 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3686 result = tostring(div, method='html', with_tail=True)
3687 self.assertEqual(
3688 result,
3689 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3690 result = tostring(div, method='html', with_tail=False)
3691 self.assertEqual(
3692 result,
3693 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3694
3716
3718 tostring = self.etree.tostring
3719 XML = self.etree.XML
3720 ElementTree = self.etree.ElementTree
3721
3722 root = XML(_bytes("<root/>"))
3723
3724 tree = ElementTree(root)
3725 self.assertEqual(None, tree.docinfo.standalone)
3726
3727 result = tostring(root, xml_declaration=True, encoding="ASCII")
3728 self.assertEqual(result, _bytes(
3729 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3730
3731 result = tostring(root, xml_declaration=True, encoding="ASCII",
3732 standalone=True)
3733 self.assertEqual(result, _bytes(
3734 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3735
3736 tree = ElementTree(XML(result))
3737 self.assertEqual(True, tree.docinfo.standalone)
3738
3739 result = tostring(root, xml_declaration=True, encoding="ASCII",
3740 standalone=False)
3741 self.assertEqual(result, _bytes(
3742 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3743
3744 tree = ElementTree(XML(result))
3745 self.assertEqual(False, tree.docinfo.standalone)
3746
3766
3768 tostring = self.etree.tostring
3769 Element = self.etree.Element
3770 SubElement = self.etree.SubElement
3771
3772 a = Element('a')
3773 a.text = "A"
3774 a.tail = "tail"
3775 b = SubElement(a, 'b')
3776 b.text = "B"
3777 b.tail = _str("Søk på nettet")
3778 c = SubElement(a, 'c')
3779 c.text = "C"
3780
3781 result = tostring(a, method="text", encoding="UTF-16")
3782
3783 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3784 result)
3785
3787 tostring = self.etree.tostring
3788 Element = self.etree.Element
3789 SubElement = self.etree.SubElement
3790
3791 a = Element('a')
3792 a.text = _str('Søk på nettetA')
3793 a.tail = "tail"
3794 b = SubElement(a, 'b')
3795 b.text = "B"
3796 b.tail = _str('Søk på nettetB')
3797 c = SubElement(a, 'c')
3798 c.text = "C"
3799
3800 self.assertRaises(UnicodeEncodeError,
3801 tostring, a, method="text")
3802
3803 self.assertEqual(
3804 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3805 tostring(a, encoding="UTF-8", method="text"))
3806
3819
3835
3839
3854
3872
3885
3887 tostring = self.etree.tostring
3888 Element = self.etree.Element
3889 SubElement = self.etree.SubElement
3890
3891 a = Element('a')
3892 b = SubElement(a, 'b')
3893 c = SubElement(a, 'c')
3894 d = SubElement(c, 'd')
3895 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3896 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3897 self.assertEqual(_bytes('<b></b>'),
3898 canonicalize(tostring(b, encoding=_unicode)))
3899 self.assertEqual(_bytes('<c><d></d></c>'),
3900 canonicalize(tostring(c, encoding=_unicode)))
3901
3906
3921
3923 tostring = self.etree.tostring
3924 Element = self.etree.Element
3925 SubElement = self.etree.SubElement
3926
3927 a = Element('a')
3928 b = SubElement(a, 'b')
3929 c = SubElement(a, 'c')
3930
3931 result = tostring(a, encoding=_unicode)
3932 self.assertEqual(result, "<a><b/><c/></a>")
3933
3934 result = tostring(a, encoding=_unicode, pretty_print=False)
3935 self.assertEqual(result, "<a><b/><c/></a>")
3936
3937 result = tostring(a, encoding=_unicode, pretty_print=True)
3938 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3939
3951
# Bare ElementBase subclass with no overrides; the surrounding test expects
# instances created via SubEl() to report 'SubEl' as their tag, and uses two
# of them to build a reference cycle before calling gc.collect().
3953 class SubEl(etree.ElementBase):
3954 pass
3955
3956 el1 = SubEl()
3957 el2 = SubEl()
3958 self.assertEqual('SubEl', el1.tag)
3959 self.assertEqual('SubEl', el2.tag)
3960 el1.other = el2
3961 el2.other = el1
3962
3963 del el1, el2
3964 gc.collect()
3965
3966
3980
3982 root = etree.Element('parent')
3983 c1 = etree.SubElement(root, 'child1')
3984 c2 = etree.SubElement(root, 'child2')
3985
3986 root.remove(c1)
3987 root.remove(c2)
3988 c1.addnext(c2)
3989 c1.tail = 'abc'
3990 c2.tail = 'xyz'
3991 del c1
3992
3993 c2.getprevious()
3994
3995 self.assertEqual('child1', c2.getprevious().tag)
3996 self.assertEqual('abc', c2.getprevious().tail)
3997
3998
3999
4000 - def _writeElement(self, element, encoding='us-ascii', compression=0):
4011
4056
4057 res_instance = res()
4058 parser = etree.XMLParser(load_dtd = True)
4059 parser.resolvers.add(res_instance)
4060
4061 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4062 parser = parser)
4063
4064 self.include(tree)
4065
4066 called = list(res_instance.called.items())
4067 called.sort()
4068 self.assertEqual(
4069 [("dtd", True), ("include", True), ("input", True)],
4070 called)
4071
4073 data = textwrap.dedent('''
4074 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4075 <foo/>
4076 <xi:include href="./test.xml" />
4077 </doc>
4078 ''')
4079
class Resolver(etree.Resolver):
    """Resolver that serves a chain of generated XInclude documents.

    Every request is recorded in ``called`` (name -> True), and each
    resource may be requested at most once (enforced with asserts).
    """

    # Class-level dict, so it is shared by all instances of this class.
    called = {}

    def resolve(self, url, id, context):
        """Record the request for *url* and return the matching input.

        * ``...test_xinclude.xml`` is recorded as ``"input"`` and ``None``
          is returned, i.e. this resolver supplies no content for it.
        * ``.../test5.xml`` is recorded as ``"DONE"`` and ends the chain
          with a ``<DONE/>`` document.
        * Any other URL is recorded under its last path component and
          answered with the enclosing ``data`` template, whose include
          href is rewritten to ``test<N>.xml`` where N is the number of
          entries recorded so far.
        """
        if url.endswith("test_xinclude.xml"):
            assert not self.called.get("input")
            self.called["input"] = True
            return None

        if url.endswith('/test5.xml'):
            assert not self.called.get("DONE")
            self.called["DONE"] = True
            return self.resolve_string('<DONE/>', context)

        # Raises ValueError if the URL contains no '/' at all.
        _, filename = url.rsplit('/', 1)
        assert not self.called.get(filename)
        self.called[filename] = True
        # The count includes the entry just added, so every generated
        # document points at a new, not yet requested file name.
        next_data = data.replace(
            'test.xml', 'test%d.xml' % len(self.called))
        return self.resolve_string(next_data, context)
4099
4100 res_instance = Resolver()
4101 parser = etree.XMLParser(load_dtd=True)
4102 parser.resolvers.add(res_instance)
4103
4104 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4105 parser=parser)
4106
4107 self.include(tree)
4108
4109 called = list(res_instance.called.items())
4110 called.sort()
4111 self.assertEqual(
4112 [("DONE", True), ("input", True), ("test.xml", True),
4113 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4114 called)
4115
4120
4126
4130 tree = self.parse(_bytes('<a><b/></a>'))
4131 f = BytesIO()
4132 tree.write_c14n(f)
4133 s = f.getvalue()
4134 self.assertEqual(_bytes('<a><b></b></a>'),
4135 s)
4136
4138 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4139 f = BytesIO()
4140 tree.write_c14n(f, compression=9)
4141 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4142 s = gzfile.read()
4143 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4144 s)
4145
4153
4162
4180
4192
4204
4206 tree = self.parse(_bytes(
4207 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4208 f = BytesIO()
4209 tree.write_c14n(f)
4210 s = f.getvalue()
4211 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4212 s)
4213 f = BytesIO()
4214 tree.write_c14n(f, exclusive=False)
4215 s = f.getvalue()
4216 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4217 s)
4218 f = BytesIO()
4219 tree.write_c14n(f, exclusive=True)
4220 s = f.getvalue()
4221 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4222 s)
4223
4224 f = BytesIO()
4225 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
4226 s = f.getvalue()
4227 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
4228 s)
4229
4231 tree = self.parse(_bytes(
4232 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4233 s = etree.tostring(tree, method='c14n')
4234 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4235 s)
4236 s = etree.tostring(tree, method='c14n', exclusive=False)
4237 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4238 s)
4239 s = etree.tostring(tree, method='c14n', exclusive=True)
4240 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4241 s)
4242
4243 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4244 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
4245 s)
4246
4248 tree = self.parse(_bytes(
4249 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4250 s = etree.tostring(tree.getroot(), method='c14n')
4251 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4252 s)
4253 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
4254 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4255 s)
4256 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
4257 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4258 s)
4259
4260 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
4261 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4262 s)
4263 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
4264 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
4265 s)
4266
4267 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4268 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4269 s)
4270
4272 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
4273 tree = self.parse(_bytes(
4274 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4275
4276 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
4277 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4278 s)
4279
4283 tree = self.parse(_bytes('<a><b/></a>'))
4284 f = BytesIO()
4285 tree.write(f)
4286 s = f.getvalue()
4287 self.assertEqual(_bytes('<a><b/></a>'),
4288 s)
4289
4291 tree = self.parse(_bytes('<a><b/></a>'))
4292 f = BytesIO()
4293 tree.write(f, doctype='HUHU')
4294 s = f.getvalue()
4295 self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
4296 s)
4297
4299 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4300 f = BytesIO()
4301 tree.write(f, compression=9)
4302 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4303 s = gzfile.read()
4304 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4305 s)
4306
4308 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4309 f = BytesIO()
4310 tree.write(f, compression=9, doctype='<!DOCTYPE a>')
4311 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4312 s = gzfile.read()
4313 self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
4314 s)
4315
4317 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4318 f = BytesIO()
4319 tree.write(f, compression=0)
4320 s0 = f.getvalue()
4321
4322 f = BytesIO()
4323 tree.write(f)
4324 self.assertEqual(f.getvalue(), s0)
4325
4326 f = BytesIO()
4327 tree.write(f, compression=1)
4328 s = f.getvalue()
4329 self.assertTrue(len(s) <= len(s0))
4330 with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
4331 s1 = gzfile.read()
4332
4333 f = BytesIO()
4334 tree.write(f, compression=9)
4335 s = f.getvalue()
4336 self.assertTrue(len(s) <= len(s0))
4337 with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
4338 s9 = gzfile.read()
4339
4340 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4341 s0)
4342 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4343 s1)
4344 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4345 s9)
4346
4354
4363
4371
4380
4383 etree = etree
4384
4406
4408 """This can't really be tested as long as there isn't a way to
4409 reset the logging setup ...
4410 """
4411 parse = self.etree.parse
4412
4413 messages = []
class Logger(self.etree.PyErrorLog):
    """Error log that collects each reported message in ``messages``."""

    def log(self, entry, message, *args):
        """Append *message* to the enclosing ``messages`` list.

        *entry* and any extra positional *args* are ignored.
        """
        messages.append(message)
4417
4418 self.etree.use_global_python_log(Logger())
4419 f = BytesIO('<a><b></c></b></a>')
4420 try:
4421 parse(f)
4422 except SyntaxError:
4423 pass
4424 f.close()
4425
4426 self.assertTrue([ message for message in messages
4427 if 'mismatch' in message ])
4428 self.assertTrue([ message for message in messages
4429 if ':PARSER:' in message])
4430 self.assertTrue([ message for message in messages
4431 if ':ERR_TAG_NAME_MISMATCH:' in message ])
4432 self.assertTrue([ message for message in messages
4433 if ':1:15:' in message ])
4434
4437 etree = etree
4438
4442
class Target(object):
    """Minimal parser-target stub that returns a string per callback.

    Each method returns a formatted marker string instead of building a
    tree, so callers can tell which callback produced a value.
    """

    def start(self, tag, attrib):
        """Return ``'start(<tag>)'``; *attrib* is ignored."""
        return 'start(%s)' % tag

    def end(self, tag):
        """Return ``'end(<tag>)'``."""
        return 'end(%s)' % tag

    def close(self):
        """Return the constant string ``'close()'``."""
        return 'close()'
4451
4452 parser = self.etree.XMLPullParser(target=Target())
4453 events = parser.read_events()
4454
4455 parser.feed('<root><element>')
4456 self.assertFalse(list(events))
4457 self.assertFalse(list(events))
4458 parser.feed('</element><child>')
4459 self.assertEqual([('end', 'end(element)')], list(events))
4460 parser.feed('</child>')
4461 self.assertEqual([('end', 'end(child)')], list(events))
4462 parser.feed('</root>')
4463 self.assertEqual([('end', 'end(root)')], list(events))
4464 self.assertFalse(list(events))
4465 self.assertEqual('close()', parser.close())
4466
class Target(object):
    """Minimal parser-target stub that returns a string per callback.

    Each method returns a formatted marker string instead of building a
    tree, so callers can tell which callback produced a value.
    """

    def start(self, tag, attrib):
        """Return ``'start(<tag>)'``; *attrib* is ignored."""
        return 'start(%s)' % tag

    def end(self, tag):
        """Return ``'end(<tag>)'``."""
        return 'end(%s)' % tag

    def close(self):
        """Return the constant string ``'close()'``."""
        return 'close()'
4475
4476 parser = self.etree.XMLPullParser(
4477 ['start', 'end'], target=Target())
4478 events = parser.read_events()
4479
4480 parser.feed('<root><element>')
4481 self.assertEqual(
4482 [('start', 'start(root)'), ('start', 'start(element)')],
4483 list(events))
4484 self.assertFalse(list(events))
4485 parser.feed('</element><child>')
4486 self.assertEqual(
4487 [('end', 'end(element)'), ('start', 'start(child)')],
4488 list(events))
4489 parser.feed('</child>')
4490 self.assertEqual(
4491 [('end', 'end(child)')],
4492 list(events))
4493 parser.feed('</root>')
4494 self.assertEqual(
4495 [('end', 'end(root)')],
4496 list(events))
4497 self.assertFalse(list(events))
4498 self.assertEqual('close()', parser.close())
4499
4501 parser = self.etree.XMLPullParser(
4502 ['start', 'end'], target=etree.TreeBuilder())
4503 events = parser.read_events()
4504
4505 parser.feed('<root><element>')
4506 self.assert_event_tags(
4507 events, [('start', 'root'), ('start', 'element')])
4508 self.assertFalse(list(events))
4509 parser.feed('</element><child>')
4510 self.assert_event_tags(
4511 events, [('end', 'element'), ('start', 'child')])
4512 parser.feed('</child>')
4513 self.assert_event_tags(
4514 events, [('end', 'child')])
4515 parser.feed('</root>')
4516 self.assert_event_tags(
4517 events, [('end', 'root')])
4518 self.assertFalse(list(events))
4519 root = parser.close()
4520 self.assertEqual('root', root.tag)
4521
class Target(etree.TreeBuilder):
    """TreeBuilder that appends ``'-huhu'`` to the tag of closed elements."""

    def end(self, tag):
        """Close the element via ``TreeBuilder.end``, rename and return it."""
        # Explicit two-argument super() keeps this Python 2 compatible,
        # matching the rest of the file.
        closed = super(Target, self).end(tag)
        closed.tag += '-huhu'
        return closed
4528
4529 parser = self.etree.XMLPullParser(
4530 ['start', 'end'], target=Target())
4531 events = parser.read_events()
4532
4533 parser.feed('<root><element>')
4534 self.assert_event_tags(
4535 events, [('start', 'root'), ('start', 'element')])
4536 self.assertFalse(list(events))
4537 parser.feed('</element><child>')
4538 self.assert_event_tags(
4539 events, [('end', 'element-huhu'), ('start', 'child')])
4540 parser.feed('</child>')
4541 self.assert_event_tags(
4542 events, [('end', 'child-huhu')])
4543 parser.feed('</root>')
4544 self.assert_event_tags(
4545 events, [('end', 'root-huhu')])
4546 self.assertFalse(list(events))
4547 root = parser.close()
4548 self.assertEqual('root-huhu', root.tag)
4549
4581
4582
if __name__ == '__main__':
    # Running this module directly executes no tests; it only tells the
    # user to go through test.py instead.
    print('to test use test.py %s' % __file__)
4585