1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 import os.path
13 import unittest
14 import copy
15 import sys
16 import re
17 import gc
18 import operator
19 import tempfile
20 import textwrap
21 import zlib
22 import gzip
23
24 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
25 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
26 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
27 from .common_imports import canonicalize, sorted, _str, _bytes
28
# Report which library versions this test run exercises.  The banner lines
# are collected first and written with a single print call; the leading and
# trailing empty entries produce the surrounding blank lines.
print("\n".join([
    "",
    "TESTED VERSION: %s" % etree.__version__,
    " Python: " + repr(sys.version_info),
    " lxml.etree: " + repr(etree.LXML_VERSION),
    " libxml used: " + repr(etree.LIBXML_VERSION),
    " libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION),
    " libxslt used: " + repr(etree.LIBXSLT_VERSION),
    " libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION),
    "",
]))
38
39 try:
40 _unicode = unicode
41 except NameError:
42
43 _unicode = str
44
45
47 """Tests only for etree, not ElementTree"""
48 etree = etree
49
60
69
77
84
86 Element = self.etree.Element
87 el = Element('name')
88 self.assertRaises(ValueError, Element, '{}')
89 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
90
91 self.assertRaises(ValueError, Element, '{test}')
92 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
93
101
103 Element = self.etree.Element
104 self.assertRaises(ValueError, Element, "p'name")
105 self.assertRaises(ValueError, Element, 'p"name')
106
107 self.assertRaises(ValueError, Element, "{test}p'name")
108 self.assertRaises(ValueError, Element, '{test}p"name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
112 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
113
115 Element = self.etree.Element
116 self.assertRaises(ValueError, Element, ' name ')
117 self.assertRaises(ValueError, Element, 'na me')
118 self.assertRaises(ValueError, Element, '{test} name')
119
120 el = Element('name')
121 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
122
130
138
140 Element = self.etree.Element
141 SubElement = self.etree.SubElement
142
143 el = Element('name')
144 self.assertRaises(ValueError, SubElement, el, "p'name")
145 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
146
147 self.assertRaises(ValueError, SubElement, el, 'p"name')
148 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
149
158
167
169 QName = self.etree.QName
170 self.assertRaises(ValueError, QName, '')
171 self.assertRaises(ValueError, QName, 'test', '')
172
174 QName = self.etree.QName
175 self.assertRaises(ValueError, QName, 'p:name')
176 self.assertRaises(ValueError, QName, 'test', 'p:name')
177
179 QName = self.etree.QName
180 self.assertRaises(ValueError, QName, ' name ')
181 self.assertRaises(ValueError, QName, 'na me')
182 self.assertRaises(ValueError, QName, 'test', ' name')
183
191
193
194 QName = self.etree.QName
195 qname1 = QName('http://myns', 'a')
196 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
197
198 qname2 = QName(a)
199 self.assertEqual(a.tag, qname1.text)
200 self.assertEqual(qname1.text, qname2.text)
201 self.assertEqual(qname1, qname2)
202
204
205 etree = self.etree
206 qname = etree.QName('http://myns', 'a')
207 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
208 a.text = qname
209
210 self.assertEqual("p:a", a.text)
211
220
235
241
251
263
265 Element = self.etree.Element
266
267 keys = ["attr%d" % i for i in range(10)]
268 values = ["TEST-%d" % i for i in range(10)]
269 items = list(zip(keys, values))
270
271 root = Element("root")
272 for key, value in items:
273 root.set(key, value)
274 self.assertEqual(keys, root.attrib.keys())
275 self.assertEqual(values, root.attrib.values())
276
277 root2 = Element("root2", root.attrib,
278 attr_99='TOAST-1', attr_98='TOAST-2')
279 self.assertEqual(['attr_98', 'attr_99'] + keys,
280 root2.attrib.keys())
281 self.assertEqual(['TOAST-2', 'TOAST-1'] + values,
282 root2.attrib.values())
283
284 self.assertEqual(keys, root.attrib.keys())
285 self.assertEqual(values, root.attrib.values())
286
294
308
330
332 XML = self.etree.XML
333 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
334
335 root = XML(xml)
336 self.etree.strip_elements(root, 'a')
337 self.assertEqual(_bytes('<test><x></x></test>'),
338 self._writeElement(root))
339
340 root = XML(xml)
341 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
342 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
343 self._writeElement(root))
344
345 root = XML(xml)
346 self.etree.strip_elements(root, 'c')
347 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
348 self._writeElement(root))
349
351 XML = self.etree.XML
352 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
353
354 root = XML(xml)
355 self.etree.strip_elements(root, 'a')
356 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
357 self._writeElement(root))
358
359 root = XML(xml)
360 self.etree.strip_elements(root, '{urn:a}b', 'c')
361 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
362 self._writeElement(root))
363
364 root = XML(xml)
365 self.etree.strip_elements(root, '{urn:a}*', 'c')
366 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
367 self._writeElement(root))
368
369 root = XML(xml)
370 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
371 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
372 self._writeElement(root))
373
392
418
445
472
491
504
515
521
523 XML = self.etree.XML
524 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
525 self.assertEqual(root[0].target, "mypi")
526 self.assertEqual(root[0].get('my'), "1")
527 self.assertEqual(root[0].get('test'), " abc ")
528 self.assertEqual(root[0].get('quotes'), "' '")
529 self.assertEqual(root[0].get('only'), None)
530 self.assertEqual(root[0].get('names'), None)
531 self.assertEqual(root[0].get('nope'), None)
532
534 XML = self.etree.XML
535 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
536 self.assertEqual(root[0].target, "mypi")
537 self.assertEqual(root[0].attrib['my'], "1")
538 self.assertEqual(root[0].attrib['test'], " abc ")
539 self.assertEqual(root[0].attrib['quotes'], "' '")
540 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
541 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
542 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
543
545
546 ProcessingInstruction = self.etree.ProcessingInstruction
547
548 a = ProcessingInstruction("PI", "ONE")
549 b = copy.deepcopy(a)
550 b.text = "ANOTHER"
551
552 self.assertEqual('ONE', a.text)
553 self.assertEqual('ANOTHER', b.text)
554
570
585
596
608
627
632
645
656
657 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
658 events = list(iterparse(f, events=('end', 'comment')))
659 root = events[-1][1]
660 self.assertEqual(6, len(events))
661 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
662 [ name(*item) for item in events ])
663 self.assertEqual(
664 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
665 tostring(root))
666
678
679 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
680 events = list(iterparse(f, events=('end', 'pi')))
681 root = events[-2][1]
682 self.assertEqual(8, len(events))
683 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
684 ('pid','d'), 'a', ('pie','e')],
685 [ name(*item) for item in events ])
686 self.assertEqual(
687 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
688 tostring(ElementTree(root)))
689
704
710
712 iterparse = self.etree.iterparse
713 f = BytesIO('<a><b><c/></a>')
714 it = iterparse(f, events=('start', 'end'), recover=True)
715 events = [(ev, el.tag) for ev, el in it]
716 root = it.root
717 self.assertTrue(root is not None)
718
719 self.assertEqual(1, events.count(('start', 'a')))
720 self.assertEqual(1, events.count(('end', 'a')))
721
722 self.assertEqual(1, events.count(('start', 'b')))
723 self.assertEqual(1, events.count(('end', 'b')))
724
725 self.assertEqual(1, events.count(('start', 'c')))
726 self.assertEqual(1, events.count(('end', 'c')))
727
729 iterparse = self.etree.iterparse
730 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
731 it = iterparse(f, events=('start', 'end'), recover=True)
732 events = [(ev, el.tag) for ev, el in it]
733 root = it.root
734 self.assertTrue(root is not None)
735
736 self.assertEqual(1, events.count(('start', 'a')))
737 self.assertEqual(1, events.count(('end', 'a')))
738
739 self.assertEqual(2, events.count(('start', 'b')))
740 self.assertEqual(2, events.count(('end', 'b')))
741
742 self.assertEqual(2, events.count(('start', 'c')))
743 self.assertEqual(2, events.count(('end', 'c')))
744
746 iterparse = self.etree.iterparse
747 f = BytesIO("""
748 <a> \n \n <b> b test </b> \n
749
750 \n\t <c> \n </c> </a> \n """)
751 iterator = iterparse(f, remove_blank_text=True)
752 text = [ (element.text, element.tail)
753 for event, element in iterator ]
754 self.assertEqual(
755 [(" b test ", None), (" \n ", None), (None, None)],
756 text)
757
759 iterparse = self.etree.iterparse
760 f = BytesIO('<a><b><d/></b><c/></a>')
761
762 iterator = iterparse(f, tag="b", events=('start', 'end'))
763 events = list(iterator)
764 root = iterator.root
765 self.assertEqual(
766 [('start', root[0]), ('end', root[0])],
767 events)
768
770 iterparse = self.etree.iterparse
771 f = BytesIO('<a><b><d/></b><c/></a>')
772
773 iterator = iterparse(f, tag="*", events=('start', 'end'))
774 events = list(iterator)
775 self.assertEqual(
776 8,
777 len(events))
778
780 iterparse = self.etree.iterparse
781 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
782
783 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
784 events = list(iterator)
785 root = iterator.root
786 self.assertEqual(
787 [('start', root[0]), ('end', root[0])],
788 events)
789
791 iterparse = self.etree.iterparse
792 f = BytesIO('<a><b><d/></b><c/></a>')
793 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
794 events = list(iterator)
795 root = iterator.root
796 self.assertEqual(
797 [('start', root[0]), ('end', root[0])],
798 events)
799
800 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
801 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
802 events = list(iterator)
803 root = iterator.root
804 self.assertEqual([], events)
805
807 iterparse = self.etree.iterparse
808 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
809 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
810 events = list(iterator)
811 self.assertEqual(8, len(events))
812
814 iterparse = self.etree.iterparse
815 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
816 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
817 events = list(iterator)
818 self.assertEqual([], events)
819
820 f = BytesIO('<a><b><d/></b><c/></a>')
821 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
822 events = list(iterator)
823 self.assertEqual(8, len(events))
824
826 text = _str('Søk på nettet')
827 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
828 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
829 ).encode('iso-8859-1')
830
831 self.assertRaises(self.etree.ParseError,
832 list, self.etree.iterparse(BytesIO(xml_latin1)))
833
835 text = _str('Søk på nettet', encoding="UTF-8")
836 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
837 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
838 ).encode('iso-8859-1')
839
840 iterator = self.etree.iterparse(BytesIO(xml_latin1),
841 encoding="iso-8859-1")
842 self.assertEqual(1, len(list(iterator)))
843
844 a = iterator.root
845 self.assertEqual(a.text, text)
846
848 tostring = self.etree.tostring
849 f = BytesIO('<root><![CDATA[test]]></root>')
850 context = self.etree.iterparse(f, strip_cdata=False)
851 content = [ el.text for event,el in context ]
852
853 self.assertEqual(['test'], content)
854 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
855 tostring(context.root))
856
860
865
884
885
886
909
910
911
def end(self, tag):
    """Record the end-tag callback and check the reported tag name.

    ``events`` and ``assertEqual`` are not defined in this method; they
    come from code outside it.
    """
    events.append("end")
    # The label is appended before the check, so it is recorded even if
    # the comparison fails.
    assertEqual("TAG", tag)
def close(self):
    """Return the fixed string ``"DONE"``."""
    return "DONE"
927
928 parser = self.etree.XMLParser(target=Target())
929 tree = self.etree.ElementTree()
930
931 self.assertRaises(TypeError,
932 tree.parse, BytesIO("<TAG/>"), parser=parser)
933 self.assertEqual(["start", "end"], events)
934
936
937 events = []
class Target(object):
    """Parser target that appends a label for every callback to ``events``.

    ``events`` is a list defined outside this class.  Ending an ``a``
    element raises ``ValueError`` after the ``end-a`` label was recorded.
    """

    def _note(self, kind, value=""):
        # Single place that builds the "<kind><value>" labels.
        events.append(kind + value)

    def start(self, tag, attrib):
        self._note("start-", tag)

    def end(self, tag):
        self._note("end-", tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        self._note("data-", data)

    def close(self):
        self._note("close")
        return "DONE"
950
951 parser = self.etree.XMLParser(target=Target())
952
953 try:
954 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
955 done = parser.close()
956 self.fail("error expected, but parsing succeeded")
957 except ValueError:
958 done = 'value error received as expected'
959
960 self.assertEqual(["start-root", "data-A", "start-a",
961 "data-ca", "end-a", "close"],
962 events)
963
965
966 events = []
class Target(object):
    """Event-logging parser target that fails while closing ``<a>``.

    Each callback pushes one label onto ``events``, a list that lives
    outside this class.  ``end`` logs first and then raises ``ValueError``
    for the tag ``a``; ``close`` logs ``"close"`` and returns ``"DONE"``.
    """

    def _push(self, label):
        events.append(label)

    def start(self, tag, attrib):
        self._push("start-" + tag)

    def end(self, tag):
        self._push("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        self._push("data-" + data)

    def close(self):
        self._push("close")
        return "DONE"
979
980 parser = self.etree.XMLParser(target=Target())
981
982 try:
983 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
984 parser=parser)
985 self.fail("error expected, but parsing succeeded")
986 except ValueError:
987 done = 'value error received as expected'
988
989 self.assertEqual(["start-root", "data-A", "start-a",
990 "data-ca", "end-a", "close"],
991 events)
992
994
995 events = []
class Target(object):
    """Parser target that logs element, text and comment callbacks.

    Every callback appends ``"<kind>-<value>"`` to ``events``, a list
    defined outside this class.  ``close`` logs nothing and returns
    ``"DONE"``.
    """

    def _note(self, kind, value):
        events.append(kind + value)

    def start(self, tag, attrib):
        self._note("start-", tag)

    def end(self, tag):
        self._note("end-", tag)

    def data(self, data):
        self._note("data-", data)

    def comment(self, text):
        self._note("comment-", text)

    def close(self):
        return "DONE"
1007
1008 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1009
1010 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1011 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1012 done = parser.close()
1013
1014 self.assertEqual("DONE", done)
1015 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1016 "start-sub", "end-sub", "data-B", "end-root"],
1017 events)
1018
def end(self, tag):
    """Append ``"end-<tag>"`` to ``events`` (a list defined outside this method)."""
    events.append("end-" + tag)
def data(self, data):
    """Append ``"data-<data>"`` to ``events`` (a list defined outside this method)."""
    events.append("data-" + data)
def comment(self, text):
    """Append ``"comment-<text>"`` to ``events`` (a list defined outside this method)."""
    events.append("comment-" + text)
def close(self):
    """Return the fixed string ``"DONE"`` without recording an event."""
    return "DONE"
1032
1033 parser = self.etree.XMLParser(target=Target())
1034
1035 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1036 done = parser.close()
1037
1038 self.assertEqual("DONE", done)
1039 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1040 "start-sub", "end-sub", "comment-c", "data-B",
1041 "end-root", "comment-d"],
1042 events)
1043
1045 events = []
class Target(object):
    """Parser target that logs element, text and processing-instruction callbacks.

    Labels are appended to ``events``, a list defined outside this class.
    A processing instruction is logged as ``"pi-<target>-<data>"``.
    ``close`` logs nothing and returns ``"DONE"``.
    """

    def _note(self, kind, value):
        events.append(kind + value)

    def start(self, tag, attrib):
        self._note("start-", tag)

    def end(self, tag):
        self._note("end-", tag)

    def data(self, data):
        self._note("data-", data)

    def pi(self, target, data):
        self._note("pi-", target + "-" + data)

    def close(self):
        return "DONE"
1057
1058 parser = self.etree.XMLParser(target=Target())
1059
1060 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1061 done = parser.close()
1062
1063 self.assertEqual("DONE", done)
1064 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1065 "data-B", "end-root", "pi-test-c"],
1066 events)
1067
1069 events = []
class Target(object):
    """Parser target that logs start, end and text callbacks.

    Each callback appends ``"<kind>-<value>"`` to ``events``, a list
    defined outside this class.  ``close`` logs nothing and returns
    ``"DONE"``.
    """

    def _push(self, label):
        events.append(label)

    def start(self, tag, attrib):
        self._push("start-" + tag)

    def end(self, tag):
        self._push("end-" + tag)

    def data(self, data):
        self._push("data-" + data)

    def close(self):
        return "DONE"
1079
1080 parser = self.etree.XMLParser(target=Target(),
1081 strip_cdata=False)
1082
1083 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1084 done = parser.close()
1085
1086 self.assertEqual("DONE", done)
1087 self.assertEqual(["start-root", "data-A", "start-a",
1088 "data-ca", "end-a", "data-B", "end-root"],
1089 events)
1090
1092 events = []
class Target(object):
    """Parser target that logs start, end, text and close callbacks.

    Labels go to ``events``, a list defined outside this class.  Unlike
    a target that only returns from ``close``, this one also records a
    ``"close"`` label before returning ``"DONE"``.
    """

    def _note(self, kind, value=""):
        events.append(kind + value)

    def start(self, tag, attrib):
        self._note("start-", tag)

    def end(self, tag):
        self._note("end-", tag)

    def data(self, data):
        self._note("data-", data)

    def close(self):
        self._note("close")
        return "DONE"
1103
1104 parser = self.etree.XMLParser(target=Target(),
1105 recover=True)
1106
1107 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1108 done = parser.close()
1109
1110 self.assertEqual("DONE", done)
1111 self.assertEqual(["start-root", "data-A", "start-a",
1112 "data-ca", "end-a", "data-B",
1113 "end-root", "close"],
1114 events)
1115
1125
1135
1144
1154
1156 iterwalk = self.etree.iterwalk
1157 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1158
1159 iterator = iterwalk(root, events=('start','end'))
1160 events = list(iterator)
1161 self.assertEqual(
1162 [('start', root), ('start', root[0]), ('end', root[0]),
1163 ('start', root[1]), ('end', root[1]), ('end', root)],
1164 events)
1165
1176
1178 iterwalk = self.etree.iterwalk
1179 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1180
1181 attr_name = '{testns}bla'
1182 events = []
1183 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1184 for event, elem in iterator:
1185 events.append(event)
1186 if event == 'start':
1187 if elem.tag != '{ns1}a':
1188 elem.set(attr_name, 'value')
1189
1190 self.assertEqual(
1191 ['start-ns', 'start', 'start', 'start-ns', 'start',
1192 'end', 'end-ns', 'end', 'end', 'end-ns'],
1193 events)
1194
1195 self.assertEqual(
1196 None,
1197 root.get(attr_name))
1198 self.assertEqual(
1199 'value',
1200 root[0].get(attr_name))
1201
1212
1214 parse = self.etree.parse
1215 parser = self.etree.XMLParser(dtd_validation=True)
1216 assertEqual = self.assertEqual
1217 test_url = _str("__nosuch.dtd")
1218
class MyResolver(self.etree.Resolver):
    """Resolver that answers with a small DTD built as a text string."""

    def resolve(self, url, id, context):
        # ``assertEqual`` and ``test_url`` are defined outside this class.
        assertEqual(url, test_url)
        # The requested URL is substituted into the entity's replacement text.
        dtd_text = _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url
        return self.resolve_string(dtd_text, context)
1225
1226 parser.resolvers.add(MyResolver())
1227
1228 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1229 tree = parse(StringIO(xml), parser)
1230 root = tree.getroot()
1231 self.assertEqual(root.text, test_url)
1232
1234 parse = self.etree.parse
1235 parser = self.etree.XMLParser(dtd_validation=True)
1236 assertEqual = self.assertEqual
1237 test_url = _str("__nosuch.dtd")
1238
class MyResolver(self.etree.Resolver):
    """Resolver that answers with a small DTD passed as UTF-8 bytes."""

    def resolve(self, url, id, context):
        # ``assertEqual`` and ``test_url`` are defined outside this class.
        assertEqual(url, test_url)
        # The requested URL is substituted into the entity's replacement text
        # before the DTD is encoded.
        dtd_text = _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url
        return self.resolve_string(dtd_text.encode('utf-8'), context)
1246
1247 parser.resolvers.add(MyResolver())
1248
1249 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1250 tree = parse(StringIO(xml), parser)
1251 root = tree.getroot()
1252 self.assertEqual(root.text, test_url)
1253
1255 parse = self.etree.parse
1256 parser = self.etree.XMLParser(dtd_validation=True)
1257 assertEqual = self.assertEqual
1258 test_url = _str("__nosuch.dtd")
1259
class MyResolver(self.etree.Resolver):
    """Resolver that serves a small DTD through a ``SillyFileLike`` object."""

    def resolve(self, url, id, context):
        # ``assertEqual`` and ``test_url`` are defined outside this class.
        assertEqual(url, test_url)
        # The requested URL is substituted into the entity's replacement text.
        dtd_text = _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url
        dtd_source = SillyFileLike(dtd_text)
        return self.resolve_file(dtd_source, context)
1267
1268 parser.resolvers.add(MyResolver())
1269
1270 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1271 tree = parse(StringIO(xml), parser)
1272 root = tree.getroot()
1273 self.assertEqual(root.text, test_url)
1274
1276 parse = self.etree.parse
1277 parser = self.etree.XMLParser(attribute_defaults=True)
1278 assertEqual = self.assertEqual
1279 test_url = _str("__nosuch.dtd")
1280
class MyResolver(self.etree.Resolver):
    """Resolver that redirects the requested URL to ``test.dtd`` by file name."""

    def resolve(self, url, id, context):
        # ``assertEqual`` and ``test_url`` are defined outside this class.
        assertEqual(url, test_url)
        dtd_path = fileInTestDir('test.dtd')
        return self.resolve_filename(dtd_path, context)
1286
1287 parser.resolvers.add(MyResolver())
1288
1289 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1290 tree = parse(StringIO(xml), parser)
1291 root = tree.getroot()
1292 self.assertEqual(
1293 root.attrib, {'default': 'valueA'})
1294 self.assertEqual(
1295 root[0].attrib, {'default': 'valueB'})
1296
1311
1312 parser.resolvers.add(MyResolver())
1313
1314 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1315 tree = parse(StringIO(xml), parser,
1316 base_url=fileUrlInTestDir('__test.xml'))
1317 root = tree.getroot()
1318 self.assertEqual(
1319 root.attrib, {'default': 'valueA'})
1320 self.assertEqual(
1321 root[0].attrib, {'default': 'valueB'})
1322
1324 parse = self.etree.parse
1325 parser = self.etree.XMLParser(attribute_defaults=True)
1326 assertEqual = self.assertEqual
1327 test_url = _str("__nosuch.dtd")
1328
class MyResolver(self.etree.Resolver):
    """Resolver that serves ``test.dtd`` as an open binary file object."""

    def resolve(self, url, id, context):
        # ``assertEqual`` and ``test_url`` are defined outside this class.
        assertEqual(url, test_url)
        # NOTE(review): the handle is not closed in this block; presumably
        # the parser closes it after reading - confirm.
        dtd_file = open(fileInTestDir('test.dtd'), 'rb')
        return self.resolve_file(dtd_file, context)
1334
1335 parser.resolvers.add(MyResolver())
1336
1337 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1338 tree = parse(StringIO(xml), parser)
1339 root = tree.getroot()
1340 self.assertEqual(
1341 root.attrib, {'default': 'valueA'})
1342 self.assertEqual(
1343 root[0].attrib, {'default': 'valueB'})
1344
1346 parse = self.etree.parse
1347 parser = self.etree.XMLParser(load_dtd=True)
1348 assertEqual = self.assertEqual
1349 test_url = _str("__nosuch.dtd")
1350
class check(object):
    # Class-level flag used as a mutable marker; it starts out False and is
    # set to True by code outside this class.
    resolved = False
1353
class MyResolver(self.etree.Resolver):
    """Resolver that records that it was called and resolves to an empty document."""

    def resolve(self, url, id, context):
        # ``assertEqual``, ``test_url`` and ``check`` are defined outside
        # this class.
        assertEqual(url, test_url)
        # Mark the hook as called before handing back the empty result.
        check.resolved = True
        empty_result = self.resolve_empty(context)
        return empty_result
1359
1360 parser.resolvers.add(MyResolver())
1361
1362 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1363 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1364 self.assertTrue(check.resolved)
1365
1372
class MyResolver(self.etree.Resolver):
    # Resolver whose hook always fails instead of resolving anything.
    def resolve(self, url, id, context):
        # Raised unconditionally; ``_LocalException`` is defined outside
        # this class.
        raise _LocalException
1376
1377 parser.resolvers.add(MyResolver())
1378
1379 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1380 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1381
1382 if etree.LIBXML_VERSION > (2,6,20):
1399
1401 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1402 <root>
1403 <child1/>
1404 <child2/>
1405 <child3> </child3>
1406 </root>''')
1407
1408 parser = self.etree.XMLParser(resolve_entities=False)
1409 root = etree.fromstring(xml, parser)
1410 self.assertEqual([ el.tag for el in root ],
1411 ['child1', 'child2', 'child3'])
1412
1413 root[0] = root[-1]
1414 self.assertEqual([ el.tag for el in root ],
1415 ['child3', 'child2'])
1416 self.assertEqual(root[0][0].text, ' ')
1417 self.assertEqual(root[0][0].name, 'nbsp')
1418
1434
1441
1443 Entity = self.etree.Entity
1444 self.assertRaises(ValueError, Entity, 'a b c')
1445 self.assertRaises(ValueError, Entity, 'a,b')
1446 self.assertRaises(ValueError, Entity, 'a\0b')
1447 self.assertRaises(ValueError, Entity, '#abc')
1448 self.assertRaises(ValueError, Entity, '#xxyz')
1449
1462
1483
1496
1508
1517
1526
1527
1537
1546
1548 Element = self.etree.Element
1549 SubElement = self.etree.SubElement
1550 root = Element('root')
1551 self.assertRaises(ValueError, root.append, root)
1552 child = SubElement(root, 'child')
1553 self.assertRaises(ValueError, child.append, root)
1554 child2 = SubElement(child, 'child2')
1555 self.assertRaises(ValueError, child2.append, root)
1556 self.assertRaises(ValueError, child2.append, child)
1557 self.assertEqual('child2', root[0][0].tag)
1558
1571
1584
1595
1606
1616
1626
1642
1658
1664
1679
1692
1707
1720
1735
1748
1763
1776
1777
1785
1786
1796
1797
1812
1813
1823
1824
1835
1862
1863
1865 self.assertRaises(TypeError, self.etree.dump, None)
1866
1879
1892
1913
1922
1931
1940
1949
1958
1960 XML = self.etree.XML
1961
1962 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1963 result = []
1964 for el in root.iterchildren(tag=['two', 'three']):
1965 result.append(el.text)
1966 self.assertEqual(['Two', 'Bla', None], result)
1967
1969 XML = self.etree.XML
1970
1971 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1972 result = []
1973 for el in root.iterchildren('two', 'three'):
1974 result.append(el.text)
1975 self.assertEqual(['Two', 'Bla', None], result)
1976
1985
2006
2028
2030 Element = self.etree.Element
2031 SubElement = self.etree.SubElement
2032
2033 a = Element('a')
2034 b = SubElement(a, 'b')
2035 c = SubElement(a, 'c')
2036 d = SubElement(b, 'd')
2037 self.assertEqual(
2038 [b, a],
2039 list(d.iterancestors(tag=('a', 'b'))))
2040 self.assertEqual(
2041 [b, a],
2042 list(d.iterancestors('a', 'b')))
2043
2044 self.assertEqual(
2045 [],
2046 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2047 self.assertEqual(
2048 [],
2049 list(d.iterancestors('w', 'x', 'y', 'z')))
2050
2051 self.assertEqual(
2052 [],
2053 list(d.iterancestors(tag=('d', 'x'))))
2054 self.assertEqual(
2055 [],
2056 list(d.iterancestors('d', 'x')))
2057
2058 self.assertEqual(
2059 [b, a],
2060 list(d.iterancestors(tag=('b', '*'))))
2061 self.assertEqual(
2062 [b, a],
2063 list(d.iterancestors('b', '*')))
2064
2065 self.assertEqual(
2066 [b],
2067 list(d.iterancestors(tag=('b', 'c'))))
2068 self.assertEqual(
2069 [b],
2070 list(d.iterancestors('b', 'c')))
2071
2088
2090 Element = self.etree.Element
2091 SubElement = self.etree.SubElement
2092
2093 a = Element('a')
2094 b = SubElement(a, 'b')
2095 c = SubElement(a, 'c')
2096 d = SubElement(b, 'd')
2097 e = SubElement(c, 'e')
2098
2099 self.assertEqual(
2100 [],
2101 list(a.iterdescendants('a')))
2102 self.assertEqual(
2103 [],
2104 list(a.iterdescendants(tag='a')))
2105
2106 a2 = SubElement(e, 'a')
2107 self.assertEqual(
2108 [a2],
2109 list(a.iterdescendants('a')))
2110
2111 self.assertEqual(
2112 [a2],
2113 list(c.iterdescendants('a')))
2114 self.assertEqual(
2115 [a2],
2116 list(c.iterdescendants(tag='a')))
2117
2119 Element = self.etree.Element
2120 SubElement = self.etree.SubElement
2121
2122 a = Element('a')
2123 b = SubElement(a, 'b')
2124 c = SubElement(a, 'c')
2125 d = SubElement(b, 'd')
2126 e = SubElement(c, 'e')
2127
2128 self.assertEqual(
2129 [b, e],
2130 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2131 self.assertEqual(
2132 [b, e],
2133 list(a.iterdescendants('a', 'b', 'e')))
2134
2135 a2 = SubElement(e, 'a')
2136 self.assertEqual(
2137 [b, a2],
2138 list(a.iterdescendants(tag=('a', 'b'))))
2139 self.assertEqual(
2140 [b, a2],
2141 list(a.iterdescendants('a', 'b')))
2142
2143 self.assertEqual(
2144 [],
2145 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2146 self.assertEqual(
2147 [],
2148 list(c.iterdescendants('x', 'y', 'z')))
2149
2150 self.assertEqual(
2151 [b, d, c, e, a2],
2152 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2153 self.assertEqual(
2154 [b, d, c, e, a2],
2155 list(a.iterdescendants('x', 'y', 'z', '*')))
2156
2174
2191
2209
2233
2235 Element = self.etree.Element
2236 SubElement = self.etree.SubElement
2237
2238 a = Element('a')
2239 b = SubElement(a, 'b')
2240 c = SubElement(a, 'c')
2241 d = SubElement(b, 'd')
2242 self.assertEqual(
2243 [],
2244 list(a.itersiblings(tag='XXX')))
2245 self.assertEqual(
2246 [c],
2247 list(b.itersiblings(tag='c')))
2248 self.assertEqual(
2249 [c],
2250 list(b.itersiblings(tag='*')))
2251 self.assertEqual(
2252 [b],
2253 list(c.itersiblings(preceding=True, tag='b')))
2254 self.assertEqual(
2255 [],
2256 list(c.itersiblings(preceding=True, tag='c')))
2257
2259 Element = self.etree.Element
2260 SubElement = self.etree.SubElement
2261
2262 a = Element('a')
2263 b = SubElement(a, 'b')
2264 c = SubElement(a, 'c')
2265 d = SubElement(b, 'd')
2266 e = SubElement(a, 'e')
2267 self.assertEqual(
2268 [],
2269 list(a.itersiblings(tag=('XXX', 'YYY'))))
2270 self.assertEqual(
2271 [c, e],
2272 list(b.itersiblings(tag=('c', 'd', 'e'))))
2273 self.assertEqual(
2274 [b],
2275 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2276 self.assertEqual(
2277 [c, b],
2278 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2279
2281 parseid = self.etree.parseid
2282 XML = self.etree.XML
2283 xml_text = _bytes('''
2284 <!DOCTYPE document [
2285 <!ELEMENT document (h1,p)*>
2286 <!ELEMENT h1 (#PCDATA)>
2287 <!ATTLIST h1 myid ID #REQUIRED>
2288 <!ELEMENT p (#PCDATA)>
2289 <!ATTLIST p someid ID #REQUIRED>
2290 ]>
2291 <document>
2292 <h1 myid="chapter1">...</h1>
2293 <p id="note1" class="note">...</p>
2294 <p>Regular paragraph.</p>
2295 <p xml:id="xmlid">XML:ID paragraph.</p>
2296 <p someid="warn1" class="warning">...</p>
2297 </document>
2298 ''')
2299
2300 tree, dic = parseid(BytesIO(xml_text))
2301 root = tree.getroot()
2302 root2 = XML(xml_text)
2303 self.assertEqual(self._writeElement(root),
2304 self._writeElement(root2))
2305 expected = {
2306 "chapter1" : root[0],
2307 "xmlid" : root[3],
2308 "warn1" : root[4]
2309 }
2310 self.assertTrue("chapter1" in dic)
2311 self.assertTrue("warn1" in dic)
2312 self.assertTrue("xmlid" in dic)
2313 self._checkIDDict(dic, expected)
2314
2316 XMLDTDID = self.etree.XMLDTDID
2317 XML = self.etree.XML
2318 xml_text = _bytes('''
2319 <!DOCTYPE document [
2320 <!ELEMENT document (h1,p)*>
2321 <!ELEMENT h1 (#PCDATA)>
2322 <!ATTLIST h1 myid ID #REQUIRED>
2323 <!ELEMENT p (#PCDATA)>
2324 <!ATTLIST p someid ID #REQUIRED>
2325 ]>
2326 <document>
2327 <h1 myid="chapter1">...</h1>
2328 <p id="note1" class="note">...</p>
2329 <p>Regular paragraph.</p>
2330 <p xml:id="xmlid">XML:ID paragraph.</p>
2331 <p someid="warn1" class="warning">...</p>
2332 </document>
2333 ''')
2334
2335 root, dic = XMLDTDID(xml_text)
2336 root2 = XML(xml_text)
2337 self.assertEqual(self._writeElement(root),
2338 self._writeElement(root2))
2339 expected = {
2340 "chapter1" : root[0],
2341 "xmlid" : root[3],
2342 "warn1" : root[4]
2343 }
2344 self.assertTrue("chapter1" in dic)
2345 self.assertTrue("warn1" in dic)
2346 self.assertTrue("xmlid" in dic)
2347 self._checkIDDict(dic, expected)
2348
2350 XMLDTDID = self.etree.XMLDTDID
2351 XML = self.etree.XML
2352 xml_text = _bytes('''
2353 <document>
2354 <h1 myid="chapter1">...</h1>
2355 <p id="note1" class="note">...</p>
2356 <p>Regular paragraph.</p>
2357 <p someid="warn1" class="warning">...</p>
2358 </document>
2359 ''')
2360
2361 root, dic = XMLDTDID(xml_text)
2362 root2 = XML(xml_text)
2363 self.assertEqual(self._writeElement(root),
2364 self._writeElement(root2))
2365 expected = {}
2366 self._checkIDDict(dic, expected)
2367
2369 XMLDTDID = self.etree.XMLDTDID
2370 XML = self.etree.XML
2371 xml_text = _bytes('''
2372 <!DOCTYPE document [
2373 <!ELEMENT document (h1,p)*>
2374 <!ELEMENT h1 (#PCDATA)>
2375 <!ATTLIST h1 myid ID #REQUIRED>
2376 <!ELEMENT p (#PCDATA)>
2377 <!ATTLIST p someid ID #REQUIRED>
2378 ]>
2379 <document>
2380 <h1 myid="chapter1">...</h1>
2381 <p id="note1" class="note">...</p>
2382 <p>Regular paragraph.</p>
2383 <p xml:id="xmlid">XML:ID paragraph.</p>
2384 <p someid="warn1" class="warning">...</p>
2385 </document>
2386 ''')
2387
2388 parser = etree.XMLParser(collect_ids=False)
2389 root, dic = XMLDTDID(xml_text, parser=parser)
2390 root2 = XML(xml_text)
2391 self.assertEqual(self._writeElement(root),
2392 self._writeElement(root2))
2393 self.assertFalse(dic)
2394 self._checkIDDict(dic, {})
2395
2397 self.assertEqual(len(dic),
2398 len(expected))
2399 self.assertEqual(sorted(dic.items()),
2400 sorted(expected.items()))
2401 if sys.version_info < (3,):
2402 self.assertEqual(sorted(dic.iteritems()),
2403 sorted(expected.iteritems()))
2404 self.assertEqual(sorted(dic.keys()),
2405 sorted(expected.keys()))
2406 if sys.version_info < (3,):
2407 self.assertEqual(sorted(dic.iterkeys()),
2408 sorted(expected.iterkeys()))
2409 if sys.version_info < (3,):
2410 self.assertEqual(sorted(dic.values()),
2411 sorted(expected.values()))
2412 self.assertEqual(sorted(dic.itervalues()),
2413 sorted(expected.itervalues()))
2414
2416 etree = self.etree
2417
2418 r = {'foo': 'http://ns.infrae.com/foo'}
2419 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2420 self.assertEqual(
2421 'foo',
2422 e.prefix)
2423 self.assertEqual(
2424 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2425 self._writeElement(e))
2426
2428 etree = self.etree
2429
2430 r = {None: 'http://ns.infrae.com/foo'}
2431 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2432 self.assertEqual(
2433 None,
2434 e.prefix)
2435 self.assertEqual(
2436 '{http://ns.infrae.com/foo}bar',
2437 e.tag)
2438 self.assertEqual(
2439 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2440 self._writeElement(e))
2441
2443 etree = self.etree
2444
2445 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2446 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2447 self.assertEqual(None, e.prefix)
2448 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2449 self.assertEqual(
2450 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2451 self._writeElement(e))
2452
2454 etree = self.etree
2455
2456 r = {None: 'http://ns.infrae.com/foo',
2457 'hoi': 'http://ns.infrae.com/hoi'}
2458 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2459 e.set('{http://ns.infrae.com/hoi}test', 'value')
2460 self.assertEqual(
2461 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2462 self._writeElement(e))
2463
2465 etree = self.etree
2466
2467 root = etree.Element('{http://test/ns}root',
2468 nsmap={None: 'http://test/ns'})
2469 sub = etree.Element('{http://test/ns}sub',
2470 nsmap={'test': 'http://test/ns'})
2471
2472 sub.attrib['{http://test/ns}attr'] = 'value'
2473 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2474 self.assertEqual(
2475 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2476 etree.tostring(sub))
2477
2478 root.append(sub)
2479 self.assertEqual(
2480 _bytes('<root xmlns="http://test/ns">'
2481 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2482 '</root>'),
2483 etree.tostring(root))
2484
2486 etree = self.etree
2487
2488 root = etree.Element('root')
2489 sub = etree.Element('{http://test/ns}sub',
2490 nsmap={'test': 'http://test/ns'})
2491
2492 sub.attrib['{http://test/ns}attr'] = 'value'
2493 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2494 self.assertEqual(
2495 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2496 etree.tostring(sub))
2497
2498 root.append(sub)
2499 self.assertEqual(
2500 _bytes('<root>'
2501 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2502 '</root>'),
2503 etree.tostring(root))
2504
2506 etree = self.etree
2507
2508 root = etree.Element('root')
2509 sub = etree.Element('{http://test/ns}sub',
2510 nsmap={None: 'http://test/ns'})
2511
2512 sub.attrib['{http://test/ns}attr'] = 'value'
2513 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2514 self.assertEqual(
2515 _bytes('<sub xmlns="http://test/ns" '
2516 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2517 etree.tostring(sub))
2518
2519 root.append(sub)
2520 self.assertEqual(
2521 _bytes('<root>'
2522 '<sub xmlns="http://test/ns"'
2523 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2524 '</root>'),
2525 etree.tostring(root))
2526
2528 etree = self.etree
2529
2530 root = etree.Element('{http://test/ns}root',
2531 nsmap={'test': 'http://test/ns',
2532 None: 'http://test/ns'})
2533 sub = etree.Element('{http://test/ns}sub',
2534 nsmap={None: 'http://test/ns'})
2535
2536 sub.attrib['{http://test/ns}attr'] = 'value'
2537 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2538 self.assertEqual(
2539 _bytes('<sub xmlns="http://test/ns" '
2540 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2541 etree.tostring(sub))
2542
2543 root.append(sub)
2544 self.assertEqual(
2545 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2546 '<test:sub test:attr="value"/>'
2547 '</test:root>'),
2548 etree.tostring(root))
2549
2551 etree = self.etree
2552 r = {None: 'http://ns.infrae.com/foo',
2553 'hoi': 'http://ns.infrae.com/hoi'}
2554 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2555 tree = etree.ElementTree(element=e)
2556 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2557 self.assertEqual(
2558 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2559 self._writeElement(e))
2560
2562 etree = self.etree
2563
2564 r = {None: 'http://ns.infrae.com/foo'}
2565 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2566 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2567
2568 e1.append(e2)
2569
2570 self.assertEqual(
2571 None,
2572 e1.prefix)
2573 self.assertEqual(
2574 None,
2575 e1[0].prefix)
2576 self.assertEqual(
2577 '{http://ns.infrae.com/foo}bar',
2578 e1.tag)
2579 self.assertEqual(
2580 '{http://ns.infrae.com/foo}bar',
2581 e1[0].tag)
2582
2584 etree = self.etree
2585
2586 r = {None: 'http://ns.infrae.com/BAR'}
2587 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2588 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2589
2590 e1.append(e2)
2591
2592 self.assertEqual(
2593 None,
2594 e1.prefix)
2595 self.assertNotEqual(
2596 None,
2597 e2.prefix)
2598 self.assertEqual(
2599 '{http://ns.infrae.com/BAR}bar',
2600 e1.tag)
2601 self.assertEqual(
2602 '{http://ns.infrae.com/foo}bar',
2603 e2.tag)
2604
2606 ns_href = "http://a.b.c"
2607 one = self.etree.fromstring(
2608 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2609 baz = one[0][0]
2610
2611 two = self.etree.fromstring(
2612 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2613 two.append(baz)
2614 del one
2615
2616 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2617 self.assertEqual(
2618 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2619 self.etree.tostring(two))
2620
2634
2651
2662
2664 xml = ('<root>' +
2665 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2666 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2667 root = self.etree.fromstring(xml)
2668 self.assertEqual(xml, self.etree.tostring(root))
2669 self.etree.cleanup_namespaces(root)
2670 self.assertEqual(
2671 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2672 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2673 self.etree.tostring(root))
2674
2676 xml = ('<root>' +
2677 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2678 '<n64:x xmlns:a="A" a:attr="X"/>' +
2679 '</a>'*100 +
2680 '</root>').encode('utf8')
2681 root = self.etree.fromstring(xml)
2682 self.assertEqual(xml, self.etree.tostring(root))
2683 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2684 self.assertEqual(
2685 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2686 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2687 self.etree.tostring(root))
2688
2690 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2691 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2692 '<foo>foo:bar</foo>'
2693 '</root>').encode('utf8')
2694 root = self.etree.fromstring(xml)
2695 self.assertEqual(xml, self.etree.tostring(root))
2696 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2697 self.assertEqual(
2698 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2699 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2700 b'<foo>foo:bar</foo>'
2701 b'</root>',
2702 self.etree.tostring(root))
2703
2705 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2706 '<sub xmlns:foo="FOO">'
2707 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2708 '<foo>foo:bar</foo>'
2709 '</sub>'
2710 '</root>').encode('utf8')
2711 root = self.etree.fromstring(xml)
2712 self.assertEqual(xml, self.etree.tostring(root))
2713 self.etree.cleanup_namespaces(
2714 root,
2715 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
2716 keep_ns_prefixes=['foo'])
2717 self.assertEqual(
2718 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2719 b'<sub>'
2720 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2721 b'<foo>foo:bar</foo>'
2722 b'</sub>'
2723 b'</root>',
2724 self.etree.tostring(root))
2725
2727 etree = self.etree
2728
2729 r = {None: 'http://ns.infrae.com/foo',
2730 'hoi': 'http://ns.infrae.com/hoi'}
2731 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2732 self.assertEqual(
2733 r,
2734 e.nsmap)
2735
2737 etree = self.etree
2738
2739 re = {None: 'http://ns.infrae.com/foo',
2740 'hoi': 'http://ns.infrae.com/hoi'}
2741 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2742
2743 rs = {None: 'http://ns.infrae.com/honk',
2744 'top': 'http://ns.infrae.com/top'}
2745 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2746
2747 r = re.copy()
2748 r.update(rs)
2749 self.assertEqual(re, e.nsmap)
2750 self.assertEqual(r, s.nsmap)
2751
2753 etree = self.etree
2754 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2755 self.assertEqual({'hha': None}, el.nsmap)
2756
2758 Element = self.etree.Element
2759 SubElement = self.etree.SubElement
2760
2761 a = Element('a')
2762 b = SubElement(a, 'b')
2763 c = SubElement(a, 'c')
2764 d = SubElement(b, 'd')
2765 e = SubElement(c, 'e')
2766 f = SubElement(c, 'f')
2767
2768 self.assertEqual(
2769 [a, b],
2770 list(a.getiterator('a', 'b')))
2771 self.assertEqual(
2772 [],
2773 list(a.getiterator('x', 'y')))
2774 self.assertEqual(
2775 [a, f],
2776 list(a.getiterator('f', 'a')))
2777 self.assertEqual(
2778 [c, e, f],
2779 list(c.getiterator('c', '*', 'a')))
2780 self.assertEqual(
2781 [],
2782 list(a.getiterator( (), () )))
2783
2785 Element = self.etree.Element
2786 SubElement = self.etree.SubElement
2787
2788 a = Element('a')
2789 b = SubElement(a, 'b')
2790 c = SubElement(a, 'c')
2791 d = SubElement(b, 'd')
2792 e = SubElement(c, 'e')
2793 f = SubElement(c, 'f')
2794
2795 self.assertEqual(
2796 [a, b],
2797 list(a.getiterator( ('a', 'b') )))
2798 self.assertEqual(
2799 [],
2800 list(a.getiterator( ('x', 'y') )))
2801 self.assertEqual(
2802 [a, f],
2803 list(a.getiterator( ('f', 'a') )))
2804 self.assertEqual(
2805 [c, e, f],
2806 list(c.getiterator( ('c', '*', 'a') )))
2807 self.assertEqual(
2808 [],
2809 list(a.getiterator( () )))
2810
2812 Element = self.etree.Element
2813 SubElement = self.etree.SubElement
2814
2815 a = Element('{a}a')
2816 b = SubElement(a, '{a}b')
2817 c = SubElement(a, '{a}c')
2818 d = SubElement(b, '{b}d')
2819 e = SubElement(c, '{a}e')
2820 f = SubElement(c, '{b}f')
2821 g = SubElement(c, 'g')
2822
2823 self.assertEqual(
2824 [a],
2825 list(a.getiterator('{a}a')))
2826 self.assertEqual(
2827 [],
2828 list(a.getiterator('{b}a')))
2829 self.assertEqual(
2830 [],
2831 list(a.getiterator('a')))
2832 self.assertEqual(
2833 [a,b,d,c,e,f,g],
2834 list(a.getiterator('*')))
2835 self.assertEqual(
2836 [f],
2837 list(c.getiterator('{b}*')))
2838 self.assertEqual(
2839 [d, f],
2840 list(a.getiterator('{b}*')))
2841 self.assertEqual(
2842 [g],
2843 list(a.getiterator('g')))
2844 self.assertEqual(
2845 [g],
2846 list(a.getiterator('{}g')))
2847 self.assertEqual(
2848 [g],
2849 list(a.getiterator('{}*')))
2850
2852 Element = self.etree.Element
2853 SubElement = self.etree.SubElement
2854
2855 a = Element('{a}a')
2856 b = SubElement(a, '{nsA}b')
2857 c = SubElement(b, '{nsB}b')
2858 d = SubElement(a, 'b')
2859 e = SubElement(a, '{nsA}e')
2860 f = SubElement(e, '{nsB}e')
2861 g = SubElement(e, 'e')
2862
2863 self.assertEqual(
2864 [b, c, d],
2865 list(a.getiterator('{*}b')))
2866 self.assertEqual(
2867 [e, f, g],
2868 list(a.getiterator('{*}e')))
2869 self.assertEqual(
2870 [a, b, c, d, e, f, g],
2871 list(a.getiterator('{*}*')))
2872
2897
2913
2930
2932 a = etree.Element("a")
2933 b = etree.SubElement(a, "b")
2934 c = etree.SubElement(a, "c")
2935 d1 = etree.SubElement(c, "d")
2936 d2 = etree.SubElement(c, "d")
2937 c.text = d1.text = 'TEXT'
2938
2939 tree = etree.ElementTree(a)
2940 self.assertEqual('.', tree.getelementpath(a))
2941 self.assertEqual('c/d[1]', tree.getelementpath(d1))
2942 self.assertEqual('c/d[2]', tree.getelementpath(d2))
2943
2944 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
2945 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
2946
2947 tree = etree.ElementTree(c)
2948 self.assertEqual('.', tree.getelementpath(c))
2949 self.assertEqual('d[2]', tree.getelementpath(d2))
2950 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
2951
2952 tree = etree.ElementTree(b)
2953 self.assertEqual('.', tree.getelementpath(b))
2954 self.assertRaises(ValueError, tree.getelementpath, a)
2955 self.assertRaises(ValueError, tree.getelementpath, c)
2956 self.assertRaises(ValueError, tree.getelementpath, d2)
2957
2959 a = etree.Element("{http://ns1/}a")
2960 b = etree.SubElement(a, "{http://ns1/}b")
2961 c = etree.SubElement(a, "{http://ns1/}c")
2962 d1 = etree.SubElement(c, "{http://ns1/}d")
2963 d2 = etree.SubElement(c, "{http://ns2/}d")
2964 d3 = etree.SubElement(c, "{http://ns1/}d")
2965
2966 tree = etree.ElementTree(a)
2967 self.assertEqual('.', tree.getelementpath(a))
2968 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
2969 tree.getelementpath(d1))
2970 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
2971 tree.getelementpath(d2))
2972 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
2973 tree.getelementpath(d3))
2974
2975 self.assertEqual(a, tree.find(tree.getelementpath(a)))
2976 self.assertEqual(b, tree.find(tree.getelementpath(b)))
2977 self.assertEqual(c, tree.find(tree.getelementpath(c)))
2978 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
2979 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
2980 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
2981
2982 tree = etree.ElementTree(c)
2983 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
2984 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
2985 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
2986 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
2987 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
2988 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
2989
2990 tree = etree.ElementTree(b)
2991 self.assertRaises(ValueError, tree.getelementpath, d1)
2992 self.assertRaises(ValueError, tree.getelementpath, d2)
2993
3000
3007
3016
3018 XML = self.etree.XML
3019 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3020 self.assertEqual(len(root.findall(".//{X}b")), 2)
3021 self.assertEqual(len(root.findall(".//{X}*")), 2)
3022 self.assertEqual(len(root.findall(".//b")), 3)
3023
3025 XML = self.etree.XML
3026 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3027 nsmap = {'xx': 'X'}
3028 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3029 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3030 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3031 nsmap = {'xx': 'Y'}
3032 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3033 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3034 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3035
3037 XML = self.etree.XML
3038 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3039 nsmap = {'xx': 'X'}
3040 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3041 nsmap = {'xx': 'X', None: 'Y'}
3042 self.assertRaises(ValueError, root.findall, ".//xx:b", namespaces=nsmap)
3043 nsmap = {'xx': 'X', '': 'Y'}
3044 self.assertRaises(ValueError, root.findall, ".//xx:b", namespaces=nsmap)
3045
3052
3054 etree = self.etree
3055 e = etree.Element('foo')
3056 for i in range(10):
3057 etree.SubElement(e, 'a%s' % i)
3058 for i in range(10):
3059 self.assertEqual(
3060 i,
3061 e.index(e[i]))
3062 self.assertEqual(
3063 3, e.index(e[3], 3))
3064 self.assertRaises(
3065 ValueError, e.index, e[3], 4)
3066 self.assertRaises(
3067 ValueError, e.index, e[3], 0, 2)
3068 self.assertRaises(
3069 ValueError, e.index, e[8], 0, -3)
3070 self.assertRaises(
3071 ValueError, e.index, e[8], -5, -3)
3072 self.assertEqual(
3073 8, e.index(e[8], 0, -1))
3074 self.assertEqual(
3075 8, e.index(e[8], -12, -1))
3076 self.assertEqual(
3077 0, e.index(e[0], -12, -1))
3078
3080 etree = self.etree
3081 e = etree.Element('foo')
3082 for i in range(10):
3083 el = etree.SubElement(e, 'a%s' % i)
3084 el.text = "text%d" % i
3085 el.tail = "tail%d" % i
3086
3087 child0 = e[0]
3088 child1 = e[1]
3089 child2 = e[2]
3090
3091 e.replace(e[0], e[1])
3092 self.assertEqual(
3093 9, len(e))
3094 self.assertEqual(
3095 child1, e[0])
3096 self.assertEqual(
3097 child1.text, "text1")
3098 self.assertEqual(
3099 child1.tail, "tail1")
3100 self.assertEqual(
3101 child0.tail, "tail0")
3102 self.assertEqual(
3103 child2, e[1])
3104
3105 e.replace(e[-1], e[0])
3106 self.assertEqual(
3107 child1, e[-1])
3108 self.assertEqual(
3109 child1.text, "text1")
3110 self.assertEqual(
3111 child1.tail, "tail1")
3112 self.assertEqual(
3113 child2, e[0])
3114
3116 etree = self.etree
3117 e = etree.Element('foo')
3118 for i in range(10):
3119 etree.SubElement(e, 'a%s' % i)
3120
3121 new_element = etree.Element("test")
3122 new_element.text = "TESTTEXT"
3123 new_element.tail = "TESTTAIL"
3124 child1 = e[1]
3125 e.replace(e[0], new_element)
3126 self.assertEqual(
3127 new_element, e[0])
3128 self.assertEqual(
3129 "TESTTEXT",
3130 e[0].text)
3131 self.assertEqual(
3132 "TESTTAIL",
3133 e[0].tail)
3134 self.assertEqual(
3135 child1, e[1])
3136
3152
3170
3188
3206
3208 Element = self.etree.Element
3209 SubElement = self.etree.SubElement
3210 try:
3211 slice
3212 except NameError:
3213 print("slice() not found")
3214 return
3215
3216 a = Element('a')
3217 b = SubElement(a, 'b')
3218 c = SubElement(a, 'c')
3219 d = SubElement(a, 'd')
3220 e = SubElement(a, 'e')
3221
3222 x = Element('x')
3223 y = Element('y')
3224 z = Element('z')
3225
3226 self.assertRaises(
3227 ValueError,
3228 operator.setitem, a, slice(1,None,2), [x, y, z])
3229
3230 self.assertEqual(
3231 [b, c, d, e],
3232 list(a))
3233
3246
3248 XML = self.etree.XML
3249 root = XML(_bytes(
3250 '<?xml version="1.0"?>\n'
3251 '<root>' + '\n' * 65536 +
3252 '<p>' + '\n' * 65536 + '</p>\n' +
3253 '<br/>\n'
3254 '</root>'))
3255
3256 if self.etree.LIBXML_VERSION >= (2, 9):
3257 expected = [2, 131074, 131076]
3258 else:
3259 expected = [2, 65535, 65535]
3260
3261 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3262
3270
3279
3289
3299
3305
3313
3319
3326
3332
3334 etree = self.etree
3335 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3336 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3337 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3338 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3339
3340 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3341
3342 tree = etree.parse(BytesIO(xml))
3343 docinfo = tree.docinfo
3344 self.assertEqual(docinfo.encoding, "ascii")
3345 self.assertEqual(docinfo.xml_version, "1.0")
3346 self.assertEqual(docinfo.public_id, pub_id)
3347 self.assertEqual(docinfo.system_url, sys_id)
3348 self.assertEqual(docinfo.root_name, 'html')
3349 self.assertEqual(docinfo.doctype, doctype_string)
3350
3366
3378
3390
3396
3398 etree = self.etree
3399 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3400 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3401 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3402
3403 xml = _bytes('<!DOCTYPE root>\n<root/>')
3404 tree = etree.parse(BytesIO(xml))
3405 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3406 etree.tostring(tree, doctype=doctype_string))
3407
3409 etree = self.etree
3410 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3411 self.assertEqual(root.base, "http://no/such/url")
3412 self.assertEqual(
3413 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3414 root.base = "https://secret/url"
3415 self.assertEqual(root.base, "https://secret/url")
3416 self.assertEqual(
3417 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3418 "https://secret/url")
3419
3421 etree = self.etree
3422 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3423 self.assertEqual(root.base, "http://no/such/url")
3424 self.assertEqual(
3425 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3426 root.set('{http://www.w3.org/XML/1998/namespace}base',
3427 "https://secret/url")
3428 self.assertEqual(root.base, "https://secret/url")
3429 self.assertEqual(
3430 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3431 "https://secret/url")
3432
3438
3443
3450
3464
3466 Element = self.etree.Element
3467
3468 a = Element('a')
3469 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3470 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3471
3472 self.assertRaises(ValueError, Element, 'ha\0ho')
3473
3475 Element = self.etree.Element
3476
3477 a = Element('a')
3478 self.assertRaises(ValueError, setattr, a, "text",
3479 _str('ha\0ho'))
3480 self.assertRaises(ValueError, setattr, a, "tail",
3481 _str('ha\0ho'))
3482
3483 self.assertRaises(ValueError, Element,
3484 _str('ha\0ho'))
3485
3487 Element = self.etree.Element
3488
3489 a = Element('a')
3490 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3491 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3492
3493 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3494 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3495
3496 self.assertRaises(ValueError, Element, 'ha\x07ho')
3497 self.assertRaises(ValueError, Element, 'ha\x02ho')
3498
3500 Element = self.etree.Element
3501
3502 a = Element('a')
3503 self.assertRaises(ValueError, setattr, a, "text",
3504 _str('ha\x07ho'))
3505 self.assertRaises(ValueError, setattr, a, "text",
3506 _str('ha\x02ho'))
3507
3508 self.assertRaises(ValueError, setattr, a, "tail",
3509 _str('ha\x07ho'))
3510 self.assertRaises(ValueError, setattr, a, "tail",
3511 _str('ha\x02ho'))
3512
3513 self.assertRaises(ValueError, Element,
3514 _str('ha\x07ho'))
3515 self.assertRaises(ValueError, Element,
3516 _str('ha\x02ho'))
3517
3519 Element = self.etree.Element
3520
3521 a = Element('a')
3522 self.assertRaises(ValueError, setattr, a, "text",
3523 _str('ha\u1234\x07ho'))
3524 self.assertRaises(ValueError, setattr, a, "text",
3525 _str('ha\u1234\x02ho'))
3526
3527 self.assertRaises(ValueError, setattr, a, "tail",
3528 _str('ha\u1234\x07ho'))
3529 self.assertRaises(ValueError, setattr, a, "tail",
3530 _str('ha\u1234\x02ho'))
3531
3532 self.assertRaises(ValueError, Element,
3533 _str('ha\u1234\x07ho'))
3534 self.assertRaises(ValueError, Element,
3535 _str('ha\u1234\x02ho'))
3536
3550
3555
3573
3593
3595 tostring = self.etree.tostring
3596 html = self.etree.fromstring(
3597 '<html><body>'
3598 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3599 '</body></html>',
3600 parser=self.etree.HTMLParser())
3601 self.assertEqual(html.tag, 'html')
3602 div = html.find('.//div')
3603 self.assertEqual(div.tail, '\r\n')
3604 result = tostring(div, method='html')
3605 self.assertEqual(
3606 result,
3607 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3608 result = tostring(div, method='html', with_tail=True)
3609 self.assertEqual(
3610 result,
3611 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3612 result = tostring(div, method='html', with_tail=False)
3613 self.assertEqual(
3614 result,
3615 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3616
3638
3640 tostring = self.etree.tostring
3641 XML = self.etree.XML
3642 ElementTree = self.etree.ElementTree
3643
3644 root = XML(_bytes("<root/>"))
3645
3646 tree = ElementTree(root)
3647 self.assertEqual(None, tree.docinfo.standalone)
3648
3649 result = tostring(root, xml_declaration=True, encoding="ASCII")
3650 self.assertEqual(result, _bytes(
3651 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3652
3653 result = tostring(root, xml_declaration=True, encoding="ASCII",
3654 standalone=True)
3655 self.assertEqual(result, _bytes(
3656 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3657
3658 tree = ElementTree(XML(result))
3659 self.assertEqual(True, tree.docinfo.standalone)
3660
3661 result = tostring(root, xml_declaration=True, encoding="ASCII",
3662 standalone=False)
3663 self.assertEqual(result, _bytes(
3664 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3665
3666 tree = ElementTree(XML(result))
3667 self.assertEqual(False, tree.docinfo.standalone)
3668
3688
3690 tostring = self.etree.tostring
3691 Element = self.etree.Element
3692 SubElement = self.etree.SubElement
3693
3694 a = Element('a')
3695 a.text = "A"
3696 a.tail = "tail"
3697 b = SubElement(a, 'b')
3698 b.text = "B"
3699 b.tail = _str("Søk på nettet")
3700 c = SubElement(a, 'c')
3701 c.text = "C"
3702
3703 result = tostring(a, method="text", encoding="UTF-16")
3704
3705 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3706 result)
3707
3709 tostring = self.etree.tostring
3710 Element = self.etree.Element
3711 SubElement = self.etree.SubElement
3712
3713 a = Element('a')
3714 a.text = _str('Søk på nettetA')
3715 a.tail = "tail"
3716 b = SubElement(a, 'b')
3717 b.text = "B"
3718 b.tail = _str('Søk på nettetB')
3719 c = SubElement(a, 'c')
3720 c.text = "C"
3721
3722 self.assertRaises(UnicodeEncodeError,
3723 tostring, a, method="text")
3724
3725 self.assertEqual(
3726 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3727 tostring(a, encoding="UTF-8", method="text"))
3728
3741
3757
3761
3776
3794
3807
3809 tostring = self.etree.tostring
3810 Element = self.etree.Element
3811 SubElement = self.etree.SubElement
3812
3813 a = Element('a')
3814 b = SubElement(a, 'b')
3815 c = SubElement(a, 'c')
3816 d = SubElement(c, 'd')
3817 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3818 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3819 self.assertEqual(_bytes('<b></b>'),
3820 canonicalize(tostring(b, encoding=_unicode)))
3821 self.assertEqual(_bytes('<c><d></d></c>'),
3822 canonicalize(tostring(c, encoding=_unicode)))
3823
3828
3843
3845 tostring = self.etree.tostring
3846 Element = self.etree.Element
3847 SubElement = self.etree.SubElement
3848
3849 a = Element('a')
3850 b = SubElement(a, 'b')
3851 c = SubElement(a, 'c')
3852
3853 result = tostring(a, encoding=_unicode)
3854 self.assertEqual(result, "<a><b/><c/></a>")
3855
3856 result = tostring(a, encoding=_unicode, pretty_print=False)
3857 self.assertEqual(result, "<a><b/><c/></a>")
3858
3859 result = tostring(a, encoding=_unicode, pretty_print=True)
3860 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3861
3873
3875 class SubEl(etree.ElementBase):
3876 pass
3877
3878 el1 = SubEl()
3879 el2 = SubEl()
3880 self.assertEqual('SubEl', el1.tag)
3881 self.assertEqual('SubEl', el2.tag)
3882 el1.other = el2
3883 el2.other = el1
3884
3885 del el1, el2
3886 gc.collect()
3887
3888
3902
3904 root = etree.Element('parent')
3905 c1 = etree.SubElement(root, 'child1')
3906 c2 = etree.SubElement(root, 'child2')
3907
3908 root.remove(c1)
3909 root.remove(c2)
3910 c1.addnext(c2)
3911 c1.tail = 'abc'
3912 c2.tail = 'xyz'
3913 del c1
3914
3915 c2.getprevious()
3916
3917 self.assertEqual('child1', c2.getprevious().tag)
3918 self.assertEqual('abc', c2.getprevious().tail)
3919
3920
3921
3922 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3933
3934
3978
3979 res_instance = res()
3980 parser = etree.XMLParser(load_dtd = True)
3981 parser.resolvers.add(res_instance)
3982
3983 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3984 parser = parser)
3985
3986 self.include(tree)
3987
3988 called = list(res_instance.called.items())
3989 called.sort()
3990 self.assertEqual(
3991 [("dtd", True), ("include", True), ("input", True)],
3992 called)
3993
3995 data = textwrap.dedent('''
3996 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
3997 <foo/>
3998 <xi:include href="./test.xml" />
3999 </doc>
4000 ''')
4001
4002 class Resolver(etree.Resolver):
4003 called = {}
4004
4005 def resolve(self, url, id, context):
4006 if url.endswith("test_xinclude.xml"):
4007 assert not self.called.get("input")
4008 self.called["input"] = True
4009 return None
4010 elif url.endswith('/test5.xml'):
4011 assert not self.called.get("DONE")
4012 self.called["DONE"] = True
4013 return self.resolve_string('<DONE/>', context)
4014 else:
4015 _, filename = url.rsplit('/', 1)
4016 assert not self.called.get(filename)
4017 self.called[filename] = True
4018 next_data = data.replace(
4019 'test.xml', 'test%d.xml' % len(self.called))
4020 return self.resolve_string(next_data, context)
4021
4022 res_instance = Resolver()
4023 parser = etree.XMLParser(load_dtd=True)
4024 parser.resolvers.add(res_instance)
4025
4026 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4027 parser=parser)
4028
4029 self.include(tree)
4030
4031 called = list(res_instance.called.items())
4032 called.sort()
4033 self.assertEqual(
4034 [("DONE", True), ("input", True), ("test.xml", True),
4035 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4036 called)
4037
4038
4042
4043
4048
4049
4052 tree = self.parse(_bytes('<a><b/></a>'))
4053 f = BytesIO()
4054 tree.write_c14n(f)
4055 s = f.getvalue()
4056 self.assertEqual(_bytes('<a><b></b></a>'),
4057 s)
4058
4060 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4061 f = BytesIO()
4062 tree.write_c14n(f, compression=9)
4063 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
4064 try:
4065 s = gzfile.read()
4066 finally:
4067 gzfile.close()
4068 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4069 s)
4070
4082
4098
4116
4128
4140
# One document, serialised four ways: default, explicitly inclusive,
# exclusive, and exclusive with the "z" prefix kept inclusive.
document = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

def canonical(**options):
    # Write the document into a fresh buffer and return the raw bytes.
    sink = BytesIO()
    document.write_c14n(sink, **options)
    return sink.getvalue()

inclusive_form = _bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>')
self.assertEqual(inclusive_form, canonical())
self.assertEqual(inclusive_form, canonical(exclusive=False))
self.assertEqual(
    _bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    canonical(exclusive=True))

self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
    canonical(exclusive=True, inclusive_ns_prefixes=['z']))
4165
# Each entry pairs the extra tostring() keyword arguments with the
# canonical serialisation expected for the whole tree.
document = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
inclusive_form = (
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>')
expectations = [
    ({}, inclusive_form),
    ({'exclusive': False}, inclusive_form),
    ({'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
     '<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
]
for options, expected in expectations:
    serialised = etree.tostring(document, method='c14n', **options)
    self.assertEqual(_bytes(expected), serialised)
4182
# Same checks as for a whole tree, but serialising individual elements:
# first the root, then its only child <z:b/>.
document = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
root = document.getroot()
child = root[0]
inclusive_root = (
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>')
expectations = [
    (root, {}, inclusive_root),
    (root, {'exclusive': False}, inclusive_root),
    (root, {'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    (child, {'exclusive': False},
     '<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
    (child, {'exclusive': True},
     '<z:b xmlns:z="http://cde"></z:b>'),
    (child, {'exclusive': True, 'inclusive_ns_prefixes': ['y']},
     '<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
]
for element, options, expected in expectations:
    serialised = etree.tostring(element, method='c14n', **options)
    self.assertEqual(_bytes(expected), serialised)
4206
"""Regression test for memory allocation issues; uses three inclusive
namespace prefixes to exercise the case.
"""
document = self.parse(_bytes(
    '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

kept_prefixes = ['x', 'y', 'z']
serialised = etree.tostring(
    document, method='c14n', exclusive=True,
    inclusive_ns_prefixes=kept_prefixes)
self.assertEqual(
    _bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    serialised)
4215
4216
# A plain write() is expected to reproduce the parsed bytes, keeping the
# empty element in its short <b/> form.
markup = _bytes('<a><b/></a>')
document = self.parse(markup)
sink = BytesIO()
document.write(sink)
self.assertEqual(_bytes('<a><b/></a>'), sink.getvalue())
4225
# Write with the highest compression level, then inflate the buffer and
# check that the original serialisation comes back.
xml_text = '<a>' + '<b/>' * 200 + '</a>'
document = self.parse(_bytes(xml_text))

compressed = BytesIO()
document.write(compressed, compression=9)

reader = gzip.GzipFile(fileobj=BytesIO(compressed.getvalue()))
try:
    inflated = reader.read()
finally:
    # Close explicitly so the reader is released even if read() fails.
    reader.close()

self.assertEqual(_bytes(xml_text), inflated)
4237
# Compare the output of write() at compression levels 0, 1 and 9.
xml_text = '<a>' + '<b/>' * 200 + '</a>'
document = self.parse(_bytes(xml_text))

def serialise(**options):
    # Write the document into a fresh buffer and return its bytes.
    sink = BytesIO()
    document.write(sink, **options)
    return sink.getvalue()

def gunzip(blob):
    # Inflate a gzip byte string, always closing the reader afterwards.
    reader = gzip.GzipFile(fileobj=BytesIO(blob))
    try:
        return reader.read()
    finally:
        reader.close()

# Level 0 must give the same bytes as not asking for compression at all.
uncompressed = serialise(compression=0)
self.assertEqual(serialise(), uncompressed)

fastest = serialise(compression=1)
self.assertTrue(len(fastest) <= len(uncompressed))
inflated_fastest = gunzip(fastest)

smallest = serialise(compression=9)
self.assertTrue(len(smallest) <= len(uncompressed))
inflated_smallest = gunzip(smallest)

# Whatever the level, the payload must be the original document.
self.assertEqual(_bytes(xml_text), uncompressed)
self.assertEqual(_bytes(xml_text), inflated_fastest)
self.assertEqual(_bytes(xml_text), inflated_smallest)
4274
4286
4302
4314
4327
4329 etree = etree
4330
4352
"""This can't really be tested as long as there isn't a way to
reset the logging setup ...

Installs a PyErrorLog subclass as the global Python log, parses a
malformed document and checks that the collected messages describe
the tag mismatch.
"""
parse = self.etree.parse

messages = []
class Logger(self.etree.PyErrorLog):
    def log(self, entry, message, *args):
        # Collect the formatted message instead of forwarding it.
        messages.append(message)

self.etree.use_global_python_log(Logger())
# BytesIO rejects text on Python 3, so convert the literal to bytes
# first, as the other tests in this file do.
f = BytesIO(_bytes('<a><b></c></b></a>'))
try:
    parse(f)
except SyntaxError:
    # The input is deliberately malformed; only the logged messages
    # are of interest here.
    pass
finally:
    f.close()

# Every one of these fragments must show up in at least one message.
for fragment in ('mismatch', ':PARSER:',
                 ':ERR_TAG_NAME_MISMATCH:', ':1:15:'):
    self.assertTrue([message for message in messages
                     if fragment in message])
4380
4381
4383 etree = etree
4384
4388
class Target(object):
    # Minimal parser target: each callback returns a string naming the
    # call so the test can recognise it in the event stream.
    def start(self, tag, attrib):
        return 'start(%s)' % tag

    def end(self, tag):
        return 'end(%s)' % tag

    def close(self):
        return 'close()'

# No event list is passed, and the assertions below only ever see
# 'end' events.
parser = self.etree.XMLPullParser(target=Target())
pending = parser.read_events()

parser.feed('<root><element>')
# Nothing has been closed yet; draining twice must stay empty.
self.assertFalse(list(pending))
self.assertFalse(list(pending))

parser.feed('</element><child>')
self.assertEqual([('end', 'end(element)')], list(pending))

parser.feed('</child>')
self.assertEqual([('end', 'end(child)')], list(pending))

parser.feed('</root>')
self.assertEqual([('end', 'end(root)')], list(pending))
self.assertFalse(list(pending))

# close() hands back whatever the target's close() returned.
self.assertEqual('close()', parser.close())
4412
class Target(object):
    # Minimal parser target: each callback returns a string naming the
    # call so the test can recognise it in the event stream.
    def start(self, tag, attrib):
        return 'start(%s)' % tag

    def end(self, tag):
        return 'end(%s)' % tag

    def close(self):
        return 'close()'

parser = self.etree.XMLPullParser(['start', 'end'], target=Target())
pending = parser.read_events()

parser.feed('<root><element>')
self.assertEqual(
    [('start', 'start(root)'), ('start', 'start(element)')],
    list(pending))
# The iterator was just drained, so a second pass yields nothing.
self.assertFalse(list(pending))

# Remaining chunks, each paired with the events it must produce.
remaining_steps = [
    ('</element><child>',
     [('end', 'end(element)'), ('start', 'start(child)')]),
    ('</child>', [('end', 'end(child)')]),
    ('</root>', [('end', 'end(root)')]),
]
for chunk, expected in remaining_steps:
    parser.feed(chunk)
    self.assertEqual(expected, list(pending))

self.assertFalse(list(pending))
# close() hands back whatever the target's close() returned.
self.assertEqual('close()', parser.close())
4445
# Pull parsing with a stock TreeBuilder as the target.
builder = etree.TreeBuilder()
parser = self.etree.XMLPullParser(['start', 'end'], target=builder)
pending = parser.read_events()

parser.feed('<root><element>')
self.assert_event_tags(
    pending, [('start', 'root'), ('start', 'element')])
# The events were just consumed, so nothing may be left over.
self.assertFalse(list(pending))

# Remaining chunks, each paired with the (event, tag) pairs expected.
remaining_steps = [
    ('</element><child>', [('end', 'element'), ('start', 'child')]),
    ('</child>', [('end', 'child')]),
    ('</root>', [('end', 'root')]),
]
for chunk, expected in remaining_steps:
    parser.feed(chunk)
    self.assert_event_tags(pending, expected)

self.assertFalse(list(pending))
# Closing the parser returns the built root element.
document_root = parser.close()
self.assertEqual('root', document_root.tag)
4467
class Target(etree.TreeBuilder):
    # TreeBuilder variant that renames every element once it is closed,
    # so the 'end' events and the final root carry the suffixed tag.
    def end(self, tag):
        closed = super(Target, self).end(tag)
        closed.tag = closed.tag + '-huhu'
        return closed

parser = self.etree.XMLPullParser(['start', 'end'], target=Target())
pending = parser.read_events()

parser.feed('<root><element>')
# 'start' events still show the original, unsuffixed tags.
self.assert_event_tags(
    pending, [('start', 'root'), ('start', 'element')])
self.assertFalse(list(pending))

# Remaining chunks, each paired with the (event, tag) pairs expected.
remaining_steps = [
    ('</element><child>',
     [('end', 'element-huhu'), ('start', 'child')]),
    ('</child>', [('end', 'child-huhu')]),
    ('</root>', [('end', 'root-huhu')]),
]
for chunk, expected in remaining_steps:
    parser.feed(chunk)
    self.assert_event_tags(pending, expected)

self.assertFalse(list(pending))
document_root = parser.close()
self.assertEqual('root-huhu', document_root.tag)
4495
4496
4527
if __name__ == '__main__':
    # Running this module directly only prints a hint to use test.py.
    hint = 'to test use test.py %s' % (__file__,)
    print(hint)
4530