1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 import os.path
13 import unittest
14 import copy
15 import sys
16 import re
17 import gc
18 import operator
19 import tempfile
20 import textwrap
21 import zlib
22 import gzip
23 from contextlib import closing, contextmanager
24
25 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
26 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
27 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
28 from .common_imports import canonicalize, _str, _bytes
29
# Banner printed when the module is imported: it records which lxml build and
# which libxml2/libxslt versions (as reported by lxml.etree) this run tests.
print("")
print("TESTED VERSION: %s" % etree.__version__)
print(" Python: " + repr(sys.version_info))
print(" lxml.etree: " + repr(etree.LXML_VERSION))
# "used" and "compiled" are reported separately for each C library.
print(" libxml used: " + repr(etree.LIBXML_VERSION))
print(" libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION))
print(" libxslt used: " + repr(etree.LIBXSLT_VERSION))
print(" libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION))
print("")
39
40 try:
41 _unicode = unicode
42 except NameError:
43
44 _unicode = str
45
46
47 @contextmanager
48 -def tmpfile():
55
58 """Tests only for etree, not ElementTree"""
59 etree = etree
60
71
80
88
95
97 Element = self.etree.Element
98 el = Element('name')
99 self.assertRaises(ValueError, Element, '{}')
100 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
101
102 self.assertRaises(ValueError, Element, '{test}')
103 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, 'p:name')
108 self.assertRaises(ValueError, Element, '{test}p:name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
112
114 Element = self.etree.Element
115 self.assertRaises(ValueError, Element, "p'name")
116 self.assertRaises(ValueError, Element, 'p"name')
117
118 self.assertRaises(ValueError, Element, "{test}p'name")
119 self.assertRaises(ValueError, Element, '{test}p"name')
120
121 el = Element('name')
122 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
123 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
124
126 Element = self.etree.Element
127 self.assertRaises(ValueError, Element, ' name ')
128 self.assertRaises(ValueError, Element, 'na me')
129 self.assertRaises(ValueError, Element, '{test} name')
130
131 el = Element('name')
132 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
133
141
149
151 Element = self.etree.Element
152 SubElement = self.etree.SubElement
153
154 el = Element('name')
155 self.assertRaises(ValueError, SubElement, el, "p'name")
156 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
157
158 self.assertRaises(ValueError, SubElement, el, 'p"name')
159 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
160
169
178
180 QName = self.etree.QName
181 self.assertRaises(ValueError, QName, '')
182 self.assertRaises(ValueError, QName, 'test', '')
183
185 QName = self.etree.QName
186 self.assertRaises(ValueError, QName, 'p:name')
187 self.assertRaises(ValueError, QName, 'test', 'p:name')
188
190 QName = self.etree.QName
191 self.assertRaises(ValueError, QName, ' name ')
192 self.assertRaises(ValueError, QName, 'na me')
193 self.assertRaises(ValueError, QName, 'test', ' name')
194
202
204
205 QName = self.etree.QName
206 qname1 = QName('http://myns', 'a')
207 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
208
209 qname2 = QName(a)
210 self.assertEqual(a.tag, qname1.text)
211 self.assertEqual(qname1.text, qname2.text)
212 self.assertEqual(qname1, qname2)
213
215
216 etree = self.etree
217 qname = etree.QName('http://myns', 'a')
218 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
219 a.text = qname
220
221 self.assertEqual("p:a", a.text)
222
231
246
252
262
274
276 Element = self.etree.Element
277
278 keys = ["attr%d" % i for i in range(10)]
279 values = ["TEST-%d" % i for i in range(10)]
280 items = list(zip(keys, values))
281
282 root = Element("root")
283 for key, value in items:
284 root.set(key, value)
285 self.assertEqual(keys, root.attrib.keys())
286 self.assertEqual(values, root.attrib.values())
287
288 root2 = Element("root2", root.attrib,
289 attr_99='TOAST-1', attr_98='TOAST-2')
290 self.assertEqual(['attr_98', 'attr_99'] + keys,
291 root2.attrib.keys())
292 self.assertEqual(['TOAST-2', 'TOAST-1'] + values,
293 root2.attrib.values())
294
295 self.assertEqual(keys, root.attrib.keys())
296 self.assertEqual(values, root.attrib.values())
297
299
300
301 Element = self.etree.Element
302 root = Element("root")
303 self.assertRaises(TypeError, root.set, "newattr", 5)
304 self.assertRaises(TypeError, root.set, "newattr", object)
305 self.assertRaises(TypeError, root.set, "newattr", None)
306 self.assertRaises(TypeError, root.set, "newattr")
307
321
343
345 XML = self.etree.XML
346 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
347
348 root = XML(xml)
349 self.etree.strip_elements(root, 'a')
350 self.assertEqual(_bytes('<test><x></x></test>'),
351 self._writeElement(root))
352
353 root = XML(xml)
354 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
355 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
356 self._writeElement(root))
357
358 root = XML(xml)
359 self.etree.strip_elements(root, 'c')
360 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
361 self._writeElement(root))
362
364 XML = self.etree.XML
365 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
366
367 root = XML(xml)
368 self.etree.strip_elements(root, 'a')
369 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
370 self._writeElement(root))
371
372 root = XML(xml)
373 self.etree.strip_elements(root, '{urn:a}b', 'c')
374 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
375 self._writeElement(root))
376
377 root = XML(xml)
378 self.etree.strip_elements(root, '{urn:a}*', 'c')
379 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
380 self._writeElement(root))
381
382 root = XML(xml)
383 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
384 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
385 self._writeElement(root))
386
405
431
458
485
504
517
528
534
536 XML = self.etree.XML
537 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
538 self.assertEqual(root[0].target, "mypi")
539 self.assertEqual(root[0].get('my'), "1")
540 self.assertEqual(root[0].get('test'), " abc ")
541 self.assertEqual(root[0].get('quotes'), "' '")
542 self.assertEqual(root[0].get('only'), None)
543 self.assertEqual(root[0].get('names'), None)
544 self.assertEqual(root[0].get('nope'), None)
545
547 XML = self.etree.XML
548 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
549 self.assertEqual(root[0].target, "mypi")
550 self.assertEqual(root[0].attrib['my'], "1")
551 self.assertEqual(root[0].attrib['test'], " abc ")
552 self.assertEqual(root[0].attrib['quotes'], "' '")
553 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
554 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
555 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
556
558
559 ProcessingInstruction = self.etree.ProcessingInstruction
560
561 a = ProcessingInstruction("PI", "ONE")
562 b = copy.deepcopy(a)
563 b.text = "ANOTHER"
564
565 self.assertEqual('ONE', a.text)
566 self.assertEqual('ANOTHER', b.text)
567
583
598
609
621
640
645
658
669
670 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
671 events = list(iterparse(f, events=('end', 'comment')))
672 root = events[-1][1]
673 self.assertEqual(6, len(events))
674 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
675 [ name(*item) for item in events ])
676 self.assertEqual(
677 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
678 tostring(root))
679
691
692 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
693 events = list(iterparse(f, events=('end', 'pi')))
694 root = events[-2][1]
695 self.assertEqual(8, len(events))
696 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
697 ('pid','d'), 'a', ('pie','e')],
698 [ name(*item) for item in events ])
699 self.assertEqual(
700 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
701 tostring(ElementTree(root)))
702
717
723
725 iterparse = self.etree.iterparse
726 f = BytesIO('<a><b><c/></a>')
727 it = iterparse(f, events=('start', 'end'), recover=True)
728 events = [(ev, el.tag) for ev, el in it]
729 root = it.root
730 self.assertTrue(root is not None)
731
732 self.assertEqual(1, events.count(('start', 'a')))
733 self.assertEqual(1, events.count(('end', 'a')))
734
735 self.assertEqual(1, events.count(('start', 'b')))
736 self.assertEqual(1, events.count(('end', 'b')))
737
738 self.assertEqual(1, events.count(('start', 'c')))
739 self.assertEqual(1, events.count(('end', 'c')))
740
742 iterparse = self.etree.iterparse
743 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
744 it = iterparse(f, events=('start', 'end'), recover=True)
745 events = [(ev, el.tag) for ev, el in it]
746 root = it.root
747 self.assertTrue(root is not None)
748
749 self.assertEqual(1, events.count(('start', 'a')))
750 self.assertEqual(1, events.count(('end', 'a')))
751
752 self.assertEqual(2, events.count(('start', 'b')))
753 self.assertEqual(2, events.count(('end', 'b')))
754
755 self.assertEqual(2, events.count(('start', 'c')))
756 self.assertEqual(2, events.count(('end', 'c')))
757
759 iterparse = self.etree.iterparse
760 f = BytesIO("""
761 <a> \n \n <b> b test </b> \n
762
763 \n\t <c> \n </c> </a> \n """)
764 iterator = iterparse(f, remove_blank_text=True)
765 text = [ (element.text, element.tail)
766 for event, element in iterator ]
767 self.assertEqual(
768 [(" b test ", None), (" \n ", None), (None, None)],
769 text)
770
772 iterparse = self.etree.iterparse
773 f = BytesIO('<a><b><d/></b><c/></a>')
774
775 iterator = iterparse(f, tag="b", events=('start', 'end'))
776 events = list(iterator)
777 root = iterator.root
778 self.assertEqual(
779 [('start', root[0]), ('end', root[0])],
780 events)
781
783 iterparse = self.etree.iterparse
784 f = BytesIO('<a><b><d/></b><c/></a>')
785
786 iterator = iterparse(f, tag="*", events=('start', 'end'))
787 events = list(iterator)
788 self.assertEqual(
789 8,
790 len(events))
791
793 iterparse = self.etree.iterparse
794 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
795
796 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
797 events = list(iterator)
798 root = iterator.root
799 self.assertEqual(
800 [('start', root[0]), ('end', root[0])],
801 events)
802
804 iterparse = self.etree.iterparse
805 f = BytesIO('<a><b><d/></b><c/></a>')
806 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
807 events = list(iterator)
808 root = iterator.root
809 self.assertEqual(
810 [('start', root[0]), ('end', root[0])],
811 events)
812
813 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
814 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
815 events = list(iterator)
816 root = iterator.root
817 self.assertEqual([], events)
818
820 iterparse = self.etree.iterparse
821 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
822 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
823 events = list(iterator)
824 self.assertEqual(8, len(events))
825
827 iterparse = self.etree.iterparse
828 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
829 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
830 events = list(iterator)
831 self.assertEqual([], events)
832
833 f = BytesIO('<a><b><d/></b><c/></a>')
834 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
835 events = list(iterator)
836 self.assertEqual(8, len(events))
837
839 text = _str('Søk på nettet')
840 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
841 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
842 ).encode('iso-8859-1')
843
844 self.assertRaises(self.etree.ParseError,
845 list, self.etree.iterparse(BytesIO(xml_latin1)))
846
848 text = _str('Søk på nettet', encoding="UTF-8")
849 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
850 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
851 ).encode('iso-8859-1')
852
853 iterator = self.etree.iterparse(BytesIO(xml_latin1),
854 encoding="iso-8859-1")
855 self.assertEqual(1, len(list(iterator)))
856
857 a = iterator.root
858 self.assertEqual(a.text, text)
859
861 tostring = self.etree.tostring
862 f = BytesIO('<root><![CDATA[test]]></root>')
863 context = self.etree.iterparse(f, strip_cdata=False)
864 content = [ el.text for event,el in context ]
865
866 self.assertEqual(['test'], content)
867 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
868 tostring(context.root))
869
873
878
897
898
899
922
923
924
926 assertEqual = self.assertEqual
927 assertFalse = self.assertFalse
928
929 events = []
class Target(object):
    """Parser target that logs element events and checks for a bare TAG element."""

    def start(self, tag, attrib):
        # Log before checking, so the event is recorded even if a check fails.
        record = events.append
        record("start")
        assertFalse(attrib)
        assertEqual("TAG", tag)

    def end(self, tag):
        record = events.append
        record("end")
        assertEqual("TAG", tag)

    def close(self):
        # Value handed back as the parse result.
        return "DONE"
940
941 parser = self.etree.XMLParser(target=Target())
942 tree = self.etree.ElementTree()
943
944 self.assertRaises(TypeError,
945 tree.parse, BytesIO("<TAG/>"), parser=parser)
946 self.assertEqual(["start", "end"], events)
947
949
950 events = []
class Target(object):
    """Parser target that logs every event and raises when </a> is reached."""

    def start(self, tag, attrib):
        entry = "start-" + tag
        events.append(entry)

    def data(self, data):
        entry = "data-" + data
        events.append(entry)

    def end(self, tag):
        entry = "end-" + tag
        events.append(entry)
        if tag != 'a':
            return
        # The end event is already logged when the error is raised.
        raise ValueError("dead and gone")

    def close(self):
        events.append("close")
        return "DONE"
963
964 parser = self.etree.XMLParser(target=Target())
965
966 try:
967 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
968 done = parser.close()
969 self.fail("error expected, but parsing succeeded")
970 except ValueError:
971 done = 'value error received as expected'
972
973 self.assertEqual(["start-root", "data-A", "start-a",
974 "data-ca", "end-a", "close"],
975 events)
976
978
979 events = []
class Target(object):
    """Parser target that logs every event and raises when </a> is reached."""

    def start(self, tag, attrib):
        entry = "start-" + tag
        events.append(entry)

    def data(self, data):
        entry = "data-" + data
        events.append(entry)

    def end(self, tag):
        entry = "end-" + tag
        events.append(entry)
        if tag != 'a':
            return
        # The end event is already logged when the error is raised.
        raise ValueError("dead and gone")

    def close(self):
        events.append("close")
        return "DONE"
992
993 parser = self.etree.XMLParser(target=Target())
994
995 try:
996 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
997 parser=parser)
998 self.fail("error expected, but parsing succeeded")
999 except ValueError:
1000 done = 'value error received as expected'
1001
1002 self.assertEqual(["start-root", "data-A", "start-a",
1003 "data-ca", "end-a", "close"],
1004 events)
1005
1007
1008 events = []
class Target(object):
    """Parser target that logs element, text and comment events in arrival order."""

    def _log(self, prefix, value):
        # Single place where an event label is appended to the shared list.
        events.append(prefix + value)

    def start(self, tag, attrib):
        self._log("start-", tag)

    def end(self, tag):
        self._log("end-", tag)

    def data(self, data):
        self._log("data-", data)

    def comment(self, text):
        self._log("comment-", text)

    def close(self):
        return "DONE"
1020
1021 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1022
1023 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1024 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1025 done = parser.close()
1026
1027 self.assertEqual("DONE", done)
1028 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1029 "start-sub", "end-sub", "data-B", "end-root"],
1030 events)
1031
1037 def end(self, tag):
1038 events.append("end-" + tag)
1039 def data(self, data):
1040 events.append("data-" + data)
1041 def comment(self, text):
1042 events.append("comment-" + text)
1043 def close(self):
1044 return "DONE"
1045
1046 parser = self.etree.XMLParser(target=Target())
1047
1048 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1049 done = parser.close()
1050
1051 self.assertEqual("DONE", done)
1052 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1053 "start-sub", "end-sub", "comment-c", "data-B",
1054 "end-root", "comment-d"],
1055 events)
1056
1058 events = []
class Target(object):
    """Parser target that logs element, text and processing-instruction events."""

    def _log(self, prefix, value):
        # Single place where an event label is appended to the shared list.
        events.append(prefix + value)

    def start(self, tag, attrib):
        self._log("start-", tag)

    def end(self, tag):
        self._log("end-", tag)

    def data(self, data):
        self._log("data-", data)

    def pi(self, target, data):
        # Label has the form "pi-<target>-<data>".
        self._log("pi-" + target, "-" + data)

    def close(self):
        return "DONE"
1070
1071 parser = self.etree.XMLParser(target=Target())
1072
1073 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1074 done = parser.close()
1075
1076 self.assertEqual("DONE", done)
1077 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1078 "data-B", "end-root", "pi-test-c"],
1079 events)
1080
1082 events = []
class Target(object):
    """Parser target that logs start, end and text events."""

    def _log(self, prefix, value):
        # Single place where an event label is appended to the shared list.
        events.append(prefix + value)

    def start(self, tag, attrib):
        self._log("start-", tag)

    def end(self, tag):
        self._log("end-", tag)

    def data(self, data):
        self._log("data-", data)

    def close(self):
        return "DONE"
1092
1093 parser = self.etree.XMLParser(target=Target(),
1094 strip_cdata=False)
1095
1096 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1097 done = parser.close()
1098
1099 self.assertEqual("DONE", done)
1100 self.assertEqual(["start-root", "data-A", "start-a",
1101 "data-ca", "end-a", "data-B", "end-root"],
1102 events)
1103
1105 events = []
class Target(object):
    """Parser target that logs start, end, text and close events."""

    def start(self, tag, attrib):
        entry = "start-" + tag
        events.append(entry)

    def end(self, tag):
        entry = "end-" + tag
        events.append(entry)

    def data(self, data):
        entry = "data-" + data
        events.append(entry)

    def close(self):
        # Closing is itself logged before the result value is returned.
        events.append("close")
        return "DONE"
1116
1117 parser = self.etree.XMLParser(target=Target(),
1118 recover=True)
1119
1120 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1121 done = parser.close()
1122
1123 self.assertEqual("DONE", done)
1124 self.assertEqual(["start-root", "data-A", "start-a",
1125 "data-ca", "end-a", "data-B",
1126 "end-root", "close"],
1127 events)
1128
1138
1148
1157
1167
1169 iterwalk = self.etree.iterwalk
1170 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1171
1172 iterator = iterwalk(root, events=('start','end'))
1173 events = list(iterator)
1174 self.assertEqual(
1175 [('start', root), ('start', root[0]), ('end', root[0]),
1176 ('start', root[1]), ('end', root[1]), ('end', root)],
1177 events)
1178
1189
1191 iterwalk = self.etree.iterwalk
1192 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1193
1194 attr_name = '{testns}bla'
1195 events = []
1196 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1197 for event, elem in iterator:
1198 events.append(event)
1199 if event == 'start':
1200 if elem.tag != '{ns1}a':
1201 elem.set(attr_name, 'value')
1202
1203 self.assertEqual(
1204 ['start-ns', 'start', 'start', 'start-ns', 'start',
1205 'end', 'end-ns', 'end', 'end', 'end-ns'],
1206 events)
1207
1208 self.assertEqual(
1209 None,
1210 root.get(attr_name))
1211 self.assertEqual(
1212 'value',
1213 root[0].get(attr_name))
1214
1225
1227 parse = self.etree.parse
1228 parser = self.etree.XMLParser(dtd_validation=True)
1229 assertEqual = self.assertEqual
1230 test_url = _str("__nosuch.dtd")
1231
# Resolver that answers the external lookup with an in-memory text string.
class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        # The lookup must be for the URL the test put into the document.
        assertEqual(url, test_url)
        # Serve a DTD whose entity value is the requested URL itself.
        return self.resolve_string(
            _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url, context)
1238
1239 parser.resolvers.add(MyResolver())
1240
1241 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1242 tree = parse(StringIO(xml), parser)
1243 root = tree.getroot()
1244 self.assertEqual(root.text, test_url)
1245
1247 parse = self.etree.parse
1248 parser = self.etree.XMLParser(dtd_validation=True)
1249 assertEqual = self.assertEqual
1250 test_url = _str("__nosuch.dtd")
1251
# Resolver that answers the external lookup with an in-memory byte string.
class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        # The lookup must be for the URL the test put into the document.
        assertEqual(url, test_url)
        # Same DTD text as a string would carry, but UTF-8 encoded before
        # being handed to resolve_string().
        return self.resolve_string(
            (_str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url).encode('utf-8'),
            context)
1259
1260 parser.resolvers.add(MyResolver())
1261
1262 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1263 tree = parse(StringIO(xml), parser)
1264 root = tree.getroot()
1265 self.assertEqual(root.text, test_url)
1266
1268 parse = self.etree.parse
1269 parser = self.etree.XMLParser(dtd_validation=True)
1270 assertEqual = self.assertEqual
1271 test_url = _str("__nosuch.dtd")
1272
# Resolver that answers the external lookup through a file-like object.
class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        # The lookup must be for the URL the test put into the document.
        assertEqual(url, test_url)
        # The DTD text is wrapped in the SillyFileLike test helper and passed
        # to resolve_file() instead of resolve_string().
        return self.resolve_file(
            SillyFileLike(
                _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url), context)
1280
1281 parser.resolvers.add(MyResolver())
1282
1283 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1284 tree = parse(StringIO(xml), parser)
1285 root = tree.getroot()
1286 self.assertEqual(root.text, test_url)
1287
1289 parse = self.etree.parse
1290 parser = self.etree.XMLParser(attribute_defaults=True)
1291 assertEqual = self.assertEqual
1292 test_url = _str("__nosuch.dtd")
1293
class MyResolver(self.etree.Resolver):
    """Answer the requested DTD URL with the path of test.dtd in the test directory."""

    def resolve(self, url, id, context):
        # The lookup must be for the URL the test put into the document.
        assertEqual(url, test_url)
        dtd_path = fileInTestDir('test.dtd')
        return self.resolve_filename(dtd_path, context)
1299
1300 parser.resolvers.add(MyResolver())
1301
1302 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1303 tree = parse(StringIO(xml), parser)
1304 root = tree.getroot()
1305 self.assertEqual(
1306 root.attrib, {'default': 'valueA'})
1307 self.assertEqual(
1308 root[0].attrib, {'default': 'valueB'})
1309
1324
1325 parser.resolvers.add(MyResolver())
1326
1327 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1328 tree = parse(StringIO(xml), parser,
1329 base_url=fileUrlInTestDir('__test.xml'))
1330 root = tree.getroot()
1331 self.assertEqual(
1332 root.attrib, {'default': 'valueA'})
1333 self.assertEqual(
1334 root[0].attrib, {'default': 'valueB'})
1335
1337 parse = self.etree.parse
1338 parser = self.etree.XMLParser(attribute_defaults=True)
1339 assertEqual = self.assertEqual
1340 test_url = _str("__nosuch.dtd")
1341
class MyResolver(self.etree.Resolver):
    """Answer the requested DTD URL with test.dtd opened as a binary file."""

    def resolve(self, url, id, context):
        # The lookup must be for the URL the test put into the document.
        assertEqual(url, test_url)
        dtd_path = fileInTestDir('test.dtd')
        # NOTE(review): the file object is passed on and not closed here.
        dtd_file = open(dtd_path, 'rb')
        return self.resolve_file(dtd_file, context)
1347
1348 parser.resolvers.add(MyResolver())
1349
1350 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1351 tree = parse(StringIO(xml), parser)
1352 root = tree.getroot()
1353 self.assertEqual(
1354 root.attrib, {'default': 'valueA'})
1355 self.assertEqual(
1356 root[0].attrib, {'default': 'valueB'})
1357
1359 parse = self.etree.parse
1360 parser = self.etree.XMLParser(load_dtd=True)
1361 assertEqual = self.assertEqual
1362 test_url = _str("__nosuch.dtd")
1363
# Mutable flag holder: a class attribute is used so that code in a nested
# scope can flip it by plain attribute assignment.
class check(object):
    # Starts out False; flipped to True elsewhere once a lookup was seen.
    resolved = False
1366
class MyResolver(self.etree.Resolver):
    """Note that a lookup happened, then answer it with an empty document."""

    def resolve(self, url, id, context):
        # The lookup must be for the URL the test put into the document.
        assertEqual(url, test_url)
        setattr(check, 'resolved', True)
        empty_result = self.resolve_empty(context)
        return empty_result
1372
1373 parser.resolvers.add(MyResolver())
1374
1375 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1376 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1377 self.assertTrue(check.resolved)
1378
1385
class MyResolver(self.etree.Resolver):
    """Resolver whose lookup always fails with _LocalException."""

    def resolve(self, url, id, context):
        # No input is examined: every lookup raises.
        raise _LocalException()
1389
1390 parser.resolvers.add(MyResolver())
1391
1392 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1393 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1394
1395 if etree.LIBXML_VERSION > (2,6,20):
1412
1414 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1415 <root>
1416 <child1/>
1417 <child2/>
1418 <child3> </child3>
1419 </root>''')
1420
1421 parser = self.etree.XMLParser(resolve_entities=False)
1422 root = etree.fromstring(xml, parser)
1423 self.assertEqual([ el.tag for el in root ],
1424 ['child1', 'child2', 'child3'])
1425
1426 root[0] = root[-1]
1427 self.assertEqual([ el.tag for el in root ],
1428 ['child3', 'child2'])
1429 self.assertEqual(root[0][0].text, ' ')
1430 self.assertEqual(root[0][0].name, 'nbsp')
1431
1447
1454
1456 Entity = self.etree.Entity
1457 self.assertRaises(ValueError, Entity, 'a b c')
1458 self.assertRaises(ValueError, Entity, 'a,b')
1459 self.assertRaises(ValueError, Entity, 'a\0b')
1460 self.assertRaises(ValueError, Entity, '#abc')
1461 self.assertRaises(ValueError, Entity, '#xxyz')
1462
1475
1496
1509
1521
1530
1539
1540
1550
1559
1561 Element = self.etree.Element
1562 SubElement = self.etree.SubElement
1563 root = Element('root')
1564 self.assertRaises(ValueError, root.append, root)
1565 child = SubElement(root, 'child')
1566 self.assertRaises(ValueError, child.append, root)
1567 child2 = SubElement(child, 'child2')
1568 self.assertRaises(ValueError, child2.append, root)
1569 self.assertRaises(ValueError, child2.append, child)
1570 self.assertEqual('child2', root[0][0].tag)
1571
1584
1597
1608
1619
1629
1639
1655
1671
1677
1692
1705
1720
1733
1748
1761
1776
1789
1790
1798
1799
1809
1810
1825
1826
1836
1837
1848
1875
1876
1878 self.assertRaises(TypeError, self.etree.dump, None)
1879
1892
1905
1926
1935
1937 XML = self.etree.XML
1938
1939 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
1940 result = []
1941 for el in root.iterchildren(reversed=True):
1942 result.append(el.tag)
1943 self.assertEqual(['three', 'two', 'one'], result)
1944
1953
1962
1971
1973 XML = self.etree.XML
1974
1975 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1976 result = []
1977 for el in root.iterchildren(tag=['two', 'three']):
1978 result.append(el.text)
1979 self.assertEqual(['Two', 'Bla', None], result)
1980
1982 XML = self.etree.XML
1983
1984 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1985 result = []
1986 for el in root.iterchildren('two', 'three'):
1987 result.append(el.text)
1988 self.assertEqual(['Two', 'Bla', None], result)
1989
1991 XML = self.etree.XML
1992
1993 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1994 result = []
1995 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
1996 result.append(el.text)
1997 self.assertEqual([None, 'Bla', 'Two'], result)
1998
2019
2041
2043 Element = self.etree.Element
2044 SubElement = self.etree.SubElement
2045
2046 a = Element('a')
2047 b = SubElement(a, 'b')
2048 c = SubElement(a, 'c')
2049 d = SubElement(b, 'd')
2050 self.assertEqual(
2051 [b, a],
2052 list(d.iterancestors(tag=('a', 'b'))))
2053 self.assertEqual(
2054 [b, a],
2055 list(d.iterancestors('a', 'b')))
2056
2057 self.assertEqual(
2058 [],
2059 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2060 self.assertEqual(
2061 [],
2062 list(d.iterancestors('w', 'x', 'y', 'z')))
2063
2064 self.assertEqual(
2065 [],
2066 list(d.iterancestors(tag=('d', 'x'))))
2067 self.assertEqual(
2068 [],
2069 list(d.iterancestors('d', 'x')))
2070
2071 self.assertEqual(
2072 [b, a],
2073 list(d.iterancestors(tag=('b', '*'))))
2074 self.assertEqual(
2075 [b, a],
2076 list(d.iterancestors('b', '*')))
2077
2078 self.assertEqual(
2079 [b],
2080 list(d.iterancestors(tag=('b', 'c'))))
2081 self.assertEqual(
2082 [b],
2083 list(d.iterancestors('b', 'c')))
2084
2101
2103 Element = self.etree.Element
2104 SubElement = self.etree.SubElement
2105
2106 a = Element('a')
2107 b = SubElement(a, 'b')
2108 c = SubElement(a, 'c')
2109 d = SubElement(b, 'd')
2110 e = SubElement(c, 'e')
2111
2112 self.assertEqual(
2113 [],
2114 list(a.iterdescendants('a')))
2115 self.assertEqual(
2116 [],
2117 list(a.iterdescendants(tag='a')))
2118
2119 a2 = SubElement(e, 'a')
2120 self.assertEqual(
2121 [a2],
2122 list(a.iterdescendants('a')))
2123
2124 self.assertEqual(
2125 [a2],
2126 list(c.iterdescendants('a')))
2127 self.assertEqual(
2128 [a2],
2129 list(c.iterdescendants(tag='a')))
2130
2132 Element = self.etree.Element
2133 SubElement = self.etree.SubElement
2134
2135 a = Element('a')
2136 b = SubElement(a, 'b')
2137 c = SubElement(a, 'c')
2138 d = SubElement(b, 'd')
2139 e = SubElement(c, 'e')
2140
2141 self.assertEqual(
2142 [b, e],
2143 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2144 self.assertEqual(
2145 [b, e],
2146 list(a.iterdescendants('a', 'b', 'e')))
2147
2148 a2 = SubElement(e, 'a')
2149 self.assertEqual(
2150 [b, a2],
2151 list(a.iterdescendants(tag=('a', 'b'))))
2152 self.assertEqual(
2153 [b, a2],
2154 list(a.iterdescendants('a', 'b')))
2155
2156 self.assertEqual(
2157 [],
2158 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2159 self.assertEqual(
2160 [],
2161 list(c.iterdescendants('x', 'y', 'z')))
2162
2163 self.assertEqual(
2164 [b, d, c, e, a2],
2165 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2166 self.assertEqual(
2167 [b, d, c, e, a2],
2168 list(a.iterdescendants('x', 'y', 'z', '*')))
2169
2187
2204
2222
2246
2248 Element = self.etree.Element
2249 SubElement = self.etree.SubElement
2250
2251 a = Element('a')
2252 b = SubElement(a, 'b')
2253 c = SubElement(a, 'c')
2254 d = SubElement(b, 'd')
2255 self.assertEqual(
2256 [],
2257 list(a.itersiblings(tag='XXX')))
2258 self.assertEqual(
2259 [c],
2260 list(b.itersiblings(tag='c')))
2261 self.assertEqual(
2262 [c],
2263 list(b.itersiblings(tag='*')))
2264 self.assertEqual(
2265 [b],
2266 list(c.itersiblings(preceding=True, tag='b')))
2267 self.assertEqual(
2268 [],
2269 list(c.itersiblings(preceding=True, tag='c')))
2270
2272 Element = self.etree.Element
2273 SubElement = self.etree.SubElement
2274
2275 a = Element('a')
2276 b = SubElement(a, 'b')
2277 c = SubElement(a, 'c')
2278 d = SubElement(b, 'd')
2279 e = SubElement(a, 'e')
2280 self.assertEqual(
2281 [],
2282 list(a.itersiblings(tag=('XXX', 'YYY'))))
2283 self.assertEqual(
2284 [c, e],
2285 list(b.itersiblings(tag=('c', 'd', 'e'))))
2286 self.assertEqual(
2287 [b],
2288 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2289 self.assertEqual(
2290 [c, b],
2291 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2292
2294 parseid = self.etree.parseid
2295 XML = self.etree.XML
2296 xml_text = _bytes('''
2297 <!DOCTYPE document [
2298 <!ELEMENT document (h1,p)*>
2299 <!ELEMENT h1 (#PCDATA)>
2300 <!ATTLIST h1 myid ID #REQUIRED>
2301 <!ELEMENT p (#PCDATA)>
2302 <!ATTLIST p someid ID #REQUIRED>
2303 ]>
2304 <document>
2305 <h1 myid="chapter1">...</h1>
2306 <p id="note1" class="note">...</p>
2307 <p>Regular paragraph.</p>
2308 <p xml:id="xmlid">XML:ID paragraph.</p>
2309 <p someid="warn1" class="warning">...</p>
2310 </document>
2311 ''')
2312
2313 tree, dic = parseid(BytesIO(xml_text))
2314 root = tree.getroot()
2315 root2 = XML(xml_text)
2316 self.assertEqual(self._writeElement(root),
2317 self._writeElement(root2))
2318 expected = {
2319 "chapter1" : root[0],
2320 "xmlid" : root[3],
2321 "warn1" : root[4]
2322 }
2323 self.assertTrue("chapter1" in dic)
2324 self.assertTrue("warn1" in dic)
2325 self.assertTrue("xmlid" in dic)
2326 self._checkIDDict(dic, expected)
2327
# XMLDTDID() parses a byte string and returns the root element together with
# a dictionary of collected IDs ("myid"/"someid" are declared as ID in the
# DTD, and xml:id is expected in the dictionary as well).
xml_text = _bytes('''
<!DOCTYPE document [
<!ELEMENT document (h1,p)*>
<!ELEMENT h1 (#PCDATA)>
<!ATTLIST h1 myid ID #REQUIRED>
<!ELEMENT p (#PCDATA)>
<!ATTLIST p someid ID #REQUIRED>
]>
<document>
<h1 myid="chapter1">...</h1>
<p id="note1" class="note">...</p>
<p>Regular paragraph.</p>
<p xml:id="xmlid">XML:ID paragraph.</p>
<p someid="warn1" class="warning">...</p>
</document>
''')

root, dic = self.etree.XMLDTDID(xml_text)
reference_root = self.etree.XML(xml_text)

# The ID-collecting parse must serialise exactly like a plain parse.
self.assertEqual(self._writeElement(root),
                 self._writeElement(reference_root))

expected = {"chapter1": root[0], "xmlid": root[3], "warn1": root[4]}
for id_value in ("chapter1", "warn1", "xmlid"):
    self.assertTrue(id_value in dic)
self._checkIDDict(dic, expected)
2361
# Without a DTD none of the attributes is declared as an ID, so the
# dictionary returned by XMLDTDID() is expected to be empty.
xml_text = _bytes('''
<document>
<h1 myid="chapter1">...</h1>
<p id="note1" class="note">...</p>
<p>Regular paragraph.</p>
<p someid="warn1" class="warning">...</p>
</document>
''')

root, dic = self.etree.XMLDTDID(xml_text)
reference_root = self.etree.XML(xml_text)
self.assertEqual(self._writeElement(root),
                 self._writeElement(reference_root))
self._checkIDDict(dic, {})
2380
# With a parser created with collect_ids=False, XMLDTDID() is expected to
# return an empty (falsy) ID dictionary even though the DTD declares IDs.
xml_text = _bytes('''
<!DOCTYPE document [
<!ELEMENT document (h1,p)*>
<!ELEMENT h1 (#PCDATA)>
<!ATTLIST h1 myid ID #REQUIRED>
<!ELEMENT p (#PCDATA)>
<!ATTLIST p someid ID #REQUIRED>
]>
<document>
<h1 myid="chapter1">...</h1>
<p id="note1" class="note">...</p>
<p>Regular paragraph.</p>
<p xml:id="xmlid">XML:ID paragraph.</p>
<p someid="warn1" class="warning">...</p>
</document>
''')

no_id_parser = etree.XMLParser(collect_ids=False)
root, dic = self.etree.XMLDTDID(xml_text, parser=no_id_parser)
reference_root = self.etree.XML(xml_text)

self.assertEqual(self._writeElement(root),
                 self._writeElement(reference_root))
self.assertFalse(dic)
self._checkIDDict(dic, {})
2408
# Compare an ID dictionary (dic) with a plain dict of expected entries
# through each mapping accessor.  The iter*() accessors only exist on
# Python 2, and the values are compared on Python 2 only.
# NOTE(review): the nesting of the version guards is reconstructed -- this
# view of the file carries no indentation; confirm against the original.
running_py2 = sys.version_info < (3,)

self.assertEqual(len(dic), len(expected))
self.assertEqual(sorted(dic.items()), sorted(expected.items()))
if running_py2:
    self.assertEqual(sorted(dic.iteritems()),
                     sorted(expected.iteritems()))
self.assertEqual(sorted(dic.keys()), sorted(expected.keys()))
if running_py2:
    self.assertEqual(sorted(dic.iterkeys()),
                     sorted(expected.iterkeys()))
    self.assertEqual(sorted(dic.values()),
                     sorted(expected.values()))
    self.assertEqual(sorted(dic.itervalues()),
                     sorted(expected.itervalues()))
2427
# An element created with a prefixed nsmap entry reports that prefix and is
# written out with the matching xmlns declaration.
prefix_map = {'foo': 'http://ns.infrae.com/foo'}
elem = self.etree.Element('{http://ns.infrae.com/foo}bar', nsmap=prefix_map)

self.assertEqual('foo', elem.prefix)
self.assertEqual(
    _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
    self._writeElement(elem))
2439
# Mapping the None prefix declares a default namespace: the element has no
# prefix, keeps its qualified tag, and is written with a plain xmlns.
default_map = {None: 'http://ns.infrae.com/foo'}
elem = self.etree.Element('{http://ns.infrae.com/foo}bar', nsmap=default_map)

self.assertEqual(None, elem.prefix)
self.assertEqual('{http://ns.infrae.com/foo}bar', elem.tag)
self.assertEqual(
    _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
    self._writeElement(elem))
2454
# A default namespace plus an additional, unused prefix: both declarations
# are expected in the output while the element itself stays unprefixed.
mixed_map = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
elem = self.etree.Element('{http://ns.infrae.com/foo}bar', nsmap=mixed_map)

self.assertEqual(None, elem.prefix)
self.assertEqual('{http://ns.infrae.com/foo}bar', elem.tag)
expected_xml = _bytes(
    '<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>')
self.assertEqual(expected_xml, self._writeElement(elem))
2465
# A namespaced attribute is written with the prefix that the element's nsmap
# assigns to the attribute's namespace.
ns_map = {None: 'http://ns.infrae.com/foo',
          'hoi': 'http://ns.infrae.com/hoi'}
elem = self.etree.Element('{http://ns.infrae.com/foo}bar', nsmap=ns_map)
elem.set('{http://ns.infrae.com/hoi}test', 'value')

expected_xml = _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>')
self.assertEqual(expected_xml, self._writeElement(elem))
2476
# A prefixed element with a prefixed attribute is appended to a parent that
# declares the same namespace as its default: the element is expected to
# lose its prefix while the attribute keeps the 'test' prefix.
etree = self.etree
attr_name = '{http://test/ns}attr'

root = etree.Element('{http://test/ns}root', nsmap={None: 'http://test/ns'})
sub = etree.Element('{http://test/ns}sub', nsmap={'test': 'http://test/ns'})

sub.attrib[attr_name] = 'value'
self.assertEqual(sub.attrib[attr_name], 'value')
standalone_xml = _bytes(
    '<test:sub xmlns:test="http://test/ns" test:attr="value"/>')
self.assertEqual(standalone_xml, etree.tostring(sub))

root.append(sub)
merged_xml = _bytes('<root xmlns="http://test/ns">'
                    '<sub xmlns:test="http://test/ns" test:attr="value"/>'
                    '</root>')
self.assertEqual(merged_xml, etree.tostring(root))
2497
# Appending a prefixed element (with a prefixed attribute) to a parent that
# has no namespaces: the serialised child is expected to stay unchanged.
etree = self.etree
attr_name = '{http://test/ns}attr'

root = etree.Element('root')
sub = etree.Element('{http://test/ns}sub', nsmap={'test': 'http://test/ns'})

sub.attrib[attr_name] = 'value'
self.assertEqual(sub.attrib[attr_name], 'value')
standalone_xml = _bytes(
    '<test:sub xmlns:test="http://test/ns" test:attr="value"/>')
self.assertEqual(standalone_xml, etree.tostring(sub))

root.append(sub)
merged_xml = _bytes('<root>'
                    '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
                    '</root>')
self.assertEqual(merged_xml, etree.tostring(root))
2517
# The element only has a default namespace, so a namespaced attribute is
# expected to get a generated prefix (ns0).  Appending the element to a
# namespace-free parent is expected to leave that serialisation unchanged.
etree = self.etree
attr_name = '{http://test/ns}attr'

root = etree.Element('root')
sub = etree.Element('{http://test/ns}sub', nsmap={None: 'http://test/ns'})

sub.attrib[attr_name] = 'value'
self.assertEqual(sub.attrib[attr_name], 'value')
standalone_xml = _bytes('<sub xmlns="http://test/ns" '
                        'xmlns:ns0="http://test/ns" ns0:attr="value"/>')
self.assertEqual(standalone_xml, etree.tostring(sub))

root.append(sub)
merged_xml = _bytes('<root>'
                    '<sub xmlns="http://test/ns"'
                    ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
                    '</root>')
self.assertEqual(merged_xml, etree.tostring(root))
2539
# The parent declares the namespace both as default and under the 'test'
# prefix.  After appending, element and attribute are expected to reuse the
# parent's 'test' prefix instead of the generated ns0 prefix.
etree = self.etree
attr_name = '{http://test/ns}attr'

root = etree.Element('{http://test/ns}root',
                     nsmap={'test': 'http://test/ns',
                            None: 'http://test/ns'})
sub = etree.Element('{http://test/ns}sub', nsmap={None: 'http://test/ns'})

sub.attrib[attr_name] = 'value'
self.assertEqual(sub.attrib[attr_name], 'value')
standalone_xml = _bytes('<sub xmlns="http://test/ns" '
                        'xmlns:ns0="http://test/ns" ns0:attr="value"/>')
self.assertEqual(standalone_xml, etree.tostring(sub))

root.append(sub)
merged_xml = _bytes(
    '<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
    '<test:sub test:attr="value"/>'
    '</test:root>')
self.assertEqual(merged_xml, etree.tostring(root))
2562
# A sub-element created in a namespace that the parent's nsmap already maps
# to a prefix is expected to be written with that prefix.
etree = self.etree
ns_map = {None: 'http://ns.infrae.com/foo',
          'hoi': 'http://ns.infrae.com/hoi'}
e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=ns_map)
# The tree wrapper is created before the child is added and kept referenced.
# NOTE(review): presumably deliberate -- the tree object itself is not used.
tree = etree.ElementTree(element=e)
etree.SubElement(e, '{http://ns.infrae.com/hoi}x')

expected_xml = _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>')
self.assertEqual(expected_xml, self._writeElement(e))
2573
# Appending an element to a parent that uses the same default namespace:
# both are expected to stay unprefixed and keep their qualified tags.
etree = self.etree
qualified_tag = '{http://ns.infrae.com/foo}bar'
default_map = {None: 'http://ns.infrae.com/foo'}

parent = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=default_map)
child = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=default_map)
parent.append(child)

self.assertEqual(None, parent.prefix)
self.assertEqual(None, parent[0].prefix)
self.assertEqual(qualified_tag, parent.tag)
self.assertEqual(qualified_tag, parent[0].tag)
2595
# The child lives in a namespace other than the shared default one, so it is
# expected to carry some prefix, while the parent stays unprefixed.
etree = self.etree
default_map = {None: 'http://ns.infrae.com/BAR'}

parent = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=default_map)
child = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=default_map)
parent.append(child)

self.assertEqual(None, parent.prefix)
self.assertNotEqual(None, child.prefix)
self.assertEqual('{http://ns.infrae.com/BAR}bar', parent.tag)
self.assertEqual('{http://ns.infrae.com/foo}bar', child.tag)
2617
# Move a namespaced element into another document that declares the same
# namespace, then drop the reference to the source root.  The moved element
# is expected to keep its tag and reuse the target's declaration.
ns_href = "http://a.b.c"
parse = self.etree.fromstring

one = parse(_bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
baz = one[0][0]

two = parse(_bytes('<root xmlns:ns="%s"/>' % ns_href))
two.append(baz)
del one

self.assertEqual('{%s}baz' % ns_href, baz.tag)
expected_xml = _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href)
self.assertEqual(expected_xml, self.etree.tostring(two))
2633
2647
2664
2675
# 100 nested <a> elements each declare their own prefix, but only n64 is
# used.  After cleanup_namespaces() only that declaration is expected to
# remain, on the element that originally carried it.
nested_open = ''.join(
    '<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100))
xml = ('<root>' + nested_open + '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')

root = self.etree.fromstring(xml)
# Sanity check: parsing and serialising round-trips before the cleanup.
self.assertEqual(xml, self.etree.tostring(root))

self.etree.cleanup_namespaces(root)
cleaned = (b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
           b'<n64:x/>' + b'</a>'*100 + b'</root>')
self.assertEqual(cleaned, self.etree.tostring(root))
2687
# With top_nsmap, the used n64 declaration is expected to move up to the
# root element; the declaration used by the a:attr attribute stays in place.
nested_open = ''.join(
    '<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100))
xml = ('<root>' +
       nested_open +
       '<n64:x xmlns:a="A" a:attr="X"/>' +
       '</a>'*100 +
       '</root>').encode('utf8')

root = self.etree.fromstring(xml)
self.assertEqual(xml, self.etree.tostring(root))

self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
cleaned = (b'<root xmlns:n64="NS64">' + b'<a>'*100 +
           b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>')
self.assertEqual(cleaned, self.etree.tostring(root))
2701
# keep_ns_prefixes protects the 'foo' declaration, which no tag or attribute
# uses ('foo:bar' only appears as text); the other unused declarations are
# expected to be removed.
xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
       '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
       '<foo>foo:bar</foo>'
       '</root>').encode('utf8')

root = self.etree.fromstring(xml)
self.assertEqual(xml, self.etree.tostring(root))

self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
cleaned = (b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
           b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
           b'<foo>foo:bar</foo>'
           b'</root>')
self.assertEqual(cleaned, self.etree.tostring(root))
2716
# top_nsmap and keep_ns_prefixes combined: the kept 'foo' declaration is
# expected to move from <sub> to the root, while 'unused1' is dropped even
# though it is listed in top_nsmap.
xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
       '<sub xmlns:foo="FOO">'
       '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
       '<foo>foo:bar</foo>'
       '</sub>'
       '</root>').encode('utf8')

root = self.etree.fromstring(xml)
self.assertEqual(xml, self.etree.tostring(root))

self.etree.cleanup_namespaces(
    root,
    top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
    keep_ns_prefixes=['foo'])
cleaned = (b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
           b'<sub>'
           b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
           b'<foo>foo:bar</foo>'
           b'</sub>'
           b'</root>')
self.assertEqual(cleaned, self.etree.tostring(root))
2738
# The nsmap property of a new element equals the mapping it was created with.
declared = {None: 'http://ns.infrae.com/foo',
            'hoi': 'http://ns.infrae.com/hoi'}
elem = self.etree.Element('{http://ns.infrae.com/foo}bar', nsmap=declared)
self.assertEqual(declared, elem.nsmap)
2748
# A sub-element's nsmap is the parent's mapping updated with its own
# declarations (the child's default namespace overrides the parent's).
# The parent mapping is deliberately not called `re`, so the module-level
# `re` import is not shadowed.
etree = self.etree

parent_map = {None: 'http://ns.infrae.com/foo',
              'hoi': 'http://ns.infrae.com/hoi'}
e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=parent_map)

child_map = {None: 'http://ns.infrae.com/honk',
             'top': 'http://ns.infrae.com/top'}
s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=child_map)

combined_map = parent_map.copy()
combined_map.update(child_map)
self.assertEqual(parent_map, e.nsmap)
self.assertEqual(combined_map, s.nsmap)
2764
# An HTML tag written with an undeclared prefix: the element is found under
# its local name and its nsmap maps the prefix to None.
html_root = self.etree.HTML('<hha:page-description>aa</hha:page-description>')
el = html_root.find('.//page-description')
self.assertEqual({'hha': None}, el.nsmap)
2769
# getiterator() with several tag names as positional arguments yields, in
# document order, every element matching any of them ('*' matches all).
new_element, new_child = self.etree.Element, self.etree.SubElement

a = new_element('a')
b = new_child(a, 'b')
c = new_child(a, 'c')
d = new_child(b, 'd')
e = new_child(c, 'e')
f = new_child(c, 'f')

self.assertEqual([a, b], list(a.getiterator('a', 'b')))
self.assertEqual([], list(a.getiterator('x', 'y')))
# Results follow document order, not the order of the tag arguments.
self.assertEqual([a, f], list(a.getiterator('f', 'a')))
self.assertEqual([c, e, f], list(c.getiterator('c', '*', 'a')))
# Empty tuples as tag arguments match nothing.
self.assertEqual([], list(a.getiterator((), ())))
2796
# Same as the positional variant, but the tag names are passed as one tuple.
new_element, new_child = self.etree.Element, self.etree.SubElement

a = new_element('a')
b = new_child(a, 'b')
c = new_child(a, 'c')
d = new_child(b, 'd')
e = new_child(c, 'e')
f = new_child(c, 'f')

self.assertEqual([a, b], list(a.getiterator(('a', 'b'))))
self.assertEqual([], list(a.getiterator(('x', 'y'))))
self.assertEqual([a, f], list(a.getiterator(('f', 'a'))))
self.assertEqual([c, e, f], list(c.getiterator(('c', '*', 'a'))))
# An empty tuple matches nothing.
self.assertEqual([], list(a.getiterator(())))
2823
# Namespace-aware tag filters for getiterator(): exact '{ns}name', the
# per-namespace wildcard '{ns}*', and '{}name' / '{}*' for elements that are
# in no namespace.
new_element, new_child = self.etree.Element, self.etree.SubElement

a = new_element('{a}a')
b = new_child(a, '{a}b')
c = new_child(a, '{a}c')
d = new_child(b, '{b}d')
e = new_child(c, '{a}e')
f = new_child(c, '{b}f')
g = new_child(c, 'g')

self.assertEqual([a], list(a.getiterator('{a}a')))
self.assertEqual([], list(a.getiterator('{b}a')))
# A bare name does not match an element that is in a namespace.
self.assertEqual([], list(a.getiterator('a')))
self.assertEqual([a,b,d,c,e,f,g], list(a.getiterator('*')))
self.assertEqual([f], list(c.getiterator('{b}*')))
self.assertEqual([d, f], list(a.getiterator('{b}*')))
self.assertEqual([g], list(a.getiterator('g')))
self.assertEqual([g], list(a.getiterator('{}g')))
self.assertEqual([g], list(a.getiterator('{}*')))
2863
# The '{*}name' filter matches a local name in any namespace (including no
# namespace); '{*}*' matches every element.
new_element, new_child = self.etree.Element, self.etree.SubElement

a = new_element('{a}a')
b = new_child(a, '{nsA}b')
c = new_child(b, '{nsB}b')
d = new_child(a, 'b')
e = new_child(a, '{nsA}e')
f = new_child(e, '{nsB}e')
g = new_child(e, 'e')

self.assertEqual([b, c, d], list(a.getiterator('{*}b')))
self.assertEqual([e, f, g], list(a.getiterator('{*}e')))
self.assertEqual([a, b, c, d, e, f, g], list(a.getiterator('{*}*')))
2885
2910
2926
2943
# ElementTree.getelementpath() returns a path relative to the tree's root
# that find() resolves back to the same element; elements outside the
# tree's root are expected to raise ValueError.
a = etree.Element("a")
b = etree.SubElement(a, "b")
c = etree.SubElement(a, "c")
d1 = etree.SubElement(c, "d")
d2 = etree.SubElement(c, "d")
c.text = d1.text = 'TEXT'

# Tree rooted at a.
tree = etree.ElementTree(a)
self.assertEqual('.', tree.getelementpath(a))
self.assertEqual('c/d[1]', tree.getelementpath(d1))
self.assertEqual('c/d[2]', tree.getelementpath(d2))
for target in (d1, d2):
    self.assertEqual(target, tree.find(tree.getelementpath(target)))

# Tree rooted at c: paths become relative to c.
tree = etree.ElementTree(c)
self.assertEqual('.', tree.getelementpath(c))
self.assertEqual('d[2]', tree.getelementpath(d2))
self.assertEqual(d2, tree.find(tree.getelementpath(d2)))

# Tree rooted at b: a, c and d2 are not below b.
tree = etree.ElementTree(b)
self.assertEqual('.', tree.getelementpath(b))
for outsider in (a, c, d2):
    self.assertRaises(ValueError, tree.getelementpath, outsider)
2970
# getelementpath() with namespaced tags: the index counts only siblings with
# the same qualified tag, so the single {http://ns2/}d gets no index.
a = etree.Element("{http://ns1/}a")
b = etree.SubElement(a, "{http://ns1/}b")
c = etree.SubElement(a, "{http://ns1/}c")
d1 = etree.SubElement(c, "{http://ns1/}d")
d2 = etree.SubElement(c, "{http://ns2/}d")
d3 = etree.SubElement(c, "{http://ns1/}d")

# Tree rooted at a.
tree = etree.ElementTree(a)
self.assertEqual('.', tree.getelementpath(a))
self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
                 tree.getelementpath(d1))
self.assertEqual('{http://ns1/}c/{http://ns2/}d',
                 tree.getelementpath(d2))
self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
                 tree.getelementpath(d3))
for target in (a, b, c, d1, d2, d3):
    self.assertEqual(target, tree.find(tree.getelementpath(target)))

# Tree rooted at c.
tree = etree.ElementTree(c)
self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
for target in (d1, d2, d3):
    self.assertEqual(target, tree.find(tree.getelementpath(target)))

# Tree rooted at b: the d elements are not below b.
tree = etree.ElementTree(b)
for outsider in (d1, d2):
    self.assertRaises(ValueError, tree.getelementpath, outsider)
3006
3013
3020
3029
# findall() with '{ns}name' and '{ns}*' steps; a bare name only matches
# elements that are in no namespace.
doc = _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')
root = self.etree.XML(doc)

self.assertEqual(len(root.findall(".//{X}b")), 2)
self.assertEqual(len(root.findall(".//{X}*")), 2)
self.assertEqual(len(root.findall(".//b")), 3)
3036
# The same path prefix ('xx') resolves through whatever mapping is passed as
# namespaces=, independent of the prefixes used in the document itself.
doc = _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>')
root = self.etree.XML(doc)

nsmap = {'xx': 'X'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)

nsmap = {'xx': 'Y'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3048
# A None key in namespaces= acts as the default namespace for unprefixed
# path steps; an empty-string prefix is expected to raise ValueError.
doc = _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>')
root = self.etree.XML(doc)

self.assertEqual(len(root.findall(".//xx:b", namespaces={'xx': 'X'})), 2)
self.assertEqual(
    len(root.findall(".//b", namespaces={'xx': 'X', None: 'Y'})), 1)
self.assertRaises(
    ValueError, root.findall, ".//xx:b", namespaces={'xx': 'X', '': 'Y'})
3058
3065
# Element.index(child, start, stop): list.index()-style lookup of a child,
# including negative start/stop values, on a parent with ten children.
etree = self.etree
e = etree.Element('foo')
for i in range(10):
    etree.SubElement(e, 'a%s' % i)

# Every child is found at its own position.
for i in range(10):
    self.assertEqual(i, e.index(e[i]))

self.assertEqual(3, e.index(e[3], 3))

# Search windows that exclude the child raise ValueError.
self.assertRaises(ValueError, e.index, e[3], 4)
self.assertRaises(ValueError, e.index, e[3], 0, 2)
self.assertRaises(ValueError, e.index, e[8], 0, -3)
self.assertRaises(ValueError, e.index, e[8], -5, -3)

# Negative bounds count from the end; a start before the first child is
# accepted.
self.assertEqual(8, e.index(e[8], 0, -1))
self.assertEqual(8, e.index(e[8], -12, -1))
self.assertEqual(0, e.index(e[0], -12, -1))
3091
# Element.replace(old, new) where `new` is already a child of the same
# parent: the parent ends up one child shorter, and text/tail of the moved
# element (and the tail of the replaced one) are expected to be unchanged.
etree = self.etree
e = etree.Element('foo')
for i in range(10):
    el = etree.SubElement(e, 'a%s' % i)
    el.text = "text%d" % i
    el.tail = "tail%d" % i

child0, child1, child2 = e[0], e[1], e[2]

# Replace the first child by the second one.
e.replace(e[0], e[1])
self.assertEqual(9, len(e))
self.assertEqual(child1, e[0])
self.assertEqual(child1.text, "text1")
self.assertEqual(child1.tail, "tail1")
self.assertEqual(child0.tail, "tail0")
self.assertEqual(child2, e[1])

# Replace the last child by the (new) first one.
e.replace(e[-1], e[0])
self.assertEqual(child1, e[-1])
self.assertEqual(child1.text, "text1")
self.assertEqual(child1.tail, "tail1")
self.assertEqual(child2, e[0])
3127
# Element.replace(old, new) with a freshly created element: it takes the
# position of the old child, keeps its own text and tail, and the following
# sibling stays where it was.
etree = self.etree
e = etree.Element('foo')
for i in range(10):
    etree.SubElement(e, 'a%s' % i)

replacement = etree.Element("test")
replacement.text = "TESTTEXT"
replacement.tail = "TESTTAIL"
second_child = e[1]

e.replace(e[0], replacement)
self.assertEqual(replacement, e[0])
self.assertEqual("TESTTEXT", e[0].text)
self.assertEqual("TESTTAIL", e[0].tail)
self.assertEqual(second_child, e[1])
3149
3165
3183
3201
3219
# Assigning a sequence of the wrong length to an extended slice must raise
# ValueError and leave the children untouched.
#
# The former "try: slice / except NameError: return" guard was removed:
# `slice` is a builtin, so the guard was dead code that would only ever turn
# the test into a silent pass.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(a, 'd')
e = SubElement(a, 'e')

x = Element('x')
y = Element('y')
z = Element('z')

# a[1::2] selects two children (c and e), but three replacements are given.
self.assertRaises(
    ValueError,
    operator.setitem, a, slice(1, None, 2), [x, y, z])

self.assertEqual([b, c, d, e], list(a))
3246
3259
# sourceline for elements beyond line 65535: with libxml2 >= 2.9 the real
# line numbers are expected, older versions are expected to report 65535.
document = _bytes(
    '<?xml version="1.0"?>\n'
    '<root>' + '\n' * 65536 +
    '<p>' + '\n' * 65536 + '</p>\n' +
    '<br/>\n'
    '</root>')
root = self.etree.XML(document)

if self.etree.LIBXML_VERSION >= (2, 9):
    expected = [2, 131074, 131076]
else:
    expected = [2, 65535, 65535]

observed = [el.sourceline for el in root.iter()]
self.assertEqual(expected, observed)
3275
3283
3292
3302
3312
3318
3326
3332
3339
3345
# docinfo of a parsed document exposes the XML declaration values and the
# parts of a PUBLIC doctype, plus the reassembled doctype string.
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
xml_header = '<?xml version="1.0" encoding="ascii"?>'
doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')

docinfo = self.etree.parse(BytesIO(xml)).docinfo

self.assertEqual(docinfo.encoding, "ascii")
self.assertEqual(docinfo.xml_version, "1.0")
self.assertEqual(docinfo.public_id, pub_id)
self.assertEqual(docinfo.system_url, sys_id)
self.assertEqual(docinfo.root_name, 'html')
self.assertEqual(docinfo.doctype, doctype_string)
3363
3379
3391
3403
3409
# tostring(doctype=...) writes the given doctype in place of the one that
# was parsed with the document.
etree = self.etree
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))

xml = _bytes('<!DOCTYPE root>\n<root/>')
tree = etree.parse(BytesIO(xml))

expected = xml.replace(_bytes('<!DOCTYPE root>'), doctype_string)
self.assertEqual(expected, etree.tostring(tree, doctype=doctype_string))
3420
# Element.base initially reflects the base_url given to the parser without
# an xml:base attribute being present; assigning to .base sets the attribute.
xml_base_attr = '{http://www.w3.org/XML/1998/namespace}base'
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")

self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(
    root.get('{http://www.w3.org/XML/1998/namespace}base'), None)

root.base = "https://secret/url"
self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base_attr), "https://secret/url")
3432
# Setting the xml:base attribute directly is reflected by Element.base.
xml_base_attr = '{http://www.w3.org/XML/1998/namespace}base'
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")

self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base_attr), None)

root.set('{http://www.w3.org/XML/1998/namespace}base',
         "https://secret/url")
self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base_attr), "https://secret/url")
3445
3451
3456
3463
3477
# A NUL character is rejected with ValueError in text, in tail, and in a
# tag name.
new_element = self.etree.Element
a = new_element('a')

for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attribute, 'ha\0ho')

self.assertRaises(ValueError, new_element, 'ha\0ho')
3486
# Same NUL-character checks, with the values passed through _str().
new_element = self.etree.Element
a = new_element('a')

for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attribute, _str('ha\0ho'))

self.assertRaises(ValueError, new_element, _str('ha\0ho'))
3498
# The control characters \x07 and \x02 are rejected with ValueError in
# text, in tail, and in a tag name.
new_element = self.etree.Element
a = new_element('a')

for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attribute, 'ha\x07ho')
    self.assertRaises(ValueError, setattr, a, attribute, 'ha\x02ho')

self.assertRaises(ValueError, new_element, 'ha\x07ho')
self.assertRaises(ValueError, new_element, 'ha\x02ho')
3511
# Same control-character checks, with the values passed through _str().
new_element = self.etree.Element
a = new_element('a')

for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attribute,
                      _str('ha\x07ho'))
    self.assertRaises(ValueError, setattr, a, attribute,
                      _str('ha\x02ho'))

self.assertRaises(ValueError, new_element, _str('ha\x07ho'))
self.assertRaises(ValueError, new_element, _str('ha\x02ho'))
3530
# Control characters are still rejected when the string also contains a
# non-ASCII character (\u1234).
new_element = self.etree.Element
a = new_element('a')

for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attribute,
                      _str('ha\u1234\x07ho'))
    self.assertRaises(ValueError, setattr, a, attribute,
                      _str('ha\u1234\x02ho'))

self.assertRaises(ValueError, new_element, _str('ha\u1234\x07ho'))
self.assertRaises(ValueError, new_element, _str('ha\u1234\x02ho'))
3549
3563
3568
3586
3606
# HTML serialisation of an element with a CR-LF tail: the tail is written
# by default and with with_tail=True, and left out with with_tail=False.
# The CR-LF inside <i> is kept in all three cases.
serialise = self.etree.tostring
html = self.etree.fromstring(
    '<html><body>'
    '<div><p>Some text<i>\r\n</i></p></div>\r\n'
    '</body></html>',
    parser=self.etree.HTMLParser())
self.assertEqual(html.tag, 'html')

div = html.find('.//div')
self.assertEqual(div.tail, '\r\n')

with_tail = _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n")
without_tail = _bytes("<div><p>Some text<i>\r\n</i></p></div>")

self.assertEqual(serialise(div, method='html'), with_tail)
self.assertEqual(serialise(div, method='html', with_tail=True), with_tail)
self.assertEqual(
    serialise(div, method='html', with_tail=False), without_tail)
3629
3651
# The standalone flag of the XML declaration: absent unless requested,
# written as 'yes'/'no' by tostring(standalone=...), and read back as
# True/False through docinfo.standalone.
serialise = self.etree.tostring
XML = self.etree.XML
ElementTree = self.etree.ElementTree

root = XML(_bytes("<root/>"))
self.assertEqual(None, ElementTree(root).docinfo.standalone)

# No standalone argument: plain declaration.
result = serialise(root, xml_declaration=True, encoding="ASCII")
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))

# standalone=True round-trips as True.
result = serialise(root, xml_declaration=True, encoding="ASCII",
                   standalone=True)
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
self.assertEqual(True, ElementTree(XML(result)).docinfo.standalone)

# standalone=False round-trips as False.
result = serialise(root, xml_declaration=True, encoding="ASCII",
                   standalone=False)
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
self.assertEqual(False, ElementTree(XML(result)).docinfo.standalone)
3681
3701
# method="text" concatenates text and tails in document order (the root's
# own tail comes last) and encodes the result with the requested encoding.
new_child = self.etree.SubElement

a = self.etree.Element('a')
a.text = "A"
a.tail = "tail"
b = new_child(a, 'b')
b.text = "B"
b.tail = _str("Søk på nettet")
c = new_child(a, 'c')
c.text = "C"

result = self.etree.tostring(a, method="text", encoding="UTF-16")
expected = _str('ABSøk på nettetCtail').encode("UTF-16")
self.assertEqual(expected, result)
3720
# method="text" with non-ASCII content: without an explicit encoding the
# call is expected to raise UnicodeEncodeError; with UTF-8 it succeeds.
serialise = self.etree.tostring
new_child = self.etree.SubElement

a = self.etree.Element('a')
a.text = _str('Søk på nettetA')
a.tail = "tail"
b = new_child(a, 'b')
b.text = "B"
b.tail = _str('Søk på nettetB')
c = new_child(a, 'c')
c.text = "C"

self.assertRaises(UnicodeEncodeError, serialise, a, method="text")

expected = _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8')
self.assertEqual(expected, serialise(a, encoding="UTF-8", method="text"))
3741
3754
3770
3774
3789
3807
3820
# tostring(encoding=<unicode type>) returns a text string and serialises
# only the given sub-element, not the whole tree.
serialise = self.etree.tostring
new_child = self.etree.SubElement

a = self.etree.Element('a')
b = new_child(a, 'b')
c = new_child(a, 'c')
d = new_child(c, 'd')

for element in (b, c):
    self.assertTrue(
        isinstance(serialise(element, encoding=_unicode), _unicode))

self.assertEqual(_bytes('<b></b>'),
                 canonicalize(serialise(b, encoding=_unicode)))
self.assertEqual(_bytes('<c><d></d></c>'),
                 canonicalize(serialise(c, encoding=_unicode)))
3836
3841
3856
# pretty_print for text output: off by default, and with pretty_print=True
# children are indented and the result ends with a newline.
serialise = self.etree.tostring
new_child = self.etree.SubElement

a = self.etree.Element('a')
b = new_child(a, 'b')
c = new_child(a, 'c')

compact = "<a><b/><c/></a>"
self.assertEqual(serialise(a, encoding=_unicode), compact)
self.assertEqual(
    serialise(a, encoding=_unicode, pretty_print=False), compact)
self.assertEqual(
    serialise(a, encoding=_unicode, pretty_print=True),
    "<a>\n  <b/>\n  <c/>\n</a>\n")
3874
3886
# Two instances of an ElementBase subclass reference each other through
# plain Python attributes; dropping both names and running the garbage
# collector must complete without an error.
class SubEl(etree.ElementBase):
    # No customisation needed; the tag defaults to the class name, as the
    # assertions below check.
    pass

first = SubEl()
second = SubEl()
self.assertEqual('SubEl', first.tag)
self.assertEqual('SubEl', second.tag)

# Build the reference cycle.
first.other = second
second.other = first

del first, second
gc.collect()
3900
3901
3915
# Two elements are removed from their parent and re-linked as siblings with
# addnext().  After the Python reference to the first one is deleted, it
# must still be reachable, with its tail, through getprevious().
root = etree.Element('parent')
c1 = etree.SubElement(root, 'child1')
c2 = etree.SubElement(root, 'child2')

for child in (c1, c2):
    root.remove(child)
del child  # keep c1 from being referenced through the loop variable

c1.addnext(c2)
c1.tail = 'abc'
c2.tail = 'xyz'
del c1

# First access after the reference was dropped; result intentionally unused.
c2.getprevious()

previous = c2.getprevious()
self.assertEqual('child1', previous.tag)
self.assertEqual('abc', c2.getprevious().tail)
3932
3933
3934
3935 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3946
3991
3992 res_instance = res()
3993 parser = etree.XMLParser(load_dtd = True)
3994 parser.resolvers.add(res_instance)
3995
3996 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3997 parser = parser)
3998
3999 self.include(tree)
4000
4001 called = list(res_instance.called.items())
4002 called.sort()
4003 self.assertEqual(
4004 [("dtd", True), ("include", True), ("input", True)],
4005 called)
4006
# XInclude through a custom resolver that answers each include with another
# document that includes the next one, until 'test5.xml' ends the chain.
# Afterwards every URL must have been requested exactly once.
data = textwrap.dedent('''
<doc xmlns:xi="http://www.w3.org/2001/XInclude">
<foo/>
<xi:include href="./test.xml" />
</doc>
''')

class Resolver(etree.Resolver):
    # Names of the requests seen so far; each may only be seen once.
    called = {}

    def resolve(self, url, id, context):
        # The top-level input document: record it and let the default
        # resolution handle it.
        if url.endswith("test_xinclude.xml"):
            assert not self.called.get("input")
            self.called["input"] = True
            return None

        # End of the include chain.
        if url.endswith('/test5.xml'):
            assert not self.called.get("DONE")
            self.called["DONE"] = True
            return self.resolve_string('<DONE/>', context)

        # Any other include: answer with a copy of `data` that points to
        # the next numbered file.
        _, filename = url.rsplit('/', 1)
        assert not self.called.get(filename)
        self.called[filename] = True
        next_data = data.replace(
            'test.xml', 'test%d.xml' % len(self.called))
        return self.resolve_string(next_data, context)

res_instance = Resolver()
parser = etree.XMLParser(load_dtd=True)
parser.resolvers.add(res_instance)

tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
                   parser=parser)

self.include(tree)

called = sorted(res_instance.called.items())
self.assertEqual(
    [("DONE", True), ("input", True), ("test.xml", True),
     ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
    called)
4050
4055
4061
# write_c14n() must serialise the empty <b/> element as an explicit
# start/end tag pair.
doc = self.parse(_bytes('<a><b/></a>'))
sink = BytesIO()
doc.write_c14n(sink)
self.assertEqual(_bytes('<a><b></b></a>'), sink.getvalue())
4071
# With compression=9 the output of write_c14n() has to be readable as gzip
# data and decompress to the canonical serialisation.
doc = self.parse(_bytes('<a>' + '<b/>' * 200 + '</a>'))
sink = BytesIO()
doc.write_c14n(sink, compression=9)
with closing(gzip.GzipFile(fileobj=BytesIO(sink.getvalue()))) as gz:
    unpacked = gz.read()
self.assertEqual(_bytes('<a>' + '<b></b>' * 200 + '</a>'), unpacked)
4080
4088
4097
4115
4127
4139
# Compare inclusive and exclusive canonicalisation of a document that
# declares three namespaces on the root but only uses two of them.
doc = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

def canonical(**options):
    # Serialise the document into a fresh buffer with the given options.
    sink = BytesIO()
    doc.write_c14n(sink, **options)
    return sink.getvalue()

# Default and explicit exclusive=False both keep every declaration.
output = canonical()
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    output)
output = canonical(exclusive=False)
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    output)

# Exclusive mode only declares a namespace where it is used.
output = canonical(exclusive=True)
self.assertEqual(
    _bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    output)

# A prefix listed as inclusive is declared on the root again.
output = canonical(exclusive=True, inclusive_ns_prefixes=['z'])
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
    output)
4164
# Same namespace scenarios as for write_c14n(), but going through
# etree.tostring(..., method='c14n') on the whole tree.
doc = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

# (extra tostring() options, expected canonical output)
scenarios = [
    ({},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': False},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
     '<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
]
for options, expected in scenarios:
    serialized = etree.tostring(doc, method='c14n', **options)
    self.assertEqual(_bytes(expected), serialized)
4181
# Canonicalise single elements (the root and its only child) instead of the
# tree object, with and without exclusive mode.
doc = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
root = doc.getroot()
child = root[0]

# (element, extra tostring() options, expected canonical output)
scenarios = [
    (root, {},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    (root, {'exclusive': False},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    (root, {'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    # The child inherits all declarations of the root in inclusive mode ...
    (child, {'exclusive': False},
     '<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
    # ... but only keeps the one it uses in exclusive mode,
    (child, {'exclusive': True},
     '<z:b xmlns:z="http://cde"></z:b>'),
    # plus whatever prefix is explicitly requested.
    (child, {'exclusive': True, 'inclusive_ns_prefixes': ['y']},
     '<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
]
for element, options, expected in scenarios:
    serialized = etree.tostring(element, method='c14n', **options)
    self.assertEqual(_bytes(expected), serialized)
4205
""" Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
# Three inclusive prefixes at once: all three declarations must stay on the
# root element even in exclusive mode.
doc = self.parse(_bytes(
    '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

serialized = etree.tostring(
    doc, method='c14n', exclusive=True,
    inclusive_ns_prefixes=['x', 'y', 'z'])
self.assertEqual(
    _bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    serialized)
4214
# Plain write() round trip: the serialised bytes equal the parsed input.
doc = self.parse(_bytes('<a><b/></a>'))
sink = BytesIO()
doc.write(sink)
self.assertEqual(_bytes('<a><b/></a>'), sink.getvalue())
4224
# The doctype argument is written as given, on its own line, in front of
# the root element.
doc = self.parse(_bytes('<a><b/></a>'))
sink = BytesIO()
doc.write(sink, doctype='HUHU')
self.assertEqual(_bytes('HUHU\n<a><b/></a>'), sink.getvalue())
4232
# write() with compression=9 has to produce gzip data that decompresses to
# the plain serialisation.
doc = self.parse(_bytes('<a>' + '<b/>' * 200 + '</a>'))
sink = BytesIO()
doc.write(sink, compression=9)
with closing(gzip.GzipFile(fileobj=BytesIO(sink.getvalue()))) as gz:
    unpacked = gz.read()
self.assertEqual(_bytes('<a>' + '<b/>' * 200 + '</a>'), unpacked)
4241
# Compression and doctype combined: the doctype line must be part of the
# compressed stream.
doc = self.parse(_bytes('<a>' + '<b/>' * 200 + '</a>'))
sink = BytesIO()
doc.write(sink, compression=9, doctype='<!DOCTYPE a>')
with closing(gzip.GzipFile(fileobj=BytesIO(sink.getvalue()))) as gz:
    unpacked = gz.read()
self.assertEqual(
    _bytes('<!DOCTYPE a>\n<a>' + '<b/>' * 200 + '</a>'), unpacked)
4250
# Check the compression levels 0, 1 and 9 of write(): level 0 equals the
# default output, the other levels are not larger than the plain output and
# decompress to the same document.
doc = self.parse(_bytes('<a>' + '<b/>' * 200 + '</a>'))

def serialize(**options):
    # Write the document into a fresh buffer and return the raw bytes.
    sink = BytesIO()
    doc.write(sink, **options)
    return sink.getvalue()

def gunzip(blob):
    # Decompress a gzip byte string completely.
    with closing(gzip.GzipFile(fileobj=BytesIO(blob))) as gz:
        return gz.read()

plain = serialize(compression=0)
self.assertEqual(serialize(), plain)

unpacked = {}
for level in (1, 9):
    packed = serialize(compression=level)
    self.assertTrue(len(packed) <= len(plain))
    unpacked[level] = gunzip(packed)

self.assertEqual(_bytes('<a>' + '<b/>' * 200 + '</a>'), plain)
self.assertEqual(_bytes('<a>' + '<b/>' * 200 + '</a>'), unpacked[1])
self.assertEqual(_bytes('<a>' + '<b/>' * 200 + '</a>'), unpacked[9])
4281
4289
4298
4306
4315
4318 etree = etree
4319
4341
"""This can't really be tested as long as there isn't a way to
reset the logging setup ...
"""
parse_xml = self.etree.parse

# Every message handed to the global Python log ends up in this list.
collected = []

class Logger(self.etree.PyErrorLog):
    def log(self, entry, message, *args):
        collected.append(message)

self.etree.use_global_python_log(Logger())

# Mismatched end tag.
# NOTE(review): a text literal is passed to the BytesIO imported from
# .common_imports; presumably that helper accepts text -- confirm.
broken = BytesIO('<a><b></c></b></a>')
try:
    parse_xml(broken)
except SyntaxError:
    pass
broken.close()

# Each marker has to show up in at least one logged message.
for marker in ('mismatch', ':PARSER:', ':ERR_TAG_NAME_MISMATCH:', ':1:15:'):
    self.assertTrue([text for text in collected if marker in text])
4369
4372 etree = etree
4373
4377
# Without an explicit event list only 'end' events are reported, and each
# event carries the value returned by the target's end() method.
class Target(object):
    def start(self, tag, attrib):
        return 'start(%s)' % tag

    def end(self, tag):
        return 'end(%s)' % tag

    def close(self):
        return 'close()'

pull = self.etree.XMLPullParser(target=Target())
stream = pull.read_events()

# Only opening tags so far: nothing to report, even when asked twice.
pull.feed('<root><element>')
self.assertFalse(list(stream))
self.assertFalse(list(stream))

# (chunk to feed, events expected right afterwards)
steps = [
    ('</element><child>', [('end', 'end(element)')]),
    ('</child>', [('end', 'end(child)')]),
    ('</root>', [('end', 'end(root)')]),
]
for chunk, expected in steps:
    pull.feed(chunk)
    self.assertEqual(expected, list(stream))

self.assertFalse(list(stream))
# close() passes the result of the target's close() through.
self.assertEqual('close()', pull.close())
4401
# With both 'start' and 'end' requested, every event carries the return
# value of the corresponding target method.
class Target(object):
    def start(self, tag, attrib):
        return 'start(%s)' % tag

    def end(self, tag):
        return 'end(%s)' % tag

    def close(self):
        return 'close()'

pull = self.etree.XMLPullParser(['start', 'end'], target=Target())
stream = pull.read_events()

pull.feed('<root><element>')
self.assertEqual(
    [('start', 'start(root)'), ('start', 'start(element)')],
    list(stream))
# The event iterator is drained now.
self.assertFalse(list(stream))

# (chunk to feed, events expected right afterwards)
steps = [
    ('</element><child>',
     [('end', 'end(element)'), ('start', 'start(child)')]),
    ('</child>', [('end', 'end(child)')]),
    ('</root>', [('end', 'end(root)')]),
]
for chunk, expected in steps:
    pull.feed(chunk)
    self.assertEqual(expected, list(stream))

self.assertFalse(list(stream))
self.assertEqual('close()', pull.close())
4434
# Pull parsing into a TreeBuilder target: the events are checked by tag
# name and close() returns the root element that was built.
pull = self.etree.XMLPullParser(
    ['start', 'end'], target=etree.TreeBuilder())
stream = pull.read_events()

pull.feed('<root><element>')
self.assert_event_tags(
    stream, [('start', 'root'), ('start', 'element')])
# The event iterator is drained now.
self.assertFalse(list(stream))

# (chunk to feed, (event, tag) pairs expected right afterwards)
steps = [
    ('</element><child>', [('end', 'element'), ('start', 'child')]),
    ('</child>', [('end', 'child')]),
    ('</root>', [('end', 'root')]),
]
for chunk, expected in steps:
    pull.feed(chunk)
    self.assert_event_tags(stream, expected)

self.assertFalse(list(stream))
built_root = pull.close()
self.assertEqual('root', built_root.tag)
4456
# A TreeBuilder subclass renames every element when it is closed; the
# renamed tags must be visible in the 'end' events and on the final root.
class Target(etree.TreeBuilder):
    def end(self, tag):
        element = super(Target, self).end(tag)
        element.tag = element.tag + '-huhu'
        return element

pull = self.etree.XMLPullParser(['start', 'end'], target=Target())
stream = pull.read_events()

# 'start' events are reported before the rename, so the tags are unchanged.
pull.feed('<root><element>')
self.assert_event_tags(
    stream, [('start', 'root'), ('start', 'element')])
self.assertFalse(list(stream))

# (chunk to feed, (event, tag) pairs expected right afterwards)
steps = [
    ('</element><child>', [('end', 'element-huhu'), ('start', 'child')]),
    ('</child>', [('end', 'child-huhu')]),
    ('</root>', [('end', 'root-huhu')]),
]
for chunk, expected in steps:
    pull.feed(chunk)
    self.assert_event_tags(stream, expected)

self.assertFalse(list(stream))
built_root = pull.close()
self.assertEqual('root-huhu', built_root.tag)
4484
4516
4517
# Direct execution only prints a hint on how to run the tests.
if __name__ == '__main__':
    hint = 'to test use test.py %s' % __file__
    print(hint)
4520