1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 import os.path
11 import unittest
12 import copy
13 import sys
14 import re
15 import gc
16 import operator
17 import tempfile
18 import textwrap
19 import zlib
20 import gzip
21
# Put the directory that contains this file on sys.path (once) so that
# modules located next to it can be imported by the statements that follow.
22 this_dir = os.path.dirname(__file__)
23 if this_dir not in sys.path:
24 sys.path.insert(0, this_dir)
25
26 from common_imports import etree, StringIO, BytesIO, HelperTestCase
27 from common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
28 from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
29 from common_imports import canonicalize, sorted, _str, _bytes
30
# Print a banner listing the lxml version under test, the Python version and
# the libxml2/libxslt versions (both the one in use and the one compiled
# against), framed by two empty lines.
31 print("")
32 print("TESTED VERSION: %s" % etree.__version__)
33 print(" Python: " + repr(sys.version_info))
34 print(" lxml.etree: " + repr(etree.LXML_VERSION))
35 print(" libxml used: " + repr(etree.LIBXML_VERSION))
36 print(" libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION))
37 print(" libxslt used: " + repr(etree.LIBXSLT_VERSION))
38 print(" libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION))
39 print("")
40
# Python 2/3 compatibility alias: use the `unicode` builtin when it exists,
# otherwise (NameError) fall back to `str`.
41 try:
42 _unicode = unicode
43 except NameError:
44
45 _unicode = str
46
48 """Tests only for etree, not ElementTree"""
49 etree = etree
50
61
70
77
# A tag consisting only of a `{...}` part with nothing after it ('{}' or
# '{test}') must raise ValueError, both when creating an Element and when
# assigning to the `.tag` of an existing one.
79 Element = self.etree.Element
80 el = Element('name')
81 self.assertRaises(ValueError, Element, '{}')
82 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
83
84 self.assertRaises(ValueError, Element, '{test}')
85 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
86
94
# Tag names containing a single or double quote character must raise
# ValueError: on Element creation (with and without a `{test}` namespace
# part) and on assignment to `.tag`.
96 Element = self.etree.Element
97 self.assertRaises(ValueError, Element, "p'name")
98 self.assertRaises(ValueError, Element, 'p"name')
99
100 self.assertRaises(ValueError, Element, "{test}p'name")
101 self.assertRaises(ValueError, Element, '{test}p"name')
102
103 el = Element('name')
104 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
105 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
106
# Tag names containing spaces (leading/trailing, embedded, or right after the
# `{test}` namespace part) must raise ValueError on creation and on `.tag`
# assignment.
108 Element = self.etree.Element
109 self.assertRaises(ValueError, Element, ' name ')
110 self.assertRaises(ValueError, Element, 'na me')
111 self.assertRaises(ValueError, Element, '{test} name')
112
113 el = Element('name')
114 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
115
123
131
# SubElement() must reject tag names containing a single or double quote
# with ValueError, with or without a `{test}` namespace part.
133 Element = self.etree.Element
134 SubElement = self.etree.SubElement
135
136 el = Element('name')
137 self.assertRaises(ValueError, SubElement, el, "p'name")
138 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
139
140 self.assertRaises(ValueError, SubElement, el, 'p"name')
141 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
142
151
160
# QName must raise ValueError for an empty name, given either as the only
# argument or as the second argument.
162 QName = self.etree.QName
163 self.assertRaises(ValueError, QName, '')
164 self.assertRaises(ValueError, QName, 'test', '')
165
# QName must raise ValueError for a name containing a colon ('p:name'),
# given either as the only argument or as the second argument.
167 QName = self.etree.QName
168 self.assertRaises(ValueError, QName, 'p:name')
169 self.assertRaises(ValueError, QName, 'test', 'p:name')
170
# QName must raise ValueError for names containing spaces, in the
# one-argument and the two-argument form.
172 QName = self.etree.QName
173 self.assertRaises(ValueError, QName, ' name ')
174 self.assertRaises(ValueError, QName, 'na me')
175 self.assertRaises(ValueError, QName, 'test', ' name')
176
184
186
# A QName built from an Element must carry the same text as, and compare
# equal to, the QName that was used to create that Element.
187 QName = self.etree.QName
188 qname1 = QName('http://myns', 'a')
189 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
190
191 qname2 = QName(a)
192 self.assertEqual(a.tag, qname1.text)
193 self.assertEqual(qname1.text, qname2.text)
194 self.assertEqual(qname1, qname2)
195
197
# Assigning a QName to `.text` is expected to store the prefixed form: with
# prefix 'p' mapped to the QName's namespace, the text reads back as "p:a".
198 etree = self.etree
199 qname = etree.QName('http://myns', 'a')
200 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
201 a.text = qname
202
203 self.assertEqual("p:a", a.text)
204
213
228
234
244
256
# Attribute ordering checks:
#  * attributes set one by one are reported by keys()/values() in the order
#    they were set;
#  * when an Element is created from an existing attrib mapping plus keyword
#    attributes, the keyword attributes are expected first, in the order
#    attr_98, attr_99 (not the order they were passed), followed by the
#    copied ones;
#  * the source element's attributes are unchanged afterwards.
258 Element = self.etree.Element
259
260 keys = ["attr%d" % i for i in range(10)]
261 values = ["TEST-%d" % i for i in range(10)]
262 items = list(zip(keys, values))
263
264 root = Element("root")
265 for key, value in items:
266 root.set(key, value)
267 self.assertEqual(keys, root.attrib.keys())
268 self.assertEqual(values, root.attrib.values())
269
270 root2 = Element("root2", root.attrib,
271 attr_99='TOAST-1', attr_98='TOAST-2')
272 self.assertEqual(['attr_98', 'attr_99'] + keys,
273 root2.attrib.keys())
274 self.assertEqual(['TOAST-2', 'TOAST-1'] + values,
275 root2.attrib.values())
276
277 self.assertEqual(keys, root.attrib.keys())
278 self.assertEqual(values, root.attrib.values())
279
287
301
323
# etree.strip_elements() removes every element with one of the given tag
# names, including its subtree, at any depth.  Tag names that do not occur
# ('X', 'Y', 'Z') are accepted.  Each case re-parses the same input and
# compares the output of self._writeElement (helper not shown here).
325 XML = self.etree.XML
326 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
327
328 root = XML(xml)
329 self.etree.strip_elements(root, 'a')
330 self.assertEqual(_bytes('<test><x></x></test>'),
331 self._writeElement(root))
332
333 root = XML(xml)
334 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
335 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
336 self._writeElement(root))
337
338 root = XML(xml)
339 self.etree.strip_elements(root, 'c')
340 self.assertEqual(_bytes('<test><a><b><c/></b></a><x><a><b></b></a></x></test>'.replace('<c/>', '')),
341 self._writeElement(root))
342
# strip_elements() on a document with namespaces and mixed text:
#  * a plain 'a' only matches the un-namespaced <a>, not <n:a> in urn:a;
#  * '{urn:a}b' matches only the <b> in namespace urn:a;
#  * '{urn:a}*' matches every element in namespace urn:a;
#  * by default the tail text of a removed element goes with it, while
#    with_tail=False keeps the tail text ('AT', 'BT') in the output.
344 XML = self.etree.XML
345 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
346
347 root = XML(xml)
348 self.etree.strip_elements(root, 'a')
349 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
350 self._writeElement(root))
351
352 root = XML(xml)
353 self.etree.strip_elements(root, '{urn:a}b', 'c')
354 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
355 self._writeElement(root))
356
357 root = XML(xml)
358 self.etree.strip_elements(root, '{urn:a}*', 'c')
359 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
360 self._writeElement(root))
361
362 root = XML(xml)
363 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
364 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
365 self._writeElement(root))
366
385
411
438
465
484
497
508
514
# A processing instruction exposes name='value' / name="value" pairs from
# its content through .get(); bare words without a value ('only', 'names')
# and unknown names yield None.
516 XML = self.etree.XML
517 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
518 self.assertEqual(root[0].target, "mypi")
519 self.assertEqual(root[0].get('my'), "1")
520 self.assertEqual(root[0].get('test'), " abc ")
521 self.assertEqual(root[0].get('quotes'), "' '")
522 self.assertEqual(root[0].get('only'), None)
523 self.assertEqual(root[0].get('names'), None)
524 self.assertEqual(root[0].get('nope'), None)
525
# Same pseudo-attribute parsing as seen through the PI's .attrib mapping:
# valued names are readable by key, while bare words and unknown names
# raise KeyError.
527 XML = self.etree.XML
528 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
529 self.assertEqual(root[0].target, "mypi")
530 self.assertEqual(root[0].attrib['my'], "1")
531 self.assertEqual(root[0].attrib['test'], " abc ")
532 self.assertEqual(root[0].attrib['quotes'], "' '")
533 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
534 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
535 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
536
538
# copy.deepcopy() of a ProcessingInstruction yields an independent object:
# changing the copy's text must leave the original's text untouched.
539 ProcessingInstruction = self.etree.ProcessingInstruction
540
541 a = ProcessingInstruction("PI", "ONE")
542 b = copy.deepcopy(a)
543 b.text = "ANOTHER"
544
545 self.assertEqual('ONE', a.text)
546 self.assertEqual('ANOTHER', b.text)
547
563
578
590
609
614
627
638
639 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
640 events = list(iterparse(f, events=('end', 'comment')))
641 root = events[-1][1]
642 self.assertEqual(6, len(events))
643 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
644 [ name(*item) for item in events ])
645 self.assertEqual(
646 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
647 tostring(root))
648
660
661 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
662 events = list(iterparse(f, events=('end', 'pi')))
663 root = events[-2][1]
664 self.assertEqual(8, len(events))
665 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
666 ('pid','d'), 'a', ('pie','e')],
667 [ name(*item) for item in events ])
668 self.assertEqual(
669 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
670 tostring(ElementTree(root)))
671
686
692
# iterparse(recover=True) on malformed input (<b> is never closed): a root
# must still be available and each of a, b, c must produce exactly one
# 'start' and one 'end' event.
694 iterparse = self.etree.iterparse
695 f = BytesIO('<a><b><c/></a>')
696 it = iterparse(f, events=('start', 'end'), recover=True)
697 events = [(ev, el.tag) for ev, el in it]
698 root = it.root
699 self.assertTrue(root is not None)
700
701 self.assertEqual(1, events.count(('start', 'a')))
702 self.assertEqual(1, events.count(('end', 'a')))
703
704 self.assertEqual(1, events.count(('start', 'b')))
705 self.assertEqual(1, events.count(('end', 'b')))
706
707 self.assertEqual(1, events.count(('start', 'c')))
708 self.assertEqual(1, events.count(('end', 'c')))
709
# iterparse(recover=True) on input with several mismatched closing tags:
# a root must be available, <a> is reported once and <b>/<c> twice each,
# with matching numbers of 'start' and 'end' events.
711 iterparse = self.etree.iterparse
712 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
713 it = iterparse(f, events=('start', 'end'), recover=True)
714 events = [(ev, el.tag) for ev, el in it]
715 root = it.root
716 self.assertTrue(root is not None)
717
718 self.assertEqual(1, events.count(('start', 'a')))
719 self.assertEqual(1, events.count(('end', 'a')))
720
721 self.assertEqual(2, events.count(('start', 'b')))
722 self.assertEqual(2, events.count(('end', 'b')))
723
724 self.assertEqual(2, events.count(('start', 'c')))
725 self.assertEqual(2, events.count(('end', 'c')))
726
728 iterparse = self.etree.iterparse
729 f = BytesIO("""
730 <a> \n \n <b> b test </b> \n
731
732 \n\t <c> \n </c> </a> \n """)
733 iterator = iterparse(f, remove_blank_text=True)
734 text = [ (element.text, element.tail)
735 for event, element in iterator ]
736 self.assertEqual(
737 [(" b test ", None), (" \n ", None), (None, None)],
738 text)
739
# iterparse(tag="b") restricts the reported events to the <b> element:
# exactly one 'start' and one 'end' event, both for root[0].
741 iterparse = self.etree.iterparse
742 f = BytesIO('<a><b><d/></b><c/></a>')
743
744 iterator = iterparse(f, tag="b", events=('start', 'end'))
745 events = list(iterator)
746 root = iterator.root
747 self.assertEqual(
748 [('start', root[0]), ('end', root[0])],
749 events)
750
# iterparse(tag="*") matches every element: four elements times two event
# types gives 8 events.
752 iterparse = self.etree.iterparse
753 f = BytesIO('<a><b><d/></b><c/></a>')
754
755 iterator = iterparse(f, tag="*", events=('start', 'end'))
756 events = list(iterator)
757 self.assertEqual(
758 8,
759 len(events))
760
# iterparse with a namespace-qualified tag filter ("{urn:test:1}b") on a
# document whose default namespace is urn:test:1: only the <b> element
# (root[0]) is reported, once for 'start' and once for 'end'.
762 iterparse = self.etree.iterparse
763 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
764
765 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
766 events = list(iterator)
767 root = iterator.root
768 self.assertEqual(
769 [('start', root[0]), ('end', root[0])],
770 events)
771
# The tag filter "{}b" matches <b> only when it has no namespace: it is
# reported for the plain document, and nothing is reported for the document
# whose elements are in the default namespace urn:test:1.
773 iterparse = self.etree.iterparse
774 f = BytesIO('<a><b><d/></b><c/></a>')
775 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
776 events = list(iterator)
777 root = iterator.root
778 self.assertEqual(
779 [('start', root[0]), ('end', root[0])],
780 events)
781
782 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
783 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
784 events = list(iterator)
785 root = iterator.root
786 self.assertEqual([], events)
787
# The tag filter "{urn:test:1}*" matches every element in that namespace:
# all four elements are reported, giving 8 start/end events.
789 iterparse = self.etree.iterparse
790 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
791 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
792 events = list(iterator)
793 self.assertEqual(8, len(events))
794
# The tag filter "{}*" matches only elements without a namespace: no events
# for the document in namespace urn:test:1, 8 events for the plain one.
796 iterparse = self.etree.iterparse
797 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
798 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
799 events = list(iterator)
800 self.assertEqual([], events)
801
802 f = BytesIO('<a><b><d/></b><c/></a>')
803 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
804 events = list(iterator)
805 self.assertEqual(8, len(events))
806
# The document declares UTF-8 but is actually encoded as ISO-8859-1 with
# non-ASCII characters; consuming iterparse() must raise ParseError.
808 text = _str('Søk på nettet')
809 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
810 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
811 ).encode('iso-8859-1')
812
813 self.assertRaises(self.etree.ParseError,
814 list, self.etree.iterparse(BytesIO(xml_latin1)))
815
# Same wrongly declared ISO-8859-1 document, but iterparse() is given
# encoding="iso-8859-1" explicitly: parsing succeeds with a single event
# and the root's text equals the original text.
817 text = _str('Søk på nettet', encoding="UTF-8")
818 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
819 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
820 ).encode('iso-8859-1')
821
822 iterator = self.etree.iterparse(BytesIO(xml_latin1),
823 encoding="iso-8859-1")
824 self.assertEqual(1, len(list(iterator)))
825
826 a = iterator.root
827 self.assertEqual(a.text, text)
828
# iterparse(strip_cdata=False): the element text is still plain 'test', but
# serialising the root reproduces the CDATA section.
830 tostring = self.etree.tostring
831 f = BytesIO('<root><![CDATA[test]]></root>')
832 context = self.etree.iterparse(f, strip_cdata=False)
833 content = [ el.text for event,el in context ]
834
835 self.assertEqual(['test'], content)
836 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
837 tostring(context.root))
838
842
847
866
867
868
891
892
893
904 def end(self, tag):
905 events.append("end")
906 assertEqual("TAG", tag)
907 def close(self):
908 return "DONE"
909
910 parser = self.etree.XMLParser(target=Target())
911 tree = self.etree.ElementTree()
912
913 self.assertRaises(TypeError,
914 tree.parse, BytesIO("<TAG/>"), parser=parser)
915 self.assertEqual(["start", "end"], events)
916
918
# Feed interface with a parser target whose end() raises ValueError for
# tag 'a'.  The ValueError must reach the caller, and the recorded events
# must stop after "end-a" followed by "close": the target's close() is
# still invoked, and no later data/end events are delivered.
919 events = []
920 class Target(object):
921 def start(self, tag, attrib):
922 events.append("start-" + tag)
923 def end(self, tag):
924 events.append("end-" + tag)
925 if tag == 'a':
926 raise ValueError("dead and gone")
927 def data(self, data):
928 events.append("data-" + data)
929 def close(self):
930 events.append("close")
931 return "DONE"
932
933 parser = self.etree.XMLParser(target=Target())
934
935 try:
936 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
937 done = parser.close()
938 self.fail("error expected, but parsing succeeded")
939 except ValueError:
940 done = 'value error received as expected'
941
942 self.assertEqual(["start-root", "data-A", "start-a",
943 "data-ca", "end-a", "close"],
944 events)
945
947
# Same failing target as used with the feed interface, but driven through
# etree.fromstring(): the ValueError raised in end() must propagate and the
# events must end with "end-a", "close".
948 events = []
949 class Target(object):
950 def start(self, tag, attrib):
951 events.append("start-" + tag)
952 def end(self, tag):
953 events.append("end-" + tag)
954 if tag == 'a':
955 raise ValueError("dead and gone")
956 def data(self, data):
957 events.append("data-" + data)
958 def close(self):
959 events.append("close")
960 return "DONE"
961
962 parser = self.etree.XMLParser(target=Target())
963
964 try:
965 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
966 parser=parser)
967 self.fail("error expected, but parsing succeeded")
968 except ValueError:
969 done = 'value error received as expected'
970
971 self.assertEqual(["start-root", "data-A", "start-a",
972 "data-ca", "end-a", "close"],
973 events)
974
976
# Parser target with collect_ids=False, fed in two chunks with xml:id
# attributes present: close() returns the target's "DONE" and the events
# (comments, start/end, data) arrive in document order.
977 events = []
978 class Target(object):
979 def start(self, tag, attrib):
980 events.append("start-" + tag)
981 def end(self, tag):
982 events.append("end-" + tag)
983 def data(self, data):
984 events.append("data-" + data)
985 def comment(self, text):
986 events.append("comment-" + text)
987 def close(self):
988 return "DONE"
989
990 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
991
992 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
993 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
994 done = parser.close()
995
996 self.assertEqual("DONE", done)
997 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
998 "start-sub", "end-sub", "data-B", "end-root"],
999 events)
1000
1006 def end(self, tag):
1007 events.append("end-" + tag)
1008 def data(self, data):
1009 events.append("data-" + data)
1010 def comment(self, text):
1011 events.append("comment-" + text)
1012 def close(self):
1013 return "DONE"
1014
1015 parser = self.etree.XMLParser(target=Target())
1016
1017 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1018 done = parser.close()
1019
1020 self.assertEqual("DONE", done)
1021 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1022 "start-sub", "end-sub", "comment-c", "data-B",
1023 "end-root", "comment-d"],
1024 events)
1025
# A parser target with a pi(target, data) method receives processing
# instructions before, inside and after the root element, in document
# order; close() returns the target's "DONE".
1027 events = []
1028 class Target(object):
1029 def start(self, tag, attrib):
1030 events.append("start-" + tag)
1031 def end(self, tag):
1032 events.append("end-" + tag)
1033 def data(self, data):
1034 events.append("data-" + data)
1035 def pi(self, target, data):
1036 events.append("pi-" + target + "-" + data)
1037 def close(self):
1038 return "DONE"
1039
1040 parser = self.etree.XMLParser(target=Target())
1041
1042 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1043 done = parser.close()
1044
1045 self.assertEqual("DONE", done)
1046 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1047 "data-B", "end-root", "pi-test-c"],
1048 events)
1049
# With strip_cdata=False a parser target still receives the content of a
# CDATA section through data() as plain text ("data-ca").
1051 events = []
1052 class Target(object):
1053 def start(self, tag, attrib):
1054 events.append("start-" + tag)
1055 def end(self, tag):
1056 events.append("end-" + tag)
1057 def data(self, data):
1058 events.append("data-" + data)
1059 def close(self):
1060 return "DONE"
1061
1062 parser = self.etree.XMLParser(target=Target(),
1063 strip_cdata=False)
1064
1065 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1066 done = parser.close()
1067
1068 self.assertEqual("DONE", done)
1069 self.assertEqual(["start-root", "data-A", "start-a",
1070 "data-ca", "end-a", "data-B", "end-root"],
1071 events)
1072
# With recover=True a mismatched closing tag (</not-root>) does not abort
# parsing: the target still sees "end-root" and "close", and close()
# returns "DONE".
1074 events = []
1075 class Target(object):
1076 def start(self, tag, attrib):
1077 events.append("start-" + tag)
1078 def end(self, tag):
1079 events.append("end-" + tag)
1080 def data(self, data):
1081 events.append("data-" + data)
1082 def close(self):
1083 events.append("close")
1084 return "DONE"
1085
1086 parser = self.etree.XMLParser(target=Target(),
1087 recover=True)
1088
1089 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1090 done = parser.close()
1091
1092 self.assertEqual("DONE", done)
1093 self.assertEqual(["start-root", "data-A", "start-a",
1094 "data-ca", "end-a", "data-B",
1095 "end-root", "close"],
1096 events)
1097
1107
1117
1126
1136
# iterwalk() over an already parsed tree with 'start' and 'end' events:
# events are nested in document order (start a, start/end b, start/end c,
# end a) and carry the tree's own element objects.
1138 iterwalk = self.etree.iterwalk
1139 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1140
1141 iterator = iterwalk(root, events=('start','end'))
1142 events = list(iterator)
1143 self.assertEqual(
1144 [('start', root), ('start', root[0]), ('end', root[0]),
1145 ('start', root[1]), ('end', root[1]), ('end', root)],
1146 events)
1147
1158
# iterwalk() with namespace events while the tree is modified during the
# walk: an attribute is set on every element except the root ('{ns1}a')
# when its 'start' event arrives.  The event sequence must be unaffected,
# the root must not have the attribute and its first child must have it.
1160 iterwalk = self.etree.iterwalk
1161 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1162
1163 attr_name = '{testns}bla'
1164 events = []
1165 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1166 for event, elem in iterator:
1167 events.append(event)
1168 if event == 'start':
1169 if elem.tag != '{ns1}a':
1170 elem.set(attr_name, 'value')
1171
1172 self.assertEqual(
1173 ['start-ns', 'start', 'start', 'start-ns', 'start',
1174 'end', 'end-ns', 'end', 'end', 'end-ns'],
1175 events)
1176
1177 self.assertEqual(
1178 None,
1179 root.get(attr_name))
1180 self.assertEqual(
1181 'value',
1182 root[0].get(attr_name))
1183
1194
# Custom resolver returning the DTD as a text string via resolve_string().
# The resolver checks it is asked for test_url and declares `myentity` with
# the URL as its value, so the parsed root text must equal test_url.
1196 parse = self.etree.parse
1197 parser = self.etree.XMLParser(dtd_validation=True)
1198 assertEqual = self.assertEqual
1199 test_url = _str("__nosuch.dtd")
1200
1201 class MyResolver(self.etree.Resolver):
1202 def resolve(self, url, id, context):
1203 assertEqual(url, test_url)
1204 return self.resolve_string(
1205 _str('''<!ENTITY myentity "%s">
1206 <!ELEMENT doc ANY>''') % url, context)
1207
1208 parser.resolvers.add(MyResolver())
1209
1210 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1211 tree = parse(StringIO(xml), parser)
1212 root = tree.getroot()
1213 self.assertEqual(root.text, test_url)
1214
# Same resolver scenario, but the DTD text is UTF-8 encoded before being
# handed to resolve_string(), i.e. the resolver supplies bytes.
1216 parse = self.etree.parse
1217 parser = self.etree.XMLParser(dtd_validation=True)
1218 assertEqual = self.assertEqual
1219 test_url = _str("__nosuch.dtd")
1220
1221 class MyResolver(self.etree.Resolver):
1222 def resolve(self, url, id, context):
1223 assertEqual(url, test_url)
1224 return self.resolve_string(
1225 (_str('''<!ENTITY myentity "%s">
1226 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1227 context)
1228
1229 parser.resolvers.add(MyResolver())
1230
1231 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1232 tree = parse(StringIO(xml), parser)
1233 root = tree.getroot()
1234 self.assertEqual(root.text, test_url)
1235
# Same resolver scenario, but the DTD is supplied through resolve_file()
# wrapping a SillyFileLike object (test helper imported from
# common_imports) that holds the DTD text.
1237 parse = self.etree.parse
1238 parser = self.etree.XMLParser(dtd_validation=True)
1239 assertEqual = self.assertEqual
1240 test_url = _str("__nosuch.dtd")
1241
1242 class MyResolver(self.etree.Resolver):
1243 def resolve(self, url, id, context):
1244 assertEqual(url, test_url)
1245 return self.resolve_file(
1246 SillyFileLike(
1247 _str('''<!ENTITY myentity "%s">
1248 <!ELEMENT doc ANY>''') % url), context)
1249
1250 parser.resolvers.add(MyResolver())
1251
1252 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1253 tree = parse(StringIO(xml), parser)
1254 root = tree.getroot()
1255 self.assertEqual(root.text, test_url)
1256
# Resolver redirecting the requested DTD URL to the file 'test.dtd' in the
# test directory via resolve_filename().  With attribute_defaults=True the
# parsed elements must carry the default attribute values the assertions
# expect (valueA on <a>, valueB on <b>).
1258 parse = self.etree.parse
1259 parser = self.etree.XMLParser(attribute_defaults=True)
1260 assertEqual = self.assertEqual
1261 test_url = _str("__nosuch.dtd")
1262
1263 class MyResolver(self.etree.Resolver):
1264 def resolve(self, url, id, context):
1265 assertEqual(url, test_url)
1266 return self.resolve_filename(
1267 fileInTestDir('test.dtd'), context)
1268
1269 parser.resolvers.add(MyResolver())
1270
1271 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1272 tree = parse(StringIO(xml), parser)
1273 root = tree.getroot()
1274 self.assertEqual(
1275 root.attrib, {'default': 'valueA'})
1276 self.assertEqual(
1277 root[0].attrib, {'default': 'valueB'})
1278
1293
1294 parser.resolvers.add(MyResolver())
1295
1296 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1297 tree = parse(StringIO(xml), parser,
1298 base_url=fileUrlInTestDir('__test.xml'))
1299 root = tree.getroot()
1300 self.assertEqual(
1301 root.attrib, {'default': 'valueA'})
1302 self.assertEqual(
1303 root[0].attrib, {'default': 'valueB'})
1304
# Resolver supplying 'test.dtd' as an open binary file object through
# resolve_file(); the same default attribute values are expected as when
# the DTD is resolved by filename.
# NOTE(review): the file opened inside resolve() is never closed explicitly
# in this code.
1306 parse = self.etree.parse
1307 parser = self.etree.XMLParser(attribute_defaults=True)
1308 assertEqual = self.assertEqual
1309 test_url = _str("__nosuch.dtd")
1310
1311 class MyResolver(self.etree.Resolver):
1312 def resolve(self, url, id, context):
1313 assertEqual(url, test_url)
1314 return self.resolve_file(
1315 open(fileInTestDir('test.dtd'), 'rb'), context)
1316
1317 parser.resolvers.add(MyResolver())
1318
1319 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1320 tree = parse(StringIO(xml), parser)
1321 root = tree.getroot()
1322 self.assertEqual(
1323 root.attrib, {'default': 'valueA'})
1324 self.assertEqual(
1325 root[0].attrib, {'default': 'valueB'})
1326
# Resolver answering with resolve_empty(): the resolver must have been
# called (tracked through the `check.resolved` flag) and parsing must raise
# XMLSyntaxError -- presumably because `myentity` is then never declared.
1328 parse = self.etree.parse
1329 parser = self.etree.XMLParser(load_dtd=True)
1330 assertEqual = self.assertEqual
1331 test_url = _str("__nosuch.dtd")
1332
1333 class check(object):
1334 resolved = False
1335
1336 class MyResolver(self.etree.Resolver):
1337 def resolve(self, url, id, context):
1338 assertEqual(url, test_url)
1339 check.resolved = True
1340 return self.resolve_empty(context)
1341
1342 parser.resolvers.add(MyResolver())
1343
1344 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1345 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1346 self.assertTrue(check.resolved)
1347
1354
1355 class MyResolver(self.etree.Resolver):
1356 def resolve(self, url, id, context):
1357 raise _LocalException
1358
1359 parser.resolvers.add(MyResolver())
1360
1361 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1362 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1363
1364 if etree.LIBXML_VERSION > (2,6,20):
1381
1383 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1384 <root>
1385 <child1/>
1386 <child2/>
1387 <child3> </child3>
1388 </root>''')
1389
1390 parser = self.etree.XMLParser(resolve_entities=False)
1391 root = etree.fromstring(xml, parser)
1392 self.assertEqual([ el.tag for el in root ],
1393 ['child1', 'child2', 'child3'])
1394
1395 root[0] = root[-1]
1396 self.assertEqual([ el.tag for el in root ],
1397 ['child3', 'child2'])
1398 self.assertEqual(root[0][0].text, ' ')
1399 self.assertEqual(root[0][0].name, 'nbsp')
1400
1416
1423
# Entity() must raise ValueError for invalid names: names containing
# spaces, a comma or a NUL character, and malformed character references
# ('#abc', '#xxyz').
1425 Entity = self.etree.Entity
1426 self.assertRaises(ValueError, Entity, 'a b c')
1427 self.assertRaises(ValueError, Entity, 'a,b')
1428 self.assertRaises(ValueError, Entity, 'a\0b')
1429 self.assertRaises(ValueError, Entity, '#abc')
1430 self.assertRaises(ValueError, Entity, '#xxyz')
1431
1444
1465
1478
1490
1499
1508
1509
1519
1528
# Appending an element to itself or to one of its own descendants must
# raise ValueError, and the failed attempts must leave the tree intact
# (root[0][0] is still 'child2').
1530 Element = self.etree.Element
1531 SubElement = self.etree.SubElement
1532 root = Element('root')
1533 self.assertRaises(ValueError, root.append, root)
1534 child = SubElement(root, 'child')
1535 self.assertRaises(ValueError, child.append, root)
1536 child2 = SubElement(child, 'child2')
1537 self.assertRaises(ValueError, child2.append, root)
1538 self.assertRaises(ValueError, child2.append, child)
1539 self.assertEqual('child2', root[0][0].tag)
1540
1553
1566
1577
1588
1598
1608
1624
1640
1646
1661
1674
1689
1702
1717
1730
1745
1758
1759
1767
1768
1778
1779
1794
1795
1805
1806
1817
1844
1845
# etree.dump(None) must raise TypeError.
1847 self.assertRaises(TypeError, self.etree.dump, None)
1848
1861
1874
1895
1904
1913
1922
1931
1940
# iterchildren(tag=[...]) accepts a list of tag names and yields the
# matching children in document order (both <two> elements, then <three>).
1942 XML = self.etree.XML
1943
1944 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1945 result = []
1946 for el in root.iterchildren(tag=['two', 'three']):
1947 result.append(el.text)
1948 self.assertEqual(['Two', 'Bla', None], result)
1949
# iterchildren() also accepts several tag names as positional arguments and
# yields the same children as the tag=[...] form.
1951 XML = self.etree.XML
1952
1953 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1954 result = []
1955 for el in root.iterchildren('two', 'three'):
1956 result.append(el.text)
1957 self.assertEqual(['Two', 'Bla', None], result)
1958
1967
1988
2010
# iterancestors() with several tag names, given as tag=(...) or as
# positional arguments (each case is checked in both forms): ancestors are
# yielded nearest first, the element itself is never included ('d' yields
# nothing), '*' matches any ancestor, and names that are not ancestors
# ('c', 'w'..'z') are ignored.
2012 Element = self.etree.Element
2013 SubElement = self.etree.SubElement
2014
2015 a = Element('a')
2016 b = SubElement(a, 'b')
2017 c = SubElement(a, 'c')
2018 d = SubElement(b, 'd')
2019 self.assertEqual(
2020 [b, a],
2021 list(d.iterancestors(tag=('a', 'b'))))
2022 self.assertEqual(
2023 [b, a],
2024 list(d.iterancestors('a', 'b')))
2025
2026 self.assertEqual(
2027 [],
2028 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2029 self.assertEqual(
2030 [],
2031 list(d.iterancestors('w', 'x', 'y', 'z')))
2032
2033 self.assertEqual(
2034 [],
2035 list(d.iterancestors(tag=('d', 'x'))))
2036 self.assertEqual(
2037 [],
2038 list(d.iterancestors('d', 'x')))
2039
2040 self.assertEqual(
2041 [b, a],
2042 list(d.iterancestors(tag=('b', '*'))))
2043 self.assertEqual(
2044 [b, a],
2045 list(d.iterancestors('b', '*')))
2046
2047 self.assertEqual(
2048 [b],
2049 list(d.iterancestors(tag=('b', 'c'))))
2050 self.assertEqual(
2051 [b],
2052 list(d.iterancestors('b', 'c')))
2053
2070
# iterdescendants() with a single tag name (positional or tag=...): the
# starting element itself is never yielded, so searching 'a' from the root
# <a> finds nothing until a nested <a> is added below <e>; that nested <a>
# is then found from both the root and <c>.
2072 Element = self.etree.Element
2073 SubElement = self.etree.SubElement
2074
2075 a = Element('a')
2076 b = SubElement(a, 'b')
2077 c = SubElement(a, 'c')
2078 d = SubElement(b, 'd')
2079 e = SubElement(c, 'e')
2080
2081 self.assertEqual(
2082 [],
2083 list(a.iterdescendants('a')))
2084 self.assertEqual(
2085 [],
2086 list(a.iterdescendants(tag='a')))
2087
2088 a2 = SubElement(e, 'a')
2089 self.assertEqual(
2090 [a2],
2091 list(a.iterdescendants('a')))
2092
2093 self.assertEqual(
2094 [a2],
2095 list(c.iterdescendants('a')))
2096 self.assertEqual(
2097 [a2],
2098 list(c.iterdescendants(tag='a')))
2099
# iterdescendants() with several tag names (tag=(...) and positional form
# are checked side by side): matches come in document order, the starting
# element is excluded, unknown names match nothing, and '*' in the list
# matches every descendant.
2101 Element = self.etree.Element
2102 SubElement = self.etree.SubElement
2103
2104 a = Element('a')
2105 b = SubElement(a, 'b')
2106 c = SubElement(a, 'c')
2107 d = SubElement(b, 'd')
2108 e = SubElement(c, 'e')
2109
2110 self.assertEqual(
2111 [b, e],
2112 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2113 self.assertEqual(
2114 [b, e],
2115 list(a.iterdescendants('a', 'b', 'e')))
2116
2117 a2 = SubElement(e, 'a')
2118 self.assertEqual(
2119 [b, a2],
2120 list(a.iterdescendants(tag=('a', 'b'))))
2121 self.assertEqual(
2122 [b, a2],
2123 list(a.iterdescendants('a', 'b')))
2124
2125 self.assertEqual(
2126 [],
2127 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2128 self.assertEqual(
2129 [],
2130 list(c.iterdescendants('x', 'y', 'z')))
2131
2132 self.assertEqual(
2133 [b, d, c, e, a2],
2134 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2135 self.assertEqual(
2136 [b, d, c, e, a2],
2137 list(a.iterdescendants('x', 'y', 'z', '*')))
2138
2156
2173
2191
2215
# itersiblings() filtered by a single tag: following siblings by default,
# preceding siblings with preceding=True; '*' matches any sibling, the
# element itself is never yielded, and the root has no siblings.
2217 Element = self.etree.Element
2218 SubElement = self.etree.SubElement
2219
2220 a = Element('a')
2221 b = SubElement(a, 'b')
2222 c = SubElement(a, 'c')
2223 d = SubElement(b, 'd')
2224 self.assertEqual(
2225 [],
2226 list(a.itersiblings(tag='XXX')))
2227 self.assertEqual(
2228 [c],
2229 list(b.itersiblings(tag='c')))
2230 self.assertEqual(
2231 [c],
2232 list(b.itersiblings(tag='*')))
2233 self.assertEqual(
2234 [b],
2235 list(c.itersiblings(preceding=True, tag='b')))
2236 self.assertEqual(
2237 [],
2238 list(c.itersiblings(preceding=True, tag='c')))
2239
# itersiblings() filtered by a tuple of tags: following siblings come in
# document order ([c, e] from b), preceding siblings nearest first ([c, b]
# from e); a tag listed twice does not yield an element twice, and 'd'
# (a child of b, not a sibling) is never matched.
2241 Element = self.etree.Element
2242 SubElement = self.etree.SubElement
2243
2244 a = Element('a')
2245 b = SubElement(a, 'b')
2246 c = SubElement(a, 'c')
2247 d = SubElement(b, 'd')
2248 e = SubElement(a, 'e')
2249 self.assertEqual(
2250 [],
2251 list(a.itersiblings(tag=('XXX', 'YYY'))))
2252 self.assertEqual(
2253 [c, e],
2254 list(b.itersiblings(tag=('c', 'd', 'e'))))
2255 self.assertEqual(
2256 [b],
2257 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2258 self.assertEqual(
2259 [c, b],
2260 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2261
# parseid() returns a (tree, id-dict) pair.  The tree must serialise like a
# plain XML() parse of the same text, and the dict must map exactly the
# DTD-declared ID attributes (myid, someid) and xml:id to their elements;
# the undeclared plain `id="note1"` attribute is not in the expected dict.
2263 parseid = self.etree.parseid
2264 XML = self.etree.XML
2265 xml_text = _bytes('''
2266 <!DOCTYPE document [
2267 <!ELEMENT document (h1,p)*>
2268 <!ELEMENT h1 (#PCDATA)>
2269 <!ATTLIST h1 myid ID #REQUIRED>
2270 <!ELEMENT p (#PCDATA)>
2271 <!ATTLIST p someid ID #REQUIRED>
2272 ]>
2273 <document>
2274 <h1 myid="chapter1">...</h1>
2275 <p id="note1" class="note">...</p>
2276 <p>Regular paragraph.</p>
2277 <p xml:id="xmlid">XML:ID paragraph.</p>
2278 <p someid="warn1" class="warning">...</p>
2279 </document>
2280 ''')
2281
2282 tree, dic = parseid(BytesIO(xml_text))
2283 root = tree.getroot()
2284 root2 = XML(xml_text)
2285 self.assertEqual(self._writeElement(root),
2286 self._writeElement(root2))
2287 expected = {
2288 "chapter1" : root[0],
2289 "xmlid" : root[3],
2290 "warn1" : root[4]
2291 }
2292 self.assertTrue("chapter1" in dic)
2293 self.assertTrue("warn1" in dic)
2294 self.assertTrue("xmlid" in dic)
2295 self._checkIDDict(dic, expected)
2296
# XMLDTDID() parses from a string and returns a (root, id-dict) pair; the
# expectations are the same as for parseid(): identical serialisation to
# XML(), and an ID dict holding the DTD-declared IDs plus xml:id.
2298 XMLDTDID = self.etree.XMLDTDID
2299 XML = self.etree.XML
2300 xml_text = _bytes('''
2301 <!DOCTYPE document [
2302 <!ELEMENT document (h1,p)*>
2303 <!ELEMENT h1 (#PCDATA)>
2304 <!ATTLIST h1 myid ID #REQUIRED>
2305 <!ELEMENT p (#PCDATA)>
2306 <!ATTLIST p someid ID #REQUIRED>
2307 ]>
2308 <document>
2309 <h1 myid="chapter1">...</h1>
2310 <p id="note1" class="note">...</p>
2311 <p>Regular paragraph.</p>
2312 <p xml:id="xmlid">XML:ID paragraph.</p>
2313 <p someid="warn1" class="warning">...</p>
2314 </document>
2315 ''')
2316
2317 root, dic = XMLDTDID(xml_text)
2318 root2 = XML(xml_text)
2319 self.assertEqual(self._writeElement(root),
2320 self._writeElement(root2))
2321 expected = {
2322 "chapter1" : root[0],
2323 "xmlid" : root[3],
2324 "warn1" : root[4]
2325 }
2326 self.assertTrue("chapter1" in dic)
2327 self.assertTrue("warn1" in dic)
2328 self.assertTrue("xmlid" in dic)
2329 self._checkIDDict(dic, expected)
2330
# Without a DTD (and without xml:id attributes) none of the id-like
# attributes counts as an ID: XMLDTDID() must return an empty ID dict while
# the tree still serialises like a plain XML() parse.
2332 XMLDTDID = self.etree.XMLDTDID
2333 XML = self.etree.XML
2334 xml_text = _bytes('''
2335 <document>
2336 <h1 myid="chapter1">...</h1>
2337 <p id="note1" class="note">...</p>
2338 <p>Regular paragraph.</p>
2339 <p someid="warn1" class="warning">...</p>
2340 </document>
2341 ''')
2342
2343 root, dic = XMLDTDID(xml_text)
2344 root2 = XML(xml_text)
2345 self.assertEqual(self._writeElement(root),
2346 self._writeElement(root2))
2347 expected = {}
2348 self._checkIDDict(dic, expected)
2349
# With a parser created with collect_ids=False, XMLDTDID() must return an
# empty (falsy) ID dict even though the document declares IDs; the parsed
# tree itself is unaffected.
2351 XMLDTDID = self.etree.XMLDTDID
2352 XML = self.etree.XML
2353 xml_text = _bytes('''
2354 <!DOCTYPE document [
2355 <!ELEMENT document (h1,p)*>
2356 <!ELEMENT h1 (#PCDATA)>
2357 <!ATTLIST h1 myid ID #REQUIRED>
2358 <!ELEMENT p (#PCDATA)>
2359 <!ATTLIST p someid ID #REQUIRED>
2360 ]>
2361 <document>
2362 <h1 myid="chapter1">...</h1>
2363 <p id="note1" class="note">...</p>
2364 <p>Regular paragraph.</p>
2365 <p xml:id="xmlid">XML:ID paragraph.</p>
2366 <p someid="warn1" class="warning">...</p>
2367 </document>
2368 ''')
2369
2370 parser = etree.XMLParser(collect_ids=False)
2371 root, dic = XMLDTDID(xml_text, parser=parser)
2372 root2 = XML(xml_text)
2373 self.assertEqual(self._writeElement(root),
2374 self._writeElement(root2))
2375 self.assertFalse(dic)
2376 self._checkIDDict(dic, {})
2377
2379 self.assertEqual(len(dic),
2380 len(expected))
2381 self.assertEqual(sorted(dic.items()),
2382 sorted(expected.items()))
2383 if sys.version_info < (3,):
2384 self.assertEqual(sorted(dic.iteritems()),
2385 sorted(expected.iteritems()))
2386 self.assertEqual(sorted(dic.keys()),
2387 sorted(expected.keys()))
2388 if sys.version_info < (3,):
2389 self.assertEqual(sorted(dic.iterkeys()),
2390 sorted(expected.iterkeys()))
2391 if sys.version_info < (3,):
2392 self.assertEqual(sorted(dic.values()),
2393 sorted(expected.values()))
2394 self.assertEqual(sorted(dic.itervalues()),
2395 sorted(expected.itervalues()))
2396
2398 etree = self.etree
2399
2400 r = {'foo': 'http://ns.infrae.com/foo'}
2401 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2402 self.assertEqual(
2403 'foo',
2404 e.prefix)
2405 self.assertEqual(
2406 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2407 self._writeElement(e))
2408
2410 etree = self.etree
2411
2412 r = {None: 'http://ns.infrae.com/foo'}
2413 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2414 self.assertEqual(
2415 None,
2416 e.prefix)
2417 self.assertEqual(
2418 '{http://ns.infrae.com/foo}bar',
2419 e.tag)
2420 self.assertEqual(
2421 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2422 self._writeElement(e))
2423
2425 etree = self.etree
2426
2427 r = {None: 'http://ns.infrae.com/foo',
2428 'hoi': 'http://ns.infrae.com/hoi'}
2429 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2430 e.set('{http://ns.infrae.com/hoi}test', 'value')
2431 self.assertEqual(
2432 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2433 self._writeElement(e))
2434
2436 etree = self.etree
2437
2438 root = etree.Element('{http://test/ns}root',
2439 nsmap={None: 'http://test/ns'})
2440 sub = etree.Element('{http://test/ns}sub',
2441 nsmap={'test': 'http://test/ns'})
2442
2443 sub.attrib['{http://test/ns}attr'] = 'value'
2444 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2445 self.assertEqual(
2446 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2447 etree.tostring(sub))
2448
2449 root.append(sub)
2450 self.assertEqual(
2451 _bytes('<root xmlns="http://test/ns">'
2452 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2453 '</root>'),
2454 etree.tostring(root))
2455
2457 etree = self.etree
2458
2459 root = etree.Element('root')
2460 sub = etree.Element('{http://test/ns}sub',
2461 nsmap={'test': 'http://test/ns'})
2462
2463 sub.attrib['{http://test/ns}attr'] = 'value'
2464 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2465 self.assertEqual(
2466 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2467 etree.tostring(sub))
2468
2469 root.append(sub)
2470 self.assertEqual(
2471 _bytes('<root>'
2472 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2473 '</root>'),
2474 etree.tostring(root))
2475
2477 etree = self.etree
2478
2479 root = etree.Element('root')
2480 sub = etree.Element('{http://test/ns}sub',
2481 nsmap={None: 'http://test/ns'})
2482
2483 sub.attrib['{http://test/ns}attr'] = 'value'
2484 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2485 self.assertEqual(
2486 _bytes('<sub xmlns="http://test/ns" '
2487 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2488 etree.tostring(sub))
2489
2490 root.append(sub)
2491 self.assertEqual(
2492 _bytes('<root>'
2493 '<sub xmlns="http://test/ns"'
2494 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2495 '</root>'),
2496 etree.tostring(root))
2497
2499 etree = self.etree
2500
2501 root = etree.Element('{http://test/ns}root',
2502 nsmap={'test': 'http://test/ns',
2503 None: 'http://test/ns'})
2504 sub = etree.Element('{http://test/ns}sub',
2505 nsmap={None: 'http://test/ns'})
2506
2507 sub.attrib['{http://test/ns}attr'] = 'value'
2508 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2509 self.assertEqual(
2510 _bytes('<sub xmlns="http://test/ns" '
2511 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2512 etree.tostring(sub))
2513
2514 root.append(sub)
2515 self.assertEqual(
2516 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2517 '<test:sub test:attr="value"/>'
2518 '</test:root>'),
2519 etree.tostring(root))
2520
2522 etree = self.etree
2523 r = {None: 'http://ns.infrae.com/foo',
2524 'hoi': 'http://ns.infrae.com/hoi'}
2525 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2526 tree = etree.ElementTree(element=e)
2527 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2528 self.assertEqual(
2529 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2530 self._writeElement(e))
2531
2533 etree = self.etree
2534
2535 r = {None: 'http://ns.infrae.com/foo'}
2536 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2537 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2538
2539 e1.append(e2)
2540
2541 self.assertEqual(
2542 None,
2543 e1.prefix)
2544 self.assertEqual(
2545 None,
2546 e1[0].prefix)
2547 self.assertEqual(
2548 '{http://ns.infrae.com/foo}bar',
2549 e1.tag)
2550 self.assertEqual(
2551 '{http://ns.infrae.com/foo}bar',
2552 e1[0].tag)
2553
2555 etree = self.etree
2556
2557 r = {None: 'http://ns.infrae.com/BAR'}
2558 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2559 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2560
2561 e1.append(e2)
2562
2563 self.assertEqual(
2564 None,
2565 e1.prefix)
2566 self.assertNotEqual(
2567 None,
2568 e2.prefix)
2569 self.assertEqual(
2570 '{http://ns.infrae.com/BAR}bar',
2571 e1.tag)
2572 self.assertEqual(
2573 '{http://ns.infrae.com/foo}bar',
2574 e2.tag)
2575
2577 ns_href = "http://a.b.c"
2578 one = self.etree.fromstring(
2579 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2580 baz = one[0][0]
2581
2582 two = self.etree.fromstring(
2583 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2584 two.append(baz)
2585 del one
2586
2587 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2588 self.assertEqual(
2589 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2590 self.etree.tostring(two))
2591
2601
2603 etree = self.etree
2604
2605 r = {None: 'http://ns.infrae.com/foo',
2606 'hoi': 'http://ns.infrae.com/hoi'}
2607 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2608 self.assertEqual(
2609 r,
2610 e.nsmap)
2611
2613 etree = self.etree
2614
2615 re = {None: 'http://ns.infrae.com/foo',
2616 'hoi': 'http://ns.infrae.com/hoi'}
2617 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2618
2619 rs = {None: 'http://ns.infrae.com/honk',
2620 'top': 'http://ns.infrae.com/top'}
2621 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2622
2623 r = re.copy()
2624 r.update(rs)
2625 self.assertEqual(re, e.nsmap)
2626 self.assertEqual(r, s.nsmap)
2627
2629 etree = self.etree
2630 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2631 self.assertEqual({'hha': None}, el.nsmap)
2632
2634 Element = self.etree.Element
2635 SubElement = self.etree.SubElement
2636
2637 a = Element('a')
2638 b = SubElement(a, 'b')
2639 c = SubElement(a, 'c')
2640 d = SubElement(b, 'd')
2641 e = SubElement(c, 'e')
2642 f = SubElement(c, 'f')
2643
2644 self.assertEqual(
2645 [a, b],
2646 list(a.getiterator('a', 'b')))
2647 self.assertEqual(
2648 [],
2649 list(a.getiterator('x', 'y')))
2650 self.assertEqual(
2651 [a, f],
2652 list(a.getiterator('f', 'a')))
2653 self.assertEqual(
2654 [c, e, f],
2655 list(c.getiterator('c', '*', 'a')))
2656 self.assertEqual(
2657 [],
2658 list(a.getiterator( (), () )))
2659
2661 Element = self.etree.Element
2662 SubElement = self.etree.SubElement
2663
2664 a = Element('a')
2665 b = SubElement(a, 'b')
2666 c = SubElement(a, 'c')
2667 d = SubElement(b, 'd')
2668 e = SubElement(c, 'e')
2669 f = SubElement(c, 'f')
2670
2671 self.assertEqual(
2672 [a, b],
2673 list(a.getiterator( ('a', 'b') )))
2674 self.assertEqual(
2675 [],
2676 list(a.getiterator( ('x', 'y') )))
2677 self.assertEqual(
2678 [a, f],
2679 list(a.getiterator( ('f', 'a') )))
2680 self.assertEqual(
2681 [c, e, f],
2682 list(c.getiterator( ('c', '*', 'a') )))
2683 self.assertEqual(
2684 [],
2685 list(a.getiterator( () )))
2686
2688 Element = self.etree.Element
2689 SubElement = self.etree.SubElement
2690
2691 a = Element('{a}a')
2692 b = SubElement(a, '{a}b')
2693 c = SubElement(a, '{a}c')
2694 d = SubElement(b, '{b}d')
2695 e = SubElement(c, '{a}e')
2696 f = SubElement(c, '{b}f')
2697 g = SubElement(c, 'g')
2698
2699 self.assertEqual(
2700 [a],
2701 list(a.getiterator('{a}a')))
2702 self.assertEqual(
2703 [],
2704 list(a.getiterator('{b}a')))
2705 self.assertEqual(
2706 [],
2707 list(a.getiterator('a')))
2708 self.assertEqual(
2709 [a,b,d,c,e,f,g],
2710 list(a.getiterator('*')))
2711 self.assertEqual(
2712 [f],
2713 list(c.getiterator('{b}*')))
2714 self.assertEqual(
2715 [d, f],
2716 list(a.getiterator('{b}*')))
2717 self.assertEqual(
2718 [g],
2719 list(a.getiterator('g')))
2720 self.assertEqual(
2721 [g],
2722 list(a.getiterator('{}g')))
2723 self.assertEqual(
2724 [g],
2725 list(a.getiterator('{}*')))
2726
2728 Element = self.etree.Element
2729 SubElement = self.etree.SubElement
2730
2731 a = Element('{a}a')
2732 b = SubElement(a, '{nsA}b')
2733 c = SubElement(b, '{nsB}b')
2734 d = SubElement(a, 'b')
2735 e = SubElement(a, '{nsA}e')
2736 f = SubElement(e, '{nsB}e')
2737 g = SubElement(e, 'e')
2738
2739 self.assertEqual(
2740 [b, c, d],
2741 list(a.getiterator('{*}b')))
2742 self.assertEqual(
2743 [e, f, g],
2744 list(a.getiterator('{*}e')))
2745 self.assertEqual(
2746 [a, b, c, d, e, f, g],
2747 list(a.getiterator('{*}*')))
2748
2773
2789
2806
2808 a = etree.Element("a")
2809 b = etree.SubElement(a, "b")
2810 c = etree.SubElement(a, "c")
2811 d1 = etree.SubElement(c, "d")
2812 d2 = etree.SubElement(c, "d")
2813 c.text = d1.text = 'TEXT'
2814
2815 tree = etree.ElementTree(a)
2816 self.assertEqual('.', tree.getelementpath(a))
2817 self.assertEqual('c/d[1]', tree.getelementpath(d1))
2818 self.assertEqual('c/d[2]', tree.getelementpath(d2))
2819
2820 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
2821 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
2822
2823 tree = etree.ElementTree(c)
2824 self.assertEqual('.', tree.getelementpath(c))
2825 self.assertEqual('d[2]', tree.getelementpath(d2))
2826 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
2827
2828 tree = etree.ElementTree(b)
2829 self.assertEqual('.', tree.getelementpath(b))
2830 self.assertRaises(ValueError, tree.getelementpath, a)
2831 self.assertRaises(ValueError, tree.getelementpath, c)
2832 self.assertRaises(ValueError, tree.getelementpath, d2)
2833
2835 a = etree.Element("{http://ns1/}a")
2836 b = etree.SubElement(a, "{http://ns1/}b")
2837 c = etree.SubElement(a, "{http://ns1/}c")
2838 d1 = etree.SubElement(c, "{http://ns1/}d")
2839 d2 = etree.SubElement(c, "{http://ns2/}d")
2840 d3 = etree.SubElement(c, "{http://ns1/}d")
2841
2842 tree = etree.ElementTree(a)
2843 self.assertEqual('.', tree.getelementpath(a))
2844 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
2845 tree.getelementpath(d1))
2846 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
2847 tree.getelementpath(d2))
2848 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
2849 tree.getelementpath(d3))
2850
2851 self.assertEqual(a, tree.find(tree.getelementpath(a)))
2852 self.assertEqual(b, tree.find(tree.getelementpath(b)))
2853 self.assertEqual(c, tree.find(tree.getelementpath(c)))
2854 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
2855 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
2856 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
2857
2858 tree = etree.ElementTree(c)
2859 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
2860 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
2861 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
2862 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
2863 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
2864 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
2865
2866 tree = etree.ElementTree(b)
2867 self.assertRaises(ValueError, tree.getelementpath, d1)
2868 self.assertRaises(ValueError, tree.getelementpath, d2)
2869
2876
2883
2892
2894 XML = self.etree.XML
2895 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
2896 self.assertEqual(len(root.findall(".//{X}b")), 2)
2897 self.assertEqual(len(root.findall(".//{X}*")), 2)
2898 self.assertEqual(len(root.findall(".//b")), 3)
2899
2901 XML = self.etree.XML
2902 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
2903 nsmap = {'xx': 'X'}
2904 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
2905 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
2906 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2907 nsmap = {'xx': 'Y'}
2908 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
2909 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
2910 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2911
2918
2920 etree = self.etree
2921 e = etree.Element('foo')
2922 for i in range(10):
2923 etree.SubElement(e, 'a%s' % i)
2924 for i in range(10):
2925 self.assertEqual(
2926 i,
2927 e.index(e[i]))
2928 self.assertEqual(
2929 3, e.index(e[3], 3))
2930 self.assertRaises(
2931 ValueError, e.index, e[3], 4)
2932 self.assertRaises(
2933 ValueError, e.index, e[3], 0, 2)
2934 self.assertRaises(
2935 ValueError, e.index, e[8], 0, -3)
2936 self.assertRaises(
2937 ValueError, e.index, e[8], -5, -3)
2938 self.assertEqual(
2939 8, e.index(e[8], 0, -1))
2940 self.assertEqual(
2941 8, e.index(e[8], -12, -1))
2942 self.assertEqual(
2943 0, e.index(e[0], -12, -1))
2944
2946 etree = self.etree
2947 e = etree.Element('foo')
2948 for i in range(10):
2949 el = etree.SubElement(e, 'a%s' % i)
2950 el.text = "text%d" % i
2951 el.tail = "tail%d" % i
2952
2953 child0 = e[0]
2954 child1 = e[1]
2955 child2 = e[2]
2956
2957 e.replace(e[0], e[1])
2958 self.assertEqual(
2959 9, len(e))
2960 self.assertEqual(
2961 child1, e[0])
2962 self.assertEqual(
2963 child1.text, "text1")
2964 self.assertEqual(
2965 child1.tail, "tail1")
2966 self.assertEqual(
2967 child0.tail, "tail0")
2968 self.assertEqual(
2969 child2, e[1])
2970
2971 e.replace(e[-1], e[0])
2972 self.assertEqual(
2973 child1, e[-1])
2974 self.assertEqual(
2975 child1.text, "text1")
2976 self.assertEqual(
2977 child1.tail, "tail1")
2978 self.assertEqual(
2979 child2, e[0])
2980
2982 etree = self.etree
2983 e = etree.Element('foo')
2984 for i in range(10):
2985 etree.SubElement(e, 'a%s' % i)
2986
2987 new_element = etree.Element("test")
2988 new_element.text = "TESTTEXT"
2989 new_element.tail = "TESTTAIL"
2990 child1 = e[1]
2991 e.replace(e[0], new_element)
2992 self.assertEqual(
2993 new_element, e[0])
2994 self.assertEqual(
2995 "TESTTEXT",
2996 e[0].text)
2997 self.assertEqual(
2998 "TESTTAIL",
2999 e[0].tail)
3000 self.assertEqual(
3001 child1, e[1])
3002
3018
3036
3054
3072
3074 Element = self.etree.Element
3075 SubElement = self.etree.SubElement
3076 try:
3077 slice
3078 except NameError:
3079 print("slice() not found")
3080 return
3081
3082 a = Element('a')
3083 b = SubElement(a, 'b')
3084 c = SubElement(a, 'c')
3085 d = SubElement(a, 'd')
3086 e = SubElement(a, 'e')
3087
3088 x = Element('x')
3089 y = Element('y')
3090 z = Element('z')
3091
3092 self.assertRaises(
3093 ValueError,
3094 operator.setitem, a, slice(1,None,2), [x, y, z])
3095
3096 self.assertEqual(
3097 [b, c, d, e],
3098 list(a))
3099
3112
3114 XML = self.etree.XML
3115 root = XML(_bytes(
3116 '<?xml version="1.0"?>\n'
3117 '<root>' + '\n' * 65536 +
3118 '<p>' + '\n' * 65536 + '</p>\n' +
3119 '<br/>\n'
3120 '</root>'))
3121
3122 if self.etree.LIBXML_VERSION >= (2, 9):
3123 expected = [2, 131074, 131076]
3124 else:
3125 expected = [2, 65535, 65535]
3126
3127 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3128
3136
3145
3155
3165
3171
3179
3185
3192
3198
3200 etree = self.etree
3201 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3202 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3203 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3204 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3205
3206 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3207
3208 tree = etree.parse(BytesIO(xml))
3209 docinfo = tree.docinfo
3210 self.assertEqual(docinfo.encoding, "ascii")
3211 self.assertEqual(docinfo.xml_version, "1.0")
3212 self.assertEqual(docinfo.public_id, pub_id)
3213 self.assertEqual(docinfo.system_url, sys_id)
3214 self.assertEqual(docinfo.root_name, 'html')
3215 self.assertEqual(docinfo.doctype, doctype_string)
3216
3232
3244
3256
3262
3264 etree = self.etree
3265 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3266 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3267 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3268
3269 xml = _bytes('<!DOCTYPE root>\n<root/>')
3270 tree = etree.parse(BytesIO(xml))
3271 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3272 etree.tostring(tree, doctype=doctype_string))
3273
3275 etree = self.etree
3276 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3277 self.assertEqual(root.base, "http://no/such/url")
3278 self.assertEqual(
3279 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3280 root.base = "https://secret/url"
3281 self.assertEqual(root.base, "https://secret/url")
3282 self.assertEqual(
3283 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3284 "https://secret/url")
3285
3287 etree = self.etree
3288 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3289 self.assertEqual(root.base, "http://no/such/url")
3290 self.assertEqual(
3291 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3292 root.set('{http://www.w3.org/XML/1998/namespace}base',
3293 "https://secret/url")
3294 self.assertEqual(root.base, "https://secret/url")
3295 self.assertEqual(
3296 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3297 "https://secret/url")
3298
3304
3309
3316
3330
3332 Element = self.etree.Element
3333
3334 a = Element('a')
3335 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3336 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3337
3338 self.assertRaises(ValueError, Element, 'ha\0ho')
3339
3341 Element = self.etree.Element
3342
3343 a = Element('a')
3344 self.assertRaises(ValueError, setattr, a, "text",
3345 _str('ha\0ho'))
3346 self.assertRaises(ValueError, setattr, a, "tail",
3347 _str('ha\0ho'))
3348
3349 self.assertRaises(ValueError, Element,
3350 _str('ha\0ho'))
3351
3353 Element = self.etree.Element
3354
3355 a = Element('a')
3356 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3357 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3358
3359 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3360 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3361
3362 self.assertRaises(ValueError, Element, 'ha\x07ho')
3363 self.assertRaises(ValueError, Element, 'ha\x02ho')
3364
3366 Element = self.etree.Element
3367
3368 a = Element('a')
3369 self.assertRaises(ValueError, setattr, a, "text",
3370 _str('ha\x07ho'))
3371 self.assertRaises(ValueError, setattr, a, "text",
3372 _str('ha\x02ho'))
3373
3374 self.assertRaises(ValueError, setattr, a, "tail",
3375 _str('ha\x07ho'))
3376 self.assertRaises(ValueError, setattr, a, "tail",
3377 _str('ha\x02ho'))
3378
3379 self.assertRaises(ValueError, Element,
3380 _str('ha\x07ho'))
3381 self.assertRaises(ValueError, Element,
3382 _str('ha\x02ho'))
3383
3385 Element = self.etree.Element
3386
3387 a = Element('a')
3388 self.assertRaises(ValueError, setattr, a, "text",
3389 _str('ha\u1234\x07ho'))
3390 self.assertRaises(ValueError, setattr, a, "text",
3391 _str('ha\u1234\x02ho'))
3392
3393 self.assertRaises(ValueError, setattr, a, "tail",
3394 _str('ha\u1234\x07ho'))
3395 self.assertRaises(ValueError, setattr, a, "tail",
3396 _str('ha\u1234\x02ho'))
3397
3398 self.assertRaises(ValueError, Element,
3399 _str('ha\u1234\x07ho'))
3400 self.assertRaises(ValueError, Element,
3401 _str('ha\u1234\x02ho'))
3402
3416
3421
3439
3459
3461 tostring = self.etree.tostring
3462 html = self.etree.fromstring(
3463 '<html><body>'
3464 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3465 '</body></html>',
3466 parser=self.etree.HTMLParser())
3467 self.assertEqual(html.tag, 'html')
3468 div = html.find('.//div')
3469 self.assertEqual(div.tail, '\r\n')
3470 result = tostring(div, method='html')
3471 self.assertEqual(
3472 result,
3473 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3474 result = tostring(div, method='html', with_tail=True)
3475 self.assertEqual(
3476 result,
3477 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3478 result = tostring(div, method='html', with_tail=False)
3479 self.assertEqual(
3480 result,
3481 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3482
3504
3506 tostring = self.etree.tostring
3507 XML = self.etree.XML
3508 ElementTree = self.etree.ElementTree
3509
3510 root = XML(_bytes("<root/>"))
3511
3512 tree = ElementTree(root)
3513 self.assertEqual(None, tree.docinfo.standalone)
3514
3515 result = tostring(root, xml_declaration=True, encoding="ASCII")
3516 self.assertEqual(result, _bytes(
3517 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3518
3519 result = tostring(root, xml_declaration=True, encoding="ASCII",
3520 standalone=True)
3521 self.assertEqual(result, _bytes(
3522 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3523
3524 tree = ElementTree(XML(result))
3525 self.assertEqual(True, tree.docinfo.standalone)
3526
3527 result = tostring(root, xml_declaration=True, encoding="ASCII",
3528 standalone=False)
3529 self.assertEqual(result, _bytes(
3530 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3531
3532 tree = ElementTree(XML(result))
3533 self.assertEqual(False, tree.docinfo.standalone)
3534
3554
3556 tostring = self.etree.tostring
3557 Element = self.etree.Element
3558 SubElement = self.etree.SubElement
3559
3560 a = Element('a')
3561 a.text = "A"
3562 a.tail = "tail"
3563 b = SubElement(a, 'b')
3564 b.text = "B"
3565 b.tail = _str("Søk på nettet")
3566 c = SubElement(a, 'c')
3567 c.text = "C"
3568
3569 result = tostring(a, method="text", encoding="UTF-16")
3570
3571 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3572 result)
3573
3575 tostring = self.etree.tostring
3576 Element = self.etree.Element
3577 SubElement = self.etree.SubElement
3578
3579 a = Element('a')
3580 a.text = _str('Søk på nettetA')
3581 a.tail = "tail"
3582 b = SubElement(a, 'b')
3583 b.text = "B"
3584 b.tail = _str('Søk på nettetB')
3585 c = SubElement(a, 'c')
3586 c.text = "C"
3587
3588 self.assertRaises(UnicodeEncodeError,
3589 tostring, a, method="text")
3590
3591 self.assertEqual(
3592 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3593 tostring(a, encoding="UTF-8", method="text"))
3594
3607
3623
3627
3642
3660
3673
3675 tostring = self.etree.tostring
3676 Element = self.etree.Element
3677 SubElement = self.etree.SubElement
3678
3679 a = Element('a')
3680 b = SubElement(a, 'b')
3681 c = SubElement(a, 'c')
3682 d = SubElement(c, 'd')
3683 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3684 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3685 self.assertEqual(_bytes('<b></b>'),
3686 canonicalize(tostring(b, encoding=_unicode)))
3687 self.assertEqual(_bytes('<c><d></d></c>'),
3688 canonicalize(tostring(c, encoding=_unicode)))
3689
3694
3709
3711 tostring = self.etree.tostring
3712 Element = self.etree.Element
3713 SubElement = self.etree.SubElement
3714
3715 a = Element('a')
3716 b = SubElement(a, 'b')
3717 c = SubElement(a, 'c')
3718
3719 result = tostring(a, encoding=_unicode)
3720 self.assertEqual(result, "<a><b/><c/></a>")
3721
3722 result = tostring(a, encoding=_unicode, pretty_print=False)
3723 self.assertEqual(result, "<a><b/><c/></a>")
3724
3725 result = tostring(a, encoding=_unicode, pretty_print=True)
3726 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3727
3739
class SubEl(etree.ElementBase):
    """Plain ``ElementBase`` subclass that adds no behaviour of its own.

    The surrounding test instantiates it without arguments, expects the
    instances to report the tag ``'SubEl'``, and stores a Python-level
    ``other`` attribute on each instance.
    """
3743
3744 el1 = SubEl()
3745 el2 = SubEl()
3746 self.assertEqual('SubEl', el1.tag)
3747 self.assertEqual('SubEl', el2.tag)
3748 el1.other = el2
3749 el2.other = el1
3750
3751 del el1, el2
3752 gc.collect()
3753
3754
3768
3770 root = etree.Element('parent')
3771 c1 = etree.SubElement(root, 'child1')
3772 c2 = etree.SubElement(root, 'child2')
3773
3774 root.remove(c1)
3775 root.remove(c2)
3776 c1.addnext(c2)
3777 c1.tail = 'abc'
3778 c2.tail = 'xyz'
3779 del c1
3780
3781 c2.getprevious()
3782
3783 self.assertEqual('child1', c2.getprevious().tag)
3784 self.assertEqual('abc', c2.getprevious().tail)
3785
3786
3787
3788 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3799
3800
3844
3845 res_instance = res()
3846 parser = etree.XMLParser(load_dtd = True)
3847 parser.resolvers.add(res_instance)
3848
3849 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3850 parser = parser)
3851
3852 self.include(tree)
3853
3854 called = list(res_instance.called.items())
3855 called.sort()
3856 self.assertEqual(
3857 [("dtd", True), ("include", True), ("input", True)],
3858 called)
3859
3861 data = textwrap.dedent('''
3862 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
3863 <foo/>
3864 <xi:include href="./test.xml" />
3865 </doc>
3866 ''')
3867
class Resolver(etree.Resolver):
    """Resolver that serves a chain of generated XInclude documents.

    Every URL it is asked about is recorded in ``called``; each URL may
    only be requested once (enforced with ``assert``).
    """

    # Record of everything resolved so far, keyed by "input", "DONE" or
    # the requested file name.  Defined on the class, not per instance.
    called = {}

    def resolve(self, url, id, context):
        """Record ``url`` and return the document to use for it.

        * URLs ending in ``test_xinclude.xml`` are recorded as "input"
          and ``None`` is returned (no replacement document supplied).
        * URLs ending in ``/test5.xml`` end the chain with ``<DONE/>``.
        * Any other URL is answered with a copy of the enclosing
          ``data`` template whose include points at ``test<N>.xml``,
          where N is the number of entries recorded so far.
        """
        seen = self.called

        if url.endswith("test_xinclude.xml"):
            assert not seen.get("input")
            seen["input"] = True
            return None

        if url.endswith('/test5.xml'):
            assert not seen.get("DONE")
            seen["DONE"] = True
            return self.resolve_string('<DONE/>', context)

        # Only the last path component is used as the bookkeeping key.
        _, filename = url.rsplit('/', 1)
        assert not seen.get(filename)
        seen[filename] = True
        next_name = 'test%d.xml' % len(seen)
        next_data = data.replace('test.xml', next_name)
        return self.resolve_string(next_data, context)
3887
3888 res_instance = Resolver()
3889 parser = etree.XMLParser(load_dtd=True)
3890 parser.resolvers.add(res_instance)
3891
3892 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3893 parser=parser)
3894
3895 self.include(tree)
3896
3897 called = list(res_instance.called.items())
3898 called.sort()
3899 self.assertEqual(
3900 [("DONE", True), ("input", True), ("test.xml", True),
3901 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
3902 called)
3903
3904
3908
3909
3914
3915
3918 tree = self.parse(_bytes('<a><b/></a>'))
3919 f = BytesIO()
3920 tree.write_c14n(f)
3921 s = f.getvalue()
3922 self.assertEqual(_bytes('<a><b></b></a>'),
3923 s)
3924
3926 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3927 f = BytesIO()
3928 tree.write_c14n(f, compression=9)
3929 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
3930 try:
3931 s = gzfile.read()
3932 finally:
3933 gzfile.close()
3934 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
3935 s)
3936
3948
3964
3982
3994
4006
4008 tree = self.parse(_bytes(
4009 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4010 f = BytesIO()
4011 tree.write_c14n(f)
4012 s = f.getvalue()
4013 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4014 s)
4015 f = BytesIO()
4016 tree.write_c14n(f, exclusive=False)
4017 s = f.getvalue()
4018 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4019 s)
4020 f = BytesIO()
4021 tree.write_c14n(f, exclusive=True)
4022 s = f.getvalue()
4023 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4024 s)
4025
4026 f = BytesIO()
4027 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
4028 s = f.getvalue()
4029 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
4030 s)
4031
4033 tree = self.parse(_bytes(
4034 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4035 s = etree.tostring(tree, method='c14n')
4036 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4037 s)
4038 s = etree.tostring(tree, method='c14n', exclusive=False)
4039 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4040 s)
4041 s = etree.tostring(tree, method='c14n', exclusive=True)
4042 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4043 s)
4044
4045 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4046 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
4047 s)
4048
4050 tree = self.parse(_bytes(
4051 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4052 s = etree.tostring(tree.getroot(), method='c14n')
4053 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4054 s)
4055 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
4056 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4057 s)
4058 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
4059 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4060 s)
4061
4062 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
4063 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4064 s)
4065 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
4066 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
4067 s)
4068
4069 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4070 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4071 s)
4072
4074 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
4075 tree = self.parse(_bytes(
4076 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4077
4078 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
4079 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4080 s)
4081
4082
4085 tree = self.parse(_bytes('<a><b/></a>'))
4086 f = BytesIO()
4087 tree.write(f)
4088 s = f.getvalue()
4089 self.assertEqual(_bytes('<a><b/></a>'),
4090 s)
4091
4093 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4094 f = BytesIO()
4095 tree.write(f, compression=9)
4096 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
4097 try:
4098 s = gzfile.read()
4099 finally:
4100 gzfile.close()
4101 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4102 s)
4103
# Compares tree.write() output across compression settings for the same
# 200-child document.
4105 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
# compression=0: s0 is the reference output for the comparisons below.
4106 f = BytesIO()
4107 tree.write(f, compression=0)
4108 s0 = f.getvalue()
4109
# Omitting the compression argument must produce the same bytes as
# compression=0.
4110 f = BytesIO()
4111 tree.write(f)
4112 self.assertEqual(f.getvalue(), s0)
4113
# compression=1: the output must not be longer than s0; it is then decoded
# with gzip into s1.
4114 f = BytesIO()
4115 tree.write(f, compression=1)
4116 s = f.getvalue()
4117 self.assertTrue(len(s) <= len(s0))
4118 gzfile = gzip.GzipFile(fileobj=BytesIO(s))
4119 try:
4120 s1 = gzfile.read()
4121 finally:
4122 gzfile.close()
4123
# compression=9: same two checks, decoded into s9.
4124 f = BytesIO()
4125 tree.write(f, compression=9)
4126 s = f.getvalue()
4127 self.assertTrue(len(s) <= len(s0))
4128 gzfile = gzip.GzipFile(fileobj=BytesIO(s))
4129 try:
4130 s9 = gzfile.read()
4131 finally:
4132 gzfile.close()
4133
# Final checks: the compression=0 output is the plain document itself, and
# both gzip results decode back to that same document.
4134 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4135 s0)
4136 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4137 s1)
4138 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4139 s9)
4140
4152
4168
4180
4193
# Rebinds the imported `etree` name in the enclosing scope. The code below
# reads it as `self.etree`, so this is presumably a class attribute -- the
# class header is not visible in this listing; TODO confirm.
4195 etree = etree
4196
4218
4220 """This can't really be tested as long as there isn't a way to
4221 reset the logging setup ...
4222 """
4223 parse = self.etree.parse
4224
# Collector: a PyErrorLog subclass whose log() appends every message string
# it receives to the local `messages` list.
4225 messages = []
4226 class Logger(self.etree.PyErrorLog):
4227 def log(self, entry, message, *args):
4228 messages.append(message)
4229
# The collector is handed to use_global_python_log(); nothing in this block
# undoes that afterwards (see the docstring above).
4230 self.etree.use_global_python_log(Logger())
# Malformed input: </c> appears while <b> is still open.
# NOTE(review): BytesIO is the helper imported from common_imports and is
# given a str literal here -- confirm that helper accepts text on Python 3.
4231 f = BytesIO('<a><b></c></b></a>')
# A SyntaxError from parse() is deliberately swallowed; only the logged
# messages are inspected below.
4232 try:
4233 parse(f)
4234 except SyntaxError:
4235 pass
4236 f.close()
4237
# Each assertTrue receives a list, so it passes when at least one collected
# message contains the given substring.
4238 self.assertTrue([ message for message in messages
4239 if 'mismatch' in message ])
4240 self.assertTrue([ message for message in messages
4241 if ':PARSER:' in message])
4242 self.assertTrue([ message for message in messages
4243 if ':ERR_TAG_NAME_MISMATCH:' in message ])
4244 self.assertTrue([ message for message in messages
4245 if ':1:15:' in message ])
4246
4247
# Rebinds the imported `etree` name in the enclosing scope. The code below
# reads it as `self.etree`, so this is presumably a class attribute -- the
# class header is not visible in this listing; TODO confirm.
4249 etree = etree
4250
4254
# Minimal parser target: each callback returns a marker string naming the
# callback and, for start/end, the tag it was called with.
4256 class Target(object):
4257 def start(self, tag, attrib):
4258 return 'start(%s)' % tag
4259 def end(self, tag):
4260 return 'end(%s)' % tag
4261 def close(self):
4262 return 'close()'
4263
# No event list is passed to XMLPullParser here; the assertions below only
# ever expect 'end' events.
4264 parser = self.etree.XMLPullParser(target=Target())
4265 events = parser.read_events()
4266
# Only opening tags have been fed so far, so no event is expected; the
# second check confirms that draining the iterator again also yields nothing.
4267 parser.feed('<root><element>')
4268 self.assertFalse(list(events))
4269 self.assertFalse(list(events))
# Each closing tag produces one ('end', value) pair whose value is the
# string returned by Target.end(), not an element object.
4270 parser.feed('</element><child>')
4271 self.assertEqual([('end', 'end(element)')], list(events))
4272 parser.feed('</child>')
4273 self.assertEqual([('end', 'end(child)')], list(events))
4274 parser.feed('</root>')
4275 self.assertEqual([('end', 'end(root)')], list(events))
4276 self.assertFalse(list(events))
# parser.close() hands back whatever the target's close() returned.
4277 self.assertEqual('close()', parser.close())
4278
# Minimal parser target: each callback returns a marker string naming the
# callback and, for start/end, the tag it was called with.
4280 class Target(object):
4281 def start(self, tag, attrib):
4282 return 'start(%s)' % tag
4283 def end(self, tag):
4284 return 'end(%s)' % tag
4285 def close(self):
4286 return 'close()'
4287
# Both 'start' and 'end' events are requested explicitly.
4288 parser = self.etree.XMLPullParser(
4289 ['start', 'end'], target=Target())
4290 events = parser.read_events()
4291
# Event values are the strings returned by the matching Target callback.
# Each feed() is followed by a full drain of the shared `events` iterator,
# so every assertion sees only the events produced since the previous drain.
4292 parser.feed('<root><element>')
4293 self.assertEqual(
4294 [('start', 'start(root)'), ('start', 'start(element)')],
4295 list(events))
4296 self.assertFalse(list(events))
4297 parser.feed('</element><child>')
4298 self.assertEqual(
4299 [('end', 'end(element)'), ('start', 'start(child)')],
4300 list(events))
4301 parser.feed('</child>')
4302 self.assertEqual(
4303 [('end', 'end(child)')],
4304 list(events))
4305 parser.feed('</root>')
4306 self.assertEqual(
4307 [('end', 'end(root)')],
4308 list(events))
4309 self.assertFalse(list(events))
# parser.close() hands back whatever the target's close() returned.
4310 self.assertEqual('close()', parser.close())
4311
# Same feed/drain sequence as a custom-target pull parser, but with a stock
# TreeBuilder as the target.
# NOTE(review): the TreeBuilder comes from the module-level `etree`, while
# the parser comes from `self.etree` -- confirm the two are meant to be the
# same module.
4313 parser = self.etree.XMLPullParser(
4314 ['start', 'end'], target=etree.TreeBuilder())
4315 events = parser.read_events()
4316
# assert_event_tags is a helper defined outside this block; it is called
# with the event iterator and the expected (event, tag) pairs.
4317 parser.feed('<root><element>')
4318 self.assert_event_tags(
4319 events, [('start', 'root'), ('start', 'element')])
4320 self.assertFalse(list(events))
4321 parser.feed('</element><child>')
4322 self.assert_event_tags(
4323 events, [('end', 'element'), ('start', 'child')])
4324 parser.feed('</child>')
4325 self.assert_event_tags(
4326 events, [('end', 'child')])
4327 parser.feed('</root>')
4328 self.assert_event_tags(
4329 events, [('end', 'root')])
4330 self.assertFalse(list(events))
# close() is expected to return an object whose tag is 'root'.
4331 root = parser.close()
4332 self.assertEqual('root', root.tag)
4333
# TreeBuilder subclass whose end() takes the element returned by the base
# class, appends '-huhu' to its tag and returns that same element.
4335 class Target(etree.TreeBuilder):
4336 def end(self, tag):
4337 el = super(Target, self).end(tag)
4338 el.tag += '-huhu'
4339 return el
4340
4341 parser = self.etree.XMLPullParser(
4342 ['start', 'end'], target=Target())
4343 events = parser.read_events()
4344
# Expected tags: 'start' events carry the original tag (start() is not
# overridden), 'end' events carry the tag as renamed by Target.end().
# assert_event_tags is a helper defined outside this block.
4345 parser.feed('<root><element>')
4346 self.assert_event_tags(
4347 events, [('start', 'root'), ('start', 'element')])
4348 self.assertFalse(list(events))
4349 parser.feed('</element><child>')
4350 self.assert_event_tags(
4351 events, [('end', 'element-huhu'), ('start', 'child')])
4352 parser.feed('</child>')
4353 self.assert_event_tags(
4354 events, [('end', 'child-huhu')])
4355 parser.feed('</root>')
4356 self.assert_event_tags(
4357 events, [('end', 'root-huhu')])
4358 self.assertFalse(list(events))
# The object returned by close() carries the renamed root tag as well.
4359 root = parser.close()
4360 self.assertEqual('root-huhu', root.tag)
4361
4362
4386
# When the file is executed directly, the only action in this guard is to
# print a hint pointing at test.py.
4387 if __name__ == '__main__':
4388 print('to test use test.py %s' % __file__)
4389