1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 import os.path
11 import unittest
12 import copy
13 import sys
14 import re
15 import gc
16 import operator
17 import tempfile
18 import zlib
19 import gzip
20
# Put the directory that holds this file at the front of ``sys.path`` (once),
# presumably so the ``common_imports`` import that follows can be resolved.
this_dir = os.path.dirname(__file__)
if this_dir not in sys.path:
    sys.path.insert(0, this_dir)
24
25 from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir, read_file
26 from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
27 from common_imports import canonicalize, sorted, _str, _bytes
28
# Print a banner with the lxml / libxml2 / libxslt versions under test.
# ``print("")`` is kept as-is because this file still carries Python 2
# fallbacks (see the ``unicode`` probe below).
print("")
print("TESTED VERSION: %s" % etree.__version__)
print(" Python: " + repr(sys.version_info))
print(" lxml.etree: " + repr(etree.LXML_VERSION))
print(" libxml used: " + repr(etree.LIBXML_VERSION))
print(" libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION))
print(" libxslt used: " + repr(etree.LIBXSLT_VERSION))
print(" libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION))
print("")
38
# ``_unicode`` names the text string type on either major Python version.
try:
    _unicode = unicode
except NameError:
    # No ``unicode`` builtin (Python 3): ``str`` is the text type.
    _unicode = str
44
46 """Tests only for etree, not ElementTree"""
47 etree = etree
48
59
68
75
77 Element = self.etree.Element
78 el = Element('name')
79 self.assertRaises(ValueError, Element, '{}')
80 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
81
82 self.assertRaises(ValueError, Element, '{test}')
83 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
84
92
94 Element = self.etree.Element
95 self.assertRaises(ValueError, Element, "p'name")
96 self.assertRaises(ValueError, Element, 'p"name')
97
98 self.assertRaises(ValueError, Element, "{test}p'name")
99 self.assertRaises(ValueError, Element, '{test}p"name')
100
101 el = Element('name')
102 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
103 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, ' name ')
108 self.assertRaises(ValueError, Element, 'na me')
109 self.assertRaises(ValueError, Element, '{test} name')
110
111 el = Element('name')
112 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
113
121
129
131 Element = self.etree.Element
132 SubElement = self.etree.SubElement
133
134 el = Element('name')
135 self.assertRaises(ValueError, SubElement, el, "p'name")
136 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
137
138 self.assertRaises(ValueError, SubElement, el, 'p"name')
139 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
140
149
158
160 QName = self.etree.QName
161 self.assertRaises(ValueError, QName, '')
162 self.assertRaises(ValueError, QName, 'test', '')
163
165 QName = self.etree.QName
166 self.assertRaises(ValueError, QName, 'p:name')
167 self.assertRaises(ValueError, QName, 'test', 'p:name')
168
170 QName = self.etree.QName
171 self.assertRaises(ValueError, QName, ' name ')
172 self.assertRaises(ValueError, QName, 'na me')
173 self.assertRaises(ValueError, QName, 'test', ' name')
174
182
184
185 QName = self.etree.QName
186 qname1 = QName('http://myns', 'a')
187 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
188
189 qname2 = QName(a)
190 self.assertEqual(a.tag, qname1.text)
191 self.assertEqual(qname1.text, qname2.text)
192 self.assertEqual(qname1, qname2)
193
195
196 etree = self.etree
197 qname = etree.QName('http://myns', 'a')
198 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
199 a.text = qname
200
201 self.assertEqual("p:a", a.text)
202
211
226
232
240
254
276
278 XML = self.etree.XML
279 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
280
281 root = XML(xml)
282 self.etree.strip_elements(root, 'a')
283 self.assertEqual(_bytes('<test><x></x></test>'),
284 self._writeElement(root))
285
286 root = XML(xml)
287 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
288 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
289 self._writeElement(root))
290
291 root = XML(xml)
292 self.etree.strip_elements(root, 'c')
293 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
294 self._writeElement(root))
295
297 XML = self.etree.XML
298 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
299
300 root = XML(xml)
301 self.etree.strip_elements(root, 'a')
302 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
303 self._writeElement(root))
304
305 root = XML(xml)
306 self.etree.strip_elements(root, '{urn:a}b', 'c')
307 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
308 self._writeElement(root))
309
310 root = XML(xml)
311 self.etree.strip_elements(root, '{urn:a}*', 'c')
312 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
313 self._writeElement(root))
314
315 root = XML(xml)
316 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
317 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
318 self._writeElement(root))
319
338
364
391
418
437
450
461
467
469 XML = self.etree.XML
470 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
471 self.assertEqual(root[0].target, "mypi")
472 self.assertEqual(root[0].get('my'), "1")
473 self.assertEqual(root[0].get('test'), " abc ")
474 self.assertEqual(root[0].get('quotes'), "' '")
475 self.assertEqual(root[0].get('only'), None)
476 self.assertEqual(root[0].get('names'), None)
477 self.assertEqual(root[0].get('nope'), None)
478
480 XML = self.etree.XML
481 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
482 self.assertEqual(root[0].target, "mypi")
483 self.assertEqual(root[0].attrib['my'], "1")
484 self.assertEqual(root[0].attrib['test'], " abc ")
485 self.assertEqual(root[0].attrib['quotes'], "' '")
486 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
487 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
488 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
489
491
492 ProcessingInstruction = self.etree.ProcessingInstruction
493
494 a = ProcessingInstruction("PI", "ONE")
495 b = copy.deepcopy(a)
496 b.text = "ANOTHER"
497
498 self.assertEqual('ONE', a.text)
499 self.assertEqual('ANOTHER', b.text)
500
516
531
541
553
572
577
590
601
602 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
603 events = list(iterparse(f, events=('end', 'comment')))
604 root = events[-1][1]
605 self.assertEqual(6, len(events))
606 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
607 [ name(*item) for item in events ])
608 self.assertEqual(
609 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
610 tostring(root))
611
623
624 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
625 events = list(iterparse(f, events=('end', 'pi')))
626 root = events[-2][1]
627 self.assertEqual(8, len(events))
628 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
629 ('pid','d'), 'a', ('pie','e')],
630 [ name(*item) for item in events ])
631 self.assertEqual(
632 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
633 tostring(ElementTree(root)))
634
649
655
657 iterparse = self.etree.iterparse
658 f = BytesIO("""
659 <a> \n \n <b> b test </b> \n
660
661 \n\t <c> \n </c> </a> \n """)
662 iterator = iterparse(f, remove_blank_text=True)
663 text = [ (element.text, element.tail)
664 for event, element in iterator ]
665 self.assertEqual(
666 [(" b test ", None), (" \n ", None), (None, None)],
667 text)
668
670 iterparse = self.etree.iterparse
671 f = BytesIO('<a><b><d/></b><c/></a>')
672
673 iterator = iterparse(f, tag="b", events=('start', 'end'))
674 events = list(iterator)
675 root = iterator.root
676 self.assertEqual(
677 [('start', root[0]), ('end', root[0])],
678 events)
679
681 iterparse = self.etree.iterparse
682 f = BytesIO('<a><b><d/></b><c/></a>')
683
684 iterator = iterparse(f, tag="*", events=('start', 'end'))
685 events = list(iterator)
686 self.assertEqual(
687 8,
688 len(events))
689
691 iterparse = self.etree.iterparse
692 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
693
694 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
695 events = list(iterator)
696 root = iterator.root
697 self.assertEqual(
698 [('start', root[0]), ('end', root[0])],
699 events)
700
702 iterparse = self.etree.iterparse
703 f = BytesIO('<a><b><d/></b><c/></a>')
704 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
705 events = list(iterator)
706 root = iterator.root
707 self.assertEqual(
708 [('start', root[0]), ('end', root[0])],
709 events)
710
711 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
712 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
713 events = list(iterator)
714 root = iterator.root
715 self.assertEqual([], events)
716
718 iterparse = self.etree.iterparse
719 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
720 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
721 events = list(iterator)
722 self.assertEqual(8, len(events))
723
725 iterparse = self.etree.iterparse
726 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
727 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
728 events = list(iterator)
729 self.assertEqual([], events)
730
731 f = BytesIO('<a><b><d/></b><c/></a>')
732 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
733 events = list(iterator)
734 self.assertEqual(8, len(events))
735
737 text = _str('Søk på nettet')
738 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
739 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
740 ).encode('iso-8859-1')
741
742 self.assertRaises(self.etree.ParseError,
743 list, self.etree.iterparse(BytesIO(xml_latin1)))
744
746 text = _str('Søk på nettet', encoding="UTF-8")
747 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
748 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
749 ).encode('iso-8859-1')
750
751 iterator = self.etree.iterparse(BytesIO(xml_latin1),
752 encoding="iso-8859-1")
753 self.assertEqual(1, len(list(iterator)))
754
755 a = iterator.root
756 self.assertEqual(a.text, text)
757
759 tostring = self.etree.tostring
760 f = BytesIO('<root><![CDATA[test]]></root>')
761 context = self.etree.iterparse(f, strip_cdata=False)
762 content = [ el.text for event,el in context ]
763
764 self.assertEqual(['test'], content)
765 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
766 tostring(context.root))
767
771
776
795
796
797
808 def end(self, tag):
809 events.append("end")
810 assertEqual("TAG", tag)
811 def close(self):
812 return "DONE"
813
814 parser = self.etree.XMLParser(target=Target())
815 tree = self.etree.ElementTree()
816
817 self.assertRaises(TypeError,
818 tree.parse, BytesIO("<TAG/>"), parser=parser)
819 self.assertEqual(["start", "end"], events)
820
822
823 events = []
class Target(object):
    """Parser target that logs every callback and fails on the end of ``a``.

    Each callback appends a label to the enclosing ``events`` list, so the
    surrounding test can check which callbacks ran around the failure.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        # Log first so the failing tag is still visible in ``events``.
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        events.append("data-" + data)

    def close(self):
        events.append("close")
        return "DONE"
836
837 parser = self.etree.XMLParser(target=Target())
838
839 try:
840 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
841 done = parser.close()
842 self.fail("error expected, but parsing succeeded")
843 except ValueError:
844 done = 'value error received as expected'
845
846 self.assertEqual(["start-root", "data-A", "start-a",
847 "data-ca", "end-a", "close"],
848 events)
849
851
852 events = []
class Target(object):
    """Parser target that records callbacks and raises when ``a`` ends.

    All callbacks append a label to the enclosing ``events`` list; ``end``
    raises ``ValueError`` for the tag ``a`` after recording it.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        # The event is recorded before raising, so it appears in the log.
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        events.append("data-" + data)

    def close(self):
        events.append("close")
        return "DONE"
865
866 parser = self.etree.XMLParser(target=Target())
867
868 try:
869 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
870 parser=parser)
871 self.fail("error expected, but parsing succeeded")
872 except ValueError:
873 done = 'value error received as expected'
874
875 self.assertEqual(["start-root", "data-A", "start-a",
876 "data-ca", "end-a", "close"],
877 events)
878
884 def end(self, tag):
885 events.append("end-" + tag)
886 def data(self, data):
887 events.append("data-" + data)
888 def comment(self, text):
889 events.append("comment-" + text)
890 def close(self):
891 return "DONE"
892
893 parser = self.etree.XMLParser(target=Target())
894
895 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
896 done = parser.close()
897
898 self.assertEqual("DONE", done)
899 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
900 "start-sub", "end-sub", "comment-c", "data-B",
901 "end-root", "comment-d"],
902 events)
903
905 events = []
class Target(object):
    """Parser target that logs start/end/data and processing instructions.

    Each callback appends a label to the enclosing ``events`` list;
    ``close`` only returns ``"DONE"`` and logs nothing.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        events.append("end-" + tag)

    def data(self, data):
        events.append("data-" + data)

    def pi(self, target, data):
        # Logged as "pi-<target>-<data>".
        events.append("pi-" + target + "-" + data)

    def close(self):
        return "DONE"
917
918 parser = self.etree.XMLParser(target=Target())
919
920 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
921 done = parser.close()
922
923 self.assertEqual("DONE", done)
924 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
925 "data-B", "end-root", "pi-test-c"],
926 events)
927
929 events = []
class Target(object):
    """Parser target that logs start, end and data callbacks.

    Labels are appended to the enclosing ``events`` list; ``close`` returns
    ``"DONE"`` without logging.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        events.append("end-" + tag)

    def data(self, data):
        events.append("data-" + data)

    def close(self):
        return "DONE"
939
940 parser = self.etree.XMLParser(target=Target(),
941 strip_cdata=False)
942
943 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
944 done = parser.close()
945
946 self.assertEqual("DONE", done)
947 self.assertEqual(["start-root", "data-A", "start-a",
948 "data-ca", "end-a", "data-B", "end-root"],
949 events)
950
952 events = []
class Target(object):
    """Parser target that logs start, end, data and close callbacks.

    Labels are appended to the enclosing ``events`` list; ``close`` logs
    ``"close"`` and then returns ``"DONE"``.
    """

    def start(self, tag, attrib):
        events.append("start-" + tag)

    def end(self, tag):
        events.append("end-" + tag)

    def data(self, data):
        events.append("data-" + data)

    def close(self):
        events.append("close")
        return "DONE"
963
964 parser = self.etree.XMLParser(target=Target(),
965 recover=True)
966
967 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
968 done = parser.close()
969
970 self.assertEqual("DONE", done)
971 self.assertEqual(["start-root", "data-A", "start-a",
972 "data-ca", "end-a", "data-B",
973 "end-root", "close"],
974 events)
975
985
995
1004
1014
1016 iterwalk = self.etree.iterwalk
1017 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1018
1019 iterator = iterwalk(root, events=('start','end'))
1020 events = list(iterator)
1021 self.assertEqual(
1022 [('start', root), ('start', root[0]), ('end', root[0]),
1023 ('start', root[1]), ('end', root[1]), ('end', root)],
1024 events)
1025
1036
1038 iterwalk = self.etree.iterwalk
1039 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1040
1041 attr_name = '{testns}bla'
1042 events = []
1043 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1044 for event, elem in iterator:
1045 events.append(event)
1046 if event == 'start':
1047 if elem.tag != '{ns1}a':
1048 elem.set(attr_name, 'value')
1049
1050 self.assertEqual(
1051 ['start-ns', 'start', 'start', 'start-ns', 'start',
1052 'end', 'end-ns', 'end', 'end', 'end-ns'],
1053 events)
1054
1055 self.assertEqual(
1056 None,
1057 root.get(attr_name))
1058 self.assertEqual(
1059 'value',
1060 root[0].get(attr_name))
1061
1072
1074 parse = self.etree.parse
1075 parser = self.etree.XMLParser(dtd_validation=True)
1076 assertEqual = self.assertEqual
1077 test_url = _str("__nosuch.dtd")
1078
class MyResolver(self.etree.Resolver):
    """Resolver that answers the expected URL with an in-memory DTD string."""

    def resolve(self, url, id, context):
        # Only the URL named in the test document should ever be requested.
        assertEqual(url, test_url)
        # The DTD declares ``myentity`` with the requested URL as its value
        # and allows any content in ``doc``.
        dtd = _str('<!ENTITY myentity "%s">\n<!ELEMENT doc ANY>') % url
        return self.resolve_string(dtd, context)
1085
1086 parser.resolvers.add(MyResolver())
1087
1088 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1089 tree = parse(StringIO(xml), parser)
1090 root = tree.getroot()
1091 self.assertEqual(root.text, test_url)
1092
1094 parse = self.etree.parse
1095 parser = self.etree.XMLParser(dtd_validation=True)
1096 assertEqual = self.assertEqual
1097 test_url = _str("__nosuch.dtd")
1098
class MyResolver(self.etree.Resolver):
    """Resolver that answers the expected URL with a UTF-8 encoded DTD."""

    def resolve(self, url, id, context):
        # Only the URL named in the test document should ever be requested.
        assertEqual(url, test_url)
        dtd = _str('<!ENTITY myentity "%s">\n<!ELEMENT doc ANY>') % url
        # Hand the DTD over as bytes rather than text.
        return self.resolve_string(dtd.encode('utf-8'), context)
1106
1107 parser.resolvers.add(MyResolver())
1108
1109 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1110 tree = parse(StringIO(xml), parser)
1111 root = tree.getroot()
1112 self.assertEqual(root.text, test_url)
1113
1115 parse = self.etree.parse
1116 parser = self.etree.XMLParser(dtd_validation=True)
1117 assertEqual = self.assertEqual
1118 test_url = _str("__nosuch.dtd")
1119
class MyResolver(self.etree.Resolver):
    """Resolver that serves the DTD through a ``SillyFileLike`` object."""

    def resolve(self, url, id, context):
        # Only the URL named in the test document should ever be requested.
        assertEqual(url, test_url)
        dtd = _str('<!ENTITY myentity "%s">\n<!ELEMENT doc ANY>') % url
        return self.resolve_file(SillyFileLike(dtd), context)
1127
1128 parser.resolvers.add(MyResolver())
1129
1130 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1131 tree = parse(StringIO(xml), parser)
1132 root = tree.getroot()
1133 self.assertEqual(root.text, test_url)
1134
1136 parse = self.etree.parse
1137 parser = self.etree.XMLParser(attribute_defaults=True)
1138 assertEqual = self.assertEqual
1139 test_url = _str("__nosuch.dtd")
1140
class MyResolver(self.etree.Resolver):
    """Resolver that redirects the expected URL to the ``test.dtd`` file."""

    def resolve(self, url, id, context):
        # Only the URL named in the test document should ever be requested.
        assertEqual(url, test_url)
        return self.resolve_filename(
            fileInTestDir('test.dtd'), context)
1146
1147 parser.resolvers.add(MyResolver())
1148
1149 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1150 tree = parse(StringIO(xml), parser)
1151 root = tree.getroot()
1152 self.assertEqual(
1153 root.attrib, {'default': 'valueA'})
1154 self.assertEqual(
1155 root[0].attrib, {'default': 'valueB'})
1156
1168
1169 parser.resolvers.add(MyResolver())
1170
1171 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1172 tree = parse(StringIO(xml), parser,
1173 base_url=fileInTestDir('__test.xml'))
1174 root = tree.getroot()
1175 self.assertEqual(
1176 root.attrib, {'default': 'valueA'})
1177 self.assertEqual(
1178 root[0].attrib, {'default': 'valueB'})
1179
1181 parse = self.etree.parse
1182 parser = self.etree.XMLParser(attribute_defaults=True)
1183 assertEqual = self.assertEqual
1184 test_url = _str("__nosuch.dtd")
1185
class MyResolver(self.etree.Resolver):
    """Resolver that serves ``test.dtd`` as an open binary file object."""

    def resolve(self, url, id, context):
        # Only the URL named in the test document should ever be requested.
        assertEqual(url, test_url)
        # NOTE(review): the file object is handed to ``resolve_file`` and is
        # never closed explicitly in this block; confirm who closes it.
        return self.resolve_file(
            open(fileInTestDir('test.dtd'), 'rb'), context)
1191
1192 parser.resolvers.add(MyResolver())
1193
1194 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1195 tree = parse(StringIO(xml), parser)
1196 root = tree.getroot()
1197 self.assertEqual(
1198 root.attrib, {'default': 'valueA'})
1199 self.assertEqual(
1200 root[0].attrib, {'default': 'valueB'})
1201
1203 parse = self.etree.parse
1204 parser = self.etree.XMLParser(load_dtd=True)
1205 assertEqual = self.assertEqual
1206 test_url = _str("__nosuch.dtd")
1207
class check(object):
    """Class-level flag holder shared with the resolver defined below it."""

    # Starts False; the resolver sets it to True when it is invoked.
    resolved = False
1210
class MyResolver(self.etree.Resolver):
    """Resolver that records its invocation and returns an empty result."""

    def resolve(self, url, id, context):
        # Only the URL named in the test document should ever be requested.
        assertEqual(url, test_url)
        # Let the surrounding test verify that the resolver was consulted.
        check.resolved = True
        return self.resolve_empty(context)
1216
1217 parser.resolvers.add(MyResolver())
1218
1219 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1220 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1221 self.assertTrue(check.resolved)
1222
1229
class MyResolver(self.etree.Resolver):
    """Resolver whose ``resolve`` always raises ``_LocalException``."""

    def resolve(self, url, id, context):
        # ``_LocalException`` is defined outside this block (not visible
        # here); the surrounding test expects it to propagate out of parse().
        raise _LocalException
1233
1234 parser.resolvers.add(MyResolver())
1235
1236 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1237 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1238
1239 if etree.LIBXML_VERSION > (2,6,20):
1256
1258 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1259 <root>
1260 <child1/>
1261 <child2/>
1262 <child3> </child3>
1263 </root>''')
1264
1265 parser = self.etree.XMLParser(resolve_entities=False)
1266 root = etree.fromstring(xml, parser)
1267 self.assertEqual([ el.tag for el in root ],
1268 ['child1', 'child2', 'child3'])
1269
1270 root[0] = root[-1]
1271 self.assertEqual([ el.tag for el in root ],
1272 ['child3', 'child2'])
1273 self.assertEqual(root[0][0].text, ' ')
1274 self.assertEqual(root[0][0].name, 'nbsp')
1275
1291
1298
1300 Entity = self.etree.Entity
1301 self.assertRaises(ValueError, Entity, 'a b c')
1302 self.assertRaises(ValueError, Entity, 'a,b')
1303 self.assertRaises(ValueError, Entity, 'a\0b')
1304 self.assertRaises(ValueError, Entity, '#abc')
1305 self.assertRaises(ValueError, Entity, '#xxyz')
1306
1319
1332
1334 CDATA = self.etree.CDATA
1335 Element = self.etree.Element
1336
1337 root = Element("root")
1338 cdata = CDATA('test')
1339
1340 self.assertRaises(TypeError,
1341 setattr, root, 'tail', cdata)
1342 self.assertRaises(TypeError,
1343 root.set, 'attr', cdata)
1344 self.assertRaises(TypeError,
1345 operator.setitem, root.attrib, 'attr', cdata)
1346
1355
1364
1365
1375
1384
1386 Element = self.etree.Element
1387 SubElement = self.etree.SubElement
1388 root = Element('root')
1389 self.assertRaises(ValueError, root.append, root)
1390 child = SubElement(root, 'child')
1391 self.assertRaises(ValueError, child.append, root)
1392 child2 = SubElement(child, 'child2')
1393 self.assertRaises(ValueError, child2.append, root)
1394 self.assertRaises(ValueError, child2.append, child)
1395 self.assertEqual('child2', root[0][0].tag)
1396
1409
1422
1438
1454
1460
1475
1488
1503
1516
1531
1544
1559
1572
1573
1581
1582
1592
1593
1608
1609
1619
1620
1631
1632
1634 self.assertRaises(TypeError, self.etree.dump, None)
1635
1648
1661
1682
1691
1700
1709
1718
1720 XML = self.etree.XML
1721
1722 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1723 result = []
1724 for el in root.iterchildren(tag=['two', 'three']):
1725 result.append(el.text)
1726 self.assertEqual(['Two', 'Bla', None], result)
1727
1736
1757
1772
1774 Element = self.etree.Element
1775 SubElement = self.etree.SubElement
1776
1777 a = Element('a')
1778 b = SubElement(a, 'b')
1779 c = SubElement(a, 'c')
1780 d = SubElement(b, 'd')
1781 self.assertEqual(
1782 [b, a],
1783 list(d.iterancestors(tag=('a', 'b'))))
1784 self.assertEqual(
1785 [],
1786 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
1787 self.assertEqual(
1788 [],
1789 list(d.iterancestors(tag=('d', 'x'))))
1790 self.assertEqual(
1791 [b, a],
1792 list(d.iterancestors(tag=('b', '*'))))
1793 self.assertEqual(
1794 [b],
1795 list(d.iterancestors(tag=('b', 'c'))))
1796
1813
1834
1836 Element = self.etree.Element
1837 SubElement = self.etree.SubElement
1838
1839 a = Element('a')
1840 b = SubElement(a, 'b')
1841 c = SubElement(a, 'c')
1842 d = SubElement(b, 'd')
1843 e = SubElement(c, 'e')
1844
1845 self.assertEqual(
1846 [b, e],
1847 list(a.iterdescendants(tag=('a', 'b', 'e'))))
1848 a2 = SubElement(e, 'a')
1849 self.assertEqual(
1850 [b, a2],
1851 list(a.iterdescendants(tag=('a', 'b'))))
1852 self.assertEqual(
1853 [],
1854 list(c.iterdescendants(tag=('x', 'y', 'z'))))
1855 self.assertEqual(
1856 [b, d, c, e, a2],
1857 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
1858
1876
1893
1911
1935
1937 Element = self.etree.Element
1938 SubElement = self.etree.SubElement
1939
1940 a = Element('a')
1941 b = SubElement(a, 'b')
1942 c = SubElement(a, 'c')
1943 d = SubElement(b, 'd')
1944 self.assertEqual(
1945 [],
1946 list(a.itersiblings(tag='XXX')))
1947 self.assertEqual(
1948 [c],
1949 list(b.itersiblings(tag='c')))
1950 self.assertEqual(
1951 [c],
1952 list(b.itersiblings(tag='*')))
1953 self.assertEqual(
1954 [b],
1955 list(c.itersiblings(preceding=True, tag='b')))
1956 self.assertEqual(
1957 [],
1958 list(c.itersiblings(preceding=True, tag='c')))
1959
1961 Element = self.etree.Element
1962 SubElement = self.etree.SubElement
1963
1964 a = Element('a')
1965 b = SubElement(a, 'b')
1966 c = SubElement(a, 'c')
1967 d = SubElement(b, 'd')
1968 e = SubElement(a, 'e')
1969 self.assertEqual(
1970 [],
1971 list(a.itersiblings(tag=('XXX', 'YYY'))))
1972 self.assertEqual(
1973 [c, e],
1974 list(b.itersiblings(tag=('c', 'd', 'e'))))
1975 self.assertEqual(
1976 [b],
1977 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
1978 self.assertEqual(
1979 [c, b],
1980 list(e.itersiblings(preceding=True, tag=('c', '*'))))
1981
1983 parseid = self.etree.parseid
1984 XML = self.etree.XML
1985 xml_text = _bytes('''
1986 <!DOCTYPE document [
1987 <!ELEMENT document (h1,p)*>
1988 <!ELEMENT h1 (#PCDATA)>
1989 <!ATTLIST h1 myid ID #REQUIRED>
1990 <!ELEMENT p (#PCDATA)>
1991 <!ATTLIST p someid ID #REQUIRED>
1992 ]>
1993 <document>
1994 <h1 myid="chapter1">...</h1>
1995 <p id="note1" class="note">...</p>
1996 <p>Regular paragraph.</p>
1997 <p xml:id="xmlid">XML:ID paragraph.</p>
1998 <p someid="warn1" class="warning">...</p>
1999 </document>
2000 ''')
2001
2002 tree, dic = parseid(BytesIO(xml_text))
2003 root = tree.getroot()
2004 root2 = XML(xml_text)
2005 self.assertEqual(self._writeElement(root),
2006 self._writeElement(root2))
2007 expected = {
2008 "chapter1" : root[0],
2009 "xmlid" : root[3],
2010 "warn1" : root[4]
2011 }
2012 self.assertTrue("chapter1" in dic)
2013 self.assertTrue("warn1" in dic)
2014 self.assertTrue("xmlid" in dic)
2015 self._checkIDDict(dic, expected)
2016
2018 XMLDTDID = self.etree.XMLDTDID
2019 XML = self.etree.XML
2020 xml_text = _bytes('''
2021 <!DOCTYPE document [
2022 <!ELEMENT document (h1,p)*>
2023 <!ELEMENT h1 (#PCDATA)>
2024 <!ATTLIST h1 myid ID #REQUIRED>
2025 <!ELEMENT p (#PCDATA)>
2026 <!ATTLIST p someid ID #REQUIRED>
2027 ]>
2028 <document>
2029 <h1 myid="chapter1">...</h1>
2030 <p id="note1" class="note">...</p>
2031 <p>Regular paragraph.</p>
2032 <p xml:id="xmlid">XML:ID paragraph.</p>
2033 <p someid="warn1" class="warning">...</p>
2034 </document>
2035 ''')
2036
2037 root, dic = XMLDTDID(xml_text)
2038 root2 = XML(xml_text)
2039 self.assertEqual(self._writeElement(root),
2040 self._writeElement(root2))
2041 expected = {
2042 "chapter1" : root[0],
2043 "xmlid" : root[3],
2044 "warn1" : root[4]
2045 }
2046 self.assertTrue("chapter1" in dic)
2047 self.assertTrue("warn1" in dic)
2048 self.assertTrue("xmlid" in dic)
2049 self._checkIDDict(dic, expected)
2050
2052 XMLDTDID = self.etree.XMLDTDID
2053 XML = self.etree.XML
2054 xml_text = _bytes('''
2055 <document>
2056 <h1 myid="chapter1">...</h1>
2057 <p id="note1" class="note">...</p>
2058 <p>Regular paragraph.</p>
2059 <p someid="warn1" class="warning">...</p>
2060 </document>
2061 ''')
2062
2063 root, dic = XMLDTDID(xml_text)
2064 root2 = XML(xml_text)
2065 self.assertEqual(self._writeElement(root),
2066 self._writeElement(root2))
2067 expected = {}
2068 self._checkIDDict(dic, expected)
2069
2071 self.assertEqual(len(dic),
2072 len(expected))
2073 self.assertEqual(sorted(dic.items()),
2074 sorted(expected.items()))
2075 if sys.version_info < (3,):
2076 self.assertEqual(sorted(dic.iteritems()),
2077 sorted(expected.iteritems()))
2078 self.assertEqual(sorted(dic.keys()),
2079 sorted(expected.keys()))
2080 if sys.version_info < (3,):
2081 self.assertEqual(sorted(dic.iterkeys()),
2082 sorted(expected.iterkeys()))
2083 if sys.version_info < (3,):
2084 self.assertEqual(sorted(dic.values()),
2085 sorted(expected.values()))
2086 self.assertEqual(sorted(dic.itervalues()),
2087 sorted(expected.itervalues()))
2088
2090 etree = self.etree
2091
2092 r = {'foo': 'http://ns.infrae.com/foo'}
2093 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2094 self.assertEqual(
2095 'foo',
2096 e.prefix)
2097 self.assertEqual(
2098 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2099 self._writeElement(e))
2100
2102 etree = self.etree
2103
2104 r = {None: 'http://ns.infrae.com/foo'}
2105 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2106 self.assertEqual(
2107 None,
2108 e.prefix)
2109 self.assertEqual(
2110 '{http://ns.infrae.com/foo}bar',
2111 e.tag)
2112 self.assertEqual(
2113 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2114 self._writeElement(e))
2115
2117 etree = self.etree
2118
2119 r = {None: 'http://ns.infrae.com/foo',
2120 'hoi': 'http://ns.infrae.com/hoi'}
2121 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2122 e.set('{http://ns.infrae.com/hoi}test', 'value')
2123 self.assertEqual(
2124 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2125 self._writeElement(e))
2126
2128 etree = self.etree
2129
2130 root = etree.Element('{http://test/ns}root',
2131 nsmap={None: 'http://test/ns'})
2132 sub = etree.Element('{http://test/ns}sub',
2133 nsmap={'test': 'http://test/ns'})
2134
2135 sub.attrib['{http://test/ns}attr'] = 'value'
2136 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2137 self.assertEqual(
2138 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2139 etree.tostring(sub))
2140
2141 root.append(sub)
2142 self.assertEqual(
2143 _bytes('<root xmlns="http://test/ns">'
2144 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2145 '</root>'),
2146 etree.tostring(root))
2147
2149 etree = self.etree
2150
2151 root = etree.Element('root')
2152 sub = etree.Element('{http://test/ns}sub',
2153 nsmap={'test': 'http://test/ns'})
2154
2155 sub.attrib['{http://test/ns}attr'] = 'value'
2156 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2157 self.assertEqual(
2158 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2159 etree.tostring(sub))
2160
2161 root.append(sub)
2162 self.assertEqual(
2163 _bytes('<root>'
2164 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2165 '</root>'),
2166 etree.tostring(root))
2167
2169 etree = self.etree
2170
2171 root = etree.Element('root')
2172 sub = etree.Element('{http://test/ns}sub',
2173 nsmap={None: 'http://test/ns'})
2174
2175 sub.attrib['{http://test/ns}attr'] = 'value'
2176 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2177 self.assertEqual(
2178 _bytes('<sub xmlns="http://test/ns" '
2179 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2180 etree.tostring(sub))
2181
2182 root.append(sub)
2183 self.assertEqual(
2184 _bytes('<root>'
2185 '<sub xmlns="http://test/ns"'
2186 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2187 '</root>'),
2188 etree.tostring(root))
2189
2191 etree = self.etree
2192
2193 root = etree.Element('{http://test/ns}root',
2194 nsmap={'test': 'http://test/ns',
2195 None: 'http://test/ns'})
2196 sub = etree.Element('{http://test/ns}sub',
2197 nsmap={None: 'http://test/ns'})
2198
2199 sub.attrib['{http://test/ns}attr'] = 'value'
2200 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2201 self.assertEqual(
2202 _bytes('<sub xmlns="http://test/ns" '
2203 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2204 etree.tostring(sub))
2205
2206 root.append(sub)
2207 self.assertEqual(
2208 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2209 '<test:sub test:attr="value"/>'
2210 '</test:root>'),
2211 etree.tostring(root))
2212
2214 etree = self.etree
2215 r = {None: 'http://ns.infrae.com/foo',
2216 'hoi': 'http://ns.infrae.com/hoi'}
2217 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2218 tree = etree.ElementTree(element=e)
2219 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2220 self.assertEqual(
2221 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2222 self._writeElement(e))
2223
2225 etree = self.etree
2226
2227 r = {None: 'http://ns.infrae.com/foo'}
2228 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2229 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2230
2231 e1.append(e2)
2232
2233 self.assertEqual(
2234 None,
2235 e1.prefix)
2236 self.assertEqual(
2237 None,
2238 e1[0].prefix)
2239 self.assertEqual(
2240 '{http://ns.infrae.com/foo}bar',
2241 e1.tag)
2242 self.assertEqual(
2243 '{http://ns.infrae.com/foo}bar',
2244 e1[0].tag)
2245
2247 etree = self.etree
2248
2249 r = {None: 'http://ns.infrae.com/BAR'}
2250 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2251 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2252
2253 e1.append(e2)
2254
2255 self.assertEqual(
2256 None,
2257 e1.prefix)
2258 self.assertNotEqual(
2259 None,
2260 e2.prefix)
2261 self.assertEqual(
2262 '{http://ns.infrae.com/BAR}bar',
2263 e1.tag)
2264 self.assertEqual(
2265 '{http://ns.infrae.com/foo}bar',
2266 e2.tag)
2267
2269 ns_href = "http://a.b.c"
2270 one = self.etree.fromstring(
2271 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2272 baz = one[0][0]
2273
2274 two = self.etree.fromstring(
2275 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2276 two.append(baz)
2277 del one
2278
2279 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2280 self.assertEqual(
2281 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2282 self.etree.tostring(two))
2283
2293
2295 etree = self.etree
2296
2297 r = {None: 'http://ns.infrae.com/foo',
2298 'hoi': 'http://ns.infrae.com/hoi'}
2299 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2300 self.assertEqual(
2301 r,
2302 e.nsmap)
2303
2305 etree = self.etree
2306
2307 re = {None: 'http://ns.infrae.com/foo',
2308 'hoi': 'http://ns.infrae.com/hoi'}
2309 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2310
2311 rs = {None: 'http://ns.infrae.com/honk',
2312 'top': 'http://ns.infrae.com/top'}
2313 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2314
2315 r = re.copy()
2316 r.update(rs)
2317 self.assertEqual(re, e.nsmap)
2318 self.assertEqual(r, s.nsmap)
2319
2321 etree = self.etree
2322 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2323 self.assertEqual({'hha': None}, el.nsmap)
2324
2326 Element = self.etree.Element
2327 SubElement = self.etree.SubElement
2328
2329 a = Element('a')
2330 b = SubElement(a, 'b')
2331 c = SubElement(a, 'c')
2332 d = SubElement(b, 'd')
2333 e = SubElement(c, 'e')
2334 f = SubElement(c, 'f')
2335
2336 self.assertEqual(
2337 [a, b],
2338 list(a.getiterator('a', 'b')))
2339 self.assertEqual(
2340 [],
2341 list(a.getiterator('x', 'y')))
2342 self.assertEqual(
2343 [a, f],
2344 list(a.getiterator('f', 'a')))
2345 self.assertEqual(
2346 [c, e, f],
2347 list(c.getiterator('c', '*', 'a')))
2348 self.assertEqual(
2349 [],
2350 list(a.getiterator( (), () )))
2351
2353 Element = self.etree.Element
2354 SubElement = self.etree.SubElement
2355
2356 a = Element('a')
2357 b = SubElement(a, 'b')
2358 c = SubElement(a, 'c')
2359 d = SubElement(b, 'd')
2360 e = SubElement(c, 'e')
2361 f = SubElement(c, 'f')
2362
2363 self.assertEqual(
2364 [a, b],
2365 list(a.getiterator( ('a', 'b') )))
2366 self.assertEqual(
2367 [],
2368 list(a.getiterator( ('x', 'y') )))
2369 self.assertEqual(
2370 [a, f],
2371 list(a.getiterator( ('f', 'a') )))
2372 self.assertEqual(
2373 [c, e, f],
2374 list(c.getiterator( ('c', '*', 'a') )))
2375 self.assertEqual(
2376 [],
2377 list(a.getiterator( () )))
2378
2380 Element = self.etree.Element
2381 SubElement = self.etree.SubElement
2382
2383 a = Element('{a}a')
2384 b = SubElement(a, '{a}b')
2385 c = SubElement(a, '{a}c')
2386 d = SubElement(b, '{b}d')
2387 e = SubElement(c, '{a}e')
2388 f = SubElement(c, '{b}f')
2389 g = SubElement(c, 'g')
2390
2391 self.assertEqual(
2392 [a],
2393 list(a.getiterator('{a}a')))
2394 self.assertEqual(
2395 [],
2396 list(a.getiterator('{b}a')))
2397 self.assertEqual(
2398 [],
2399 list(a.getiterator('a')))
2400 self.assertEqual(
2401 [a,b,d,c,e,f,g],
2402 list(a.getiterator('*')))
2403 self.assertEqual(
2404 [f],
2405 list(c.getiterator('{b}*')))
2406 self.assertEqual(
2407 [d, f],
2408 list(a.getiterator('{b}*')))
2409 self.assertEqual(
2410 [g],
2411 list(a.getiterator('g')))
2412 self.assertEqual(
2413 [g],
2414 list(a.getiterator('{}g')))
2415 self.assertEqual(
2416 [g],
2417 list(a.getiterator('{}*')))
2418
2420 Element = self.etree.Element
2421 SubElement = self.etree.SubElement
2422
2423 a = Element('{a}a')
2424 b = SubElement(a, '{nsA}b')
2425 c = SubElement(b, '{nsB}b')
2426 d = SubElement(a, 'b')
2427 e = SubElement(a, '{nsA}e')
2428 f = SubElement(e, '{nsB}e')
2429 g = SubElement(e, 'e')
2430
2431 self.assertEqual(
2432 [b, c, d],
2433 list(a.getiterator('{*}b')))
2434 self.assertEqual(
2435 [e, f, g],
2436 list(a.getiterator('{*}e')))
2437 self.assertEqual(
2438 [a, b, c, d, e, f, g],
2439 list(a.getiterator('{*}*')))
2440
2465
2481
2498
2505
2512
2521
2523 XML = self.etree.XML
2524 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
2525 self.assertEqual(len(root.findall(".//{X}b")), 2)
2526 self.assertEqual(len(root.findall(".//{X}*")), 2)
2527 self.assertEqual(len(root.findall(".//b")), 3)
2528
2530 XML = self.etree.XML
2531 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
2532 nsmap = {'xx': 'X'}
2533 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
2534 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
2535 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2536 nsmap = {'xx': 'Y'}
2537 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
2538 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
2539 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2540
2542 XML = self.etree.XML
2543 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
2544 nsmap = {'xx': 'X'}
2545 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
2546 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
2547 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2548 nsmap = {'xx': 'Y'}
2549 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
2550 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
2551 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2552
2559
2561 etree = self.etree
2562 e = etree.Element('foo')
2563 for i in range(10):
2564 etree.SubElement(e, 'a%s' % i)
2565 for i in range(10):
2566 self.assertEqual(
2567 i,
2568 e.index(e[i]))
2569 self.assertEqual(
2570 3, e.index(e[3], 3))
2571 self.assertRaises(
2572 ValueError, e.index, e[3], 4)
2573 self.assertRaises(
2574 ValueError, e.index, e[3], 0, 2)
2575 self.assertRaises(
2576 ValueError, e.index, e[8], 0, -3)
2577 self.assertRaises(
2578 ValueError, e.index, e[8], -5, -3)
2579 self.assertEqual(
2580 8, e.index(e[8], 0, -1))
2581 self.assertEqual(
2582 8, e.index(e[8], -12, -1))
2583 self.assertEqual(
2584 0, e.index(e[0], -12, -1))
2585
2587 etree = self.etree
2588 e = etree.Element('foo')
2589 for i in range(10):
2590 el = etree.SubElement(e, 'a%s' % i)
2591 el.text = "text%d" % i
2592 el.tail = "tail%d" % i
2593
2594 child0 = e[0]
2595 child1 = e[1]
2596 child2 = e[2]
2597
2598 e.replace(e[0], e[1])
2599 self.assertEqual(
2600 9, len(e))
2601 self.assertEqual(
2602 child1, e[0])
2603 self.assertEqual(
2604 child1.text, "text1")
2605 self.assertEqual(
2606 child1.tail, "tail1")
2607 self.assertEqual(
2608 child0.tail, "tail0")
2609 self.assertEqual(
2610 child2, e[1])
2611
2612 e.replace(e[-1], e[0])
2613 self.assertEqual(
2614 child1, e[-1])
2615 self.assertEqual(
2616 child1.text, "text1")
2617 self.assertEqual(
2618 child1.tail, "tail1")
2619 self.assertEqual(
2620 child2, e[0])
2621
2623 etree = self.etree
2624 e = etree.Element('foo')
2625 for i in range(10):
2626 etree.SubElement(e, 'a%s' % i)
2627
2628 new_element = etree.Element("test")
2629 new_element.text = "TESTTEXT"
2630 new_element.tail = "TESTTAIL"
2631 child1 = e[1]
2632 e.replace(e[0], new_element)
2633 self.assertEqual(
2634 new_element, e[0])
2635 self.assertEqual(
2636 "TESTTEXT",
2637 e[0].text)
2638 self.assertEqual(
2639 "TESTTAIL",
2640 e[0].tail)
2641 self.assertEqual(
2642 child1, e[1])
2643
2659
2677
2695
2713
2715 Element = self.etree.Element
2716 SubElement = self.etree.SubElement
2717 try:
2718 slice
2719 except NameError:
2720 print("slice() not found")
2721 return
2722
2723 a = Element('a')
2724 b = SubElement(a, 'b')
2725 c = SubElement(a, 'c')
2726 d = SubElement(a, 'd')
2727 e = SubElement(a, 'e')
2728
2729 x = Element('x')
2730 y = Element('y')
2731 z = Element('z')
2732
2733 self.assertRaises(
2734 ValueError,
2735 operator.setitem, a, slice(1,None,2), [x, y, z])
2736
2737 self.assertEqual(
2738 [b, c, d, e],
2739 list(a))
2740
2753
2761
2770
2780
2790
2796
2804
2810
2817
2823
2825 etree = self.etree
2826 xml_header = '<?xml version="1.0" encoding="ascii"?>'
2827 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
2828 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
2829 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
2830
2831 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
2832
2833 tree = etree.parse(BytesIO(xml))
2834 docinfo = tree.docinfo
2835 self.assertEqual(docinfo.encoding, "ascii")
2836 self.assertEqual(docinfo.xml_version, "1.0")
2837 self.assertEqual(docinfo.public_id, pub_id)
2838 self.assertEqual(docinfo.system_url, sys_id)
2839 self.assertEqual(docinfo.root_name, 'html')
2840 self.assertEqual(docinfo.doctype, doctype_string)
2841
2857
2869
2881
2887
2889 etree = self.etree
2890 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
2891 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
2892 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
2893
2894 xml = _bytes('<!DOCTYPE root>\n<root/>')
2895 tree = etree.parse(BytesIO(xml))
2896 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
2897 etree.tostring(tree, doctype=doctype_string))
2898
2900 etree = self.etree
2901 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2902 self.assertEqual(root.base, "http://no/such/url")
2903 self.assertEqual(
2904 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
2905 root.base = "https://secret/url"
2906 self.assertEqual(root.base, "https://secret/url")
2907 self.assertEqual(
2908 root.get('{http://www.w3.org/XML/1998/namespace}base'),
2909 "https://secret/url")
2910
2912 etree = self.etree
2913 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2914 self.assertEqual(root.base, "http://no/such/url")
2915 self.assertEqual(
2916 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
2917 root.set('{http://www.w3.org/XML/1998/namespace}base',
2918 "https://secret/url")
2919 self.assertEqual(root.base, "https://secret/url")
2920 self.assertEqual(
2921 root.get('{http://www.w3.org/XML/1998/namespace}base'),
2922 "https://secret/url")
2923
2929
2934
2941
2955
2957 Element = self.etree.Element
2958
2959 a = Element('a')
2960 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
2961 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
2962
2963 self.assertRaises(ValueError, Element, 'ha\0ho')
2964
2966 Element = self.etree.Element
2967
2968 a = Element('a')
2969 self.assertRaises(ValueError, setattr, a, "text",
2970 _str('ha\0ho'))
2971 self.assertRaises(ValueError, setattr, a, "tail",
2972 _str('ha\0ho'))
2973
2974 self.assertRaises(ValueError, Element,
2975 _str('ha\0ho'))
2976
2978 Element = self.etree.Element
2979
2980 a = Element('a')
2981 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
2982 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
2983
2984 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
2985 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
2986
2987 self.assertRaises(ValueError, Element, 'ha\x07ho')
2988 self.assertRaises(ValueError, Element, 'ha\x02ho')
2989
2991 Element = self.etree.Element
2992
2993 a = Element('a')
2994 self.assertRaises(ValueError, setattr, a, "text",
2995 _str('ha\x07ho'))
2996 self.assertRaises(ValueError, setattr, a, "text",
2997 _str('ha\x02ho'))
2998
2999 self.assertRaises(ValueError, setattr, a, "tail",
3000 _str('ha\x07ho'))
3001 self.assertRaises(ValueError, setattr, a, "tail",
3002 _str('ha\x02ho'))
3003
3004 self.assertRaises(ValueError, Element,
3005 _str('ha\x07ho'))
3006 self.assertRaises(ValueError, Element,
3007 _str('ha\x02ho'))
3008
3010 Element = self.etree.Element
3011
3012 a = Element('a')
3013 self.assertRaises(ValueError, setattr, a, "text",
3014 _str('ha\u1234\x07ho'))
3015 self.assertRaises(ValueError, setattr, a, "text",
3016 _str('ha\u1234\x02ho'))
3017
3018 self.assertRaises(ValueError, setattr, a, "tail",
3019 _str('ha\u1234\x07ho'))
3020 self.assertRaises(ValueError, setattr, a, "tail",
3021 _str('ha\u1234\x02ho'))
3022
3023 self.assertRaises(ValueError, Element,
3024 _str('ha\u1234\x07ho'))
3025 self.assertRaises(ValueError, Element,
3026 _str('ha\u1234\x02ho'))
3027
3041
3046
3064
3084
3106
3108 tostring = self.etree.tostring
3109 XML = self.etree.XML
3110 ElementTree = self.etree.ElementTree
3111
3112 root = XML(_bytes("<root/>"))
3113
3114 tree = ElementTree(root)
3115 self.assertEqual(None, tree.docinfo.standalone)
3116
3117 result = tostring(root, xml_declaration=True, encoding="ASCII")
3118 self.assertEqual(result, _bytes(
3119 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3120
3121 result = tostring(root, xml_declaration=True, encoding="ASCII",
3122 standalone=True)
3123 self.assertEqual(result, _bytes(
3124 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3125
3126 tree = ElementTree(XML(result))
3127 self.assertEqual(True, tree.docinfo.standalone)
3128
3129 result = tostring(root, xml_declaration=True, encoding="ASCII",
3130 standalone=False)
3131 self.assertEqual(result, _bytes(
3132 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3133
3134 tree = ElementTree(XML(result))
3135 self.assertEqual(False, tree.docinfo.standalone)
3136
3156
3158 tostring = self.etree.tostring
3159 Element = self.etree.Element
3160 SubElement = self.etree.SubElement
3161
3162 a = Element('a')
3163 a.text = "A"
3164 a.tail = "tail"
3165 b = SubElement(a, 'b')
3166 b.text = "B"
3167 b.tail = _str("Søk på nettet")
3168 c = SubElement(a, 'c')
3169 c.text = "C"
3170
3171 result = tostring(a, method="text", encoding="UTF-16")
3172
3173 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3174 result)
3175
3177 tostring = self.etree.tostring
3178 Element = self.etree.Element
3179 SubElement = self.etree.SubElement
3180
3181 a = Element('a')
3182 a.text = _str('Søk på nettetA')
3183 a.tail = "tail"
3184 b = SubElement(a, 'b')
3185 b.text = "B"
3186 b.tail = _str('Søk på nettetB')
3187 c = SubElement(a, 'c')
3188 c.text = "C"
3189
3190 self.assertRaises(UnicodeEncodeError,
3191 tostring, a, method="text")
3192
3193 self.assertEqual(
3194 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3195 tostring(a, encoding="UTF-8", method="text"))
3196
3209
3225
3229
3244
3262
3275
3277 tostring = self.etree.tostring
3278 Element = self.etree.Element
3279 SubElement = self.etree.SubElement
3280
3281 a = Element('a')
3282 b = SubElement(a, 'b')
3283 c = SubElement(a, 'c')
3284 d = SubElement(c, 'd')
3285 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3286 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3287 self.assertEqual(_bytes('<b></b>'),
3288 canonicalize(tostring(b, encoding=_unicode)))
3289 self.assertEqual(_bytes('<c><d></d></c>'),
3290 canonicalize(tostring(c, encoding=_unicode)))
3291
3296
3311
3313 tostring = self.etree.tostring
3314 Element = self.etree.Element
3315 SubElement = self.etree.SubElement
3316
3317 a = Element('a')
3318 b = SubElement(a, 'b')
3319 c = SubElement(a, 'c')
3320
3321 result = tostring(a, encoding=_unicode)
3322 self.assertEqual(result, "<a><b/><c/></a>")
3323
3324 result = tostring(a, encoding=_unicode, pretty_print=False)
3325 self.assertEqual(result, "<a><b/><c/></a>")
3326
3327 result = tostring(a, encoding=_unicode, pretty_print=True)
3328 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3329
3341
# Bare ElementBase subclass with no overrides.  The surrounding test creates
# instances without passing a tag and asserts that each instance's tag is
# 'SubEl', i.e. the name of this class.
3343 class SubEl(etree.ElementBase):
3344 pass
3345
3346 el1 = SubEl()
3347 el2 = SubEl()
3348 self.assertEqual('SubEl', el1.tag)
3349 self.assertEqual('SubEl', el2.tag)
3350 el1.other = el2
3351 el2.other = el1
3352
3353 del el1, el2
3354 gc.collect()
3355
3356
3357
3358
3359 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3370
3371
3374 filename = fileInTestDir('test_broken.xml')
3375 root = etree.XML(_bytes('''\
3376 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
3377 <xi:include href="%s" parse="text"/>
3378 </doc>
3379 ''' % filename))
3380 old_text = root.text
3381 content = read_file(filename)
3382 old_tail = root[0].tail
3383
3384 self.include( etree.ElementTree(root) )
3385 self.assertEqual(old_text + content + old_tail,
3386 root.text)
3387
3399
class res(etree.Resolver):
    """Test resolver that notes which categories of URL it was asked about.

    ``called`` is a class-level dict that ``resolve()`` mutates in place, so
    the recorded flags can be read back through any instance afterwards.
    ``include_text`` is the content of ``test.xml`` from the test directory,
    read once when the class body is executed.
    """
    include_text = read_file(fileInTestDir('test.xml'))
    called = {}

    def resolve(self, url, id, context):
        """Dispatch on the suffix of *url* and flag the branch that was taken.

        * ``*.dtd``               -> flag "dtd", serve the local ``test.dtd``
        * ``*test_xinclude.xml``  -> flag "input", return ``None``
        * anything else           -> flag "include", serve ``include_text``
        """
        seen = self.called

        if url.endswith(".dtd"):
            seen["dtd"] = True
            dtd_path = fileInTestDir('test.dtd')
            return self.resolve_filename(dtd_path, context)

        if url.endswith("test_xinclude.xml"):
            # This is the document handed to etree.parse() by the test.
            # NOTE(review): returning None presumably tells lxml that this
            # resolver declines the URL - confirm against the resolver docs.
            seen["input"] = True
            return None

        # Everything else is treated as an XInclude target and answered
        # from the pre-loaded string.
        seen["include"] = True
        return self.resolve_string(self.include_text, context)
3415
3416 res_instance = res()
3417 parser = etree.XMLParser(load_dtd = True)
3418 parser.resolvers.add(res_instance)
3419
3420 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3421 parser = parser)
3422
3423 self.include(tree)
3424
3425 called = list(res_instance.called.items())
3426 called.sort()
3427 self.assertEqual(
3428 [("dtd", True), ("include", True), ("input", True)],
3429 called)
3430
3434
3435
3440
3441
3444 tree = self.parse(_bytes('<a><b/></a>'))
3445 f = BytesIO()
3446 tree.write_c14n(f)
3447 s = f.getvalue()
3448 self.assertEqual(_bytes('<a><b></b></a>'),
3449 s)
3450
3452 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3453 f = BytesIO()
3454 tree.write_c14n(f, compression=9)
3455 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
3456 try:
3457 s = gzfile.read()
3458 finally:
3459 gzfile.close()
3460 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
3461 s)
3462
3474
3490
3508
3520
3532
3534 tree = self.parse(_bytes(
3535 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3536 f = BytesIO()
3537 tree.write_c14n(f)
3538 s = f.getvalue()
3539 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3540 s)
3541 f = BytesIO()
3542 tree.write_c14n(f, exclusive=False)
3543 s = f.getvalue()
3544 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3545 s)
3546 f = BytesIO()
3547 tree.write_c14n(f, exclusive=True)
3548 s = f.getvalue()
3549 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3550 s)
3551
3552 f = BytesIO()
3553 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
3554 s = f.getvalue()
3555 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
3556 s)
3557
3559 tree = self.parse(_bytes(
3560 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3561 s = etree.tostring(tree, method='c14n')
3562 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3563 s)
3564 s = etree.tostring(tree, method='c14n', exclusive=False)
3565 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3566 s)
3567 s = etree.tostring(tree, method='c14n', exclusive=True)
3568 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3569 s)
3570
3571 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
3572 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
3573 s)
3574
3576 tree = self.parse(_bytes(
3577 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3578 s = etree.tostring(tree.getroot(), method='c14n')
3579 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3580 s)
3581 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
3582 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3583 s)
3584 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
3585 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3586 s)
3587
3588 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
3589 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
3590 s)
3591 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
3592 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
3593 s)
3594
3595 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
3596 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
3597 s)
3598
3600 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
3601 tree = self.parse(_bytes(
3602 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3603
3604 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
3605 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3606 s)
3607
3608
3611 tree = self.parse(_bytes('<a><b/></a>'))
3612 f = BytesIO()
3613 tree.write(f)
3614 s = f.getvalue()
3615 self.assertEqual(_bytes('<a><b/></a>'),
3616 s)
3617
3619 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3620 f = BytesIO()
3621 tree.write(f, compression=9)
3622 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
3623 try:
3624 s = gzfile.read()
3625 finally:
3626 gzfile.close()
3627 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
3628 s)
3629
3631 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3632 f = BytesIO()
3633 tree.write(f, compression=0)
3634 s0 = f.getvalue()
3635
3636 f = BytesIO()
3637 tree.write(f)
3638 self.assertEqual(f.getvalue(), s0)
3639
3640 f = BytesIO()
3641 tree.write(f, compression=1)
3642 s = f.getvalue()
3643 self.assertTrue(len(s) <= len(s0))
3644 gzfile = gzip.GzipFile(fileobj=BytesIO(s))
3645 try:
3646 s1 = gzfile.read()
3647 finally:
3648 gzfile.close()
3649
3650 f = BytesIO()
3651 tree.write(f, compression=9)
3652 s = f.getvalue()
3653 self.assertTrue(len(s) <= len(s0))
3654 gzfile = gzip.GzipFile(fileobj=BytesIO(s))
3655 try:
3656 s9 = gzfile.read()
3657 finally:
3658 gzfile.close()
3659
3660 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
3661 s0)
3662 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
3663 s1)
3664 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
3665 s9)
3666
3678
3694
3706
3719
3721 etree = etree
3722
3744
3746 """This can't really be tested as long as there isn't a way to
3747 reset the logging setup ...
3748 """
3749 parse = self.etree.parse
3750
3751 messages = []
# PyErrorLog subclass that is installed via use_global_python_log() by the
# surrounding test.  Its log() hook appends every message it is handed to the
# enclosing `messages` list, which the assertions afterwards search for
# expected substrings.
3752 class Logger(self.etree.PyErrorLog):
# `entry` and any extra positional `args` are ignored; only `message` is kept.
3753 def log(self, entry, message, *args):
3754 messages.append(message)
3755
3756 self.etree.use_global_python_log(Logger())
3757 f = BytesIO('<a><b></c></b></a>')
3758 try:
3759 parse(f)
3760 except SyntaxError:
3761 pass
3762 f.close()
3763
3764 self.assertTrue([ message for message in messages
3765 if 'mismatch' in message ])
3766 self.assertTrue([ message for message in messages
3767 if ':PARSER:' in message])
3768 self.assertTrue([ message for message in messages
3769 if ':ERR_TAG_NAME_MISMATCH:' in message ])
3770 self.assertTrue([ message for message in messages
3771 if ':1:15:' in message ])
3772
3794
if __name__ == '__main__':
    # Executing this module directly does not run any tests; it only prints
    # a hint that names the test.py runner together with this file's path.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)
3797