1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 import os.path
11 import unittest
12 import copy
13 import sys
14 import re
15 import gc
16 import operator
17 import tempfile
18 import gzip
19
# Put the directory containing this test module on the import path so the
# sibling helper module imported just below resolves no matter which
# working directory the test run was started from.
this_dir = os.path.dirname(__file__)
if not sys.path.count(this_dir):
    sys.path.insert(0, this_dir)
23
24 from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir, read_file
25 from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
26 from common_imports import canonicalize, sorted, _str, _bytes
27
# Report the library versions this test run exercises.  The banner is
# emitted as one print call; the leading and trailing empty entries give
# the same blank separator lines as individual print("") calls would.
print("\n".join([
    "",
    "TESTED VERSION: %s" % etree.__version__,
    " Python: " + repr(sys.version_info),
    " lxml.etree: " + repr(etree.LXML_VERSION),
    " libxml used: " + repr(etree.LIBXML_VERSION),
    " libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION),
    " libxslt used: " + repr(etree.LIBXSLT_VERSION),
    " libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION),
    "",
]))
37
# Alias for the interpreter's text type: probe for the Python 2 ``unicode``
# builtin and fall back to ``str`` where that name does not exist.
try:
    unicode
except NameError:
    # Python 3
    _unicode = str
else:
    _unicode = unicode
43
45 """Tests only for etree, not ElementTree"""
46 etree = etree
47
58
67
74
76 Element = self.etree.Element
77 el = Element('name')
78 self.assertRaises(ValueError, Element, '{}')
79 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
80
81 self.assertRaises(ValueError, Element, '{test}')
82 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
83
91
93 Element = self.etree.Element
94 self.assertRaises(ValueError, Element, "p'name")
95 self.assertRaises(ValueError, Element, 'p"name')
96
97 self.assertRaises(ValueError, Element, "{test}p'name")
98 self.assertRaises(ValueError, Element, '{test}p"name')
99
100 el = Element('name')
101 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
102 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
103
105 Element = self.etree.Element
106 self.assertRaises(ValueError, Element, ' name ')
107 self.assertRaises(ValueError, Element, 'na me')
108 self.assertRaises(ValueError, Element, '{test} name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
112
120
128
130 Element = self.etree.Element
131 SubElement = self.etree.SubElement
132
133 el = Element('name')
134 self.assertRaises(ValueError, SubElement, el, "p'name")
135 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
136
137 self.assertRaises(ValueError, SubElement, el, 'p"name')
138 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
139
148
157
159 QName = self.etree.QName
160 self.assertRaises(ValueError, QName, '')
161 self.assertRaises(ValueError, QName, 'test', '')
162
164 QName = self.etree.QName
165 self.assertRaises(ValueError, QName, 'p:name')
166 self.assertRaises(ValueError, QName, 'test', 'p:name')
167
169 QName = self.etree.QName
170 self.assertRaises(ValueError, QName, ' name ')
171 self.assertRaises(ValueError, QName, 'na me')
172 self.assertRaises(ValueError, QName, 'test', ' name')
173
181
183
184 QName = self.etree.QName
185 qname1 = QName('http://myns', 'a')
186 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
187
188 qname2 = QName(a)
189 self.assertEqual(a.tag, qname1.text)
190 self.assertEqual(qname1.text, qname2.text)
191 self.assertEqual(qname1, qname2)
192
194
195 etree = self.etree
196 qname = etree.QName('http://myns', 'a')
197 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
198 a.text = qname
199
200 self.assertEqual("p:a", a.text)
201
210
225
231
239
253
275
277 XML = self.etree.XML
278 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
279
280 root = XML(xml)
281 self.etree.strip_elements(root, 'a')
282 self.assertEqual(_bytes('<test><x></x></test>'),
283 self._writeElement(root))
284
285 root = XML(xml)
286 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
287 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
288 self._writeElement(root))
289
290 root = XML(xml)
291 self.etree.strip_elements(root, 'c')
292 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
293 self._writeElement(root))
294
296 XML = self.etree.XML
297 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
298
299 root = XML(xml)
300 self.etree.strip_elements(root, 'a')
301 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
302 self._writeElement(root))
303
304 root = XML(xml)
305 self.etree.strip_elements(root, '{urn:a}b', 'c')
306 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
307 self._writeElement(root))
308
309 root = XML(xml)
310 self.etree.strip_elements(root, '{urn:a}*', 'c')
311 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
312 self._writeElement(root))
313
314 root = XML(xml)
315 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
316 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
317 self._writeElement(root))
318
337
363
390
416
435
448
459
465
467 XML = self.etree.XML
468 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
469 self.assertEqual(root[0].target, "mypi")
470 self.assertEqual(root[0].get('my'), "1")
471 self.assertEqual(root[0].get('test'), " abc ")
472 self.assertEqual(root[0].get('quotes'), "' '")
473 self.assertEqual(root[0].get('only'), None)
474 self.assertEqual(root[0].get('names'), None)
475 self.assertEqual(root[0].get('nope'), None)
476
478 XML = self.etree.XML
479 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
480 self.assertEqual(root[0].target, "mypi")
481 self.assertEqual(root[0].attrib['my'], "1")
482 self.assertEqual(root[0].attrib['test'], " abc ")
483 self.assertEqual(root[0].attrib['quotes'], "' '")
484 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
485 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
486 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
487
489
490 ProcessingInstruction = self.etree.ProcessingInstruction
491
492 a = ProcessingInstruction("PI", "ONE")
493 b = copy.deepcopy(a)
494 b.text = "ANOTHER"
495
496 self.assertEqual('ONE', a.text)
497 self.assertEqual('ANOTHER', b.text)
498
514
529
539
551
570
575
588
599
600 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
601 events = list(iterparse(f, events=('end', 'comment')))
602 root = events[-1][1]
603 self.assertEqual(6, len(events))
604 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
605 [ name(*item) for item in events ])
606 self.assertEqual(
607 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
608 tostring(root))
609
621
622 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
623 events = list(iterparse(f, events=('end', 'pi')))
624 root = events[-2][1]
625 self.assertEqual(8, len(events))
626 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
627 ('pid','d'), 'a', ('pie','e')],
628 [ name(*item) for item in events ])
629 self.assertEqual(
630 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
631 tostring(ElementTree(root)))
632
647
653
655 iterparse = self.etree.iterparse
656 f = BytesIO("""
657 <a> \n \n <b> b test </b> \n
658
659 \n\t <c> \n </c> </a> \n """)
660 iterator = iterparse(f, remove_blank_text=True)
661 text = [ (element.text, element.tail)
662 for event, element in iterator ]
663 self.assertEqual(
664 [(" b test ", None), (" \n ", None), (None, None)],
665 text)
666
668 iterparse = self.etree.iterparse
669 f = BytesIO('<a><b><d/></b><c/></a>')
670
671 iterator = iterparse(f, tag="b", events=('start', 'end'))
672 events = list(iterator)
673 root = iterator.root
674 self.assertEqual(
675 [('start', root[0]), ('end', root[0])],
676 events)
677
679 iterparse = self.etree.iterparse
680 f = BytesIO('<a><b><d/></b><c/></a>')
681
682 iterator = iterparse(f, tag="*", events=('start', 'end'))
683 events = list(iterator)
684 self.assertEqual(
685 8,
686 len(events))
687
689 iterparse = self.etree.iterparse
690 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
691
692 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
693 events = list(iterator)
694 root = iterator.root
695 self.assertEqual(
696 [('start', root[0]), ('end', root[0])],
697 events)
698
700 iterparse = self.etree.iterparse
701 f = BytesIO('<a><b><d/></b><c/></a>')
702 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
703 events = list(iterator)
704 root = iterator.root
705 self.assertEqual(
706 [('start', root[0]), ('end', root[0])],
707 events)
708
709 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
710 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
711 events = list(iterator)
712 root = iterator.root
713 self.assertEqual([], events)
714
716 iterparse = self.etree.iterparse
717 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
718 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
719 events = list(iterator)
720 self.assertEqual(8, len(events))
721
723 iterparse = self.etree.iterparse
724 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
725 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
726 events = list(iterator)
727 self.assertEqual([], events)
728
729 f = BytesIO('<a><b><d/></b><c/></a>')
730 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
731 events = list(iterator)
732 self.assertEqual(8, len(events))
733
735 text = _str('Søk på nettet')
736 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
737 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
738 ).encode('iso-8859-1')
739
740 self.assertRaises(self.etree.ParseError,
741 list, self.etree.iterparse(BytesIO(xml_latin1)))
742
744 text = _str('Søk på nettet', encoding="UTF-8")
745 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
746 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
747 ).encode('iso-8859-1')
748
749 iterator = self.etree.iterparse(BytesIO(xml_latin1),
750 encoding="iso-8859-1")
751 self.assertEqual(1, len(list(iterator)))
752
753 a = iterator.root
754 self.assertEqual(a.text, text)
755
757 tostring = self.etree.tostring
758 f = BytesIO('<root><![CDATA[test]]></root>')
759 context = self.etree.iterparse(f, strip_cdata=False)
760 content = [ el.text for event,el in context ]
761
762 self.assertEqual(['test'], content)
763 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
764 tostring(context.root))
765
769
774
793
794
795
806 def end(self, tag):
807 events.append("end")
808 assertEqual("TAG", tag)
809 def close(self):
810 return "DONE"
811
812 parser = self.etree.XMLParser(target=Target())
813 tree = self.etree.ElementTree()
814
815 self.assertRaises(TypeError,
816 tree.parse, BytesIO("<TAG/>"), parser=parser)
817 self.assertEqual(["start", "end"], events)
818
820
821 events = []
822 class Target(object):
823 def start(self, tag, attrib):
824 events.append("start-" + tag)
825 def end(self, tag):
826 events.append("end-" + tag)
827 if tag == 'a':
828 raise ValueError("dead and gone")
829 def data(self, data):
830 events.append("data-" + data)
831 def close(self):
832 events.append("close")
833 return "DONE"
834
835 parser = self.etree.XMLParser(target=Target())
836
837 try:
838 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
839 done = parser.close()
840 self.fail("error expected, but parsing succeeded")
841 except ValueError:
842 done = 'value error received as expected'
843
844 self.assertEqual(["start-root", "data-A", "start-a",
845 "data-ca", "end-a", "close"],
846 events)
847
849
850 events = []
851 class Target(object):
852 def start(self, tag, attrib):
853 events.append("start-" + tag)
854 def end(self, tag):
855 events.append("end-" + tag)
856 if tag == 'a':
857 raise ValueError("dead and gone")
858 def data(self, data):
859 events.append("data-" + data)
860 def close(self):
861 events.append("close")
862 return "DONE"
863
864 parser = self.etree.XMLParser(target=Target())
865
866 try:
867 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
868 parser=parser)
869 self.fail("error expected, but parsing succeeded")
870 except ValueError:
871 done = 'value error received as expected'
872
873 self.assertEqual(["start-root", "data-A", "start-a",
874 "data-ca", "end-a", "close"],
875 events)
876
882 def end(self, tag):
883 events.append("end-" + tag)
884 def data(self, data):
885 events.append("data-" + data)
886 def comment(self, text):
887 events.append("comment-" + text)
888 def close(self):
889 return "DONE"
890
891 parser = self.etree.XMLParser(target=Target())
892
893 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
894 done = parser.close()
895
896 self.assertEqual("DONE", done)
897 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
898 "start-sub", "end-sub", "comment-c", "data-B",
899 "end-root", "comment-d"],
900 events)
901
903 events = []
904 class Target(object):
905 def start(self, tag, attrib):
906 events.append("start-" + tag)
907 def end(self, tag):
908 events.append("end-" + tag)
909 def data(self, data):
910 events.append("data-" + data)
911 def pi(self, target, data):
912 events.append("pi-" + target + "-" + data)
913 def close(self):
914 return "DONE"
915
916 parser = self.etree.XMLParser(target=Target())
917
918 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
919 done = parser.close()
920
921 self.assertEqual("DONE", done)
922 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
923 "data-B", "end-root", "pi-test-c"],
924 events)
925
927 events = []
928 class Target(object):
929 def start(self, tag, attrib):
930 events.append("start-" + tag)
931 def end(self, tag):
932 events.append("end-" + tag)
933 def data(self, data):
934 events.append("data-" + data)
935 def close(self):
936 return "DONE"
937
938 parser = self.etree.XMLParser(target=Target(),
939 strip_cdata=False)
940
941 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
942 done = parser.close()
943
944 self.assertEqual("DONE", done)
945 self.assertEqual(["start-root", "data-A", "start-a",
946 "data-ca", "end-a", "data-B", "end-root"],
947 events)
948
950 events = []
951 class Target(object):
952 def start(self, tag, attrib):
953 events.append("start-" + tag)
954 def end(self, tag):
955 events.append("end-" + tag)
956 def data(self, data):
957 events.append("data-" + data)
958 def close(self):
959 events.append("close")
960 return "DONE"
961
962 parser = self.etree.XMLParser(target=Target(),
963 recover=True)
964
965 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
966 done = parser.close()
967
968 self.assertEqual("DONE", done)
969 self.assertEqual(["start-root", "data-A", "start-a",
970 "data-ca", "end-a", "data-B",
971 "end-root", "close"],
972 events)
973
983
993
1002
1012
1014 iterwalk = self.etree.iterwalk
1015 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1016
1017 iterator = iterwalk(root, events=('start','end'))
1018 events = list(iterator)
1019 self.assertEqual(
1020 [('start', root), ('start', root[0]), ('end', root[0]),
1021 ('start', root[1]), ('end', root[1]), ('end', root)],
1022 events)
1023
1034
1036 iterwalk = self.etree.iterwalk
1037 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1038
1039 attr_name = '{testns}bla'
1040 events = []
1041 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1042 for event, elem in iterator:
1043 events.append(event)
1044 if event == 'start':
1045 if elem.tag != '{ns1}a':
1046 elem.set(attr_name, 'value')
1047
1048 self.assertEqual(
1049 ['start-ns', 'start', 'start', 'start-ns', 'start',
1050 'end', 'end-ns', 'end', 'end', 'end-ns'],
1051 events)
1052
1053 self.assertEqual(
1054 None,
1055 root.get(attr_name))
1056 self.assertEqual(
1057 'value',
1058 root[0].get(attr_name))
1059
1070
1072 parse = self.etree.parse
1073 parser = self.etree.XMLParser(dtd_validation=True)
1074 assertEqual = self.assertEqual
1075 test_url = _str("__nosuch.dtd")
1076
1077 class MyResolver(self.etree.Resolver):
1078 def resolve(self, url, id, context):
1079 assertEqual(url, test_url)
1080 return self.resolve_string(
1081 _str('''<!ENTITY myentity "%s">
1082 <!ELEMENT doc ANY>''') % url, context)
1083
1084 parser.resolvers.add(MyResolver())
1085
1086 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1087 tree = parse(StringIO(xml), parser)
1088 root = tree.getroot()
1089 self.assertEqual(root.text, test_url)
1090
1092 parse = self.etree.parse
1093 parser = self.etree.XMLParser(dtd_validation=True)
1094 assertEqual = self.assertEqual
1095 test_url = _str("__nosuch.dtd")
1096
1097 class MyResolver(self.etree.Resolver):
1098 def resolve(self, url, id, context):
1099 assertEqual(url, test_url)
1100 return self.resolve_string(
1101 (_str('''<!ENTITY myentity "%s">
1102 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1103 context)
1104
1105 parser.resolvers.add(MyResolver())
1106
1107 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1108 tree = parse(StringIO(xml), parser)
1109 root = tree.getroot()
1110 self.assertEqual(root.text, test_url)
1111
1113 parse = self.etree.parse
1114 parser = self.etree.XMLParser(dtd_validation=True)
1115 assertEqual = self.assertEqual
1116 test_url = _str("__nosuch.dtd")
1117
1118 class MyResolver(self.etree.Resolver):
1119 def resolve(self, url, id, context):
1120 assertEqual(url, test_url)
1121 return self.resolve_file(
1122 SillyFileLike(
1123 _str('''<!ENTITY myentity "%s">
1124 <!ELEMENT doc ANY>''') % url), context)
1125
1126 parser.resolvers.add(MyResolver())
1127
1128 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1129 tree = parse(StringIO(xml), parser)
1130 root = tree.getroot()
1131 self.assertEqual(root.text, test_url)
1132
1134 parse = self.etree.parse
1135 parser = self.etree.XMLParser(attribute_defaults=True)
1136 assertEqual = self.assertEqual
1137 test_url = _str("__nosuch.dtd")
1138
1139 class MyResolver(self.etree.Resolver):
1140 def resolve(self, url, id, context):
1141 assertEqual(url, test_url)
1142 return self.resolve_filename(
1143 fileInTestDir('test.dtd'), context)
1144
1145 parser.resolvers.add(MyResolver())
1146
1147 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1148 tree = parse(StringIO(xml), parser)
1149 root = tree.getroot()
1150 self.assertEqual(
1151 root.attrib, {'default': 'valueA'})
1152 self.assertEqual(
1153 root[0].attrib, {'default': 'valueB'})
1154
1166
1167 parser.resolvers.add(MyResolver())
1168
1169 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1170 tree = parse(StringIO(xml), parser,
1171 base_url=fileInTestDir('__test.xml'))
1172 root = tree.getroot()
1173 self.assertEqual(
1174 root.attrib, {'default': 'valueA'})
1175 self.assertEqual(
1176 root[0].attrib, {'default': 'valueB'})
1177
1179 parse = self.etree.parse
1180 parser = self.etree.XMLParser(attribute_defaults=True)
1181 assertEqual = self.assertEqual
1182 test_url = _str("__nosuch.dtd")
1183
1184 class MyResolver(self.etree.Resolver):
1185 def resolve(self, url, id, context):
1186 assertEqual(url, test_url)
1187 return self.resolve_file(
1188 open(fileInTestDir('test.dtd'), 'rb'), context)
1189
1190 parser.resolvers.add(MyResolver())
1191
1192 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1193 tree = parse(StringIO(xml), parser)
1194 root = tree.getroot()
1195 self.assertEqual(
1196 root.attrib, {'default': 'valueA'})
1197 self.assertEqual(
1198 root[0].attrib, {'default': 'valueB'})
1199
1201 parse = self.etree.parse
1202 parser = self.etree.XMLParser(load_dtd=True)
1203 assertEqual = self.assertEqual
1204 test_url = _str("__nosuch.dtd")
1205
1206 class check(object):
1207 resolved = False
1208
1209 class MyResolver(self.etree.Resolver):
1210 def resolve(self, url, id, context):
1211 assertEqual(url, test_url)
1212 check.resolved = True
1213 return self.resolve_empty(context)
1214
1215 parser.resolvers.add(MyResolver())
1216
1217 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1218 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1219 self.assertTrue(check.resolved)
1220
1227
1228 class MyResolver(self.etree.Resolver):
1229 def resolve(self, url, id, context):
1230 raise _LocalException
1231
1232 parser.resolvers.add(MyResolver())
1233
1234 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1235 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1236
1237 if etree.LIBXML_VERSION > (2,6,20):
1254
1256 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1257 <root>
1258 <child1/>
1259 <child2/>
1260 <child3> </child3>
1261 </root>''')
1262
1263 parser = self.etree.XMLParser(resolve_entities=False)
1264 root = etree.fromstring(xml, parser)
1265 self.assertEqual([ el.tag for el in root ],
1266 ['child1', 'child2', 'child3'])
1267
1268 root[0] = root[-1]
1269 self.assertEqual([ el.tag for el in root ],
1270 ['child3', 'child2'])
1271 self.assertEqual(root[0][0].text, ' ')
1272 self.assertEqual(root[0][0].name, 'nbsp')
1273
1289
1296
1298 Entity = self.etree.Entity
1299 self.assertRaises(ValueError, Entity, 'a b c')
1300 self.assertRaises(ValueError, Entity, 'a,b')
1301 self.assertRaises(ValueError, Entity, 'a\0b')
1302 self.assertRaises(ValueError, Entity, '#abc')
1303 self.assertRaises(ValueError, Entity, '#xxyz')
1304
1317
1330
1332 CDATA = self.etree.CDATA
1333 Element = self.etree.Element
1334
1335 root = Element("root")
1336 cdata = CDATA('test')
1337
1338 self.assertRaises(TypeError,
1339 setattr, root, 'tail', cdata)
1340 self.assertRaises(TypeError,
1341 root.set, 'attr', cdata)
1342 self.assertRaises(TypeError,
1343 operator.setitem, root.attrib, 'attr', cdata)
1344
1353
1362
1363
1373
1382
1395
1408
1414
1420
1435
1448
1463
1476
1491
1504
1519
1532
1533
1541
1542
1552
1553
1568
1569
1579
1580
1591
1592
1594 self.assertRaises(TypeError, self.etree.dump, None)
1595
1608
1621
1642
1651
1660
1669
1678
1680 XML = self.etree.XML
1681
1682 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1683 result = []
1684 for el in root.iterchildren(tag=['two', 'three']):
1685 result.append(el.text)
1686 self.assertEqual(['Two', 'Bla', None], result)
1687
1696
1717
1732
1734 Element = self.etree.Element
1735 SubElement = self.etree.SubElement
1736
1737 a = Element('a')
1738 b = SubElement(a, 'b')
1739 c = SubElement(a, 'c')
1740 d = SubElement(b, 'd')
1741 self.assertEqual(
1742 [b, a],
1743 list(d.iterancestors(tag=('a', 'b'))))
1744 self.assertEqual(
1745 [],
1746 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
1747 self.assertEqual(
1748 [],
1749 list(d.iterancestors(tag=('d', 'x'))))
1750 self.assertEqual(
1751 [b, a],
1752 list(d.iterancestors(tag=('b', '*'))))
1753 self.assertEqual(
1754 [b],
1755 list(d.iterancestors(tag=('b', 'c'))))
1756
1773
1794
1796 Element = self.etree.Element
1797 SubElement = self.etree.SubElement
1798
1799 a = Element('a')
1800 b = SubElement(a, 'b')
1801 c = SubElement(a, 'c')
1802 d = SubElement(b, 'd')
1803 e = SubElement(c, 'e')
1804
1805 self.assertEqual(
1806 [b, e],
1807 list(a.iterdescendants(tag=('a', 'b', 'e'))))
1808 a2 = SubElement(e, 'a')
1809 self.assertEqual(
1810 [b, a2],
1811 list(a.iterdescendants(tag=('a', 'b'))))
1812 self.assertEqual(
1813 [],
1814 list(c.iterdescendants(tag=('x', 'y', 'z'))))
1815 self.assertEqual(
1816 [b, d, c, e, a2],
1817 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
1818
1836
1853
1871
1895
1897 Element = self.etree.Element
1898 SubElement = self.etree.SubElement
1899
1900 a = Element('a')
1901 b = SubElement(a, 'b')
1902 c = SubElement(a, 'c')
1903 d = SubElement(b, 'd')
1904 self.assertEqual(
1905 [],
1906 list(a.itersiblings(tag='XXX')))
1907 self.assertEqual(
1908 [c],
1909 list(b.itersiblings(tag='c')))
1910 self.assertEqual(
1911 [c],
1912 list(b.itersiblings(tag='*')))
1913 self.assertEqual(
1914 [b],
1915 list(c.itersiblings(preceding=True, tag='b')))
1916 self.assertEqual(
1917 [],
1918 list(c.itersiblings(preceding=True, tag='c')))
1919
1921 Element = self.etree.Element
1922 SubElement = self.etree.SubElement
1923
1924 a = Element('a')
1925 b = SubElement(a, 'b')
1926 c = SubElement(a, 'c')
1927 d = SubElement(b, 'd')
1928 e = SubElement(a, 'e')
1929 self.assertEqual(
1930 [],
1931 list(a.itersiblings(tag=('XXX', 'YYY'))))
1932 self.assertEqual(
1933 [c, e],
1934 list(b.itersiblings(tag=('c', 'd', 'e'))))
1935 self.assertEqual(
1936 [b],
1937 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
1938 self.assertEqual(
1939 [c, b],
1940 list(e.itersiblings(preceding=True, tag=('c', '*'))))
1941
1943 parseid = self.etree.parseid
1944 XML = self.etree.XML
1945 xml_text = _bytes('''
1946 <!DOCTYPE document [
1947 <!ELEMENT document (h1,p)*>
1948 <!ELEMENT h1 (#PCDATA)>
1949 <!ATTLIST h1 myid ID #REQUIRED>
1950 <!ELEMENT p (#PCDATA)>
1951 <!ATTLIST p someid ID #REQUIRED>
1952 ]>
1953 <document>
1954 <h1 myid="chapter1">...</h1>
1955 <p id="note1" class="note">...</p>
1956 <p>Regular paragraph.</p>
1957 <p xml:id="xmlid">XML:ID paragraph.</p>
1958 <p someid="warn1" class="warning">...</p>
1959 </document>
1960 ''')
1961
1962 tree, dic = parseid(BytesIO(xml_text))
1963 root = tree.getroot()
1964 root2 = XML(xml_text)
1965 self.assertEqual(self._writeElement(root),
1966 self._writeElement(root2))
1967 expected = {
1968 "chapter1" : root[0],
1969 "xmlid" : root[3],
1970 "warn1" : root[4]
1971 }
1972 self.assertTrue("chapter1" in dic)
1973 self.assertTrue("warn1" in dic)
1974 self.assertTrue("xmlid" in dic)
1975 self._checkIDDict(dic, expected)
1976
1978 XMLDTDID = self.etree.XMLDTDID
1979 XML = self.etree.XML
1980 xml_text = _bytes('''
1981 <!DOCTYPE document [
1982 <!ELEMENT document (h1,p)*>
1983 <!ELEMENT h1 (#PCDATA)>
1984 <!ATTLIST h1 myid ID #REQUIRED>
1985 <!ELEMENT p (#PCDATA)>
1986 <!ATTLIST p someid ID #REQUIRED>
1987 ]>
1988 <document>
1989 <h1 myid="chapter1">...</h1>
1990 <p id="note1" class="note">...</p>
1991 <p>Regular paragraph.</p>
1992 <p xml:id="xmlid">XML:ID paragraph.</p>
1993 <p someid="warn1" class="warning">...</p>
1994 </document>
1995 ''')
1996
1997 root, dic = XMLDTDID(xml_text)
1998 root2 = XML(xml_text)
1999 self.assertEqual(self._writeElement(root),
2000 self._writeElement(root2))
2001 expected = {
2002 "chapter1" : root[0],
2003 "xmlid" : root[3],
2004 "warn1" : root[4]
2005 }
2006 self.assertTrue("chapter1" in dic)
2007 self.assertTrue("warn1" in dic)
2008 self.assertTrue("xmlid" in dic)
2009 self._checkIDDict(dic, expected)
2010
2012 XMLDTDID = self.etree.XMLDTDID
2013 XML = self.etree.XML
2014 xml_text = _bytes('''
2015 <document>
2016 <h1 myid="chapter1">...</h1>
2017 <p id="note1" class="note">...</p>
2018 <p>Regular paragraph.</p>
2019 <p someid="warn1" class="warning">...</p>
2020 </document>
2021 ''')
2022
2023 root, dic = XMLDTDID(xml_text)
2024 root2 = XML(xml_text)
2025 self.assertEqual(self._writeElement(root),
2026 self._writeElement(root2))
2027 expected = {}
2028 self._checkIDDict(dic, expected)
2029
2031 self.assertEqual(len(dic),
2032 len(expected))
2033 self.assertEqual(sorted(dic.items()),
2034 sorted(expected.items()))
2035 if sys.version_info < (3,):
2036 self.assertEqual(sorted(dic.iteritems()),
2037 sorted(expected.iteritems()))
2038 self.assertEqual(sorted(dic.keys()),
2039 sorted(expected.keys()))
2040 if sys.version_info < (3,):
2041 self.assertEqual(sorted(dic.iterkeys()),
2042 sorted(expected.iterkeys()))
2043 if sys.version_info < (3,):
2044 self.assertEqual(sorted(dic.values()),
2045 sorted(expected.values()))
2046 self.assertEqual(sorted(dic.itervalues()),
2047 sorted(expected.itervalues()))
2048
2050 etree = self.etree
2051
2052 r = {'foo': 'http://ns.infrae.com/foo'}
2053 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2054 self.assertEqual(
2055 'foo',
2056 e.prefix)
2057 self.assertEqual(
2058 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2059 self._writeElement(e))
2060
2062 etree = self.etree
2063
2064 r = {None: 'http://ns.infrae.com/foo'}
2065 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2066 self.assertEqual(
2067 None,
2068 e.prefix)
2069 self.assertEqual(
2070 '{http://ns.infrae.com/foo}bar',
2071 e.tag)
2072 self.assertEqual(
2073 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2074 self._writeElement(e))
2075
2077 etree = self.etree
2078
2079 r = {None: 'http://ns.infrae.com/foo',
2080 'hoi': 'http://ns.infrae.com/hoi'}
2081 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2082 e.set('{http://ns.infrae.com/hoi}test', 'value')
2083 self.assertEqual(
2084 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2085 self._writeElement(e))
2086
2088 etree = self.etree
2089 r = {None: 'http://ns.infrae.com/foo',
2090 'hoi': 'http://ns.infrae.com/hoi'}
2091 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2092 tree = etree.ElementTree(element=e)
2093 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2094 self.assertEqual(
2095 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2096 self._writeElement(e))
2097
2099 etree = self.etree
2100
2101 r = {None: 'http://ns.infrae.com/foo'}
2102 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2103 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2104
2105 e1.append(e2)
2106
2107 self.assertEqual(
2108 None,
2109 e1.prefix)
2110 self.assertEqual(
2111 None,
2112 e1[0].prefix)
2113 self.assertEqual(
2114 '{http://ns.infrae.com/foo}bar',
2115 e1.tag)
2116 self.assertEqual(
2117 '{http://ns.infrae.com/foo}bar',
2118 e1[0].tag)
2119
2121 etree = self.etree
2122
2123 r = {None: 'http://ns.infrae.com/BAR'}
2124 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2125 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2126
2127 e1.append(e2)
2128
2129 self.assertEqual(
2130 None,
2131 e1.prefix)
2132 self.assertNotEqual(
2133 None,
2134 e2.prefix)
2135 self.assertEqual(
2136 '{http://ns.infrae.com/BAR}bar',
2137 e1.tag)
2138 self.assertEqual(
2139 '{http://ns.infrae.com/foo}bar',
2140 e2.tag)
2141
2143 ns_href = "http://a.b.c"
2144 one = self.etree.fromstring(
2145 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2146 baz = one[0][0]
2147
2148 two = self.etree.fromstring(
2149 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2150 two.append(baz)
2151 del one
2152
2153 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2154 self.assertEqual(
2155 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2156 self.etree.tostring(two))
2157
2167
2169 etree = self.etree
2170
2171 r = {None: 'http://ns.infrae.com/foo',
2172 'hoi': 'http://ns.infrae.com/hoi'}
2173 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2174 self.assertEqual(
2175 r,
2176 e.nsmap)
2177
2179 etree = self.etree
2180
2181 re = {None: 'http://ns.infrae.com/foo',
2182 'hoi': 'http://ns.infrae.com/hoi'}
2183 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2184
2185 rs = {None: 'http://ns.infrae.com/honk',
2186 'top': 'http://ns.infrae.com/top'}
2187 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2188
2189 r = re.copy()
2190 r.update(rs)
2191 self.assertEqual(re, e.nsmap)
2192 self.assertEqual(r, s.nsmap)
2193
2195 etree = self.etree
2196 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2197 self.assertEqual({'hha': None}, el.nsmap)
2198
# getiterator() takes several tag names as separate positional arguments and
# yields the elements matching any of them, in document order.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(b, 'd')  # never expected below, but part of the walked tree
e = SubElement(c, 'e')
f = SubElement(c, 'f')

# (expected elements, start element, tag arguments)
cases = [
    ([a, b], a, ('a', 'b')),
    ([], a, ('x', 'y')),
    ([a, f], a, ('f', 'a')),
    ([c, e, f], c, ('c', '*', 'a')),
    ([], a, ((), ())),
]
for expected, start, tags in cases:
    self.assertEqual(expected, list(start.getiterator(*tags)))
2225
# Same matching rules as with separate arguments, but the tag names are
# handed to getiterator() as one tuple.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(b, 'd')  # never expected below, but part of the walked tree
e = SubElement(c, 'e')
f = SubElement(c, 'f')

# (expected elements, start element, tuple of tags)
cases = [
    ([a, b], a, ('a', 'b')),
    ([], a, ('x', 'y')),
    ([a, f], a, ('f', 'a')),
    ([c, e, f], c, ('c', '*', 'a')),
    ([], a, ()),
]
for expected, start, tags in cases:
    self.assertEqual(expected, list(start.getiterator(tags)))
2252
# Tag filters are namespace aware: a plain name or an empty '{}' namespace
# only matches un-namespaced elements, '{ns}*' matches everything in ns.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('{a}a')
b = SubElement(a, '{a}b')
c = SubElement(a, '{a}c')
d = SubElement(b, '{b}d')
e = SubElement(c, '{a}e')
f = SubElement(c, '{b}f')
g = SubElement(c, 'g')

# (expected elements, start element, tag filter)
cases = [
    ([a], a, '{a}a'),
    ([], a, '{b}a'),
    ([], a, 'a'),
    ([a, b, d, c, e, f, g], a, '*'),
    ([f], c, '{b}*'),
    ([d, f], a, '{b}*'),
    ([g], a, 'g'),
    ([g], a, '{}g'),
    ([g], a, '{}*'),
]
for expected, start, tag in cases:
    self.assertEqual(expected, list(start.getiterator(tag)))
2292
# '{*}name' matches the local name in any namespace, including none.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('{a}a')
b = SubElement(a, '{nsA}b')
c = SubElement(b, '{nsB}b')
d = SubElement(a, 'b')
e = SubElement(a, '{nsA}e')
f = SubElement(e, '{nsB}e')
g = SubElement(e, 'e')

# (expected elements, tag filter); iteration always starts at the root
cases = [
    ([b, c, d], '{*}b'),
    ([e, f, g], '{*}e'),
    ([a, b, c, d, e, f, g], '{*}*'),
]
for expected, tag in cases:
    self.assertEqual(expected, list(a.getiterator(tag)))
2314
2339
2355
2372
2379
2386
2395
# findall() with '{uri}name' paths: only elements in that namespace count,
# while a bare name only counts the un-namespaced ones.
root = self.etree.XML(_bytes(
    '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))

for path, expected_count in ((".//{X}b", 2), (".//{X}*", 2), (".//b", 3)):
    self.assertEqual(len(root.findall(path)), expected_count)
2402
# The prefix in the path is resolved through the mapping passed to findall(),
# not through the prefixes used in the document itself.
root = self.etree.XML(_bytes(
    '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))

# (prefix mapping, [(path, expected number of matches), ...])
expectations = [
    ({'xx': 'X'}, [(".//xx:b", 2), (".//xx:*", 2), (".//b", 2)]),
    ({'xx': 'Y'}, [(".//xx:b", 1), (".//xx:*", 1), (".//b", 2)]),
]
for nsmap, checks in expectations:
    for path, expected_count in checks:
        self.assertEqual(
            len(root.findall(path, namespaces=nsmap)), expected_count)
2414
# Run the same paths against two different prefix mappings on one tree; the
# second mapping must not be affected by the first.
parsed = self.etree.XML(_bytes(
    '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))

def count(path, prefixes):
    # Number of elements findall() returns for path under the given mapping.
    return len(parsed.findall(path, namespaces=prefixes))

to_x = {'xx': 'X'}
self.assertEqual(count(".//xx:b", to_x), 2)
self.assertEqual(count(".//xx:*", to_x), 2)
self.assertEqual(count(".//b", to_x), 2)

to_y = {'xx': 'Y'}
self.assertEqual(count(".//xx:b", to_y), 1)
self.assertEqual(count(".//xx:*", to_y), 1)
self.assertEqual(count(".//b", to_y), 2)
2426
2433
# Element.index() with optional start/stop bounds, including negative ones.
parent = self.etree.Element('foo')
for n in range(10):
    self.etree.SubElement(parent, 'a%s' % n)

# Without bounds every child is found at its own position.
for position in range(10):
    self.assertEqual(position, parent.index(parent[position]))

self.assertEqual(3, parent.index(parent[3], 3))

# (child position, bounds) combinations that exclude the child
for position, bounds in [(3, (4,)), (3, (0, 2)), (8, (0, -3)), (8, (-5, -3))]:
    self.assertRaises(ValueError, parent.index, parent[position], *bounds)

# (child position, bounds) combinations that still include the child
for position, bounds in [(8, (0, -1)), (8, (-12, -1)), (0, (-12, -1))]:
    self.assertEqual(position, parent.index(parent[position], *bounds))
2459
# Replacing a child with one of its own siblings moves that sibling, so the
# parent ends up one child shorter; text and tail travel with the elements.
etree = self.etree
parent = etree.Element('foo')
for n in range(10):
    child = etree.SubElement(parent, 'a%s' % n)
    child.text = "text%d" % n
    child.tail = "tail%d" % n

first, second, third = parent[0], parent[1], parent[2]

parent.replace(parent[0], parent[1])
self.assertEqual(9, len(parent))
self.assertEqual(second, parent[0])
self.assertEqual(second.text, "text1")
self.assertEqual(second.tail, "tail1")
self.assertEqual(first.tail, "tail0")
self.assertEqual(third, parent[1])

# Now move the (new) first child over the last one.
parent.replace(parent[-1], parent[0])
self.assertEqual(second, parent[-1])
self.assertEqual(second.text, "text1")
self.assertEqual(second.tail, "tail1")
self.assertEqual(third, parent[0])
2495
# Replacing a child with a brand-new element keeps the newcomer's own text
# and tail and leaves the following sibling where it was.
etree = self.etree
parent = etree.Element('foo')
for n in range(10):
    etree.SubElement(parent, 'a%s' % n)

replacement = etree.Element("test")
replacement.text = "TESTTEXT"
replacement.tail = "TESTTAIL"
neighbour = parent[1]

parent.replace(parent[0], replacement)

self.assertEqual(replacement, parent[0])
self.assertEqual("TESTTEXT", parent[0].text)
self.assertEqual("TESTTAIL", parent[0].tail)
self.assertEqual(neighbour, parent[1])
2517
2533
2551
2569
2587
# Assigning to an extended slice needs exactly as many new items as the slice
# selects. a[1::2] selects two of the four children here, so a list of three
# must be refused and the children left untouched.
#
# The former "is slice() available?" guard was removed: slice is a builtin,
# so the guard could never trigger, and if it had it would have reported a
# pass without running the test.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(a, 'd')
e = SubElement(a, 'e')

x = Element('x')
y = Element('y')
z = Element('z')

self.assertRaises(
    ValueError,
    operator.setitem, a, slice(1, None, 2), [x, y, z])

self.assertEqual([b, c, d, e], list(a))
2614
2627
2635
2644
2654
2664
2670
2678
2684
2691
2697
# Parse a document with an XML declaration and a PUBLIC doctype, then check
# every docinfo field that should reflect them.
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
xml_header = '<?xml version="1.0" encoding="ascii"?>'

document = _bytes(xml_header + doctype_string + '<html><body></body></html>')
docinfo = self.etree.parse(BytesIO(document)).docinfo

# (docinfo attribute, expected value)
expected_fields = [
    ('encoding', "ascii"),
    ('xml_version', "1.0"),
    ('public_id', pub_id),
    ('system_url', sys_id),
    ('root_name', 'html'),
    ('doctype', doctype_string),
]
for attribute, expected in expected_fields:
    self.assertEqual(getattr(docinfo, attribute), expected)
2715
2731
2743
2755
2761
# tostring(doctype=...) must emit the given doctype in place of the one the
# parsed document came with.
etree = self.etree
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
override = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))

original = _bytes('<!DOCTYPE root>\n<root/>')
tree = etree.parse(BytesIO(original))

expected = original.replace(_bytes('<!DOCTYPE root>'), override)
self.assertEqual(expected, etree.tostring(tree, doctype=override))
2772
# Initially .base comes from the parser's base_url and no xml:base attribute
# exists; assigning .base must create that attribute.
xml_base = '{http://www.w3.org/XML/1998/namespace}base'
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")

self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base), None)

root.base = "https://secret/url"

self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base), "https://secret/url")
2784
# The reverse direction: setting the xml:base attribute directly must be
# reflected by the .base property.
xml_base = '{http://www.w3.org/XML/1998/namespace}base'
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")

self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base), None)

root.set(xml_base, "https://secret/url")

self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base), "https://secret/url")
2797
2803
2808
2815
2829
# A string containing a NUL byte must be refused (ValueError) as text, as
# tail and as a tag name.
Element = self.etree.Element
poisoned = 'ha\0ho'

target = Element('a')
for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, target, attribute, poisoned)

self.assertRaises(ValueError, Element, poisoned)
2838
# Same NUL check as for plain strings, with the value routed through _str().
Element = self.etree.Element

target = Element('a')
for attribute in ("text", "tail"):
    self.assertRaises(
        ValueError, setattr, target, attribute, _str('ha\0ho'))

self.assertRaises(ValueError, Element, _str('ha\0ho'))
2850
# Strings containing the control characters \x07 or \x02 must be refused
# (ValueError) as text, as tail and as a tag name.
Element = self.etree.Element
offenders = ('ha\x07ho', 'ha\x02ho')

target = Element('a')
for attribute in ("text", "tail"):
    for bad in offenders:
        self.assertRaises(ValueError, setattr, target, attribute, bad)

for bad in offenders:
    self.assertRaises(ValueError, Element, bad)
2863
# Same control-character check, with the values routed through _str().
Element = self.etree.Element

def offenders():
    # Build the values afresh for each use, as the individual calls did.
    return (_str('ha\x07ho'), _str('ha\x02ho'))

target = Element('a')
for attribute in ("text", "tail"):
    for bad in offenders():
        self.assertRaises(ValueError, setattr, target, attribute, bad)

for bad in offenders():
    self.assertRaises(ValueError, Element, bad)
2882
# The control character must still be caught when a non-ASCII character
# (\u1234) comes before it in the string.
Element = self.etree.Element

def offenders():
    # Build the values afresh for each use, as the individual calls did.
    return (_str('ha\u1234\x07ho'), _str('ha\u1234\x02ho'))

target = Element('a')
for attribute in ("text", "tail"):
    for bad in offenders():
        self.assertRaises(ValueError, setattr, target, attribute, bad)

for bad in offenders():
    self.assertRaises(ValueError, Element, bad)
2901
2915
2920
2938
2958
2980
# The standalone flag is absent by default, is written into the XML
# declaration when requested, and is read back into docinfo on re-parsing.
tostring = self.etree.tostring
XML = self.etree.XML
ElementTree = self.etree.ElementTree

root = XML(_bytes("<root/>"))
self.assertEqual(None, ElementTree(root).docinfo.standalone)

plain = tostring(root, xml_declaration=True, encoding="ASCII")
self.assertEqual(plain, _bytes(
    "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))

# (standalone argument, expected serialisation)
variants = [
    (True,
     "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"),
    (False,
     "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"),
]
for flag, expected in variants:
    serialised = tostring(root, xml_declaration=True, encoding="ASCII",
                          standalone=flag)
    self.assertEqual(serialised, _bytes(expected))

    reparsed = ElementTree(XML(serialised))
    self.assertEqual(flag, reparsed.docinfo.standalone)
3010
3030
# method="text" concatenates text and tails in document order; here the
# result is requested in UTF-16 and compared against Python's own encoder.
Element = self.etree.Element
SubElement = self.etree.SubElement

root = Element('a')
root.text = "A"
root.tail = "tail"

first = SubElement(root, 'b')
first.text = "B"
first.tail = _str("Søk på nettet")

second = SubElement(root, 'c')
second.text = "C"

serialised = self.etree.tostring(root, method="text", encoding="UTF-16")

expected = _str('ABSøk på nettetCtail').encode("UTF-16")
self.assertEqual(expected, serialised)
3049
# With no encoding given, text output of non-ASCII content must fail with
# UnicodeEncodeError; asking for UTF-8 makes the same call succeed.
tostring = self.etree.tostring
Element = self.etree.Element
SubElement = self.etree.SubElement

root = Element('a')
root.text = _str('Søk på nettetA')
root.tail = "tail"

first = SubElement(root, 'b')
first.text = "B"
first.tail = _str('Søk på nettetB')

second = SubElement(root, 'c')
second.text = "C"

self.assertRaises(UnicodeEncodeError, tostring, root, method="text")

expected = _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8')
self.assertEqual(expected, tostring(root, encoding="UTF-8", method="text"))
3070
3083
3099
3103
3118
3136
3149
3151 tostring = self.etree.tostring
3152 Element = self.etree.Element
3153 SubElement = self.etree.SubElement
3154
3155 a = Element('a')
3156 b = SubElement(a, 'b')
3157 c = SubElement(a, 'c')
3158 d = SubElement(c, 'd')
3159 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3160 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3161 self.assertEqual(_bytes('<b></b>'),
3162 canonicalize(tostring(b, encoding=_unicode)))
3163 self.assertEqual(_bytes('<c><d></d></c>'),
3164 canonicalize(tostring(c, encoding=_unicode)))
3165
3170
3185
# Unicode serialisation is compact by default and with pretty_print=False;
# pretty_print=True puts every element on its own line.
tostring = self.etree.tostring
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')

result = tostring(a, encoding=_unicode)
self.assertEqual(result, "<a><b/><c/></a>")

result = tostring(a, encoding=_unicode, pretty_print=False)
self.assertEqual(result, "<a><b/><c/></a>")

# Pretty-printed children are indented by two spaces per nesting level.
result = tostring(a, encoding=_unicode, pretty_print=True)
self.assertEqual(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
3203
3215
3216
3217
3218 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3229
3230
# An xi:include with parse="text" is replaced by the raw file content, which
# ends up between the text that preceded the include element and its tail.
included_path = fileInTestDir('test_broken.xml')
root = etree.XML(_bytes('''\
<doc xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="%s" parse="text"/>
</doc>
''' % included_path))

# Capture the surrounding whitespace before the include element goes away.
text_before = root.text
included = read_file(included_path)
tail_after = root[0].tail

self.include(etree.ElementTree(root))

self.assertEqual(text_before + included + tail_after, root.text)
3246
3258
# Run XInclude processing through a custom resolver and check that it was
# consulted for the DTD, for the input document and for the included file.
class RecordingResolver(etree.Resolver):
    # Content handed back for every URL that is neither DTD nor input file.
    include_text = read_file(fileInTestDir('test.xml'))
    # Kinds of request seen so far; filled in by resolve().
    called = {}

    def resolve(self, url, id, context):
        if url.endswith(".dtd"):
            self.called["dtd"] = True
            return self.resolve_filename(
                fileInTestDir('test.dtd'), context)
        if url.endswith("test_xinclude.xml"):
            # Record the request but decline to resolve it ourselves.
            self.called["input"] = True
            return None
        self.called["include"] = True
        return self.resolve_string(self.include_text, context)

resolver = RecordingResolver()
parser = etree.XMLParser(load_dtd=True)
parser.resolvers.add(resolver)

tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
                   parser=parser)
self.include(tree)

self.assertEqual(
    [("dtd", True), ("include", True), ("input", True)],
    sorted(resolver.called.items()))
3289
3293
3294
3299
3300
# Canonical output expands the empty-element tag into a start/end pair.
document = self.parse(_bytes('<a><b/></a>'))
sink = BytesIO()
document.write_c14n(sink)
self.assertEqual(_bytes('<a><b></b></a>'), sink.getvalue())
3309
# write_c14n(compression=9) must produce gzip data that decompresses to the
# canonical form of the document.
document = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
sink = BytesIO()
document.write_c14n(sink, compression=9)

reader = gzip.GzipFile(fileobj=BytesIO(sink.getvalue()))
try:
    decompressed = reader.read()
finally:
    reader.close()

self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'), decompressed)
3321
3333
3349
3367
3379
3391
# write_c14n() is inclusive unless exclusive=True is given; exclusive output
# only declares namespaces where they are used, plus any prefixes listed in
# inclusive_ns_prefixes.
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

def canonical(**options):
    # Run write_c14n() into a fresh buffer and return what was written.
    sink = BytesIO()
    tree.write_c14n(sink, **options)
    return sink.getvalue()

self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    canonical())
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    canonical(exclusive=False))
self.assertEqual(
    _bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    canonical(exclusive=True))

self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
    canonical(exclusive=True, inclusive_ns_prefixes=['z']))
3416
# The same inclusive/exclusive rules apply when canonicalising a whole tree
# through tostring(method='c14n').
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

# (extra tostring() options, expected canonical form)
cases = [
    ({},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': False},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
     '<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
]
for options, expected in cases:
    serialised = etree.tostring(tree, method='c14n', **options)
    self.assertEqual(_bytes(expected), serialised)
3433
# Canonicalise individual elements: first the root, then its namespaced
# child, in inclusive and exclusive mode.
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

def c14n(element, **options):
    return etree.tostring(element, method='c14n', **options)

# (extra options, expected canonical form) for the root element
root_cases = [
    ({},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': False},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
]
for options, expected in root_cases:
    self.assertEqual(_bytes(expected), c14n(tree.getroot(), **options))

# (extra options, expected canonical form) for the child element
child_cases = [
    ({'exclusive': False},
     '<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
    ({'exclusive': True},
     '<z:b xmlns:z="http://cde"></z:b>'),
    ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
     '<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
]
for options, expected in child_cases:
    self.assertEqual(_bytes(expected), c14n(tree.getroot()[0], **options))
3457
"""Regression test for a memory allocation issue (needs 3+ inclusive NS prefixes)."""
document = self.parse(_bytes(
    '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

# All three prefixes are forced into the output although only z is used.
serialised = etree.tostring(
    document, method='c14n', exclusive=True,
    inclusive_ns_prefixes=['x', 'y', 'z'])

self.assertEqual(
    _bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    serialised)
3466
3467
# A plain write() reproduces the parsed markup unchanged.
document = self.parse(_bytes('<a><b/></a>'))
sink = BytesIO()
document.write(sink)
self.assertEqual(_bytes('<a><b/></a>'), sink.getvalue())
3476
# write(compression=9) must produce gzip data that decompresses to the
# ordinary serialisation.
document = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
sink = BytesIO()
document.write(sink, compression=9)

reader = gzip.GzipFile(fileobj=BytesIO(sink.getvalue()))
try:
    decompressed = reader.read()
finally:
    reader.close()

self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'), decompressed)
3488
# compression=0 equals the default uncompressed output; levels 1 and 9 give
# gzip data no larger than that, which decompresses to the same markup.
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))

def written(**options):
    # Serialise the tree into a fresh buffer and return the bytes.
    sink = BytesIO()
    tree.write(sink, **options)
    return sink.getvalue()

def gunzipped(data):
    # Decompress gzip data held in memory.
    reader = gzip.GzipFile(fileobj=BytesIO(data))
    try:
        return reader.read()
    finally:
        reader.close()

s0 = written(compression=0)
self.assertEqual(written(), s0)

fastest = written(compression=1)
self.assertTrue(len(fastest) <= len(s0))
s1 = gunzipped(fastest)

smallest = written(compression=9)
self.assertTrue(len(smallest) <= len(s0))
s9 = gunzipped(smallest)

for payload in (s0, s1, s9):
    self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'), payload)
3525
3537
3553
3565
3578
# Rebind the imported etree module under the same name in this scope; the
# code below reads it as self.etree (presumably a class attribute - the
# enclosing class header is not visible here).
etree = etree
3581
3603
"""This can't really be tested as long as there isn't a way to
reset the logging setup ...
"""
parse = self.etree.parse

# Collects the message text of every entry passed to the logger below.
messages = []
class Logger(self.etree.PyErrorLog):
    def log(self, entry, message, *args):
        messages.append(message)

self.etree.use_global_python_log(Logger())

# BytesIO only accepts bytes on Python 3, so the markup has to go through
# _bytes(); a plain string literal fails before the parser is reached.
f = BytesIO(_bytes('<a><b></c></b></a>'))
try:
    parse(f)
except SyntaxError:
    # The mismatched tags are expected to be rejected; only the logged
    # messages are of interest here.
    pass
finally:
    f.close()

# Each fragment must occur in at least one logged message.
for fragment in ('mismatch', ':PARSER:', ':ERR_TAG_NAME_MISMATCH:', ':1:15:'):
    self.assertTrue([message for message in messages
                     if fragment in message])
3631
3653
if __name__ == '__main__':
    # Running this module directly only prints how to invoke the tests.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)
3656