1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 import os.path
11 import unittest
12 import copy
13 import sys
14 import re
15 import gc
16 import operator
17 import tempfile
18 import gzip
19
# Directory containing this test module.  It is put at the front of the
# import path so that the sibling helper module ``common_imports`` can be
# imported even when this file is executed directly.
this_dir = os.path.dirname(__file__)
if not sys.path.count(this_dir):
    sys.path.insert(0, this_dir)
23
24 from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir, read_file
25 from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
26 from common_imports import canonicalize, sorted, _str, _bytes
27
# Print a banner naming the versions this test run exercises: the lxml
# release, the running Python, and the libxml/libxslt versions that etree
# reports as "used" and as "compiled".
print("")
print("TESTED VERSION: %s" % etree.__version__)
print(" Python: %r" % (sys.version_info,))
print(" lxml.etree: %r" % (etree.LXML_VERSION,))
print(" libxml used: %r" % (etree.LIBXML_VERSION,))
print(" libxml compiled: %r" % (etree.LIBXML_COMPILED_VERSION,))
print(" libxslt used: %r" % (etree.LIBXSLT_VERSION,))
print(" libxslt compiled: %r" % (etree.LIBXSLT_COMPILED_VERSION,))
print("")
37
# ``_unicode`` names the text string type: the ``unicode`` builtin where
# it exists, otherwise ``str``.
try:
    unicode
except NameError:
    # No ``unicode`` builtin is defined; fall back to ``str``.
    _unicode = str
else:
    _unicode = unicode
43
45 """Tests only for etree, not ElementTree"""
46 etree = etree
47
58
67
74
76 Element = self.etree.Element
77 el = Element('name')
78 self.assertRaises(ValueError, Element, '{}')
79 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
80
81 self.assertRaises(ValueError, Element, '{test}')
82 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
83
91
93 Element = self.etree.Element
94 self.assertRaises(ValueError, Element, "p'name")
95 self.assertRaises(ValueError, Element, 'p"name')
96
97 self.assertRaises(ValueError, Element, "{test}p'name")
98 self.assertRaises(ValueError, Element, '{test}p"name')
99
100 el = Element('name')
101 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
102 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
103
105 Element = self.etree.Element
106 self.assertRaises(ValueError, Element, ' name ')
107 self.assertRaises(ValueError, Element, 'na me')
108 self.assertRaises(ValueError, Element, '{test} name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
112
120
128
130 Element = self.etree.Element
131 SubElement = self.etree.SubElement
132
133 el = Element('name')
134 self.assertRaises(ValueError, SubElement, el, "p'name")
135 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
136
137 self.assertRaises(ValueError, SubElement, el, 'p"name')
138 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
139
148
157
159 QName = self.etree.QName
160 self.assertRaises(ValueError, QName, '')
161 self.assertRaises(ValueError, QName, 'test', '')
162
164 QName = self.etree.QName
165 self.assertRaises(ValueError, QName, 'p:name')
166 self.assertRaises(ValueError, QName, 'test', 'p:name')
167
169 QName = self.etree.QName
170 self.assertRaises(ValueError, QName, ' name ')
171 self.assertRaises(ValueError, QName, 'na me')
172 self.assertRaises(ValueError, QName, 'test', ' name')
173
181
183
184 QName = self.etree.QName
185 qname1 = QName('http://myns', 'a')
186 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
187
188 qname2 = QName(a)
189 self.assertEquals(a.tag, qname1.text)
190 self.assertEquals(qname1.text, qname2.text)
191 self.assertEquals(qname1, qname2)
192
194
195 etree = self.etree
196 qname = etree.QName('http://myns', 'a')
197 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
198 a.text = qname
199
200 self.assertEquals("p:a", a.text)
201
210
225
231
239
253
275
277 XML = self.etree.XML
278 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
279
280 root = XML(xml)
281 self.etree.strip_elements(root, 'a')
282 self.assertEquals(_bytes('<test><x></x></test>'),
283 self._writeElement(root))
284
285 root = XML(xml)
286 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
287 self.assertEquals(_bytes('<test><a></a><x><a></a></x></test>'),
288 self._writeElement(root))
289
290 root = XML(xml)
291 self.etree.strip_elements(root, 'c')
292 self.assertEquals(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
293 self._writeElement(root))
294
296 XML = self.etree.XML
297 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
298
299 root = XML(xml)
300 self.etree.strip_elements(root, 'a')
301 self.assertEquals(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
302 self._writeElement(root))
303
304 root = XML(xml)
305 self.etree.strip_elements(root, '{urn:a}b', 'c')
306 self.assertEquals(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
307 self._writeElement(root))
308
309 root = XML(xml)
310 self.etree.strip_elements(root, '{urn:a}*', 'c')
311 self.assertEquals(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
312 self._writeElement(root))
313
314 root = XML(xml)
315 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
316 self.assertEquals(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
317 self._writeElement(root))
318
337
363
390
416
435
448
459
465
467 XML = self.etree.XML
468 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
469 self.assertEquals(root[0].target, "mypi")
470 self.assertEquals(root[0].get('my'), "1")
471 self.assertEquals(root[0].get('test'), " abc ")
472 self.assertEquals(root[0].get('quotes'), "' '")
473 self.assertEquals(root[0].get('only'), None)
474 self.assertEquals(root[0].get('names'), None)
475 self.assertEquals(root[0].get('nope'), None)
476
478 XML = self.etree.XML
479 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
480 self.assertEquals(root[0].target, "mypi")
481 self.assertEquals(root[0].attrib['my'], "1")
482 self.assertEquals(root[0].attrib['test'], " abc ")
483 self.assertEquals(root[0].attrib['quotes'], "' '")
484 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
485 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
486 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
487
489
490 ProcessingInstruction = self.etree.ProcessingInstruction
491
492 a = ProcessingInstruction("PI", "ONE")
493 b = copy.deepcopy(a)
494 b.text = "ANOTHER"
495
496 self.assertEquals('ONE', a.text)
497 self.assertEquals('ANOTHER', b.text)
498
514
529
539
551
570
575
588
599
600 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
601 events = list(iterparse(f, events=('end', 'comment')))
602 root = events[-1][1]
603 self.assertEquals(6, len(events))
604 self.assertEquals(['A', ' B ', 'c', 'b', 'C', 'a'],
605 [ name(*item) for item in events ])
606 self.assertEquals(
607 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
608 tostring(root))
609
621
622 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
623 events = list(iterparse(f, events=('end', 'pi')))
624 root = events[-2][1]
625 self.assertEquals(8, len(events))
626 self.assertEquals([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
627 ('pid','d'), 'a', ('pie','e')],
628 [ name(*item) for item in events ])
629 self.assertEquals(
630 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
631 tostring(ElementTree(root)))
632
647
653
655 iterparse = self.etree.iterparse
656 f = BytesIO("""
657 <a> \n \n <b> b test </b> \n
658
659 \n\t <c> \n </c> </a> \n """)
660 iterator = iterparse(f, remove_blank_text=True)
661 text = [ (element.text, element.tail)
662 for event, element in iterator ]
663 self.assertEquals(
664 [(" b test ", None), (" \n ", None), (None, None)],
665 text)
666
668 iterparse = self.etree.iterparse
669 f = BytesIO('<a><b><d/></b><c/></a>')
670
671 iterator = iterparse(f, tag="b", events=('start', 'end'))
672 events = list(iterator)
673 root = iterator.root
674 self.assertEquals(
675 [('start', root[0]), ('end', root[0])],
676 events)
677
679 iterparse = self.etree.iterparse
680 f = BytesIO('<a><b><d/></b><c/></a>')
681
682 iterator = iterparse(f, tag="*", events=('start', 'end'))
683 events = list(iterator)
684 self.assertEquals(
685 8,
686 len(events))
687
689 iterparse = self.etree.iterparse
690 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
691
692 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
693 events = list(iterator)
694 root = iterator.root
695 self.assertEquals(
696 [('start', root[0]), ('end', root[0])],
697 events)
698
700 iterparse = self.etree.iterparse
701 f = BytesIO('<a><b><d/></b><c/></a>')
702 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
703 events = list(iterator)
704 root = iterator.root
705 self.assertEquals(
706 [('start', root[0]), ('end', root[0])],
707 events)
708
709 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
710 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
711 events = list(iterator)
712 root = iterator.root
713 self.assertEquals([], events)
714
716 iterparse = self.etree.iterparse
717 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
718 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
719 events = list(iterator)
720 self.assertEquals(8, len(events))
721
723 iterparse = self.etree.iterparse
724 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
725 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
726 events = list(iterator)
727 self.assertEquals([], events)
728
729 f = BytesIO('<a><b><d/></b><c/></a>')
730 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
731 events = list(iterator)
732 self.assertEquals(8, len(events))
733
735 text = _str('Søk på nettet')
736 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
737 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
738 ).encode('iso-8859-1')
739
740 self.assertRaises(self.etree.ParseError,
741 list, self.etree.iterparse(BytesIO(xml_latin1)))
742
744 text = _str('Søk på nettet', encoding="UTF-8")
745 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
746 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
747 ).encode('iso-8859-1')
748
749 iterator = self.etree.iterparse(BytesIO(xml_latin1),
750 encoding="iso-8859-1")
751 self.assertEquals(1, len(list(iterator)))
752
753 a = iterator.root
754 self.assertEquals(a.text, text)
755
757 tostring = self.etree.tostring
758 f = BytesIO('<root><![CDATA[test]]></root>')
759 context = self.etree.iterparse(f, strip_cdata=False)
760 content = [ el.text for event,el in context ]
761
762 self.assertEquals(['test'], content)
763 self.assertEquals(_bytes('<root><![CDATA[test]]></root>'),
764 tostring(context.root))
765
769
774
793
794
795
806 def end(self, tag):
807 events.append("end")
808 assertEquals("TAG", tag)
809 def close(self):
810 return "DONE"
811
812 parser = self.etree.XMLParser(target=Target())
813 tree = self.etree.ElementTree()
814
815 self.assertRaises(TypeError,
816 tree.parse, BytesIO("<TAG/>"), parser=parser)
817 self.assertEquals(["start", "end"], events)
818
820
821 events = []
822 class Target(object):
823 def start(self, tag, attrib):
824 events.append("start-" + tag)
825 def end(self, tag):
826 events.append("end-" + tag)
827 if tag == 'a':
828 raise ValueError("dead and gone")
829 def data(self, data):
830 events.append("data-" + data)
831 def close(self):
832 events.append("close")
833 return "DONE"
834
835 parser = self.etree.XMLParser(target=Target())
836
837 try:
838 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
839 done = parser.close()
840 self.fail("error expected, but parsing succeeded")
841 except ValueError:
842 done = 'value error received as expected'
843
844 self.assertEquals(["start-root", "data-A", "start-a",
845 "data-ca", "end-a", "close"],
846 events)
847
849
850 events = []
851 class Target(object):
852 def start(self, tag, attrib):
853 events.append("start-" + tag)
854 def end(self, tag):
855 events.append("end-" + tag)
856 if tag == 'a':
857 raise ValueError("dead and gone")
858 def data(self, data):
859 events.append("data-" + data)
860 def close(self):
861 events.append("close")
862 return "DONE"
863
864 parser = self.etree.XMLParser(target=Target())
865
866 try:
867 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
868 parser=parser)
869 self.fail("error expected, but parsing succeeded")
870 except ValueError:
871 done = 'value error received as expected'
872
873 self.assertEquals(["start-root", "data-A", "start-a",
874 "data-ca", "end-a", "close"],
875 events)
876
882 def end(self, tag):
883 events.append("end-" + tag)
884 def data(self, data):
885 events.append("data-" + data)
886 def comment(self, text):
887 events.append("comment-" + text)
888 def close(self):
889 return "DONE"
890
891 parser = self.etree.XMLParser(target=Target())
892
893 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
894 done = parser.close()
895
896 self.assertEquals("DONE", done)
897 self.assertEquals(["comment-a", "start-root", "data-A", "comment-b",
898 "start-sub", "end-sub", "comment-c", "data-B",
899 "end-root", "comment-d"],
900 events)
901
903 events = []
904 class Target(object):
905 def start(self, tag, attrib):
906 events.append("start-" + tag)
907 def end(self, tag):
908 events.append("end-" + tag)
909 def data(self, data):
910 events.append("data-" + data)
911 def pi(self, target, data):
912 events.append("pi-" + target + "-" + data)
913 def close(self):
914 return "DONE"
915
916 parser = self.etree.XMLParser(target=Target())
917
918 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
919 done = parser.close()
920
921 self.assertEquals("DONE", done)
922 self.assertEquals(["pi-test-a", "start-root", "data-A", "pi-test-b",
923 "data-B", "end-root", "pi-test-c"],
924 events)
925
927 events = []
928 class Target(object):
929 def start(self, tag, attrib):
930 events.append("start-" + tag)
931 def end(self, tag):
932 events.append("end-" + tag)
933 def data(self, data):
934 events.append("data-" + data)
935 def close(self):
936 return "DONE"
937
938 parser = self.etree.XMLParser(target=Target(),
939 strip_cdata=False)
940
941 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
942 done = parser.close()
943
944 self.assertEquals("DONE", done)
945 self.assertEquals(["start-root", "data-A", "start-a",
946 "data-ca", "end-a", "data-B", "end-root"],
947 events)
948
950 events = []
951 class Target(object):
952 def start(self, tag, attrib):
953 events.append("start-" + tag)
954 def end(self, tag):
955 events.append("end-" + tag)
956 def data(self, data):
957 events.append("data-" + data)
958 def close(self):
959 events.append("close")
960 return "DONE"
961
962 parser = self.etree.XMLParser(target=Target(),
963 recover=True)
964
965 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
966 done = parser.close()
967
968 self.assertEquals("DONE", done)
969 self.assertEquals(["start-root", "data-A", "start-a",
970 "data-ca", "end-a", "data-B",
971 "end-root", "close"],
972 events)
973
983
993
1002
1012
1014 iterwalk = self.etree.iterwalk
1015 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1016
1017 iterator = iterwalk(root, events=('start','end'))
1018 events = list(iterator)
1019 self.assertEquals(
1020 [('start', root), ('start', root[0]), ('end', root[0]),
1021 ('start', root[1]), ('end', root[1]), ('end', root)],
1022 events)
1023
1034
1036 iterwalk = self.etree.iterwalk
1037 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1038
1039 attr_name = '{testns}bla'
1040 events = []
1041 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1042 for event, elem in iterator:
1043 events.append(event)
1044 if event == 'start':
1045 if elem.tag != '{ns1}a':
1046 elem.set(attr_name, 'value')
1047
1048 self.assertEquals(
1049 ['start-ns', 'start', 'start', 'start-ns', 'start',
1050 'end', 'end-ns', 'end', 'end', 'end-ns'],
1051 events)
1052
1053 self.assertEquals(
1054 None,
1055 root.get(attr_name))
1056 self.assertEquals(
1057 'value',
1058 root[0].get(attr_name))
1059
1070
1072 parse = self.etree.parse
1073 parser = self.etree.XMLParser(dtd_validation=True)
1074 assertEqual = self.assertEqual
1075 test_url = _str("__nosuch.dtd")
1076
1077 class MyResolver(self.etree.Resolver):
1078 def resolve(self, url, id, context):
1079 assertEqual(url, test_url)
1080 return self.resolve_string(
1081 _str('''<!ENTITY myentity "%s">
1082 <!ELEMENT doc ANY>''') % url, context)
1083
1084 parser.resolvers.add(MyResolver())
1085
1086 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1087 tree = parse(StringIO(xml), parser)
1088 root = tree.getroot()
1089 self.assertEquals(root.text, test_url)
1090
1092 parse = self.etree.parse
1093 parser = self.etree.XMLParser(dtd_validation=True)
1094 assertEqual = self.assertEqual
1095 test_url = _str("__nosuch.dtd")
1096
1097 class MyResolver(self.etree.Resolver):
1098 def resolve(self, url, id, context):
1099 assertEqual(url, test_url)
1100 return self.resolve_string(
1101 (_str('''<!ENTITY myentity "%s">
1102 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1103 context)
1104
1105 parser.resolvers.add(MyResolver())
1106
1107 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1108 tree = parse(StringIO(xml), parser)
1109 root = tree.getroot()
1110 self.assertEquals(root.text, test_url)
1111
1113 parse = self.etree.parse
1114 parser = self.etree.XMLParser(dtd_validation=True)
1115 assertEqual = self.assertEqual
1116 test_url = _str("__nosuch.dtd")
1117
1118 class MyResolver(self.etree.Resolver):
1119 def resolve(self, url, id, context):
1120 assertEqual(url, test_url)
1121 return self.resolve_file(
1122 SillyFileLike(
1123 _str('''<!ENTITY myentity "%s">
1124 <!ELEMENT doc ANY>''') % url), context)
1125
1126 parser.resolvers.add(MyResolver())
1127
1128 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1129 tree = parse(StringIO(xml), parser)
1130 root = tree.getroot()
1131 self.assertEquals(root.text, test_url)
1132
1134 parse = self.etree.parse
1135 parser = self.etree.XMLParser(attribute_defaults=True)
1136 assertEqual = self.assertEqual
1137 test_url = _str("__nosuch.dtd")
1138
1139 class MyResolver(self.etree.Resolver):
1140 def resolve(self, url, id, context):
1141 assertEqual(url, test_url)
1142 return self.resolve_filename(
1143 fileInTestDir('test.dtd'), context)
1144
1145 parser.resolvers.add(MyResolver())
1146
1147 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1148 tree = parse(StringIO(xml), parser)
1149 root = tree.getroot()
1150 self.assertEquals(
1151 root.attrib, {'default': 'valueA'})
1152 self.assertEquals(
1153 root[0].attrib, {'default': 'valueB'})
1154
1166
1167 parser.resolvers.add(MyResolver())
1168
1169 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1170 tree = parse(StringIO(xml), parser,
1171 base_url=fileInTestDir('__test.xml'))
1172 root = tree.getroot()
1173 self.assertEquals(
1174 root.attrib, {'default': 'valueA'})
1175 self.assertEquals(
1176 root[0].attrib, {'default': 'valueB'})
1177
1179 parse = self.etree.parse
1180 parser = self.etree.XMLParser(attribute_defaults=True)
1181 assertEqual = self.assertEqual
1182 test_url = _str("__nosuch.dtd")
1183
1184 class MyResolver(self.etree.Resolver):
1185 def resolve(self, url, id, context):
1186 assertEqual(url, test_url)
1187 return self.resolve_file(
1188 open(fileInTestDir('test.dtd'), 'rb'), context)
1189
1190 parser.resolvers.add(MyResolver())
1191
1192 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1193 tree = parse(StringIO(xml), parser)
1194 root = tree.getroot()
1195 self.assertEquals(
1196 root.attrib, {'default': 'valueA'})
1197 self.assertEquals(
1198 root[0].attrib, {'default': 'valueB'})
1199
1201 parse = self.etree.parse
1202 parser = self.etree.XMLParser(load_dtd=True)
1203 assertEqual = self.assertEqual
1204 test_url = _str("__nosuch.dtd")
1205
1206 class check(object):
1207 resolved = False
1208
1209 class MyResolver(self.etree.Resolver):
1210 def resolve(self, url, id, context):
1211 assertEqual(url, test_url)
1212 check.resolved = True
1213 return self.resolve_empty(context)
1214
1215 parser.resolvers.add(MyResolver())
1216
1217 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1218 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1219 self.assert_(check.resolved)
1220
1227
1228 class MyResolver(self.etree.Resolver):
1229 def resolve(self, url, id, context):
1230 raise _LocalException
1231
1232 parser.resolvers.add(MyResolver())
1233
1234 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1235 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1236
1237 if etree.LIBXML_VERSION > (2,6,20):
1254
1256 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1257 <root>
1258 <child1/>
1259 <child2/>
1260 <child3> </child3>
1261 </root>''')
1262
1263 parser = self.etree.XMLParser(resolve_entities=False)
1264 root = etree.fromstring(xml, parser)
1265 self.assertEquals([ el.tag for el in root ],
1266 ['child1', 'child2', 'child3'])
1267
1268 root[0] = root[-1]
1269 self.assertEquals([ el.tag for el in root ],
1270 ['child3', 'child2'])
1271 self.assertEquals(root[0][0].text, ' ')
1272 self.assertEquals(root[0][0].name, 'nbsp')
1273
1289
1296
1298 Entity = self.etree.Entity
1299 self.assertRaises(ValueError, Entity, 'a b c')
1300 self.assertRaises(ValueError, Entity, 'a,b')
1301 self.assertRaises(ValueError, Entity, 'a\0b')
1302 self.assertRaises(ValueError, Entity, '#abc')
1303 self.assertRaises(ValueError, Entity, '#xxyz')
1304
1317
1330
1332 CDATA = self.etree.CDATA
1333 Element = self.etree.Element
1334
1335 root = Element("root")
1336 cdata = CDATA('test')
1337
1338 self.assertRaises(TypeError,
1339 setattr, root, 'tail', cdata)
1340 self.assertRaises(TypeError,
1341 root.set, 'attr', cdata)
1342 self.assertRaises(TypeError,
1343 operator.setitem, root.attrib, 'attr', cdata)
1344
1353
1362
1363
1373
1382
1395
1408
1414
1420
1435
1448
1463
1476
1491
1504
1519
1532
1533
1541
1542
1552
1553
1568
1569
1579
1580
1591
1592
1594 self.assertRaises(TypeError, self.etree.dump, None)
1595
1608
1621
1642
1651
1660
1669
1678
1680 XML = self.etree.XML
1681
1682 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1683 result = []
1684 for el in root.iterchildren(tag=['two', 'three']):
1685 result.append(el.text)
1686 self.assertEquals(['Two', 'Bla', None], result)
1687
1696
1717
1732
1734 Element = self.etree.Element
1735 SubElement = self.etree.SubElement
1736
1737 a = Element('a')
1738 b = SubElement(a, 'b')
1739 c = SubElement(a, 'c')
1740 d = SubElement(b, 'd')
1741 self.assertEquals(
1742 [b, a],
1743 list(d.iterancestors(tag=('a', 'b'))))
1744 self.assertEquals(
1745 [],
1746 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
1747 self.assertEquals(
1748 [],
1749 list(d.iterancestors(tag=('d', 'x'))))
1750 self.assertEquals(
1751 [b, a],
1752 list(d.iterancestors(tag=('b', '*'))))
1753 self.assertEquals(
1754 [b],
1755 list(d.iterancestors(tag=('b', 'c'))))
1756
1773
1794
1796 Element = self.etree.Element
1797 SubElement = self.etree.SubElement
1798
1799 a = Element('a')
1800 b = SubElement(a, 'b')
1801 c = SubElement(a, 'c')
1802 d = SubElement(b, 'd')
1803 e = SubElement(c, 'e')
1804
1805 self.assertEquals(
1806 [b, e],
1807 list(a.iterdescendants(tag=('a', 'b', 'e'))))
1808 a2 = SubElement(e, 'a')
1809 self.assertEquals(
1810 [b, a2],
1811 list(a.iterdescendants(tag=('a', 'b'))))
1812 self.assertEquals(
1813 [],
1814 list(c.iterdescendants(tag=('x', 'y', 'z'))))
1815 self.assertEquals(
1816 [b, d, c, e, a2],
1817 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
1818
1836
1853
1871
1895
1897 Element = self.etree.Element
1898 SubElement = self.etree.SubElement
1899
1900 a = Element('a')
1901 b = SubElement(a, 'b')
1902 c = SubElement(a, 'c')
1903 d = SubElement(b, 'd')
1904 self.assertEquals(
1905 [],
1906 list(a.itersiblings(tag='XXX')))
1907 self.assertEquals(
1908 [c],
1909 list(b.itersiblings(tag='c')))
1910 self.assertEquals(
1911 [c],
1912 list(b.itersiblings(tag='*')))
1913 self.assertEquals(
1914 [b],
1915 list(c.itersiblings(preceding=True, tag='b')))
1916 self.assertEquals(
1917 [],
1918 list(c.itersiblings(preceding=True, tag='c')))
1919
1921 Element = self.etree.Element
1922 SubElement = self.etree.SubElement
1923
1924 a = Element('a')
1925 b = SubElement(a, 'b')
1926 c = SubElement(a, 'c')
1927 d = SubElement(b, 'd')
1928 e = SubElement(a, 'e')
1929 self.assertEquals(
1930 [],
1931 list(a.itersiblings(tag=('XXX', 'YYY'))))
1932 self.assertEquals(
1933 [c, e],
1934 list(b.itersiblings(tag=('c', 'd', 'e'))))
1935 self.assertEquals(
1936 [b],
1937 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
1938 self.assertEquals(
1939 [c, b],
1940 list(e.itersiblings(preceding=True, tag=('c', '*'))))
1941
1943 parseid = self.etree.parseid
1944 XML = self.etree.XML
1945 xml_text = _bytes('''
1946 <!DOCTYPE document [
1947 <!ELEMENT document (h1,p)*>
1948 <!ELEMENT h1 (#PCDATA)>
1949 <!ATTLIST h1 myid ID #REQUIRED>
1950 <!ELEMENT p (#PCDATA)>
1951 <!ATTLIST p someid ID #REQUIRED>
1952 ]>
1953 <document>
1954 <h1 myid="chapter1">...</h1>
1955 <p id="note1" class="note">...</p>
1956 <p>Regular paragraph.</p>
1957 <p xml:id="xmlid">XML:ID paragraph.</p>
1958 <p someid="warn1" class="warning">...</p>
1959 </document>
1960 ''')
1961
1962 tree, dic = parseid(BytesIO(xml_text))
1963 root = tree.getroot()
1964 root2 = XML(xml_text)
1965 self.assertEquals(self._writeElement(root),
1966 self._writeElement(root2))
1967 expected = {
1968 "chapter1" : root[0],
1969 "xmlid" : root[3],
1970 "warn1" : root[4]
1971 }
1972 self.assert_("chapter1" in dic)
1973 self.assert_("warn1" in dic)
1974 self.assert_("xmlid" in dic)
1975 self._checkIDDict(dic, expected)
1976
1978 XMLDTDID = self.etree.XMLDTDID
1979 XML = self.etree.XML
1980 xml_text = _bytes('''
1981 <!DOCTYPE document [
1982 <!ELEMENT document (h1,p)*>
1983 <!ELEMENT h1 (#PCDATA)>
1984 <!ATTLIST h1 myid ID #REQUIRED>
1985 <!ELEMENT p (#PCDATA)>
1986 <!ATTLIST p someid ID #REQUIRED>
1987 ]>
1988 <document>
1989 <h1 myid="chapter1">...</h1>
1990 <p id="note1" class="note">...</p>
1991 <p>Regular paragraph.</p>
1992 <p xml:id="xmlid">XML:ID paragraph.</p>
1993 <p someid="warn1" class="warning">...</p>
1994 </document>
1995 ''')
1996
1997 root, dic = XMLDTDID(xml_text)
1998 root2 = XML(xml_text)
1999 self.assertEquals(self._writeElement(root),
2000 self._writeElement(root2))
2001 expected = {
2002 "chapter1" : root[0],
2003 "xmlid" : root[3],
2004 "warn1" : root[4]
2005 }
2006 self.assert_("chapter1" in dic)
2007 self.assert_("warn1" in dic)
2008 self.assert_("xmlid" in dic)
2009 self._checkIDDict(dic, expected)
2010
2012 XMLDTDID = self.etree.XMLDTDID
2013 XML = self.etree.XML
2014 xml_text = _bytes('''
2015 <document>
2016 <h1 myid="chapter1">...</h1>
2017 <p id="note1" class="note">...</p>
2018 <p>Regular paragraph.</p>
2019 <p someid="warn1" class="warning">...</p>
2020 </document>
2021 ''')
2022
2023 root, dic = XMLDTDID(xml_text)
2024 root2 = XML(xml_text)
2025 self.assertEquals(self._writeElement(root),
2026 self._writeElement(root2))
2027 expected = {}
2028 self._checkIDDict(dic, expected)
2029
2031 self.assertEquals(len(dic),
2032 len(expected))
2033 self.assertEquals(sorted(dic.items()),
2034 sorted(expected.items()))
2035 if sys.version_info < (3,):
2036 self.assertEquals(sorted(dic.iteritems()),
2037 sorted(expected.iteritems()))
2038 self.assertEquals(sorted(dic.keys()),
2039 sorted(expected.keys()))
2040 if sys.version_info < (3,):
2041 self.assertEquals(sorted(dic.iterkeys()),
2042 sorted(expected.iterkeys()))
2043 if sys.version_info < (3,):
2044 self.assertEquals(sorted(dic.values()),
2045 sorted(expected.values()))
2046 self.assertEquals(sorted(dic.itervalues()),
2047 sorted(expected.itervalues()))
2048
2050 etree = self.etree
2051
2052 r = {'foo': 'http://ns.infrae.com/foo'}
2053 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2054 self.assertEquals(
2055 'foo',
2056 e.prefix)
2057 self.assertEquals(
2058 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2059 self._writeElement(e))
2060
2062 etree = self.etree
2063
2064 r = {None: 'http://ns.infrae.com/foo'}
2065 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2066 self.assertEquals(
2067 None,
2068 e.prefix)
2069 self.assertEquals(
2070 '{http://ns.infrae.com/foo}bar',
2071 e.tag)
2072 self.assertEquals(
2073 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2074 self._writeElement(e))
2075
2077 etree = self.etree
2078
2079 r = {None: 'http://ns.infrae.com/foo',
2080 'hoi': 'http://ns.infrae.com/hoi'}
2081 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2082 e.set('{http://ns.infrae.com/hoi}test', 'value')
2083 self.assertEquals(
2084 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2085 self._writeElement(e))
2086
2088 etree = self.etree
2089 r = {None: 'http://ns.infrae.com/foo',
2090 'hoi': 'http://ns.infrae.com/hoi'}
2091 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2092 tree = etree.ElementTree(element=e)
2093 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2094 self.assertEquals(
2095 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2096 self._writeElement(e))
2097
2099 etree = self.etree
2100
2101 r = {None: 'http://ns.infrae.com/foo'}
2102 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2103 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2104
2105 e1.append(e2)
2106
2107 self.assertEquals(
2108 None,
2109 e1.prefix)
2110 self.assertEquals(
2111 None,
2112 e1[0].prefix)
2113 self.assertEquals(
2114 '{http://ns.infrae.com/foo}bar',
2115 e1.tag)
2116 self.assertEquals(
2117 '{http://ns.infrae.com/foo}bar',
2118 e1[0].tag)
2119
2121 etree = self.etree
2122
2123 r = {None: 'http://ns.infrae.com/BAR'}
2124 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2125 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2126
2127 e1.append(e2)
2128
2129 self.assertEquals(
2130 None,
2131 e1.prefix)
2132 self.assertNotEquals(
2133 None,
2134 e2.prefix)
2135 self.assertEquals(
2136 '{http://ns.infrae.com/BAR}bar',
2137 e1.tag)
2138 self.assertEquals(
2139 '{http://ns.infrae.com/foo}bar',
2140 e2.tag)
2141
2143 ns_href = "http://a.b.c"
2144 one = self.etree.fromstring(
2145 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2146 baz = one[0][0]
2147
2148 two = self.etree.fromstring(
2149 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2150 two.append(baz)
2151 del one
2152
2153 self.assertEquals('{%s}baz' % ns_href, baz.tag)
2154 self.assertEquals(
2155 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2156 self.etree.tostring(two))
2157
2167
2169 etree = self.etree
2170
2171 r = {None: 'http://ns.infrae.com/foo',
2172 'hoi': 'http://ns.infrae.com/hoi'}
2173 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2174 self.assertEquals(
2175 r,
2176 e.nsmap)
2177
2179 etree = self.etree
2180
2181 re = {None: 'http://ns.infrae.com/foo',
2182 'hoi': 'http://ns.infrae.com/hoi'}
2183 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2184
2185 rs = {None: 'http://ns.infrae.com/honk',
2186 'top': 'http://ns.infrae.com/top'}
2187 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2188
2189 r = re.copy()
2190 r.update(rs)
2191 self.assertEquals(re, e.nsmap)
2192 self.assertEquals(r, s.nsmap)
2193
2195 etree = self.etree
2196 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2197 self.assertEquals({'hha': None}, el.nsmap)
2198
# getiterator() with several tag names passed as separate positional
# arguments. Tree under test: a -> (b -> d), (c -> e, f).
2200 Element = self.etree.Element
2201 SubElement = self.etree.SubElement
2202
2203 a = Element('a')
2204 b = SubElement(a, 'b')
2205 c = SubElement(a, 'c')
2206 d = SubElement(b, 'd')
2207 e = SubElement(c, 'e')
2208 f = SubElement(c, 'f')
2209
# Elements matching any of the given names are expected.
2210 self.assertEquals(
2211 [a, b],
2212 list(a.getiterator('a', 'b')))
# Names that occur nowhere in the tree yield nothing.
2213 self.assertEquals(
2214 [],
2215 list(a.getiterator('x', 'y')))
# The expected order is [a, f] although the names are passed as 'f', 'a'.
2216 self.assertEquals(
2217 [a, f],
2218 list(a.getiterator('f', 'a')))
# A '*' among the names is expected to match every element below (and
# including) the start element c.
2219 self.assertEquals(
2220 [c, e, f],
2221 list(c.getiterator('c', '*', 'a')))
# Empty tuples as filters are expected to match nothing.
2222 self.assertEquals(
2223 [],
2224 list(a.getiterator( (), () )))
2225
# Same checks as for separate positional tag names, but with the names
# bundled into a single tuple argument.
# Tree under test: a -> (b -> d), (c -> e, f).
2227 Element = self.etree.Element
2228 SubElement = self.etree.SubElement
2229
2230 a = Element('a')
2231 b = SubElement(a, 'b')
2232 c = SubElement(a, 'c')
2233 d = SubElement(b, 'd')
2234 e = SubElement(c, 'e')
2235 f = SubElement(c, 'f')
2236
2237 self.assertEquals(
2238 [a, b],
2239 list(a.getiterator( ('a', 'b') )))
2240 self.assertEquals(
2241 [],
2242 list(a.getiterator( ('x', 'y') )))
# The expected order is [a, f] although the tuple lists 'f' first.
2243 self.assertEquals(
2244 [a, f],
2245 list(a.getiterator( ('f', 'a') )))
# '*' inside the tuple is expected to match every element from c downwards.
2246 self.assertEquals(
2247 [c, e, f],
2248 list(c.getiterator( ('c', '*', 'a') )))
# An empty tuple is expected to match nothing.
2249 self.assertEquals(
2250 [],
2251 list(a.getiterator( () )))
2252
# getiterator() with namespace-qualified filters. Elements a, b, c, e are
# in namespace 'a'; d and f are in namespace 'b'; g has no namespace.
2254 Element = self.etree.Element
2255 SubElement = self.etree.SubElement
2256
2257 a = Element('{a}a')
2258 b = SubElement(a, '{a}b')
2259 c = SubElement(a, '{a}c')
2260 d = SubElement(b, '{b}d')
2261 e = SubElement(c, '{a}e')
2262 f = SubElement(c, '{b}f')
2263 g = SubElement(c, 'g')
2264
# A fully qualified name matches only that namespace + local name ...
2265 self.assertEquals(
2266 [a],
2267 list(a.getiterator('{a}a')))
2268 self.assertEquals(
2269 [],
2270 list(a.getiterator('{b}a')))
# ... and a bare local name does not match a namespaced element.
2271 self.assertEquals(
2272 [],
2273 list(a.getiterator('a')))
# '*' matches everything, whatever the namespace.
2274 self.assertEquals(
2275 [a,b,d,c,e,f,g],
2276 list(a.getiterator('*')))
# '{b}*' matches every element in namespace 'b' below the start element.
2277 self.assertEquals(
2278 [f],
2279 list(c.getiterator('{b}*')))
2280 self.assertEquals(
2281 [d, f],
2282 list(a.getiterator('{b}*')))
# 'g' and '{}g' are both expected to select the un-namespaced g, and
# '{}*' is expected to select only elements without a namespace.
2283 self.assertEquals(
2284 [g],
2285 list(a.getiterator('g')))
2286 self.assertEquals(
2287 [g],
2288 list(a.getiterator('{}g')))
2289 self.assertEquals(
2290 [g],
2291 list(a.getiterator('{}*')))
2292
# getiterator() with the '{*}' namespace wildcard: elements are selected by
# local name only. The tree mixes namespaces nsA, nsB and no namespace for
# the local names 'b' and 'e'.
2294 Element = self.etree.Element
2295 SubElement = self.etree.SubElement
2296
2297 a = Element('{a}a')
2298 b = SubElement(a, '{nsA}b')
2299 c = SubElement(b, '{nsB}b')
2300 d = SubElement(a, 'b')
2301 e = SubElement(a, '{nsA}e')
2302 f = SubElement(e, '{nsB}e')
2303 g = SubElement(e, 'e')
2304
# '{*}b' / '{*}e' match the local name in any namespace, including none.
2305 self.assertEquals(
2306 [b, c, d],
2307 list(a.getiterator('{*}b')))
2308 self.assertEquals(
2309 [e, f, g],
2310 list(a.getiterator('{*}e')))
# '{*}*' matches every element of the tree.
2311 self.assertEquals(
2312 [a, b, c, d, e, f, g],
2313 list(a.getiterator('{*}*')))
2314
2339
2355
2372
2379
2386
2395
# findall() with namespace-qualified paths. The document holds two <x:b>
# elements (namespace 'X') and three un-namespaced <b> elements.
2397 XML = self.etree.XML
2398 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
# '{X}b' and '{X}*' both find only the two elements in namespace 'X';
# a plain 'b' finds only the three elements without a namespace.
2399 self.assertEquals(len(root.findall(".//{X}b")), 2)
2400 self.assertEquals(len(root.findall(".//{X}*")), 2)
2401 self.assertEquals(len(root.findall(".//b")), 3)
2402
2409
# Element.index(child[, start[, stop]]) on a parent with ten children
# a0..a9, including negative and out-of-range bounds.
2411 etree = self.etree
2412 e = etree.Element('foo')
2413 for i in range(10):
2414 etree.SubElement(e, 'a%s' % i)
# Without bounds, every child is found at its own position.
2415 for i in range(10):
2416 self.assertEquals(
2417 i,
2418 e.index(e[i]))
# The start position itself is part of the searched range.
2419 self.assertEquals(
2420 3, e.index(e[3], 3))
# A child outside [start, stop) is reported with ValueError; the
# stop bound is exclusive and may be negative.
2421 self.assertRaises(
2422 ValueError, e.index, e[3], 4)
2423 self.assertRaises(
2424 ValueError, e.index, e[3], 0, 2)
2425 self.assertRaises(
2426 ValueError, e.index, e[8], 0, -3)
2427 self.assertRaises(
2428 ValueError, e.index, e[8], -5, -3)
2429 self.assertEquals(
2430 8, e.index(e[8], 0, -1))
# A negative start beyond the first child (-12 with ten children) is
# still expected to search from the beginning.
2431 self.assertEquals(
2432 8, e.index(e[8], -12, -1))
2433 self.assertEquals(
2434 0, e.index(e[0], -12, -1))
2435
# Element.replace() where the replacement is already a child of the same
# parent: the replacement is moved, so the parent loses one child.
2437 etree = self.etree
2438 e = etree.Element('foo')
2439 for i in range(10):
2440 el = etree.SubElement(e, 'a%s' % i)
2441 el.text = "text%d" % i
2442 el.tail = "tail%d" % i
2443
# Keep references so identity can be checked after the tree is modified.
2444 child0 = e[0]
2445 child1 = e[1]
2446 child2 = e[2]
2447
# Replace the first child by the second one.
2448 e.replace(e[0], e[1])
2449 self.assertEquals(
2450 9, len(e))
2451 self.assertEquals(
2452 child1, e[0])
# text and tail stay with their own elements: the moved child keeps its
# values and the removed child keeps its tail.
2453 self.assertEquals(
2454 child1.text, "text1")
2455 self.assertEquals(
2456 child1.tail, "tail1")
2457 self.assertEquals(
2458 child0.tail, "tail0")
2459 self.assertEquals(
2460 child2, e[1])
2461
# Replace the last child by the current first one (child1): it moves to
# the end and child2 becomes the first child.
2462 e.replace(e[-1], e[0])
2463 self.assertEquals(
2464 child1, e[-1])
2465 self.assertEquals(
2466 child1.text, "text1")
2467 self.assertEquals(
2468 child1.tail, "tail1")
2469 self.assertEquals(
2470 child2, e[0])
2471
# Element.replace() with a freshly created element that has its own text
# and tail.
2473 etree = self.etree
2474 e = etree.Element('foo')
2475 for i in range(10):
2476 etree.SubElement(e, 'a%s' % i)
2477
2478 new_element = etree.Element("test")
2479 new_element.text = "TESTTEXT"
2480 new_element.tail = "TESTTAIL"
2481 child1 = e[1]
2482 e.replace(e[0], new_element)
# The new element takes the first position and keeps its text and tail;
# the following sibling is untouched.
2483 self.assertEquals(
2484 new_element, e[0])
2485 self.assertEquals(
2486 "TESTTEXT",
2487 e[0].text)
2488 self.assertEquals(
2489 "TESTTAIL",
2490 e[0].tail)
2491 self.assertEquals(
2492 child1, e[1])
2493
2509
2527
2545
2563
# Extended-slice assignment with a replacement list of the wrong length
# must raise ValueError and leave the children unchanged.
2565 Element = self.etree.Element
2566 SubElement = self.etree.SubElement
# Bail out (after printing a notice) if the name `slice` is not defined.
2567 try:
2568 slice
2569 except NameError:
2570 print("slice() not found")
2571 return
2572
2573 a = Element('a')
2574 b = SubElement(a, 'b')
2575 c = SubElement(a, 'c')
2576 d = SubElement(a, 'd')
2577 e = SubElement(a, 'e')
2578
2579 x = Element('x')
2580 y = Element('y')
2581 z = Element('z')
2582
# slice(1, None, 2) selects two of the four children (positions 1 and 3),
# but three replacement elements are supplied.
2583 self.assertRaises(
2584 ValueError,
2585 operator.setitem, a, slice(1,None,2), [x, y, z])
2586
# The failed assignment must not have modified the child list.
2587 self.assertEquals(
2588 [b, c, d, e],
2589 list(a))
2590
2603
2611
2620
2630
2640
2646
2654
2660
2667
2673
# Parses a document with an XML declaration and a PUBLIC doctype and checks
# every docinfo field against the values used to build the input.
2675 etree = self.etree
2676 xml_header = '<?xml version="1.0" encoding="ascii"?>'
2677 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
2678 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
2679 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
2680
2681 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
2682
2683 tree = etree.parse(BytesIO(xml))
2684 docinfo = tree.docinfo
2685 self.assertEquals(docinfo.encoding, "ascii")
2686 self.assertEquals(docinfo.xml_version, "1.0")
2687 self.assertEquals(docinfo.public_id, pub_id)
2688 self.assertEquals(docinfo.system_url, sys_id)
2689 self.assertEquals(docinfo.root_name, 'html')
# docinfo.doctype is expected to reproduce the full declaration string.
2690 self.assertEquals(docinfo.doctype, doctype_string)
2691
2707
2719
2731
2737
# tostring(tree, doctype=...) is expected to emit the supplied doctype in
# place of the one that was parsed with the document.
2739 etree = self.etree
2740 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
2741 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
2742 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
2743
2744 xml = _bytes('<!DOCTYPE root>\n<root/>')
2745 tree = etree.parse(BytesIO(xml))
# Expected output: the input with only its doctype line swapped.
2746 self.assertEquals(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
2747 etree.tostring(tree, doctype=doctype_string))
2748
# The .base property: initially taken from the base_url given to XML(),
# without any xml:base attribute on the element; assigning .base is
# expected to set the xml:base attribute as well.
2750 etree = self.etree
2751 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2752 self.assertEquals(root.base, "http://no/such/url")
2753 self.assertEquals(
2754 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
2755 root.base = "https://secret/url"
2756 self.assertEquals(root.base, "https://secret/url")
2757 self.assertEquals(
2758 root.get('{http://www.w3.org/XML/1998/namespace}base'),
2759 "https://secret/url")
2760
# Counterpart of the .base assignment check: here the xml:base attribute is
# set directly and the .base property is expected to reflect it.
2762 etree = self.etree
2763 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2764 self.assertEquals(root.base, "http://no/such/url")
2765 self.assertEquals(
2766 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
2767 root.set('{http://www.w3.org/XML/1998/namespace}base',
2768 "https://secret/url")
2769 self.assertEquals(root.base, "https://secret/url")
2770 self.assertEquals(
2771 root.get('{http://www.w3.org/XML/1998/namespace}base'),
2772 "https://secret/url")
2773
2779
2784
2791
2805
# A NUL character must be rejected with ValueError in text, in tail and in
# a tag name (plain string literals).
2807 Element = self.etree.Element
2808
2809 a = Element('a')
2810 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
2811 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
2812
2813 self.assertRaises(ValueError, Element, 'ha\0ho')
2814
# Same NUL-character checks, with the values passed through the _str()
# helper from common_imports (presumably producing unicode text - confirm).
2816 Element = self.etree.Element
2817
2818 a = Element('a')
2819 self.assertRaises(ValueError, setattr, a, "text",
2820 _str('ha\0ho'))
2821 self.assertRaises(ValueError, setattr, a, "tail",
2822 _str('ha\0ho'))
2823
2824 self.assertRaises(ValueError, Element,
2825 _str('ha\0ho'))
2826
# The control characters \x07 and \x02 must be rejected with ValueError in
# text, in tail and in a tag name (plain string literals).
2828 Element = self.etree.Element
2829
2830 a = Element('a')
2831 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
2832 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
2833
2834 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
2835 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
2836
2837 self.assertRaises(ValueError, Element, 'ha\x07ho')
2838 self.assertRaises(ValueError, Element, 'ha\x02ho')
2839
# Same control-character checks (\x07, \x02), with the values passed
# through the _str() helper from common_imports.
2841 Element = self.etree.Element
2842
2843 a = Element('a')
2844 self.assertRaises(ValueError, setattr, a, "text",
2845 _str('ha\x07ho'))
2846 self.assertRaises(ValueError, setattr, a, "text",
2847 _str('ha\x02ho'))
2848
2849 self.assertRaises(ValueError, setattr, a, "tail",
2850 _str('ha\x07ho'))
2851 self.assertRaises(ValueError, setattr, a, "tail",
2852 _str('ha\x02ho'))
2853
2854 self.assertRaises(ValueError, Element,
2855 _str('ha\x07ho'))
2856 self.assertRaises(ValueError, Element,
2857 _str('ha\x02ho'))
2858
# Control characters must still be rejected when the string also contains
# a \u1234 escape (NOTE(review): whether that escape becomes a real
# non-ASCII character depends on what _str() does with it - confirm).
2860 Element = self.etree.Element
2861
2862 a = Element('a')
2863 self.assertRaises(ValueError, setattr, a, "text",
2864 _str('ha\u1234\x07ho'))
2865 self.assertRaises(ValueError, setattr, a, "text",
2866 _str('ha\u1234\x02ho'))
2867
2868 self.assertRaises(ValueError, setattr, a, "tail",
2869 _str('ha\u1234\x07ho'))
2870 self.assertRaises(ValueError, setattr, a, "tail",
2871 _str('ha\u1234\x02ho'))
2872
2873 self.assertRaises(ValueError, Element,
2874 _str('ha\u1234\x07ho'))
2875 self.assertRaises(ValueError, Element,
2876 _str('ha\u1234\x02ho'))
2877
2891
2896
2914
2934
2956
# The `standalone` option of tostring() and its round trip through
# docinfo.standalone (None / True / False).
2958 tostring = self.etree.tostring
2959 XML = self.etree.XML
2960 ElementTree = self.etree.ElementTree
2961
2962 root = XML(_bytes("<root/>"))
2963
# A document parsed without a standalone declaration reports None.
2964 tree = ElementTree(root)
2965 self.assertEquals(None, tree.docinfo.standalone)
2966
# Without the option, the XML declaration carries no standalone attribute.
2967 result = tostring(root, xml_declaration=True, encoding="ASCII")
2968 self.assertEquals(result, _bytes(
2969 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
2970
# standalone=True -> standalone='yes', which parses back as True.
2971 result = tostring(root, xml_declaration=True, encoding="ASCII",
2972 standalone=True)
2973 self.assertEquals(result, _bytes(
2974 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
2975
2976 tree = ElementTree(XML(result))
2977 self.assertEquals(True, tree.docinfo.standalone)
2978
# standalone=False -> standalone='no', which parses back as False.
2979 result = tostring(root, xml_declaration=True, encoding="ASCII",
2980 standalone=False)
2981 self.assertEquals(result, _bytes(
2982 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
2983
2984 tree = ElementTree(XML(result))
2985 self.assertEquals(False, tree.docinfo.standalone)
2986
3006
# tostring(method="text") with an explicit encoding: the result is expected
# to be the text content in document order (text, child text, child tail,
# ..., followed by the root's own tail), encoded as UTF-16.
3008 tostring = self.etree.tostring
3009 Element = self.etree.Element
3010 SubElement = self.etree.SubElement
3011
3012 a = Element('a')
3013 a.text = "A"
3014 a.tail = "tail"
3015 b = SubElement(a, 'b')
3016 b.text = "B"
3017 b.tail = _str("Søk på nettet")
3018 c = SubElement(a, 'c')
3019 c.text = "C"
3020
3021 result = tostring(a, method="text", encoding="UTF-16")
3022
3023 self.assertEquals(_str('ABSøk på nettetCtail').encode("UTF-16"),
3024 result)
3025
# tostring(method="text") on content with non-ASCII characters: without an
# encoding argument a UnicodeEncodeError is expected; with UTF-8 the text
# content is serialised in document order.
3027 tostring = self.etree.tostring
3028 Element = self.etree.Element
3029 SubElement = self.etree.SubElement
3030
3031 a = Element('a')
3032 a.text = _str('Søk på nettetA')
3033 a.tail = "tail"
3034 b = SubElement(a, 'b')
3035 b.text = "B"
3036 b.tail = _str('Søk på nettetB')
3037 c = SubElement(a, 'c')
3038 c.text = "C"
3039
3040 self.assertRaises(UnicodeEncodeError,
3041 tostring, a, method="text")
3042
3043 self.assertEquals(
3044 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3045 tostring(a, encoding="UTF-8", method="text"))
3046
3059
3075
3079
3094
3112
3125
# tostring(element, encoding=_unicode) on sub-elements: the result must be
# a _unicode instance and, after canonicalize(), contain only the subtree
# of the serialised element.
3127 tostring = self.etree.tostring
3128 Element = self.etree.Element
3129 SubElement = self.etree.SubElement
3130
3131 a = Element('a')
3132 b = SubElement(a, 'b')
3133 c = SubElement(a, 'c')
3134 d = SubElement(c, 'd')
3135 self.assert_(isinstance(tostring(b, encoding=_unicode), _unicode))
3136 self.assert_(isinstance(tostring(c, encoding=_unicode), _unicode))
3137 self.assertEquals(_bytes('<b></b>'),
3138 canonicalize(tostring(b, encoding=_unicode)))
3139 self.assertEquals(_bytes('<c><d></d></c>'),
3140 canonicalize(tostring(c, encoding=_unicode)))
3141
3146
3161
# tostring(encoding=_unicode) with and without pretty_print: the default
# and pretty_print=False give the compact form, pretty_print=True gives an
# indented form with one element per line and a trailing newline.
3163 tostring = self.etree.tostring
3164 Element = self.etree.Element
3165 SubElement = self.etree.SubElement
3166
3167 a = Element('a')
3168 b = SubElement(a, 'b')
3169 c = SubElement(a, 'c')
3170
3171 result = tostring(a, encoding=_unicode)
3172 self.assertEquals(result, "<a><b/><c/></a>")
3173
3174 result = tostring(a, encoding=_unicode, pretty_print=False)
3175 self.assertEquals(result, "<a><b/><c/></a>")
3176
3177 result = tostring(a, encoding=_unicode, pretty_print=True)
3178 self.assertEquals(result, "<a>\n <b/>\n <c/>\n</a>\n")
3179
3191
3192
3193
3194 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3205
3206
# XInclude with parse="text": the referenced file is included as plain text
# rather than parsed as XML. self.include is defined outside this fragment.
3209 filename = fileInTestDir('test_broken.xml')
3210 root = etree.XML(_bytes('''\
3211 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
3212 <xi:include href="%s" parse="text"/>
3213 </doc>
3214 ''' % filename))
# Remember the text around the xi:include element before processing.
3215 old_text = root.text
3216 content = read_file(filename)
3217 old_tail = root[0].tail
3218
# After inclusion, root.text is expected to be the former leading text, the
# file content and the former tail of the xi:include element, concatenated.
3219 self.include( etree.ElementTree(root) )
3220 self.assertEquals(old_text + content + old_tail,
3221 root.text)
3222
3234
# XInclude processing with a custom resolver: the resolver records which
# kinds of URL it was asked for (DTD, the input document, anything else)
# and all three kinds are expected to have been requested.
3236 class res(etree.Resolver):
3237 include_text = read_file(fileInTestDir('test.xml'))
# Class-level dict; a single instance is created below, so sharing it
# between instances does not matter here.
3238 called = {}
3239 def resolve(self, url, id, context):
# DTD requests are redirected to the local test.dtd file.
3240 if url.endswith(".dtd"):
3241 self.called["dtd"] = True
3242 return self.resolve_filename(
3243 fileInTestDir('test.dtd'), context)
# The input document itself is only recorded; None is returned for it.
3244 elif url.endswith("test_xinclude.xml"):
3245 self.called["input"] = True
3246 return None
# Every other URL is answered with the content of test.xml.
3247 else:
3248 self.called["include"] = True
3249 return self.resolve_string(self.include_text, context)
3250
3251 res_instance = res()
3252 parser = etree.XMLParser(load_dtd = True)
3253 parser.resolvers.add(res_instance)
3254
3255 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3256 parser = parser)
3257
3258 self.include(tree)
3259
# Sort the recorded entries so the comparison does not depend on dict order.
3260 called = list(res_instance.called.items())
3261 called.sort()
3262 self.assertEquals(
3263 [("dtd", True), ("include", True), ("input", True)],
3264 called)
3265
3269
3270
3275
3276
# write_c14n() to a file-like object: the empty element <b/> is expected
# to be written as a start/end tag pair.
3279 tree = self.parse(_bytes('<a><b/></a>'))
3280 f = BytesIO()
3281 tree.write_c14n(f)
3282 s = f.getvalue()
3283 self.assertEquals(_bytes('<a><b></b></a>'),
3284 s)
3285
# write_c14n() with compression=9: the written bytes are expected to be a
# gzip stream that decompresses to the canonical form.
3287 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3288 f = BytesIO()
3289 tree.write_c14n(f, compression=9)
# Decompress with the standard gzip module; close the reader in any case.
3290 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
3291 try:
3292 s = gzfile.read()
3293 finally:
3294 gzfile.close()
3295 self.assertEquals(_bytes('<a>'+'<b></b>'*200+'</a>'),
3296 s)
3297
3309
3325
3343
3355
3367
# write_c14n() in inclusive vs. exclusive mode. The root declares a default
# namespace plus prefixes y and z; only z is used, by the child <z:b>.
3369 tree = self.parse(_bytes(
3370 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
# Default: all declarations stay on the root.
3371 f = BytesIO()
3372 tree.write_c14n(f)
3373 s = f.getvalue()
3374 self.assertEquals(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3375 s)
# exclusive=False gives the same output as the default.
3376 f = BytesIO()
3377 tree.write_c14n(f, exclusive=False)
3378 s = f.getvalue()
3379 self.assertEquals(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3380 s)
# exclusive=True: the unused y declaration is dropped and z is declared
# on the element that uses it.
3381 f = BytesIO()
3382 tree.write_c14n(f, exclusive=True)
3383 s = f.getvalue()
3384 self.assertEquals(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3385 s)
3386
# inclusive_ns_prefixes=['z'] keeps the z declaration on the root.
3387 f = BytesIO()
3388 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
3389 s = f.getvalue()
3390 self.assertEquals(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
3391 s)
3392
# tostring(method='c14n') on a whole tree, in inclusive vs. exclusive mode.
# Note that this fragment calls the module-level etree.tostring directly.
3394 tree = self.parse(_bytes(
3395 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
# Default and exclusive=False: all declarations stay on the root.
3396 s = etree.tostring(tree, method='c14n')
3397 self.assertEquals(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3398 s)
3399 s = etree.tostring(tree, method='c14n', exclusive=False)
3400 self.assertEquals(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3401 s)
# exclusive=True: unused y is dropped, z moves to the element using it.
3402 s = etree.tostring(tree, method='c14n', exclusive=True)
3403 self.assertEquals(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3404 s)
3405
# inclusive_ns_prefixes=['y'] keeps the otherwise unused y on the root.
3406 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
3407 self.assertEquals(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
3408 s)
3409
# tostring(method='c14n') on individual elements (the root and its child
# <z:b>) rather than on the tree object.
3411 tree = self.parse(_bytes(
3412 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
# Root element: same expectations as for serialising the whole tree.
3413 s = etree.tostring(tree.getroot(), method='c14n')
3414 self.assertEquals(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3415 s)
3416 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
3417 self.assertEquals(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3418 s)
3419 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
3420 self.assertEquals(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3421 s)
3422
# Child element, inclusive mode: all declarations from the root are
# repeated on the serialised child.
3423 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
3424 self.assertEquals(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
3425 s)
# Child element, exclusive mode: only the namespace it uses is declared.
3426 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
3427 self.assertEquals(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
3428 s)
3429
# inclusive_ns_prefixes=['y'] adds the y declaration back in.
3430 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
3431 self.assertEquals(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
3432 s)
3433
3435 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
# Three prefixes (x, y, z) are declared on the root and all three are
# passed in inclusive_ns_prefixes.
3436 tree = self.parse(_bytes(
3437 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3438
# With every prefix listed as inclusive, all declarations stay on the root.
3439 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
3440 self.assertEquals(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3441 s)
3442
3443
# tree.write() to a file-like object: the output is expected to equal the
# parsed input, with the empty element kept in its short form.
3446 tree = self.parse(_bytes('<a><b/></a>'))
3447 f = BytesIO()
3448 tree.write(f)
3449 s = f.getvalue()
3450 self.assertEquals(_bytes('<a><b/></a>'),
3451 s)
3452
# tree.write() with compression=9: the written bytes are expected to be a
# gzip stream that decompresses to the plain serialisation.
3454 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3455 f = BytesIO()
3456 tree.write(f, compression=9)
# Decompress with the standard gzip module; close the reader in any case.
3457 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
3458 try:
3459 s = gzfile.read()
3460 finally:
3461 gzfile.close()
3462 self.assertEquals(_bytes('<a>'+'<b/>'*200+'</a>'),
3463 s)
3464
# tree.write() at compression levels 0, 1 and 9, plus the default.
3466 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
# Level 0 output is compared directly with the plain XML below, i.e. it is
# expected to be uncompressed.
3467 f = BytesIO()
3468 tree.write(f, compression=0)
3469 s0 = f.getvalue()
3470
# Omitting the argument must give the same bytes as compression=0.
3471 f = BytesIO()
3472 tree.write(f)
3473 self.assertEquals(f.getvalue(), s0)
3474
# Level 1: output must not be larger than the uncompressed form and must
# be readable as gzip.
3475 f = BytesIO()
3476 tree.write(f, compression=1)
3477 s = f.getvalue()
3478 self.assert_(len(s) <= len(s0))
3479 gzfile = gzip.GzipFile(fileobj=BytesIO(s))
3480 try:
3481 s1 = gzfile.read()
3482 finally:
3483 gzfile.close()
3484
# Level 9: same checks as for level 1.
3485 f = BytesIO()
3486 tree.write(f, compression=9)
3487 s = f.getvalue()
3488 self.assert_(len(s) <= len(s0))
3489 gzfile = gzip.GzipFile(fileobj=BytesIO(s))
3490 try:
3491 s9 = gzfile.read()
3492 finally:
3493 gzfile.close()
3494
# All three variants must carry the same XML payload.
3495 self.assertEquals(_bytes('<a>'+'<b/>'*200+'</a>'),
3496 s0)
3497 self.assertEquals(_bytes('<a>'+'<b/>'*200+'</a>'),
3498 s1)
3499 self.assertEquals(_bytes('<a>'+'<b/>'*200+'</a>'),
3500 s9)
3501
3513
3529
3541
3554
# Rebinds the imported etree module under the same name in the enclosing
# scope. NOTE(review): presumably a test-class attribute so that methods can
# use self.etree (the same pattern appears in the class near the top of the
# file); the class header is not visible in this listing - confirm.
3556 etree = etree
3557
3579
3581 """This can't really be tested as long as there isn't a way to
3582 reset the logging setup ...
3583 """
3584 parse = self.etree.parse
3585
3586 messages = []
3587 class Logger(self.etree.PyErrorLog):
3588 def log(self, entry, message, *args):
3589 messages.append(message)
3590
3591 self.etree.use_global_python_log(Logger())
3592 f = BytesIO('<a><b></c></b></a>')
3593 try:
3594 parse(f)
3595 except SyntaxError:
3596 pass
3597 f.close()
3598
3599 self.assert_([ message for message in messages
3600 if 'mismatch' in message ])
3601 self.assert_([ message for message in messages
3602 if ':PARSER:' in message])
3603 self.assert_([ message for message in messages
3604 if ':ERR_TAG_NAME_MISMATCH:' in message ])
3605 self.assert_([ message for message in messages
3606 if ':1:15:' in message ])
3607
3627
# When executed directly, only print a hint naming test.py as the way to
# run this file; nothing else happens in this guard.
3628 if __name__ == '__main__':
3629 print('to test use test.py %s' % __file__)
3630