1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 import os.path
11 import unittest
12 import copy
13 import sys
14 import re
15 import operator
16 import tempfile
17 import gzip
18
# Make sure the directory holding this file is searched first for imports,
# so that the sibling helper module can be imported below.
this_dir = os.path.split(__file__)[0]
if sys.path.count(this_dir) == 0:
    sys.path.insert(0, this_dir)
22
23 from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir, read_file
24 from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
25 from common_imports import canonicalize, sorted, _str, _bytes
26
# Report the library versions under test once, at import time.
print("\n".join([
    "",
    "TESTED VERSION: %s" % etree.__version__,
    " Python: " + repr(sys.version_info),
    " lxml.etree: " + repr(etree.LXML_VERSION),
    " libxml used: " + repr(etree.LIBXML_VERSION),
    " libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION),
    " libxslt used: " + repr(etree.LIBXSLT_VERSION),
    " libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION),
    "",
]))
36
# Pick the text type: the ``unicode`` builtin where it exists, else ``str``.
try:
    unicode
except NameError:
    _unicode = str
else:
    _unicode = unicode
42
44 """Tests only for etree, not ElementTree"""
45 etree = etree
46
57
66
73
# Tags that consist only of a "{...}" part must be rejected, both when
# creating an element and when assigning to the tag of an existing one.
make_element = self.etree.Element
existing = make_element('name')
for empty_tag in ('{}', '{test}'):
    self.assertRaises(ValueError, make_element, empty_tag)
    self.assertRaises(ValueError, setattr, existing, 'tag', empty_tag)
82
90
# Quote characters are not allowed in tag names, with or without namespace.
make_element = self.etree.Element
for quoted_tag in ("p'name", 'p"name', "{test}p'name", '{test}p"name'):
    self.assertRaises(ValueError, make_element, quoted_tag)

# The same check applies when renaming an existing element.
existing = make_element('name')
for quoted_tag in ("p'name", 'p"name'):
    self.assertRaises(ValueError, setattr, existing, 'tag', quoted_tag)
102
# Whitespace anywhere in the tag name must raise ValueError.
make_element = self.etree.Element
for spaced_tag in (' name ', 'na me', '{test} name'):
    self.assertRaises(ValueError, make_element, spaced_tag)

existing = make_element('name')
self.assertRaises(ValueError, setattr, existing, 'tag', ' name ')
111
119
127
# SubElement() must reject tag names that contain quote characters.
add_child = self.etree.SubElement
parent = self.etree.Element('name')
for quoted_tag in ("p'name", "{test}p'name", 'p"name', '{test}p"name'):
    self.assertRaises(ValueError, add_child, parent, quoted_tag)
138
147
156
# An empty local name is invalid for QName, with or without a namespace.
make_qname = self.etree.QName
for qname_args in (('',), ('test', '')):
    self.assertRaises(ValueError, make_qname, *qname_args)
161
# A colon inside the local name is invalid for QName.
make_qname = self.etree.QName
for qname_args in (('p:name',), ('test', 'p:name')):
    self.assertRaises(ValueError, make_qname, *qname_args)
166
# Whitespace inside the local name is invalid for QName.
make_qname = self.etree.QName
for qname_args in ((' name ',), ('na me',), ('test', ' name')):
    self.assertRaises(ValueError, make_qname, *qname_args)
172
180
182
# A QName built from an element must equal the QName the element was
# created from.
QName = self.etree.QName
qname1 = QName('http://myns', 'a')
a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})

qname2 = QName(a)
# assertEqual replaces the deprecated assertEquals alias.
self.assertEqual(a.tag, qname1.text)
self.assertEqual(qname1.text, qname2.text)
self.assertEqual(qname1, qname2)
191
193
# Assigning a QName as element text is expected to store the prefixed form
# ("p:a"), using the prefix declared in the element's nsmap.
etree = self.etree
qname = etree.QName('http://myns', 'a')
a = etree.Element(qname, nsmap={'p' : 'http://myns'})
a.text = qname

self.assertEqual("p:a", a.text)
200
209
224
230
238
252
274
276 XML = self.etree.XML
277 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
278
279 root = XML(xml)
280 self.etree.strip_elements(root, 'a')
281 self.assertEquals(_bytes('<test><x></x></test>'),
282 self._writeElement(root))
283
284 root = XML(xml)
285 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
286 self.assertEquals(_bytes('<test><a></a><x><a></a></x></test>'),
287 self._writeElement(root))
288
289 root = XML(xml)
290 self.etree.strip_elements(root, 'c')
291 self.assertEquals(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
292 self._writeElement(root))
293
295 XML = self.etree.XML
296 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
297
298 root = XML(xml)
299 self.etree.strip_elements(root, 'a')
300 self.assertEquals(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
301 self._writeElement(root))
302
303 root = XML(xml)
304 self.etree.strip_elements(root, '{urn:a}b', 'c')
305 self.assertEquals(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
306 self._writeElement(root))
307
308 root = XML(xml)
309 self.etree.strip_elements(root, '{urn:a}*', 'c')
310 self.assertEquals(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
311 self._writeElement(root))
312
313 root = XML(xml)
314 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
315 self.assertEquals(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
316 self._writeElement(root))
317
336
362
389
415
434
447
458
464
466 XML = self.etree.XML
467 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
468 self.assertEquals(root[0].target, "mypi")
469 self.assertEquals(root[0].get('my'), "1")
470 self.assertEquals(root[0].get('test'), " abc ")
471 self.assertEquals(root[0].get('quotes'), "' '")
472 self.assertEquals(root[0].get('only'), None)
473 self.assertEquals(root[0].get('names'), None)
474 self.assertEquals(root[0].get('nope'), None)
475
477 XML = self.etree.XML
478 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
479 self.assertEquals(root[0].target, "mypi")
480 self.assertEquals(root[0].attrib['my'], "1")
481 self.assertEquals(root[0].attrib['test'], " abc ")
482 self.assertEquals(root[0].attrib['quotes'], "' '")
483 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
484 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
485 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
486
488
# A deep copy of a processing instruction must be independent of the
# original: changing the copy's text leaves the original untouched.
ProcessingInstruction = self.etree.ProcessingInstruction

a = ProcessingInstruction("PI", "ONE")
b = copy.deepcopy(a)
b.text = "ANOTHER"

self.assertEqual('ONE', a.text)
self.assertEqual('ANOTHER', b.text)
497
513
528
538
550
569
574
587
598
599 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
600 events = list(iterparse(f, events=('end', 'comment')))
601 root = events[-1][1]
602 self.assertEquals(6, len(events))
603 self.assertEquals(['A', ' B ', 'c', 'b', 'C', 'a'],
604 [ name(*item) for item in events ])
605 self.assertEquals(
606 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
607 tostring(root))
608
620
621 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
622 events = list(iterparse(f, events=('end', 'pi')))
623 root = events[-2][1]
624 self.assertEquals(8, len(events))
625 self.assertEquals([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
626 ('pid','d'), 'a', ('pie','e')],
627 [ name(*item) for item in events ])
628 self.assertEquals(
629 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
630 tostring(ElementTree(root)))
631
646
652
# With remove_blank_text=True, whitespace-only text between elements is
# expected to be dropped, while text inside <b> and <c> is kept.
iterparse = self.etree.iterparse
f = BytesIO("""
<a> \n \n <b> b test </b> \n

\n\t <c> \n </c> </a> \n """)
iterator = iterparse(f, remove_blank_text=True)
text = [ (element.text, element.tail)
         for event, element in iterator ]
self.assertEqual(
    [(" b test ", None), (" \n ", None), (None, None)],
    text)
665
# Restricting iterparse() to tag "b" yields only the events of <b>.
iterparse = self.etree.iterparse
f = BytesIO('<a><b><d/></b><c/></a>')

iterator = iterparse(f, tag="b", events=('start', 'end'))
events = list(iterator)
root = iterator.root
self.assertEqual(
    [('start', root[0]), ('end', root[0])],
    events)
676
# The wildcard tag "*" matches all four elements: 4 start + 4 end events.
iterparse = self.etree.iterparse
f = BytesIO('<a><b><d/></b><c/></a>')

iterator = iterparse(f, tag="*", events=('start', 'end'))
events = list(iterator)
self.assertEqual(8, len(events))
686
# A namespace-qualified tag filter selects <b> in the default namespace.
iterparse = self.etree.iterparse
f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')

iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
events = list(iterator)
root = iterator.root
self.assertEqual(
    [('start', root[0]), ('end', root[0])],
    events)
697
# The tag "{}b" matches <b> only when it has no namespace.
iterparse = self.etree.iterparse

# Un-namespaced document: <b> matches.
f = BytesIO('<a><b><d/></b><c/></a>')
iterator = iterparse(f, tag="{}b", events=('start', 'end'))
events = list(iterator)
root = iterator.root
self.assertEqual(
    [('start', root[0]), ('end', root[0])],
    events)

# Same document in a default namespace: nothing matches.
f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
iterator = iterparse(f, tag="{}b", events=('start', 'end'))
events = list(iterator)
self.assertEqual([], events)
713
# "{urn:test:1}*" matches every element of that namespace: 8 events.
iterparse = self.etree.iterparse
f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
events = list(iterator)
self.assertEqual(8, len(events))
720
# "{}*" matches all elements without a namespace and none that have one.
iterparse = self.etree.iterparse

f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
iterator = iterparse(f, tag="{}*", events=('start', 'end'))
events = list(iterator)
self.assertEqual([], events)

f = BytesIO('<a><b><d/></b><c/></a>')
iterator = iterparse(f, tag="{}*", events=('start', 'end'))
events = list(iterator)
self.assertEqual(8, len(events))
732
# Latin-1 encoded data that is declared as UTF-8 must fail to parse.
utf8_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
payload = _str('Søk på nettet')
document = _str('%s<a>%s</a>') % (utf8_declaration, payload)
latin1_bytes = document.encode('iso-8859-1')

self.assertRaises(self.etree.ParseError,
                  list, self.etree.iterparse(BytesIO(latin1_bytes)))
741
743 text = _str('Søk på nettet', encoding="UTF-8")
744 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
745 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
746 ).encode('iso-8859-1')
747
748 iterator = self.etree.iterparse(BytesIO(xml_latin1),
749 encoding="iso-8859-1")
750 self.assertEquals(1, len(list(iterator)))
751
752 a = iterator.root
753 self.assertEquals(a.text, text)
754
# With strip_cdata=False the CDATA section survives parsing and is
# serialised again as CDATA.
tostring = self.etree.tostring
f = BytesIO('<root><![CDATA[test]]></root>')
context = self.etree.iterparse(f, strip_cdata=False)
content = [ el.text for event,el in context ]

self.assertEqual(['test'], content)
self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
                 tostring(context.root))
764
768
773
792
793
794
805 def end(self, tag):
806 events.append("end")
807 assertEquals("TAG", tag)
808 def close(self):
809 return "DONE"
810
811 parser = self.etree.XMLParser(target=Target())
812 tree = self.etree.ElementTree()
813
814 self.assertRaises(TypeError,
815 tree.parse, BytesIO("<TAG/>"), parser=parser)
816 self.assertEquals(["start", "end"], events)
817
819
# An exception raised by a parser target callback must propagate out of the
# feed()/close() sequence; the recorded events show that close() was still
# called on the target.
events = []
class Target(object):
    def start(self, tag, attrib):
        events.append("start-" + tag)
    def end(self, tag):
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")
    def data(self, data):
        events.append("data-" + data)
    def close(self):
        events.append("close")
        return "DONE"

parser = self.etree.XMLParser(target=Target())

try:
    parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
    parser.close()
    self.fail("error expected, but parsing succeeded")
except ValueError:
    pass  # value error received as expected

self.assertEqual(["start-root", "data-A", "start-a",
                  "data-ca", "end-a", "close"],
                 events)
846
848
# An exception raised by a parser target callback must propagate out of
# fromstring(); the recorded events show that close() was still called on
# the target.
events = []
class Target(object):
    def start(self, tag, attrib):
        events.append("start-" + tag)
    def end(self, tag):
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")
    def data(self, data):
        events.append("data-" + data)
    def close(self):
        events.append("close")
        return "DONE"

parser = self.etree.XMLParser(target=Target())

try:
    self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
                          parser=parser)
    self.fail("error expected, but parsing succeeded")
except ValueError:
    pass  # value error received as expected

self.assertEqual(["start-root", "data-A", "start-a",
                  "data-ca", "end-a", "close"],
                 events)
875
881 def end(self, tag):
882 events.append("end-" + tag)
883 def data(self, data):
884 events.append("data-" + data)
885 def comment(self, text):
886 events.append("comment-" + text)
887 def close(self):
888 return "DONE"
889
890 parser = self.etree.XMLParser(target=Target())
891
892 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
893 done = parser.close()
894
895 self.assertEquals("DONE", done)
896 self.assertEquals(["comment-a", "start-root", "data-A", "comment-b",
897 "start-sub", "end-sub", "comment-c", "data-B",
898 "end-root", "comment-d"],
899 events)
900
# Processing instructions before, inside and after the root element are
# passed to the target's pi() callback in document order.
events = []
class Target(object):
    def start(self, tag, attrib):
        events.append("start-" + tag)
    def end(self, tag):
        events.append("end-" + tag)
    def data(self, data):
        events.append("data-" + data)
    def pi(self, target, data):
        events.append("pi-" + target + "-" + data)
    def close(self):
        return "DONE"

parser = self.etree.XMLParser(target=Target())

parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
done = parser.close()

# parser.close() hands back whatever the target's close() returned.
self.assertEqual("DONE", done)
self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
                  "data-B", "end-root", "pi-test-c"],
                 events)
924
# CDATA content is delivered to the target's data() callback as plain text,
# even when the parser is created with strip_cdata=False.
events = []
class Target(object):
    def start(self, tag, attrib):
        events.append("start-" + tag)
    def end(self, tag):
        events.append("end-" + tag)
    def data(self, data):
        events.append("data-" + data)
    def close(self):
        return "DONE"

parser = self.etree.XMLParser(target=Target(),
                              strip_cdata=False)

parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
done = parser.close()

self.assertEqual("DONE", done)
self.assertEqual(["start-root", "data-A", "start-a",
                  "data-ca", "end-a", "data-B", "end-root"],
                 events)
947
# With recover=True a mismatched closing tag does not abort parsing: the
# target still sees the end of <root> and close() is called.
events = []
class Target(object):
    def start(self, tag, attrib):
        events.append("start-" + tag)
    def end(self, tag):
        events.append("end-" + tag)
    def data(self, data):
        events.append("data-" + data)
    def close(self):
        events.append("close")
        return "DONE"

parser = self.etree.XMLParser(target=Target(),
                              recover=True)

parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
done = parser.close()

self.assertEqual("DONE", done)
self.assertEqual(["start-root", "data-A", "start-a",
                  "data-ca", "end-a", "data-B",
                  "end-root", "close"],
                 events)
972
982
992
1001
1011
# iterwalk() over an existing tree yields start/end events in document
# order for the root and both children.
iterwalk = self.etree.iterwalk
root = self.etree.XML(_bytes('<a><b></b><c/></a>'))

iterator = iterwalk(root, events=('start','end'))
events = list(iterator)
self.assertEqual(
    [('start', root), ('start', root[0]), ('end', root[0]),
     ('start', root[1]), ('end', root[1]), ('end', root)],
    events)
1022
1033
# Elements may be modified while iterwalk() is running: an attribute is set
# on every element except the root during its 'start' event, and the event
# sequence (including namespace events) stays as expected.
iterwalk = self.etree.iterwalk
root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))

attr_name = '{testns}bla'
events = []
iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
for event, elem in iterator:
    events.append(event)
    if event == 'start':
        if elem.tag != '{ns1}a':
            elem.set(attr_name, 'value')

self.assertEqual(
    ['start-ns', 'start', 'start', 'start-ns', 'start',
     'end', 'end-ns', 'end', 'end', 'end-ns'],
    events)

# The root was skipped, its first child was not.
self.assertEqual(None, root.get(attr_name))
self.assertEqual('value', root[0].get(attr_name))
1058
1069
# A custom resolver supplies the DTD through resolve_string(); the entity it
# defines expands to the requested URL, which is then found as root text.
parse = self.etree.parse
parser = self.etree.XMLParser(dtd_validation=True)
assertEqual = self.assertEqual
test_url = _str("__nosuch.dtd")

class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        assertEqual(url, test_url)
        return self.resolve_string(
            _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url, context)

parser.resolvers.add(MyResolver())

xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
tree = parse(StringIO(xml), parser)
root = tree.getroot()
self.assertEqual(root.text, test_url)
1089
# Same as the string variant, but the DTD text is UTF-8 encoded before it is
# passed to resolve_string().
parse = self.etree.parse
parser = self.etree.XMLParser(dtd_validation=True)
assertEqual = self.assertEqual
test_url = _str("__nosuch.dtd")

class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        assertEqual(url, test_url)
        return self.resolve_string(
            (_str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url).encode('utf-8'),
            context)

parser.resolvers.add(MyResolver())

xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
tree = parse(StringIO(xml), parser)
root = tree.getroot()
self.assertEqual(root.text, test_url)
1110
# A custom resolver supplies the DTD through resolve_file() using the
# SillyFileLike helper object instead of a real file.
parse = self.etree.parse
parser = self.etree.XMLParser(dtd_validation=True)
assertEqual = self.assertEqual
test_url = _str("__nosuch.dtd")

class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        assertEqual(url, test_url)
        return self.resolve_file(
            SillyFileLike(
                _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url), context)

parser.resolvers.add(MyResolver())

xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
tree = parse(StringIO(xml), parser)
root = tree.getroot()
self.assertEqual(root.text, test_url)
1131
# A custom resolver redirects the DTD lookup to the file 'test.dtd' in the
# test directory via resolve_filename(). With attribute_defaults=True the
# parsed elements are then expected to carry the 'default' attributes.
parse = self.etree.parse
parser = self.etree.XMLParser(attribute_defaults=True)
assertEqual = self.assertEqual
test_url = _str("__nosuch.dtd")

class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        assertEqual(url, test_url)
        return self.resolve_filename(
            fileInTestDir('test.dtd'), context)

parser.resolvers.add(MyResolver())

xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
tree = parse(StringIO(xml), parser)
root = tree.getroot()
self.assertEqual(root.attrib, {'default': 'valueA'})
self.assertEqual(root[0].attrib, {'default': 'valueB'})
1153
1165
1166 parser.resolvers.add(MyResolver())
1167
1168 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1169 tree = parse(StringIO(xml), parser,
1170 base_url=fileInTestDir('__test.xml'))
1171 root = tree.getroot()
1172 self.assertEquals(
1173 root.attrib, {'default': 'valueA'})
1174 self.assertEquals(
1175 root[0].attrib, {'default': 'valueB'})
1176
# A custom resolver hands an open binary file ('test.dtd') to
# resolve_file(). With attribute_defaults=True the parsed elements are then
# expected to carry the 'default' attributes.
parse = self.etree.parse
parser = self.etree.XMLParser(attribute_defaults=True)
assertEqual = self.assertEqual
test_url = _str("__nosuch.dtd")
opened_files = []

class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        assertEqual(url, test_url)
        dtd_file = open(fileInTestDir('test.dtd'), 'rb')
        # Remember the handle so it can be closed once parsing is done;
        # previously the file object was never closed.
        opened_files.append(dtd_file)
        return self.resolve_file(dtd_file, context)

parser.resolvers.add(MyResolver())

xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
try:
    tree = parse(StringIO(xml), parser)
finally:
    for dtd_file in opened_files:
        dtd_file.close()
root = tree.getroot()
self.assertEqual(root.attrib, {'default': 'valueA'})
self.assertEqual(root[0].attrib, {'default': 'valueB'})
1198
# A resolver that answers with resolve_empty() provides no DTD content, so
# the reference to the undefined entity must raise XMLSyntaxError. The flag
# on `check` proves that the resolver was actually consulted.
parse = self.etree.parse
parser = self.etree.XMLParser(load_dtd=True)
assertEqual = self.assertEqual
test_url = _str("__nosuch.dtd")

class check(object):
    resolved = False

class MyResolver(self.etree.Resolver):
    def resolve(self, url, id, context):
        assertEqual(url, test_url)
        check.resolved = True
        return self.resolve_empty(context)

parser.resolvers.add(MyResolver())

xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
# Use self.etree like the rest of the test, not the module-level import.
self.assertRaises(self.etree.XMLSyntaxError, parse, StringIO(xml), parser)
self.assertTrue(check.resolved)
1219
1226
1227 class MyResolver(self.etree.Resolver):
1228 def resolve(self, url, id, context):
1229 raise _LocalException
1230
1231 parser.resolvers.add(MyResolver())
1232
1233 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1234 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1235
1236 if etree.LIBXML_VERSION > (2,6,20):
1253
# With resolve_entities=False the "&nbsp;" reference is kept as an entity
# node inside <child3>. Replacing the first child with the last one moves
# that node along with its parent element.
#
# The entity must be written as a reference in the XML source; a literal
# (already decoded) character would leave <child3> without any child node
# and the assertions on root[0][0] could never hold.
xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
<root>
<child1/>
<child2/>
<child3>&nbsp;</child3>
</root>''')

parser = self.etree.XMLParser(resolve_entities=False)
root = self.etree.fromstring(xml, parser)
self.assertEqual([ el.tag for el in root ],
                 ['child1', 'child2', 'child3'])

# Assigning root[-1] to slot 0 moves <child3> and drops <child1>.
root[0] = root[-1]
self.assertEqual([ el.tag for el in root ],
                 ['child3', 'child2'])
self.assertEqual(root[0][0].text, '&nbsp;')
self.assertEqual(root[0][0].name, 'nbsp')
1272
1288
1295
# Entity() must reject names with spaces, commas or NUL bytes, as well as
# malformed character references.
make_entity = self.etree.Entity
for bad_name in ('a b c', 'a,b', 'a\0b', '#abc', '#xxyz'):
    self.assertRaises(ValueError, make_entity, bad_name)
1303
1316
1329
# A CDATA object is not accepted as tail text or as an attribute value,
# whichever API is used to assign it.
root = self.etree.Element("root")
cdata = self.etree.CDATA('test')

rejected_calls = (
    (setattr, root, 'tail', cdata),
    (root.set, 'attr', cdata),
    (operator.setitem, root.attrib, 'attr', cdata),
)
for call_with_args in rejected_calls:
    self.assertRaises(TypeError, *call_with_args)
1343
1352
1361
1362
1372
1381
1394
1407
1413
1419
1434
1447
1462
1475
1490
1503
1518
1531
1532
1540
1541
1551
1552
1567
1568
1578
1579
1590
1591
1593 self.assertRaises(TypeError, self.etree.dump, None)
1594
1607
1620
1641
1650
1659
1668
1677
# iterchildren() accepts a list of tags and yields the matching children in
# document order.
XML = self.etree.XML

root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
result = [el.text for el in root.iterchildren(tag=['two', 'three'])]
self.assertEqual(['Two', 'Bla', None], result)
1686
1695
1716
1731
# iterancestors() accepts a tuple of tags; ancestors are yielded from the
# nearest outwards, and '*' matches any tag.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(b, 'd')

cases = [
    ([b, a], ('a', 'b')),
    ([],     ('w', 'x', 'y', 'z')),
    ([],     ('d', 'x')),
    ([b, a], ('b', '*')),
    ([b],    ('b', 'c')),
]
for expected, tags in cases:
    self.assertEqual(expected, list(d.iterancestors(tag=tags)))
1755
1772
1793
# iterdescendants() accepts a tuple of tags. The element itself is never
# included, and '*' matches any tag.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(b, 'd')
e = SubElement(c, 'e')

self.assertEqual(
    [b, e],
    list(a.iterdescendants(tag=('a', 'b', 'e'))))

# A descendant that shares the root's tag name is found.
a2 = SubElement(e, 'a')
self.assertEqual(
    [b, a2],
    list(a.iterdescendants(tag=('a', 'b'))))
self.assertEqual(
    [],
    list(c.iterdescendants(tag=('x', 'y', 'z'))))
self.assertEqual(
    [b, d, c, e, a2],
    list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
1817
1835
1852
1870
1894
# itersiblings() with a single tag filter, in both directions.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(b, 'd')

# Following siblings (the default direction).
self.assertEqual([], list(a.itersiblings(tag='XXX')))
self.assertEqual([c], list(b.itersiblings(tag='c')))
self.assertEqual([c], list(b.itersiblings(tag='*')))

# Preceding siblings; the element itself is never returned.
self.assertEqual([b], list(c.itersiblings(preceding=True, tag='b')))
self.assertEqual([], list(c.itersiblings(preceding=True, tag='c')))
1918
# itersiblings() with a tuple of tags, in both directions.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(b, 'd')
e = SubElement(a, 'e')

self.assertEqual(
    [],
    list(a.itersiblings(tag=('XXX', 'YYY'))))
self.assertEqual(
    [c, e],
    list(b.itersiblings(tag=('c', 'd', 'e'))))
self.assertEqual(
    [b],
    list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
# Preceding siblings are yielded nearest first.
self.assertEqual(
    [c, b],
    list(e.itersiblings(preceding=True, tag=('c', '*'))))
1940
# parseid() returns the parsed tree plus a mapping of ID values to elements.
# Only attributes declared as ID in the DTD, and xml:id, are expected in
# the mapping; the plain "id" attribute is not.
parseid = self.etree.parseid
XML = self.etree.XML
xml_text = _bytes('''
<!DOCTYPE document [
<!ELEMENT document (h1,p)*>
<!ELEMENT h1 (#PCDATA)>
<!ATTLIST h1 myid ID #REQUIRED>
<!ELEMENT p (#PCDATA)>
<!ATTLIST p someid ID #REQUIRED>
]>
<document>
<h1 myid="chapter1">...</h1>
<p id="note1" class="note">...</p>
<p>Regular paragraph.</p>
<p xml:id="xmlid">XML:ID paragraph.</p>
<p someid="warn1" class="warning">...</p>
</document>
''')

tree, dic = parseid(BytesIO(xml_text))
root = tree.getroot()
root2 = XML(xml_text)
self.assertEqual(self._writeElement(root),
                 self._writeElement(root2))
expected = {
    "chapter1" : root[0],
    "xmlid"    : root[3],
    "warn1"    : root[4]
    }
for id_value in ("chapter1", "warn1", "xmlid"):
    self.assertTrue(id_value in dic)
self._checkIDDict(dic, expected)
1975
# XMLDTDID() returns the root element plus a mapping of ID values to
# elements. Only attributes declared as ID in the DTD, and xml:id, are
# expected in the mapping; the plain "id" attribute is not.
XMLDTDID = self.etree.XMLDTDID
XML = self.etree.XML
xml_text = _bytes('''
<!DOCTYPE document [
<!ELEMENT document (h1,p)*>
<!ELEMENT h1 (#PCDATA)>
<!ATTLIST h1 myid ID #REQUIRED>
<!ELEMENT p (#PCDATA)>
<!ATTLIST p someid ID #REQUIRED>
]>
<document>
<h1 myid="chapter1">...</h1>
<p id="note1" class="note">...</p>
<p>Regular paragraph.</p>
<p xml:id="xmlid">XML:ID paragraph.</p>
<p someid="warn1" class="warning">...</p>
</document>
''')

root, dic = XMLDTDID(xml_text)
root2 = XML(xml_text)
self.assertEqual(self._writeElement(root),
                 self._writeElement(root2))
expected = {
    "chapter1" : root[0],
    "xmlid"    : root[3],
    "warn1"    : root[4]
    }
for id_value in ("chapter1", "warn1", "xmlid"):
    self.assertTrue(id_value in dic)
self._checkIDDict(dic, expected)
2009
# Without a DTD (and without xml:id attributes) the ID mapping returned by
# XMLDTDID() is expected to be empty.
XMLDTDID = self.etree.XMLDTDID
XML = self.etree.XML
xml_text = _bytes('''
<document>
<h1 myid="chapter1">...</h1>
<p id="note1" class="note">...</p>
<p>Regular paragraph.</p>
<p someid="warn1" class="warning">...</p>
</document>
''')

root, dic = XMLDTDID(xml_text)
root2 = XML(xml_text)
self.assertEqual(self._writeElement(root),
                 self._writeElement(root2))
expected = {}
self._checkIDDict(dic, expected)
2028
# Compare the ID mapping `dic` with the plain dict `expected` through the
# dict-style accessors: len(), items() and keys() on every Python version.
self.assertEqual(len(dic), len(expected))
self.assertEqual(sorted(dic.items()), sorted(expected.items()))
self.assertEqual(sorted(dic.keys()), sorted(expected.keys()))
if sys.version_info < (3,):
    # The iter*() accessors are only exercised on Python 2, and so is the
    # comparison of sorted values.
    # NOTE(review): presumably the values are not orderable on Python 3 -- confirm.
    self.assertEqual(sorted(dic.iteritems()),
                     sorted(expected.iteritems()))
    self.assertEqual(sorted(dic.iterkeys()),
                     sorted(expected.iterkeys()))
    self.assertEqual(sorted(dic.values()),
                     sorted(expected.values()))
    self.assertEqual(sorted(dic.itervalues()),
                     sorted(expected.itervalues()))
2047
# An element created with an nsmap uses the mapped prefix for its namespace
# and declares it on serialisation.
etree = self.etree

r = {'foo': 'http://ns.infrae.com/foo'}
e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
self.assertEqual('foo', e.prefix)
self.assertEqual(
    _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
    self._writeElement(e))
2059
# Mapping the None prefix declares a default namespace: the element has no
# prefix but keeps its fully qualified tag.
etree = self.etree

r = {None: 'http://ns.infrae.com/foo'}
e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
self.assertEqual(None, e.prefix)
self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
self.assertEqual(
    _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
    self._writeElement(e))
2074
# A namespaced attribute picks up its prefix from the element's nsmap, while
# the element itself stays in the default namespace.
etree = self.etree

r = {None: 'http://ns.infrae.com/foo',
     'hoi': 'http://ns.infrae.com/hoi'}
e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
e.set('{http://ns.infrae.com/hoi}test', 'value')
self.assertEqual(
    _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
    self._writeElement(e))
2085
# A child added after the root was wrapped in an ElementTree reuses the
# prefix declared in the root's nsmap.
etree = self.etree
r = {None: 'http://ns.infrae.com/foo',
     'hoi': 'http://ns.infrae.com/hoi'}
e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
# The tree object itself is not needed afterwards; only wrapping the
# element before the child is added matters here.
etree.ElementTree(element=e)
etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
self.assertEqual(
    _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
    self._writeElement(e))
2096
# Appending an element that uses the same default namespace as its new
# parent keeps both elements prefix-less with unchanged tags.
etree = self.etree

r = {None: 'http://ns.infrae.com/foo'}
e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)

e1.append(e2)

self.assertEqual(None, e1.prefix)
self.assertEqual(None, e1[0].prefix)
self.assertEqual('{http://ns.infrae.com/foo}bar', e1.tag)
self.assertEqual('{http://ns.infrae.com/foo}bar', e1[0].tag)
2118
# Both elements get the same nsmap, but only e1's tag lies in the mapped
# default namespace. After appending, e2 must have some prefix for its own
# namespace while the tags of both elements are unchanged.
etree = self.etree

r = {None: 'http://ns.infrae.com/BAR'}
e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)

e1.append(e2)

self.assertEqual(None, e1.prefix)
self.assertNotEqual(None, e2.prefix)
self.assertEqual('{http://ns.infrae.com/BAR}bar', e1.tag)
self.assertEqual('{http://ns.infrae.com/foo}bar', e2.tag)
2140
# An element moved into another document keeps its namespace even after the
# source document is released, and reuses the prefix declaration that
# already exists on the new root.
ns_href = "http://a.b.c"
one = self.etree.fromstring(
    _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
baz = one[0][0]

two = self.etree.fromstring(
    _bytes('<root xmlns:ns="%s"/>' % ns_href))
two.append(baz)
# Drop the last reference to the original document on purpose.
del one

self.assertEqual('{%s}baz' % ns_href, baz.tag)
self.assertEqual(
    _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
    self.etree.tostring(two))
2156
2166
# The nsmap property of an element reflects the mapping it was created with.
etree = self.etree

r = {None: 'http://ns.infrae.com/foo',
     'hoi': 'http://ns.infrae.com/hoi'}
e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
self.assertEqual(r, e.nsmap)
2176
# A sub-element's nsmap is its parent's mapping overlaid with its own
# declarations; the parent's nsmap is unaffected.
etree = self.etree

# Named parent_nsmap so that the imported `re` module is not shadowed.
parent_nsmap = {None: 'http://ns.infrae.com/foo',
                'hoi': 'http://ns.infrae.com/hoi'}
e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=parent_nsmap)

child_nsmap = {None: 'http://ns.infrae.com/honk',
               'top': 'http://ns.infrae.com/top'}
s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=child_nsmap)

merged_nsmap = parent_nsmap.copy()
merged_nsmap.update(child_nsmap)
self.assertEqual(parent_nsmap, e.nsmap)
self.assertEqual(merged_nsmap, s.nsmap)
2192
# An undeclared prefix in HTML input shows up in nsmap mapped to None.
etree = self.etree
el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
self.assertEqual({'hha': None}, el.nsmap)
2197
# getiterator() accepts several tags as positional arguments. Matches come
# in document order and include the start element; empty filters match
# nothing.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(b, 'd')
e = SubElement(c, 'e')
f = SubElement(c, 'f')

self.assertEqual([a, b], list(a.getiterator('a', 'b')))
self.assertEqual([], list(a.getiterator('x', 'y')))
self.assertEqual([a, f], list(a.getiterator('f', 'a')))
self.assertEqual([c, e, f], list(c.getiterator('c', '*', 'a')))
self.assertEqual([], list(a.getiterator( (), () )))
2224
# Same expectations as the multi-argument form, but the tag names are passed
# to getiterator() as a single tuple; an empty tuple matches nothing.
Element = self.etree.Element
SubElement = self.etree.SubElement

# Tree shape: a -> (b -> d), (c -> e, f)
a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(b, 'd')
e = SubElement(c, 'e')
f = SubElement(c, 'f')

self.assertEqual([a, b], list(a.getiterator( ('a', 'b') )))
self.assertEqual([], list(a.getiterator( ('x', 'y') )))
self.assertEqual([a, f], list(a.getiterator( ('f', 'a') )))
self.assertEqual([c, e, f], list(c.getiterator( ('c', '*', 'a') )))
self.assertEqual([], list(a.getiterator( () )))
2251
# getiterator() with namespace-qualified filters: '{ns}name' matches one
# qualified tag, '{ns}*' any tag in that namespace, a bare name or '{}name'
# only un-namespaced tags, and '*' every element.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('{a}a')
b = SubElement(a, '{a}b')
c = SubElement(a, '{a}c')
d = SubElement(b, '{b}d')
e = SubElement(c, '{a}e')
f = SubElement(c, '{b}f')
g = SubElement(c, 'g')

self.assertEqual([a], list(a.getiterator('{a}a')))
self.assertEqual([], list(a.getiterator('{b}a')))
self.assertEqual([], list(a.getiterator('a')))
self.assertEqual([a,b,d,c,e,f,g], list(a.getiterator('*')))
self.assertEqual([f], list(c.getiterator('{b}*')))
self.assertEqual([d, f], list(a.getiterator('{b}*')))
self.assertEqual([g], list(a.getiterator('g')))
self.assertEqual([g], list(a.getiterator('{}g')))
self.assertEqual([g], list(a.getiterator('{}*')))
2291
# getiterator() with a wildcard namespace: '{*}name' is expected to match
# the local name in any namespace (including none), '{*}*' every element.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('{a}a')
b = SubElement(a, '{nsA}b')
c = SubElement(b, '{nsB}b')
d = SubElement(a, 'b')
e = SubElement(a, '{nsA}e')
f = SubElement(e, '{nsB}e')
g = SubElement(e, 'e')

self.assertEqual([b, c, d], list(a.getiterator('{*}b')))
self.assertEqual([e, f, g], list(a.getiterator('{*}e')))
self.assertEqual([a, b, c, d, e, f, g], list(a.getiterator('{*}*')))
2313
2338
2354
2371
2378
2385
2394
# findall() with descendant paths: count matches for a namespaced tag, for a
# namespace wildcard, and for the un-namespaced tag of the same local name.
XML = self.etree.XML
root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
self.assertEqual(len(root.findall(".//{X}b")), 2)
self.assertEqual(len(root.findall(".//{X}*")), 2)
self.assertEqual(len(root.findall(".//b")), 3)
2401
2408
# Element.index(child, start, stop) over ten children: every child is found
# at its own position, and the optional start/stop bounds (negative values
# included) are expected to raise ValueError when the child lies outside.
etree = self.etree
e = etree.Element('foo')
for i in range(10):
    etree.SubElement(e, 'a%s' % i)
for i in range(10):
    self.assertEqual(i, e.index(e[i]))

self.assertEqual(3, e.index(e[3], 3))
self.assertRaises(ValueError, e.index, e[3], 4)
self.assertRaises(ValueError, e.index, e[3], 0, 2)
self.assertRaises(ValueError, e.index, e[8], 0, -3)
self.assertRaises(ValueError, e.index, e[8], -5, -3)
self.assertEqual(8, e.index(e[8], 0, -1))
self.assertEqual(8, e.index(e[8], -12, -1))
self.assertEqual(0, e.index(e[0], -12, -1))
2434
# Element.replace(old, new) where 'new' is already a child of the same
# parent: the replacement is moved (so the parent shrinks by one) and text
# and tail of the involved elements are expected to stay untouched.
etree = self.etree
e = etree.Element('foo')
for i in range(10):
    el = etree.SubElement(e, 'a%s' % i)
    el.text = "text%d" % i
    el.tail = "tail%d" % i

child0 = e[0]
child1 = e[1]
child2 = e[2]

# Replace the first child with the second one.
e.replace(e[0], e[1])
self.assertEqual(9, len(e))
self.assertEqual(child1, e[0])
self.assertEqual(child1.text, "text1")
self.assertEqual(child1.tail, "tail1")
self.assertEqual(child0.tail, "tail0")
self.assertEqual(child2, e[1])

# Replace the last child with the (new) first one.
e.replace(e[-1], e[0])
self.assertEqual(child1, e[-1])
self.assertEqual(child1.text, "text1")
self.assertEqual(child1.tail, "tail1")
self.assertEqual(child2, e[0])
2470
# Element.replace(old, new) with a freshly created element: it takes the
# first slot, keeps its own text and tail, and the following sibling stays
# where it was.
etree = self.etree
e = etree.Element('foo')
for i in range(10):
    etree.SubElement(e, 'a%s' % i)

new_element = etree.Element("test")
new_element.text = "TESTTEXT"
new_element.tail = "TESTTAIL"
child1 = e[1]
e.replace(e[0], new_element)

self.assertEqual(new_element, e[0])
self.assertEqual("TESTTEXT", e[0].text)
self.assertEqual("TESTTAIL", e[0].tail)
self.assertEqual(child1, e[1])
2492
2508
2526
2544
2562
# Assigning three elements to an extended slice that selects only two
# positions (a[1::2] over four children) is expected to raise ValueError and
# leave the children unchanged.
#
# The former "try: slice / except NameError" guard was dropped: 'slice' is a
# builtin, so the early-return branch could never run.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(a, 'd')
e = SubElement(a, 'e')

x = Element('x')
y = Element('y')
z = Element('z')

self.assertRaises(
    ValueError,
    operator.setitem, a, slice(1,None,2), [x, y, z])

self.assertEqual([b, c, d, e], list(a))
2589
2602
2610
2619
2629
2639
2645
2653
2659
2666
2672
# Parse a document with an XML declaration and a PUBLIC doctype, then check
# that tree.docinfo exposes encoding, XML version, public id, system URL,
# root name and the reconstructed doctype string.
etree = self.etree
xml_header = '<?xml version="1.0" encoding="ascii"?>'
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)

xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')

tree = etree.parse(BytesIO(xml))
docinfo = tree.docinfo
self.assertEqual(docinfo.encoding, "ascii")
self.assertEqual(docinfo.xml_version, "1.0")
self.assertEqual(docinfo.public_id, pub_id)
self.assertEqual(docinfo.system_url, sys_id)
self.assertEqual(docinfo.root_name, 'html')
self.assertEqual(docinfo.doctype, doctype_string)
2690
2706
2718
2730
2736
# tostring(..., doctype=...) is expected to emit the supplied doctype in
# place of the one the document was parsed with.
etree = self.etree
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))

xml = _bytes('<!DOCTYPE root>\n<root/>')
tree = etree.parse(BytesIO(xml))

expected = xml.replace(_bytes('<!DOCTYPE root>'), doctype_string)
self.assertEqual(expected,
                 etree.tostring(tree, doctype=doctype_string))
2747
# The base URL given at parse time is visible through .base without
# creating an xml:base attribute; assigning to .base afterwards is expected
# to change .base and to set the xml:base attribute.
etree = self.etree
xml_base_attr = '{http://www.w3.org/XML/1998/namespace}base'

root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base_attr), None)

root.base = "https://secret/url"
self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base_attr), "https://secret/url")
2759
# Counterpart of the .base assignment test: setting the xml:base attribute
# directly is expected to be reflected by the .base property as well.
etree = self.etree
xml_base_attr = '{http://www.w3.org/XML/1998/namespace}base'

root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base_attr), None)

root.set(xml_base_attr, "https://secret/url")
self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base_attr), "https://secret/url")
2772
2778
2783
2790
2804
# A NUL character must be rejected with ValueError in text, in tail and in
# a tag name.
Element = self.etree.Element

a = Element('a')
for attr_name in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attr_name, 'ha\0ho')

self.assertRaises(ValueError, Element, 'ha\0ho')
2813
# Same NUL-character check as the plain-string variant, with the value
# passed through the _str() helper first.
Element = self.etree.Element

a = Element('a')
for attr_name in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attr_name,
                      _str('ha\0ho'))

self.assertRaises(ValueError, Element,
                  _str('ha\0ho'))
2825
# The control characters \x07 and \x02 must be rejected with ValueError in
# text, in tail and in a tag name.
Element = self.etree.Element

a = Element('a')
for attr_name in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attr_name, 'ha\x07ho')
    self.assertRaises(ValueError, setattr, a, attr_name, 'ha\x02ho')

self.assertRaises(ValueError, Element, 'ha\x07ho')
self.assertRaises(ValueError, Element, 'ha\x02ho')
2838
# Same control-character check as the plain-string variant, with each value
# passed through the _str() helper first.
Element = self.etree.Element

a = Element('a')
for attr_name in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attr_name,
                      _str('ha\x07ho'))
    self.assertRaises(ValueError, setattr, a, attr_name,
                      _str('ha\x02ho'))

self.assertRaises(ValueError, Element,
                  _str('ha\x07ho'))
self.assertRaises(ValueError, Element,
                  _str('ha\x02ho'))
2857
# Control characters must still be rejected with ValueError when they
# follow a \u1234 escape in the value handed to _str().
Element = self.etree.Element

a = Element('a')
for attr_name in ("text", "tail"):
    self.assertRaises(ValueError, setattr, a, attr_name,
                      _str('ha\u1234\x07ho'))
    self.assertRaises(ValueError, setattr, a, attr_name,
                      _str('ha\u1234\x02ho'))

self.assertRaises(ValueError, Element,
                  _str('ha\u1234\x07ho'))
self.assertRaises(ValueError, Element,
                  _str('ha\u1234\x02ho'))
2876
2890
2895
2913
2933
2955
# tostring(..., standalone=...) controls the 'standalone' pseudo-attribute
# of the XML declaration, and docinfo.standalone of a re-parsed result is
# expected to report the same value (None when it was never specified).
tostring = self.etree.tostring
XML = self.etree.XML
ElementTree = self.etree.ElementTree

root = XML(_bytes("<root/>"))

tree = ElementTree(root)
self.assertEqual(None, tree.docinfo.standalone)

# No 'standalone' argument: the declaration carries no such attribute.
result = tostring(root, xml_declaration=True, encoding="ASCII")
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))

result = tostring(root, xml_declaration=True, encoding="ASCII",
                  standalone=True)
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))

tree = ElementTree(XML(result))
self.assertEqual(True, tree.docinfo.standalone)

result = tostring(root, xml_declaration=True, encoding="ASCII",
                  standalone=False)
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))

tree = ElementTree(XML(result))
self.assertEqual(False, tree.docinfo.standalone)
2985
3005
# tostring(method="text", encoding="UTF-16") is expected to return the
# concatenated text and tail content, in document order, encoded as UTF-16.
tostring = self.etree.tostring
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
a.text = "A"
a.tail = "tail"
b = SubElement(a, 'b')
b.text = "B"
b.tail = _str("Søk på nettet")
c = SubElement(a, 'c')
c.text = "C"

result = tostring(a, method="text", encoding="UTF-16")

self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
                 result)
3024
# Text serialisation of non-ASCII content: without an explicit encoding the
# call is expected to raise UnicodeEncodeError, with encoding="UTF-8" it
# returns the UTF-8 encoded text.
tostring = self.etree.tostring
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
a.text = _str('Søk på nettetA')
a.tail = "tail"
b = SubElement(a, 'b')
b.text = "B"
b.tail = _str('Søk på nettetB')
c = SubElement(a, 'c')
c.text = "C"

self.assertRaises(UnicodeEncodeError,
                  tostring, a, method="text")

self.assertEqual(
    _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
    tostring(a, encoding="UTF-8", method="text"))
3045
3058
3074
3078
3093
3111
3124
# tostring(element, encoding=<unicode type>) is expected to return a text
# (unicode) string that covers only the serialised sub-tree of the element.
tostring = self.etree.tostring
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(c, 'd')

# assertTrue: the assert_ alias was removed in Python 3.12.
self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
self.assertEqual(_bytes('<b></b>'),
                 canonicalize(tostring(b, encoding=_unicode)))
self.assertEqual(_bytes('<c><d></d></c>'),
                 canonicalize(tostring(c, encoding=_unicode)))
3140
3145
3160
# Unicode serialisation with and without pretty printing: the default and
# pretty_print=False give the compact form, pretty_print=True an indented,
# newline-terminated form.
tostring = self.etree.tostring
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')

result = tostring(a, encoding=_unicode)
self.assertEqual(result, "<a><b/><c/></a>")

result = tostring(a, encoding=_unicode, pretty_print=False)
self.assertEqual(result, "<a><b/><c/></a>")

result = tostring(a, encoding=_unicode, pretty_print=True)
# NOTE(review): the expected literal shows one space of indentation; confirm
# it matches the serializer's real indent width and was not whitespace-
# collapsed when this file was extracted.
self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3178
3179
3180
3181 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3192
3193
# XInclude with parse="text": after processing, the included file's raw
# content is expected to sit in the root's text, between the text that
# preceded the xi:include element and the tail that followed it.
filename = fileInTestDir('test_broken.xml')
root = etree.XML(_bytes('''\
<doc xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="%s" parse="text"/>
</doc>
''' % filename))
# Capture the surrounding whitespace before the include element is replaced.
old_text = root.text
content = read_file(filename)
old_tail = root[0].tail

self.include( etree.ElementTree(root) )
self.assertEqual(old_text + content + old_tail,
                 root.text)
3209
3221
# Run XInclude processing with a custom resolver registered on the parser
# and check that the resolver was consulted for the DTD, for the input
# document and for the included resource.
class res(etree.Resolver):
    include_text = read_file(fileInTestDir('test.xml'))
    # Records which kinds of URL the resolver has been asked for.
    called = {}
    def resolve(self, url, id, context):
        if url.endswith(".dtd"):
            self.called["dtd"] = True
            return self.resolve_filename(
                fileInTestDir('test.dtd'), context)
        elif url.endswith("test_xinclude.xml"):
            # Returning None declines the request for the input document.
            self.called["input"] = True
            return None
        else:
            self.called["include"] = True
            return self.resolve_string(self.include_text, context)

res_instance = res()
parser = etree.XMLParser(load_dtd = True)
parser.resolvers.add(res_instance)

tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
                   parser = parser)

self.include(tree)

# Sort so the comparison does not depend on dict ordering.
called = sorted(res_instance.called.items())
self.assertEqual(
    [("dtd", True), ("include", True), ("input", True)],
    called)
3252
3256
3257
3262
3263
# write_c14n() to a file-like object: the canonical form spells the empty
# element out as a start/end tag pair.
tree = self.parse(_bytes('<a><b/></a>'))
f = BytesIO()
tree.write_c14n(f)
s = f.getvalue()
self.assertEqual(_bytes('<a><b></b></a>'),
                 s)
3272
# write_c14n(compression=9): the written bytes are expected to be a gzip
# stream that decompresses to the canonical document.
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
f = BytesIO()
tree.write_c14n(f, compression=9)
gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
try:
    s = gzfile.read()
finally:
    gzfile.close()
self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
                 s)
3284
3296
3312
3330
3342
3354
# write_c14n() in inclusive (default / exclusive=False) and exclusive mode.
# Exclusive mode is expected to emit only the namespace declarations that
# are used, at the element using them; inclusive_ns_prefixes forces the
# listed prefixes to be declared at the root as in inclusive mode.
tree = self.parse(_bytes(
        '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

f = BytesIO()
tree.write_c14n(f)
s = f.getvalue()
self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                 s)

f = BytesIO()
tree.write_c14n(f, exclusive=False)
s = f.getvalue()
self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                 s)

f = BytesIO()
tree.write_c14n(f, exclusive=True)
s = f.getvalue()
self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
                 s)

f = BytesIO()
tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
s = f.getvalue()
self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
                 s)
3379
# Same inclusive/exclusive C14N expectations as for write_c14n(), exercised
# through tostring(tree, method='c14n', ...).
tree = self.parse(_bytes(
        '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

s = etree.tostring(tree, method='c14n')
self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                 s)

s = etree.tostring(tree, method='c14n', exclusive=False)
self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                 s)

s = etree.tostring(tree, method='c14n', exclusive=True)
self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
                 s)

# An otherwise unused prefix listed as inclusive is still declared on the root.
s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
                 s)
3396
# C14N serialisation of individual elements (the root and its child) via
# tostring(element, method='c14n', ...), in inclusive and exclusive mode.
tree = self.parse(_bytes(
        '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
root = tree.getroot()

s = etree.tostring(root, method='c14n')
self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                 s)

s = etree.tostring(root, method='c14n', exclusive=False)
self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                 s)

s = etree.tostring(root, method='c14n', exclusive=True)
self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
                 s)

# Child element: inclusive mode carries over all in-scope declarations,
# exclusive mode only the one the element itself uses.
s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
                 s)

s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
                 s)

s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
                 s)
3420
""" Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
tree = self.parse(_bytes(
        '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

# Three inclusive prefixes are passed on purpose (see the docstring).
s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                 s)
3429
3430
# tree.write() to a file-like object is expected to reproduce the parsed
# document byte for byte.
tree = self.parse(_bytes('<a><b/></a>'))
f = BytesIO()
tree.write(f)
s = f.getvalue()
self.assertEqual(_bytes('<a><b/></a>'),
                 s)
3439
# tree.write(compression=9): the written bytes are expected to be a gzip
# stream that decompresses to the original document.
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
f = BytesIO()
tree.write(f, compression=9)
gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
try:
    s = gzfile.read()
finally:
    gzfile.close()
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                 s)
3451
# tree.write() at several compression levels: level 0 equals the default
# (uncompressed) output; levels 1 and 9 must not be larger than the
# uncompressed form and must decompress to the same document.
expected = _bytes('<a>'+'<b/>'*200+'</a>')
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))

f = BytesIO()
tree.write(f, compression=0)
s0 = f.getvalue()

f = BytesIO()
tree.write(f)
self.assertEqual(f.getvalue(), s0)

decompressed = {}
for level in (1, 9):
    f = BytesIO()
    tree.write(f, compression=level)
    s = f.getvalue()
    # assertTrue: the assert_ alias was removed in Python 3.12.
    self.assertTrue(len(s) <= len(s0))
    gzfile = gzip.GzipFile(fileobj=BytesIO(s))
    try:
        decompressed[level] = gzfile.read()
    finally:
        gzfile.close()

self.assertEqual(expected, s0)
self.assertEqual(expected, decompressed[1])
self.assertEqual(expected, decompressed[9])
3488
3500
3516
3528
3541
3543 etree = etree
3544
3566
"""Check that a globally registered Python logger sees parser errors.

This can't really be tested as long as there isn't a way to
reset the logging setup ...
"""
parse = self.etree.parse

messages = []
class Logger(self.etree.PyErrorLog):
    # Collect only the formatted message of each log call.
    def log(self, entry, message, *args):
        messages.append(message)

self.etree.use_global_python_log(Logger())
# BytesIO needs bytes; a plain text literal raises TypeError on Python 3.
f = BytesIO(_bytes('<a><b></c></b></a>'))
try:
    parse(f)
except SyntaxError:
    # The markup is deliberately broken; only the logged messages matter.
    pass
f.close()

# Each marker must occur in at least one collected message.
for marker in ('mismatch', ':PARSER:', ':ERR_TAG_NAME_MISMATCH:', ':1:15:'):
    self.assertTrue([ message for message in messages
                      if marker in message ])
3594
3614
if __name__ == '__main__':
    # Not meant to be run directly: print a hint naming the test runner.
    hint = 'to test use test.py %s' % __file__
    print(hint)
3617