Package lxml :: Package tests :: Module selftest2
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.selftest2

  1  # $Id: selftest.py 2213 2005-01-11 18:49:47Z fredrik $ 
  2  # elementtree selftest program 
  3   
  4  # this test script uses Python's "doctest" module to check that the 
  5  # *test script* works as expected. 
  6   
  7  import sys 
  8   
  9  try: 
 10      from StringIO import StringIO 
 11      BytesIO = StringIO 
 12  except ImportError: 
 13      from io import BytesIO, StringIO 
 14   
 15  from lxml import etree as ElementTree 
 16   
def stdout():
    """Return a writable stream suitable for ``tree.write()`` in doctests.

    On Python 2 this is ``sys.stdout`` itself.  On Python 3 it is a small
    wrapper object whose ``write()`` accepts bytes as well as text: bytes
    are decoded as ISO8859-1 before being forwarded to ``sys.stdout``.
    """
    if sys.version_info[0] >= 3:
        class bytes_stdout(object):
            # Minimal file-like object: only write() is provided.
            def write(self, data):
                text = data.decode('ISO8859-1') if isinstance(data, bytes) else data
                # sys.stdout is looked up at call time, not captured earlier.
                sys.stdout.write(text)

        return bytes_stdout()
    return sys.stdout
def unserialize(text):
    """Parse the XML document held in the string *text*; return its root element."""
    source = StringIO(text)
    return ElementTree.parse(source).getroot()
31
def serialize(elem, encoding=None):
    """Serialize *elem* to a string in a form that is easy to compare in doctests.

    The element is wrapped in an ElementTree and written to an in-memory
    buffer, passing *encoding* on to ``write()`` only when it is truthy.
    On Python 3 the resulting bytes are decoded as ISO8859-1.  Every
    ``' />'`` is then normalised to ``'/>'`` and a single trailing newline,
    if present, is removed.
    """
    buf = BytesIO()
    writer = ElementTree.ElementTree(elem)
    if not encoding:
        writer.write(buf)
    else:
        writer.write(buf, encoding=encoding)

    output = buf.getvalue()
    if sys.version_info[0] >= 3:
        output = output.decode('ISO8859-1')
    output = output.replace(' />', '/>')
    if output.endswith('\n'):
        return output[:-1]
    return output
46
def summarize(elem):
    """Return the ``tag`` of *elem*, used as a short summary in the doctests."""
    tag = elem.tag
    return tag
49
def summarize_list(seq):
    """Return a list holding the ``summarize()`` result for each item of *seq*."""
    return [summarize(item) for item in seq]

# Sample document without namespaces, parsed once at import time.
SAMPLE_XML = unserialize("""
<body>
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
""")

# Same structure as SAMPLE_XML, but the root declares a default namespace.
SAMPLE_XML_NS = unserialize("""
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
""")

# interface tests

def check_string(string):
    """Exercise the string-like interface of *string*.

    Takes its length, iterates over it (printing a message for every item
    whose length is not 1), concatenates it with ``""`` and ``" "`` and
    takes an empty slice.  The results are discarded; the operations are
    performed only so that an object lacking them raises.
    """
    len(string)
    for ch in string:
        if len(ch) == 1:
            continue
        print("expected one-character string, got %r" % ch)
    # Concatenation and slicing only have to succeed.
    _unused = string + ""
    _unused = string + " "
    string[:0]
83
def check_mapping(mapping):
    """Exercise the mapping interface of *mapping*.

    Takes its length, calls ``keys()`` and ``items()``, looks up every key,
    then stores ``"value"`` under ``"key"`` and prints a message if reading
    it back gives something else.  Note that *mapping* is modified: the
    ``"key"`` entry stays in place afterwards.
    """
    len(mapping)
    names = mapping.keys()
    mapping.items()  # only has to be callable
    for name in names:
        mapping[name]  # every listed key must be retrievable
    mapping["key"] = "value"
    if mapping["key"] != "value":
        print("expected value string, got %r" % mapping["key"])
93
def check_element(element):
    """Check that *element* provides the basic element interface.

    Prints ``"no <name> member"`` for each of ``tag``, ``attrib``, ``text``
    and ``tail`` that is missing.  Then runs ``check_string`` on the tag,
    ``check_mapping`` on the attribute mapping, and ``check_string`` on the
    text and tail when they are not None.

    Note that ``check_mapping`` stores a ``"key"`` entry in
    ``element.attrib``, so the element is modified by this check.
    """
    for name in ("tag", "attrib", "text", "tail"):
        if not hasattr(element, name):
            print("no %s member" % name)
    check_string(element.tag)
    check_mapping(element.attrib)
    # Identity comparison is the correct test for the None singleton.
    if element.text is not None:
        check_string(element.text)
    if element.tail is not None:
        check_string(element.tail)
109
def check_element_tree(tree):
    """Run the element interface checks on the root element of *tree*."""
    root = tree.getroot()
    check_element(root)
112
def element():
    """
    Test element tree interface.

    A new element, and a tree wrapping it, are passed through the
    check_element / check_element_tree helpers.  Those helpers print a
    message for each problem they find, so the examples below expect no
    output.

    >>> element = ElementTree.Element("tag")
    >>> check_element(element)
    >>> tree = ElementTree.ElementTree(element)
    >>> check_element_tree(tree)
    """
122
def parsefile():
    """
    Test parsing from an open file object.

    The sample files are opened here, explicitly in binary mode ("rb"),
    and handed to 'parse'; the parsed tree is then written through
    stdout() so that doctest can compare the serialized document.
    (Original note: by default, the 'parse' function opens the file in
    binary mode, and doctest doesn't filter out carriage returns.)

    NOTE(review): the expected output is compared character for
    character, so its indentation has to match what serializing
    samples/simple.xml and samples/simple-ns.xml produces -- confirm
    against the sample files.

    >>> file = open("samples/simple.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    >>> file = open("samples/simple-ns.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root xmlns="http://namespace/">
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    """
148
def writefile():
    """
    Test serialization of an element that has text, before and after a
    subelement (with its own text) is added to it.

    >>> elem = ElementTree.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ElementTree.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'
    """
159
def encoding():
    r"""
    Test encoding issues.

    Each group of examples sets the element text or an attribute value
    and serializes the element four times: with the default encoding,
    with utf-8, with us-ascii, and with iso-8859-1 (that last result is
    lower-cased before it is compared).  The groups cover plain ASCII
    text, markup characters in text, markup characters in an attribute,
    non-ASCII text, and a non-ASCII attribute value.

    >>> elem = ElementTree.Element("tag")
    >>> elem.text = u'abc'
    >>> serialize(elem)
    '<tag>abc</tag>'
    >>> serialize(elem, "utf-8")
    '<tag>abc</tag>'
    >>> serialize(elem, "us-ascii")
    '<tag>abc</tag>'
    >>> serialize(elem, "iso-8859-1").lower()
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"

    >>> elem.text = "<&\"\'>"
    >>> serialize(elem)
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, "utf-8")
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, "us-ascii") # cdata characters
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, "iso-8859-1").lower()
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'

    >>> elem.attrib["key"] = "<&\"\'>"
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&lt;&amp;&quot;\'&gt;"/>'
    >>> serialize(elem, "utf-8")
    '<tag key="&lt;&amp;&quot;\'&gt;"/>'
    >>> serialize(elem, "us-ascii")
    '<tag key="&lt;&amp;&quot;\'&gt;"/>'
    >>> serialize(elem, "iso-8859-1").lower()
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;"/>'

    >>> elem.text = u'\xe5\xf6\xf6<>'
    >>> elem.attrib.clear()
    >>> serialize(elem)
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, "utf-8")
    '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    >>> serialize(elem, "us-ascii")
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, "iso-8859-1").lower()
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"

    >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&#229;&#246;&#246;&lt;&gt;"/>'
    >>> serialize(elem, "utf-8")
    '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;"/>'
    >>> serialize(elem, "us-ascii")
    '<tag key="&#229;&#246;&#246;&lt;&gt;"/>'
    >>> serialize(elem, "iso-8859-1").lower()
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;"/>'

    """

# On Python 3, rewrite the encoding() docstring before doctest reads it:
# every occurrence of u' becomes ', which turns the Python 2 style unicode
# literals in its examples into plain string literals.
# NOTE(review): presumably needed for Python 3 releases that reject the
# u prefix -- confirm which versions are still supported.
if sys.version_info[0] >= 3:
    encoding.__doc__ = encoding.__doc__.replace("u'", "'")

def qname():
    """
    Test QName handling.

    Only part 1 (decorated tags) is active.  The lines of part 2
    (decorated attributes) are prefixed with ## and therefore are not
    picked up by doctest as examples.

    1) decorated tags

    >>> elem = ElementTree.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri"/>'

    ## 2) decorated attributes

    ## >>> elem.attrib["{uri}key"] = "value"
    ## >>> serialize(elem) # 2.1
    ## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'

    """
240
def cdata():
    """
    Test CDATA handling (etc).

    The word "hello" given as plain text, as numeric character
    references, and inside a CDATA section must serialize to the same
    plain-text element after a parse/serialize round trip.

    >>> serialize(unserialize("<tag>hello</tag>"))
    '<tag>hello</tag>'
    >>> serialize(unserialize("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
    '<tag>hello</tag>'
    >>> serialize(unserialize("<tag><![CDATA[hello]]></tag>"))
    '<tag>hello</tag>'

    """
253
def find():
    """
    Test find methods (including xpath syntax).

    The first group of examples runs find, findtext and findall against
    SAMPLE_XML, both on the root element and on an ElementTree wrapping
    it.  The last three examples run findall against SAMPLE_XML_NS, where
    tags only match when given in {namespace}tag form.

    >>> elem = SAMPLE_XML
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ElementTree.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag']
    >>> summarize_list(elem.findall("section//*"))
    ['tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag']
    >>> summarize_list(elem.findall("*//*"))
    ['tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(ElementTree.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']
    >>> summarize_list(ElementTree.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']
    >>> elem = SAMPLE_XML_NS
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """

# XXX: only deep copying (copy.deepcopy) is supported; the shallow-copy
# (copy.copy) examples in copy() are commented out.

def copy():
    """
    Test copy handling (etc).

    Only copy.deepcopy is exercised; the shallow-copy (copy.copy)
    examples are commented out.  After the child of the original is
    renamed from "foo" to "bar", the deep copy must still serialize with
    the "foo" child.

    >>> import copy
    >>> e1 = unserialize("<tag>hello<foo/></tag>")
    >>> # e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"

    >>> serialize(e1).replace(' ', '')
    '<tag>hello<bar/></tag>'

    ## >>> serialize(e2).replace(' ', '')
    ## '<tag>hello<bar/></tag>'

    >>> serialize(e3).replace(' ', '')
    '<tag>hello<foo/></tag>'

    """
342
def attrib():
    """
    Test attribute handling.

    Covers get/set on an element created without attributes (1.x),
    attributes passed as keyword arguments (2.x), attributes passed as a
    dictionary (3.x), and both at once, where the keyword argument is
    expected to win over the dictionary entry (4.x).

    >>> elem = ElementTree.Element("tag")
    >>> elem.get("key") # 1.1
    >>> elem.get("key", "default") # 1.2
    'default'
    >>> elem.set("key", "value")
    >>> elem.get("key") # 1.3
    'value'

    >>> elem = ElementTree.Element("tag", key="value")
    >>> elem.get("key") # 2.1
    'value'
    >>> elem.attrib # 2.2
    {'key': 'value'}

    >>> elem = ElementTree.Element("tag", {"key": "value"})
    >>> elem.get("key") # 3.1
    'value'
    >>> elem.attrib # 3.2
    {'key': 'value'}

    >>> elem = ElementTree.Element("tag", {"key": "other"}, key="value")
    >>> elem.get("key") # 4.1
    'value'
    >>> elem.attrib # 4.2
    {'key': 'value'}

    """
374
def makeelement():
    """
    Test makeelement handling.

    A subelement created with makeelement() is appended to its parent,
    dropped again by clear(), and appended a second time; the parent must
    serialize identically after both appends.

    >>> elem = ElementTree.Element("tag")
    >>> subelem = elem.makeelement("subtag", {"key": "value"})
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value"/></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag/>'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value"/></tag>'

    """

# Disabled test: the whole observer() doctest below is commented out.
## def observer():
##     """
##     Test observers.

##     >>> def observer(action, elem):
##     ...     print("%s %s" % (action, elem.tag))
##     >>> builder = ElementTree.TreeBuilder()
##     >>> builder.addobserver(observer)
##     >>> parser = ElementTree.XMLParser(builder)
##     >>> file = open("samples/simple.xml", "rb")
##     >>> parser.feed(file.read())
##     start root
##     start element
##     end element
##     start element
##     end element
##     start empty-element
##     end empty-element
##     end root
##     >>> file.close()

##     """

# Document whose DTD pulls in an external parameter entity and whose body
# references &entity;.  It is fed to the parser by the disabled entity()
# doctest below.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# Disabled test: the whole entity() doctest below is commented out.
## def entity():
##     """
##     Test entity handling.

##     1) bad entities

##     >>> ElementTree.XML("<document>&entity;</document>")
##     Traceback (most recent call last):
##     SyntaxError: undefined entity: line 1, column 10

##     2) custom entity

##     >>> parser = ElementTree.XMLParser()
##     >>> parser.entity["entity"] = "text"
##     >>> parser.feed(ENTITY_XML)
##     >>> root = parser.close()
##     >>> serialize(root)
##     '<document>text</document>'

##     """

# Script entry point: run all doctests of this module and report the
# counts; exit with status 1 when at least one example failed.
if __name__ == "__main__":
    # The module imports itself by name, so the directory containing
    # selftest2 has to be importable when this is run as a script.
    import doctest, selftest2
    failed, tested = doctest.testmod(selftest2)
    print("%d tests ok." % (tested - failed))
    if failed > 0:
        print("%d tests failed. Exiting with non-zero return code." % failed)
        sys.exit(1)
