Package lxml :: Package tests :: Module test_io
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_io

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  IO test cases that apply to both etree and ElementTree 
  5  """ 
  6   
  7  from __future__ import absolute_import 
  8   
  9  import unittest 
 10  import tempfile, gzip, os, os.path, gc, shutil 
 11   
 12  from .common_imports import ( 
 13      etree, ElementTree, _str, _bytes, 
 14      SillyFileLike, LargeFileLike, HelperTestCase, 
 15      read_file, write_to_file, BytesIO, tmpfile 
 16  ) 
 17   
 18   
class _IOTestCaseBase(HelperTestCase):
    """(c)ElementTree compatibility for IO functions/methods.

    The tests only talk to the implementation through ``self.etree``, so a
    concrete subclass selects the module under test by overriding the
    ``etree`` class attribute (it is ``None`` here).
    """
    etree = None

    def setUp(self):
        """Set up a minimal tree and a private scratch directory."""
        self.root = self.etree.Element('a')
        self.root_str = self.etree.tostring(self.root)
        self.tree = self.etree.ElementTree(self.root)
        self._temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the scratch directory created by ``setUp``."""
        # NOTE(review): the collection presumably lets unreferenced trees
        # release their file handles before the directory is deleted --
        # confirm.
        gc.collect()
        shutil.rmtree(self._temp_dir)

    def getTestFilePath(self, name):
        """Return the path of ``name`` inside the per-test scratch directory."""
        return os.path.join(self._temp_dir, name)

    def buildNodes(self, element, children, depth):
        """Recursively append ``children`` sub-elements per node to ``element``.

        The recursion stops when ``depth`` reaches 0; each new element is
        tagged ``element_<depth>_<index>``.
        """
        Element = self.etree.Element

        if depth == 0:
            return
        for i in range(children):
            new_element = Element('element_%s_%s' % (depth, i))
            self.buildNodes(new_element, children, depth - 1)
            element.append(new_element)

    def _check_write_parse_write(self, text, encoding):
        """Write a tree, parse it back, write it again, and compare the bytes.

        ``text`` becomes the text of the top element and ``encoding`` is
        passed to both ``write()`` calls.
        """
        Element = self.etree.Element
        ElementTree = self.etree.ElementTree

        element = Element('top')
        element.text = text
        tree = ElementTree(element)
        self.buildNodes(element, 10, 3)

        first_dump = self.getTestFilePath('testdump.xml')
        second_dump = self.getTestFilePath('testdump2.xml')

        with open(first_dump, 'wb') as f:
            tree.write(f, encoding=encoding)
        with open(first_dump, 'rb') as f:
            tree = ElementTree(file=f)
        with open(second_dump, 'wb') as f:
            tree.write(f, encoding=encoding)
        with open(first_dump, 'rb') as f:
            data1 = f.read()
        with open(second_dump, 'rb') as f:
            data2 = f.read()
        self.assertEqual(data1, data2)

    def test_tree_io(self):
        self._check_write_parse_write(_str("qwrtioüöä\uAABB"), 'UTF-8')

    def test_tree_io_latin1(self):
        self._check_write_parse_write(_str("qwrtioüöäßá"), 'iso-8859-1')

    def test_write_filename(self):
        # (c)ElementTree supports filename strings as write argument
        with tmpfile(prefix="p", suffix=".xml") as filename:
            self.tree.write(filename)
            self.assertEqual(read_file(filename, 'rb').replace(b'\n', b''),
                             self.root_str)

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted listing; the name is reconstructed by analogy with
    # test_write_filename_special_plus -- confirm against the upstream file.
    def test_write_filename_special_percent(self):
        # '%20' is a URL escaped space character.
        before_test = os.listdir(tempfile.gettempdir())

        def difference(filenames):
            return sorted(
                fn for fn in set(filenames).difference(before_test)
                if fn.startswith('lxmltmp-')
            )

        with tmpfile(prefix="lxmltmp-p%20p", suffix=".xml") as filename:
            # Pre-bind both listings: if listdir() or write() raises, the
            # handler below must still be able to print its diagnostics and
            # re-raise the real error instead of an UnboundLocalError.
            before_write = after_write = ()
            try:
                before_write = os.listdir(tempfile.gettempdir())
                self.tree.write(filename)
                after_write = os.listdir(tempfile.gettempdir())
                self.assertEqual(read_file(filename, 'rb').replace(b'\n', b''),
                                 self.root_str)
            except (AssertionError, IOError, OSError):
                print("Before write: %s, after write: %s" % (
                    difference(before_write), difference(after_write))
                )
                raise

    def test_write_filename_special_plus(self):
        # '+' is used as an escaped space character in URLs.
        with tmpfile(prefix="p+", suffix=".xml") as filename:
            self.tree.write(filename)
            self.assertEqual(read_file(filename, 'rb').replace(b'\n', b''),
                             self.root_str)

    def test_write_invalid_filename(self):
        filename = os.path.join(
            'hopefullynonexistingpathname', 'invalid_file.xml')
        try:
            self.tree.write(filename)
        except IOError:
            pass
        else:
            self.fail("writing to an invalid file path should fail")

    def test_module_parse_gzipobject(self):
        # (c)ElementTree supports gzip instance as parse argument
        with tmpfile(suffix=".xml.gz") as filename:
            with gzip.open(filename, 'wb') as f:
                f.write(self.root_str)
            with gzip.open(filename, 'rb') as f_gz:
                tree = self.etree.parse(f_gz)
        self.assertEqual(self.etree.tostring(tree.getroot()), self.root_str)

    def test_class_parse_filename(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from filename
        with tmpfile(suffix=".xml") as filename:
            write_to_file(filename, self.root_str, 'wb')
            tree = self.etree.ElementTree()
            root = tree.parse(filename)
            self.assertEqual(self.etree.tostring(root), self.root_str)

    def test_class_parse_filename_remove_previous(self):
        with tmpfile(suffix=".xml") as filename:
            write_to_file(filename, self.root_str, 'wb')
            tree = self.etree.ElementTree()
            root = tree.parse(filename)
            # and now do it again; previous content should still be there
            root2 = tree.parse(filename)
            self.assertEqual('a', root.tag)
            self.assertEqual('a', root2.tag)
            # now remove all references to root2, and parse again
            del root2
            root3 = tree.parse(filename)
            self.assertEqual('a', root.tag)
            self.assertEqual('a', root3.tag)
            # root2's memory should've been freed here
            # XXX how to check?

    def test_class_parse_fileobject(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from file object
        handle, filename = tempfile.mkstemp(suffix=".xml")
        try:
            # Close the descriptor before re-opening the file for reading,
            # and keep the removal in the outer 'finally' so that a failing
            # write/close cannot leave the temporary file behind.
            try:
                os.write(handle, self.root_str)
            finally:
                os.close(handle)
            with open(filename, 'rb') as f:
                tree = self.etree.ElementTree()
                root = tree.parse(f)
            self.assertEqual(self.etree.tostring(root), self.root_str)
        finally:
            os.remove(filename)

    def test_class_parse_unamed_fileobject(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from unnamed file object
        f = SillyFileLike()
        root = self.etree.ElementTree().parse(f)
        self.assertTrue(root.tag.endswith('foo'))

    def test_module_parse_large_fileobject(self):
        # parse from unnamed file object
        f = LargeFileLike()
        tree = self.etree.parse(f)
        root = tree.getroot()
        self.assertTrue(root.tag.endswith('root'))

    def test_module_parse_fileobject_error(self):
        # an exception raised by read() must propagate out of parse()
        class LocalError(Exception):
            pass

        class TestFile:
            def read(*args):
                raise LocalError

        f = TestFile()
        self.assertRaises(LocalError, self.etree.parse, f)

    def test_module_parse_fileobject_late_error(self):
        # read() hands out one character per call and raises LocalError once
        # the (incomplete) document is exhausted
        class LocalError(Exception):
            pass

        class TestFile:
            data = '<root>test</'
            try:
                next_char = iter(data).next
            except AttributeError:
                # Python 3
                next_char = iter(data).__next__
            counter = 0

            def read(self, amount=None):
                if amount is None:
                    while True:
                        self.read(1)
                else:
                    try:
                        self.counter += 1
                        return _bytes(self.next_char())
                    except StopIteration:
                        raise LocalError

        f = TestFile()
        self.assertRaises(LocalError, self.etree.parse, f)
        # one counted read per character, plus the final one that raised
        self.assertEqual(f.counter, len(f.data)+1)

    def test_module_parse_fileobject_type_error(self):
        # read() returns an int instead of data
        class TestFile:
            def read(*args):
                return 1

        f = TestFile()

        try:
            expect_exc = (TypeError, self.etree.ParseError)
        except AttributeError:
            # this implementation does not provide a ParseError class
            expect_exc = TypeError
        self.assertRaises(expect_exc, self.etree.parse, f)

    def test_etree_parse_io_error(self):
        # the second directory name contains characters beyond latin-1
        filename = _str('nosuchfile.xml')
        for dirname in (_str('Directory'), _str('Каталог')):
            dn = tempfile.mkdtemp(prefix=dirname)
            try:
                self.assertRaises(
                    IOError, self.etree.parse, os.path.join(dn, filename))
            finally:
                os.rmdir(dn)

    def test_parse_utf8_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
        bom = _bytes('\\xEF\\xBB\\xBF').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(3, len(bom))
        # delete=False: the file is re-opened by name after being closed and
        # is unlinked explicitly below
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(bom)
                f.write(uxml.encode("utf-8"))
            finally:
                f.close()
            tree = self.etree.parse(f.name)
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, tree.getroot().text)

    def test_iterparse_utf8_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
        bom = _bytes('\\xEF\\xBB\\xBF').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(3, len(bom))
        # delete=False: the file is re-opened by name after being closed and
        # is unlinked explicitly below
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(bom)
                f.write(uxml.encode("utf-8"))
            finally:
                f.close()
            elements = [el for _, el in self.etree.iterparse(f.name)]
            self.assertEqual(1, len(elements))
            root = elements[0]
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, root.text)

    def test_iterparse_utf16_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-16"?><p>%s</p>' % utext
        # both byte orders of the two-byte BOM, separated by a space
        boms = _bytes('\\xFE\\xFF \\xFF\\xFE').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(5, len(boms))
        xml = uxml.encode("utf-16")
        self.assertTrue(xml[:2] in boms, repr(xml[:2]))

        # delete=False: the file is re-opened by name after being closed and
        # is unlinked explicitly below
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(xml)
            finally:
                f.close()
            elements = [el for _, el in self.etree.iterparse(f.name)]
            self.assertEqual(1, len(elements))
            root = elements[0]
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, root.text)
class ETreeIOTestCase(_IOTestCaseBase):
    """Run the shared IO tests against ``etree``, plus one extra write test."""
    etree = etree

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted listing; the name is reconstructed from what the body
    # checks -- confirm against the upstream file.
    def test_write_compressed_text(self):
        """Write with ``method='text'`` and ``compression=9``, then gunzip."""
        Element = self.etree.Element
        SubElement = self.etree.SubElement
        ElementTree = self.etree.ElementTree
        text = _str("qwrtioüöä")

        root = Element('root')
        root.text = text
        child = SubElement(root, 'sub')
        child.text = 'TEXT'
        child.tail = 'TAIL'
        SubElement(root, 'sub').text = text

        tree = ElementTree(root)
        out = BytesIO()
        tree.write(out, method='text', encoding='utf8', compression=9)
        out.seek(0)

        # read the written bytes back through gzip; the context manager
        # closes the GzipFile even if reading or decoding fails
        with gzip.GzipFile(fileobj=out) as f:
            result = f.read().decode('utf8')
        self.assertEqual(text+'TEXTTAIL'+text, result)
if ElementTree:
    class ElementTreeIOTestCase(_IOTestCaseBase):
        """Run the shared IO tests against the imported ``ElementTree``.

        The class is only defined when ``ElementTree`` is truthy.
        """
        etree = ElementTree
362 363
def test_suite():
    """Return a suite with every IO test case defined in this module.

    ``ElementTreeIOTestCase`` is only included when ``ElementTree`` is
    truthy, mirroring the condition under which that class is defined.
    """
    # unittest.makeSuite() is deprecated since Python 3.11 and removed in
    # 3.13; the default loader collects the same 'test*' methods.
    load_tests = unittest.defaultTestLoader.loadTestsFromTestCase

    suite = unittest.TestSuite()
    suite.addTests([load_tests(ETreeIOTestCase)])
    if ElementTree:
        suite.addTests([load_tests(ElementTreeIOTestCase)])
    return suite
if __name__ == '__main__':
    # Running this file directly executes no tests; it only prints a hint
    # naming test.py and this file.
    print('to test use test.py %s' % (__file__,))