Package lxml :: Package tests :: Module test_io
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_io

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  IO test cases that apply to both etree and ElementTree 
  5  """ 
  6   
  7  import unittest 
  8  import tempfile, gzip, os, os.path, sys, gc, shutil 
  9   
 10  this_dir = os.path.dirname(__file__) 
 11  if this_dir not in sys.path: 
 12      sys.path.insert(0, this_dir)  # needed for Py3 
 13   
 14  from common_imports import etree, ElementTree, _str, _bytes 
 15  from common_imports import SillyFileLike, LargeFileLike, HelperTestCase 
 16  from common_imports import read_file, write_to_file, BytesIO 
 17   
 18   
class _IOTestCaseBase(HelperTestCase):
    """(c)ElementTree compatibility for IO functions/methods

    Shared test body for every ElementTree implementation under test.
    Concrete subclasses set the ``etree`` class attribute to the module
    whose ``Element``/``ElementTree``/``parse``/``tostring`` API is
    exercised; this base class leaves it as ``None``.
    """
    # NOTE(review): the 'def' lines of several tests were missing from the
    # extract this class was recovered from.  The names of
    #   test_write_invalid_filename, test_module_parse_gzipobject,
    #   test_class_parse_filename, test_class_parse_filename_remove_previous,
    #   test_class_parse_fileobject, test_class_parse_unamed_fileobject,
    #   test_module_parse_large_fileobject, test_module_parse_fileobject_error
    # are reconstructed -- confirm against upstream before relying on them.
    #
    # Test methods are documented with comments rather than docstrings so
    # that unittest's verbose output keeps showing the method names.

    etree = None

    def setUp(self):
        """Setting up a minimal tree
        """
        self.root = self.etree.Element('a')
        self.root_str = self.etree.tostring(self.root)
        self.tree = self.etree.ElementTree(self.root)
        # Per-test scratch directory, removed again in tearDown().
        self._temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        # Run a garbage collection pass before deleting the scratch
        # directory (presumably to drop objects still referring to files
        # inside it -- TODO confirm).
        gc.collect()
        shutil.rmtree(self._temp_dir)

    def getTestFilePath(self, name):
        # Path of file `name` inside this test's scratch directory.
        return os.path.join(self._temp_dir, name)

    def buildNodes(self, element, children, depth):
        # Recursively append `children` sub-elements per node to `element`,
        # `depth` levels deep.  Tags encode the remaining depth and the
        # sibling index: 'element_<depth>_<index>'.
        Element = self.etree.Element

        if depth == 0:
            return
        for i in range(children):
            new_element = Element('element_%s_%s' % (depth, i))
            self.buildNodes(new_element, children, depth - 1)
            element.append(new_element)

    def _check_write_parse_roundtrip(self, text, encoding):
        # Serialise a generated tree to 'testdump.xml', parse that file
        # back, serialise the parsed tree to 'testdump2.xml' and require
        # both files to be byte-identical.  Every file is opened in a
        # 'with' block so no handle leaks when a step raises.
        Element = self.etree.Element
        ElementTree = self.etree.ElementTree

        element = Element('top')
        element.text = text
        tree = ElementTree(element)
        self.buildNodes(element, 10, 3)

        first_dump = self.getTestFilePath('testdump.xml')
        second_dump = self.getTestFilePath('testdump2.xml')

        with open(first_dump, 'wb') as f:
            tree.write(f, encoding=encoding)
        with open(first_dump, 'rb') as f:
            tree = ElementTree(file=f)
        with open(second_dump, 'wb') as f:
            tree.write(f, encoding=encoding)

        with open(first_dump, 'rb') as f:
            data1 = f.read()
        with open(second_dump, 'rb') as f:
            data2 = f.read()
        self.assertEqual(data1, data2)

    def test_tree_io(self):
        self._check_write_parse_roundtrip(
            _str("qwrtioüöä\uAABB"), 'UTF-8')

    def test_tree_io_latin1(self):
        self._check_write_parse_roundtrip(
            _str("qwrtioüöäßá"), 'iso-8859-1')

    def test_write_filename(self):
        # (c)ElementTree supports filename strings as write argument

        handle, filename = tempfile.mkstemp(suffix=".xml")
        try:
            # The write happens inside the try so that the descriptor and
            # the temp file are cleaned up even if writing fails.
            self.tree.write(filename)
            self.assertEqual(
                read_file(filename, 'rb').replace(_bytes('\n'), _bytes('')),
                self.root_str)
        finally:
            os.close(handle)
            os.remove(filename)

    # NOTE(review): header reconstructed (source line 111 missing).
    def test_write_invalid_filename(self):
        filename = os.path.join(
            'hopefullynonexistingpathname', 'invalid_file.xml')
        try:
            self.tree.write(filename)
        except IOError:
            pass
        else:
            self.fail("writing to an invalid file path should fail")

    # NOTE(review): header reconstructed (source line 123 missing).
    def test_module_parse_gzipobject(self):
        # (c)ElementTree supports gzip instance as parse argument
        handle, filename = tempfile.mkstemp(suffix=".xml.gz")
        try:
            # try/finally instead of 'with': matches how this file closes
            # gzip objects elsewhere.
            f = gzip.open(filename, 'wb')
            try:
                f.write(self.root_str)
            finally:
                f.close()

            f_gz = gzip.open(filename, 'rb')
            try:
                tree = self.etree.parse(f_gz)
            finally:
                f_gz.close()
            self.assertEqual(self.etree.tostring(tree.getroot()),
                             self.root_str)
        finally:
            os.close(handle)
            os.remove(filename)

    # NOTE(review): header reconstructed (source line 138 missing).
    def test_class_parse_filename(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from filename

        handle, filename = tempfile.mkstemp(suffix=".xml")
        try:
            write_to_file(filename, self.root_str, 'wb')
            tree = self.etree.ElementTree()
            root = tree.parse(filename)
            self.assertEqual(self.etree.tostring(root), self.root_str)
        finally:
            os.close(handle)
            os.remove(filename)

    # NOTE(review): header reconstructed (source line 154 missing).
    def test_class_parse_filename_remove_previous(self):
        handle, filename = tempfile.mkstemp(suffix=".xml")
        try:
            write_to_file(filename, self.root_str, 'wb')
            tree = self.etree.ElementTree()
            root = tree.parse(filename)
            # and now do it again; previous content should still be there
            root2 = tree.parse(filename)
            self.assertEqual('a', root.tag)
            self.assertEqual('a', root2.tag)
            # now remove all references to root2, and parse again
            del root2
            root3 = tree.parse(filename)
            self.assertEqual('a', root.tag)
            self.assertEqual('a', root3.tag)
            # root2's memory should've been freed here
            # XXX how to check?
        finally:
            os.close(handle)
            os.remove(filename)

    # NOTE(review): header reconstructed (source line 175 missing).
    def test_class_parse_fileobject(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from file object

        handle, filename = tempfile.mkstemp(suffix=".xml")
        try:
            os.write(handle, self.root_str)
            with open(filename, 'rb') as f:
                tree = self.etree.ElementTree()
                root = tree.parse(f)
            self.assertEqual(self.etree.tostring(root), self.root_str)
        finally:
            os.close(handle)
            os.remove(filename)

    # NOTE(review): header reconstructed (source line 193 missing).
    def test_class_parse_unamed_fileobject(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from unnamed file object
        f = SillyFileLike()
        root = self.etree.ElementTree().parse(f)
        self.assertTrue(root.tag.endswith('foo'))

    # NOTE(review): header reconstructed (source line 202 missing).
    def test_module_parse_large_fileobject(self):
        # parse from unnamed file object
        f = LargeFileLike()
        tree = self.etree.parse(f)
        root = tree.getroot()
        self.assertTrue(root.tag.endswith('root'))

    # NOTE(review): header reconstructed (source line 209 missing).
    def test_module_parse_fileobject_error(self):
        # An exception raised by the file object's read() on the very
        # first call must come out of parse() unchanged.
        class LocalError(Exception):
            pass

        class TestFile:
            def read(*args):
                raise LocalError

        f = TestFile()
        self.assertRaises(LocalError, self.etree.parse, f)

    def test_module_parse_fileobject_late_error(self):
        # Same as above, but read() only fails once the (truncated)
        # document has been handed out one character at a time.
        class LocalError(Exception):
            pass

        class TestFile:
            data = '<root>test</'
            try:
                next_char = iter(data).next
            except AttributeError:
                # Python 3
                next_char = iter(data).__next__
            counter = 0

            def read(self, amount=None):
                if amount is None:
                    # Unbounded read: keep pulling single characters until
                    # the exhausted iterator makes read(1) raise LocalError.
                    while True:
                        self.read(1)
                else:
                    try:
                        self.counter += 1
                        return _bytes(self.next_char())
                    except StopIteration:
                        raise LocalError

        f = TestFile()
        self.assertRaises(LocalError, self.etree.parse, f)
        # One read per character plus the final, failing one.
        self.assertEqual(f.counter, len(f.data)+1)

    def test_module_parse_fileobject_type_error(self):
        # read() returning a non-string value must be rejected.
        class TestFile:
            def read(*args):
                return 1

        f = TestFile()

        try:
            expect_exc = (TypeError, self.etree.ParseError)
        except AttributeError:
            # This implementation does not expose a ParseError class.
            expect_exc = TypeError
        self.assertRaises(expect_exc, self.etree.parse, f)

    def test_etree_parse_io_error(self):
        # Parsing a missing file must raise IOError, both below a plain
        # ASCII directory and below one whose name contains characters
        # beyond latin-1.
        filename = _str('nosuchfile.xml')
        for dirname in (_str('Directory'), _str('Каталог')):
            dn = tempfile.mkdtemp(prefix=dirname)
            try:
                self.assertRaises(
                    IOError, self.etree.parse, os.path.join(dn, filename))
            finally:
                os.rmdir(dn)

    def test_parse_utf8_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
        # Build the three raw BOM bytes from their escaped spelling.
        bom = _bytes('\\xEF\\xBB\\xBF').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(3, len(bom))
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(bom)
                f.write(uxml.encode("utf-8"))
            finally:
                f.close()
            tree = self.etree.parse(f.name)
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, tree.getroot().text)

    def test_iterparse_utf8_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
        # Build the three raw BOM bytes from their escaped spelling.
        bom = _bytes('\\xEF\\xBB\\xBF').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(3, len(bom))
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(bom)
                f.write(uxml.encode("utf-8"))
            finally:
                f.close()
            elements = [el for _, el in self.etree.iterparse(f.name)]
            self.assertEqual(1, len(elements))
            root = elements[0]
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, root.text)

    def test_iterparse_utf16_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-16"?><p>%s</p>' % utext
        # Both byte orders of the UTF-16 BOM, separated by a space.
        boms = _bytes('\\xFE\\xFF \\xFF\\xFE').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(5, len(boms))
        xml = uxml.encode("utf-16")
        # The encoder must have emitted one of the two BOMs up front.
        self.assertTrue(xml[:2] in boms, repr(xml[:2]))

        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(xml)
            finally:
                f.close()
            elements = [el for _, el in self.etree.iterparse(f.name)]
            self.assertEqual(1, len(elements))
            root = elements[0]
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, root.text)
class ETreeIOTestCase(_IOTestCaseBase):
    # Runs the shared IO tests against the module bound to the name
    # `etree`, plus one test that is only defined for this implementation.
    etree = etree

    # NOTE(review): the 'def' line (source line 335) was missing from the
    # extract this class was recovered from; the method name is
    # reconstructed -- confirm against upstream.
    def test_write_compressed_text(self):
        # Writing with method='text' and compression=9 must produce a
        # gzip stream containing only the concatenated text content.
        Element = self.etree.Element
        SubElement = self.etree.SubElement
        ElementTree = self.etree.ElementTree
        text = _str("qwrtioüöä")

        root = Element('root')
        root.text = text
        child = SubElement(root, 'sub')
        child.text = 'TEXT'
        child.tail = 'TAIL'
        SubElement(root, 'sub').text = text

        tree = ElementTree(root)
        out = BytesIO()
        tree.write(out, method='text', encoding='utf8', compression=9)
        out.seek(0)

        f = gzip.GzipFile(fileobj=out)
        try:
            result = f.read().decode('utf8')
        finally:
            f.close()
        self.assertEqual(text+'TEXTTAIL'+text, result)
# Only define the test case when the imported `ElementTree` name is truthy.
if ElementTree:
    class ElementTreeIOTestCase(_IOTestCaseBase):
        # Runs the shared IO tests against the `ElementTree` module.
        etree = ElementTree
def test_suite():
    """Return a suite holding every IO test case defined in this module.

    ``ETreeIOTestCase`` is always included; ``ElementTreeIOTestCase`` is
    added only when ``ElementTree`` is truthy, matching the condition under
    which that class is defined.
    """
    # unittest.makeSuite() is deprecated since Python 3.11 and removed in
    # 3.13; the default loader collects the same 'test*' methods.
    load = unittest.defaultTestLoader.loadTestsFromTestCase

    suite = unittest.TestSuite()
    suite.addTests([load(ETreeIOTestCase)])
    if ElementTree:
        suite.addTests([load(ElementTreeIOTestCase)])
    return suite
if __name__ == '__main__':
    # Running this file directly executes no tests; it only prints a hint
    # naming the runner script to use.
    hint = 'to test use test.py %s' % __file__
    print(hint)