Package lxml :: Package tests :: Module test_io
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_io

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  IO test cases that apply to both etree and ElementTree 
  5  """ 
  6   
  7  import unittest 
  8  import tempfile, gzip, os, os.path, sys, gc, shutil 
  9   
 10  this_dir = os.path.dirname(__file__) 
 11  if this_dir not in sys.path: 
 12      sys.path.insert(0, this_dir)  # needed for Py3 
 13   
 14  from common_imports import etree, ElementTree, _str, _bytes 
 15  from common_imports import SillyFileLike, LargeFileLike, HelperTestCase 
 16  from common_imports import read_file, write_to_file, BytesIO 
 17   
 18  if sys.version_info < (2,6): 
19 - class NamedTemporaryFile(object):
20 - def __init__(self, delete=True, **kwargs):
21 self._tmpfile = tempfile.NamedTemporaryFile(**kwargs)
22 - def close(self):
23 self._tmpfile.flush()
24 - def __getattr__(self, name):
25 return getattr(self._tmpfile, name)
26 else: 27 NamedTemporaryFile = tempfile.NamedTemporaryFile 28 29
30 -class _IOTestCaseBase(HelperTestCase):
31 """(c)ElementTree compatibility for IO functions/methods 32 """ 33 etree = None 34
def setUp(self):
    """Prepare the fixtures shared by every test.

    Creates a one-element tree (root tag ``a``), keeps its serialised
    form in ``self.root_str`` and makes a fresh temporary directory
    whose path is stored in ``self._temp_dir``.
    """
    etree = self.etree
    root = etree.Element('a')
    self.root = root
    self.root_str = etree.tostring(root)
    self.tree = etree.ElementTree(root)
    self._temp_dir = tempfile.mkdtemp()
42
def tearDown(self):
    """Delete the temporary directory stored in ``self._temp_dir``."""
    scratch_dir = self._temp_dir
    # Run a collection first; presumably this releases objects that still
    # hold files inside the directory open -- confirm before removing.
    gc.collect()
    shutil.rmtree(scratch_dir)
46
def getTestFilePath(self, name):
    """Return the path of file ``name`` inside this test's temp directory."""
    directory = self._temp_dir
    return os.path.join(directory, name)
49
def buildNodes(self, element, children, depth):
    """Recursively grow a subtree below ``element``.

    Each level gets ``children`` new elements named
    ``element_<depth>_<index>``; recursion stops when ``depth`` reaches 0.
    """
    if depth != 0:
        make_element = self.etree.Element
        for index in range(children):
            child = make_element('element_%s_%s' % (depth, index))
            # Populate the child before attaching it to its parent.
            self.buildNodes(child, children, depth - 1)
            element.append(child)
59
def test_tree_io(self):
    """Round-trip a UTF-8 serialised tree through files.

    The tree is written to ``testdump.xml``, parsed back, written again
    to ``testdump2.xml``, and both files must be byte-identical.
    """
    Element = self.etree.Element
    ElementTree = self.etree.ElementTree

    element = Element('top')
    element.text = _str("qwrtioüöä\uAABB")
    tree = ElementTree(element)
    self.buildNodes(element, 10, 3)

    first_path = self.getTestFilePath('testdump.xml')
    second_path = self.getTestFilePath('testdump2.xml')

    # try/finally guarantees the handles are closed even when writing or
    # parsing raises (the "with" statement is avoided for old Pythons).
    f = open(first_path, 'wb')
    try:
        tree.write(f, encoding='UTF-8')
    finally:
        f.close()

    f = open(first_path, 'rb')
    try:
        tree = ElementTree(file=f)
    finally:
        f.close()

    f = open(second_path, 'wb')
    try:
        tree.write(f, encoding='UTF-8')
    finally:
        f.close()

    contents = []
    for path in (first_path, second_path):
        f = open(path, 'rb')
        try:
            contents.append(f.read())
        finally:
            f.close()
    self.assertEqual(contents[0], contents[1])
84
def test_tree_io_latin1(self):
    """Round-trip an ISO-8859-1 serialised tree through files.

    The tree is written to ``testdump.xml``, parsed back, written again
    to ``testdump2.xml``, and both files must be byte-identical.
    """
    Element = self.etree.Element
    ElementTree = self.etree.ElementTree

    element = Element('top')
    element.text = _str("qwrtioüöäßá")
    tree = ElementTree(element)
    self.buildNodes(element, 10, 3)

    first_path = self.getTestFilePath('testdump.xml')
    second_path = self.getTestFilePath('testdump2.xml')

    # try/finally guarantees the handles are closed even when writing or
    # parsing raises (the "with" statement is avoided for old Pythons).
    f = open(first_path, 'wb')
    try:
        tree.write(f, encoding='iso-8859-1')
    finally:
        f.close()

    f = open(first_path, 'rb')
    try:
        tree = ElementTree(file=f)
    finally:
        f.close()

    f = open(second_path, 'wb')
    try:
        tree.write(f, encoding='iso-8859-1')
    finally:
        f.close()

    contents = []
    for path in (first_path, second_path):
        f = open(path, 'rb')
        try:
            contents.append(f.read())
        finally:
            f.close()
    self.assertEqual(contents[0], contents[1])
109
def test_write_filename(self):
    """``tree.write()`` must accept a filename string as its target."""
    # (c)ElementTree supports filename strings as write argument

    handle, filename = tempfile.mkstemp(suffix=".xml")
    try:
        # The write happens inside the try block so that the descriptor
        # and the temporary file are cleaned up even if it fails.
        self.tree.write(filename)
        self.assertEqual(read_file(filename, 'rb').replace(_bytes('\n'), _bytes('')),
                         self.root_str)
    finally:
        os.close(handle)
        os.remove(filename)
121
123 filename = os.path.join( 124 os.path.join('hopefullynonexistingpathname'), 125 'invalid_file.xml') 126 try: 127 self.tree.write(filename) 128 except IOError: 129 pass 130 else: 131 self.assertTrue( 132 False, "writing to an invalid file path should fail")
133
135 # (c)ElementTree supports gzip instance as parse argument 136 handle, filename = tempfile.mkstemp(suffix=".xml.gz") 137 f = gzip.open(filename, 'wb') 138 f.write(self.root_str) 139 f.close() 140 try: 141 f_gz = gzip.open(filename, 'rb') 142 tree = self.etree.parse(f_gz) 143 f_gz.close() 144 self.assertEqual(self.etree.tostring(tree.getroot()), self.root_str) 145 finally: 146 os.close(handle) 147 os.remove(filename)
148
150 # (c)ElementTree class ElementTree has a 'parse' method that returns 151 # the root of the tree 152 153 # parse from filename 154 155 handle, filename = tempfile.mkstemp(suffix=".xml") 156 write_to_file(filename, self.root_str, 'wb') 157 try: 158 tree = self.etree.ElementTree() 159 root = tree.parse(filename) 160 self.assertEqual(self.etree.tostring(root), self.root_str) 161 finally: 162 os.close(handle) 163 os.remove(filename)
164
166 handle, filename = tempfile.mkstemp(suffix=".xml") 167 write_to_file(filename, self.root_str, 'wb') 168 try: 169 tree = self.etree.ElementTree() 170 root = tree.parse(filename) 171 # and now do it again; previous content should still be there 172 root2 = tree.parse(filename) 173 self.assertEqual('a', root.tag) 174 self.assertEqual('a', root2.tag) 175 # now remove all references to root2, and parse again 176 del root2 177 root3 = tree.parse(filename) 178 self.assertEqual('a', root.tag) 179 self.assertEqual('a', root3.tag) 180 # root2's memory should've been freed here 181 # XXX how to check? 182 finally: 183 os.close(handle) 184 os.remove(filename)
185
187 # (c)ElementTree class ElementTree has a 'parse' method that returns 188 # the root of the tree 189 190 # parse from file object 191 192 handle, filename = tempfile.mkstemp(suffix=".xml") 193 try: 194 os.write(handle, self.root_str) 195 f = open(filename, 'rb') 196 tree = self.etree.ElementTree() 197 root = tree.parse(f) 198 f.close() 199 self.assertEqual(self.etree.tostring(root), self.root_str) 200 finally: 201 os.close(handle) 202 os.remove(filename)
203
205 # (c)ElementTree class ElementTree has a 'parse' method that returns 206 # the root of the tree 207 208 # parse from unamed file object 209 f = SillyFileLike() 210 root = self.etree.ElementTree().parse(f) 211 self.assertTrue(root.tag.endswith('foo'))
212
214 # parse from unamed file object 215 f = LargeFileLike() 216 tree = self.etree.parse(f) 217 root = tree.getroot() 218 self.assertTrue(root.tag.endswith('root'))
219
221 class LocalError(Exception): 222 pass
223 class TestFile: 224 def read(*args): 225 raise LocalError
226 f = TestFile() 227 self.assertRaises(LocalError, self.etree.parse, f) 228
def test_module_parse_fileobject_late_error(self):
    """An exception raised by ``read()`` late in the input must propagate.

    The file-like object hands out ``data`` one character per call and
    raises ``LocalError`` once the data is exhausted.  ``parse()`` must
    let that error through, and ``read()`` must have been called exactly
    ``len(data) + 1`` times.
    """
    class LocalError(Exception):
        pass

    class TestFile:
        data = '<root>test</'
        try:
            next_char = iter(data).next
        except AttributeError:
            # Python 3
            next_char = iter(data).__next__
        counter = 0

        def read(self, amount=None):
            if amount is not None:
                try:
                    self.counter += 1
                    return _bytes(self.next_char())
                except StopIteration:
                    raise LocalError
            # Unbounded read: keep pulling single characters until the
            # exhausted iterator makes the call above raise LocalError.
            while True:
                self.read(1)

    source = TestFile()
    self.assertRaises(LocalError, self.etree.parse, source)
    self.assertEqual(source.counter, len(source.data)+1)
def test_module_parse_fileobject_type_error(self):
    """``parse()`` must reject a file-like whose ``read()`` returns an int.

    ``TypeError`` is always accepted; ``ParseError`` is accepted as well
    when the etree implementation under test defines it.
    """
    class TestFile:
        def read(*args):
            return 1

    source = TestFile()

    if hasattr(self.etree, 'ParseError'):
        expect_exc = (TypeError, self.etree.ParseError)
    else:
        expect_exc = TypeError
    self.assertRaises(expect_exc, self.etree.parse, source)
def test_etree_parse_io_error(self):
    """Parsing a non-existent file must raise ``IOError``.

    Checked once inside a directory with a plain ASCII name and once
    inside a directory whose name has characters beyond latin-1.
    """
    filename = _str('nosuchfile.xml')
    # the second prefix is a directory name that contains characters
    # beyond latin-1
    prefixes = (_str('Directory'), _str('Каталог'))
    for prefix in prefixes:
        dn = tempfile.mkdtemp(prefix=prefix)
        try:
            missing = os.path.join(dn, filename)
            self.assertRaises(IOError, self.etree.parse, missing)
        finally:
            os.rmdir(dn)
281
def test_parse_utf8_bom(self):
    """``parse()`` must cope with a UTF-8 file that starts with a BOM."""
    utext = _str('Søk på nettet')
    uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
    # Build the three raw BOM bytes from their escaped spelling.
    escaped_bom = _bytes('\\xEF\\xBB\\xBF')
    bom = escaped_bom.decode("unicode_escape").encode("latin1")
    self.assertEqual(3, len(bom))

    tmp = NamedTemporaryFile(delete=False)
    try:
        try:
            tmp.write(bom)
            tmp.write(uxml.encode("utf-8"))
        finally:
            tmp.close()
        # Re-open by name so the parser reads the file from disk.
        tree = self.etree.parse(tmp.name)
    finally:
        os.unlink(tmp.name)
    self.assertEqual(utext, tree.getroot().text)
299
def test_iterparse_utf8_bom(self):
    """``iterparse()`` must cope with a UTF-8 file that starts with a BOM."""
    utext = _str('Søk på nettet')
    uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
    # Build the three raw BOM bytes from their escaped spelling.
    escaped_bom = _bytes('\\xEF\\xBB\\xBF')
    bom = escaped_bom.decode("unicode_escape").encode("latin1")
    self.assertEqual(3, len(bom))

    tmp = NamedTemporaryFile(delete=False)
    try:
        try:
            tmp.write(bom)
            tmp.write(uxml.encode("utf-8"))
        finally:
            tmp.close()
        # Collect every element reported by the event stream.
        elements = [el for _, el in self.etree.iterparse(tmp.name)]
        self.assertEqual(1, len(elements))
        root = elements[0]
    finally:
        os.unlink(tmp.name)
    self.assertEqual(utext, root.text)
319
def test_iterparse_utf16_bom(self):
    """``iterparse()`` must cope with a UTF-16 file that starts with a BOM."""
    utext = _str('Søk på nettet')
    uxml = '<?xml version="1.0" encoding="UTF-16"?><p>%s</p>' % utext
    # Both byte orders of the UTF-16 BOM, separated by one space.
    escaped_boms = _bytes('\\xFE\\xFF \\xFF\\xFE')
    boms = escaped_boms.decode("unicode_escape").encode("latin1")
    self.assertEqual(5, len(boms))
    xml = uxml.encode("utf-16")
    # The encoder must have emitted one of the two BOMs up front.
    self.assertTrue(xml[:2] in boms, repr(xml[:2]))

    tmp = NamedTemporaryFile(delete=False)
    try:
        try:
            tmp.write(xml)
        finally:
            tmp.close()
        elements = [el for _, el in self.etree.iterparse(tmp.name)]
        self.assertEqual(1, len(elements))
        root = elements[0]
    finally:
        os.unlink(tmp.name)
    self.assertEqual(utext, root.text)
341 342
343 -class ETreeIOTestCase(_IOTestCaseBase):
344 etree = etree 345
347 Element = self.etree.Element 348 SubElement = self.etree.SubElement 349 ElementTree = self.etree.ElementTree 350 text = _str("qwrtioüöä") 351 352 root = Element('root') 353 root.text = text 354 child = SubElement(root, 'sub') 355 child.text = 'TEXT' 356 child.tail = 'TAIL' 357 SubElement(root, 'sub').text = text 358 359 tree = ElementTree(root) 360 out = BytesIO() 361 tree.write(out, method='text', encoding='utf8', compression=9) 362 out.seek(0) 363 364 f = gzip.GzipFile(fileobj=out) 365 try: 366 result = f.read().decode('utf8') 367 finally: 368 f.close() 369 self.assertEqual(text+'TEXTTAIL'+text, result)
if ElementTree:
    class ElementTreeIOTestCase(_IOTestCaseBase):
        """Run the shared IO tests against the imported ``ElementTree``.

        Only defined when ``ElementTree`` is truthy.
        """
        etree = ElementTree
375 376
def test_suite():
    """Build the suite holding every IO test case of this module.

    ``ETreeIOTestCase`` is always included; ``ElementTreeIOTestCase``
    is added only when ``ElementTree`` is truthy.

    Returns:
        A ``unittest.TestSuite`` instance.
    """
    # unittest.makeSuite() is deprecated since Python 3.11 and removed in
    # 3.13; the default loader performs the same collection.
    load_tests = unittest.defaultTestLoader.loadTestsFromTestCase
    suite = unittest.TestSuite()
    suite.addTests([load_tests(ETreeIOTestCase)])
    if ElementTree:
        suite.addTests([load_tests(ElementTreeIOTestCase)])
    return suite
if __name__ == '__main__':
    # Running this file directly executes no tests; it only prints a
    # hint that points at test.py.
    hint = 'to test use test.py %s' % __file__
    print(hint)