Package lxml :: Package tests :: Module test_io
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_io

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  IO test cases that apply to both etree and ElementTree 
  5  """ 
  6   
  7  import unittest 
  8  import tempfile, gzip, os, os.path, gc, shutil 
  9   
 10  from lxml.tests.common_imports import ( 
 11      etree, ElementTree, _str, _bytes, 
 12      SillyFileLike, LargeFileLike, HelperTestCase, 
 13      read_file, write_to_file, BytesIO, tmpfile 
 14  ) 
 15   
 16   
class _IOTestCaseBase(HelperTestCase):
    """(c)ElementTree compatibility tests for IO functions/methods.

    This base class is abstract: ``etree`` is ``None`` here and must be set
    by a subclass to the ElementTree-compatible module under test.  Every
    test goes through ``self.etree`` so the same test bodies can be run
    against different implementations.
    """
    etree = None

    def setUp(self):
        """Create a minimal tree and a private temporary directory.

        Sets ``self.root`` (a single ``<a>`` element), ``self.root_str``
        (its serialised form), ``self.tree`` (an ElementTree wrapping the
        root) and ``self._temp_dir`` (removed again in ``tearDown``).
        """
        self.root = self.etree.Element('a')
        self.root_str = self.etree.tostring(self.root)
        self.tree = self.etree.ElementTree(self.root)
        self._temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the temporary directory created by ``setUp``."""
        # NOTE(review): the collection runs before the directory is removed,
        # presumably so that unreferenced objects that still hold files open
        # are finalised first -- confirm.
        gc.collect()
        shutil.rmtree(self._temp_dir)

    def getTestFilePath(self, name):
        """Return the path of ``name`` inside this test's temp directory."""
        return os.path.join(self._temp_dir, name)

    def buildNodes(self, element, children, depth):
        """Recursively append a subtree of generated elements to ``element``.

        Appends ``children`` new elements named ``element_<depth>_<index>``
        to ``element``; each of them is populated the same way with
        ``depth - 1`` before it is appended.  Does nothing for ``depth == 0``.
        """
        Element = self.etree.Element

        if depth == 0:
            return
        for i in range(children):
            new_element = Element('element_%s_%s' % (depth, i))
            self.buildNodes(new_element, children, depth - 1)
            element.append(new_element)

    def _check_tree_io_roundtrip(self, text, encoding):
        # Shared body of the test_tree_io* tests: serialise a generated tree
        # with the given root text to 'testdump.xml', parse that file back,
        # serialise the parsed tree to 'testdump2.xml' and require both files
        # to be byte-identical.
        Element = self.etree.Element
        ElementTree = self.etree.ElementTree

        first_path = self.getTestFilePath('testdump.xml')
        second_path = self.getTestFilePath('testdump2.xml')

        element = Element('top')
        element.text = text
        tree = ElementTree(element)
        self.buildNodes(element, 10, 3)
        with open(first_path, 'wb') as f:
            tree.write(f, encoding=encoding)
        with open(first_path, 'rb') as f:
            tree = ElementTree(file=f)
        with open(second_path, 'wb') as f:
            tree.write(f, encoding=encoding)
        with open(first_path, 'rb') as f:
            data1 = f.read()
        with open(second_path, 'rb') as f:
            data2 = f.read()
        self.assertEqual(data1, data2)

    def test_tree_io(self):
        # write/parse/write round trip using UTF-8 output
        self._check_tree_io_roundtrip(_str("qwrtioüöä\uAABB"), 'UTF-8')

    def test_tree_io_latin1(self):
        # write/parse/write round trip using ISO-8859-1 output
        self._check_tree_io_roundtrip(_str("qwrtioüöäßá"), 'iso-8859-1')

    def test_write_filename(self):
        # (c)ElementTree supports filename strings as write argument
        with tmpfile(prefix="p", suffix=".xml") as filename:
            self.tree.write(filename)
            self.assertEqual(read_file(filename, 'rb').replace(b'\n', b''),
                             self.root_str)

    # NOTE(review): the 'def' line of this test (listing line 94) was missing
    # from the extracted source.  The name below is reconstructed by analogy
    # with test_write_filename_special_plus -- confirm against the original.
    def test_write_filename_special_percent(self):
        # '%20' is a URL escaped space character.
        tmp_dir = tempfile.gettempdir()
        before_test = os.listdir(tmp_dir)

        def difference(filenames):
            # 'lxmltmp-' files that were not present before the test started
            return sorted(
                fn for fn in set(filenames).difference(before_test)
                if fn.startswith('lxmltmp-')
            )

        with tmpfile(prefix="lxmltmp-p%20p", suffix=".xml") as filename:
            before_write = os.listdir(tmp_dir)
            after_write = None
            try:
                self.tree.write(filename)
                after_write = os.listdir(tmp_dir)
                self.assertEqual(read_file(filename, 'rb').replace(b'\n', b''),
                                 self.root_str)
            except (AssertionError, IOError, OSError):
                if after_write is None:
                    # write() itself failed.  Take the listing now so that the
                    # diagnostic below cannot raise a NameError that would
                    # hide the original exception.
                    after_write = os.listdir(tmp_dir)
                print("Before write: %s, after write: %s" % (
                    difference(before_write), difference(after_write))
                )
                raise

    def test_write_filename_special_plus(self):
        # '+' is used as an escaped space character in URLs.
        with tmpfile(prefix="p+", suffix=".xml") as filename:
            self.tree.write(filename)
            self.assertEqual(read_file(filename, 'rb').replace(b'\n', b''),
                             self.root_str)

    def test_write_invalid_filename(self):
        # writing to a path inside a non-existing directory should fail
        filename = os.path.join(
            'hopefullynonexistingpathname', 'invalid_file.xml')
        self.assertRaises(IOError, self.tree.write, filename)

    def test_module_parse_gzipobject(self):
        # (c)ElementTree supports gzip instance as parse argument
        with tmpfile(suffix=".xml.gz") as filename:
            with gzip.open(filename, 'wb') as f:
                f.write(self.root_str)
            with gzip.open(filename, 'rb') as f_gz:
                tree = self.etree.parse(f_gz)
        self.assertEqual(self.etree.tostring(tree.getroot()), self.root_str)

    def test_class_parse_filename(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from filename
        with tmpfile(suffix=".xml") as filename:
            write_to_file(filename, self.root_str, 'wb')
            tree = self.etree.ElementTree()
            root = tree.parse(filename)
            self.assertEqual(self.etree.tostring(root), self.root_str)

    def test_class_parse_filename_remove_previous(self):
        with tmpfile(suffix=".xml") as filename:
            write_to_file(filename, self.root_str, 'wb')
            tree = self.etree.ElementTree()
            root = tree.parse(filename)
            # and now do it again; previous content should still be there
            root2 = tree.parse(filename)
            self.assertEqual('a', root.tag)
            self.assertEqual('a', root2.tag)
            # now remove all references to root2, and parse again
            del root2
            root3 = tree.parse(filename)
            self.assertEqual('a', root.tag)
            self.assertEqual('a', root3.tag)
            # root2's memory should've been freed here
            # XXX how to check?

    def test_class_parse_fileobject(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from file object
        handle, filename = tempfile.mkstemp(suffix=".xml")
        try:
            os.write(handle, self.root_str)
            with open(filename, 'rb') as f:
                tree = self.etree.ElementTree()
                root = tree.parse(f)
            self.assertEqual(self.etree.tostring(root), self.root_str)
        finally:
            os.close(handle)
            os.remove(filename)

    def test_class_parse_unamed_fileobject(self):
        # (c)ElementTree class ElementTree has a 'parse' method that returns
        # the root of the tree

        # parse from unnamed file object
        f = SillyFileLike()
        root = self.etree.ElementTree().parse(f)
        self.assertTrue(root.tag.endswith('foo'))

    def test_module_parse_large_fileobject(self):
        # parse from unnamed file object
        f = LargeFileLike()
        tree = self.etree.parse(f)
        root = tree.getroot()
        self.assertTrue(root.tag.endswith('root'))

    def test_module_parse_fileobject_error(self):
        # an exception raised by the file object's read() must reach the
        # caller of parse() unchanged
        class LocalError(Exception):
            pass

        class TestFile:
            def read(*args):
                raise LocalError

        f = TestFile()
        self.assertRaises(LocalError, self.etree.parse, f)

    def test_module_parse_fileobject_late_error(self):
        # like test_module_parse_fileobject_error, but read() only fails
        # once the (truncated) document has been handed out completely
        class LocalError(Exception):
            pass

        class TestFile:
            data = '<root>test</'
            try:
                next_char = iter(data).next
            except AttributeError:
                # Python 3
                next_char = iter(data).__next__
            counter = 0

            def read(self, amount=None):
                if amount is None:
                    # never returns normally: keeps reading single
                    # characters until the data runs out and read(1) raises
                    while True:
                        self.read(1)
                else:
                    try:
                        # counted before the attempt, so the failing read
                        # is included in the total
                        self.counter += 1
                        return _bytes(self.next_char())
                    except StopIteration:
                        raise LocalError

        f = TestFile()
        self.assertRaises(LocalError, self.etree.parse, f)
        # one read per character plus the final read that raised
        self.assertEqual(f.counter, len(f.data)+1)

    def test_module_parse_fileobject_type_error(self):
        # read() returning a non-string value must be rejected
        class TestFile:
            def read(*args):
                return 1

        f = TestFile()

        try:
            expect_exc = (TypeError, self.etree.ParseError)
        except AttributeError:
            # the module under test does not provide ParseError
            expect_exc = TypeError
        self.assertRaises(expect_exc, self.etree.parse, f)

    def test_etree_parse_io_error(self):
        # parsing a missing file must raise IOError; the second directory
        # name contains characters beyond latin-1
        filename = _str('nosuchfile.xml')
        for prefix in (_str('Directory'), _str('Каталог')):
            dn = tempfile.mkdtemp(prefix=prefix)
            try:
                self.assertRaises(
                    IOError, self.etree.parse, os.path.join(dn, filename))
            finally:
                os.rmdir(dn)

    def test_parse_utf8_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
        # builds the BOM bytes from their escaped spelling; the length check
        # below verifies that three bytes came out
        bom = _bytes('\\xEF\\xBB\\xBF').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(3, len(bom))
        # delete=False: the file is closed before it is parsed by name and
        # is removed explicitly afterwards
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(bom)
                f.write(uxml.encode("utf-8"))
            finally:
                f.close()
            tree = self.etree.parse(f.name)
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, tree.getroot().text)

    def test_iterparse_utf8_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-8"?><p>%s</p>' % utext
        bom = _bytes('\\xEF\\xBB\\xBF').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(3, len(bom))
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(bom)
                f.write(uxml.encode("utf-8"))
            finally:
                f.close()
            elements = [el for _, el in self.etree.iterparse(f.name)]
            self.assertEqual(1, len(elements))
            root = elements[0]
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, root.text)

    def test_iterparse_utf16_bom(self):
        utext = _str('Søk på nettet')
        uxml = '<?xml version="1.0" encoding="UTF-16"?><p>%s</p>' % utext
        # both two-byte BOM spellings, separated by a space (5 bytes total)
        boms = _bytes('\\xFE\\xFF \\xFF\\xFE').decode(
            "unicode_escape").encode("latin1")
        self.assertEqual(5, len(boms))
        xml = uxml.encode("utf-16")
        # the encoded document must start with one of the two BOMs
        self.assertTrue(xml[:2] in boms, repr(xml[:2]))

        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(xml)
            finally:
                f.close()
            elements = [el for _, el in self.etree.iterparse(f.name)]
            self.assertEqual(1, len(elements))
            root = elements[0]
        finally:
            os.unlink(f.name)
        self.assertEqual(utext, root.text)
class ETreeIOTestCase(_IOTestCaseBase):
    # Runs the shared IO tests against the ``etree`` module imported from
    # lxml.tests.common_imports, plus the test below, which passes a
    # ``compression`` argument to ``write()``.
    etree = etree

    # NOTE(review): the 'def' line of this test (listing line 331) was missing
    # from the extracted source.  The name below is reconstructed from what
    # the body does -- confirm against the original.
    def test_write_compressed_text(self):
        Element = self.etree.Element
        SubElement = self.etree.SubElement
        ElementTree = self.etree.ElementTree
        text = _str("qwrtioüöä")

        root = Element('root')
        root.text = text
        child = SubElement(root, 'sub')
        child.text = 'TEXT'
        child.tail = 'TAIL'
        SubElement(root, 'sub').text = text

        tree = ElementTree(root)
        out = BytesIO()
        tree.write(out, method='text', encoding='utf8', compression=9)
        out.seek(0)

        # The written bytes are read back through gzip, so the output is
        # expected to be a gzip stream; 'with' closes the reader even if
        # reading or decoding fails.
        with gzip.GzipFile(fileobj=out) as f:
            result = f.read().decode('utf8')
        self.assertEqual(text+'TEXTTAIL'+text, result)
# The ElementTree variant of the test case is only defined when the
# ``ElementTree`` name imported from lxml.tests.common_imports is truthy.
if ElementTree:
    class ElementTreeIOTestCase(_IOTestCaseBase):
        # Runs the shared IO tests against ``ElementTree``.
        etree = ElementTree
360 361
def test_suite():
    """Return a suite holding every IO test case defined in this module.

    ``ETreeIOTestCase`` is always included; ``ElementTreeIOTestCase`` is
    added only when ``ElementTree`` is truthy, which is the same condition
    under which that class is defined.
    """
    # unittest.makeSuite() is deprecated and no longer exists in
    # Python 3.13; a fresh TestLoader collects the same 'test*' methods.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    suite.addTests([loader.loadTestsFromTestCase(ETreeIOTestCase)])
    if ElementTree:
        suite.addTests([loader.loadTestsFromTestCase(ElementTreeIOTestCase)])
    return suite
if __name__ == '__main__':
    # Running this module directly does not execute the tests; it only
    # prints a hint naming the runner script to use instead.
    hint = 'to test use test.py %s' % __file__
    print(hint)