Package lxml :: Package tests :: Module test_sax
[hide private]
[frames] | no frames]

Source Code for Module lxml.tests.test_sax

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  Test cases related to SAX I/O 
  5  """ 
  6   
  7  from __future__ import absolute_import 
  8   
  9  import unittest 
 10  from xml.dom import pulldom 
 11  from xml.sax.handler import ContentHandler 
 12   
 13  from .common_imports import HelperTestCase, make_doctest, BytesIO, _bytes 
 14  from lxml import sax 
 15   
 16   
class ETreeSaxTestCase(HelperTestCase):
    """Round-trip tests between lxml trees and SAX events.

    Most tests parse a document, replay it as SAX events into an
    ``ElementTreeContentHandler`` (or a pulldom ``SAX2DOM`` handler) and
    compare the rebuilt result with the expected one.  The remaining tests
    drive ``ElementTreeContentHandler`` by hand with explicit SAX calls.
    """

    def test_etree_sax_simple(self):
        tree = self.parse('<a>ab<b/>ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>ab<b/>ba</a>'),
                         xml_out)

    def test_etree_sax_double(self):
        tree = self.parse('<a>ab<b>bb</b>ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>ab<b>bb</b>ba</a>'),
                         xml_out)

    def test_etree_sax_comment(self):
        # The expected output has no comment: it is lost in the round trip.
        tree = self.parse('<a>ab<!-- TEST -->ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>abba</a>'),
                         xml_out)

    def test_etree_sax_pi(self):
        tree = self.parse('<a>ab<?this and that?>ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>ab<?this and that?>ba</a>'),
                         xml_out)

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_etree_sax_comment_root(self):
        tree = self.parse('<!-- TEST --><a>ab</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>ab</a>'),
                         xml_out)

    def test_etree_sax_pi_root(self):
        tree = self.parse('<?this and that?><a>ab</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<?this and that?><a>ab</a>'),
                         xml_out)

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_etree_sax_attributes(self):
        tree = self.parse('<a aa="5">ab<b b="5"/>ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a aa="5">ab<b b="5"/>ba</a>'),
                         xml_out)

    def test_etree_sax_ns1(self):
        tree = self.parse('<a xmlns="bla">ab<b>bb</b>ba</a>')
        new_tree = self._saxify_unsaxify(tree)
        root = new_tree.getroot()
        self.assertEqual('{bla}a',
                         root.tag)
        self.assertEqual('{bla}b',
                         root[0].tag)

    def test_etree_sax_ns2(self):
        tree = self.parse('<a xmlns="blaA">ab<b:b xmlns:b="blaB">bb</b:b>ba</a>')
        new_tree = self._saxify_unsaxify(tree)
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('{blaB}b',
                         root[0].tag)

    def test_sax_to_pulldom(self):
        tree = self.parse('<a xmlns="blaA">ab<b:b xmlns:b="blaB">bb</b:b>ba</a>')
        handler = pulldom.SAX2DOM()
        sax.saxify(tree, handler)
        dom = handler.document

        self.assertEqual('a',
                         dom.firstChild.localName)
        self.assertEqual('blaA',
                         dom.firstChild.namespaceURI)
        self.assertEqual(None,
                         dom.firstChild.prefix)

        children = dom.firstChild.childNodes
        self.assertEqual('ab',
                         children[0].nodeValue)
        self.assertEqual('blaB',
                         children[1].namespaceURI)
        self.assertEqual('ba',
                         children[2].nodeValue)

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_sax_to_pulldom_multiple_namespaces(self):
        tree = self.parse('<a xmlns="blaA" xmlns:a="blaA"></a>')
        handler = pulldom.SAX2DOM()
        sax.saxify(tree, handler)
        dom = handler.document

        # With multiple prefix definitions, the node should keep the one
        # that was actually used, even if the others also are valid.
        self.assertEqual('a',
                         dom.firstChild.localName)
        self.assertEqual('blaA',
                         dom.firstChild.namespaceURI)
        self.assertEqual(None,
                         dom.firstChild.prefix)

        tree = self.parse('<a:a xmlns="blaA" xmlns:a="blaA"></a:a>')
        handler = pulldom.SAX2DOM()
        sax.saxify(tree, handler)
        dom = handler.document

        self.assertEqual('a',
                         dom.firstChild.localName)
        self.assertEqual('blaA',
                         dom.firstChild.namespaceURI)
        self.assertEqual('a',
                         dom.firstChild.prefix)

    def test_element_sax(self):
        tree = self.parse('<a><b/></a>')
        a = tree.getroot()
        b = a[0]

        xml_out = self._saxify_serialize(a)
        self.assertEqual(_bytes('<a><b/></a>'),
                         xml_out)

        xml_out = self._saxify_serialize(b)
        self.assertEqual(_bytes('<b/>'),
                         xml_out)

    def test_element_sax_ns(self):
        tree = self.parse('<a:a xmlns:a="blaA"><b/></a:a>')
        a = tree.getroot()
        b = a[0]

        new_tree = self._saxify_unsaxify(a)
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('b',
                         root[0].tag)

        new_tree = self._saxify_unsaxify(b)
        root = new_tree.getroot()
        self.assertEqual('b',
                         root.tag)
        self.assertEqual(0,
                         len(root))

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_etree_sax_handler_default_ns(self):
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startPrefixMapping(None, 'blaA')
        handler.startElementNS(('blaA', 'a'), 'a', {})
        handler.startPrefixMapping(None, 'blaB')
        handler.startElementNS(('blaB', 'b'), 'b', {})
        handler.endElementNS(('blaB', 'b'), 'b')
        handler.endPrefixMapping(None)
        handler.startElementNS(('blaA', 'c'), 'c', {})
        handler.endElementNS(('blaA', 'c'), 'c')
        handler.endElementNS(('blaA', 'a'), 'a')
        handler.endPrefixMapping(None)
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('{blaB}b',
                         root[0].tag)
        self.assertEqual('{blaA}c',
                         root[1].tag)

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_etree_sax_handler_default_ns_None(self):
        # Same event sequence as the default-namespace test, but the element
        # names carry ``None`` as their namespace; the expected tags are
        # nevertheless qualified with the active default namespace.
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startPrefixMapping(None, 'blaA')
        handler.startElementNS((None, 'a'), 'a', {})
        handler.startPrefixMapping(None, 'blaB')
        handler.startElementNS((None, 'b'), 'b', {})
        handler.endElementNS((None, 'b'), 'b')
        handler.endPrefixMapping(None)
        handler.startElementNS((None, 'c'), 'c', {})
        handler.endElementNS((None, 'c'), 'c')
        handler.endElementNS((None, 'a'), 'a')
        handler.endPrefixMapping(None)
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('{blaB}b',
                         root[0].tag)
        self.assertEqual('{blaA}c',
                         root[1].tag)

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_etree_sax_redefine_ns(self):
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startPrefixMapping('ns', 'blaA')
        handler.startElementNS(('blaA', 'a'), 'ns:a', {})
        handler.startPrefixMapping('ns', 'blaB')
        handler.startElementNS(('blaB', 'b'), 'ns:b', {})
        handler.endElementNS(('blaB', 'b'), 'ns:b')
        handler.endPrefixMapping('ns')
        handler.startElementNS(('blaA', 'c'), 'ns:c', {})
        handler.endElementNS(('blaA', 'c'), 'ns:c')
        handler.endElementNS(('blaA', 'a'), 'ns:a')
        handler.endPrefixMapping('ns')
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('{blaB}b',
                         root[0].tag)
        self.assertEqual('{blaA}c',
                         root[1].tag)

    def test_etree_sax_no_ns(self):
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startElement('a', {})
        handler.startElement('b', {})
        handler.endElement('b')
        handler.startElement('c')  # with empty attributes
        handler.endElement('c')
        handler.endElement('a')
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('a', root.tag)
        self.assertEqual('b', root[0].tag)
        self.assertEqual('c', root[1].tag)

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_etree_sax_no_ns_attributes(self):
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startElement('a', {"attr_a1": "a1"})
        handler.startElement('b', {"attr_b1": "b1"})
        handler.endElement('b')
        handler.endElement('a')
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('a', root.tag)
        self.assertEqual('b', root[0].tag)
        self.assertEqual('a1', root.attrib["attr_a1"])
        self.assertEqual('b1', root[0].attrib["attr_b1"])

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_etree_sax_ns_attributes(self):
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()

        # A prefixed attribute name passed to the non-NS startElement()
        # is expected to be rejected.
        self.assertRaises(ValueError,
                          handler.startElement,
                          'a', {"blaA:attr_a1": "a1"}
                          )

    def test_etree_sax_error(self):
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startElement('a')
        self.assertRaises(sax.SaxError, handler.endElement, 'b')

    def test_etree_sax_error2(self):
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startElement('a')
        handler.startElement('b')
        self.assertRaises(sax.SaxError, handler.endElement, 'a')

    def _saxify_unsaxify(self, saxifiable):
        # Replay ``saxifiable`` as SAX events into a fresh tree-building
        # handler and return the tree it produced.
        handler = sax.ElementTreeContentHandler()
        sax.ElementTreeProducer(saxifiable, handler).saxify()
        return handler.etree

    def _saxify_serialize(self, tree):
        # Round-trip through SAX, then serialise; newlines are stripped so
        # the result can be compared against a single-line literal.
        new_tree = self._saxify_unsaxify(tree)
        f = BytesIO()
        new_tree.write(f)
        return f.getvalue().replace(_bytes('\n'), _bytes(''))
296 297
class SimpleContentHandler(ContentHandler, object):
    """A SAX content handler that just stores the events

    Every callback appends one tuple to ``self.sax_events``.  The first
    item of the tuple is the callback name, followed by the arguments
    the callback received.
    """

    def __init__(self):
        self.sax_events = []
        super(SimpleContentHandler, self).__init__()

    def startDocument(self):
        self.sax_events.append(('startDocument',))

    def endDocument(self):
        self.sax_events.append(('endDocument',))

    def startPrefixMapping(self, prefix, uri):
        self.sax_events.append(('startPrefixMapping', prefix, uri))

    def endPrefixMapping(self, prefix):
        self.sax_events.append(('endPrefixMapping', prefix))

    def startElement(self, name, attrs):
        # Attributes are recorded as a plain dict snapshot.
        self.sax_events.append(('startElement', name, dict(attrs)))

    def endElement(self, name):
        self.sax_events.append(('endElement', name))

    def startElementNS(self, name, qname, attrs):
        # Only the (uri, localname) -> qname mapping of the attributes is
        # recorded, not their values.  It is copied, like in startElement(),
        # so that later changes to the caller's mapping cannot alter an
        # already recorded event.
        self.sax_events.append(
            ('startElementNS', name, qname, dict(attrs._qnames)))

    def endElementNS(self, name, qname):
        self.sax_events.append(('endElementNS', name, qname))

    def characters(self, content):
        self.sax_events.append(('characters', content))

    def ignorableWhitespace(self, whitespace):
        self.sax_events.append(('ignorableWhitespace', whitespace))

    def processingInstruction(self, target, data):
        self.sax_events.append(('processingInstruction', target, data))

    def skippedEntity(self, name):
        self.sax_events.append(('skippedEntity', name))
340 341
class NSPrefixSaxTestCase(HelperTestCase):
    """Testing that namespaces generate the right SAX events"""

    def _saxify(self, tree):
        # Replay ``tree`` into a recording handler and return the list of
        # recorded SAX events.
        handler = SimpleContentHandler()
        sax.ElementTreeProducer(tree, handler).saxify()
        return handler.sax_events

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_element_sax_ns_prefix(self):
        # The name of the prefix should be preserved, if the uri is unique
        tree = self.parse('<a:a xmlns:a="blaA" xmlns:c="blaC">'
                          '<d a:attr="value" c:attr="value" /></a:a>')
        a = tree.getroot()

        self.assertEqual(
            [('startElementNS', ('blaA', 'a'), 'a:a', {}),
             ('startElementNS', (None, 'd'), 'd',
              {('blaA', 'attr'): 'a:attr', ('blaC', 'attr'): 'c:attr'}),
             ('endElementNS', (None, 'd'), 'd'),
             ('endElementNS', ('blaA', 'a'), 'a:a'),
             ],
            self._saxify(a)[3:7])

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_element_sax_default_ns_prefix(self):
        # Default prefixes should also not get a generated prefix
        tree = self.parse('<a xmlns="blaA"><b attr="value" /></a>')
        a = tree.getroot()

        self.assertEqual(
            [('startDocument',),
             # NS prefix should be None:
             ('startPrefixMapping', None, 'blaA'),
             ('startElementNS', ('blaA', 'a'), 'a', {}),
             # Attribute prefix should be None:
             ('startElementNS', ('blaA', 'b'), 'b', {(None, 'attr'): 'attr'}),
             ('endElementNS', ('blaA', 'b'), 'b'),
             ('endElementNS', ('blaA', 'a'), 'a'),
             # Prefix should be None again:
             ('endPrefixMapping', None),
             ('endDocument',)],
            self._saxify(a))

        # Except for attributes, if there is both a default namespace
        # and a named namespace with the same uri
        tree = self.parse('<a xmlns="bla" xmlns:a="bla">'
                          '<b a:attr="value" /></a>')
        a = tree.getroot()

        self.assertEqual(
            ('startElementNS', ('bla', 'b'), 'b', {('bla', 'attr'): 'a:attr'}),
            self._saxify(a)[4])

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted source; the name is reconstructed -- confirm against upstream.
    def test_element_sax_twin_ns_prefix(self):
        # Make an element with a doubly registered uri
        tree = self.parse('<a xmlns:b="bla" xmlns:c="bla">'
                          '<d c:attr="attr" /></a>')
        a = tree.getroot()

        self.assertEqual(
            # It should get the b prefix in this case
            ('startElementNS', (None, 'd'), 'd', {('bla', 'attr'): 'b:attr'}),
            self._saxify(a)[4])
404 405
def test_suite():
    """Return a suite with both SAX test cases and the sax.txt doctests."""
    # unittest.makeSuite() is deprecated since Python 3.11 and removed in
    # 3.13; the default loader collects the same ``test*`` methods.
    load_tests_from = unittest.defaultTestLoader.loadTestsFromTestCase
    suite = unittest.TestSuite()
    suite.addTests([load_tests_from(ETreeSaxTestCase)])
    suite.addTests([load_tests_from(NSPrefixSaxTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/sax.txt')])
    return suite


# Running this module directly does not run the tests; it only prints a hint
# pointing at the test.py runner.
if __name__ == '__main__':
    print('to test use test.py %s' % __file__)