Package lxml :: Package tests :: Module test_sax
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_sax

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  Test cases related to SAX I/O 
  5  """ 
  6   
  7  import unittest, sys, os.path 
  8   
  9  this_dir = os.path.dirname(__file__) 
 10  if this_dir not in sys.path: 
 11      sys.path.insert(0, this_dir) # needed for Py3 
 12   
 13  from common_imports import HelperTestCase, make_doctest, BytesIO, _bytes 
 14  from lxml import sax 
 15  from xml.dom import pulldom 
 16  from xml.sax.handler import ContentHandler 
 17   
 18   
class ETreeSaxTestCase(HelperTestCase):
    """Tests that push trees/elements through ``lxml.sax`` and inspect the result.

    Most tests parse a small document, replay it as SAX events into an
    ``ElementTreeContentHandler`` (see ``_saxify_unsaxify``) and compare the
    rebuilt tree with the input.  The remaining tests drive the content
    handler by hand or feed the events into ``xml.dom.pulldom.SAX2DOM``.

    NOTE(review): several ``def`` lines were missing from the listing this
    file was recovered from.  Methods marked "name reconstructed" below got
    their names from the surrounding naming pattern -- confirm against the
    upstream lxml sources.
    """

    def test_etree_sax_simple(self):
        # Text, an empty child and tail text survive the round trip.
        tree = self.parse('<a>ab<b/>ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>ab<b/>ba</a>'),
                         xml_out)

    def test_etree_sax_double(self):
        # Same as above, with text inside the child element.
        tree = self.parse('<a>ab<b>bb</b>ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>ab<b>bb</b>ba</a>'),
                         xml_out)

    def test_etree_sax_comment(self):
        # The comment is expected to be dropped; surrounding text is kept.
        tree = self.parse('<a>ab<!-- TEST -->ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>abba</a>'),
                         xml_out)

    def test_etree_sax_pi(self):
        # Processing instructions inside the root are expected to be kept.
        tree = self.parse('<a>ab<?this and that?>ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>ab<?this and that?>ba</a>'),
                         xml_out)

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_etree_sax_comment_root(self):
        # A comment before the root element is expected to be dropped.
        tree = self.parse('<!-- TEST --><a>ab</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a>ab</a>'),
                         xml_out)

    def test_etree_sax_pi_root(self):
        # A processing instruction before the root is expected to be kept.
        tree = self.parse('<?this and that?><a>ab</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<?this and that?><a>ab</a>'),
                         xml_out)

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_etree_sax_attributes(self):
        # Plain (un-namespaced) attributes survive the round trip.
        tree = self.parse('<a aa="5">ab<b b="5"/>ba</a>')
        xml_out = self._saxify_serialize(tree)
        self.assertEqual(_bytes('<a aa="5">ab<b b="5"/>ba</a>'),
                         xml_out)

    def test_etree_sax_ns1(self):
        # A default namespace on the root applies to root and child tags.
        tree = self.parse('<a xmlns="bla">ab<b>bb</b>ba</a>')
        new_tree = self._saxify_unsaxify(tree)
        root = new_tree.getroot()
        self.assertEqual('{bla}a',
                         root.tag)
        self.assertEqual('{bla}b',
                         root[0].tag)

    def test_etree_sax_ns2(self):
        # Default namespace on the root, prefixed namespace on the child.
        tree = self.parse('<a xmlns="blaA">ab<b:b xmlns:b="blaB">bb</b:b>ba</a>')
        new_tree = self._saxify_unsaxify(tree)
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('{blaB}b',
                         root[0].tag)

    def test_sax_to_pulldom(self):
        # Feed the SAX events into the stdlib SAX2DOM builder and check the
        # resulting DOM nodes.
        tree = self.parse('<a xmlns="blaA">ab<b:b xmlns:b="blaB">bb</b:b>ba</a>')
        handler = pulldom.SAX2DOM()
        sax.saxify(tree, handler)
        dom = handler.document

        self.assertEqual('a',
                         dom.firstChild.localName)
        self.assertEqual('blaA',
                         dom.firstChild.namespaceURI)
        self.assertEqual(None,
                         dom.firstChild.prefix)

        children = dom.firstChild.childNodes
        self.assertEqual('ab',
                         children[0].nodeValue)
        self.assertEqual('blaB',
                         children[1].namespaceURI)
        self.assertEqual('ba',
                         children[2].nodeValue)

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_sax_to_pulldom_multiple_namespaces(self):
        tree = self.parse('<a xmlns="blaA" xmlns:a="blaA"></a>')
        handler = pulldom.SAX2DOM()
        sax.saxify(tree, handler)
        dom = handler.document

        # With multiple prefix definitions, the node should keep the one
        # that was actually used, even if the others also are valid.
        self.assertEqual('a',
                         dom.firstChild.localName)
        self.assertEqual('blaA',
                         dom.firstChild.namespaceURI)
        self.assertEqual(None,
                         dom.firstChild.prefix)

        tree = self.parse('<a:a xmlns="blaA" xmlns:a="blaA"></a:a>')
        handler = pulldom.SAX2DOM()
        sax.saxify(tree, handler)
        dom = handler.document

        self.assertEqual('a',
                         dom.firstChild.localName)
        self.assertEqual('blaA',
                         dom.firstChild.namespaceURI)
        self.assertEqual('a',
                         dom.firstChild.prefix)

    def test_element_sax(self):
        # Individual elements (not only whole trees) can be saxified.
        tree = self.parse('<a><b/></a>')
        a = tree.getroot()
        b = a[0]

        xml_out = self._saxify_serialize(a)
        self.assertEqual(_bytes('<a><b/></a>'),
                         xml_out)

        xml_out = self._saxify_serialize(b)
        self.assertEqual(_bytes('<b/>'),
                         xml_out)

    def test_element_sax_ns(self):
        # Saxifying elements: namespaced root and its un-namespaced child.
        tree = self.parse('<a:a xmlns:a="blaA"><b/></a:a>')
        a = tree.getroot()
        b = a[0]

        new_tree = self._saxify_unsaxify(a)
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('b',
                         root[0].tag)

        new_tree = self._saxify_unsaxify(b)
        root = new_tree.getroot()
        self.assertEqual('b',
                         root.tag)
        self.assertEqual(0,
                         len(root))

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_etree_sax_handler_default_ns(self):
        # Drive the handler by hand: the default namespace is redefined for
        # <b> and must fall back to the outer one for <c>.
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startPrefixMapping(None, 'blaA')
        handler.startElementNS(('blaA', 'a'), 'a', {})
        handler.startPrefixMapping(None, 'blaB')
        handler.startElementNS(('blaB', 'b'), 'b', {})
        handler.endElementNS(('blaB', 'b'), 'b')
        handler.endPrefixMapping(None)
        handler.startElementNS(('blaA', 'c'), 'c', {})
        handler.endElementNS(('blaA', 'c'), 'c')
        handler.endElementNS(('blaA', 'a'), 'a')
        handler.endPrefixMapping(None)
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('{blaB}b',
                         root[0].tag)
        self.assertEqual('{blaA}c',
                         root[1].tag)

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_etree_sax_handler_default_ns_None(self):
        # Same event sequence, but element names carry ``None`` as their
        # namespace; the tags are still expected in the mapped default
        # namespace.
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startPrefixMapping(None, 'blaA')
        handler.startElementNS((None, 'a'), 'a', {})
        handler.startPrefixMapping(None, 'blaB')
        handler.startElementNS((None, 'b'), 'b', {})
        handler.endElementNS((None, 'b'), 'b')
        handler.endPrefixMapping(None)
        handler.startElementNS((None, 'c'), 'c', {})
        handler.endElementNS((None, 'c'), 'c')
        handler.endElementNS((None, 'a'), 'a')
        handler.endPrefixMapping(None)
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('{blaB}b',
                         root[0].tag)
        self.assertEqual('{blaA}c',
                         root[1].tag)

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_etree_sax_redefine_ns(self):
        # The named prefix 'ns' is rebound for <ns:b> and restored for <ns:c>.
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startPrefixMapping('ns', 'blaA')
        handler.startElementNS(('blaA', 'a'), 'ns:a', {})
        handler.startPrefixMapping('ns', 'blaB')
        handler.startElementNS(('blaB', 'b'), 'ns:b', {})
        handler.endElementNS(('blaB', 'b'), 'ns:b')
        handler.endPrefixMapping('ns')
        handler.startElementNS(('blaA', 'c'), 'ns:c', {})
        handler.endElementNS(('blaA', 'c'), 'ns:c')
        handler.endElementNS(('blaA', 'a'), 'ns:a')
        handler.endPrefixMapping('ns')
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('{blaA}a',
                         root.tag)
        self.assertEqual('{blaB}b',
                         root[0].tag)
        self.assertEqual('{blaA}c',
                         root[1].tag)

    def test_etree_sax_no_ns(self):
        # Non-namespace SAX API (startElement/endElement).
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startElement('a', {})
        handler.startElement('b', {})
        handler.endElement('b')
        handler.startElement('c') # with empty attributes
        handler.endElement('c')
        handler.endElement('a')
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('a', root.tag)
        self.assertEqual('b', root[0].tag)
        self.assertEqual('c', root[1].tag)

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_etree_sax_no_ns_attributes(self):
        # Attributes passed through the non-namespace API end up in .attrib.
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startElement('a', {"attr_a1": "a1"})
        handler.startElement('b', {"attr_b1": "b1"})
        handler.endElement('b')
        handler.endElement('a')
        handler.endDocument()

        new_tree = handler.etree
        root = new_tree.getroot()
        self.assertEqual('a', root.tag)
        self.assertEqual('b', root[0].tag)
        self.assertEqual('a1', root.attrib["attr_a1"])
        self.assertEqual('b1', root[0].attrib["attr_b1"])

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_etree_sax_ns_attributes(self):
        # A prefixed attribute name via the non-namespace API must raise
        # ValueError.
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()

        self.assertRaises(ValueError,
                          handler.startElement,
                          'a', {"blaA:attr_a1": "a1"}
                          )

    def test_etree_sax_error(self):
        # Closing an element that was never opened must raise SaxError.
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startElement('a')
        self.assertRaises(sax.SaxError, handler.endElement, 'b')

    def test_etree_sax_error2(self):
        # Closing the outer element while the inner one is still open must
        # raise SaxError.
        handler = sax.ElementTreeContentHandler()
        handler.startDocument()
        handler.startElement('a')
        handler.startElement('b')
        self.assertRaises(sax.SaxError, handler.endElement, 'a')

    def _saxify_unsaxify(self, saxifiable):
        """Replay *saxifiable* as SAX events and return the tree rebuilt from them."""
        handler = sax.ElementTreeContentHandler()
        sax.ElementTreeProducer(saxifiable, handler).saxify()
        return handler.etree

    def _saxify_serialize(self, tree):
        """Round-trip *tree* through SAX and return it serialised as bytes.

        Newline bytes are stripped from the output so that expected values
        can be written on a single line.
        """
        new_tree = self._saxify_unsaxify(tree)
        f = BytesIO()
        new_tree.write(f)
        return f.getvalue().replace(_bytes('\n'), _bytes(''))


class SimpleContentHandler(ContentHandler, object):
    """A SAX content handler that just stores the events

    Every callback appends one tuple to ``self.sax_events``: the callback
    name followed by the arguments it received.
    """

    def __init__(self):
        # Create the event log before delegating to ContentHandler.__init__.
        self.sax_events = []
        super(SimpleContentHandler, self).__init__()

    def startDocument(self):
        """Record ``('startDocument',)``."""
        self.sax_events.append(('startDocument',))

    def endDocument(self):
        """Record ``('endDocument',)``."""
        self.sax_events.append(('endDocument',))

    def startPrefixMapping(self, prefix, uri):
        """Record the start of a prefix-to-URI namespace mapping."""
        self.sax_events.append(('startPrefixMapping', prefix, uri))

    def endPrefixMapping(self, prefix):
        """Record the end of the mapping for *prefix*."""
        self.sax_events.append(('endPrefixMapping', prefix))

    def startElement(self, name, attrs):
        """Record a non-namespace element start; *attrs* is copied into a dict."""
        self.sax_events.append(('startElement', name, dict(attrs)))

    def endElement(self, name):
        """Record a non-namespace element end."""
        self.sax_events.append(('endElement', name))

    def startElementNS(self, name, qname, attrs):
        """Record a namespace-aware element start.

        Only the ``_qnames`` attribute of *attrs* is stored, not the
        attribute values.  The tests in this file compare it against dicts
        keyed by ``(uri, localname)`` with qualified names as values.
        """
        self.sax_events.append(('startElementNS', name, qname, attrs._qnames))

    def endElementNS(self, name, qname):
        """Record a namespace-aware element end."""
        self.sax_events.append(('endElementNS', name, qname))

    def characters(self, content):
        """Record character data."""
        self.sax_events.append(('characters', content))

    def ignorableWhitespace(self, whitespace):
        """Record ignorable whitespace."""
        self.sax_events.append(('ignorableWhitespace', whitespace))

    def processingInstruction(self, target, data):
        """Record a processing instruction."""
        self.sax_events.append(('processingInstruction', target, data))

    def skippedEntity(self, name):
        """Record a skipped entity."""
        self.sax_events.append(('skippedEntity', name))


class NSPrefixSaxTestCase(HelperTestCase):
    """Testing that namespaces generate the right SAX events

    NOTE(review): the ``def`` lines of the three test methods were missing
    from the listing this file was recovered from; their names are
    reconstructed from the naming pattern of the other test class -- confirm
    against the upstream lxml sources.
    """

    def _saxify(self, tree):
        """Return the list of SAX event tuples produced by saxifying *tree*."""
        handler = SimpleContentHandler()
        sax.ElementTreeProducer(tree, handler).saxify()
        return handler.sax_events

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_element_sax_ns_prefix(self):
        # The name of the prefix should be preserved, if the uri is unique
        tree = self.parse('<a:a xmlns:a="blaA" xmlns:c="blaC">'
                          '<d a:attr="value" c:attr="value" /></a:a>')
        a = tree.getroot()

        # Only the element start/end events (slice 3:7) are compared here.
        self.assertEqual(
            [('startElementNS', ('blaA', 'a'), 'a:a', {}),
             ('startElementNS', (None, 'd'), 'd',
              {('blaA', 'attr'): 'a:attr', ('blaC', 'attr'): 'c:attr'}),
             ('endElementNS', (None, 'd'), 'd'),
             ('endElementNS', ('blaA', 'a'), 'a:a'),
            ],
            self._saxify(a)[3:7])

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_element_sax_default_ns_prefix(self):
        # Default prefixes should also not get a generated prefix
        tree = self.parse('<a xmlns="blaA"><b attr="value" /></a>')
        a = tree.getroot()

        self.assertEqual(
            [('startDocument',),
             # NS prefix should be None:
             ('startPrefixMapping', None, 'blaA'),
             ('startElementNS', ('blaA', 'a'), 'a', {}),
             # Attribute prefix should be None:
             ('startElementNS', ('blaA', 'b'), 'b', {(None, 'attr'): 'attr'}),
             ('endElementNS', ('blaA', 'b'), 'b'),
             ('endElementNS', ('blaA', 'a'), 'a'),
             # Prefix should be None again:
             ('endPrefixMapping', None),
             ('endDocument',)],
            self._saxify(a))

        # Except for attributes, if there is both a default namespace
        # and a named namespace with the same uri
        tree = self.parse('<a xmlns="bla" xmlns:a="bla">'
                          '<b a:attr="value" /></a>')
        a = tree.getroot()

        self.assertEqual(
            ('startElementNS', ('bla', 'b'), 'b', {('bla', 'attr'): 'a:attr'}),
            self._saxify(a)[4])

    # NOTE(review): name reconstructed -- header missing from the listing.
    def test_element_sax_twin_ns_prefix(self):
        # Make an element with a doubly registered uri
        tree = self.parse('<a xmlns:b="bla" xmlns:c="bla">'
                          '<d c:attr="attr" /></a>')
        a = tree.getroot()

        self.assertEqual(
            # It should get the b prefix in this case
            ('startElementNS', (None, 'd'), 'd', {('bla', 'attr'): 'b:attr'}),
            self._saxify(a)[4])


def test_suite():
    """Build the suite for this module.

    Collects both test case classes plus the doctests from
    ``../../../doc/sax.txt``.

    Returns:
        A ``unittest.TestSuite`` holding all of the above.
    """
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the default loader's loadTestsFromTestCase() is the documented
    # replacement and collects the same ``test*`` methods.
    load_case = unittest.defaultTestLoader.loadTestsFromTestCase

    suite = unittest.TestSuite()
    suite.addTests([load_case(ETreeSaxTestCase)])
    suite.addTests([load_case(NSPrefixSaxTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/sax.txt')])
    return suite


# Running this file directly only prints a hint about the test runner to use.
if __name__ == '__main__':
    print('to test use test.py %s' % __file__)