Package lxml :: Package tests :: Module test_dtd
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_dtd

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  Test cases related to DTD parsing and validation 
  5  """ 
  6   
  7  import unittest, sys 
  8   
  9  from .common_imports import ( 
 10      etree, html, BytesIO, _bytes, _str, 
 11      HelperTestCase, make_doctest, skipIf, 
 12      fileInTestDir, fileUrlInTestDir 
 13  ) 
14 15 16 -class ETreeDtdTestCase(HelperTestCase):
17 - def test_dtd(self):
18 pass
19
def test_dtd_file(self):
    # Parse test.xml, load the DTD from test.dtd (both located through
    # fileInTestDir) and check that the document root validates against it.
    document_root = etree.parse(fileInTestDir("test.xml")).getroot()

    validator = etree.DTD(fileInTestDir("test.dtd"))
    is_valid = validator.validate(document_root)
    self.assertTrue(is_valid)
27
def test_dtd_stringio(self):
    # The DTD is supplied as an in-memory file object instead of a path;
    # an empty <b/> element must validate against "b EMPTY".
    empty_b = etree.XML(_bytes("<b/>"))
    dtd_source = BytesIO("<!ELEMENT b EMPTY>")
    validator = etree.DTD(dtd_source)
    self.assertTrue(validator.validate(empty_b))
32
def test_dtd_parse_invalid(self):
    # With dtd_validation enabled, parsing this document (whose DOCTYPE
    # points at test.dtd) is expected to raise XMLSyntaxError.
    dtd_path = fileInTestDir("test.dtd")
    document = _bytes('<!DOCTYPE b SYSTEM "%s"><b><a/></b>' % dtd_path)
    validating_parser = etree.XMLParser(dtd_validation=True)
    self.assertRaises(
        etree.XMLSyntaxError,
        etree.fromstring, document, parser=validating_parser)
40
42 fromstring = etree.fromstring 43 dtd_filename = fileUrlInTestDir("__nosuch.dtd") 44 parser = etree.XMLParser(dtd_validation=True) 45 xml = _bytes('<!DOCTYPE b SYSTEM "%s"><b><a/></b>' % dtd_filename) 46 self.assertRaises(etree.XMLSyntaxError, 47 fromstring, xml, parser=parser) 48 errors = None 49 try: 50 fromstring(xml, parser=parser) 51 except etree.XMLSyntaxError: 52 e = sys.exc_info()[1] 53 self.assertTrue(e.error_log) 54 self.assertTrue(parser.error_log) 55 errors = [entry.message for entry in e.error_log 56 if dtd_filename in entry.message] 57 self.assertTrue(errors)
58
def test_dtd_parse_valid(self):
    # There are no explicit assertions: the test passes when parsing with
    # DTD validation enabled completes without raising.
    dtd_url = fileUrlInTestDir("test.dtd")
    document = '<!DOCTYPE a SYSTEM "%s"><a><b/></a>' % dtd_url
    validating_parser = etree.XMLParser(dtd_validation=True)
    etree.fromstring(document, parser=validating_parser)
64
66 parser = etree.XMLParser(dtd_validation=True) 67 xml = ('<!DOCTYPE a SYSTEM "%s"><a><b/></a>' % 68 fileUrlInTestDir("test.dtd")) 69 root = etree.fromstring(xml, parser=parser)
70
72 parser = etree.XMLParser(dtd_validation=True) 73 xml = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>' 74 root = etree.fromstring( 75 xml, parser=parser, base_url=fileUrlInTestDir("test.xml"))
76
78 parser = etree.XMLParser(dtd_validation=True) 79 xml = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>' 80 root = etree.fromstring( 81 xml, parser=parser, base_url=fileUrlInTestDir("test.xml"))
82
def test_dtd_invalid(self):
    # <b> has a child element while the DTD declares it EMPTY, so
    # assertValid() is expected to raise DocumentInvalid.
    document_root = etree.XML("<b><a/></b>")
    validator = etree.DTD(BytesIO("<!ELEMENT b EMPTY>"))
    self.assertRaises(
        etree.DocumentInvalid, validator.assertValid, document_root)
87
def test_dtd_assertValid(self):
    # The document matches the content model, so assertValid() must
    # return without raising; that is the only check performed.
    dtd_source = BytesIO("<!ELEMENT b (a)><!ELEMENT a EMPTY>")
    document_root = etree.XML("<b><a/></b>")
    etree.DTD(dtd_source).assertValid(document_root)
92
def test_dtd_internal(self):
    # The DTD lives in the document's internal subset; fetch it through
    # docinfo.internalDTD and validate the same document against it.
    document = _bytes('''
    <!DOCTYPE b SYSTEM "none" [
    <!ELEMENT b (a)>
    <!ELEMENT a EMPTY>
    ]>
    <b><a/></b>
    ''')
    document_root = etree.XML(document)
    internal_dtd = etree.ElementTree(document_root).docinfo.internalDTD
    self.assertTrue(internal_dtd)
    internal_dtd.assertValid(document_root)
104
106 root = etree.XML(_bytes(''' 107 <!DOCTYPE b SYSTEM "none" [ 108 <!ELEMENT b (a)> 109 <!ELEMENT a (c)> 110 <!ELEMENT c EMPTY> 111 ]> 112 <b><a/></b> 113 ''')) 114 dtd = etree.ElementTree(root).docinfo.internalDTD 115 self.assertTrue(dtd) 116 self.assertFalse(dtd.validate(root))
117
119 root = etree.XML(_bytes(''' 120 <a><b id="id1"/><b id="id2"/><b id="id1"/></a> 121 ''')) 122 dtd = etree.DTD(BytesIO(_bytes(""" 123 <!ELEMENT a (b*)> 124 <!ATTLIST b 125 id ID #REQUIRED 126 > 127 <!ELEMENT b EMPTY> 128 """))) 129 self.assertFalse(dtd.validate(root)) 130 self.assertTrue(dtd.error_log) 131 self.assertTrue([error for error in dtd.error_log 132 if 'id1' in error.message])
133
def test_dtd_api_internal(self):
    # Walk the element and attribute declarations of an internal DTD and
    # check the values reported by the declaration objects.
    document_root = etree.XML(_bytes('''
    <!DOCTYPE b SYSTEM "none" [
    <!ATTLIST a
      attr1 (x | y | z) "z"
      attr2 CDATA #FIXED "X"
    >
    <!ELEMENT b (a)>
    <!ELEMENT a EMPTY>
    ]>
    <b><a/></b>
    '''))
    internal_dtd = etree.ElementTree(document_root).docinfo.internalDTD
    self.assertTrue(internal_dtd)
    internal_dtd.assertValid(document_root)

    element_names = []
    for element_decl in internal_dtd.iterelements():
        element_names.append(element_decl.name)

        if element_decl.name != 'a':
            # The only other declared element is 'b', without attributes.
            self.assertEqual('b', element_decl.name)
            self.assertEqual(0, len(element_decl.attributes()))
            continue

        self.assertEqual(2, len(element_decl.attributes()))
        for attr_decl in element_decl.iterattributes():
            if attr_decl.name != 'attr1':
                self.assertEqual('attr2', attr_decl.name)
                self.assertEqual('cdata', attr_decl.type)
                self.assertEqual('fixed', attr_decl.default)
                self.assertEqual('X', attr_decl.default_value)
                continue

            self.assertEqual('enumeration', attr_decl.type)
            self.assertEqual('none', attr_decl.default)
            self.assertEqual('z', attr_decl.default_value)
            # Sorted so the check does not depend on the order of values().
            self.assertEqual(['x', 'y', 'z'], sorted(attr_decl.values()))

    # Iteration order is not relied upon: compare the sorted names.
    self.assertEqual(['a', 'b'], sorted(element_names))
    self.assertEqual(2, len(internal_dtd.elements()))
174
def test_internal_dtds(self):
    # Generate internal DTDs with 2-4 elements (el0..elN) and 0-3
    # enumerated attributes per element, then check that the DTD API
    # reports exactly the declarations that were generated.
    for el_count in range(2, 5):
        for attr_count in range(4):
            # One ATTLIST per (element, attribute) pair.
            attlist_decls = ''.join(['''
            <!ATTLIST el%d
              attr%d (x | y | z) "z"
            >
            ''' % (el_no, attr_no)
                for attr_no in range(attr_count)
                for el_no in range(el_count)
            ])
            # el1..elN are EMPTY; el0 is declared separately below.
            child_decls = ''.join(['''
            <!ELEMENT el%d EMPTY>
            ''' % el_no for el_no in range(1, el_count)
            ])
            child_names = '|'.join(
                ['el%d' % el_no for el_no in range(1, el_count)])
            root_decl = '<!ELEMENT el0 (%s)>' % child_names
            attr_text = ' '.join(
                ['attr%d="x"' % attr_no for attr_no in range(attr_count)])
            document_tail = '''
            ]>
            <el0><el1 %s /></el0>
            ''' % attr_text

            document = ('''
            <!DOCTYPE el0 SYSTEM "none" [
            ''' + attlist_decls + child_decls + '''
            ''' + root_decl + document_tail)

            root = etree.XML(_bytes(document))
            dtd = etree.ElementTree(root).docinfo.internalDTD
            self.assertTrue(dtd)
            dtd.assertValid(root)

            # Start the indices at -1 so that an empty iteration is
            # caught by the count assertions that follow each loop.
            last_el_index = -1
            for last_el_index, el in enumerate(dtd.iterelements()):
                self.assertEqual(attr_count, len(el.attributes()))
                last_attr_index = -1
                for last_attr_index, attr in enumerate(el.iterattributes()):
                    self.assertEqual('enumeration', attr.type)
                    self.assertEqual('none', attr.default)
                    self.assertEqual('z', attr.default_value)
                    self.assertEqual(['x', 'y', 'z'], sorted(attr.values()))
                self.assertEqual(attr_count - 1, last_attr_index)
            self.assertEqual(el_count - 1, last_el_index)
            self.assertEqual(el_count, len(dtd.elements()))
211
def test_dtd_broken(self):
    # Constructing a DTD from this declaration is expected to raise
    # DTDParseError.
    broken_source = BytesIO("<!ELEMENT b HONKEY>")
    self.assertRaises(etree.DTDParseError, etree.DTD, broken_source)
215
def test_parse_file_dtd(self):
    # Parse test.xml with attribute_defaults=True and check the value of
    # the "default" attribute on the root and on its first child.
    defaults_parser = etree.XMLParser(attribute_defaults=True)

    document_root = etree.parse(
        fileInTestDir('test.xml'), defaults_parser).getroot()
    first_child = document_root[0]

    self.assertEqual("valueA", document_root.get("default"))
    self.assertEqual("valueB", first_child.get("default"))
228 229 @skipIf(etree.LIBXML_VERSION == (2, 9, 0), 230 "DTD loading is broken for incremental parsing in libxml2 2.9.0")
232 iterparse = etree.iterparse 233 iterator = iterparse(fileInTestDir("test.xml"), events=('start',), 234 attribute_defaults=True) 235 attributes = [ element.get("default") 236 for event, element in iterator ] 237 self.assertEqual( 238 ["valueA", "valueB"], 239 attributes)
240 241 @skipIf(etree.LIBXML_VERSION == (2, 9, 0), 242 "DTD loading is broken for incremental parsing in libxml2 2.9.0")
244 iterparse = etree.iterparse 245 iterator = iterparse(fileInTestDir("test.xml"), events=('end',), 246 attribute_defaults=True) 247 attributes = [ element.get("default") 248 for event, element in iterator ] 249 self.assertEqual( 250 ["valueB", "valueA"], 251 attributes)
252
def test_dtd_attrs(self):
    # Exercise the read-only attributes exposed by DTD objects and by
    # their element, attribute and entity declarations.
    external_dtd = etree.DTD(fileUrlInTestDir("test.dtd"))

    # DTD.system_url of a DTD loaded from a URL
    self.assertTrue(external_dtd.system_url.endswith("test.dtd"))

    # First element declaration: "a", whose content model is a single "b"
    decl_a = external_dtd.elements()[0]
    self.assertEqual(decl_a.name, "a")
    self.assertEqual(decl_a.type, "element")
    self.assertEqual(decl_a.content.name, "b")
    self.assertEqual(decl_a.content.type, "element")
    self.assertEqual(decl_a.content.occur, "once")

    # First attribute declaration of "a"
    attr_decl = decl_a.attributes()[0]
    self.assertEqual(attr_decl.name, "default")
    self.assertEqual(attr_decl.type, "enumeration")
    self.assertEqual(attr_decl.values(), ["valueA", "valueB"])
    self.assertEqual(attr_decl.default_value, "valueA")

    # Second element declaration: "b", declared empty
    decl_b = external_dtd.elements()[1]
    self.assertEqual(decl_b.name, "b")
    self.assertEqual(decl_b.type, "empty")
    self.assertEqual(decl_b.content, None)

    # First entity declaration
    entity_decl = external_dtd.entities()[0]
    self.assertEqual(entity_decl.name, "c")
    self.assertEqual(entity_decl.orig, "&#42;")
    self.assertEqual(entity_decl.content, "*")

    # DTD.name of an internal subset
    inline_root = etree.XML(_bytes('''
    <!DOCTYPE a SYSTEM "none" [
    <!ELEMENT a EMPTY>
    ]>
    <a/>
    '''))
    inline_dtd = etree.ElementTree(inline_root).docinfo.internalDTD
    self.assertEqual(inline_dtd.name, "a")

    # DTD.name and DTD.system_url when the DOCTYPE names a relative
    # system URL that is resolved against base_url
    validating_parser = etree.XMLParser(dtd_validation=True)
    document = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>'
    validated_root = etree.fromstring(
        document,
        parser=validating_parser,
        base_url=fileUrlInTestDir("test.xml"))

    referenced_dtd = validated_root.getroottree().docinfo.internalDTD
    self.assertEqual(referenced_dtd.name, "a")
    self.assertEqual(referenced_dtd.system_url, "test.dtd")
303
305 # Standard allows quotes in systemliteral, but in that case 306 # systemliteral must be escaped with single quotes. 307 # See http://www.w3.org/TR/REC-xml/#sec-prolog-dtd. 308 root = etree.XML('''<!DOCTYPE a PUBLIC 'foo' '"'><a/>''') 309 doc = root.getroottree() 310 self.assertEqual(doc.docinfo.doctype, 311 '''<!DOCTYPE a PUBLIC "foo" '"'>''') 312 self.assertEqual(etree.tostring(doc), 313 _bytes('''<!DOCTYPE a PUBLIC "foo" '"'>\n<a/>'''))
314
316 root = etree.XML('''<!DOCTYPE a SYSTEM '"'><a/>''') 317 doc = root.getroottree() 318 self.assertEqual(doc.docinfo.doctype, '''<!DOCTYPE a SYSTEM '"'>''') 319 self.assertEqual(etree.tostring(doc), 320 _bytes('''<!DOCTYPE a SYSTEM '"'>\n<a/>'''))
321
def test_declaration_apos(self):
    # The system literal is a single apostrophe: both docinfo.doctype and
    # the serialised document must keep it enclosed in double quotes.
    tree = etree.XML('''<!DOCTYPE a SYSTEM "'"><a/>''').getroottree()
    self.assertEqual(
        tree.docinfo.doctype,
        '''<!DOCTYPE a SYSTEM "'">''')
    serialised = etree.tostring(tree)
    self.assertEqual(
        serialised,
        _bytes('''<!DOCTYPE a SYSTEM "'">\n<a/>'''))
328
def test_ietf_decl(self):
    # Parse an HTML document with a PUBLIC-only IETF DOCTYPE, then check
    # the reported doctype and that HTML serialisation reproduces the
    # input text.
    html_data = (
        '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">\n'
        '<html></html>')
    tree = etree.HTML(html_data).getroottree()
    # Note the expected root name is lower-case "html" here.
    self.assertEqual(
        tree.docinfo.doctype,
        '<!DOCTYPE html PUBLIC "-//IETF//DTD HTML//EN">')
    serialised = etree.tostring(tree, method='html')
    self.assertEqual(serialised, _bytes(html_data))
338
def test_set_decl_public(self):
    # Setting both public_id and system_url on a fresh tree must produce a
    # PUBLIC doctype, in docinfo.doctype and in the serialised output.
    tree = etree.Element('test').getroottree()
    info = tree.docinfo
    info.public_id = 'bar'
    info.system_url = 'baz'
    self.assertEqual(
        tree.docinfo.doctype,
        '<!DOCTYPE test PUBLIC "bar" "baz">')
    self.assertEqual(
        etree.tostring(tree),
        _bytes('<!DOCTYPE test PUBLIC "bar" "baz">\n<test/>'))
347
def test_html_decl(self):
    # Same checks as for a plain etree element, but starting from an
    # element created through lxml.html, which does not start with a
    # blank slate.
    tree = html.Element('html').getroottree()
    info = tree.docinfo
    info.public_id = 'bar'
    info.system_url = 'baz'
    self.assertEqual(
        tree.docinfo.doctype,
        '<!DOCTYPE html PUBLIC "bar" "baz">')
    self.assertEqual(
        etree.tostring(tree),
        _bytes('<!DOCTYPE html PUBLIC "bar" "baz">\n<html/>'))
358
def test_clean_doctype(self):
    # An lxml.html element starts out with a non-empty doctype;
    # docinfo.clear() must reset it to the empty string.
    tree = html.Element('html').getroottree()
    doctype_before = tree.docinfo.doctype
    self.assertTrue(doctype_before != '')
    tree.docinfo.clear()
    doctype_after = tree.docinfo.doctype
    self.assertTrue(doctype_after == '')
364
def test_set_decl_system(self):
    # Setting only system_url must produce a SYSTEM doctype, both in
    # docinfo.doctype and in the serialised output.
    tree = etree.Element('test').getroottree()
    tree.docinfo.system_url = 'baz'
    self.assertEqual(
        tree.docinfo.doctype,
        '<!DOCTYPE test SYSTEM "baz">')
    self.assertEqual(
        etree.tostring(tree),
        _bytes('<!DOCTYPE test SYSTEM "baz">\n<test/>'))
372
def test_empty_decl(self):
    # Assigning None to public_id must yield a bare "<!DOCTYPE test>"
    # declaration, with both identifiers reported as None.
    tree = etree.Element('test').getroottree()
    tree.docinfo.public_id = None
    self.assertEqual(
        tree.docinfo.doctype,
        '<!DOCTYPE test>')
    public_id = tree.docinfo.public_id
    self.assertTrue(public_id is None)
    system_url = tree.docinfo.system_url
    self.assertTrue(system_url is None)
    self.assertEqual(
        etree.tostring(tree),
        _bytes('<!DOCTYPE test>\n<test/>'))
382
def test_invalid_decl_1(self):
    # Assigning either of these values to docinfo.public_id is expected
    # to raise ValueError.
    docinfo = etree.Element('test').getroottree().docinfo

    # setattr() performs the same attribute assignment as
    # "docinfo.public_id = value", in a form assertRaises can call.
    self.assertRaises(
        ValueError, setattr, docinfo, 'public_id', _str('ä'))
    self.assertRaises(
        ValueError, setattr, docinfo, 'public_id', _str('qwerty ä asdf'))
390
def test_invalid_decl_2(self):
    # A system URL containing both quote characters is expected to be
    # rejected with ValueError.
    docinfo = etree.Element('test').getroottree().docinfo

    # setattr() performs the same attribute assignment as
    # "docinfo.system_url = value", in a form assertRaises can call.
    self.assertRaises(ValueError, setattr, docinfo, 'system_url', '\'"')
    self.assertRaises(ValueError, setattr, docinfo, 'system_url', '"\'')
    self.assertRaises(ValueError, setattr, docinfo, 'system_url', ' " \' ')
def test_comment_before_dtd(self):
    # Comments placed before and after the DOCTYPE must survive a
    # parse/serialise round trip unchanged.
    data = '<!--comment--><!DOCTYPE test>\n<!-- --><test/>'
    tree = etree.fromstring(data).getroottree()
    serialised = etree.tostring(tree)
    self.assertEqual(serialised, _bytes(data))
405
def test_suite():
    """Build the test suite for this module.

    The suite contains the tests of ``ETreeDtdTestCase`` followed by the
    doctests created by ``make_doctest`` for ``doc/validation.txt``.

    Returns:
        A ``unittest.TestSuite`` holding both groups of tests.
    """
    suite = unittest.TestSuite()
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in
    # 3.13; the default loader's loadTestsFromTestCase() collects the same
    # "test*" methods and exists in every Python version.
    suite.addTests(
        [unittest.defaultTestLoader.loadTestsFromTestCase(ETreeDtdTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/validation.txt')])
    return suite
if __name__ == '__main__':
    # When executed as a script, only print a hint pointing to test.py.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)