Package lxml :: Package tests :: Module test_xmlschema
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_xmlschema

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  Test cases related to XML Schema parsing and validation 
  5  """ 
  6   
  7  import unittest, sys, os.path 
  8   
  9  this_dir = os.path.dirname(__file__) 
 10  if this_dir not in sys.path: 
 11      sys.path.insert(0, this_dir) # needed for Py3 
 12   
 13  from common_imports import etree, BytesIO, HelperTestCase, fileInTestDir 
 14  from common_imports import doctest, make_doctest 
 15   
 16   
17 -class ETreeXMLSchemaTestCase(HelperTestCase):
18 - def test_xmlschema(self):
19 tree_valid = self.parse('<a><b></b></a>') 20 tree_invalid = self.parse('<a><c></c></a>') 21 schema = self.parse(''' 22 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 23 <xsd:element name="a" type="AType"/> 24 <xsd:complexType name="AType"> 25 <xsd:sequence> 26 <xsd:element name="b" type="xsd:string" /> 27 </xsd:sequence> 28 </xsd:complexType> 29 </xsd:schema> 30 ''') 31 schema = etree.XMLSchema(schema) 32 self.assertTrue(schema.validate(tree_valid)) 33 self.assertFalse(schema.validate(tree_invalid)) 34 self.assertTrue(schema.validate(tree_valid)) # retry valid 35 self.assertFalse(schema.validate(tree_invalid)) # retry invalid
36
37 - def test_xmlschema_error_log(self):
38 tree_valid = self.parse('<a><b></b></a>') 39 tree_invalid = self.parse('<a><c></c></a>') 40 schema = self.parse(''' 41 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 42 <xsd:element name="a" type="AType"/> 43 <xsd:complexType name="AType"> 44 <xsd:sequence> 45 <xsd:element name="b" type="xsd:string" /> 46 </xsd:sequence> 47 </xsd:complexType> 48 </xsd:schema> 49 ''') 50 schema = etree.XMLSchema(schema) 51 self.assertTrue(schema.validate(tree_valid)) 52 self.assertFalse(schema.error_log.filter_from_errors()) 53 54 self.assertFalse(schema.validate(tree_invalid)) 55 self.assertTrue(schema.error_log.filter_from_errors()) 56 self.assertTrue(schema.error_log.filter_types( 57 etree.ErrorTypes.SCHEMAV_ELEMENT_CONTENT)) 58 59 self.assertTrue(schema.validate(tree_valid)) 60 self.assertFalse(schema.error_log.filter_from_errors()) 61 62 self.assertFalse(schema.validate(tree_invalid)) 63 self.assertTrue(schema.error_log.filter_from_errors()) 64 self.assertTrue(schema.error_log.filter_types( 65 etree.ErrorTypes.SCHEMAV_ELEMENT_CONTENT))
66
68 """We don't have a guarantee that there will always be a path 69 for a _LogEntry object (or even a node for which to determine 70 a path), but at least when this test was created schema validation 71 errors always got a node and an XPath value. If that ever changes, 72 we can modify this test to something like: 73 self.assertTrue(error_path is None or tree_path == error_path) 74 That way, we can at least verify that if we did get a path value 75 it wasn't bogus. 76 """ 77 tree = self.parse('<a><b>42</b><b>dada</b></a>') 78 schema = self.parse(''' 79 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 80 <xsd:element name="a" type="AType"/> 81 <xsd:complexType name="AType"> 82 <xsd:sequence> 83 <xsd:element name="b" type="xsd:integer" maxOccurs="2"/> 84 </xsd:sequence> 85 </xsd:complexType> 86 </xsd:schema> 87 ''') 88 schema = etree.XMLSchema(schema) 89 schema.validate(tree) 90 tree_path = tree.getpath(tree.findall('b')[1]) 91 error_path = schema.error_log[0].path 92 self.assertTrue(tree_path == error_path)
93
95 schema = self.parse(''' 96 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 97 <xsd:element name="a" type="AType"/> 98 <xsd:complexType name="AType"> 99 <xsd:sequence minOccurs="4" maxOccurs="4"> 100 <xsd:element name="b" type="BType" /> 101 </xsd:sequence> 102 </xsd:complexType> 103 <xsd:complexType name="BType"> 104 <xsd:attribute name="hardy" type="xsd:string" default="hey" /> 105 </xsd:complexType> 106 </xsd:schema> 107 ''') 108 schema = etree.XMLSchema(schema, attribute_defaults=True) 109 110 tree = self.parse('<a><b hardy="ho"/><b/><b hardy="ho"/><b/></a>') 111 112 root = tree.getroot() 113 self.assertEqual('ho', root[0].get('hardy')) 114 self.assertEqual(None, root[1].get('hardy')) 115 self.assertEqual('ho', root[2].get('hardy')) 116 self.assertEqual(None, root[3].get('hardy')) 117 118 self.assertTrue(schema(tree)) 119 120 root = tree.getroot() 121 self.assertEqual('ho', root[0].get('hardy')) 122 self.assertEqual('hey', root[1].get('hardy')) 123 self.assertEqual('ho', root[2].get('hardy')) 124 self.assertEqual('hey', root[3].get('hardy'))
125
126 - def test_xmlschema_parse(self):
127 schema = self.parse(''' 128 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 129 <xsd:element name="a" type="AType"/> 130 <xsd:complexType name="AType"> 131 <xsd:sequence> 132 <xsd:element name="b" type="xsd:string" /> 133 </xsd:sequence> 134 </xsd:complexType> 135 </xsd:schema> 136 ''') 137 schema = etree.XMLSchema(schema) 138 parser = etree.XMLParser(schema=schema) 139 140 tree_valid = self.parse('<a><b></b></a>', parser=parser) 141 self.assertEqual('a', tree_valid.getroot().tag) 142 143 self.assertRaises(etree.XMLSyntaxError, 144 self.parse, '<a><c></c></a>', parser=parser)
145
147 # does not work as of libxml2 2.7.3 148 schema = self.parse(''' 149 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 150 <xsd:element name="a" type="AType"/> 151 <xsd:complexType name="AType"> 152 <xsd:sequence minOccurs="4" maxOccurs="4"> 153 <xsd:element name="b" type="BType" /> 154 </xsd:sequence> 155 </xsd:complexType> 156 <xsd:complexType name="BType"> 157 <xsd:attribute name="hardy" type="xsd:string" default="hey" /> 158 </xsd:complexType> 159 </xsd:schema> 160 ''') 161 schema = etree.XMLSchema(schema) 162 parser = etree.XMLParser(schema=schema, attribute_defaults=True) 163 164 tree_valid = self.parse('<a><b hardy="ho"/><b/><b hardy="ho"/><b/></a>', 165 parser=parser) 166 root = tree_valid.getroot() 167 self.assertEqual('ho', root[0].get('hardy')) 168 self.assertEqual('hey', root[1].get('hardy')) 169 self.assertEqual('ho', root[2].get('hardy')) 170 self.assertEqual('hey', root[3].get('hardy'))
171
173 # does not work as of libxml2 2.7.3 174 schema = self.parse(''' 175 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 176 <xsd:element name="a" type="AType"/> 177 <xsd:complexType name="AType"> 178 <xsd:sequence minOccurs="4" maxOccurs="4"> 179 <xsd:element name="b" type="BType" /> 180 </xsd:sequence> 181 </xsd:complexType> 182 <xsd:complexType name="BType"> 183 <xsd:attribute name="hardy" type="xsd:string" default="hey" /> 184 </xsd:complexType> 185 </xsd:schema> 186 ''') 187 schema = etree.XMLSchema(schema, attribute_defaults=True) 188 parser = etree.XMLParser(schema=schema) 189 190 tree_valid = self.parse('<a><b hardy="ho"/><b/><b hardy="ho"/><b/></a>', 191 parser=parser) 192 root = tree_valid.getroot() 193 self.assertEqual('ho', root[0].get('hardy')) 194 self.assertEqual('hey', root[1].get('hardy')) 195 self.assertEqual('ho', root[2].get('hardy')) 196 self.assertEqual('hey', root[3].get('hardy'))
197
199 # does not work as of libxml2 2.7.3 200 schema = self.parse(''' 201 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 202 <xsd:element name="a" type="AType"/> 203 <xsd:complexType name="AType"> 204 <xsd:sequence minOccurs="3" maxOccurs="3"> 205 <xsd:element name="b" type="BType" /> 206 </xsd:sequence> 207 </xsd:complexType> 208 <xsd:complexType name="BType"> 209 <xsd:attribute name="hardy" type="xsd:string" fixed="hey" /> 210 </xsd:complexType> 211 </xsd:schema> 212 ''') 213 schema = etree.XMLSchema(schema) 214 parser = etree.XMLParser(schema=schema, attribute_defaults=True) 215 216 tree_valid = self.parse('<a><b/><b hardy="hey"/><b/></a>', 217 parser=parser) 218 root = tree_valid.getroot() 219 self.assertEqual('hey', root[0].get('hardy')) 220 self.assertEqual('hey', root[1].get('hardy')) 221 self.assertEqual('hey', root[2].get('hardy'))
222
223 - def test_xmlschema_stringio(self):
224 schema_file = BytesIO(''' 225 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 226 <xsd:element name="a" type="AType"/> 227 <xsd:complexType name="AType"> 228 <xsd:sequence> 229 <xsd:element name="b" type="xsd:string" /> 230 </xsd:sequence> 231 </xsd:complexType> 232 </xsd:schema> 233 ''') 234 schema = etree.XMLSchema(file=schema_file) 235 parser = etree.XMLParser(schema=schema) 236 237 tree_valid = self.parse('<a><b></b></a>', parser=parser) 238 self.assertEqual('a', tree_valid.getroot().tag) 239 240 self.assertRaises(etree.XMLSyntaxError, 241 self.parse, '<a><c></c></a>', parser=parser)
242
243 - def test_xmlschema_iterparse(self):
244 schema = self.parse(''' 245 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 246 <xsd:element name="a" type="AType"/> 247 <xsd:complexType name="AType"> 248 <xsd:sequence> 249 <xsd:element name="b" type="xsd:string" /> 250 </xsd:sequence> 251 </xsd:complexType> 252 </xsd:schema> 253 ''') 254 schema = etree.XMLSchema(schema) 255 xml = BytesIO('<a><b></b></a>') 256 events = [ (event, el.tag) 257 for (event, el) in etree.iterparse(xml, schema=schema) ] 258 259 self.assertEqual([('end', 'b'), ('end', 'a')], 260 events)
261
263 schema = self.parse(''' 264 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 265 <xsd:element name="a" type="AType"/> 266 <xsd:complexType name="AType"> 267 <xsd:sequence> 268 <xsd:element name="b" type="xsd:string" /> 269 </xsd:sequence> 270 </xsd:complexType> 271 </xsd:schema> 272 ''') 273 schema = etree.XMLSchema(schema) 274 self.assertRaises( 275 etree.XMLSyntaxError, 276 list, etree.iterparse(BytesIO('<a><c></c></a>'), schema=schema))
277
279 self.assertRaises(ValueError, etree.XMLSchema, etree.ElementTree())
280
282 self.assertRaises(ValueError, etree.XMLSchema, etree.Comment('TEST'))
283
285 schema = self.parse(''' 286 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 287 <xsd:element name="a" type="xsd:string"/> 288 </xsd:schema> 289 ''') 290 schema = etree.XMLSchema(schema) 291 292 root = etree.Element('a') 293 root.text = 'TEST' 294 self.assertTrue(schema(root)) 295 296 self.assertRaises(ValueError, schema, etree.Comment('TEST')) 297 self.assertRaises(ValueError, schema, etree.PI('a', 'text')) 298 self.assertRaises(ValueError, schema, etree.Entity('text'))
299
301 schema = self.parse('''\ 302 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 303 <element name="a" type="AType"/> 304 <xsd:complexType name="AType"> 305 <xsd:sequence> 306 <xsd:element name="b" type="xsd:string" /> 307 </xsd:sequence> 308 </xsd:complexType> 309 </xsd:schema> 310 ''') 311 self.assertRaises(etree.XMLSchemaParseError, 312 etree.XMLSchema, schema)
313
315 schema = self.parse('<test/>') 316 self.assertRaises(etree.XMLSchemaParseError, 317 etree.XMLSchema, schema)
318
319 - def test_xmlschema_file(self):
320 # this will only work if we access the file through path or 321 # file object.. 322 f = open(fileInTestDir('test.xsd'), 'rb') 323 try: 324 schema = etree.XMLSchema(file=f) 325 finally: 326 f.close() 327 tree_valid = self.parse('<a><b></b></a>') 328 self.assertTrue(schema.validate(tree_valid))
329
331 # this will only work if we access the file through path or 332 # file object.. 333 schema = etree.XMLSchema(file=fileInTestDir('test_import.xsd')) 334 tree_valid = self.parse( 335 '<a:x xmlns:a="http://codespeak.net/lxml/schema/ns1"><b></b></a:x>') 336 self.assertTrue(schema.validate(tree_valid))
337
338 - def test_xmlschema_shortcut(self):
339 tree_valid = self.parse('<a><b></b></a>') 340 tree_invalid = self.parse('<a><c></c></a>') 341 schema = self.parse('''\ 342 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"> 343 <xsd:element name="a" type="AType"/> 344 <xsd:complexType name="AType"> 345 <xsd:sequence> 346 <xsd:element name="b" type="xsd:string" /> 347 </xsd:sequence> 348 </xsd:complexType> 349 </xsd:schema> 350 ''') 351 self.assertTrue(tree_valid.xmlschema(schema)) 352 self.assertFalse(tree_invalid.xmlschema(schema))
353
355 # this used to crash because the schema part was not properly copied out 356 wsdl = self.parse('''\ 357 <wsdl:definitions 358 xmlns:wsdl="http://schemas.xmlsoap.org/wsdl/" 359 xmlns:xs="http://www.w3.org/2001/XMLSchema"> 360 <wsdl:types> 361 <xs:schema> 362 </xs:schema> 363 </wsdl:types> 364 </wsdl:definitions> 365 ''') 366 schema_element = wsdl.find( 367 "{http://schemas.xmlsoap.org/wsdl/}types/" 368 "{http://www.w3.org/2001/XMLSchema}schema" 369 ) 370 etree.XMLSchema(schema_element) 371 etree.XMLSchema(schema_element) 372 etree.XMLSchema(schema_element)
373 374
375 -class ETreeXMLSchemaResolversTestCase(HelperTestCase):
376 resolver_schema_int = BytesIO("""\ 377 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" 378 xmlns:etype="http://codespeak.net/lxml/test/external" 379 targetNamespace="http://codespeak.net/lxml/test/internal"> 380 <xsd:import namespace="http://codespeak.net/lxml/test/external" schemaLocation="XXX.xsd" /> 381 <xsd:element name="a" type="etype:AType"/> 382 </xsd:schema>""") 383 384 resolver_schema_int2 = BytesIO("""\ 385 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" 386 xmlns:etype="http://codespeak.net/lxml/test/external" 387 targetNamespace="http://codespeak.net/lxml/test/internal"> 388 <xsd:import namespace="http://codespeak.net/lxml/test/external" schemaLocation="YYY.xsd" /> 389 <xsd:element name="a" type="etype:AType"/> 390 </xsd:schema>""") 391 392 resolver_schema_ext = """\ 393 <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" 394 targetNamespace="http://codespeak.net/lxml/test/external"> 395 <xsd:complexType name="AType"> 396 <xsd:sequence><xsd:element name="b" type="xsd:string" minOccurs="0" maxOccurs="unbounded" /></xsd:sequence> 397 </xsd:complexType> 398 </xsd:schema>""" 399
400 - class simple_resolver(etree.Resolver):
401 - def __init__(self, schema):
402 self.schema = schema
403
404 - def resolve(self, url, id, context):
405 assert url == 'XXX.xsd' 406 return self.resolve_string(self.schema, context)
407 408 # tests: 409
410 - def test_xmlschema_resolvers(self):
411 # test that resolvers work with schema. 412 parser = etree.XMLParser() 413 parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext)) 414 schema_doc = etree.parse(self.resolver_schema_int, parser = parser) 415 schema = etree.XMLSchema(schema_doc)
416
418 # test that the default resolver will get called if there's no 419 # specific parser resolver. 420 root_resolver = self.simple_resolver(self.resolver_schema_ext) 421 etree.get_default_parser().resolvers.add(root_resolver) 422 schema_doc = etree.parse(self.resolver_schema_int) 423 schema = etree.XMLSchema(schema_doc) 424 etree.get_default_parser().resolvers.remove(root_resolver)
425
427 # test that the default resolver will not get called when a 428 # more specific resolver is registered. 429 430 class res_root(etree.Resolver): 431 def resolve(self, url, id, context): 432 assert False 433 return None
434 435 root_resolver = res_root() 436 etree.get_default_parser().resolvers.add(root_resolver) 437 438 parser = etree.XMLParser() 439 parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext)) 440 441 schema_doc = etree.parse(self.resolver_schema_int, parser = parser) 442 schema = etree.XMLSchema(schema_doc) 443 etree.get_default_parser().resolvers.remove(root_resolver) 444
445 - def test_xmlschema_nested_resolvers(self):
446 # test that resolvers work in a nested fashion. 447 448 resolver_schema = self.resolver_schema_ext 449 450 class res_nested(etree.Resolver): 451 def __init__(self, ext_schema): 452 self.ext_schema = ext_schema
453 454 def resolve(self, url, id, context): 455 assert url == 'YYY.xsd' 456 return self.resolve_string(self.ext_schema, context) 457 458 class res(etree.Resolver): 459 def __init__(self, ext_schema_1, ext_schema_2): 460 self.ext_schema_1 = ext_schema_1 461 self.ext_schema_2 = ext_schema_2 462 463 def resolve(self, url, id, context): 464 assert url == 'XXX.xsd' 465 466 new_parser = etree.XMLParser() 467 new_parser.resolvers.add(res_nested(self.ext_schema_2)) 468 new_schema_doc = etree.parse(self.ext_schema_1, parser = new_parser) 469 new_schema = etree.XMLSchema(new_schema_doc) 470 471 return self.resolve_string(resolver_schema, context) 472 473 parser = etree.XMLParser() 474 parser.resolvers.add(res(self.resolver_schema_int2, self.resolver_schema_ext)) 475 schema_doc = etree.parse(self.resolver_schema_int, parser = parser) 476 schema = etree.XMLSchema(schema_doc) 477 478
def test_suite():
    """Return a suite with both test case classes and the validation doctest.

    The test cases are collected with the default unittest loader instead
    of ``unittest.makeSuite``, which is deprecated and no longer available
    in recent Python versions; the default loader picks the same
    ``test*`` methods.
    """
    load_tests_from = unittest.defaultTestLoader.loadTestsFromTestCase
    suite = unittest.TestSuite()
    suite.addTests([load_tests_from(ETreeXMLSchemaTestCase)])
    suite.addTests([load_tests_from(ETreeXMLSchemaResolversTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/validation.txt')])
    return suite
# Running this file directly executes no tests; it only prints how to run
# them through test.py.
if __name__ == '__main__':
    print('to test use test.py %s' % __file__)