Package lxml :: Package isoschematron
[hide private]
[frames] | no frames]

Source Code for Package lxml.isoschematron

  1  """The ``lxml.isoschematron`` package implements ISO Schematron support on top 
  2  of the pure-xslt 'skeleton' implementation. 
  3  """ 
  4   
  5  import sys 
  6  import os.path 
  7  from lxml import etree as _etree # due to validator __init__ signature 
  8   
  9   
 10  # some compat stuff, borrowed from lxml.html 
 11  try: 
 12      bytes 
 13  except NameError: 
 14      # Python < 2.6 
 15      bytes = str 
 16  try: 
 17      unicode 
 18  except NameError: 
 19      # Python 3 
 20      unicode = str 
 21  try: 
 22      basestring 
 23  except NameError: 
 24      # Python 3 
 25      basestring = str 
 26   
 27   
 28  __all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include', 
 29             'iso_abstract_expand', 'iso_svrl_for_xslt1', 
 30             'svrl_validation_errors', 'schematron_schema_valid', 
 31             'stylesheet_params', 'Schematron']  
 32   
 33   
 34  # some namespaces 
 35  #FIXME: Maybe lxml should provide a dedicated place for common namespace 
 36  #FIXME: definitions? 
 37  XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema" 
 38  RELAXNG_NS = "http://relaxng.org/ns/structure/1.0" 
 39  SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron" 
 40  SVRL_NS = "http://purl.oclc.org/dsdl/svrl" 
 41   
 42   
 43  # some helpers 
 44  _schematron_root = '{%s}schema' % SCHEMATRON_NS 
 45  _xml_schema_root = '{%s}schema' % XML_SCHEMA_NS 
 46  _resources_dir = os.path.join(os.path.dirname(__file__), 'resources') 
 47   
 48   
 49  # the iso-schematron skeleton implementation steps aka xsl transformations 
 50  extract_xsd = _etree.XSLT(_etree.parse( 
 51      os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl'))) 
 52  extract_rng = _etree.XSLT(_etree.parse( 
 53      os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl'))) 
 54  iso_dsdl_include = _etree.XSLT(_etree.parse( 
 55      os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1', 
 56                   'iso_dsdl_include.xsl'))) 
 57  iso_abstract_expand = _etree.XSLT(_etree.parse( 
 58      os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1', 
 59                   'iso_abstract_expand.xsl'))) 
 60  iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse( 
 61      os.path.join(_resources_dir, 
 62                   'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl'))) 
 63   
 64   
 65  # svrl result accessors 
 66  svrl_validation_errors = _etree.XPath( 
 67      '//svrl:failed-assert', namespaces={'svrl': SVRL_NS}) 
 68   
 69   
 70  # RelaxNG validator for schematron schemas 
 71  schematron_schema_valid = _etree.RelaxNG(_etree.parse( 
 72      os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))) 
 73   
 74   
def stylesheet_params(**kwargs):
    """Convert keyword args to a dictionary of stylesheet parameters.

    XSL stylesheet parameters must be XPath expressions, i.e.:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    This function converts native Python keyword arguments to stylesheet
    parameters following these rules:

    * If an arg is None, raise TypeError.
    * If an arg is a string, wrap it with XSLT.strparam().
    * If an arg is an XPath object, leave it as it is (no conversion is
      done here).
    * Else convert the arg to a string.

    Returns a new dict that maps each keyword name to its converted value.
    """
    result = {}
    for key, val in kwargs.items():
        # None can never be a string or an XPath object, so rejecting it
        # first does not change which branch any other value takes.
        if val is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        if isinstance(val, basestring):
            val = _etree.XSLT.strparam(val)
        elif not isinstance(val, _etree.XPath):
            val = unicode(val)
        result[key] = val
    return result
# helper function for use in Schematron __init__
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Return a copy of paramsDict, updated with kwargsDict entries, wrapped as
    stylesheet arguments.

    kwargsDict entries with a value of None are ignored, i.e. they do not
    override an entry of the same name in paramsDict.
    Neither input dictionary is modified.
    """
    # Work on a copy: paramsDict may be a caller's mutable default argument.
    merged = dict(paramsDict)
    merged.update(
        (key, value) for key, value in kwargsDict.items()
        if value is not None)  # None values do not override
    return stylesheet_params(**merged)
115 116
class Schematron(_etree._Validator):
    """An ISO Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.
    Built on the Schematron language 'reference' skeleton pure-xslt
    implementation, the validator is created as an XSLT 1.0 stylesheet using
    these steps:

     0) (Extract from XML Schema or RelaxNG schema)
     1) Process inclusions
     2) Process abstract patterns
     3) Compile the schematron schema to XSLT

    The ``include`` and ``expand`` keyword arguments can be used to switch off
    steps 1) and 2).
    To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
    keyword arguments ``include_params``, ``expand_params`` or
    ``compile_params``.
    For convenience, the compile-step parameter ``phase`` is also exposed as a
    keyword argument ``phase``. This takes precedence if the parameter is also
    given in the parameter dictionary.
    If ``store_schematron`` is set to True, the (included-and-expanded)
    schematron document tree is stored and available through the ``schematron``
    property.
    If ``store_xslt`` is set to True, the validation XSLT document tree will be
    stored and can be retrieved through the ``validator_xslt`` property.
    With ``store_report`` set to True (default: False), the resulting validation
    report document gets stored and can be accessed as the ``validation_report``
    property.

    Schematron is a less well known, but very powerful schema language. The main
    idea is to use the capabilities of XPath to put restrictions on the structure
    and the content of XML documents. Here is a simple example::

      >>> from lxml import isoschematron
      >>> schematron = isoschematron.Schematron(etree.XML('''
      ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
      ...   <pattern id="id_only_attribute">
      ...     <title>id is the only permitted attribute name</title>
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>
      ... '''))

      >>> xml = etree.XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      0

      >>> xml = etree.XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      1
    """

    # libxml2 error categorization for validation errors
    _domain = _etree.ErrorDomains.SCHEMATRONV
    _level = _etree.ErrorLevels.ERROR
    _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT

    def _extract(self, element):
        """Extract embedded schematron schema from non-schematron host schema.

        This method will only be called by __init__ if the given schema document
        is not a schematron schema by itself.
        Must return a schematron schema document tree or None.
        """
        schematron = None
        if element.tag == _xml_schema_root:
            schematron = self._extract_xsd(element)
        # .get() instead of indexing: a root element without any namespace
        # has a prefix that is missing from nsmap, and must end up as
        # "not extractable" (None) instead of raising KeyError.
        elif element.nsmap.get(element.prefix) == RELAXNG_NS:
            # RelaxNG does not have a single unique root element
            schematron = self._extract_rng(element)
        return schematron

    # customization points
    # etree.XSLT objects that provide the extract, include, expand, compile
    # steps
    _extract_xsd = extract_xsd
    _extract_rng = extract_rng
    _include = iso_dsdl_include
    _expand = iso_abstract_expand
    _compile = iso_svrl_for_xslt1
    # etree.XPath object that determines input document validity when applied to
    # the svrl result report; must return a list of result elements (empty if
    # valid)
    _validation_errors = svrl_validation_errors

    # NOTE: the ``{}`` defaults are kept for interface compatibility; they are
    # only unpacked or copied below, never mutated.
    def __init__(self, etree=None, file=None, include=True, expand=True,
                 include_params={}, expand_params={}, compile_params={},
                 store_schematron=False, store_xslt=False, store_report=False,
                 phase=None):
        """Build the validating XSLT from a schema tree or a schema file.

        See the class docstring for the meaning of the keyword arguments.

        Raises ``etree.SchematronParseError`` if obtaining the root element
        fails, if the document is neither a schematron schema nor
        schematron-extractable, or if the (included/expanded) schema does not
        validate against the schematron RelaxNG schema.
        Raises ``ValueError`` if no root element is available, which includes
        the case that neither ``etree`` nor ``file`` is given.
        """
        super(Schematron, self).__init__()

        self._store_report = store_report
        self._schematron = None
        self._validator_xslt = None
        self._validation_report = None

        # parse schema document, may be a schematron schema or an XML Schema or
        # a RelaxNG schema with embedded schematron rules
        # Bind root up front: with neither etree nor file given, none of the
        # branches below assigns it and the check after the try block would
        # otherwise fail with UnboundLocalError.
        root = None
        try:
            if etree is not None:
                if isinstance(etree, _etree._Element):
                    root = etree
                else:
                    root = etree.getroot()
            elif file is not None:
                root = _etree.parse(file).getroot()
        except Exception:
            raise _etree.SchematronParseError(
                "No tree or file given: %s" % sys.exc_info()[1])
        if root is None:
            raise ValueError("Empty tree")
        if root.tag == _schematron_root:
            schematron = root
        else:
            schematron = self._extract(root)
        if schematron is None:
            raise _etree.SchematronParseError(
                "Document is not a schematron schema or schematron-extractable")
        # perform the iso-schematron skeleton implementation steps to get a
        # validating xslt
        if include:
            schematron = self._include(schematron, **include_params)
        if expand:
            schematron = self._expand(schematron, **expand_params)
        if not schematron_schema_valid(schematron):
            raise _etree.SchematronParseError(
                "invalid schematron schema: %s" %
                schematron_schema_valid.error_log)
        if store_schematron:
            self._schematron = schematron
        # add new compile keyword args here if exposing them
        compile_kwargs = {'phase': phase}
        compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
        validator_xslt = self._compile(schematron, **compile_params)
        if store_xslt:
            self._validator_xslt = validator_xslt
        self._validator = _etree.XSLT(validator_xslt)

    def __call__(self, etree):
        """Validate doc using Schematron.

        Returns true if document is valid, false if not.
        If ``store_report`` was set, the result of the validation transform is
        kept for the ``validation_report`` property. Every element returned by
        the ``_validation_errors`` accessor is appended to the error log.
        """
        self._clear_error_log()
        result = self._validator(etree)
        if self._store_report:
            self._validation_report = result
        errors = self._validation_errors(result)
        if errors:
            # An Element has no docinfo of its own; go through its tree.
            if isinstance(etree, _etree._Element):
                fname = etree.getroottree().docinfo.URL or '<file>'
            else:
                fname = etree.docinfo.URL or '<file>'
            for error in errors:
                # Does svrl report the line number, anywhere? Don't think so.
                self._append_log_message(
                    domain=self._domain, type=self._error_type,
                    level=self._level, line=0, message=_etree.tounicode(error),
                    filename=fname)
            return False
        return True

    @property
    def schematron(self):
        """ISO-schematron schema document (None if object has been initialized
        with store_schematron=False).
        """
        return self._schematron

    @property
    def validator_xslt(self):
        """ISO-schematron skeleton implementation XSLT validator document (None
        if object has been initialized with store_xslt=False).
        """
        return self._validator_xslt

    @property
    def validation_report(self):
        """ISO-schematron validation result report (None if result-storing has
        been turned off).
        """
        return self._validation_report
317