1 """The ``lxml.isoschematron`` package implements ISO Schematron support on top
2 of the pure-xslt 'skeleton' implementation.
3 """
4
5 import sys
6 import os.path
7 from lxml import etree as _etree
8
9
10
# Python 2/3 compatibility: make the names ``unicode`` and ``basestring``
# usable in this module on interpreters that do not provide them.
try:
    unicode
except NameError:
    # Python 3: no ``unicode`` builtin, ``str`` is the text type
    unicode = str
try:
    basestring
except NameError:
    # Python 3: no ``basestring`` builtin, ``str`` is the only string type
    basestring = str
21
22
# Public names of this package (what ``from ... import *`` exports).
__all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include',
           'iso_abstract_expand', 'iso_svrl_for_xslt1',
           'svrl_validation_errors', 'schematron_schema_valid',
           'stylesheet_params', 'Schematron']
27
28
29
30
31
# Namespace URIs: the three schema languages, followed by SVRL, the
# namespace of the elements found in a validation report.
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"

# Tags of a <schema> root element in "{namespace}localname" notation,
# for Schematron and for XML Schema respectively.
_schematron_root, _xml_schema_root = (
    '{%s}schema' % SCHEMATRON_NS,
    '{%s}schema' % XML_SCHEMA_NS,
)

# The 'resources' directory that sits next to this module file.
_resources_dir = os.path.join(
    os.path.dirname(__file__),
    'resources',
)
42
43
44
def _load_bundled_xslt(*path_parts):
    """Parse the stylesheet at ``<resources dir>/xsl/<path_parts...>`` and
    return it compiled as an ``etree.XSLT`` transformer.
    """
    stylesheet_path = os.path.join(_resources_dir, 'xsl', *path_parts)
    return _etree.XSLT(_etree.parse(stylesheet_path))


# Transformers compiled from the bundled XSD2Schtrn / RNG2Schtrn stylesheets
# (XML Schema -> Schematron and RelaxNG -> Schematron, going by their names).
extract_xsd = _load_bundled_xslt('XSD2Schtrn.xsl')
extract_rng = _load_bundled_xslt('RNG2Schtrn.xsl')

# Transformers compiled from the bundled 'iso-schematron-xslt1' stylesheets.
iso_dsdl_include = _load_bundled_xslt(
    'iso-schematron-xslt1', 'iso_dsdl_include.xsl')
iso_abstract_expand = _load_bundled_xslt(
    'iso-schematron-xslt1', 'iso_abstract_expand.xsl')
iso_svrl_for_xslt1 = _load_bundled_xslt(
    'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')
58
59
60
# Compiled XPath expression selecting every ``svrl:failed-assert`` element
# of the document it is applied to.
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})


# RelaxNG validator compiled from the bundled 'iso-schematron.rng' schema
# (going by its name: checks that a schematron schema is itself valid).
schematron_schema_valid = _etree.RelaxNG(_etree.parse(
    os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')))
"""Convert keyword args to a dictionary of stylesheet parameters.

XSL stylesheet parameters must be XPath expressions, i.e.:

* string expressions, like "'5'"
* simple (number) expressions, like "5"
* valid XPath expressions, like "/a/b/text()"

This function converts native Python keyword arguments to stylesheet
parameters following these rules:

* If an arg is a string, wrap it with XSLT.strparam().
* If an arg is None, raise TypeError.
* If an arg is an XPath object, keep it as it is.  (Its path string is
  not extracted here; presumably the XSLT call does that - confirm.)
* Else convert the arg to a string.

Returns a new dictionary mapping each keyword name to its converted
value.
"""
result = {}
for key, val in kwargs.items():
    if isinstance(val, basestring):
        # mark plain strings as string parameters (see XSLT.strparam)
        val = _etree.XSLT.strparam(val)
    elif val is None:
        raise TypeError('None not allowed as a stylesheet parameter')
    elif not isinstance(val, _etree.XPath):
        # any other non-XPath value is passed on in its text form;
        # XPath objects fall through all branches unchanged
        val = unicode(val)
    result[key] = val
return result
95
"""Return a copy of paramsDict, updated with kwargsDict entries, wrapped as
stylesheet arguments.

kwargsDict entries with a value of None are ignored.  Entries of
paramsDict itself are not filtered; they are handed to
stylesheet_params() as they are.
"""
# work on a shallow copy so that the dictionary passed in by the caller
# is never modified
paramsDict = dict(paramsDict)
for k, v in kwargsDict.items():
    if v is not None:
        # a non-None keyword value overrides the dictionary entry
        paramsDict[k] = v
paramsDict = stylesheet_params(**paramsDict)
return paramsDict
110
113 """An ISO Schematron validator.
114
115 Pass a root Element or an ElementTree to turn it into a validator.
116 Alternatively, pass a filename as keyword argument 'file' to parse from
117 the file system.
118
119 Schematron is a less well known, but very powerful schema language.
120 The main idea is to use the capabilities of XPath to put restrictions on
121 the structure and the content of XML documents.
122
123 The standard behaviour is to fail on ``failed-assert`` findings only
124 (``ASSERTS_ONLY``). To change this, you can either pass a report filter
125 function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
126 or a custom ``XPath`` object), or subclass isoschematron.Schematron for
127 complete control of the validation process.
128
129 Built on the Schematron language 'reference' skeleton pure-xslt
130 implementation, the validator is created as an XSLT 1.0 stylesheet using
131 these steps:
132
133 0) (Extract from XML Schema or RelaxNG schema)
134 1) Process inclusions
135 2) Process abstract patterns
136 3) Compile the schematron schema to XSLT
137
138 The ``include`` and ``expand`` keyword arguments can be used to switch off
139 steps 1) and 2).
140 To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
141 keyword arguments ``include_params``, ``expand_params`` or
142 ``compile_params``.
143 For convenience, the compile-step parameter ``phase`` is also exposed as a
144 keyword argument ``phase``. This takes precedence if the parameter is also
145 given in the parameter dictionary.
146
147 If ``store_schematron`` is set to True, the (included-and-expanded)
148 schematron document tree is stored and available through the ``schematron``
149 property.
150 If ``store_xslt`` is set to True, the validation XSLT document tree will be
151 stored and can be retrieved through the ``validator_xslt`` property.
152 With ``store_report`` set to True (default: False), the resulting validation
153 report document gets stored and can be accessed as the ``validation_report``
154 property.
155
156 Here is a usage example::
157
158 >>> from lxml import etree
159 >>> from lxml.isoschematron import Schematron
160
161 >>> schematron = Schematron(etree.XML('''
162 ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
163 ... <pattern id="id_only_attribute">
164 ... <title>id is the only permitted attribute name</title>
165 ... <rule context="*">
166 ... <report test="@*[not(name()='id')]">Attribute
167 ... <name path="@*[not(name()='id')]"/> is forbidden<name/>
168 ... </report>
169 ... </rule>
170 ... </pattern>
171 ... </schema>'''),
172 ... error_finder=Schematron.ASSERTS_AND_REPORTS)
173
174 >>> xml = etree.XML('''
175 ... <AAA name="aaa">
176 ... <BBB id="bbb"/>
177 ... <CCC color="ccc"/>
178 ... </AAA>
179 ... ''')
180
181 >>> schematron.validate(xml)
182 False
183
184 >>> xml = etree.XML('''
185 ... <AAA id="aaa">
186 ... <BBB id="bbb"/>
187 ... <CCC/>
188 ... </AAA>
189 ... ''')
190
191 >>> schematron.validate(xml)
192 True
193 """
194
195
# Error domain, level and type constants looked up once at class level
# (presumably used when validation errors are logged - usage not visible here).
_domain = _etree.ErrorDomains.SCHEMATRONV
_level = _etree.ErrorLevels.ERROR
_error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT


# Ready-made report filters for the ``error_finder`` parameter:
# failed assertions only, or failed assertions plus successful reports.
ASSERTS_ONLY = svrl_validation_errors
ASSERTS_AND_REPORTS = _etree.XPath(
    '//svrl:failed-assert | //svrl:successful-report',
    namespaces={'svrl': SVRL_NS})
205
219
220
221
222
# The module-level transformers bound as class attributes, one per
# processing step (extract, include, expand, compile).
# NOTE(review): presumably these exist so subclasses can swap in their own
# stylesheets - confirm against the code that uses them.
_extract_xsd = extract_xsd
_extract_rng = extract_rng
_include = iso_dsdl_include
_expand = iso_abstract_expand
_compile = iso_svrl_for_xslt1




# Class-level default report filter: failed assertions only.
_validation_errors = ASSERTS_ONLY
233
# NOTE(review): ``include_params``, ``expand_params`` and ``compile_params``
# default to dict literals, which are created once and shared by every call.
# That is only safe if the body (not visible in this excerpt) never mutates
# them - confirm.
def __init__(self, etree=None, file=None, include=True, expand=True,
             include_params={}, expand_params={}, compile_params={},
             store_schematron=False, store_xslt=False, store_report=False,
             phase=None, error_finder=ASSERTS_ONLY):
289
314
@property
"""ISO-schematron schema document.

None if the object has been initialized with store_schematron=False.
"""
return self._schematron
321
@property
"""ISO-schematron skeleton implementation XSLT validator document.

None if the object has been initialized with store_xslt=False.
"""
return self._validator_xslt
328
@property
"""ISO-schematron validation result report.

None if result-storing has been turned off.
"""
return self._validation_report
335