1 """The ``lxml.isoschematron`` package implements ISO Schematron support on top
2 of the pure-xslt 'skeleton' implementation.
3 """
4
5 import sys
6 import os.path
7 from lxml import etree as _etree
8
9
10
# Python 2/3 compatibility aliases for ``bytes``, ``unicode`` and
# ``basestring``.
#
# Each name is resolved through ordinary name lookup rather than by
# subscripting ``__builtins__``: that object is a CPython implementation
# detail which is a dict in imported modules but the builtins *module* in
# ``__main__``, where ``__builtins__["..."]`` raises a TypeError that the
# original ``except (KeyError, NameError)`` clause did not catch.
try:
    bytes = bytes  # re-bind so the alias remains a module attribute
except NameError:
    # interpreter without a ``bytes`` builtin: fall back to ``str``
    bytes = str
try:
    unicode = unicode
except NameError:
    # no ``unicode`` builtin: ``str`` is the text type
    unicode = str
try:
    basestring = basestring
except NameError:
    # no ``basestring`` builtin: ``str`` is the only string base type
    basestring = str
26
27
# Public names exported by a star-import of this package.
__all__ = [
    'extract_xsd',
    'extract_rng',
    'iso_dsdl_include',
    'iso_abstract_expand',
    'iso_svrl_for_xslt1',
    'svrl_validation_errors',
    'schematron_schema_valid',
    'stylesheet_params',
    'Schematron',
]
32
33
34
35
36
# XML namespace URIs referenced throughout this module.
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"      # W3C XML Schema
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"      # RelaxNG
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"  # ISO Schematron
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"              # SVRL
41
42
43
# Namespace-qualified ('{namespace}localname') tags of the <schema> root
# element in the Schematron and XML Schema namespaces respectively.
_schematron_root = '{' + SCHEMATRON_NS + '}schema'
_xml_schema_root = '{' + XML_SCHEMA_NS + '}schema'

# The 'resources' directory that sits next to this module file; the
# stylesheets and the RelaxNG grammar loaded below are read from it.
_resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
47
48
49
def _load_stylesheet(*path_parts):
    """Parse the stylesheet at <resources>/xsl/<path_parts...> and return it
    compiled as an ``etree.XSLT`` object.
    """
    stylesheet_path = os.path.join(_resources_dir, 'xsl', *path_parts)
    return _etree.XSLT(_etree.parse(stylesheet_path))


# Stylesheets that pull a Schematron schema out of an XSD / RelaxNG document.
extract_xsd = _load_stylesheet('XSD2Schtrn.xsl')
extract_rng = _load_stylesheet('RNG2Schtrn.xsl')

# Stylesheets shipped in the 'iso-schematron-xslt1' resource directory.
iso_dsdl_include = _load_stylesheet(
    'iso-schematron-xslt1', 'iso_dsdl_include.xsl')
iso_abstract_expand = _load_stylesheet(
    'iso-schematron-xslt1', 'iso_abstract_expand.xsl')
iso_svrl_for_xslt1 = _load_stylesheet(
    'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')
63
64
65
# Compiled XPath that selects every <svrl:failed-assert> element of the
# document it is applied to.
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert',
    namespaces=dict(svrl=SVRL_NS))
68
69
70
# RelaxNG validator compiled from the grammar file
# <resources>/rng/iso-schematron.rng.
schematron_schema_valid = _etree.RelaxNG(
    _etree.parse(
        os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')))
73
74
def stylesheet_params(**kwargs):
    """Convert keyword args to a dictionary of stylesheet parameters.

    XSL stylesheet parameters must be XPath expressions, i.e.:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    This function converts native Python keyword arguments to stylesheet
    parameters following these rules:

    * If an arg is a string, wrap it with XSLT.strparam().
    * If an arg is an XPath object, pass it through unchanged.
    * If an arg is None, raise TypeError.
    * Otherwise convert the arg to a string.

    Returns a new dict mapping each keyword name to its converted value.
    Raises TypeError if any value is None.
    """
    result = {}
    for key, val in kwargs.items():
        if isinstance(val, basestring):
            # plain strings need quoting to become XPath string expressions
            val = _etree.XSLT.strparam(val)
        elif val is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        elif not isinstance(val, _etree.XPath):
            # numbers and other objects: use their text representation
            val = unicode(val)
        result[key] = val
    return result
98
99
100
# NOTE(review): the ``def`` line was missing from this copy of the file. The
# parameter names are taken from the body; the function name follows upstream
# lxml -- confirm against the callers before relying on it.
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Return a copy of paramsDict, updated with kwargsDict entries, wrapped as
    stylesheet arguments.

    kwargsDict entries with a value of None are ignored.
    The result is produced by ``stylesheet_params()``, so any TypeError raised
    there (a None value left in paramsDict) propagates to the caller.
    """
    # Work on a copy so that the dict handed in by the caller is not mutated.
    merged = dict(paramsDict)
    # None values in kwargsDict do not override existing entries.
    merged.update((k, v) for k, v in kwargsDict.items() if v is not None)
    return stylesheet_params(**merged)
113
114
116 """An ISO Schematron validator.
117
118 Pass a root Element or an ElementTree to turn it into a validator.
119 Alternatively, pass a filename as keyword argument 'file' to parse from
120 the file system.
121 Built on the Schematron language 'reference' skeleton pure-xslt
122 implementation, the validator is created as an XSLT 1.0 stylesheet using
123 these steps:
124
125 0) (Extract from XML Schema or RelaxNG schema)
126 1) Process inclusions
127 2) Process abstract patterns
128 3) Compile the schematron schema to XSLT
129
130 The ``include`` and ``expand`` keyword arguments can be used to switch off
131 steps 1) and 2).
132 To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
133 keyword arguments ``include_params``, ``expand_params`` or
134 ``compile_params``.
135 For convenience, the compile-step parameter ``phase`` is also exposed as a
136 keyword argument ``phase``. This takes precedence if the parameter is also
137 given in the parameter dictionary.
138 If ``store_schematron`` is set to True, the (included-and-expanded)
139 schematron document tree is stored and available through the ``schematron``
140 property.
141 If ``store_xslt`` is set to True, the validation XSLT document tree will be
142 stored and can be retrieved through the ``validator_xslt`` property.
143 With ``store_report`` set to True (default: False), the resulting validation
144 report document gets stored and can be accessed as the ``validation_report``
145 property.
146
147 Schematron is a less well known, but very powerful schema language. The main
148 idea is to use the capabilities of XPath to put restrictions on the structure
149 and the content of XML documents. Here is a simple example::
150
151 >>> from lxml import isoschematron
152 >>> schematron = isoschematron.Schematron(etree.XML('''
153 ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
154 ... <pattern id="id_only_attribute">
155 ... <title>id is the only permitted attribute name</title>
156 ... <rule context="*">
157 ... <report test="@*[not(name()='id')]">Attribute
158 ... <name path="@*[not(name()='id')]"/> is forbidden<name/>
159 ... </report>
160 ... </rule>
161 ... </pattern>
162 ... </schema>
163 ... '''))
164
165 >>> xml = etree.XML('''
166 ... <AAA name="aaa">
167 ... <BBB id="bbb"/>
168 ... <CCC color="ccc"/>
169 ... </AAA>
170 ... ''')
171
172 >>> schematron.validate(xml)
173 0
174
175 >>> xml = etree.XML('''
176 ... <AAA id="aaa">
177 ... <BBB id="bbb"/>
178 ... <CCC/>
179 ... </AAA>
180 ... ''')
181
182 >>> schematron.validate(xml)
183 1
184 """
185
186
# Error domain, level and type constants taken from lxml.etree.
# NOTE(review): presumably used to categorise the validation errors this
# validator reports -- the code that reads them is not visible here.
_domain = _etree.ErrorDomains.SCHEMATRONV
_level = _etree.ErrorLevels.ERROR
_error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT
190
204
205
206
207
# Aliases for the module-level XSLT objects that implement the extract,
# include, expand and compile steps.
# NOTE(review): presumably customization points for subclasses -- confirm.
_extract_xsd = extract_xsd
_extract_rng = extract_rng
_include = iso_dsdl_include
_expand = iso_abstract_expand
_compile = iso_svrl_for_xslt1

# Alias for the module-level XPath that selects failed assertions in an
# SVRL report.
_validation_errors = svrl_validation_errors
217
218 - def __init__(self, etree=None, file=None, include=True, expand=True,
219 include_params={}, expand_params={}, compile_params={},
220 store_schematron=False, store_xslt=False, store_report=False,
221 phase=None):
270
294
# The ``def`` line of this getter was missing; it is restored here and
# exposed as a read-only property via the decorator form.
@property
def schematron(self):
    """ISO-schematron schema document (None if object has been initialized
    with store_schematron=False).
    """
    return self._schematron
301
# The ``def`` line of this getter was missing; it is restored here and
# exposed as a read-only property via the decorator form.
@property
def validator_xslt(self):
    """ISO-schematron skeleton implementation XSLT validator document (None
    if object has been initialized with store_xslt=False).
    """
    return self._validator_xslt
308
# The ``def`` line of this getter was missing; it is restored here and
# exposed as a read-only property via the decorator form.
@property
def validation_report(self):
    """ISO-schematron validation result report (None if result-storing has
    been turned off).
    """
    return self._validation_report
315