1
2
3 """
4 SAX-based adapter to copy trees from/to the Python standard library.
5
6 Use the `ElementTreeContentHandler` class to build an ElementTree from
7 SAX events.
8
9 Use the `ElementTreeProducer` class or the `saxify()` function to fire
10 the SAX events of an ElementTree against a SAX ContentHandler.
11
See https://lxml.de/sax.html
13 """
14
15 from __future__ import absolute_import
16
17 from xml.sax.handler import ContentHandler
18 from lxml import etree
19 from lxml.etree import ElementTree, SubElement
20 from lxml.etree import Comment, ProcessingInstruction
21
22
24 """General SAX error.
25 """
26
27
def _getNsTag(tag):
    """Split a tag name into a ``(namespace_uri, local_name)`` pair.

    A tag written in Clark notation (``"{uri}local"``) is split at the
    first ``}``; any other tag is returned as ``(None, tag)``.
    """
    # Slice instead of index so an empty tag does not raise IndexError.
    if tag[:1] == '{':
        return tuple(tag[1:].split('}', 1))
    return None, tag
33
34
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Pass an instance to any SAX event source.  Once the events have been
    delivered, the tree is available through the `etree` property.
    """
    def __init__(self, makeelement=None):
        """Create an empty builder.

        ``makeelement`` is the factory used to create the root element.
        It is called as ``makeelement(tag, attrib, nsmap)`` and defaults
        to ``etree.Element``.
        """
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions received before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the last entry is the active binding.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last element start.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignored: the builder does not track source positions."""
        pass

    def startDocument(self):
        """Ignored: no per-document setup is required."""
        pass

    def endDocument(self):
        """Ignored: no per-document teardown is required."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element start."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost binding of ``prefix``."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace (initially None).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the tag for a ``(ns_uri, local_name)`` pair.

        A name without a namespace URI is placed in the current default
        namespace, if one is in effect.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            # Format from the unpacked values so any 2-item sequence works.
            el_tag = "{%s}%s" % (ns_uri, local_name)
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element and make it the current insertion point.

        ``attributes`` maps ``(ns_uri, local_name)`` tuples to values.
        The first element becomes the root; later ones are appended to
        the innermost open element.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # Attach the PIs that arrived before the root element.
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists the PI is queued and attached in
        front of the root later.  Inside an open element it is appended
        as a child.  After the root element has been closed it is added
        behind the last root-level sibling.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        elif hasattr(self._root, 'addnext'):
            # The root is closed, so the stack is empty and indexing it
            # would raise IndexError.  Walk to the last following sibling
            # so that several trailing PIs keep their document order.
            last_sibling = self._root
            following = last_sibling.getnext()
            while following is not None:
                last_sibling = following
                following = last_sibling.getnext()
            last_sibling.addnext(pi)
        # Otherwise the root cannot take siblings, so the PI is dropped,
        # as leading PIs are when 'addprevious' is unavailable.

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if the closed name does not match that element.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant of `startElementNS`."""
        if attributes:
            attributes = dict(
                ((None, k), v) for k, v in attributes.items()
            )
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant of `endElementNS`."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append character data to the innermost open element."""
        last_element = self._element_stack[-1]
        try:
            # If there already is a child, the text belongs to its tail.
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # No children yet: the text belongs to the element itself.
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
157
158
160 """Produces SAX events for an element and children.
161 """
def __init__(self, element_or_tree, content_handler):
    """Remember the tree to serialise and the handler to notify.

    ``element_or_tree`` may be a tree object (anything offering
    ``getroot()``) or a single element; ``content_handler`` receives
    the generated SAX events.
    """
    from xml.sax.xmlreader import AttributesNSImpl

    # A tree exposes getroot(); a plain element is used as it is.
    try:
        root = element_or_tree.getroot()
    except AttributeError:
        root = element_or_tree

    self._element = root
    self._content_handler = content_handler
    self._attr_class = AttributesNSImpl
    # Shared attribute object for elements that carry no attributes.
    self._empty_attributes = AttributesNSImpl({}, {})
172
195
def _recursive_saxify(self, element, parent_nsmap):
    """Fire the SAX events for ``element`` and, recursively, its subtree.

    ``parent_nsmap`` is the prefix-to-URI mapping in effect on the
    parent; only bindings that differ from it are announced through
    ``startPrefixMapping`` / ``endPrefixMapping``.  The element's tail
    text is reported after the element itself has been closed.
    """
    content_handler = self._content_handler
    tag = element.tag
    if tag is Comment or tag is ProcessingInstruction:
        # Comments produce no event of their own here; only PIs do.
        if tag is ProcessingInstruction:
            content_handler.processingInstruction(
                element.target, element.text)
        tail = element.tail
        if tail:
            content_handler.characters(tail)
        return

    element_nsmap = element.nsmap
    new_prefixes = []
    if element_nsmap != parent_nsmap:
        # Collect the bindings that are new or changed on this element.
        for prefix, ns_uri in element_nsmap.items():
            if parent_nsmap.get(prefix) != ns_uri:
                new_prefixes.append((prefix, ns_uri))

    attribs = element.items()
    if attribs:
        attr_values = {}
        attr_qnames = {}
        for attr_ns_name, value in attribs:
            attr_ns_tuple = _getNsTag(attr_ns_name)
            attr_values[attr_ns_tuple] = value
            attr_qnames[attr_ns_tuple] = self._build_qname(
                attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap,
                preferred_prefix=None, is_attribute=True)
        sax_attributes = self._attr_class(attr_values, attr_qnames)
    else:
        sax_attributes = self._empty_attributes

    ns_uri, local_name = _getNsTag(tag)
    qname = self._build_qname(
        ns_uri, local_name, element_nsmap, element.prefix,
        is_attribute=False)

    for prefix, uri in new_prefixes:
        content_handler.startPrefixMapping(prefix, uri)
    content_handler.startElementNS(
        (ns_uri, local_name), qname, sax_attributes)
    text = element.text
    if text:
        content_handler.characters(text)
    for child in element:
        self._recursive_saxify(child, element_nsmap)
    content_handler.endElementNS((ns_uri, local_name), qname)
    for prefix, uri in new_prefixes:
        content_handler.endPrefixMapping(prefix)
    tail = element.tail
    if tail:
        content_handler.characters(tail)
249
def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, is_attribute):
    """Return the qualified name for a namespaced element or attribute.

    Names without a namespace are returned as they are.  An element
    keeps ``preferred_prefix`` when ``nsmap`` binds it to ``ns_uri``.
    Otherwise the alphabetically smallest non-default prefix bound to
    ``ns_uri`` is used.  If there is none, the bare local name is
    returned.
    """
    if ns_uri is None:
        return local_name

    keep_preferred = (not is_attribute
                      and nsmap.get(preferred_prefix) == ns_uri)
    if keep_preferred:
        prefix = preferred_prefix
    else:
        # The default (None) prefix is never a candidate here.
        matching = sorted(
            pfx for pfx, uri in nsmap.items()
            if pfx is not None and uri == ns_uri
        )
        prefix = matching[0] if matching else None

    if prefix is None:
        # Default namespace: no prefix is written.
        return local_name
    return ':'.join((prefix, local_name))
272
273
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that builds an `ElementTreeProducer` for the
    given element or tree and returns the result of its ``saxify()``
    call.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
279