1 """
SAX-based adapter to copy trees from/to the Python standard library.

Use the `ElementTreeContentHandler` class to build an ElementTree from
SAX events.

Use the `ElementTreeProducer` class or the `saxify()` function to fire
the SAX events of an ElementTree against a SAX ContentHandler.

See http://codespeak.net/lxml/sax.html
11 """
12
13 from __future__ import absolute_import
14
15 from xml.sax.handler import ContentHandler
16 from lxml import etree
17 from lxml.etree import ElementTree, SubElement
18 from lxml.etree import Comment, ProcessingInstruction
19
20
22 """General SAX error.
23 """
24
25
def _getNsTag(tag):
    """Split an ElementTree-style tag into ``(namespace_uri, local_name)``.

    A tag written as ``{uri}name`` yields ``(uri, name)``; any other tag
    yields ``(None, tag)``.
    """
    # Slicing instead of indexing keeps an empty tag from raising IndexError.
    if tag[:1] == '{':
        return tuple(tag[1:].split('}', 1))
    return (None, tag)
31
32
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events into an instance and read the result from the
    ``etree`` property.  ``makeelement`` is an optional factory for the
    root element; it is called as ``makeelement(tag, attrib, nsmap)`` and
    defaults to ``etree.Element``.
    """

    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions received before the root element exists.
        self._root_siblings = []
        # Elements that are currently open, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the last entry is the active binding.
        self._ns_mapping = {None: [None]}
        # Prefix mappings announced since the last element was started.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        # NOTE(review): no body was present for this hook in the reviewed
        # source; written as an explicit no-op -- confirm.
        pass

    def startDocument(self):
        # NOTE(review): no body was present for this hook in the reviewed
        # source; written as an explicit no-op -- confirm.
        pass

    def endDocument(self):
        # NOTE(review): no body was present for this hook in the reviewed
        # source; written as an explicit no-op -- confirm.
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix binding that applies to the next element."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost binding of ``prefix``."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace (initially None).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}name`` tag for a SAX ``(uri, localname)`` pair.

        A name without a namespace URI is placed in the current default
        namespace, if one is set.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create the element for ``ns_name`` and push it on the stack.

        ``attributes`` maps ``(uri, localname)`` tuples to values.  The
        first element started becomes the root; later ones are appended
        to the innermost open element.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                # No iteritems() on this mapping (e.g. Python 3 dicts).
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            # Attach PIs that arrived before the root, when the element
            # type supports siblings at document level.
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        # The announced mappings belong to this element only.
        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position."""
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if ``ns_name`` does not match that element's tag.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Namespace-unaware variant; delegates to startElementNS()."""
        if attributes:
            attributes = {(None, k): v for k, v in attributes.items()}
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Namespace-unaware variant; delegates to endElementNS()."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element or its last child."""
        last_element = self._element_stack[-1]
        try:
            # If there already is a child, the text follows that child.
            last_element = last_element[-1]
        except IndexError:
            # No children yet: the text belongs to the element itself.
            last_element.text = (last_element.text or '') + data
        else:
            last_element.tail = (last_element.tail or '') + data

    ignorableWhitespace = characters
155
156
158 """Produces SAX events for an element and children.
159 """
def __init__(self, element_or_tree, content_handler):
    """Store the element to serialise and the target SAX handler.

    ``element_or_tree`` may be a tree object (anything with a
    ``getroot()`` method) or an element; ``content_handler`` receives
    the generated SAX events.
    """
    try:
        element = element_or_tree.getroot()
    except AttributeError:
        # No getroot(): the argument is used as the element itself.
        element = element_or_tree
    self._element = element
    self._content_handler = content_handler
    from xml.sax.xmlreader import AttributesNSImpl as attr_class
    self._attr_class = attr_class
    # Shared attribute object for elements without attributes.
    self._empty_attributes = attr_class({}, {})
170
193
def _recursive_saxify(self, element, prefixes):
    """Fire the SAX events for ``element`` and everything below it.

    ``prefixes`` maps namespace URIs to the prefixes that are in scope
    at this point.  Prefixes introduced for this element are announced
    before its start event and withdrawn after its end event.
    """
    content_handler = self._content_handler
    tag = element.tag
    if tag is Comment or tag is ProcessingInstruction:
        # Comments produce no event of their own; only their tail text
        # is forwarded.
        if tag is ProcessingInstruction:
            content_handler.processingInstruction(
                element.target, element.text)
        if element.tail:
            content_handler.characters(element.tail)
        return

    new_prefixes = []
    build_qname = self._build_qname
    attribs = element.items()
    if attribs:
        attr_values = {}
        attr_qnames = {}
        for attr_ns_name, value in attribs:
            attr_ns_tuple = _getNsTag(attr_ns_name)
            attr_values[attr_ns_tuple] = value
            attr_qnames[attr_ns_tuple] = build_qname(
                attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
        sax_attributes = self._attr_class(attr_values, attr_qnames)
    else:
        sax_attributes = self._empty_attributes

    ns_uri, local_name = _getNsTag(tag)
    qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

    for prefix, uri in new_prefixes:
        content_handler.startPrefixMapping(prefix, uri)
    content_handler.startElementNS((ns_uri, local_name),
                                   qname, sax_attributes)
    if element.text:
        content_handler.characters(element.text)
    for child in element:
        self._recursive_saxify(child, prefixes)
    content_handler.endElementNS((ns_uri, local_name), qname)
    for prefix, uri in new_prefixes:
        content_handler.endPrefixMapping(prefix)
        # The mapping is out of scope from here on.  Forget it so that a
        # later sibling using the same namespace announces it again
        # instead of referring to a prefix that was already ended.
        prefixes.pop(uri, None)
    if element.tail:
        content_handler.characters(element.tail)
236
def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
    """Return the qualified name for ``local_name`` in ``ns_uri``.

    Names without a namespace are returned unchanged.  Otherwise the
    prefix registered for ``ns_uri`` in ``prefixes`` is used.  If there
    is none, a prefix of the form ``nsNN`` is generated, stored in
    ``prefixes`` and reported to the caller by appending
    ``(prefix, ns_uri)`` to ``new_prefixes``.
    """
    if ns_uri is None:
        return local_name
    try:
        prefix = prefixes[ns_uri]
    except KeyError:
        prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
        new_prefixes.append((prefix, ns_uri))
    return prefix + ':' + local_name
246
def saxify(element_or_tree, content_handler):
    """One-shot helper to generate SAX events from an XML tree and fire
    them against a SAX ContentHandler.

    Builds an ElementTreeProducer for the two arguments and returns the
    result of its ``saxify()`` call.
    """
    return ElementTreeProducer(element_or_tree, content_handler).saxify()
252