"""
SAX-based adapter to copy trees from/to the Python standard library.

Use the `ElementTreeContentHandler` class to build an ElementTree from
SAX events.

Use the `ElementTreeProducer` class or the `saxify()` function to fire
the SAX events of an ElementTree against a SAX ContentHandler.

See http://codespeak.net/lxml/sax.html
"""
12
13 from xml.sax.handler import ContentHandler
14 from lxml import etree
15 from lxml.etree import ElementTree, SubElement
16 from lxml.etree import Comment, ProcessingInstruction
17
# NOTE(review): the class header line was missing from this listing.  The
# name is taken from the ``raise SaxError(...)`` in endElementNS; the base
# class follows lxml's published API -- confirm against the upstream file.
class SaxError(etree.LxmlError):
    """General SAX error.
    """
22
def _getNsTag(tag):
    """Split an ElementTree-style tag into ``(namespace_uri, local_name)``.

    A tag written as ``{uri}local`` yields ``(uri, local)``; any other tag
    yields ``(None, tag)``.
    """
    # Slice instead of indexing so that an empty tag does not raise.
    if tag[:1] == '{':
        return tuple(tag[1:].split('}', 1))
    return (None, tag)
28
29
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events to an instance; the resulting tree is available from
    the ``etree`` property.
    """

    def __init__(self, makeelement=None):
        """Create an empty builder.

        ``makeelement`` is the factory used for the root element.  It is
        called as ``makeelement(tag, attrib, nsmap)`` and defaults to
        ``etree.Element``.
        """
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions received before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the last entry is the one in effect.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the previous start tag; they are handed
        # to the next element that gets created and then cleared.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignored: the locator is not used by this handler."""
        pass

    def startDocument(self):
        """Ignored: no setup is needed at document start."""
        pass

    def endDocument(self):
        """Ignored: no finalisation is needed at document end."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element."""
        self._new_mappings[prefix] = uri
        self._ns_mapping.setdefault(prefix, []).append(uri)
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping for ``prefix``.

        Raises ``KeyError`` if ``prefix`` was never mapped.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace (initially None).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a SAX ``(uri, local)`` pair.

        A name without a namespace picks up the current default namespace,
        if one is set.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            return "{%s}%s" % (ns_uri, local_name)
        if self._default_ns:
            return "{%s}%s" % (self._default_ns, local_name)
        return local_name

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element and make it the current one.

        ``attributes`` maps ``(uri, local)`` pairs to values.  The first
        element becomes the root (created through the ``makeelement``
        factory); every later element is created with ``SubElement`` under
        the innermost open element.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            for name_tuple, value in attributes.items():
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = self._makeelement(
                el_name, attrs, self._new_mappings)
            # Re-attach processing instructions that preceded the root.
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists the instruction is queued and
        attached when the root is created.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi)

    def endElementNS(self, ns_name, qname):
        """Close the current element.

        Raises ``SaxError`` if the closed name does not match the element
        that is currently open.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant of ``startElementNS``."""
        if attributes:
            attributes = {(None, k): v for k, v in attributes.items()}
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant of ``endElementNS``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element."""
        last_element = self._element_stack[-1]
        try:
            # If there already is a child, the text follows it: use its tail.
            last_child = last_element[-1]
        except IndexError:
            # No children yet: the text belongs to the element itself.
            last_element.text = (last_element.text or '') + data
        else:
            last_child.tail = (last_child.tail or '') + data

    ignorableWhitespace = characters
152
153
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.
    """

    def __init__(self, element_or_tree, content_handler):
        """Remember the source element and the target handler.

        ``element_or_tree`` may be a tree (anything with ``getroot()``) or
        an element; ``content_handler`` receives the SAX events.
        """
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared attribute object for elements without attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the complete event sequence for the stored element.

        Processing instructions directly adjacent to the element (before
        and after it) are included, wrapped in startDocument/endDocument.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            # Collect preceding PIs walking backwards, then emit them in
            # document order.
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in reversed(siblings):
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Emit the events for ``element`` and everything below it.

        ``prefixes`` maps namespace URIs to the prefixes that are declared
        at this point; it is extended while this element is open and
        restored before returning.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own; only their tail text
            # is forwarded.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope now.  Forget it so that a later
            # sibling using the same namespace declares it again instead of
            # reusing a prefix whose mapping has already been ended.
            prefixes.pop(uri, None)
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``(ns_uri, local_name)``.

        If ``ns_uri`` has no prefix in ``prefixes`` yet, a generated one
        (``ns00``, ``ns01``, ...) is stored there and the ``(prefix, uri)``
        pair is appended to ``new_prefixes``.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
243
def saxify(element_or_tree, content_handler):
    """One-shot helper to generate SAX events from an XML tree and fire
    them against a SAX ContentHandler.

    Builds an ``ElementTreeProducer`` for the two arguments and returns
    the result of its ``saxify()`` call.
    """
    return ElementTreeProducer(element_or_tree, content_handler).saxify()
249