"""
SAX-based adapter to copy trees from/to the Python standard library.

Use the `ElementTreeContentHandler` class to build an ElementTree from
SAX events.

Use the `ElementTreeProducer` class or the `saxify()` function to fire
the SAX events of an ElementTree against a SAX ContentHandler.

See http://codespeak.net/lxml/sax.html
"""
12
13 from xml.sax.handler import ContentHandler
14 from lxml import etree
15 from lxml.etree import ElementTree, SubElement
16 from lxml.etree import Comment, ProcessingInstruction
17
# NOTE(review): the ``class`` header was missing from this copy of the file.
# The name is taken from the ``raise SaxError(...)`` in
# ElementTreeContentHandler.endElementNS(); the base class follows the
# published lxml.sax API (SaxError derives from etree.LxmlError) -- confirm.
class SaxError(etree.LxmlError):
    """General SAX error.

    Raised by `ElementTreeContentHandler.endElementNS()` when the element
    being closed does not match the element that is currently open.
    """
22
24 if tag[0] == '{':
25 return tuple(tag[1:].split('}', 1))
26 else:
27 return (None, tag)
28
29
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed the handler with SAX events (namespace-aware or not) and read the
    result from the `etree` property once the events have been delivered.
    """
    def __init__(self, makeelement=None):
        """Set up an empty builder.

        ``makeelement`` is the factory used to create the root element.  It
        is called as ``makeelement(tag, attrs, nsmap)`` and defaults to
        ``etree.Element``.  Child elements are always created with
        ``SubElement``.
        """
        ContentHandler.__init__(self)
        self._root = None
        # processing instructions seen before the root element was started
        self._root_siblings = []
        # chain of currently open elements, innermost last
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the initial [None] entry keeps the
        # default-namespace stack non-empty after the last mapping ends
        self._ns_mapping = { None : [None] }
        # mappings declared since the last element start
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignored; the builder does not use locator information."""

    def startDocument(self):
        """Ignored; no per-document setup is required."""

    def endDocument(self):
        """Ignored; the tree is complete once the root element is closed."""

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next started element.

        A ``prefix`` of None declares the default namespace.
        """
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping of ``prefix``.

        Ending a default-namespace mapping restores the previous default.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}name`` tag for a ``(uri, local_name)`` tuple.

        Names without a namespace URI are put into the current default
        namespace, if there is one.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create the element and make it the innermost open element.

        ``attributes`` maps ``(uri, local_name)`` tuples to values.
        Attributes without a namespace URI stay unqualified; the default
        namespace is not applied to them.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            # Attach the processing instructions collected before the root;
            # they are dropped if the element type cannot take siblings.
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element it is queued as a preceding sibling of the
        root, inside an open element it is appended as a child, and after
        the root has been closed it becomes a following sibling of the root.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        elif hasattr(self._root, 'addnext'):
            # The root is already closed.  Walk to the last sibling so that
            # several trailing PIs keep their document order.
            last = self._root
            following = last.getnext()
            while following is not None:
                last = following
                following = last.getnext()
            last.addnext(pi)
        # else: the element type cannot take siblings; the PI is dropped,
        # the same way leading PIs are dropped in startElementNS().

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if ``ns_name`` does not match that element's tag.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant of `startElementNS()`."""
        if attributes:
            attributes = dict(
                ((None, k), v) for k, v in attributes.items()
            )
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant of `endElementNS()`."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element.

        The text goes to the tail of the element's last child if it has
        one, otherwise to the element's own text.
        """
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data
        else:
            last_element.tail = (last_element.tail or '') + data

    ignorableWhitespace = characters
151
152
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    The events are fired against the ContentHandler passed to the
    constructor when `saxify()` is called.
    """
    def __init__(self, element_or_tree, content_handler):
        """Accept either an element or a tree (anything with ``getroot()``)."""
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # shared instance for elements without attributes
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the complete event sequence for the wrapped element.

        Emits startDocument, the processing instructions directly preceding
        the element, the element with its subtree, the processing
        instructions directly following it, and endDocument.  Siblings are
        only looked up if the element provides getprevious()/getnext().
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            # getprevious() walks backwards, so collect first and replay in
            # document order
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Fire the events for ``element``, its subtree and its tail text.

        ``prefixes`` maps the namespace URIs that are currently in scope to
        their generated prefixes.  Entries added for this element are
        removed again before returning.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # comments produce no event of their own, only their tail text
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope now.  Forget it, so that a later
            # sibling using the same namespace declares it again instead of
            # referring to a prefix the handler has already been told to end.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``local_name`` in ``ns_uri``.

        Names without a namespace are returned unchanged.  For a namespace
        that has no prefix yet, a prefix of the form ``nsNN`` is generated,
        stored in ``prefixes`` and appended to ``new_prefixes`` as a
        ``(prefix, uri)`` tuple.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append( (prefix, ns_uri) )
        return prefix + ':' + local_name
242
def saxify(element_or_tree, content_handler):
    """Convenience wrapper: generate the SAX events of an XML tree (or
    element) and send them to the given SAX ContentHandler in one call.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
248