1 """
2 SAX-based adapter to copy trees from/to the Python standard library.
3
4 Use the `ElementTreeContentHandler` class to build an ElementTree from
5 SAX events.
6
7 Use the `ElementTreeProducer` class or the `saxify()` function to fire
8 the SAX events of an ElementTree against a SAX ContentHandler.
9
10 See https://lxml.de/sax.html
11 """
12
13 from xml.sax.handler import ContentHandler
14 from lxml import etree
15 from lxml.etree import ElementTree, SubElement
16 from lxml.etree import Comment, ProcessingInstruction
17
# NOTE(review): the ``class`` statement itself is not visible in the reviewed
# listing. The name is taken from the ``raise SaxError(...)`` call in
# ElementTreeContentHandler.endElementNS; the base class follows upstream
# lxml.sax -- confirm against the real file.
class SaxError(etree.LxmlError):
    """General SAX error.

    Raised when the incoming SAX event stream is inconsistent with the tree
    being built, e.g. when an element is closed that is not the one
    currently open.
    """
22
24 if tag[0] == '{':
25 return tuple(tag[1:].split('}', 1))
26 else:
27 return (None, tag)
28
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events into an instance (directly or through a SAX parser) and
    read the resulting tree from the ``etree`` property afterwards.
    """

    def __init__(self, makeelement=None):
        """Set up an empty builder.

        ``makeelement`` is the factory used for the root element; it is
        called as ``makeelement(tag, attrs, nsmap)``.  It defaults to
        ``etree.Element``.
        """
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions received before the root element exists.
        self._root_siblings = []
        # Elements that are currently open, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the None prefix is the default namespace.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last element was started.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    # NOTE(review): the bodies of the next three methods are not visible in
    # the reviewed listing; they are assumed to be no-ops -- confirm.
    def setDocumentLocator(self, locator):
        """Accept the locator; it is not used for tree building."""
        pass

    def startDocument(self):
        """Accept the start-of-document event; nothing to do."""
        pass

    def endDocument(self):
        """Accept the end-of-document event; nothing to do."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element.

        A ``None`` prefix declares the default namespace.
        """
        self._new_mappings[prefix] = uri
        self._ns_mapping.setdefault(prefix, []).append(uri)
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping of ``prefix``.

        Raises ``KeyError`` if the prefix was never mapped.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace (None at top level).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the Clark-notation tag for a ``(ns_uri, local_name)`` pair.

        A name without its own namespace picks up the current default
        namespace, if one is set.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element.

        ``ns_name`` is a ``(ns_uri, local_name)`` tuple.  ``attributes``,
        if given, maps ``(ns_uri, local_name)`` tuples to values.  ``qname``
        is not used.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            for name_tuple, value in attributes.items():
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    # Attributes do not inherit the default namespace.
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # Attach the processing instructions seen before the root.
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        # The declarations belong to this element only.
        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists the instruction is queued and later
        attached in front of the root.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises ``SaxError`` if ``ns_name`` does not match that element.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Open a new element from a non-namespace SAX event.

        ``attributes`` maps plain attribute names to values.
        """
        if attributes:
            # startElementNS expects (ns_uri, local_name) keys; plain string
            # keys would be misread as tuples and break tag formatting.
            attributes = dict(
                ((None, attr_name), value)
                for attr_name, value in attributes.items())
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Close an element from a non-namespace SAX event."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element."""
        last_element = self._element_stack[-1]
        try:
            # If the element already has a child, the text follows that
            # child and therefore belongs to the child's tail.
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # No children yet: the text is the element's own leading text.
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
146
147
# NOTE(review): the ``class`` line and the ``def`` lines of saxify() and
# _recursive_saxify() are not visible in the reviewed listing.  Their names
# and parameters are taken from the call sites in this file; confirm the
# base class against the real file.
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.
    """

    def __init__(self, element_or_tree, content_handler):
        """Prepare event generation.

        ``element_or_tree`` may be a tree (anything with ``getroot()``) or
        an element.  ``content_handler`` receives the SAX events.
        """
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared by every element that has no attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the SAX events for the whole document.

        Processing instructions directly before and after the root element
        are reported as well, when the element supports sibling access.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            # Collected walking backwards; report them in document order.
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Fire the events for ``element``, its subtree and its tail text.

        ``prefixes`` maps namespace URIs to the prefixes that are in scope
        at this point.  Prefixes introduced by this element are added for
        the duration of the element and removed again afterwards.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event here; only their tail is reported.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping just went out of scope.  Forget it, so that a
            # later sibling declares the namespace again instead of using
            # a prefix the handler no longer knows.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``(ns_uri, local_name)``.

        If ``ns_uri`` has no prefix in ``prefixes`` yet, one is generated,
        stored in ``prefixes`` and appended to ``new_prefixes`` as a
        ``(prefix, ns_uri)`` pair.  Names without a namespace are returned
        unchanged.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
237
def saxify(element_or_tree, content_handler):
    """One-shot helper to generate SAX events from an XML tree and fire
    them against a SAX ContentHandler.

    ``element_or_tree`` and ``content_handler`` are passed on unchanged to
    `ElementTreeProducer`; the result of its ``saxify()`` call is returned.
    """
    return ElementTreeProducer(element_or_tree, content_handler).saxify()
243