1 """
2 SAX-based adapter to copy trees from/to the Python standard library.
3
4 Use the `ElementTreeContentHandler` class to build an ElementTree from
5 SAX events.
6
7 Use the `ElementTreeProducer` class or the `saxify()` function to fire
8 the SAX events of an ElementTree against a SAX ContentHandler.
9
10 See https://lxml.de/sax.html
11 """
12
13 from xml.sax.handler import ContentHandler
14 from lxml import etree
15 from lxml.etree import ElementTree, SubElement
16 from lxml.etree import Comment, ProcessingInstruction
17
18
20 """General SAX error.
21 """
22
23
25 if tag[0] == '{':
26 return tuple(tag[1:].split('}', 1))
27 else:
28 return (None, tag)
29
30
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Pass an instance to a SAX event source; the tree built so far is
    available through the ``etree`` property.

    ``makeelement`` is an optional factory, called as
    ``makeelement(tag, attrib, nsmap)``, used to create the root element.
    It defaults to ``lxml.etree.Element``.
    """

    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions received before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the last entry is the active mapping.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last element start; handed to the
        # next element as its nsmap and then cleared.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        # NOTE(review): the body of this method is missing from the source
        # listing; restored as a no-op so the class parses -- confirm.
        pass

    def startDocument(self):
        # NOTE(review): body missing from the source listing; restored as a
        # no-op -- confirm.
        pass

    def endDocument(self):
        # NOTE(review): body missing from the source listing; restored as a
        # no-op -- confirm.
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix mapping that applies to the next started element."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping of ``prefix``.

        Raises KeyError if ``prefix`` was never mapped.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace.
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the element tag for a SAX ``(ns_uri, local_name)`` pair.

        A name without a namespace URI is placed in the current default
        namespace, if one is set.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create a new element and push it on the element stack.

        The first element becomes the root (built with the ``makeelement``
        factory); every later element is created with ``SubElement`` below
        the innermost open element.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            for name_tuple, value in attributes.items():
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            # Re-attach processing instructions seen before the root, when
            # the element type supports preceding siblings.
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Append a processing instruction at the current position."""
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            # NOTE(review): once the root element has been closed the stack
            # is empty and this raises IndexError -- confirm whether
            # trailing processing instructions need to be supported.
            self._element_stack[-1].append(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if the closed name does not match that element.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant: delegate to ``startElementNS``."""
        if attributes:
            attributes = {(None, k): v for k, v in attributes.items()}
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant: delegate to ``endElementNS``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element."""
        last_element = self._element_stack[-1]
        try:
            # If there already is a child element, the text belongs to the
            # tail of the last child.
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # No children yet: the text belongs to the element itself.
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
153
154
156 """Produces SAX events for an element and children.
157 """
158 - def __init__(self, element_or_tree, content_handler):
159 try:
160 element = element_or_tree.getroot()
161 except AttributeError:
162 element = element_or_tree
163 self._element = element
164 self._content_handler = content_handler
165 from xml.sax.xmlreader import AttributesNSImpl as attr_class
166 self._attr_class = attr_class
167 self._empty_attributes = attr_class({}, {})
168
191
def _recursive_saxify(self, element, prefixes):
    """Fire the SAX events for ``element`` and, recursively, its children.

    ``prefixes`` maps namespace URIs to the prefixes that are currently in
    scope. Mappings first needed by this element are announced with
    ``startPrefixMapping`` before the element starts and ended (and removed
    from ``prefixes``) after it ends.

    Comments produce no event of their own; processing instructions are
    reported through ``processingInstruction``. In both cases the tail text
    is still reported as character data.
    """
    # NOTE(review): the ``def`` line is missing from the source listing; the
    # signature is reconstructed from the recursive call below. Presumably a
    # method of ElementTreeProducer -- confirm and indent accordingly.
    content_handler = self._content_handler
    tag = element.tag
    if tag is Comment or tag is ProcessingInstruction:
        if tag is ProcessingInstruction:
            content_handler.processingInstruction(
                element.target, element.text)
        if element.tail:
            content_handler.characters(element.tail)
        return

    # (prefix, uri) pairs that this element introduces.
    new_prefixes = []
    build_qname = self._build_qname
    attribs = element.items()
    if attribs:
        attr_values = {}
        attr_qnames = {}
        for attr_ns_name, value in attribs:
            attr_ns_tuple = _getNsTag(attr_ns_name)
            attr_values[attr_ns_tuple] = value
            attr_qnames[attr_ns_tuple] = build_qname(
                attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
        sax_attributes = self._attr_class(attr_values, attr_qnames)
    else:
        sax_attributes = self._empty_attributes

    ns_uri, local_name = _getNsTag(tag)
    qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

    for prefix, uri in new_prefixes:
        content_handler.startPrefixMapping(prefix, uri)
    content_handler.startElementNS((ns_uri, local_name),
                                   qname, sax_attributes)
    if element.text:
        content_handler.characters(element.text)
    for child in element:
        self._recursive_saxify(child, prefixes)
    content_handler.endElementNS((ns_uri, local_name), qname)
    for prefix, uri in new_prefixes:
        content_handler.endPrefixMapping(prefix)
        # The mapping is out of scope now. Forget it so that a later
        # sibling using the same namespace announces it again instead of
        # silently reusing a prefix that was already ended.
        prefixes.pop(uri, None)
    if element.tail:
        content_handler.characters(element.tail)
234
235 - def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
236 if ns_uri is None:
237 return local_name
238 try:
239 prefix = prefixes[ns_uri]
240 except KeyError:
241 prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
242 new_prefixes.append( (prefix, ns_uri) )
243 return prefix + ':' + local_name
244
def saxify(element_or_tree, content_handler):
    """One-shot helper to generate SAX events from an XML tree and fire
    them against a SAX ContentHandler.

    Builds an ``ElementTreeProducer`` for the two arguments and returns the
    result of its ``saxify()`` call.
    """
    return ElementTreeProducer(element_or_tree, content_handler).saxify()
250