1 from xml.sax.handler import ContentHandler
2 import etree
3 from etree import ElementTree, SubElement
4 from etree import Comment, ProcessingInstruction
5
# NOTE(review): the ``class`` line was missing from the reviewed text.  The
# name comes from the ``raise SaxError`` site in this file; the base class
# follows upstream lxml -- confirm against the project's etree module.
class SaxError(etree.LxmlError):
    """General SAX error.

    Raised by the content handler when the incoming SAX event stream is
    inconsistent, e.g. when an element is closed that was not the one
    most recently opened.
    """
10
12 if tag[0] == '{':
13 return tuple(tag[1:].split('}', 1))
14 else:
15 return (None, tag)
16
class ElementTreeContentHandler(ContentHandler, object):
    """Build an lxml ElementTree from SAX events.

    Both namespace-aware events (``startElementNS`` / ``endElementNS``)
    and plain events (``startElement`` / ``endElement``) are accepted.
    The tree built so far is available through the ``etree`` property.
    """
    # ``object`` must come after ``ContentHandler`` in the bases: listing
    # it first cannot be linearised on Python 3 (TypeError at class
    # creation), while this order still gives a new-style class on
    # Python 2, which the ``etree`` property needs.

    def __init__(self, makeelement=None):
        """Create an empty builder.

        makeelement -- optional factory, called as
            ``makeelement(tag, attrib, nsmap)`` to create the root
            element.  Defaults to ``etree.Element``.
        """
        self._root = None
        # Processing instructions seen before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the None prefix is the default namespace.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last startElementNS(); they are
        # handed to the next element as its nsmap and then cleared.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignore the locator; position information is not used."""
        pass

    def startDocument(self):
        """Nothing to do at document start."""
        pass

    def endDocument(self):
        """Nothing to do at document end."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element."""
        self._new_mappings[prefix] = uri
        self._ns_mapping.setdefault(prefix, []).append(uri)
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost declaration of *prefix*.

        Raises KeyError if *prefix* was never declared.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace.
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a SAX ``(uri, local)`` name.

        A name without a namespace is placed in the current default
        namespace, if one is declared.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            return "{%s}%s" % (ns_uri, local_name)
        if self._default_ns:
            return "{%s}%s" % (self._default_ns, local_name)
        return local_name

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element.

        ns_name    -- ``(namespace_uri, local_name)`` tuple
        qname      -- prefixed name as it appeared in the source (unused)
        attributes -- mapping of ``(namespace_uri, local_name)`` to value,
                      or None
        """
        el_name = self._buildTag(ns_name)

        if attributes:
            attrs = {}
            for name_tuple, value in attributes.items():
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    # Unqualified attributes never inherit the default
                    # namespace.
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # Attach the processing instructions that preceded the
                # root element, in document order.
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position."""
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            # No root yet: keep it until the root element is created.
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if *ns_name* does not name that element.
        """
        element = self._element_stack.pop()
        # Resolve the name exactly as startElementNS() did, so that an
        # un-namespaced element opened under a default namespace matches
        # on close.
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Open an element from a non-namespace SAX event.

        Plain attribute names are converted to ``(None, name)`` tuples,
        the form startElementNS() expects; names that already are tuples
        are passed through unchanged.
        """
        if attributes:
            attributes = dict(
                (key if isinstance(key, tuple) else (None, key), value)
                for key, value in attributes.items())
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Close an element from a non-namespace SAX event."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append character data at the current position."""
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_child = last_element[-1]
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data
        else:
            last_child.tail = (last_child.tail or '') + data

    ignorableWhitespace = characters
130
131
# NOTE(review): the ``class`` line and the ``def`` lines of saxify() and
# _recursive_saxify() were missing from the reviewed text.  The
# _recursive_saxify signature is taken from its call sites; the class name
# and the saxify() method name follow upstream lxml -- confirm.
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.
    """

    def __init__(self, element_or_tree, content_handler):
        """Remember the element to walk and the handler to notify.

        element_or_tree -- an element, or any object whose ``getroot()``
                           returns one
        content_handler -- receiver of the SAX callbacks
        """
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            # Not a tree: treat the argument as the element itself.
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared by every element that has no attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Emit the complete event stream for the element.

        Processing instructions directly before and after the element are
        included when the element offers ``getprevious`` / ``getnext``.
        """
        handler = self._content_handler
        handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            # Collect preceding PIs walking backwards, then emit them in
            # document order.
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in reversed(siblings):
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Emit events for *element*, its subtree and its tail text.

        prefixes -- dict mapping namespace URI to the generated prefix
                    for every mapping currently in scope; updated in
                    place while this element is open.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own; only their tail
            # text is reported.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        # (prefix, uri) pairs first needed by this element.
        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1],
                    prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope from here on.  Forget it so a
            # later sibling using the same namespace declares it again
            # instead of reusing a prefix the handler no longer knows.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``(ns_uri, local_name)``.

        A namespace without a prefix in *prefixes* gets a generated one
        (``ns00``, ``ns01``, ...), which is stored in *prefixes* and
        appended to *new_prefixes* as ``(prefix, ns_uri)``.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
221
222 -def saxify(element_or_tree, content_handler):
224