1 from xml.sax.handler import ContentHandler
2 from etree import ElementTree, Element, SubElement, LxmlError
3 from etree import XML, Comment, ProcessingInstruction
4
# NOTE(review): the ``class`` line was missing from the reviewed text, which
# left the docstring and ``pass`` as stray statements and ``SaxError``
# undefined where it is raised.  The base class is presumed to be
# ``LxmlError`` (imported at the top of the file and not used anywhere
# else) -- confirm.
class SaxError(LxmlError):
    """General SAX error.
    """
    pass
9
11 if tag[0] == '{':
12 return tuple(tag[1:].split('}', 1))
13 else:
14 return (None, tag)
15
class ElementTreeContentHandler(ContentHandler, object):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events into an instance (directly or through a SAX parser)
    and read the result from the ``etree`` property afterwards.
    """
    # ``ContentHandler`` is listed before ``object``: the reverse order
    # cannot be linearised into an MRO on Python 3, while this order still
    # gives a new-style class on Python 2.

    def __init__(self, makeelement=None):
        """Create an empty builder.

        ``makeelement`` is the factory used for the root element; it is
        called as ``makeelement(tag, attrib, nsmap)`` and defaults to
        ``Element``.
        """
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions received before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the ``None`` prefix is the default
        # namespace and always keeps its initial ``None`` entry.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last start tag; they are handed to
        # the next element that gets created and then cleared.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        # The locator is not used by this handler.
        pass

    def startDocument(self):
        pass

    def endDocument(self):
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element."""
        self._new_mappings[prefix] = uri
        self._ns_mapping.setdefault(prefix, []).append(uri)
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost declaration of ``prefix``."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace (or None).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a SAX ``(uri, local)`` name.

        A name without a namespace URI is placed in the current default
        namespace, if one is declared.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            return "{%s}%s" % ns_name_tuple
        if self._default_ns:
            return "{%s}%s" % (self._default_ns, local_name)
        return local_name

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element as the root or as a child of the current one.

        ``attributes`` maps ``(uri, localname)`` tuples to values.
        """
        el_name = self._buildTag(ns_name)

        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            # Attach processing instructions seen before the root, if the
            # element implementation supports preceding siblings.
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position."""
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises ``SaxError`` if ``ns_name`` does not name that element.
        """
        element = self._element_stack.pop()
        # Build the expected tag the same way startElementNS() did, so an
        # unqualified name inside a default namespace still matches.
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Namespace-unaware variant of ``startElementNS()``."""
        if attributes:
            # Plain SAX attributes are keyed by name strings, whereas
            # startElementNS() expects (uri, localname) tuples.
            attributes = dict(
                ((None, attr_name), value)
                for attr_name, value in attributes.items())
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Namespace-unaware variant of ``endElementNS()``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append character data to the innermost open element."""
        last_element = self._element_stack[-1]
        try:
            # If the element already has a child, the data follows that
            # child and therefore belongs to its tail.
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # No children yet: the data is part of the element's text.
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
129
130
# NOTE(review): the ``class`` line and the ``def`` lines of ``saxify`` and
# ``_recursive_saxify`` were missing from the reviewed text.  They are
# restored from the call sites in this class and the public lxml.sax API
# -- confirm against the original file.
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.
    """

    def __init__(self, element_or_tree, content_handler):
        """Remember the root element and the handler that receives events.

        ``element_or_tree`` may be an object offering ``getroot()`` or an
        element, which is then used as the root itself.
        """
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared attribute object for elements without attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the SAX events for the whole document at the handler."""
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            # Processing instructions directly preceding the root are
            # collected walking backwards, then emitted in document order.
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            # Processing instructions directly following the root.
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Emit the events for ``element``, its subtree and its tail text.

        ``prefixes`` maps the namespace URIs that are currently in scope
        to their generated prefixes.  It is extended while this element
        is open and restored before returning.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own; only their tail
            # text is reported.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        # (prefix, uri) pairs that first come into scope on this element.
        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope for the handler now, so forget
            # it here as well.  Otherwise a later sibling in the same
            # namespace would reuse the prefix without a fresh
            # startPrefixMapping() event.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``local_name`` in ``ns_uri``.

        A namespace not yet present in ``prefixes`` gets a generated
        ``nsNN`` prefix, which is stored in ``prefixes`` and appended to
        ``new_prefixes`` as a ``(prefix, uri)`` pair.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
220
def saxify(element_or_tree, content_handler):
    """One-shot helper to generate SAX events from an XML tree and fire
    them against a SAX ContentHandler.
    """
    # NOTE(review): the body was missing from the reviewed text; restored
    # per the public lxml.sax API -- confirm against the original file.
    return ElementTreeProducer(element_or_tree, content_handler).saxify()
223