Package lxml :: Module sax
[hide private]
[frames | no frames]

Source Code for Module lxml.sax

  1  from xml.sax.handler import ContentHandler 
  2  from etree import ElementTree, Element, SubElement, LxmlError 
  3  from etree import XML, Comment, ProcessingInstruction 
  4   
class SaxError(LxmlError):
    """Error raised for problems in the SAX event handling of this module."""
9
10 -def _getNsTag(tag):
11 if tag[0] == '{': 12 return tuple(tag[1:].split('}', 1)) 13 else: 14 return (None, tag)
15
class ElementTreeContentHandler(ContentHandler, object):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events into an instance and read the resulting tree from the
    ``etree`` property once parsing is finished.

    ``makeelement`` is an optional factory that is called as
    ``makeelement(tag, attrib, nsmap)`` to create the root element; it
    defaults to ``Element``.  Child elements are always created with
    ``SubElement``.
    """
    # NOTE: ``ContentHandler`` must come before ``object`` in the base list.
    # With ``object`` first, the class statement fails with an MRO
    # ``TypeError`` whenever ``ContentHandler`` is itself a new-style class.

    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions seen before the root element was opened.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the initial entry keeps "no default
        # namespace" available as the outermost default.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last element start; they are handed
        # to the next element as its nsmap.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        pass

    def startDocument(self):
        pass

    def endDocument(self):
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix mapping for the element that is about to start."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping of ``prefix``."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace.
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name):
        """Return the ``{uri}local`` tag for a SAX ``(uri, localname)`` pair.

        A name without a namespace URI is placed into the current default
        namespace, if there is one.
        """
        ns_uri, local_name = ns_name
        if ns_uri:
            return "{%s}%s" % (ns_uri, local_name)
        if self._default_ns:
            return "{%s}%s" % (self._default_ns, local_name)
        return local_name

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create a new element and make it the innermost open element.

        ``attributes`` maps ``(uri, localname)`` tuples to values.
        """
        el_name = self._buildTag(ns_name)

        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    # Attributes never inherit the default namespace.
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # Attach the PIs that preceded the root element.
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position."""
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        else:
            # The root element is already closed, so there is no open
            # element to append to.  Put the PI behind the last node that
            # follows the root, which keeps document order for several
            # trailing PIs.  Elements without sibling support silently
            # lose it, as they do for PIs that precede the root.
            anchor = self._root
            if hasattr(anchor, 'addnext') and hasattr(anchor, 'getnext'):
                following = anchor.getnext()
                while following is not None:
                    anchor = following
                    following = anchor.getnext()
                anchor.addnext(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises ``SaxError`` if ``ns_name`` does not name that element.
        """
        element = self._element_stack.pop()
        # Compare against the tag that startElementNS() would have built,
        # so that the default namespace is taken into account.
        if self._buildTag(ns_name) != element.tag:
            ns_uri, local_name = ns_name
            raise SaxError(
                "Unexpected element closed: {%s}%s" % (ns_uri, local_name))

    def startElement(self, name, attributes=None):
        """Non-namespace variant of ``startElementNS()``.

        ``attributes`` maps plain attribute names to values.
        """
        if attributes:
            # startElementNS() expects (uri, localname) tuples as keys.
            attributes = dict(
                [((None, key), value) for key, value in attributes.items()])
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant of ``endElementNS()``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element."""
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
129 130
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    ``element_or_tree`` may be an element or any object with a
    ``getroot()`` method; ``content_handler`` receives the events when
    ``saxify()`` is called.
    """
    def __init__(self, element_or_tree, content_handler):
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            # Not a tree: treat the argument as the element itself.
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared attribute object for elements without attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Send the complete event stream to the content handler.

        Processing instructions that directly precede or follow the
        element are reported as well when the element supports
        ``getprevious()`` / ``getnext()``.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            # Collected back to front, reported in document order.
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Emit the events for ``element``, its subtree and its tail text.

        ``prefixes`` maps namespace URIs to the generated prefixes that
        are in scope at this point.  It is extended while the element is
        processed and restored before returning.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own; only their tail
            # text is passed on.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        # (prefix, uri) pairs that this element has to declare itself.
        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope from here on.  Forget it, so
            # that a later element using the same namespace declares it
            # again instead of reusing a prefix whose mapping has ended.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``(ns_uri, local_name)``.

        A namespace without a prefix in ``prefixes`` gets a generated
        ``nsNN`` prefix, which is stored in ``prefixes`` and appended to
        ``new_prefixes`` as a ``(prefix, uri)`` pair.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
220
def saxify(element_or_tree, content_handler):
    """Generate SAX events for an element or tree.

    Wraps the arguments in an ``ElementTreeProducer`` and returns the
    result of its ``saxify()`` method.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
223