Package lxml :: Module sax
[hide private]
[frames | no frames]

Source Code for Module lxml.sax

  1  # cython: language_level=2 
  2   
  3  """ 
  4  SAX-based adapter to copy trees from/to the Python standard library. 
  5   
  6  Use the `ElementTreeContentHandler` class to build an ElementTree from 
  7  SAX events. 
  8   
  9  Use the `ElementTreeProducer` class or the `saxify()` function to fire 
 10  the SAX events of an ElementTree against a SAX ContentHandler. 
 11   
 12  See http://codespeak.net/lxml/sax.html 
 13  """ 
 14   
 15  from __future__ import absolute_import 
 16   
 17  from xml.sax.handler import ContentHandler 
 18  from lxml import etree 
 19  from lxml.etree import ElementTree, SubElement 
 20  from lxml.etree import Comment, ProcessingInstruction 
 21   
 22   
class SaxError(etree.LxmlError):
    """Error raised by this module for problems in the SAX event stream."""
26 27
28 -def _getNsTag(tag):
29 if tag[0] == '{': 30 return tuple(tag[1:].split('}', 1)) 31 else: 32 return None, tag
33 34
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Pass an instance to any SAX event source.  After the events have been
    delivered, the tree is available through the ``etree`` property.

    ``makeelement``, if given, is called as ``makeelement(tag, attrib, nsmap)``
    to create the root element; it defaults to ``lxml.etree.Element``.
    Descendants are always created with ``lxml.etree.SubElement``.
    """

    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions received before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs.  The None prefix (default namespace)
        # starts with a None sentinel so endPrefixMapping() can always fall
        # back to "no default namespace".
        self._ns_mapping = {None: [None]}
        # Prefix mappings declared since the last start tag; they are handed
        # to the next element as its nsmap and then cleared.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignore the locator; it is not used by this handler."""
        pass

    def startDocument(self):
        """No-op; nothing needs to be set up at document start."""
        pass

    def endDocument(self):
        """No-op; the tree is already complete when the root was closed."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next start tag."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Leave the scope of the innermost declaration of ``prefix``."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Restore the enclosing default namespace (or the None sentinel).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a ``(uri, local_name)`` pair.

        Names without a namespace URI are put into the current default
        namespace, if one is in scope.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element.

        ``ns_name`` is a ``(uri, local_name)`` pair.  ``attributes`` maps
        ``(uri, local_name)`` pairs to values.  ``qname`` is not used.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                # Python 2 mappings
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    # Unlike element names, attributes without a namespace
                    # are not put into the default namespace.
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # Attach the PIs that were seen before the root element.
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists, the PI is queued and later attached
        in front of the root.  Inside the root it is appended to the
        innermost open element.  After the root has been closed it is added
        behind the root (and behind any PI already placed there), provided
        the root element offers ``addnext()``/``getnext()``.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        elif hasattr(self._root, 'addnext') and hasattr(self._root, 'getnext'):
            # The root is already closed: there is no open element to append
            # to, so make the PI a trailing sibling.  Walk to the last
            # existing sibling first to keep document order.
            last_node = self._root
            next_node = last_node.getnext()
            while next_node is not None:
                last_node = next_node
                next_node = last_node.getnext()
            last_node.addnext(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises `SaxError` if ``ns_name`` does not match that element's tag.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant of `startElementNS()`."""
        if attributes:
            # Re-key the plain attribute names as (None, name) pairs.
            attributes = dict(
                ((None, k), v) for k, v in attributes.items()
            )
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant of `endElementNS()`."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append character data at the current position.

        Data received while no element is open (before the root element or
        after it was closed) is ignored, since there is no element whose
        text or tail could hold it.
        """
        if not self._element_stack:
            return
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
157 158
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    ``element_or_tree`` may be an element or an object with a ``getroot()``
    method.  ``content_handler`` receives the events when `saxify()` is
    called.
    """

    def __init__(self, element_or_tree, content_handler):
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            # Not a tree: treat the argument as the element itself.
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared attribute set for elements that have no attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the SAX events for the element against the content handler.

        Processing instructions that precede or follow the element are
        reported as well.  Comments between them are skipped, so a PI that
        is separated from the element by a comment is still reported.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            # Collected nearest-first; replay them in document order.
            preceding = self._adjacent_pis(element, 'getprevious')
            for sibling in reversed(preceding):
                self._recursive_saxify(sibling, {})

        # NOTE(review): the tail of this top-level element, if any, is
        # reported after its end tag, i.e. outside of any open element.
        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            for sibling in self._adjacent_pis(element, 'getnext'):
                self._recursive_saxify(sibling, {})

        self._content_handler.endDocument()

    def _adjacent_pis(self, element, step_name):
        """Return the PIs next to ``element`` in one direction.

        ``step_name`` is ``'getprevious'`` or ``'getnext'``.  The walk passes
        over comments and stops at the first node that is neither a comment
        nor a processing instruction.  Results are ordered nearest first.
        """
        found = []
        sibling = getattr(element, step_name)()
        while sibling is not None:
            tag = getattr(sibling, 'tag', None)
            if tag is ProcessingInstruction:
                found.append(sibling)
            elif tag is not Comment:
                break
            sibling = getattr(sibling, step_name)()
        return found

    def _recursive_saxify(self, element, parent_nsmap):
        """Fire the events for ``element``, its subtree and its tail.

        ``parent_nsmap`` is the prefix mapping already in scope; only
        prefixes that are new or changed relative to it are declared.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own, but their tail text
            # is still reported.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            tail = element.tail
            if tail:
                content_handler.characters(tail)
            return

        element_nsmap = element.nsmap
        new_prefixes = []
        if element_nsmap != parent_nsmap:
            # There have been updates to the namespace
            new_prefixes = [
                (prefix, ns_uri)
                for prefix, ns_uri in element_nsmap.items()
                if parent_nsmap.get(prefix) != ns_uri
            ]

        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = self._build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap,
                    preferred_prefix=None, is_attribute=True)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = self._build_qname(
            ns_uri, local_name, element_nsmap, element.prefix,
            is_attribute=False)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS(
            (ns_uri, local_name), qname, sax_attributes)
        text = element.text
        if text:
            content_handler.characters(text)
        for child in element:
            self._recursive_saxify(child, element_nsmap)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
        tail = element.tail
        if tail:
            content_handler.characters(tail)

    def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix,
                     is_attribute):
        """Return the qualified (``prefix:local``) name for a tag.

        Elements keep ``preferred_prefix`` when ``nsmap`` maps it to
        ``ns_uri``.  Otherwise, and always for attributes, the
        alphabetically first non-default prefix bound to ``ns_uri`` is
        used.  Without a usable prefix the bare local name is returned.
        """
        if ns_uri is None:
            return local_name

        if not is_attribute and nsmap.get(preferred_prefix) == ns_uri:
            prefix = preferred_prefix
        else:
            # Pick the first matching prefix, in alphabetical order.
            candidates = [
                pfx for (pfx, uri) in nsmap.items()
                if pfx is not None and uri == ns_uri
            ]
            prefix = min(candidates) if candidates else None

        if prefix is None:
            # Default namespace
            return local_name
        return prefix + ':' + local_name
272 273
def saxify(element_or_tree, content_handler):
    """Generate the SAX events of an XML tree in a single call.

    Wraps ``element_or_tree`` in an `ElementTreeProducer` and fires its
    events against ``content_handler``.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
279