Package lxml :: Module sax
[hide private]
[frames | no frames]

Source Code for Module lxml.sax

  1  """ 
  2  SAX-based adapter to copy trees from/to the Python standard library. 
  3   
  4  Use the `ElementTreeContentHandler` class to build an ElementTree from 
  5  SAX events. 
  6   
  7  Use the `ElementTreeProducer` class or the `saxify()` function to fire 
  8  the SAX events of an ElementTree against a SAX ContentHandler. 
  9   
 10  See https://lxml.de/sax.html 
 11  """ 
 12   
 13  from __future__ import absolute_import 
 14   
 15  from xml.sax.handler import ContentHandler 
 16  from lxml import etree 
 17  from lxml.etree import ElementTree, SubElement 
 18  from lxml.etree import Comment, ProcessingInstruction 
 19   
 20   
class SaxError(etree.LxmlError):
    """Error raised by this module when handling SAX events fails."""
24 25
26 -def _getNsTag(tag):
27 if tag[0] == '{': 28 return tuple(tag[1:].split('}', 1)) 29 else: 30 return (None, tag)
31 32
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events to an instance and read the result from the ``etree``
    property once parsing is finished.

    ``makeelement`` is an optional factory, called as
    ``makeelement(tag, attrib, nsmap)``, that creates the root element.
    It defaults to ``etree.Element``.  Child elements are always created
    with ``SubElement``.
    """
    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions seen before the root element exists;
        # they are attached in front of the root once it is created.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs.  The ``None`` key tracks the default
        # namespace and keeps a ``None`` sentinel at the bottom of its
        # stack so that endPrefixMapping() can always read the top.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last start tag; handed to the next
        # element as its nsmap and cleared afterwards.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        pass

    def startDocument(self):
        pass

    def endDocument(self):
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix mapping that applies to the next element."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping of *prefix*.

        Raises KeyError if *prefix* was never mapped.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace (or None).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a ``(uri, local name)`` tuple.

        A name without a namespace URI is placed into the current default
        namespace, if one is set.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create a new element and make it the innermost open element."""
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                # Unlike element names, attributes without a namespace
                # do not inherit the default namespace.
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists the PI is queued and later attached
        in front of the root.  Inside an open element it is appended to
        that element.  After the root element has been closed it is
        attached behind the root.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        else:
            # The root is already closed, so there is no open element to
            # index; indexing the empty stack would raise IndexError.
            self._append_after_root(pi)

    def _append_after_root(self, node):
        """Attach *node* behind the root element and its following siblings.

        The node is dropped if the root element offers no
        ``addnext``/``getnext``, matching how leading siblings are dropped
        when the root offers no ``addprevious``.
        """
        last = self._root
        if not (hasattr(last, 'addnext') and hasattr(last, 'getnext')):
            return
        # Walk to the last existing sibling so document order is kept.
        following = last.getnext()
        while following is not None:
            last = following
            following = last.getnext()
        last.addnext(node)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if the closed name does not match that element.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant: delegate to startElementNS()."""
        if attributes:
            attributes = dict(
                ((None, k), v) for k, v in attributes.items()
            )
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant: delegate to endElementNS()."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element.

        The text goes to the tail of the element's last child if it has
        one, otherwise to the element's own text.
        """
        parent = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_child = parent[-1]
        except IndexError:
            # otherwise: append to the text
            parent.text = (parent.text or '') + data
        else:
            last_child.tail = (last_child.tail or '') + data

    ignorableWhitespace = characters
155 156
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    ``element_or_tree`` may be an element or any object with a
    ``getroot()`` method.  ``content_handler`` receives the events when
    ``saxify()`` is called.
    """
    def __init__(self, element_or_tree, content_handler):
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared instance used for every element without attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the SAX events for the whole document.

        Processing instructions directly before and after the root element
        are included when the element supports sibling navigation.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            # Collected back to front; emit in document order.
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Fire the events for *element*, its subtree and its tail text.

        *prefixes* maps namespace URIs to the prefixes currently in scope.
        It is extended while *element* is processed and restored before
        returning.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own; only their tail
            # text is passed on.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope now.  Forget it so that a later
            # element in the same namespace declares its prefix again
            # instead of using one the handler has already been told
            # has ended.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for *local_name* in *ns_uri*.

        If *ns_uri* has no prefix in *prefixes* yet, a new ``nsNN`` prefix
        is generated, stored in *prefixes* and appended to *new_prefixes*
        as a ``(prefix, uri)`` tuple.  Names without a namespace are
        returned unchanged.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
246
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that builds an `ElementTreeProducer` for the given
    element or tree and runs it once.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
252