Package lxml :: Module sax
[hide private]
[frames] | no frames]

Source Code for Module lxml.sax

  1  """ 
  2  SAX-based adapter to copy trees from/to the Python standard library. 
  3   
  4  Use the `ElementTreeContentHandler` class to build an ElementTree from 
  5  SAX events. 
  6   
  7  Use the `ElementTreeProducer` class or the `saxify()` function to fire 
  8  the SAX events of an ElementTree against a SAX ContentHandler. 
  9   
 10  See http://codespeak.net/lxml/sax.html 
 11  """ 
 12   
 13  from xml.sax.handler import ContentHandler 
 14  from lxml import etree 
 15  from lxml.etree import ElementTree, SubElement 
 16  from lxml.etree import Comment, ProcessingInstruction 
 17   
class SaxError(etree.LxmlError):
    """Error raised by this module when the SAX event stream is inconsistent.

    Derives from ``etree.LxmlError`` so that callers can catch it together
    with other lxml errors.
    """
22
23 -def _getNsTag(tag):
24 if tag[0] == '{': 25 return tuple(tag[1:].split('}', 1)) 26 else: 27 return (None, tag)
28 29
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events into an instance of this class and read the result
    from the `etree` property afterwards.  An element factory can be
    passed as `makeelement`; it is called as
    ``makeelement(tag, attrib, nsmap)`` and defaults to ``etree.Element``.
    """
    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # processing instructions received before the root element exists
        self._root_siblings = []
        # currently open elements, innermost last
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the None entry keeps a sentinel so that
        # closing the last default namespace resets _default_ns to None
        self._ns_mapping = { None : [None] }
        # prefix mappings declared since the last startElementNS()
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignored: the locator is not used."""
        pass

    def startDocument(self):
        """Ignored: nothing needs to be set up."""
        pass

    def endDocument(self):
        """Ignored: nothing needs to be finalised."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element.

        The mapping is also pushed onto the per-prefix URI stack.  A
        `prefix` of None sets the current default namespace.
        """
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Pop the innermost URI for `prefix`.

        For the default namespace (`prefix` is None), the previously
        active default namespace becomes current again.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a ``(uri, local_name)`` pair.

        If the pair carries no URI, the current default namespace is used
        when one is set; otherwise the bare local name is returned.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create a new element and make it the current open element.

        `ns_name` is a ``(uri, local_name)`` pair and `attributes` maps
        ``(uri, local_name)`` pairs to values.  `qname` is not used.  The
        first element becomes the root; every later one is appended to
        the innermost open element.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                # no iteritems() available (e.g. SAX attribute objects)
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                      self._makeelement(el_name, attrs, self._new_mappings)
            # attach the processing instructions seen before the root,
            # if the element implementation supports root siblings
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        # the pending declarations now belong to this element
        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists, the instruction is kept and
        attached in front of the root once it is created.  Inside the
        tree it is appended to the innermost open element.  After the
        root element was closed it is attached behind the root.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        else:
            # The root is already closed, so there is no open element to
            # append to (indexing the empty stack would raise IndexError).
            self._append_after_root(pi)

    def _append_after_root(self, node):
        """Attach `node` behind the root element and its later siblings.

        Does nothing if the root element does not provide ``getnext()``
        and ``addnext()``, in the same way that leading siblings are
        skipped when ``addprevious()`` is missing.
        """
        anchor = self._root
        if not (hasattr(anchor, 'getnext') and hasattr(anchor, 'addnext')):
            return
        # walk to the last existing sibling to preserve document order
        following = anchor.getnext()
        while following is not None:
            anchor = following
            following = anchor.getnext()
        anchor.addnext(node)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if the closed name does not match the tag of the
        element that is currently open.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant of startElementNS().

        Attribute names are converted to ``(None, name)`` pairs before
        delegating to startElementNS().
        """
        if attributes:
            attributes = dict(
                    [((None, k), v) for k, v in attributes.items()]
                )
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant of endElementNS()."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append character data at the current position in the tree."""
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data

    # whitespace is kept and handled like any other character data
    ignorableWhitespace = characters
152 153
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    The events are fired against the SAX content handler passed to the
    constructor.  Namespace prefixes are generated (``ns00``, ``ns01``,
    ...) and declared through ``startPrefixMapping()`` on the first
    element that needs them.
    """
    def __init__(self, element_or_tree, content_handler):
        """Accept either a tree (anything with ``getroot()``) or an element."""
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # shared attribute object for elements without attributes
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the SAX events for the whole document.

        Between ``startDocument()`` and ``endDocument()``, the processing
        instructions directly preceding the element, the element itself
        and the processing instructions directly following it are
        reported in document order.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            # collected back to front, so replay them in reverse
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Fire the events for `element`, its subtree and its tail text.

        `prefixes` maps namespace URIs to the prefixes that are currently
        declared.  Mappings introduced by this element are added while
        its subtree is processed and removed again when it ends.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # comments produce no event of their own, only their tail text
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope now.  Forget it so that a later
            # element using the same namespace declares its prefix again
            # instead of reusing one whose mapping has already ended.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``(ns_uri, local_name)``.

        Names without a namespace are returned unchanged.  If `ns_uri`
        has no prefix in `prefixes` yet, a new one is generated, stored
        there and appended to `new_prefixes` as ``(prefix, ns_uri)``.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append( (prefix, ns_uri) )
        return prefix + ':' + local_name
243
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that creates an `ElementTreeProducer` for the
    given element or tree and returns the result of its ``saxify()``
    method.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
249