Package lxml :: Module sax
[hide private]
[frames] | [no frames]

Source Code for Module lxml.sax

  1  """ 
  2  SAX-based adapter to copy trees from/to the Python standard library. 
  3   
  4  Use the `ElementTreeContentHandler` class to build an ElementTree from 
  5  SAX events. 
  6   
  7  Use the `ElementTreeProducer` class or the `saxify()` function to fire 
  8  the SAX events of an ElementTree against a SAX ContentHandler. 
  9   
 10  See http://codespeak.net/lxml/sax.html 
 11  """ 
 12   
 13  from xml.sax.handler import ContentHandler 
 14  from lxml import etree 
 15  from lxml.etree import ElementTree, SubElement 
 16  from lxml.etree import Comment, ProcessingInstruction 
 17   
class SaxError(etree.LxmlError):
    """Error raised by the SAX adapter in this module.

    Subclasses ``etree.LxmlError`` without adding any behaviour of its own.
    """
22
def _getNsTag(tag):
    """Split a tag name into a ``(namespace URI, local name)`` tuple.

    A tag written as ``"{uri}name"`` yields ``("uri", "name")``.  A tag that
    does not start with ``{`` yields ``(None, tag)``.
    """
    if tag[0] != '{':
        return (None, tag)
    # Split at the first '}' only, so later braces stay in the local name.
    pieces = tag[1:].split('}', 1)
    return tuple(pieces)
28
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Both the namespace-aware callbacks (``startElementNS``/``endElementNS``)
    and the plain ones (``startElement``/``endElement``) are supported.  The
    finished tree is available through the ``etree`` property.
    """

    def __init__(self, makeelement=None):
        """Create an empty builder.

        ``makeelement`` is the factory used to create the root element.  It
        is called as ``makeelement(tag, attrib, nsmap)`` and defaults to
        ``etree.Element``.
        """
        self._root = None
        # Processing instructions received before the root element exists.
        self._root_siblings = []
        # Elements that are currently open, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs.  The entry for the default namespace
        # (prefix None) starts with a None sentinel, so that ending a default
        # namespace declaration falls back to "no default namespace".
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last start tag; they are handed to the
        # next element that gets created and then cleared.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignore the locator; it is not used by this handler."""
        pass

    def startDocument(self):
        """Nothing to do at document start."""
        pass

    def endDocument(self):
        """Nothing to do at document end."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost declaration of ``prefix``."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Restore whatever default namespace was active before.
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}name`` tag for a ``(uri, local name)`` tuple.

        A name without a namespace is put into the current default
        namespace if there is one; otherwise the bare local name is used.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element.

        ``ns_name`` is a ``(uri, local name)`` tuple.  ``attributes`` maps
        ``(uri, local name)`` tuples to values.  ``qname`` is not used.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            for name_tuple, value in attributes.items():
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    # Attributes never inherit the default namespace.
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # Re-attach the processing instructions seen before the root.
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element it is kept and attached in front of the root
        once that exists.  Inside the root it is appended to the innermost
        open element.  After the root has been closed it is attached behind
        the root.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        else:
            # The root is already closed, so there is no open element to
            # append to; indexing the empty stack would raise IndexError.
            self._append_after_root(pi)

    def _append_after_root(self, node):
        """Attach ``node`` behind the root element and its existing followers."""
        anchor = self._root
        if not (hasattr(anchor, 'addnext') and hasattr(anchor, 'getnext')):
            # Elements from a custom factory may lack the sibling API; the
            # node is dropped, as is done for leading siblings in that case.
            return
        following = anchor.getnext()
        while following is not None:
            # Walk to the last sibling so that document order is preserved.
            anchor = following
            following = anchor.getnext()
        anchor.addnext(node)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises ``SaxError`` if the closed name does not match that element.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Open a new element from a non-namespace SAX event.

        ``attributes`` maps plain attribute names to values.
        """
        if attributes:
            # startElementNS expects (uri, local name) keys; passing plain
            # names through unchanged makes its tag formatting fail.
            attributes = dict(
                ((None, attr_name), value)
                for attr_name, value in attributes.items())
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Close an element from a non-namespace SAX event."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element."""
        parent = self._element_stack[-1]
        try:
            last_child = parent[-1]
        except IndexError:
            # No child yet: the data belongs to the element's own text.
            parent.text = (parent.text or '') + data
        else:
            # Text after a child element is stored as that child's tail.
            last_child.tail = (last_child.tail or '') + data

    ignorableWhitespace = characters
146 147
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    Events are fired against the namespace-aware callbacks of the given
    content handler (``startElementNS``/``endElementNS``).
    """

    def __init__(self, element_or_tree, content_handler):
        """Remember the element to walk and the handler to notify.

        ``element_or_tree`` may be an element or any object with a
        ``getroot()`` method.
        """
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared attributes object for elements without attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the events for the whole document.

        Processing instructions directly before and after the element are
        included when the element supports ``getprevious()``/``getnext()``.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            # They were collected walking backwards; emit in document order.
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Fire the events for ``element`` and everything below it.

        ``prefixes`` maps namespace URIs to the generated prefixes that are
        in scope at this point.  It is updated while the element is being
        processed and restored before returning.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own; only their tail text.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        # (prefix, uri) pairs that this element has to declare.
        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope now.  Forget it, so that a later
            # element in the same namespace declares a prefix again instead
            # of using one the handler has already been told to drop.
            prefixes.pop(uri, None)
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for a ``(ns_uri, local_name)`` pair.

        If the namespace has no prefix in ``prefixes`` yet, a new ``nsNN``
        prefix is generated, stored there and appended to ``new_prefixes``.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
237
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that builds an ``ElementTreeProducer`` for the given
    element or tree and runs it once.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
243