Package lxml :: Module sax
[hide private]
[frames | no frames]

Source Code for Module lxml.sax

  1  from xml.sax.handler import ContentHandler 
  2  import etree 
  3  from etree import ElementTree, SubElement 
  4  from etree import Comment, ProcessingInstruction 
  5   
class SaxError(etree.LxmlError):
    """General error raised by the SAX support code in this module."""
10
11 -def _getNsTag(tag):
12 if tag[0] == '{': 13 return tuple(tag[1:].split('}', 1)) 14 else: 15 return (None, tag)
16
class ElementTreeContentHandler(ContentHandler, object):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events into an instance (for example through a SAX parser or
    by calling the handler methods directly) and read the result from the
    ``etree`` property afterwards.

    ``makeelement`` is the factory used to create the root element.  It is
    called as ``makeelement(tag, attrib, nsmap)`` and defaults to
    ``etree.Element``.
    """
    # NOTE: ``ContentHandler`` must be listed before ``object``.  The
    # reverse order cannot produce a consistent MRO when ``ContentHandler``
    # is itself a new-style class (Python 3), while this order works for
    # both old-style and new-style ``ContentHandler`` implementations.

    def __init__(self, makeelement=None):
        self._root = None
        # Processing instructions seen before the root element was created.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the None prefix is the default namespace.
        self._ns_mapping = { None : [None] }
        # Mappings declared since the last startElementNS() call.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignore the document locator."""
        pass

    def startDocument(self):
        """Do nothing; no per-document setup is performed."""
        pass

    def endDocument(self):
        """Do nothing; the tree is available through ``etree``."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element.

        A ``prefix`` of ``None`` sets the default namespace.
        """
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """End the scope of ``prefix``, restoring the previous mapping."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a ``(uri, local)`` SAX name.

        A name without namespace URI is put into the current default
        namespace, if one is set.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % (ns_uri, local_name)
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create a new element and make it the current one.

        ``ns_name`` is a ``(uri, localname)`` tuple; ``qname`` is not used.
        ``attributes`` maps ``(uri, localname)`` tuples to values and may be
        ``None`` or empty.
        """
        el_name = self._buildTag(ns_name)

        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                      self._makeelement(el_name, attrs, self._new_mappings)
            # Attach processing instructions that preceded the root, if the
            # element implementation supports preceding siblings.
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        # The collected declarations belong to this element only.
        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists, the instruction is kept aside and
        attached as a preceding sibling once the root is created.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi)

    def endElementNS(self, ns_name, qname):
        """Close the current element.

        Raises ``SaxError`` if ``ns_name`` does not name the element that is
        currently open.
        """
        element = self._element_stack.pop()
        # Build the expected tag with the same rule startElementNS() used,
        # so that names without a namespace URI still match an element that
        # was placed into the default namespace.
        if self._buildTag(ns_name) != element.tag:
            ns_uri, local_name = ns_name
            raise SaxError(
                "Unexpected element closed: {%s}%s" % (ns_uri, local_name))

    def startElement(self, name, attributes=None):
        """Non-namespace variant: delegate to ``startElementNS``."""
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant: delegate to ``endElementNS``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append character data to the current element's content."""
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data

    # Ignorable whitespace is stored like any other character data.
    ignorableWhitespace = characters
130 131
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    ``element_or_tree`` may be an element or an object with a ``getroot()``
    method; ``content_handler`` receives the generated SAX events.
    """
    def __init__(self, element_or_tree, content_handler):
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            # No getroot(): treat the argument as the element itself.
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared attribute object for elements without attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Send the complete event stream to the content handler.

        Emits ``startDocument``, then any processing instructions directly
        preceding the element, the element itself, any processing
        instructions directly following it, and finally ``endDocument``.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            # Collected walking backwards; emit them in document order.
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Emit the SAX events for ``element`` and its subtree.

        ``prefixes`` maps namespace URIs to the prefixes that are in scope
        at this point.  It is extended while the element is processed and
        restored before returning.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own; only their tail text
            # is passed on.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope from here on.  Forget it so that a
            # following sibling in the same namespace declares its prefix
            # again instead of using one the handler was told has ended.
            prefixes.pop(uri, None)
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``local_name`` in ``ns_uri``.

        If no prefix is known for ``ns_uri`` yet, a new ``nsNN`` prefix is
        generated, stored in ``prefixes`` and appended to ``new_prefixes``
        as a ``(prefix, uri)`` pair.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = u'ns%02d' % len(prefixes)
            new_prefixes.append( (prefix, ns_uri) )
        return prefix + ':' + local_name
221
def saxify(element_or_tree, content_handler):
    """Generate SAX events for ``element_or_tree``.

    Convenience wrapper: builds an ``ElementTreeProducer`` for the given
    element (or tree) and content handler and returns the result of its
    ``saxify()`` method.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
224