Package xml :: Package etree :: Module ElementTree
[hide private]
[frames | no frames]

Source Code for Module xml.etree.ElementTree

   1  # 
   2  # ElementTree 
   3  # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ 
   4  # 
   5  # light-weight XML support for Python 2.3 and later. 
   6  # 
   7  # history (since 1.2.6): 
   8  # 2005-11-12 fl   added tostringlist/fromstringlist helpers 
   9  # 2006-07-05 fl   merged in selected changes from the 1.3 sandbox 
  10  # 2006-07-05 fl   removed support for 2.1 and earlier 
  11  # 2007-06-21 fl   added deprecation/future warnings 
  12  # 2007-08-25 fl   added doctype hook, added parser version attribute etc 
  13  # 2007-08-26 fl   added new serializer code (better namespace handling, etc) 
  14  # 2007-08-27 fl   warn for broken /tag searches on tree level 
  15  # 2007-09-02 fl   added html/text methods to serializer (experimental) 
  16  # 2007-09-05 fl   added method argument to tostring/tostringlist 
  17  # 2007-09-06 fl   improved error handling 
  18  # 2007-09-13 fl   added itertext, iterfind; assorted cleanups 
  19  # 2007-12-15 fl   added C14N hooks, copy method (experimental) 
  20  # 
  21  # Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved. 
  22  # 
  23  # fredrik@pythonware.com 
  24  # http://www.pythonware.com 
  25  # 
  26  # -------------------------------------------------------------------- 
  27  # The ElementTree toolkit is 
  28  # 
  29  # Copyright (c) 1999-2008 by Fredrik Lundh 
  30  # 
  31  # By obtaining, using, and/or copying this software and/or its 
  32  # associated documentation, you agree that you have read, understood, 
  33  # and will comply with the following terms and conditions: 
  34  # 
  35  # Permission to use, copy, modify, and distribute this software and 
  36  # its associated documentation for any purpose and without fee is 
  37  # hereby granted, provided that the above copyright notice appears in 
  38  # all copies, and that both that copyright notice and this permission 
  39  # notice appear in supporting documentation, and that the name of 
  40  # Secret Labs AB or the author not be used in advertising or publicity 
  41  # pertaining to distribution of the software without specific, written 
  42  # prior permission. 
  43  # 
  44  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
  45  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
  46  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
  47  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
  48  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
  49  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
  50  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
  51  # OF THIS SOFTWARE. 
  52  # -------------------------------------------------------------------- 
  53   
  54  # Licensed to PSF under a Contributor Agreement. 
  55  # See http://www.python.org/psf/license for licensing details. 
  56   
  57  __all__ = [ 
  58      # public symbols 
  59      "Comment", 
  60      "dump", 
  61      "Element", "ElementTree", 
  62      "fromstring", "fromstringlist", 
  63      "iselement", "iterparse", 
  64      "parse", "ParseError", 
  65      "PI", "ProcessingInstruction", 
  66      "QName", 
  67      "SubElement", 
  68      "tostring", "tostringlist", 
  69      "TreeBuilder", 
  70      "VERSION", 
  71      "XML", 
  72      "XMLParser", "XMLTreeBuilder", 
  73      ] 
  74   
  75  VERSION = "1.3.0" 
  76   
  77  ## 
  78  # The <b>Element</b> type is a flexible container object, designed to 
  79  # store hierarchical data structures in memory. The type can be 
  80  # described as a cross between a list and a dictionary. 
  81  # <p> 
  82  # Each element has a number of properties associated with it: 
  83  # <ul> 
  84  # <li>a <i>tag</i>. This is a string identifying what kind of data 
  85  # this element represents (the element type, in other words).</li> 
  86  # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li> 
  87  # <li>a <i>text</i> string.</li> 
  88  # <li>an optional <i>tail</i> string.</li> 
  89  # <li>a number of <i>child elements</i>, stored in a Python sequence</li> 
  90  # </ul> 
  91  # 
  92  # To create an element instance, use the {@link #Element} constructor 
  93  # or the {@link #SubElement} factory function. 
  94  # <p> 
  95  # The {@link #ElementTree} class can be used to wrap an element 
  96  # structure, and convert it from and to XML. 
  97  ## 
  98   
  99  import sys 
 100  import re 
 101  import warnings 
 102   
 103   
104 -class _SimpleElementPath(object):
105 # emulate pre-1.2 find/findtext/findall behaviour
106 - def find(self, element, tag, namespaces=None):
107 for elem in element: 108 if elem.tag == tag: 109 return elem 110 return None
111 - def findtext(self, element, tag, default=None, namespaces=None):
112 elem = self.find(element, tag) 113 if elem is None: 114 return default 115 return elem.text or ""
116 - def iterfind(self, element, tag, namespaces=None):
117 if tag[:3] == ".//": 118 for elem in element.iter(tag[3:]): 119 yield elem 120 for elem in element: 121 if elem.tag == tag: 122 yield elem
123 - def findall(self, element, tag, namespaces=None):
124 return list(self.iterfind(element, tag, namespaces))
try:
    from . import ElementPath
except ImportError:
    # The ElementPath module could not be imported: fall back to an
    # instance of the minimal emulation class.
    ElementPath = _SimpleElementPath()

##
# Parser error.  This is a subclass of <b>SyntaxError</b>.
# <p>
# In addition to the exception value, an exception instance contains a
# specific exception code in the <b>code</b> attribute, and the line and
# column of the error in the <b>position</b> attribute.

class ParseError(SyntaxError):
    """Parser error; a plain subclass of SyntaxError.

    The class itself adds no behaviour.  According to the module
    documentation, instances additionally carry a "code" attribute and a
    "position" (line, column) attribute; those are not set by this class.
    """

# --------------------------------------------------------------------

##
# Checks if an object appears to be a valid element object.
#
# @param element An element instance.
# @return A true value if this is an element object.
# @defreturn flag

def iselement(element):
    """Return True if the object appears to be a valid element object.

    An object qualifies when it is an Element instance or simply exposes
    a "tag" attribute.
    """
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
154 155 ## 156 # Element class. This class defines the Element interface, and 157 # provides a reference implementation of this interface. 158 # <p> 159 # The element name, attribute names, and attribute values can be 160 # either ASCII strings (ordinary Python strings containing only 7-bit 161 # ASCII characters) or Unicode strings. 162 # 163 # @param tag The element name. 164 # @param attrib An optional dictionary, containing element attributes. 165 # @param **extra Additional attributes, given as keyword arguments. 166 # @see Element 167 # @see SubElement 168 # @see Comment 169 # @see ProcessingInstruction 170
class Element(object):
    """Element interface and its reference implementation.

    In XML terms an element looks like:

        <tag attrib>text<child/>...</tag>tail

    Children are kept in a private list and are reachable through the
    sequence protocol (len(), indexing, slicing, iteration).
    """

    # (Attribute) Element tag.
    tag = None

    # (Attribute) Element attribute dictionary.  Where possible, use
    # get(), set(), keys() and items() to access element attributes.
    attrib = None

    # (Attribute) Text before first subelement.  This is either a string
    # or the value None.  Note that if there was no text, this attribute
    # may be either None or an empty string, depending on the parser.
    text = None

    # (Attribute) Text after this element's end tag, but before the next
    # sibling element's start tag.  This is either a string or the value
    # None.  Note that if there was no text, this attribute may be either
    # None or an empty string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        """Create an element.

        tag is the element name, attrib an optional dictionary of
        attributes, and extra holds further attributes given as keyword
        arguments.
        """
        # Work on a copy so that neither the shared default dictionary nor
        # the caller's dictionary is modified by the update below.
        attributes = attrib.copy()
        attributes.update(extra)
        self.tag = tag
        self.attrib = attributes
        self._children = []

    def __repr__(self):
        tag_repr = repr(self.tag)
        address = id(self)
        return "<Element %s at 0x%x>" % (tag_repr, address)

    def makeelement(self, tag, attrib):
        """Create a new element object of the same type as this element.

        tag is the element tag and attrib a dictionary of attributes.
        """
        factory = self.__class__
        return factory(tag, attrib)

    def copy(self):
        """(Experimental) Return a shallow copy of this element.

        Tag, attributes, text and tail are carried over; the subelements
        are shared with the original, not duplicated.
        """
        duplicate = self.makeelement(self.tag, self.attrib)
        duplicate.text = self.text
        duplicate.tail = self.tail
        duplicate[:] = self
        return duplicate

    def __len__(self):
        """Return the number of subelements.

        Only full elements are counted; to check whether an element has
        any content, test both the length and the text attribute.
        """
        return len(self._children)

    def __nonzero__(self):
        # Emits a FutureWarning, then emulates the old behaviour (true
        # only when there are subelements), for now.
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) > 0

    def __getitem__(self, index):
        """Return the subelement (or slice of subelements) at index."""
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the subelement (or slice) at index.

        No check is made that the new value is an element.
        """
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the subelement (or slice) at index."""
        del self._children[index]

    def append(self, element):
        """Add a subelement to the end of this element.

        No check is made that the argument is an element.
        """
        self._children.append(element)

    def extend(self, elements):
        """Append all subelements from a sequence (since 1.3)."""
        self._children.extend(elements)

    def insert(self, index, element):
        """Insert a subelement at the given position."""
        self._children.insert(index, element)

    def remove(self, element):
        """Remove the given subelement.

        Delegates to list.remove on the child list; a ValueError is
        raised by the list when no matching child is found.
        """
        self._children.remove(element)

    def getchildren(self):
        """(Deprecated) Return the list of subelements.

        Emits a DeprecationWarning.  The list returned is the element's
        own child list, not a copy.
        """
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find the first matching subelement, by tag name or path.

        Delegates to ElementPath.find.
        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find the text of the first matching subelement.

        Delegates to ElementPath.findtext; default is what that call is
        given to return when nothing matches.
        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        Delegates to ElementPath.findall.
        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        Delegates to ElementPath.iterfind.
        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset the element.

        Empties the attribute dictionary in place, drops all subelements
        and sets text and tail to None.
        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Return the attribute named key, or default if it is missing."""
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set the attribute named key to value."""
        self.attrib[key] = value

    def keys(self):
        """Return the attribute names (same as attrib.keys())."""
        return self.attrib.keys()

    def items(self):
        """Return the attributes as (name, value) pairs (attrib.items())."""
        return self.attrib.items()

    def iter(self, tag=None):
        """Create a tree iterator.

        Yields this element and then, depth first, everything its
        children's iter() methods yield, keeping only elements whose tag
        equals tag.  A tag of None or "*" matches every element.
        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for child in self._children:
            for descendant in child.iter(tag):
                yield descendant

    # compatibility
    def getiterator(self, tag=None):
        """Return list(self.iter(tag)), after a PendingDeprecationWarning."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    def itertext(self):
        """Create a text iterator.

        Yields this element's text, then for each child the child's own
        inner text followed by the child's tail.  Nothing is yielded when
        the tag is neither a string nor None.
        """
        own_tag = self.tag
        if not (isinstance(own_tag, basestring) or own_tag is None):
            return
        if self.text:
            yield self.text
        for child in self:
            for fragment in child.itertext():
                yield fragment
            if child.tail:
                yield child.tail
# compatibility: legacy names bound to the Element class
_Element = Element
_ElementInterface = Element

##
# Subelement factory.  This function creates an element instance, and
# appends it to an existing element.
# <p>
# The element name, attribute names, and attribute values can be
# either 8-bit ASCII strings or Unicode strings.
#
# @param parent The parent element.
# @param tag The subelement name.
# @param attrib An optional dictionary, containing element attributes.
# @param **extra Additional attributes, given as keyword arguments.
# @return An element instance.
# @defreturn Element

def SubElement(parent, tag, attrib={}, **extra):
    """Create a subelement and append it to parent.

    The new element is built with parent.makeelement(), so it has whatever
    type the parent produces.  attrib is an optional attribute dictionary
    and extra holds further attributes given as keyword arguments.
    Returns the new element.
    """
    # Merge into a copy; the dictionary passed in is left untouched.
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
533 534 ## 535 # Comment element factory. This factory function creates a special 536 # element that will be serialized as an XML comment by the standard 537 # serializer. 538 # <p> 539 # The comment string can be either an 8-bit ASCII string or a Unicode 540 # string. 541 # 542 # @param text A string containing the comment string. 543 # @return An element instance, representing a comment. 544 # @defreturn Element 545
def Comment(text=None):
    """Comment element factory.

    Returns an Element whose tag is the Comment function itself and whose
    text is the given comment string; the serializers in this module
    recognise that tag and write the element as an XML comment.
    """
    node = Element(Comment)
    node.text = text
    return node
550 551 ## 552 # PI element factory. This factory function creates a special element 553 # that will be serialized as an XML processing instruction by the standard 554 # serializer. 555 # 556 # @param target A string containing the PI target. 557 # @param text A string containing the PI contents, if any. 558 # @return An element instance, representing a PI. 559 # @defreturn Element 560
def ProcessingInstruction(target, text=None):
    """PI element factory.

    Returns an Element whose tag is the ProcessingInstruction function
    itself.  Its text is the target, followed by a space and text when
    text is given and non-empty.
    """
    node = Element(ProcessingInstruction)
    node.text = target
    if text:
        node.text = node.text + " " + text
    return node
# Short alias for the processing-instruction factory.
PI = ProcessingInstruction

##
# QName wrapper.  This can be used to wrap a QName attribute value, in
# order to get proper namespace handling on output.
#
# @param text A string containing the QName value, in the form {uri}local,
#     or, if the tag argument is given, the URI part of a QName.
# @param tag Optional tag.  If given, the first argument is interpreted as
#     a URI, and this argument is interpreted as a local name.
# @return An opaque object, representing the QName.

class QName(object):
    """Wrapper around a qualified name of the form {uri}local.

    The qualified name is stored in the text attribute.  Hashing and
    comparison are both based on that text.
    """

    def __init__(self, text_or_uri, tag=None):
        """Store the qualified name.

        With a single argument, text_or_uri is taken as is.  When tag is
        given (and true), text_or_uri is treated as the namespace URI and
        tag as the local name, and the two are combined.
        """
        if tag:
            self.text = "{%s}%s" % (text_or_uri, tag)
        else:
            self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    def __cmp__(self, other):
        # Compare by text; a non-QName operand is compared directly
        # against this object's text.
        rhs = other.text if isinstance(other, QName) else other
        return cmp(self.text, rhs)
593 594 # -------------------------------------------------------------------- 595 596 ## 597 # ElementTree wrapper class. This class represents an entire element 598 # hierarchy, and adds some extra support for serialization to and from 599 # standard XML. 600 # 601 # @param element Optional root element. 602 # @keyparam file Optional file handle or file name. If given, the 603 # tree is initialized with the contents of this XML file. 604
class ElementTree(object):
    """ElementTree wrapper class.

    Represents an entire element hierarchy, and adds some extra support
    for serialization to and from standard XML.
    """

    def __init__(self, element=None, file=None):
        """Create a tree.

        element is the optional root element.  If file (a file handle or
        file name) is given and true, the tree is initialized by parsing
        it, which replaces the root.
        """
        self._root = element  # first node
        if file:
            self.parse(file)

    def getroot(self):
        """Return the root element of this tree."""
        return self._root

    def _setroot(self, element):
        """Replace the root element of this tree.

        The current contents of the tree are discarded.  No check is made
        that the argument is an element.  Use with care.
        """
        self._root = element

    def parse(self, source, parser=None):
        """Load an external XML document into this tree.

        source is a file name or an object with a read(n) method.  parser
        is an optional parser instance; when omitted, an XMLParser feeding
        a TreeBuilder is used.  Returns the new root element.  A file
        opened here is always closed again; a file object passed in by the
        caller is left open.
        """
        close_source = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            close_source = True
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            while True:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if close_source:
                source.close()

    def iter(self, tag=None):
        """Create a tree iterator for the root element.

        Returns self.getroot().iter(tag); the tree must have a root.
        """
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """Return list(self.iter(tag)), after a PendingDeprecationWarning."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    def _relative_path(self, path):
        """Turn a path with a leading "/" into "./..." and warn about it.

        Shared by find, findtext, findall and iterfind.  Paths that do not
        start with "/" are returned unchanged.
        """
        if path[:1] == "/":
            path = "." + path
            # stacklevel=3 skips this helper and the public method that
            # called it, so the warning points at the user's call site.
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    def find(self, path, namespaces=None):
        """Same as getroot().find(path), starting at the root of the tree.

        A path with a leading "/" is searched as "./..." and triggers a
        FutureWarning.
        """
        path = self._relative_path(path)
        return self._root.find(path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Same as getroot().findtext(path), starting at the root.

        A path with a leading "/" is searched as "./..." and triggers a
        FutureWarning.
        """
        path = self._relative_path(path)
        return self._root.findtext(path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Same as getroot().findall(path), starting at the root.

        A path with a leading "/" is searched as "./..." and triggers a
        FutureWarning.
        """
        path = self._relative_path(path)
        return self._root.findall(path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Same as getroot().iterfind(path), starting at the root.

        A path with a leading "/" is searched as "./..." and triggers a
        FutureWarning.
        """
        path = self._relative_path(path)
        return self._root.iterfind(path, namespaces)

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        """Write the element tree to a file.

        file_or_filename is a file name, or an object with a write method.
        encoding defaults to "us-ascii" ("utf-8" for the "c14n" method).
        xml_declaration controls whether an XML declaration is written
        when an explicit encoding is given: True for always, None only if
        the encoding is not "utf-8" or "us-ascii".  default_namespace sets
        the default XML namespace.  method selects the serializer and
        defaults to "xml".

        Raises ValueError for a method that is not registered in the
        serializer table.  A file opened here is closed even when
        serialization fails; a file object passed in by the caller is
        never closed.
        """
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if hasattr(file_or_filename, "write"):
            stream = file_or_filename
        else:
            stream = open(file_or_filename, "wb")
        try:
            write = stream.write
            # NOTE(review): the declaration branch is an "elif" of the
            # default-encoding branch, so no declaration is written when
            # encoding is omitted, even with xml_declaration=True.  Kept
            # as is to preserve existing output.
            if not encoding:
                if method == "c14n":
                    encoding = "utf-8"
                else:
                    encoding = "us-ascii"
            elif xml_declaration or (xml_declaration is None and
                                     encoding not in ("utf-8", "us-ascii")):
                if method == "xml":
                    write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # Close only what this method opened, on success and on error.
            if stream is not file_or_filename:
                stream.close()

    def write_c14n(self, file):
        """Write the tree with the "c14n" output method.

        Provided for lxml.etree compatibility; equivalent to
        write(file, method="c14n").
        """
        return self.write(file, method="c14n")
827 828 # -------------------------------------------------------------------- 829 # serialization support 830
def _namespaces(elem, encoding, default_namespace=None):
    """Identify the namespaces used in the tree rooted at elem.

    Walks every element and collects the tags, attribute names and
    QName-valued attribute values and texts.  Returns a (qnames,
    namespaces) pair: qnames maps each name to its *encoded*
    "prefix:local" form, and namespaces maps each namespace URI to its
    prefix.

    Raises ValueError when default_namespace is set and a non-qualified
    name is encountered.  Tags of unsupported type are reported through
    _raise_serialization_error.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname.encode(encoding)
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                serialized = "%s:%s" % (prefix, local)
            else:
                serialized = local  # default element
            qnames[qname] = serialized.encode(encoding)
        except TypeError:
            _raise_serialization_error(qname)

    def ensure(name):
        # register a name unless it is already in the table
        if name not in qnames:
            add_qname(name)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            ensure(tag.text)
        elif isinstance(tag, basestring):
            ensure(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            ensure(key)
            if isinstance(value, QName):
                ensure(value.text)
        text = node.text
        if isinstance(text, QName):
            ensure(text.text)
    return qnames, namespaces

def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize elem and its subtree as XML through the write callable.

    qnames maps tags and attribute names to their serialized form.
    namespaces holds the uri-to-prefix declarations to emit on this
    element; recursive calls for children pass None.  The element's tail,
    if any, is written after the element itself.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # No serialized name: emit only the text and the children.
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    declarations = sorted(namespaces.items(),
                                          key=lambda pair: pair[1])
                    for uri, prefix in declarations:  # sorted on prefix
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes):  # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib(value, encoding)
                    write(" %s=\"%s\"" % (qnames[key], value))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
            else:
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
# Lower-case tag names for which the HTML serializer writes no end tag.
HTML_EMPTY = (
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
    )

try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    # no built-in set type available: keep the tuple
    pass

def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Serialize elem and its subtree as HTML through the write callable.

    Differs from the XML serializer in that a start tag is always closed
    with ">", the text of script and style elements is encoded without
    escaping, and no end tag is written for tags listed in HTML_EMPTY.
    namespaces holds the declarations to emit on this element; recursive
    calls for children pass None.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # No serialized name: emit only the text and the children.
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    declarations = sorted(namespaces.items(),
                                          key=lambda pair: pair[1])
                    for uri, prefix in declarations:  # sorted on prefix
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes):  # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib_html(value, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
1003
1004 -def _serialize_text(write, elem, encoding):
1005 for part in elem.itertext(): 1006 write(part.encode(encoding)) 1007 if elem.tail: 1008 write(elem.tail.encode(encoding))

# Serializer dispatch table, keyed by output method name.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
    # this optional method is imported at the end of the module
    # "c14n": _serialize_c14n,
)

##
# Registers a namespace prefix.  The registry is global, and any
# existing mapping for either the given prefix or the namespace URI
# will be removed.
#
# @param prefix Namespace prefix.
# @param uri Namespace uri.  Tags and attributes in this namespace
#     will be serialized with the given prefix, if at all possible.
# @exception ValueError If the prefix is reserved, or is otherwise
#     invalid.

def register_namespace(prefix, uri):
    """Register a namespace prefix in the global registry.

    Any existing entry that uses either the given prefix or the given
    URI is removed before the new mapping is stored.

    prefix -- namespace prefix
    uri -- namespace URI

    Raises ValueError if the prefix has the form "ns<digits>", which is
    reserved for internal use.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot of the items: entries are deleted inside
    # the loop, and deleting while iterating a live items() view raises
    # "dictionary changed size during iteration".
    for known_uri, known_prefix in list(_namespace_map.items()):
        if known_uri == uri or known_prefix == prefix:
            del _namespace_map[known_uri]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}

1050 -def _raise_serialization_error(text):
1051 raise TypeError( 1052 "cannot serialize %r (type %s)" % (text, type(text).__name__) 1053 )
1054
1055 -def _encode(text, encoding):
1056 try: 1057 return text.encode(encoding, "xmlcharrefreplace") 1058 except (TypeError, AttributeError): 1059 _raise_serialization_error(text)
1060
1061 -def _escape_cdata(text, encoding):
1062 # escape character data 1063 try: 1064 # it's worth avoiding do-nothing calls for strings that are 1065 # shorter than 500 character, or so. assume that's, by far, 1066 # the most common case in most applications. 1067 if "&" in text: 1068 text = text.replace("&", "&amp;") 1069 if "<" in text: 1070 text = text.replace("<", "&lt;") 1071 if ">" in text: 1072 text = text.replace(">", "&gt;") 1073 return text.encode(encoding, "xmlcharrefreplace") 1074 except (TypeError, AttributeError): 1075 _raise_serialization_error(text)
1076
1077 -def _escape_attrib(text, encoding):
1078 # escape attribute value 1079 try: 1080 if "&" in text: 1081 text = text.replace("&", "&amp;") 1082 if "<" in text: 1083 text = text.replace("<", "&lt;") 1084 if ">" in text: 1085 text = text.replace(">", "&gt;") 1086 if "\"" in text: 1087 text = text.replace("\"", "&quot;") 1088 if "\n" in text: 1089 text = text.replace("\n", "&#10;") 1090 return text.encode(encoding, "xmlcharrefreplace") 1091 except (TypeError, AttributeError): 1092 _raise_serialization_error(text)
1093
1094 -def _escape_attrib_html(text, encoding):
1095 # escape attribute value 1096 try: 1097 if "&" in text: 1098 text = text.replace("&", "&amp;") 1099 if ">" in text: 1100 text = text.replace(">", "&gt;") 1101 if "\"" in text: 1102 text = text.replace("\"", "&quot;") 1103 return text.encode(encoding, "xmlcharrefreplace") 1104 except (TypeError, AttributeError): 1105 _raise_serialization_error(text)
1106 1107 # -------------------------------------------------------------------- 1108 1109 ## 1110 # Generates a string representation of an XML element, including all 1111 # subelements. 1112 # 1113 # @param element An Element instance. 1114 # @keyparam encoding Optional output encoding (default is US-ASCII). 1115 # @keyparam method Optional output method ("xml", "html", "text" or 1116 # "c14n"; default is "xml"). 1117 # @return An encoded string containing the XML data. 1118 # @defreturn string 1119
def tostring(element, encoding=None, method=None):
    # ElementTree.write() only needs an object with a write method, so
    # a bare object whose write is list.append collects the fragments.
    class dummy:
        pass
    fragments = []
    sink = dummy()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(fragments)

##
# Generates a string representation of an XML element, including all
# subelements.  The string is returned as a sequence of string fragments.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return A sequence object containing the XML data.
# @defreturn sequence
# @since 1.3

def tostringlist(element, encoding=None, method=None):
    # Same collection technique as tostring(), but the fragments are
    # handed back as written instead of being joined.
    class dummy:
        pass
    fragments = []
    sink = dummy()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments

##
# Writes an element tree or element structure to sys.stdout.  This
# function should be used for debugging only.
# <p>
# The exact output format is implementation dependent.  In this
# version, it's written as an ordinary XML file.
#
# @param elem An element tree or an individual element.

def dump(elem):
    # debugging
    # Accept either a tree or a bare element; wrap the latter.
    tree = elem
    if not isinstance(tree, ElementTree):
        tree = ElementTree(tree)
    tree.write(sys.stdout)
    # Finish the output with a newline unless the root's tail already
    # ends with one.
    tail = tree.getroot().tail
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1168 1169 # -------------------------------------------------------------------- 1170 # parsing 1171 1172 ## 1173 # Parses an XML document into an element tree. 1174 # 1175 # @param source A filename or file object containing XML data. 1176 # @param parser An optional parser instance. If not given, the 1177 # standard {@link XMLParser} parser is used. 1178 # @return An ElementTree instance 1179
def parse(source, parser=None):
    # Creates an empty ElementTree, lets it load source (optionally
    # through the given parser), and returns the tree.
    document = ElementTree()
    document.parse(source, parser)
    return document
1184 1185 ## 1186 # Parses an XML document into an element tree incrementally, and reports 1187 # what's going on to the user. 1188 # 1189 # @param source A filename or file object containing XML data. 1190 # @param events A list of events to report back. If omitted, only "end" 1191 # events are reported. 1192 # @param parser An optional parser instance. If not given, the 1193 # standard {@link XMLParser} parser is used. 1194 # @return A (event, elem) iterator. 1195
def iterparse(source, events=None, parser=None):
    # Anything without a read attribute is treated as a filename and
    # opened here; in that case this code owns the file and the
    # iterator is told to close it.
    opened_here = not hasattr(source, "read")
    if opened_here:
        source = open(source, "rb")
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, opened_here)
    except:
        # setup failed: release the file we opened, then re-raise
        if opened_here:
            source.close()
        raise
1209
class _IterParseIterator(object):
    # Iterator returned by iterparse().  It installs event-recording
    # handlers on the parser's underlying expat object and feeds the
    # source to the parser in 16384-byte reads, handing out the
    # recorded (event, payload) pairs one at a time.

    def __init__(self, source, events, parser, close_source=False):
        self._file = source
        self._close_file = close_source
        self._events = []
        self._index = 0
        self._error = None
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        expat_parser = self._parser._parser
        record = self._events.append
        if events is None:
            events = ["end"]
        for event in events:
            if event == "start":
                try:
                    # prefer the list-based attribute interface
                    expat_parser.ordered_attributes = 1
                    expat_parser.specified_attributes = 1
                    def on_start(tag, attrib_in, event=event, append=record,
                                 start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    expat_parser.StartElementHandler = on_start
                except AttributeError:
                    # fall back to the dictionary-based interface
                    def on_start(tag, attrib_in, event=event, append=record,
                                 start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    expat_parser.StartElementHandler = on_start
            elif event == "end":
                def on_end(tag, event=event, append=record,
                           end=self._parser._end):
                    append((event, end(tag)))
                expat_parser.EndElementHandler = on_end
            elif event == "start-ns":
                def on_start_ns(prefix, uri, event=event, append=record):
                    # convert the uri to ascii, if possible
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        pass
                    append((event, (prefix or "", uri or "")))
                expat_parser.StartNamespaceDeclHandler = on_start_ns
            elif event == "end-ns":
                def on_end_ns(prefix, event=event, append=record):
                    append((event, None))
                expat_parser.EndNamespaceDeclHandler = on_end_ns
            else:
                raise ValueError("unknown event %r" % event)

    def next(self):
        try:
            while True:
                # hand out buffered events first
                if self._index < len(self._events):
                    pending = self._events[self._index]
                    self._index += 1
                    return pending
                # buffer drained: report a parse error stored earlier
                if self._error:
                    deferred = self._error
                    self._error = None
                    raise deferred
                # parser already closed: publish the root and finish
                if self._parser is None:
                    self.root = self._root
                    break
                # load event buffer (cleared in place, because the
                # handlers hold a reference to its append method)
                del self._events[:]
                self._index = 0
                data = self._file.read(16384)
                if not data:
                    self._root = self._parser.close()
                    self._parser = None
                else:
                    try:
                        self._parser.feed(data)
                    except SyntaxError as exc:
                        # raise it only after the events gathered so
                        # far have been delivered
                        self._error = exc
        except:
            if self._close_file:
                self._file.close()
            raise
        if self._close_file:
            self._file.close()
        raise StopIteration

    def __iter__(self):
        return self
1297 1298 ## 1299 # Parses an XML document from a string constant. This function can 1300 # be used to embed "XML literals" in Python code. 1301 # 1302 # @param text A string containing XML data. 1303 # @param parser An optional parser instance. If not given, the 1304 # standard {@link XMLParser} parser is used. 1305 # @return An Element instance. 1306 # @defreturn Element 1307
def XML(text, parser=None):
    # Feeds the whole string to the parser in one go and returns
    # whatever the parser's close() produces.  A standard XMLParser is
    # created when no (truthy) parser is supplied.
    active = parser
    if not active:
        active = XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
1313 1314 ## 1315 # Parses an XML document from a string constant, and also returns 1316 # a dictionary which maps from element id:s to elements. 1317 # 1318 # @param text A string containing XML data. 1319 # @param parser An optional parser instance. If not given, the 1320 # standard {@link XMLParser} parser is used. 1321 # @return A tuple containing an Element instance and a dictionary. 1322 # @defreturn (Element, dictionary) 1323
def XMLID(text, parser=None):
    # Parses text, then walks the resulting tree and collects every
    # element that has a non-empty "id" attribute into a dictionary
    # keyed by that attribute value (later duplicates win).
    active = parser
    if not active:
        active = XMLParser(target=TreeBuilder())
    active.feed(text)
    tree = active.close()
    by_id = {}
    for node in tree.iter():
        key = node.get("id")
        if key:
            by_id[key] = node
    return tree, by_id
1335 1336 ## 1337 # Parses an XML document from a string constant. Same as {@link #XML}. 1338 # 1339 # @def fromstring(text) 1340 # @param source A string containing XML data. 1341 # @return An Element instance. 1342 # @defreturn Element 1343 1344 fromstring = XML 1345 1346 ## 1347 # Parses an XML document from a sequence of string fragments. 1348 # 1349 # @param sequence A list or other sequence containing XML data fragments. 1350 # @param parser An optional parser instance. If not given, the 1351 # standard {@link XMLParser} parser is used. 1352 # @return An Element instance. 1353 # @defreturn Element 1354 # @since 1.3 1355
def fromstringlist(sequence, parser=None):
    # Feeds the fragments to the parser one by one, in order, and
    # returns whatever the parser's close() produces.
    active = parser
    if not active:
        active = XMLParser(target=TreeBuilder())
    feed = active.feed
    for fragment in sequence:
        feed(fragment)
    return active.close()
1362 1363 # -------------------------------------------------------------------- 1364 1365 ## 1366 # Generic element structure builder. This builder converts a sequence 1367 # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link 1368 # #TreeBuilder.end} method calls to a well-formed element structure. 1369 # <p> 1370 # You can use this class to build an element structure using a custom XML 1371 # parser, or a parser for some other XML-like format. 1372 # 1373 # @param element_factory Optional element factory. This factory 1374 # is called to create new Element instances, as necessary. 1375
class TreeBuilder(object):

    def __init__(self, element_factory=None):
        self._data = []  # text fragments collected since the last flush
        self._elem = []  # stack of currently open elements
        self._last = None  # most recently opened or closed element
        self._tail = None  # true once the last event was an end tag
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory

    ##
    # Returns the toplevel document element once every element has
    # been closed.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert not len(self._elem), "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Moves the collected text fragments onto the last element: to
        # its tail when we are after an end tag, to its text otherwise.
        if not self._data:
            return
        target = self._last
        if target is not None:
            text = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        opened = self._factory(tag, attrs)
        self._last = opened
        stack = self._elem
        if stack:
            # attach to the element that is currently open
            stack[-1].append(opened)
        stack.append(opened)
        self._tail = 0
        return opened

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._last = self._elem.pop()
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed

_sentinel = ['sentinel']

##
# Element structure builder for XML source data, based on the
# <b>expat</b> parser.
#
# @keyparam target Target object.  If omitted, the builder uses an
#     instance of the standard {@link #TreeBuilder} class.
# @keyparam html Predefine HTML entities.  This flag is not supported
#     by the current implementation.
# @keyparam encoding Optional encoding.  If given, the value overrides
#     the encoding specified in the XML file.
# @see #ElementTree
# @see #TreeBuilder

class XMLParser(object):
    """Expat-based parser that forwards parse events to a target object.

    The target receives start/end/data calls (plus comment, pi and
    doctype calls when it defines those methods); close() returns
    whatever the target's close() returns.
    """

    def __init__(self, html=_sentinel, target=None, encoding=None):
        """Create the expat parser and install the event callbacks.

        html -- deprecated; passing any value only triggers a warning
        target -- object receiving the events (default: a TreeBuilder)
        encoding -- optional encoding handed to expat.ParserCreate

        Raises ImportError if neither xml.parsers.expat nor pyexpat can
        be imported.
        """
        if html is not _sentinel:
            warnings.warnpy3k(
                "The html argument of XMLParser() is deprecated",
                DeprecationWarning, stacklevel=2)
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is passed as the namespace separator, so qualified names
        # arrive as "uri}local" (see _fixname)
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _raiseerror(self, value):
        """Re-raise an expat error as ParseError with code and position."""
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        """Return text converted to ascii if possible, else unchanged."""
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        """Return the (memoized) name for an expat name.

        Names containing "}" get a leading "{", giving "{uri}local".
        """
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        """Start-tag callback for attributes given as a dictionary."""
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        """Start-tag callback for attributes given as a flat list.

        attrib_in alternates names and values: [name, value, ...].
        """
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        """Character-data callback; forwards the text to the target."""
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        """End-tag callback; forwards the fixed-up name to the target."""
        return self.target.end(self._fixname(tag))

    def _comment(self, data):
        """Comment callback; ignored unless the target has comment()."""
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    def _pi(self, target, data):
        """Processing-instruction callback; needs a target pi() method."""
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    def _default(self, text):
        """Default callback: entity references and doctype declarations.

        An "&name;" reference is looked up in self.entity; a missing
        name raises an expat error with code 11.  Doctype tokens are
        collected until name, PUBLIC/SYSTEM and the identifiers are
        complete, then reported through the target's doctype() method
        or, failing that, through an overridden doctype() on a subclass.
        """
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # use the error class selected in __init__, so the
                # pyexpat fallback is honoured here as well
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if pubid:
                    pubid = pubid[1:-1] # strip the quotes
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        """Feed a chunk of data to expat; expat errors become ParseError."""
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        """Signal end of data and return the target's close() result."""
        try:
            self._parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree
1675 1676 # compatibility 1677 XMLTreeBuilder = XMLParser 1678 1679 # workaround circular import. 1680 try: 1681 from ElementC14N import _serialize_c14n 1682 _serialize["c14n"] = _serialize_c14n 1683 except ImportError: 1684 pass 1685