Package xml :: Package etree :: Module ElementTree
[hide private]
[frames] | no frames]

Source Code for Module xml.etree.ElementTree

   1  # 
   2  # ElementTree 
   3  # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ 
   4  # 
   5  # light-weight XML support for Python 2.3 and later. 
   6  # 
   7  # history (since 1.2.6): 
   8  # 2005-11-12 fl   added tostringlist/fromstringlist helpers 
   9  # 2006-07-05 fl   merged in selected changes from the 1.3 sandbox 
  10  # 2006-07-05 fl   removed support for 2.1 and earlier 
  11  # 2007-06-21 fl   added deprecation/future warnings 
  12  # 2007-08-25 fl   added doctype hook, added parser version attribute etc 
  13  # 2007-08-26 fl   added new serializer code (better namespace handling, etc) 
  14  # 2007-08-27 fl   warn for broken /tag searches on tree level 
  15  # 2007-09-02 fl   added html/text methods to serializer (experimental) 
  16  # 2007-09-05 fl   added method argument to tostring/tostringlist 
  17  # 2007-09-06 fl   improved error handling 
  18  # 2007-09-13 fl   added itertext, iterfind; assorted cleanups 
  19  # 2007-12-15 fl   added C14N hooks, copy method (experimental) 
  20  # 
  21  # Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved. 
  22  # 
  23  # fredrik@pythonware.com 
  24  # http://www.pythonware.com 
  25  # 
  26  # -------------------------------------------------------------------- 
  27  # The ElementTree toolkit is 
  28  # 
  29  # Copyright (c) 1999-2008 by Fredrik Lundh 
  30  # 
  31  # By obtaining, using, and/or copying this software and/or its 
  32  # associated documentation, you agree that you have read, understood, 
  33  # and will comply with the following terms and conditions: 
  34  # 
  35  # Permission to use, copy, modify, and distribute this software and 
  36  # its associated documentation for any purpose and without fee is 
  37  # hereby granted, provided that the above copyright notice appears in 
  38  # all copies, and that both that copyright notice and this permission 
  39  # notice appear in supporting documentation, and that the name of 
  40  # Secret Labs AB or the author not be used in advertising or publicity 
  41  # pertaining to distribution of the software without specific, written 
  42  # prior permission. 
  43  # 
  44  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
  45  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
  46  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
  47  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
  48  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
  49  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
  50  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
  51  # OF THIS SOFTWARE. 
  52  # -------------------------------------------------------------------- 
  53   
  54  # Licensed to PSF under a Contributor Agreement. 
  55  # See http://www.python.org/psf/license for licensing details. 
  56   
  57  __all__ = [ 
  58      # public symbols 
  59      "Comment", 
  60      "dump", 
  61      "Element", "ElementTree", 
  62      "fromstring", "fromstringlist", 
  63      "iselement", "iterparse", 
  64      "parse", "ParseError", 
  65      "PI", "ProcessingInstruction", 
  66      "QName", 
  67      "SubElement", 
  68      "tostring", "tostringlist", 
  69      "TreeBuilder", 
  70      "VERSION", 
  71      "XML", 
  72      "XMLParser", "XMLTreeBuilder", 
  73      ] 
  74   
  75  VERSION = "1.3.0" 
  76   
  77  ## 
  78  # The <b>Element</b> type is a flexible container object, designed to 
  79  # store hierarchical data structures in memory. The type can be 
  80  # described as a cross between a list and a dictionary. 
  81  # <p> 
  82  # Each element has a number of properties associated with it: 
  83  # <ul> 
  84  # <li>a <i>tag</i>. This is a string identifying what kind of data 
  85  # this element represents (the element type, in other words).</li> 
  86  # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li> 
  87  # <li>a <i>text</i> string.</li> 
  88  # <li>an optional <i>tail</i> string.</li> 
  89  # <li>a number of <i>child elements</i>, stored in a Python sequence</li> 
  90  # </ul> 
  91  # 
  92  # To create an element instance, use the {@link #Element} constructor 
  93  # or the {@link #SubElement} factory function. 
  94  # <p> 
  95  # The {@link #ElementTree} class can be used to wrap an element 
  96  # structure, and convert it from and to XML. 
  97  ## 
  98   
  99  import sys 
 100  import re 
 101  import warnings 
 102   
 103   
104 -class _SimpleElementPath(object):
105 # emulate pre-1.2 find/findtext/findall behaviour
106 - def find(self, element, tag, namespaces=None):
107 for elem in element: 108 if elem.tag == tag: 109 return elem 110 return None
111 - def findtext(self, element, tag, default=None, namespaces=None):
112 elem = self.find(element, tag) 113 if elem is None: 114 return default 115 return elem.text or ""
116 - def iterfind(self, element, tag, namespaces=None):
117 if tag[:3] == ".//": 118 for elem in element.iter(tag[3:]): 119 yield elem 120 for elem in element: 121 if elem.tag == tag: 122 yield elem
123 - def findall(self, element, tag, namespaces=None):
124 return list(self.iterfind(element, tag, namespaces))
125 126 try: 127 from . import ElementPath 128 except ImportError: 129 ElementPath = _SimpleElementPath() 130 131 ## 132 # Parser error. This is a subclass of <b>SyntaxError</b>. 133 # <p> 134 # In addition to the exception value, an exception instance contains a 135 # specific exception code in the <b>code</b> attribute, and the line and 136 # column of the error in the <b>position</b> attribute. 137
class ParseError(SyntaxError):
    """Parser error; a subclass of SyntaxError.

    The class itself adds nothing to SyntaxError.  According to the
    module documentation, instances are given a specific exception code
    in the ``code`` attribute and the line and column of the error in
    the ``position`` attribute.
    """
# --------------------------------------------------------------------

##
# Checks if an object appears to be a valid element object.
#
# @param element An element instance.
# @return A true value if this is an element object.
# @defreturn flag

def iselement(element):
    """Tell whether *element* looks like an element object.

    Returns True for Element instances and for any other object that
    exposes a ``tag`` attribute; False otherwise.
    """
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
154 155 ## 156 # Element class. This class defines the Element interface, and 157 # provides a reference implementation of this interface. 158 # <p> 159 # The element name, attribute names, and attribute values can be 160 # either ASCII strings (ordinary Python strings containing only 7-bit 161 # ASCII characters) or Unicode strings. 162 # 163 # @param tag The element name. 164 # @param attrib An optional dictionary, containing element attributes. 165 # @param **extra Additional attributes, given as keyword arguments. 166 # @see Element 167 # @see SubElement 168 # @see Comment 169 # @see ProcessingInstruction 170
class Element(object):
    """Reference implementation of the Element interface.

    Layout of an element: <tag attrib>text<child/>...</tag>tail

    @param tag The element name.
    @param attrib An optional dictionary, containing element attributes.
    @param **extra Additional attributes, given as keyword arguments.
    """

    # (Attribute) Element tag.
    tag = None

    # (Attribute) Element attribute dictionary.  Where possible, use
    # get(), set(), keys() and items() to access element attributes.
    attrib = None

    # (Attribute) Text before first subelement.  This is either a
    # string or the value None.  Note that if there was no text, this
    # attribute may be either None or an empty string, depending on
    # the parser.
    text = None

    # (Attribute) Text after this element's end tag, but before the
    # next sibling element's start tag.  This is either a string or
    # the value None.  Note that if there was no text, this attribute
    # may be either None or an empty string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        # Work on a copy so neither the caller's dictionary nor the
        # shared default argument is ever modified.
        merged = attrib.copy()
        merged.update(extra)
        self.tag = tag
        self.attrib = merged
        self._children = []

    def __repr__(self):
        return "<Element %s at 0x%x>" % (repr(self.tag), id(self))

    def makeelement(self, tag, attrib):
        """Create a new element object of the same type as this element.

        @param tag Element tag.
        @param attrib Element attributes, given as a dictionary.
        @return A new element instance.
        """
        factory = self.__class__
        return factory(tag, attrib)

    def copy(self):
        """(Experimental) Copy the current element.

        This creates a shallow copy; subelements will be shared with
        the original tree.

        @return A new element instance.
        """
        clone = self.makeelement(self.tag, self.attrib)
        clone.text = self.text
        clone.tail = self.tail
        clone[:] = self
        return clone

    def __len__(self):
        """Return the number of subelements.

        Only full elements are counted; to check if there's any content
        in an element, check both the length and the text attribute.
        """
        return len(self._children)

    def __nonzero__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        # emulate old behaviour, for now
        return len(self._children) != 0

    def __getitem__(self, index):
        """Return the given subelement, by index.

        @exception IndexError If the given element does not exist.
        """
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the given subelement, by index (or slice).

        The new value is stored without any type check.

        @exception IndexError If the given element does not exist.
        """
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the given subelement, by index.

        @exception IndexError If the given element does not exist.
        """
        del self._children[index]

    def append(self, element):
        """Add a subelement to the end of this element.

        In document order, the new element will appear after the last
        existing subelement (or directly after the text, if it's the
        first subelement), but before the end tag for this element.
        The argument is stored without any type check.

        @param element The element to add.
        """
        self._children.append(element)

    def extend(self, elements):
        """Append subelements from a sequence.

        @param elements A sequence object with zero or more elements.
        @since 1.3
        """
        self._children.extend(elements)

    def insert(self, index, element):
        """Insert a subelement at the given position in this element.

        @param index Where to insert the new subelement.
        """
        self._children.insert(index, element)

    def remove(self, element):
        """Remove a matching subelement.

        Unlike the find methods, this method does not look at the tag
        value; the child list's own remove() decides what matches.  To
        remove subelements by other means, the easiest way is often to
        use a list comprehension to select what elements to keep, and
        use slice assignment to update the parent element.

        @param element What element to remove.
        @exception ValueError If a matching element could not be found.
        """
        self._children.remove(element)

    def getchildren(self):
        """(Deprecated) Return all subelements, in document order.

        The list returned is the element's internal child list, not a
        copy.

        @return A list of subelements.
        """
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find the first matching subelement, by tag name or path.

        @param path What element to look for.
        @keyparam namespaces Optional namespace prefix map.
        @return The first matching element, or None if no element was
            found.
        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for the first matching subelement.

        @param path What element to look for.
        @param default What to return if the element was not found.
        @keyparam namespaces Optional namespace prefix map.
        @return The text content of the first matching element, or the
            default value if no element was found.  Note that if the
            element is found, but has no text content, this method
            returns an empty string.
        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        @param path What element to look for.
        @keyparam namespaces Optional namespace prefix map.
        @return A list or other sequence containing all matching
            elements, in document order.
        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        @param path What element to look for.
        @keyparam namespaces Optional namespace prefix map.
        @return An iterator or sequence containing all matching
            elements, in document order.
        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset the element.

        Removes all subelements, clears all attributes, and sets the
        text and tail attributes to None.
        """
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    def get(self, key, default=None):
        """Get an element attribute.  Equivalent to attrib.get.

        @param key What attribute to look for.
        @param default What to return if the attribute was not found.
        @return The attribute value, or the default value, if the
            attribute was not found.
        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set an element attribute.  Equivalent to attrib[key] = value.

        @param key What attribute to set.
        @param value The attribute value.
        """
        self.attrib[key] = value

    def keys(self):
        """Return the attribute names.  Equivalent to attrib.keys()."""
        return self.attrib.keys()

    def items(self):
        """Return the attributes as (name, value) pairs.

        Equivalent to attrib.items().
        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create a tree iterator.

        The iterator loops over this element and all subelements, in
        document order, and returns all elements with a matching tag.
        If the tree structure is modified during iteration, new or
        removed elements may or may not be included.  To get a stable
        set, use the list() function on the iterator, and loop over the
        resulting list.

        @param tag What tags to look for (None or "*" return all
            elements).
        @return An iterator containing all the matching elements.
        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for child in self._children:
            for match in child.iter(tag):
                yield match

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    def itertext(self):
        """Create a text iterator.

        The iterator loops over this element and all subelements, in
        document order, and returns all inner text.  Nothing is
        produced for an element whose tag is neither a string nor None.

        @return An iterator containing all inner text.
        """
        tag = self.tag
        if not (isinstance(tag, basestring) or tag is None):
            return
        if self.text:
            yield self.text
        for child in self:
            for piece in child.itertext():
                yield piece
            if child.tail:
                yield child.tail
509 510 # compatibility 511 _Element = _ElementInterface = Element 512 513 ## 514 # Subelement factory. This function creates an element instance, and 515 # appends it to an existing element. 516 # <p> 517 # The element name, attribute names, and attribute values can be 518 # either 8-bit ASCII strings or Unicode strings. 519 # 520 # @param parent The parent element. 521 # @param tag The subelement name. 522 # @param attrib An optional dictionary, containing element attributes. 523 # @param **extra Additional attributes, given as keyword arguments. 524 # @return An element instance. 525 # @defreturn Element 526
def SubElement(parent, tag, attrib={}, **extra):
    """Create a subelement and append it to *parent*.

    The attributes are a copy of *attrib* updated with the keyword
    arguments in *extra*; the dictionary passed in is left untouched.
    The new element is built by parent.makeelement().

    @param parent The parent element.
    @param tag The subelement name.
    @param attrib An optional dictionary, containing element attributes.
    @param **extra Additional attributes, given as keyword arguments.
    @return The element that was appended.
    """
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
533 534 ## 535 # Comment element factory. This factory function creates a special 536 # element that will be serialized as an XML comment by the standard 537 # serializer. 538 # <p> 539 # The comment string can be either an 8-bit ASCII string or a Unicode 540 # string. 541 # 542 # @param text A string containing the comment string. 543 # @return An element instance, representing a comment. 544 # @defreturn Element 545
def Comment(text=None):
    """Comment element factory.

    Builds an Element whose tag is this very function object, which is
    how the serializers in this module recognise a comment, and stores
    *text* as its text.

    @param text A string containing the comment string.
    @return An element instance, representing a comment.
    """
    node = Element(Comment)
    node.text = text
    return node
550 551 ## 552 # PI element factory. This factory function creates a special element 553 # that will be serialized as an XML processing instruction by the standard 554 # serializer. 555 # 556 # @param target A string containing the PI target. 557 # @param text A string containing the PI contents, if any. 558 # @return An element instance, representing a PI. 559 # @defreturn Element 560
def ProcessingInstruction(target, text=None):
    """PI element factory.

    Builds an Element whose tag is this very function object.  Its text
    is *target*, followed by a single space and *text* when *text* is
    non-empty.

    @param target A string containing the PI target.
    @param text A string containing the PI contents, if any.
    @return An element instance, representing a PI.
    """
    node = Element(ProcessingInstruction)
    if text:
        node.text = target + " " + text
    else:
        node.text = target
    return node
567 568 PI = ProcessingInstruction 569 570 ## 571 # QName wrapper. This can be used to wrap a QName attribute value, in 572 # order to get proper namespace handling on output. 573 # 574 # @param text A string containing the QName value, in the form {uri}local, 575 # or, if the tag argument is given, the URI part of a QName. 576 # @param tag Optional tag. If given, the first argument is interpreted as 577 # an URI, and this argument is interpreted as a local name. 578 # @return An opaque object, representing the QName. 579
class QName(object):
    """QName wrapper.

    Wraps a qualified name so that it gets namespace handling on
    output.

    @param text_or_uri A string containing the QName value, in the form
        {uri}local, or, if the tag argument is given, the URI part of a
        QName.
    @param tag Optional tag.  If given (and non-empty), the first
        argument is interpreted as an URI, and this argument is
        interpreted as a local name.
    """

    def __init__(self, text_or_uri, tag=None):
        if tag:
            self.text = "{%s}%s" % (text_or_uri, tag)
        else:
            self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        # hash like the wrapped string
        return hash(self.text)

    def __cmp__(self, other):
        # Compare by text; anything that is not a QName is compared
        # directly against our text.
        other_text = other
        if isinstance(other, QName):
            other_text = other.text
        return cmp(self.text, other_text)
593 594 # -------------------------------------------------------------------- 595 596 ## 597 # ElementTree wrapper class. This class represents an entire element 598 # hierarchy, and adds some extra support for serialization to and from 599 # standard XML. 600 # 601 # @param element Optional root element. 602 # @keyparam file Optional file handle or file name. If given, the 603 # tree is initialized with the contents of this XML file. 604
class ElementTree(object):
    """Wrapper around a root element, adding parsing and serialization.

    @param element Optional root element.
    @keyparam file Optional file handle or file name.  If given, the
        tree is initialized with the contents of this XML file.
    """

    def __init__(self, element=None, file=None):
        # assert element is None or iselement(element)
        self._root = element # first node
        if file:
            self.parse(file)

    def getroot(self):
        """Return the root element for this tree."""
        return self._root

    def _setroot(self, element):
        """Replace the root element for this tree.

        This discards the current contents of the tree, and replaces it
        with the given element.  Use with care.

        @param element An element instance.
        """
        # assert iselement(element)
        self._root = element

    def parse(self, source, parser=None):
        """Load an external XML document into this element tree.

        @param source A file name or file object.  If a file object is
            given, it only has to implement a read(n) method.
        @keyparam parser An optional parser instance.  If not given, an
            XMLParser feeding a TreeBuilder is used.
        @return The document root element.
        @exception ParseError If the parser fails to parse the document.
        """
        close_source = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            close_source = True
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            while 1:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            # only close what this method opened itself
            if close_source:
                source.close()

    def iter(self, tag=None):
        """Create a tree iterator for the root element.

        @param tag What tags to look for (default is to return all
            elements).
        @return An iterator.
        """
        # assert self._root is not None
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    def _toplevel_path(self, path):
        # Shared by find/findtext/findall/iterfind: a path starting with
        # "/" is rewritten to "./..." and a FutureWarning is issued.
        # stacklevel=3 attributes the warning to the caller of the
        # public method, one frame above this helper's caller.
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    def find(self, path, namespaces=None):
        """Find the first toplevel element with given tag.

        Same as getroot().find(path), except that a leading "/" is
        rewritten to "./" with a FutureWarning.

        @param path What element to look for.
        @keyparam namespaces Optional namespace prefix map.
        @return The first matching element, or None if no element was
            found.
        """
        # assert self._root is not None
        return self._root.find(self._toplevel_path(path), namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find the element text for the first toplevel element.

        Same as getroot().findtext(path), except that a leading "/" is
        rewritten to "./" with a FutureWarning.

        @param path What toplevel element to look for.
        @param default What to return if the element was not found.
        @keyparam namespaces Optional namespace prefix map.
        @return The text content of the first matching element, or the
            default value if no element was found.  Note that if the
            element is found, but has no text content, this method
            returns an empty string.
        """
        # assert self._root is not None
        return self._root.findtext(
            self._toplevel_path(path), default, namespaces
            )

    def findall(self, path, namespaces=None):
        """Find all toplevel elements with the given tag.

        Same as getroot().findall(path), except that a leading "/" is
        rewritten to "./" with a FutureWarning.

        @param path What element to look for.
        @keyparam namespaces Optional namespace prefix map.
        @return A list or iterator containing all matching elements,
            in document order.
        """
        # assert self._root is not None
        return self._root.findall(self._toplevel_path(path), namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements, by tag name or path.

        Same as getroot().iterfind(path), except that a leading "/" is
        rewritten to "./" with a FutureWarning.

        @param path What element to look for.
        @keyparam namespaces Optional namespace prefix map.
        @return An iterator or sequence containing all matching
            elements, in document order.
        """
        # assert self._root is not None
        return self._root.iterfind(self._toplevel_path(path), namespaces)

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        """Write the element tree to a file, as XML.

        @param file_or_filename A file name, or a file object opened
            for writing.  A file opened here from a name is always
            closed again, also when serialization fails; a file object
            passed in is left open.
        @keyparam encoding Optional output encoding (default is
            US-ASCII, or UTF-8 for the "c14n" method).
        @keyparam xml_declaration Controls if an XML declaration should
            be added to the file.  Use False for never, True for always,
            None for only if not US-ASCII or UTF-8.  None is default.
            NOTE: this option is only looked at when an explicit
            encoding is given, and a declaration is only ever written
            for the "xml" method.
        @keyparam default_namespace Passed on to the namespace
            collector as the default namespace.
        @keyparam method Optional output method ("xml", "html", "text"
            or "c14n"; default is "xml").
        @exception ValueError If the method is not a known serializer.
        """
        # assert self._root is not None
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if hasattr(file_or_filename, "write"):
            stream = file_or_filename
            opened_here = False
        else:
            stream = open(file_or_filename, "wb")
            opened_here = True
        try:
            write = stream.write
            if not encoding:
                if method == "c14n":
                    encoding = "utf-8"
                else:
                    encoding = "us-ascii"
            elif xml_declaration or (xml_declaration is None and
                                     encoding not in ("utf-8", "us-ascii")):
                if method == "xml":
                    write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # release the handle even if a serializer raised
            if opened_here:
                stream.close()

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
828 829 # -------------------------------------------------------------------- 830 # serialization support 831
def _namespaces(elem, encoding, default_namespace=None):
    """Identify the namespaces used in the tree below *elem*.

    Walks every element, looking at tags, attribute names, and QName
    attribute values and QName text.

    @param elem Root of the tree to scan.
    @param encoding Encoding applied to every serialized name.
    @param default_namespace Optional URI to map to the empty prefix.
    @return A (qnames, namespaces) tuple: qnames maps qualified names
        to their *encoded* prefix:local form, namespaces maps URIs to
        prefixes.
    @exception ValueError If a non-qualified name is found while
        default_namespace is in use.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, local = qname[1:].rsplit("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    # not used in this tree yet: try the registry, then
                    # fall back to a generated "nsN" prefix
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, local))
                else:
                    qnames[qname] = encode(local) # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
        except TypeError:
            _raise_serialization_error(qname)

    def note(name):
        # register a name unless it has been seen before
        if name not in qnames:
            add_qname(name)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            note(tag.text)
        elif isinstance(tag, basestring):
            note(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            note(key)
            if isinstance(value, QName):
                note(value.text)
        text = node.text
        if isinstance(text, QName):
            note(text.text)
    return qnames, namespaces
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* and its subtree as XML through *write*.

    @param write Callable receiving each output fragment.
    @param elem Element to serialize.
    @param encoding Output encoding.
    @param qnames Map from qualified names to encoded serialized names.
    @param namespaces Map from URIs to prefixes, emitted as xmlns
        attributes on this element; children are always serialized with
        None here.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # an element without a tag contributes only its content
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    declarations = sorted(namespaces.items(),
                                          key=lambda x: x[1])  # sort on prefix
                    for uri, prefix in declarations:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes):  # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib(value, encoding)
                    write(" %s=\"%s\"" % (qnames[key], value))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
            else:
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
# Tags that the HTML serializer writes without an end tag.
# ("meta" and "param" are two separate entries; without the comma
# between them the adjacent literals concatenate to "metaparam".)
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    # the name "set" is not available here; keep using the tuple
    pass

def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* and its subtree as HTML through *write*.

    Differs from the XML serializer in these ways: a start tag is
    always closed with ">", script and style text is encoded without
    escaping, and no end tag is written for tags listed in HTML_EMPTY.

    @param write Callable receiving each output fragment.
    @param elem Element to serialize.
    @param encoding Output encoding.
    @param qnames Map from qualified names to encoded serialized names.
    @param namespaces Map from URIs to prefixes, emitted as xmlns
        attributes on this element; children are always serialized with
        None here.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # an element without a tag contributes only its content
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    declarations = sorted(namespaces.items(),
                                          key=lambda x: x[1])  # sort on prefix
                    for uri, prefix in declarations:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes):  # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib_html(value, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            # from here on the lower-cased name is used, both for the
            # checks below and for the end tag
            name = name.lower()
            if text:
                if name == "script" or name == "style":
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if name not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
1004
1005 -def _serialize_text(write, elem, encoding):
1006 for part in elem.itertext(): 1007 write(part.encode(encoding)) 1008 if elem.tail: 1009 write(elem.tail.encode(encoding))

# Output method name -> serializer function.  The optional "c14n"
# method is not listed here; it is registered at the end of the module
# when its implementation can be imported.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
)

##
# Registers a namespace prefix.  The registry is global, and any
# existing mapping for either the given prefix or the namespace URI
# will be removed.
#
# @param prefix Namespace prefix.
# @param uri Namespace uri.  Tags and attributes in this namespace
#     will be serialized with the given prefix, if at all possible.
# @exception ValueError If the prefix is reserved, or is otherwise
#     invalid.

def register_namespace(prefix, uri):
    """Map *uri* to *prefix* in the global namespace registry.

    Any existing entry that uses the same URI or the same prefix is
    removed before the new mapping is stored.

    Raises ValueError if *prefix* has the form ``ns<digits>``.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot: entries are deleted inside the loop, and
    # deleting from a dict while iterating its live items view raises
    # RuntimeError.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
1037 1038 _namespace_map = { 1039 # "well-known" namespace prefixes 1040 "http://www.w3.org/XML/1998/namespace": "xml", 1041 "http://www.w3.org/1999/xhtml": "html", 1042 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", 1043 "http://schemas.xmlsoap.org/wsdl/": "wsdl", 1044 # xml schema 1045 "http://www.w3.org/2001/XMLSchema": "xs", 1046 "http://www.w3.org/2001/XMLSchema-instance": "xsi", 1047 # dublin core 1048 "http://purl.org/dc/elements/1.1/": "dc", 1049 } 1050
1051 -def _raise_serialization_error(text):
1052 raise TypeError( 1053 "cannot serialize %r (type %s)" % (text, type(text).__name__) 1054 )
1055
1056 -def _encode(text, encoding):
1057 try: 1058 return text.encode(encoding, "xmlcharrefreplace") 1059 except (TypeError, AttributeError): 1060 _raise_serialization_error(text)
1061
1062 -def _escape_cdata(text, encoding):
1063 # escape character data 1064 try: 1065 # it's worth avoiding do-nothing calls for strings that are 1066 # shorter than 500 character, or so. assume that's, by far, 1067 # the most common case in most applications. 1068 if "&" in text: 1069 text = text.replace("&", "&amp;") 1070 if "<" in text: 1071 text = text.replace("<", "&lt;") 1072 if ">" in text: 1073 text = text.replace(">", "&gt;") 1074 return text.encode(encoding, "xmlcharrefreplace") 1075 except (TypeError, AttributeError): 1076 _raise_serialization_error(text)
1077
1078 -def _escape_attrib(text, encoding):
1079 # escape attribute value 1080 try: 1081 if "&" in text: 1082 text = text.replace("&", "&amp;") 1083 if "<" in text: 1084 text = text.replace("<", "&lt;") 1085 if ">" in text: 1086 text = text.replace(">", "&gt;") 1087 if "\"" in text: 1088 text = text.replace("\"", "&quot;") 1089 if "\n" in text: 1090 text = text.replace("\n", "&#10;") 1091 return text.encode(encoding, "xmlcharrefreplace") 1092 except (TypeError, AttributeError): 1093 _raise_serialization_error(text)
1094
1095 -def _escape_attrib_html(text, encoding):
1096 # escape attribute value 1097 try: 1098 if "&" in text: 1099 text = text.replace("&", "&amp;") 1100 if ">" in text: 1101 text = text.replace(">", "&gt;") 1102 if "\"" in text: 1103 text = text.replace("\"", "&quot;") 1104 return text.encode(encoding, "xmlcharrefreplace") 1105 except (TypeError, AttributeError): 1106 _raise_serialization_error(text)
1107 1108 # -------------------------------------------------------------------- 1109 1110 ## 1111 # Generates a string representation of an XML element, including all 1112 # subelements. 1113 # 1114 # @param element An Element instance. 1115 # @keyparam encoding Optional output encoding (default is US-ASCII). 1116 # @keyparam method Optional output method ("xml", "html", "text" or 1117 # "c14n"; default is "xml"). 1118 # @return An encoded string containing the XML data. 1119 # @defreturn string 1120
def tostring(element, encoding=None, method=None):
    """Serialize *element* and return the output joined into one string."""
    class _writer:
        pass
    chunks = []
    # minimal file-like object: write() simply collects each fragment
    sink = _writer()
    sink.write = chunks.append
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(chunks)

##
# Generates a string representation of an XML element, including all
# subelements.  The string is returned as a sequence of string fragments.
#
# @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII).
# @keyparam method Optional output method ("xml", "html", "text" or
#     "c14n"; default is "xml").
# @return A sequence object containing the XML data.
# @defreturn sequence
# @since 1.3

def tostringlist(element, encoding=None, method=None):
    """Serialize *element* and return the list of written fragments."""
    class _writer:
        pass
    fragments = []
    # minimal file-like object: write() simply collects each fragment
    sink = _writer()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments

##
# Writes an element tree or element structure to sys.stdout.  This
# function should be used for debugging only.
# <p>
# The exact output format is implementation dependent.  In this
# version, it's written as an ordinary XML file.
#
# @param elem An element tree or an individual element.

def dump(elem):
    """Write *elem* to sys.stdout (debugging aid), ending with a newline."""
    # accept either a tree or a bare element
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    # add a newline unless the root's tail already ends with one
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1169 1170 # -------------------------------------------------------------------- 1171 # parsing 1172 1173 ## 1174 # Parses an XML document into an element tree. 1175 # 1176 # @param source A filename or file object containing XML data. 1177 # @param parser An optional parser instance. If not given, the 1178 # standard {@link XMLParser} parser is used. 1179 # @return An ElementTree instance 1180
def parse(source, parser=None):
    """Load *source* into a new ElementTree and return the tree."""
    document = ElementTree()
    document.parse(source, parser)
    return document
1185 1186 ## 1187 # Parses an XML document into an element tree incrementally, and reports 1188 # what's going on to the user. 1189 # 1190 # @param source A filename or file object containing XML data. 1191 # @param events A list of events to report back. If omitted, only "end" 1192 # events are reported. 1193 # @param parser An optional parser instance. If not given, the 1194 # standard {@link XMLParser} parser is used. 1195 # @return A (event, elem) iterator. 1196
def iterparse(source, events=None, parser=None):
    """Return an iterator of (event, elem) pairs parsed from *source*.

    source -- a filename or an object with a read() method.  A filename
              is opened here in binary mode, and the returned iterator
              is told to close it.
    events -- event names to report; passed through to the iterator.
    parser -- optional parser instance; an XMLParser feeding a
              TreeBuilder is created when omitted.
    """
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except BaseException:
        # Setting up the parser or iterator failed (for example an
        # unknown event name): do not leak a file that was opened here.
        if close_source:
            source.close()
        raise
1205
1206 -class _IterParseIterator(object):
1207
1208 - def __init__(self, source, events, parser, close_source=False):
1209 self._file = source 1210 self._close_file = close_source 1211 self._events = [] 1212 self._index = 0 1213 self._error = None 1214 self.root = self._root = None 1215 self._parser = parser 1216 # wire up the parser for event reporting 1217 parser = self._parser._parser 1218 append = self._events.append 1219 if events is None: 1220 events = ["end"] 1221 for event in events: 1222 if event == "start": 1223 try: 1224 parser.ordered_attributes = 1 1225 parser.specified_attributes = 1 1226 def handler(tag, attrib_in, event=event, append=append, 1227 start=self._parser._start_list): 1228 append((event, start(tag, attrib_in)))
1229 parser.StartElementHandler = handler 1230 except AttributeError: 1231 def handler(tag, attrib_in, event=event, append=append, 1232 start=self._parser._start): 1233 append((event, start(tag, attrib_in)))
1234 parser.StartElementHandler = handler 1235 elif event == "end": 1236 def handler(tag, event=event, append=append, 1237 end=self._parser._end): 1238 append((event, end(tag))) 1239 parser.EndElementHandler = handler 1240 elif event == "start-ns": 1241 def handler(prefix, uri, event=event, append=append): 1242 try: 1243 uri = (uri or "").encode("ascii") 1244 except UnicodeError: 1245 pass 1246 append((event, (prefix or "", uri or ""))) 1247 parser.StartNamespaceDeclHandler = handler 1248 elif event == "end-ns": 1249 def handler(prefix, event=event, append=append): 1250 append((event, None)) 1251 parser.EndNamespaceDeclHandler = handler 1252 else: 1253 raise ValueError("unknown event %r" % event) 1254
1255 - def next(self):
1256 while 1: 1257 try: 1258 item = self._events[self._index] 1259 self._index += 1 1260 return item 1261 except IndexError: 1262 pass 1263 if self._error: 1264 e = self._error 1265 self._error = None 1266 raise e 1267 if self._parser is None: 1268 self.root = self._root 1269 if self._close_file: 1270 self._file.close() 1271 raise StopIteration 1272 # load event buffer 1273 del self._events[:] 1274 self._index = 0 1275 data = self._file.read(16384) 1276 if data: 1277 try: 1278 self._parser.feed(data) 1279 except SyntaxError as exc: 1280 self._error = exc 1281 else: 1282 self._root = self._parser.close() 1283 self._parser = None
1284
1285 - def __iter__(self):
1286 return self

##
# Parses an XML document from a string constant.  This function can
# be used to embed "XML literals" in Python code.
#
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element

def XML(text, parser=None):
    """Feed *text* to a parser and return what its close() returns."""
    # a missing (or falsy) parser is replaced by the standard one
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    return parser.close()

##
# Parses an XML document from a string constant, and also returns
# a dictionary which maps from element ids to elements.
#
# @param text A string containing XML data.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return A tuple containing an Element instance and a dictionary.
# @defreturn (Element, dictionary)

def XMLID(text, parser=None):
    """Parse *text* and return (root, ids), where ids maps every
    non-empty "id" attribute value to its element."""
    # a missing (or falsy) parser is replaced by the standard one
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    id_map = {}
    for node in root.iter():
        key = node.get("id")
        if not key:
            continue
        # a repeated id keeps the element seen last
        id_map[key] = node
    return root, id_map

##
# Parses an XML document from a string constant.  Same as {@link #XML}.
#
# @def fromstring(text)
# @param text A string containing XML data.
# @return An Element instance.
# @defreturn Element

# Alias: fromstring is the same function object as XML.
fromstring = XML

##
# Parses an XML document from a sequence of string fragments.
#
# @param sequence A list or other sequence containing XML data fragments.
# @param parser An optional parser instance.  If not given, the
#     standard {@link XMLParser} parser is used.
# @return An Element instance.
# @defreturn Element
# @since 1.3

def fromstringlist(sequence, parser=None):
    """Feed each fragment of *sequence* to a parser, in order, and
    return what its close() returns."""
    # a missing (or falsy) parser is replaced by the standard one
    parser = parser or XMLParser(target=TreeBuilder())
    feed = parser.feed
    for fragment in sequence:
        feed(fragment)
    return parser.close()
1352 1353 # -------------------------------------------------------------------- 1354 1355 ## 1356 # Generic element structure builder. This builder converts a sequence 1357 # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link 1358 # #TreeBuilder.end} method calls to a well-formed element structure. 1359 # <p> 1360 # You can use this class to build an element structure using a custom XML 1361 # parser, or a parser for some other XML-like format. 1362 # 1363 # @param element_factory Optional element factory. This factory 1364 # is called to create new Element instances, as necessary. 1365
class TreeBuilder(object):
    """Builds an element structure from start(), data() and end() calls."""

    def __init__(self, element_factory=None):
        # factory called as factory(tag, attrs) for every new element
        self._factory = Element if element_factory is None else element_factory
        self._data = []     # pending text fragments
        self._elem = []     # stack of currently open elements
        self._last = None   # element opened or closed most recently
        self._tail = None   # true once the last element has been closed

    ##
    # Flushes the builder buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        toplevel = self._last
        assert toplevel is not None, "missing toplevel element"
        return toplevel

    def _flush(self):
        # Attach collected text to the last element: as its tail if that
        # element has been closed, otherwise as its text.
        if not self._data:
            return
        owner = self._last
        if owner is not None:
            text = "".join(self._data)
            if self._tail:
                assert owner.tail is None, "internal error (tail)"
                owner.tail = text
            else:
                assert owner.text is None, "internal error (text)"
                owner.text = text
        self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        node = self._factory(tag, attrs)
        self._last = node
        stack = self._elem
        if stack:
            # nest the new element under the innermost open one
            stack[-1].append(node)
        stack.append(node)
        self._tail = 0
        return node

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1442 1443 ## 1444 # Element structure builder for XML source data, based on the 1445 # <b>expat</b> parser. 1446 # 1447 # @keyparam target Target object. If omitted, the builder uses an 1448 # instance of the standard {@link #TreeBuilder} class. 1449 # @keyparam html Predefine HTML entities. This flag is not supported 1450 # by the current implementation. 1451 # @keyparam encoding Optional encoding. If given, the value overrides 1452 # the encoding specified in the XML file. 1453 # @see #ElementTree 1454 # @see #TreeBuilder 1455
class XMLParser(object):
    """Expat-driven parser that forwards parse events to a target object.

    The target receives start(tag, attrib), end(tag) and data(text)
    calls; comment(), pi() and doctype() are called only if the target
    defines them.  close() returns whatever the target's close() returns.
    """

    def __init__(self, html=0, target=None, encoding=None):
        """Create the expat parser and install the callbacks.

        html     -- accepted but not used by this implementation.
        target   -- event receiver; a TreeBuilder is created if None.
        encoding -- passed to expat.ParserCreate.

        Raises ImportError if no expat module can be imported.
        """
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace URI and local name in expanded names;
        # _fixname adds the leading "{".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {}  # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass  # unknown

    def _raiseerror(self, value):
        # Re-raise an expat error as ParseError, copying code and position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        # Start handler for attributes delivered as a mapping.
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        # Start handler for attributes delivered as a flat
        # [name, value, name, value, ...] sequence.
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _comment(self, data):
        # forwarded only if the target defines comment()
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    def _pi(self, target, data):
        # forwarded only if the target defines pi()
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    def _default(self, text):
        # Handles markup that has no dedicated callback: entity
        # references missing from self.entity, and the pieces of a
        # doctype declaration.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11  # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = []  # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                # [1:-1] strips the surrounding quote characters
                if pubid:
                    pubid = pubid[1:-1]
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # Bound methods are created anew on every attribute
                    # access, so they must be compared with != rather
                    # than identity; the two differ only when a subclass
                    # overrides doctype().
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        try:
            self._parser.Parse("", 1)  # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser  # get rid of circular references
        return tree

# compatibility: XMLTreeBuilder is an alias for the XMLParser class
XMLTreeBuilder = XMLParser

# workaround circular import.
# Register the optional "c14n" output method in the serializer table;
# it is skipped when the ElementC14N module cannot be imported.
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass
