Home | Trees | Indices | Help |
|
---|
|
1 # 2 # ElementTree 3 # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ 4 # 5 # light-weight XML support for Python 2.3 and later. 6 # 7 # history (since 1.2.6): 8 # 2005-11-12 fl added tostringlist/fromstringlist helpers 9 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox 10 # 2006-07-05 fl removed support for 2.1 and earlier 11 # 2007-06-21 fl added deprecation/future warnings 12 # 2007-08-25 fl added doctype hook, added parser version attribute etc 13 # 2007-08-26 fl added new serializer code (better namespace handling, etc) 14 # 2007-08-27 fl warn for broken /tag searches on tree level 15 # 2007-09-02 fl added html/text methods to serializer (experimental) 16 # 2007-09-05 fl added method argument to tostring/tostringlist 17 # 2007-09-06 fl improved error handling 18 # 2007-09-13 fl added itertext, iterfind; assorted cleanups 19 # 2007-12-15 fl added C14N hooks, copy method (experimental) 20 # 21 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. 22 # 23 # fredrik@pythonware.com 24 # http://www.pythonware.com 25 # 26 # -------------------------------------------------------------------- 27 # The ElementTree toolkit is 28 # 29 # Copyright (c) 1999-2008 by Fredrik Lundh 30 # 31 # By obtaining, using, and/or copying this software and/or its 32 # associated documentation, you agree that you have read, understood, 33 # and will comply with the following terms and conditions: 34 # 35 # Permission to use, copy, modify, and distribute this software and 36 # its associated documentation for any purpose and without fee is 37 # hereby granted, provided that the above copyright notice appears in 38 # all copies, and that both that copyright notice and this permission 39 # notice appear in supporting documentation, and that the name of 40 # Secret Labs AB or the author not be used in advertising or publicity 41 # pertaining to distribution of the software without specific, written 42 # prior permission. 
43 # 44 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 45 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 46 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 47 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 48 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 49 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 50 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 51 # OF THIS SOFTWARE. 52 # -------------------------------------------------------------------- 53 54 # Licensed to PSF under a Contributor Agreement. 55 # See http://www.python.org/psf/license for licensing details. 56 57 __all__ = [ 58 # public symbols 59 "Comment", 60 "dump", 61 "Element", "ElementTree", 62 "fromstring", "fromstringlist", 63 "iselement", "iterparse", 64 "parse", "ParseError", 65 "PI", "ProcessingInstruction", 66 "QName", 67 "SubElement", 68 "tostring", "tostringlist", 69 "TreeBuilder", 70 "VERSION", 71 "XML", 72 "XMLParser", "XMLTreeBuilder", 73 ] 74 75 VERSION = "1.3.0" 76 77 ## 78 # The <b>Element</b> type is a flexible container object, designed to 79 # store hierarchical data structures in memory. The type can be 80 # described as a cross between a list and a dictionary. 81 # <p> 82 # Each element has a number of properties associated with it: 83 # <ul> 84 # <li>a <i>tag</i>. This is a string identifying what kind of data 85 # this element represents (the element type, in other words).</li> 86 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li> 87 # <li>a <i>text</i> string.</li> 88 # <li>an optional <i>tail</i> string.</li> 89 # <li>a number of <i>child elements</i>, stored in a Python sequence</li> 90 # </ul> 91 # 92 # To create an element instance, use the {@link #Element} constructor 93 # or the {@link #SubElement} factory function. 
94 # <p> 95 # The {@link #ElementTree} class can be used to wrap an element 96 # structure, and convert it from and to XML. 97 ## 98 99 import sys 100 import re 101 import warnings 102 103105 # emulate pre-1.2 find/findtext/findall behaviour125 126 try: 127 from . import ElementPath 128 except ImportError: 129 ElementPath = _SimpleElementPath() 130 131 ## 132 # Parser error. This is a subclass of <b>SyntaxError</b>. 133 # <p> 134 # In addition to the exception value, an exception instance contains a 135 # specific exception code in the <b>code</b> attribute, and the line and 136 # column of the error in the <b>position</b> attribute. 137 140 141 # -------------------------------------------------------------------- 142 143 ## 144 # Checks if an object appears to be a valid element object. 145 # 146 # @param An element instance. 147 # @return A true value if this is an element object. 148 # @defreturn flag 149112 elem = self.find(element, tag) 113 if elem is None: 114 return default 115 return elem.text or ""117 if tag[:3] == ".//": 118 for elem in element.iter(tag[3:]): 119 yield elem 120 for elem in element: 121 if elem.tag == tag: 122 yield elem151 # FIXME: not sure about this; might be a better idea to look 152 # for tag/attrib/text attributes 153 return isinstance(element, Element) or hasattr(element, "tag")154 155 ## 156 # Element class. This class defines the Element interface, and 157 # provides a reference implementation of this interface. 158 # <p> 159 # The element name, attribute names, and attribute values can be 160 # either ASCII strings (ordinary Python strings containing only 7-bit 161 # ASCII characters) or Unicode strings. 162 # 163 # @param tag The element name. 164 # @param attrib An optional dictionary, containing element attributes. 165 # @param **extra Additional attributes, given as keyword arguments. 
166 # @see Element 167 # @see SubElement 168 # @see Comment 169 # @see ProcessingInstruction 170172 # <tag attrib>text<child/>...</tag>tail 173 174 ## 175 # (Attribute) Element tag. 176 177 tag = None 178 179 ## 180 # (Attribute) Element attribute dictionary. Where possible, use 181 # {@link #Element.get}, 182 # {@link #Element.set}, 183 # {@link #Element.keys}, and 184 # {@link #Element.items} to access 185 # element attributes. 186 187 attrib = None 188 189 ## 190 # (Attribute) Text before first subelement. This is either a 191 # string or the value None. Note that if there was no text, this 192 # attribute may be either None or an empty string, depending on 193 # the parser. 194 195 text = None 196 197 ## 198 # (Attribute) Text after this element's end tag, but before the 199 # next sibling element's start tag. This is either a string or 200 # the value None. Note that if there was no text, this attribute 201 # may be either None or an empty string, depending on the parser. 202 203 tail = None # text after end tag, if any 204 205 # constructor 206509 510 # compatibility 511 _Element = _ElementInterface = Element 512 513 ## 514 # Subelement factory. This function creates an element instance, and 515 # appends it to an existing element. 516 # <p> 517 # The element name, attribute names, and attribute values can be 518 # either 8-bit ASCII strings or Unicode strings. 519 # 520 # @param parent The parent element. 521 # @param tag The subelement name. 522 # @param attrib An optional dictionary, containing element attributes. 523 # @param **extra Additional attributes, given as keyword arguments. 524 # @return An element instance. 525 # @defreturn Element 526208 attrib = attrib.copy() 209 attrib.update(extra) 210 self.tag = tag 211 self.attrib = attrib 212 self._children = []213215 return "<Element %s at 0x%x>" % (repr(self.tag), id(self))216 217 ## 218 # Creates a new element object of the same type as this element. 219 # 220 # @param tag Element tag. 
221 # @param attrib Element attributes, given as a dictionary. 222 # @return A new element instance. 223 226 227 ## 228 # (Experimental) Copies the current element. This creates a 229 # shallow copy; subelements will be shared with the original tree. 230 # 231 # @return A new element instance. 232234 elem = self.makeelement(self.tag, self.attrib) 235 elem.text = self.text 236 elem.tail = self.tail 237 elem[:] = self 238 return elem239 240 ## 241 # Returns the number of subelements. Note that this only counts 242 # full elements; to check if there's any content in an element, you 243 # have to check both the length and the <b>text</b> attribute. 244 # 245 # @return The number of subelements. 246 249251 warnings.warn( 252 "The behavior of this method will change in future versions. " 253 "Use specific 'len(elem)' or 'elem is not None' test instead.", 254 FutureWarning, stacklevel=2 255 ) 256 return len(self._children) != 0 # emulate old behaviour, for now257 258 ## 259 # Returns the given subelement, by index. 260 # 261 # @param index What subelement to return. 262 # @return The given subelement. 263 # @exception IndexError If the given element does not exist. 264266 return self._children[index]267 268 ## 269 # Replaces the given subelement, by index. 270 # 271 # @param index What subelement to replace. 272 # @param element The new element value. 273 # @exception IndexError If the given element does not exist. 274276 # if isinstance(index, slice): 277 # for elt in element: 278 # assert iselement(elt) 279 # else: 280 # assert iselement(element) 281 self._children[index] = element282 283 ## 284 # Deletes the given subelement, by index. 285 # 286 # @param index What subelement to delete. 287 # @exception IndexError If the given element does not exist. 288290 del self._children[index]291 292 ## 293 # Adds a subelement to the end of this element. 
In document order, 294 # the new element will appear after the last existing subelement (or 295 # directly after the text, if it's the first subelement), but before 296 # the end tag for this element. 297 # 298 # @param element The element to add. 299 303 304 ## 305 # Appends subelements from a sequence. 306 # 307 # @param elements A sequence object with zero or more elements. 308 # @since 1.3 309 314 315 ## 316 # Inserts a subelement at the given position in this element. 317 # 318 # @param index Where to insert the new subelement. 319 323 324 ## 325 # Removes a matching subelement. Unlike the <b>find</b> methods, 326 # this method compares elements based on identity, not on tag 327 # value or contents. To remove subelements by other means, the 328 # easiest way is often to use a list comprehension to select what 329 # elements to keep, and use slice assignment to update the parent 330 # element. 331 # 332 # @param element What element to remove. 333 # @exception ValueError If a matching element could not be found. 334 338 339 ## 340 # (Deprecated) Returns all subelements. The elements are returned 341 # in document order. 342 # 343 # @return A list of subelements. 344 # @defreturn list of Element instances 345347 warnings.warn( 348 "This method will be removed in future versions. " 349 "Use 'list(elem)' or iteration over elem instead.", 350 DeprecationWarning, stacklevel=2 351 ) 352 return self._children353 354 ## 355 # Finds the first matching subelement, by tag name or path. 356 # 357 # @param path What element to look for. 358 # @keyparam namespaces Optional namespace prefix map. 359 # @return The first matching element, or None if no element was found. 360 # @defreturn Element or None 361 364 365 ## 366 # Finds text for the first matching subelement, by tag name or path. 367 # 368 # @param path What element to look for. 369 # @param default What to return if the element was not found. 370 # @keyparam namespaces Optional namespace prefix map. 
371 # @return The text content of the first matching element, or the 372 # default value if no element was found. Note that if the element 373 # is found, but has no text content, this method returns an 374 # empty string. 375 # @defreturn string 376 379 380 ## 381 # Finds all matching subelements, by tag name or path. 382 # 383 # @param path What element to look for. 384 # @keyparam namespaces Optional namespace prefix map. 385 # @return A list or other sequence containing all matching elements, 386 # in document order. 387 # @defreturn list of Element instances 388 391 392 ## 393 # Finds all matching subelements, by tag name or path. 394 # 395 # @param path What element to look for. 396 # @keyparam namespaces Optional namespace prefix map. 397 # @return An iterator or sequence containing all matching elements, 398 # in document order. 399 # @defreturn a generated sequence of Element instances 400 403 404 ## 405 # Resets an element. This function removes all subelements, clears 406 # all attributes, and sets the <b>text</b> and <b>tail</b> attributes 407 # to None. 408 413 414 ## 415 # Gets an element attribute. Equivalent to <b>attrib.get</b>, but 416 # some implementations may handle this a bit more efficiently. 417 # 418 # @param key What attribute to look for. 419 # @param default What to return if the attribute was not found. 420 # @return The attribute value, or the default value, if the 421 # attribute was not found. 422 # @defreturn string or None 423 426 427 ## 428 # Sets an element attribute. Equivalent to <b>attrib[key] = value</b>, 429 # but some implementations may handle this a bit more efficiently. 430 # 431 # @param key What attribute to set. 432 # @param value The attribute value. 433 436 437 ## 438 # Gets a list of attribute names. The names are returned in an 439 # arbitrary order (just like for an ordinary Python dictionary). 440 # Equivalent to <b>attrib.keys()</b>. 441 # 442 # @return A list of element attribute names. 
443 # @defreturn list of strings 444 447 448 ## 449 # Gets element attributes, as a sequence. The attributes are 450 # returned in an arbitrary order. Equivalent to <b>attrib.items()</b>. 451 # 452 # @return A list of (name, value) tuples for all attributes. 453 # @defreturn list of (string, string) tuples 454 457 458 ## 459 # Creates a tree iterator. The iterator loops over this element 460 # and all subelements, in document order, and returns all elements 461 # with a matching tag. 462 # <p> 463 # If the tree structure is modified during iteration, new or removed 464 # elements may or may not be included. To get a stable set, use the 465 # list() function on the iterator, and loop over the resulting list. 466 # 467 # @param tag What tags to look for (default is to return all elements). 468 # @return An iterator containing all the matching elements. 469 # @defreturn iterator 470472 if tag == "*": 473 tag = None 474 if tag is None or self.tag == tag: 475 yield self 476 for e in self._children: 477 for e in e.iter(tag): 478 yield e479 480 # compatibility482 # Change for a DeprecationWarning in 1.4 483 warnings.warn( 484 "This method will be removed in future versions. " 485 "Use 'elem.iter()' or 'list(elem.iter())' instead.", 486 PendingDeprecationWarning, stacklevel=2 487 ) 488 return list(self.iter(tag))489 490 ## 491 # Creates a text iterator. The iterator loops over this element 492 # and all subelements, in document order, and returns all inner 493 # text. 494 # 495 # @return An iterator containing all inner text. 496 # @defreturn iterator 497528 attrib = attrib.copy() 529 attrib.update(extra) 530 element = parent.makeelement(tag, attrib) 531 parent.append(element) 532 return element533 534 ## 535 # Comment element factory. This factory function creates a special 536 # element that will be serialized as an XML comment by the standard 537 # serializer. 538 # <p> 539 # The comment string can be either an 8-bit ASCII string or a Unicode 540 # string. 
541 # 542 # @param text A string containing the comment string. 543 # @return An element instance, representing a comment. 544 # @defreturn Element 545 550 551 ## 552 # PI element factory. This factory function creates a special element 553 # that will be serialized as an XML processing instruction by the standard 554 # serializer. 555 # 556 # @param target A string containing the PI target. 557 # @param text A string containing the PI contents, if any. 558 # @return An element instance, representing a PI. 559 # @defreturn Element 560562 element = Element(ProcessingInstruction) 563 element.text = target 564 if text: 565 element.text = element.text + " " + text 566 return element567 568 PI = ProcessingInstruction 569 570 ## 571 # QName wrapper. This can be used to wrap a QName attribute value, in 572 # order to get proper namespace handling on output. 573 # 574 # @param text A string containing the QName value, in the form {uri}local, 575 # or, if the tag argument is given, the URI part of a QName. 576 # @param tag Optional tag. If given, the first argument is interpreted as 577 # an URI, and this argument is interpreted as a local name. 578 # @return An opaque object, representing the QName. 579 593 594 # -------------------------------------------------------------------- 595 596 ## 597 # ElementTree wrapper class. This class represents an entire element 598 # hierarchy, and adds some extra support for serialization to and from 599 # standard XML. 600 # 601 # @param element Optional root element. 602 # @keyparam file Optional file handle or file name. If given, the 603 # tree is initialized with the contents of this XML file. 604606828 829 # -------------------------------------------------------------------- 830 # serialization support 831608 # assert element is None or iselement(element) 609 self._root = element # first node 610 if file: 611 self.parse(file)612 613 ## 614 # Gets the root element for this tree. 615 # 616 # @return An element instance. 
617 # @defreturn Element 618 621 622 ## 623 # Replaces the root element for this tree. This discards the 624 # current contents of the tree, and replaces it with the given 625 # element. Use with care. 626 # 627 # @param element An element instance. 628 632 633 ## 634 # Loads an external XML document into this element tree. 635 # 636 # @param source A file name or file object. If a file object is 637 # given, it only has to implement a <b>read(n)</b> method. 638 # @keyparam parser An optional parser instance. If not given, the 639 # standard {@link XMLParser} parser is used. 640 # @return The document root element. 641 # @defreturn Element 642 # @exception ParseError If the parser fails to parse the document. 643645 close_source = False 646 if not hasattr(source, "read"): 647 source = open(source, "rb") 648 close_source = True 649 try: 650 if not parser: 651 parser = XMLParser(target=TreeBuilder()) 652 while 1: 653 data = source.read(65536) 654 if not data: 655 break 656 parser.feed(data) 657 self._root = parser.close() 658 return self._root 659 finally: 660 if close_source: 661 source.close()662 663 ## 664 # Creates a tree iterator for the root element. The iterator loops 665 # over all elements in this tree, in document order. 666 # 667 # @param tag What tags to look for (default is to return all elements) 668 # @return An iterator. 669 # @defreturn iterator 670 674 675 # compatibility677 # Change for a DeprecationWarning in 1.4 678 warnings.warn( 679 "This method will be removed in future versions. " 680 "Use 'tree.iter()' or 'list(tree.iter())' instead.", 681 PendingDeprecationWarning, stacklevel=2 682 ) 683 return list(self.iter(tag))684 685 ## 686 # Finds the first toplevel element with given tag. 687 # Same as getroot().find(path). 688 # 689 # @param path What element to look for. 690 # @keyparam namespaces Optional namespace prefix map. 691 # @return The first matching element, or None if no element was found. 
692 # @defreturn Element or None 693695 # assert self._root is not None 696 if path[:1] == "/": 697 path = "." + path 698 warnings.warn( 699 "This search is broken in 1.3 and earlier, and will be " 700 "fixed in a future version. If you rely on the current " 701 "behaviour, change it to %r" % path, 702 FutureWarning, stacklevel=2 703 ) 704 return self._root.find(path, namespaces)705 706 ## 707 # Finds the element text for the first toplevel element with given 708 # tag. Same as getroot().findtext(path). 709 # 710 # @param path What toplevel element to look for. 711 # @param default What to return if the element was not found. 712 # @keyparam namespaces Optional namespace prefix map. 713 # @return The text content of the first matching element, or the 714 # default value if no element was found. Note that if the element 715 # is found, but has no text content, this method returns an 716 # empty string. 717 # @defreturn string 718720 # assert self._root is not None 721 if path[:1] == "/": 722 path = "." + path 723 warnings.warn( 724 "This search is broken in 1.3 and earlier, and will be " 725 "fixed in a future version. If you rely on the current " 726 "behaviour, change it to %r" % path, 727 FutureWarning, stacklevel=2 728 ) 729 return self._root.findtext(path, default, namespaces)730 731 ## 732 # Finds all toplevel elements with the given tag. 733 # Same as getroot().findall(path). 734 # 735 # @param path What element to look for. 736 # @keyparam namespaces Optional namespace prefix map. 737 # @return A list or iterator containing all matching elements, 738 # in document order. 739 # @defreturn list of Element instances 740742 # assert self._root is not None 743 if path[:1] == "/": 744 path = "." + path 745 warnings.warn( 746 "This search is broken in 1.3 and earlier, and will be " 747 "fixed in a future version. 
If you rely on the current " 748 "behaviour, change it to %r" % path, 749 FutureWarning, stacklevel=2 750 ) 751 return self._root.findall(path, namespaces)752 753 ## 754 # Finds all matching subelements, by tag name or path. 755 # Same as getroot().iterfind(path). 756 # 757 # @param path What element to look for. 758 # @keyparam namespaces Optional namespace prefix map. 759 # @return An iterator or sequence containing all matching elements, 760 # in document order. 761 # @defreturn a generated sequence of Element instances 762764 # assert self._root is not None 765 if path[:1] == "/": 766 path = "." + path 767 warnings.warn( 768 "This search is broken in 1.3 and earlier, and will be " 769 "fixed in a future version. If you rely on the current " 770 "behaviour, change it to %r" % path, 771 FutureWarning, stacklevel=2 772 ) 773 return self._root.iterfind(path, namespaces)774 775 ## 776 # Writes the element tree to a file, as XML. 777 # 778 # @def write(file, **options) 779 # @param file A file name, or a file object opened for writing. 780 # @param **options Options, given as keyword arguments. 781 # @keyparam encoding Optional output encoding (default is US-ASCII). 782 # @keyparam method Optional output method ("xml", "html", "text" or 783 # "c14n"; default is "xml"). 784 # @keyparam xml_declaration Controls if an XML declaration should 785 # be added to the file. Use False for never, True for always, 786 # None for only if not US-ASCII or UTF-8. None is default. 787788 - def write(self, file_or_filename, 789 # keyword arguments 790 encoding=None, 791 xml_declaration=None, 792 default_namespace=None, 793 method=None):794 # assert self._root is not None 795 if not method: 796 method = "xml" 797 elif method not in _serialize: 798 # FIXME: raise an ImportError for c14n if ElementC14N is missing? 
799 raise ValueError("unknown method %r" % method) 800 if hasattr(file_or_filename, "write"): 801 file = file_or_filename 802 else: 803 file = open(file_or_filename, "wb") 804 write = file.write 805 if not encoding: 806 if method == "c14n": 807 encoding = "utf-8" 808 else: 809 encoding = "us-ascii" 810 elif xml_declaration or (xml_declaration is None and 811 encoding not in ("utf-8", "us-ascii")): 812 if method == "xml": 813 write("<?xml version='1.0' encoding='%s'?>\n" % encoding) 814 if method == "text": 815 _serialize_text(write, self._root, encoding) 816 else: 817 qnames, namespaces = _namespaces( 818 self._root, encoding, default_namespace 819 ) 820 serialize = _serialize[method] 821 serialize(write, self._root, encoding, qnames, namespaces) 822 if file_or_filename is not file: 823 file.close()824833 # identify namespaces used in this tree 834 835 # maps qnames to *encoded* prefix:local names 836 qnames = {None: None} 837 838 # maps uri:s to prefixes 839 namespaces = {} 840 if default_namespace: 841 namespaces[default_namespace] = "" 842 843 def encode(text): 844 return text.encode(encoding)845 846 def add_qname(qname): 847 # calculate serialized qname representation 848 try: 849 if qname[:1] == "{": 850 uri, tag = qname[1:].rsplit("}", 1) 851 prefix = namespaces.get(uri) 852 if prefix is None: 853 prefix = _namespace_map.get(uri) 854 if prefix is None: 855 prefix = "ns%d" % len(namespaces) 856 if prefix != "xml": 857 namespaces[uri] = prefix 858 if prefix: 859 qnames[qname] = encode("%s:%s" % (prefix, tag)) 860 else: 861 qnames[qname] = encode(tag) # default element 862 else: 863 if default_namespace: 864 # FIXME: can this be handled in XML 1.0? 
865 raise ValueError( 866 "cannot use non-qualified names with " 867 "default_namespace option" 868 ) 869 qnames[qname] = encode(qname) 870 except TypeError: 871 _raise_serialization_error(qname) 872 873 # populate qname and namespaces table 874 try: 875 iterate = elem.iter 876 except AttributeError: 877 iterate = elem.getiterator # cET compatibility 878 for elem in iterate(): 879 tag = elem.tag 880 if isinstance(tag, QName): 881 if tag.text not in qnames: 882 add_qname(tag.text) 883 elif isinstance(tag, basestring): 884 if tag not in qnames: 885 add_qname(tag) 886 elif tag is not None and tag is not Comment and tag is not PI: 887 _raise_serialization_error(tag) 888 for key, value in elem.items(): 889 if isinstance(key, QName): 890 key = key.text 891 if key not in qnames: 892 add_qname(key) 893 if isinstance(value, QName) and value.text not in qnames: 894 add_qname(value.text) 895 text = elem.text 896 if isinstance(text, QName) and text.text not in qnames: 897 add_qname(text.text) 898 return qnames, namespaces 899901 tag = elem.tag 902 text = elem.text 903 if tag is Comment: 904 write("<!--%s-->" % _encode(text, encoding)) 905 elif tag is ProcessingInstruction: 906 write("<?%s?>" % _encode(text, encoding)) 907 else: 908 tag = qnames[tag] 909 if tag is None: 910 if text: 911 write(_escape_cdata(text, encoding)) 912 for e in elem: 913 _serialize_xml(write, e, encoding, qnames, None) 914 else: 915 write("<" + tag) 916 items = elem.items() 917 if items or namespaces: 918 if namespaces: 919 for v, k in sorted(namespaces.items(), 920 key=lambda x: x[1]): # sort on prefix 921 if k: 922 k = ":" + k 923 write(" xmlns%s=\"%s\"" % ( 924 k.encode(encoding), 925 _escape_attrib(v, encoding) 926 )) 927 for k, v in sorted(items): # lexical order 928 if isinstance(k, QName): 929 k = k.text 930 if isinstance(v, QName): 931 v = qnames[v.text] 932 else: 933 v = _escape_attrib(v, encoding) 934 write(" %s=\"%s\"" % (qnames[k], v)) 935 if text or len(elem): 936 write(">") 937 if text: 
938 write(_escape_cdata(text, encoding)) 939 for e in elem: 940 _serialize_xml(write, e, encoding, qnames, None) 941 write("</" + tag + ">") 942 else: 943 write(" />") 944 if elem.tail: 945 write(_escape_cdata(elem.tail, encoding))946 947 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", 948 "img", "input", "isindex", "link", "meta" "param") 949 950 try: 951 HTML_EMPTY = set(HTML_EMPTY) 952 except NameError: 953 pass 954956 tag = elem.tag 957 text = elem.text 958 if tag is Comment: 959 write("<!--%s-->" % _escape_cdata(text, encoding)) 960 elif tag is ProcessingInstruction: 961 write("<?%s?>" % _escape_cdata(text, encoding)) 962 else: 963 tag = qnames[tag] 964 if tag is None: 965 if text: 966 write(_escape_cdata(text, encoding)) 967 for e in elem: 968 _serialize_html(write, e, encoding, qnames, None) 969 else: 970 write("<" + tag) 971 items = elem.items() 972 if items or namespaces: 973 if namespaces: 974 for v, k in sorted(namespaces.items(), 975 key=lambda x: x[1]): # sort on prefix 976 if k: 977 k = ":" + k 978 write(" xmlns%s=\"%s\"" % ( 979 k.encode(encoding), 980 _escape_attrib(v, encoding) 981 )) 982 for k, v in sorted(items): # lexical order 983 if isinstance(k, QName): 984 k = k.text 985 if isinstance(v, QName): 986 v = qnames[v.text] 987 else: 988 v = _escape_attrib_html(v, encoding) 989 # FIXME: handle boolean attributes 990 write(" %s=\"%s\"" % (qnames[k], v)) 991 write(">") 992 tag = tag.lower() 993 if text: 994 if tag == "script" or tag == "style": 995 write(_encode(text, encoding)) 996 else: 997 write(_escape_cdata(text, encoding)) 998 for e in elem: 999 _serialize_html(write, e, encoding, qnames, None) 1000 if tag not in HTML_EMPTY: 1001 write("</" + tag + ">") 1002 if elem.tail: 1003 write(_escape_cdata(elem.tail, encoding))10041006 for part in elem.itertext(): 1007 write(part.encode(encoding)) 1008 if elem.tail: 1009 write(elem.tail.encode(encoding))1010 1011 _serialize = { 1012 "xml": _serialize_xml, 1013 "html": 
_serialize_html, 1014 "text": _serialize_text, 1015 # this optional method is imported at the end of the module 1016 # "c14n": _serialize_c14n, 1017 } 1018 1019 ## 1020 # Registers a namespace prefix. The registry is global, and any 1021 # existing mapping for either the given prefix or the namespace URI 1022 # will be removed. 1023 # 1024 # @param prefix Namespace prefix. 1025 # @param uri Namespace uri. Tags and attributes in this namespace 1026 # will be serialized with the given prefix, if at all possible. 1027 # @exception ValueError If the prefix is reserved, or is otherwise 1028 # invalid. 10291031 if re.match("ns\d+$", prefix): 1032 raise ValueError("Prefix format reserved for internal use") 1033 for k, v in _namespace_map.items(): 1034 if k == uri or v == prefix: 1035 del _namespace_map[k] 1036 _namespace_map[uri] = prefix1037 1038 _namespace_map = { 1039 # "well-known" namespace prefixes 1040 "http://www.w3.org/XML/1998/namespace": "xml", 1041 "http://www.w3.org/1999/xhtml": "html", 1042 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", 1043 "http://schemas.xmlsoap.org/wsdl/": "wsdl", 1044 # xml schema 1045 "http://www.w3.org/2001/XMLSchema": "xs", 1046 "http://www.w3.org/2001/XMLSchema-instance": "xsi", 1047 # dublin core 1048 "http://purl.org/dc/elements/1.1/": "dc", 1049 } 1050 10551057 try: 1058 return text.encode(encoding, "xmlcharrefreplace") 1059 except (TypeError, AttributeError): 1060 _raise_serialization_error(text)10611063 # escape character data 1064 try: 1065 # it's worth avoiding do-nothing calls for strings that are 1066 # shorter than 500 character, or so. assume that's, by far, 1067 # the most common case in most applications. 
1068 if "&" in text: 1069 text = text.replace("&", "&amp;") 1070 if "<" in text: 1071 text = text.replace("<", "&lt;") 1072 if ">" in text: 1073 text = text.replace(">", "&gt;") 1074 return text.encode(encoding, "xmlcharrefreplace") 1075 except (TypeError, AttributeError): 1076 _raise_serialization_error(text)10771079 # escape attribute value 1080 try: 1081 if "&" in text: 1082 text = text.replace("&", "&amp;") 1083 if "<" in text: 1084 text = text.replace("<", "&lt;") 1085 if ">" in text: 1086 text = text.replace(">", "&gt;") 1087 if "\"" in text: 1088 text = text.replace("\"", "&quot;") 1089 if "\n" in text: 1090 text = text.replace("\n", "&#10;") 1091 return text.encode(encoding, "xmlcharrefreplace") 1092 except (TypeError, AttributeError): 1093 _raise_serialization_error(text)10941096 # escape attribute value 1097 try: 1098 if "&" in text: 1099 text = text.replace("&", "&amp;") 1100 if ">" in text: 1101 text = text.replace(">", "&gt;") 1102 if "\"" in text: 1103 text = text.replace("\"", "&quot;") 1104 return text.encode(encoding, "xmlcharrefreplace") 1105 except (TypeError, AttributeError): 1106 _raise_serialization_error(text)1107 1108 # -------------------------------------------------------------------- 1109 1110 ## 1111 # Generates a string representation of an XML element, including all 1112 # subelements. 1113 # 1114 # @param element An Element instance. 1115 # @keyparam encoding Optional output encoding (default is US-ASCII). 1116 # @keyparam method Optional output method ("xml", "html", "text" or 1117 # "c14n"; default is "xml"). 1118 # @return An encoded string containing the XML data. 1119 # @defreturn string 1120 1124 data = [] 1125 file = dummy() 1126 file.write = data.append 1127 ElementTree(element).write(file, encoding, method=method) 1128 return "".join(data) 1129 1130 ## 1131 # Generates a string representation of an XML element, including all 1132 # subelements. The string is returned as a sequence of string fragments. 1133 # 1134 # @param element An Element instance. 
1135 # @keyparam encoding Optional output encoding (default is US-ASCII). 1136 # @keyparam method Optional output method ("xml", "html", "text" or 1137 # "c14n"; default is "xml"). 1138 # @return A sequence object containing the XML data. 1139 # @defreturn sequence 1140 # @since 1.3 1141 1145 data = [] 1146 file = dummy() 1147 file.write = data.append 1148 ElementTree(element).write(file, encoding, method=method) 1149 # FIXME: merge small fragments into larger parts 1150 return data 1151 1152 ## 1153 # Writes an element tree or element structure to sys.stdout. This 1154 # function should be used for debugging only. 1155 # <p> 1156 # The exact output format is implementation dependent. In this 1157 # version, it's written as an ordinary XML file. 1158 # 1159 # @param elem An element tree or an individual element. 11601162 # debugging 1163 if not isinstance(elem, ElementTree): 1164 elem = ElementTree(elem) 1165 elem.write(sys.stdout) 1166 tail = elem.getroot().tail 1167 if not tail or tail[-1] != "\n": 1168 sys.stdout.write("\n")1169 1170 # -------------------------------------------------------------------- 1171 # parsing 1172 1173 ## 1174 # Parses an XML document into an element tree. 1175 # 1176 # @param source A filename or file object containing XML data. 1177 # @param parser An optional parser instance. If not given, the 1178 # standard {@link XMLParser} parser is used. 1179 # @return An ElementTree instance 1180 1185 1186 ## 1187 # Parses an XML document into an element tree incrementally, and reports 1188 # what's going on to the user. 1189 # 1190 # @param source A filename or file object containing XML data. 1191 # @param events A list of events to report back. If omitted, only "end" 1192 # events are reported. 1193 # @param parser An optional parser instance. If not given, the 1194 # standard {@link XMLParser} parser is used. 1195 # @return A (event, elem) iterator. 
11961198 close_source = False 1199 if not hasattr(source, "read"): 1200 source = open(source, "rb") 1201 close_source = True 1202 if not parser: 1203 parser = XMLParser(target=TreeBuilder()) 1204 return _IterParseIterator(source, events, parser, close_source)120512071234 parser.StartElementHandler = handler 1235 elif event == "end": 1236 def handler(tag, event=event, append=append, 1237 end=self._parser._end): 1238 append((event, end(tag))) 1239 parser.EndElementHandler = handler 1240 elif event == "start-ns": 1241 def handler(prefix, uri, event=event, append=append): 1242 try: 1243 uri = (uri or "").encode("ascii") 1244 except UnicodeError: 1245 pass 1246 append((event, (prefix or "", uri or ""))) 1247 parser.StartNamespaceDeclHandler = handler 1248 elif event == "end-ns": 1249 def handler(prefix, event=event, append=append): 1250 append((event, None)) 1251 parser.EndNamespaceDeclHandler = handler 1252 else: 1253 raise ValueError("unknown event %r" % event) 12541209 self._file = source 1210 self._close_file = close_source 1211 self._events = [] 1212 self._index = 0 1213 self._error = None 1214 self.root = self._root = None 1215 self._parser = parser 1216 # wire up the parser for event reporting 1217 parser = self._parser._parser 1218 append = self._events.append 1219 if events is None: 1220 events = ["end"] 1221 for event in events: 1222 if event == "start": 1223 try: 1224 parser.ordered_attributes = 1 1225 parser.specified_attributes = 1 1226 def handler(tag, attrib_in, event=event, append=append, 1227 start=self._parser._start_list): 1228 append((event, start(tag, attrib_in)))1229 parser.StartElementHandler = handler 1230 except AttributeError: 1231 def handler(tag, attrib_in, event=event, append=append, 1232 start=self._parser._start): 1233 append((event, start(tag, attrib_in)))1256 while 1: 1257 try: 1258 item = self._events[self._index] 1259 self._index += 1 1260 return item 1261 except IndexError: 1262 pass 1263 if self._error: 1264 e = self._error 1265 
self._error = None 1266 raise e 1267 if self._parser is None: 1268 self.root = self._root 1269 if self._close_file: 1270 self._file.close() 1271 raise StopIteration 1272 # load event buffer 1273 del self._events[:] 1274 self._index = 0 1275 data = self._file.read(16384) 1276 if data: 1277 try: 1278 self._parser.feed(data) 1279 except SyntaxError as exc: 1280 self._error = exc 1281 else: 1282 self._root = self._parser.close() 1283 self._parser = None1284 1287 1288 ## 1289 # Parses an XML document from a string constant. This function can 1290 # be used to embed "XML literals" in Python code. 1291 # 1292 # @param source A string containing XML data. 1293 # @param parser An optional parser instance. If not given, the 1294 # standard {@link XMLParser} parser is used. 1295 # @return An Element instance. 1296 # @defreturn Element 12971299 if not parser: 1300 parser = XMLParser(target=TreeBuilder()) 1301 parser.feed(text) 1302 return parser.close()1303 1304 ## 1305 # Parses an XML document from a string constant, and also returns 1306 # a dictionary which maps from element id:s to elements. 1307 # 1308 # @param source A string containing XML data. 1309 # @param parser An optional parser instance. If not given, the 1310 # standard {@link XMLParser} parser is used. 1311 # @return A tuple containing an Element instance and a dictionary. 1312 # @defreturn (Element, dictionary) 13131315 if not parser: 1316 parser = XMLParser(target=TreeBuilder()) 1317 parser.feed(text) 1318 tree = parser.close() 1319 ids = {} 1320 for elem in tree.iter(): 1321 id = elem.get("id") 1322 if id: 1323 ids[id] = elem 1324 return tree, ids1325 1326 ## 1327 # Parses an XML document from a string constant. Same as {@link #XML}. 1328 # 1329 # @def fromstring(text) 1330 # @param source A string containing XML data. 1331 # @return An Element instance. 1332 # @defreturn Element 1333 1334 fromstring = XML 1335 1336 ## 1337 # Parses an XML document from a sequence of string fragments. 
1338 # 1339 # @param sequence A list or other sequence containing XML data fragments. 1340 # @param parser An optional parser instance. If not given, the 1341 # standard {@link XMLParser} parser is used. 1342 # @return An Element instance. 1343 # @defreturn Element 1344 # @since 1.3 13451347 if not parser: 1348 parser = XMLParser(target=TreeBuilder()) 1349 for text in sequence: 1350 parser.feed(text) 1351 return parser.close()1352 1353 # -------------------------------------------------------------------- 1354 1355 ## 1356 # Generic element structure builder. This builder converts a sequence 1357 # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link 1358 # #TreeBuilder.end} method calls to a well-formed element structure. 1359 # <p> 1360 # You can use this class to build an element structure using a custom XML 1361 # parser, or a parser for some other XML-like format. 1362 # 1363 # @param element_factory Optional element factory. This factory 1364 # is called to create new Element instances, as necessary. 136513671442 1443 ## 1444 # Element structure builder for XML source data, based on the 1445 # <b>expat</b> parser. 1446 # 1447 # @keyparam target Target object. If omitted, the builder uses an 1448 # instance of the standard {@link #TreeBuilder} class. 1449 # @keyparam html Predefine HTML entities. This flag is not supported 1450 # by the current implementation. 1451 # @keyparam encoding Optional encoding. If given, the value overrides 1452 # the encoding specified in the XML file. 1453 # @see #ElementTree 1454 # @see #TreeBuilder 14551369 self._data = [] # data collector 1370 self._elem = [] # element stack 1371 self._last = None # last element 1372 self._tail = None # true if we're after an end tag 1373 if element_factory is None: 1374 element_factory = Element 1375 self._factory = element_factory1376 1377 ## 1378 # Flushes the builder buffers, and returns the toplevel document 1379 # element. 1380 # 1381 # @return An Element instance. 
1382 # @defreturn Element 13831385 assert len(self._elem) == 0, "missing end tags" 1386 assert self._last is not None, "missing toplevel element" 1387 return self._last13881390 if self._data: 1391 if self._last is not None: 1392 text = "".join(self._data) 1393 if self._tail: 1394 assert self._last.tail is None, "internal error (tail)" 1395 self._last.tail = text 1396 else: 1397 assert self._last.text is None, "internal error (text)" 1398 self._last.text = text 1399 self._data = []1400 1401 ## 1402 # Adds text to the current element. 1403 # 1404 # @param data A string. This should be either an 8-bit string 1405 # containing ASCII text, or a Unicode string. 1406 1409 1410 ## 1411 # Opens a new element. 1412 # 1413 # @param tag The element name. 1414 # @param attrib A dictionary containing element attributes. 1415 # @return The opened element. 1416 # @defreturn Element 14171419 self._flush() 1420 self._last = elem = self._factory(tag, attrs) 1421 if self._elem: 1422 self._elem[-1].append(elem) 1423 self._elem.append(elem) 1424 self._tail = 0 1425 return elem1426 1427 ## 1428 # Closes the current element. 1429 # 1430 # @param tag The element name. 1431 # @return The closed element. 1432 # @defreturn Element 143314571659 1660 # compatibility 1661 XMLTreeBuilder = XMLParser 1662 1663 # workaround circular import. 
1664 try: 1665 from ElementC14N import _serialize_c14n 1666 _serialize["c14n"] = _serialize_c14n 1667 except ImportError: 1668 pass 16691459 try: 1460 from xml.parsers import expat 1461 except ImportError: 1462 try: 1463 import pyexpat as expat 1464 except ImportError: 1465 raise ImportError( 1466 "No module named expat; use SimpleXMLTreeBuilder instead" 1467 ) 1468 parser = expat.ParserCreate(encoding, "}") 1469 if target is None: 1470 target = TreeBuilder() 1471 # underscored names are provided for compatibility only 1472 self.parser = self._parser = parser 1473 self.target = self._target = target 1474 self._error = expat.error 1475 self._names = {} # name memo cache 1476 # callbacks 1477 parser.DefaultHandlerExpand = self._default 1478 parser.StartElementHandler = self._start 1479 parser.EndElementHandler = self._end 1480 parser.CharacterDataHandler = self._data 1481 # optional callbacks 1482 parser.CommentHandler = self._comment 1483 parser.ProcessingInstructionHandler = self._pi 1484 # let expat do the buffering, if supported 1485 try: 1486 self._parser.buffer_text = 1 1487 except AttributeError: 1488 pass 1489 # use new-style attribute handling, if supported 1490 try: 1491 self._parser.ordered_attributes = 1 1492 self._parser.specified_attributes = 1 1493 parser.StartElementHandler = self._start_list 1494 except AttributeError: 1495 pass 1496 self._doctype = None 1497 self.entity = {} 1498 try: 1499 self.version = "Expat %d.%d.%d" % expat.version_info 1500 except AttributeError: 1501 pass # unknown15021504 err = ParseError(value) 1505 err.code = value.code 1506 err.position = value.lineno, value.offset 1507 raise err15081510 # convert text string to ascii, if possible 1511 try: 1512 return text.encode("ascii") 1513 except UnicodeError: 1514 return text15151517 # expand qname, and convert name string to ascii, if possible 1518 try: 1519 name = self._names[key] 1520 except KeyError: 1521 name = key 1522 if "}" in name: 1523 name = "{" + name 1524 
self._names[key] = name = self._fixtext(name) 1525 return name15261528 fixname = self._fixname 1529 fixtext = self._fixtext 1530 tag = fixname(tag) 1531 attrib = {} 1532 for key, value in attrib_in.items(): 1533 attrib[fixname(key)] = fixtext(value) 1534 return self.target.start(tag, attrib)15351537 fixname = self._fixname 1538 fixtext = self._fixtext 1539 tag = fixname(tag) 1540 attrib = {} 1541 if attrib_in: 1542 for i in range(0, len(attrib_in), 2): 1543 attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1]) 1544 return self.target.start(tag, attrib)1545 1548 15511553 try: 1554 comment = self.target.comment 1555 except AttributeError: 1556 pass 1557 else: 1558 return comment(self._fixtext(data))15591561 try: 1562 pi = self.target.pi 1563 except AttributeError: 1564 pass 1565 else: 1566 return pi(self._fixtext(target), self._fixtext(data))15671569 prefix = text[:1] 1570 if prefix == "&": 1571 # deal with undefined entities 1572 try: 1573 self.target.data(self.entity[text[1:-1]]) 1574 except KeyError: 1575 from xml.parsers import expat 1576 err = expat.error( 1577 "undefined entity %s: line %d, column %d" % 1578 (text, self._parser.ErrorLineNumber, 1579 self._parser.ErrorColumnNumber) 1580 ) 1581 err.code = 11 # XML_ERROR_UNDEFINED_ENTITY 1582 err.lineno = self._parser.ErrorLineNumber 1583 err.offset = self._parser.ErrorColumnNumber 1584 raise err 1585 elif prefix == "<" and text[:9] == "<!DOCTYPE": 1586 self._doctype = [] # inside a doctype declaration 1587 elif self._doctype is not None: 1588 # parse doctype contents 1589 if prefix == ">": 1590 self._doctype = None 1591 return 1592 text = text.strip() 1593 if not text: 1594 return 1595 self._doctype.append(text) 1596 n = len(self._doctype) 1597 if n > 2: 1598 type = self._doctype[1] 1599 if type == "PUBLIC" and n == 4: 1600 name, type, pubid, system = self._doctype 1601 elif type == "SYSTEM" and n == 3: 1602 name, type, system = self._doctype 1603 pubid = None 1604 else: 1605 return 1606 if pubid: 1607 pubid = 
pubid[1:-1] 1608 if hasattr(self.target, "doctype"): 1609 self.target.doctype(name, pubid, system[1:-1]) 1610 elif self.doctype is not self._XMLParser__doctype: 1611 # warn about deprecated call 1612 self._XMLParser__doctype(name, pubid, system[1:-1]) 1613 self.doctype(name, pubid, system[1:-1]) 1614 self._doctype = None1615 1616 ## 1617 # (Deprecated) Handles a doctype declaration. 1618 # 1619 # @param name Doctype name. 1620 # @param pubid Public identifier. 1621 # @param system System identifier. 16221624 """This method of XMLParser is deprecated.""" 1625 warnings.warn( 1626 "This method of XMLParser is deprecated. Define doctype() " 1627 "method on the TreeBuilder target.", 1628 DeprecationWarning, 1629 )1630 1631 # sentinel, if doctype is redefined in a subclass 1632 __doctype = doctype 1633 1634 ## 1635 # Feeds data to the parser. 1636 # 1637 # @param data Encoded data. 1638 1644 1645 ## 1646 # Finishes feeding data to the parser. 1647 # 1648 # @return An element structure. 1649 # @defreturn Element 1650
Home | Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0.1 on Fri Sep 28 21:21:00 2012 | http://epydoc.sourceforge.net |