Package lxml :: Module ElementInclude
[hide private]
[frames | no frames]

Source Code for Module lxml.ElementInclude

  1  # 
  2  # ElementTree 
  3  # $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $ 
  4  # 
  5  # limited xinclude support for element trees 
  6  # 
  7  # history: 
  8  # 2003-08-15 fl   created 
  9  # 2003-11-14 fl   fixed default loader 
 10  # 
 11  # Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved. 
 12  # 
 13  # fredrik@pythonware.com 
 14  # http://www.pythonware.com 
 15  # 
 16  # -------------------------------------------------------------------- 
 17  # The ElementTree toolkit is 
 18  # 
 19  # Copyright (c) 1999-2004 by Fredrik Lundh 
 20  # 
 21  # By obtaining, using, and/or copying this software and/or its 
 22  # associated documentation, you agree that you have read, understood, 
 23  # and will comply with the following terms and conditions: 
 24  # 
 25  # Permission to use, copy, modify, and distribute this software and 
 26  # its associated documentation for any purpose and without fee is 
 27  # hereby granted, provided that the above copyright notice appears in 
 28  # all copies, and that both that copyright notice and this permission 
 29  # notice appear in supporting documentation, and that the name of 
 30  # Secret Labs AB or the author not be used in advertising or publicity 
 31  # pertaining to distribution of the software without specific, written 
 32  # prior permission. 
 33  # 
 34  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
 35  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
 36  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
 37  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
 38  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
 39  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
 40  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
 41  # OF THIS SOFTWARE. 
 42  # -------------------------------------------------------------------- 
 43   
 44  ## 
 45  # Limited XInclude support for the ElementTree package. 
 46  ## 
 47   
 48  import copy, etree 
 49  from urlparse import urljoin 
 50  from urllib2 import urlopen 
 51   
 52  try: 
 53      set 
 54  except NameError: 
 55      from sets import Set as set 
 56   
 57  XINCLUDE = "{http://www.w3.org/2001/XInclude}" 
 58   
 59  XINCLUDE_INCLUDE = XINCLUDE + "include" 
 60  XINCLUDE_FALLBACK = XINCLUDE + "fallback" 
 61   
 62  ## 
 63  # Fatal include error. 
 64   
class FatalIncludeError(etree.LxmlSyntaxError):
    """Raised when an XInclude directive cannot be processed.

    This module raises it for recursive includes, resources that fail to
    load, unknown C{parse} modes, misplaced C{xi:fallback} elements and
    unknown elements in the XInclude namespace.
    """

##
# ET compatible default loader.
# This loader reads an included resource from disk.
#
# @param href Resource reference.
# @param parse Parse mode.  Either "xml" or "text".
# @param encoding Optional text encoding.
# @return The expanded resource.  If the parse mode is "xml", this
#    is the root element of the parsed document.  If the parse mode is
#    "text", this is the file content, decoded to a Unicode string when
#    an encoding is given.  If the loader fails, it can return None
#    or raise an IOError exception.
# @throws IOError If the loader fails to load the resource.

def default_loader(href, parse, encoding=None):
    """ElementTree-compatible loader that reads a resource from disk.

    @param href: path of the resource; handed to C{open()} unchanged.
    @param parse: C{"xml"} parses the file with C{etree.parse} and returns
        the root element; any other value returns the file content.
    @param encoding: optional; when truthy, the content read from the file
        is decoded with this encoding before it is returned.
    @return: the root element (xml mode) or the file content (otherwise).
    @raise IOError: if the file cannot be opened or read.
    """
    # Binary mode: the decode step below must see the bytes exactly as
    # stored, without any platform newline translation applied first.
    stream = open(href, "rb")
    try:
        if parse == "xml":
            return etree.parse(stream).getroot()
        data = stream.read()
    finally:
        # Release the handle even when parsing or reading fails.
        stream.close()
    if encoding:
        data = data.decode(encoding)
    return data

##
# Default loader used by lxml.etree - handles custom resolvers properly
#

def _lxml_default_loader(href, parse, encoding=None, parser=None):
    """Default loader used when no custom loader is supplied.

    @param href: location of the resource.  In text mode a value containing
        C{"://"} is fetched with C{urlopen}, anything else with C{open}.
    @param parse: C{"xml"} parses C{href} with C{etree.parse} using
        C{parser} and returns the root element; any other value returns
        the resource content.
    @param encoding: optional; when truthy, the content is decoded with it.
    @param parser: parser handed to C{etree.parse} in xml mode.
    @return: the root element (xml mode) or the resource content.
    @raise IOError: if a local file cannot be opened or read.
    """
    if parse == "xml":
        return etree.parse(href, parser).getroot()

    if "://" in href:
        stream = urlopen(href)
    else:
        # Binary mode so that decoding sees the bytes exactly as stored.
        stream = open(href, "rb")
    try:
        data = stream.read()
    finally:
        # Close the stream even if reading fails part-way.
        stream.close()
    if encoding:
        data = data.decode(encoding)
    return data

##
# Wrapper for ET compatibility - drops the parser

def _wrap_et_loader(loader):
    """Adapt an ElementTree-style loader to the internal loader interface.

    @param loader: callable invoked as C{loader(href, parse, encoding)}.
    @return: a callable accepting C{(href, parse, encoding=None,
        parser=None)} that forwards the first three arguments to
        C{loader} and ignores C{parser}.
    """
    # ``parser`` is accepted only for interface parity; it is never forwarded.
    return lambda href, parse, encoding=None, parser=None: loader(
        href, parse, encoding)


##
# Expand XInclude directives.
#
# @param elem Root element.
# @param loader Optional resource loader.  If omitted, it defaults
#     to {@link default_loader}.  If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
# @returns the node or its replacement if it was an XInclude node

def include(elem, loader=None, base_url=None):
    """Expand the XInclude directives below C{elem}.

    @param elem: an element, or any object with a C{getroot()} method
        (a tree), in which case its root element is processed.
    @param loader: optional resource loader, handed on to C{_include}.
    @param base_url: base URL handed on to C{_include}.  When C{None}, the
        C{docinfo.URL} of the tree is used if the tree has a C{docinfo}
        attribute.
    @return: C{None}; the result of C{_include} is not passed on.
    """
    is_tree = hasattr(elem, 'getroot')
    root = elem
    if is_tree:
        root = elem.getroot()

    if base_url is None:
        # No explicit base URL: look one up on the owning tree.
        if is_tree:
            tree = elem
        else:
            tree = root.getroottree()
        if hasattr(tree, 'docinfo'):
            base_url = tree.docinfo.URL

    _include(root, loader, base_url=base_url)
143
def _include(elem, loader=None, _parent_hrefs=None, base_url=None):
    """Expand the XInclude directives in C{elem} and its descendants.

    @param elem: element whose subtree is processed in place.
    @param loader: optional ElementTree-style loader, called as
        C{loader(href, parse, encoding)}.  When C{None},
        C{_lxml_default_loader} is used.
    @param _parent_hrefs: internal; set of hrefs of the XML documents that
        are currently being expanded, used to detect recursive includes.
    @param base_url: URL that C{href} attributes are joined to.
    @return: C{elem}.  If an C{xi:include} element without a parent is
        encountered, the loaded replacement is returned instead: an
        element for C{parse="xml"}, the text for C{parse="text"}.
    @raise FatalIncludeError: on a recursive include, a loader that
        returns C{None}, an unknown C{parse} value, an C{xi:fallback}
        whose parent is not C{xi:include}, or any other element in the
        XInclude namespace.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Materialise the matches first: the loop below edits the tree.
    include_elements = list(elem.getiterator(XINCLUDE + "*"))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                _parent_hrefs.add(href)
                try:
                    node = load_include(href, parse, parser=parser)
                    if node is None:
                        raise FatalIncludeError(
                            "cannot load %r as %r" % (href, parse)
                            )
                    # Relative hrefs inside the included document are
                    # resolved against that document's own location.
                    node = _include(node, loader, _parent_hrefs,
                                    base_url=href)
                finally:
                    # Only documents still being expanded count as
                    # ancestors; a sibling may include the same href again.
                    _parent_hrefs.discard(href)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                if parent is None:
                    return text # replaced the root node!
                # Carry the directive's tail over explicitly in both
                # branches, since the element is removed afterwards.
                # NOTE(review): assumes remove() drops the tail together
                # with the element - confirm for the lxml version in use.
                text = text + (e.tail or "")
                predecessor = e.getprevious()
                if predecessor is not None:
                    predecessor.tail = (predecessor.tail or "") + text
                else:
                    parent.text = (parent.text or "") + text
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                    )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem
210