Home | Trees | Indices | Help |
|
---|
|
1 # 2 # ElementTree 3 # $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $ 4 # 5 # limited xinclude support for element trees 6 # 7 # history: 8 # 2003-08-15 fl created 9 # 2003-11-14 fl fixed default loader 10 # 11 # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. 12 # 13 # fredrik@pythonware.com 14 # http://www.pythonware.com 15 # 16 # -------------------------------------------------------------------- 17 # The ElementTree toolkit is 18 # 19 # Copyright (c) 1999-2004 by Fredrik Lundh 20 # 21 # By obtaining, using, and/or copying this software and/or its 22 # associated documentation, you agree that you have read, understood, 23 # and will comply with the following terms and conditions: 24 # 25 # Permission to use, copy, modify, and distribute this software and 26 # its associated documentation for any purpose and without fee is 27 # hereby granted, provided that the above copyright notice appears in 28 # all copies, and that both that copyright notice and this permission 29 # notice appear in supporting documentation, and that the name of 30 # Secret Labs AB or the author not be used in advertising or publicity 31 # pertaining to distribution of the software without specific, written 32 # prior permission. 33 # 34 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 35 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 36 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 37 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 38 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 39 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 40 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 41 # OF THIS SOFTWARE. 42 # -------------------------------------------------------------------- 43 44 """ 45 Limited XInclude support for the ElementTree package. 
46 47 While lxml.etree has full support for XInclude (see 48 `etree.ElementTree.xinclude()`), this module provides a simpler, pure 49 Python, ElementTree compatible implementation that supports a simple 50 form of custom URL resolvers. 51 """ 52 53 from lxml import etree 54 try: 55 from urlparse import urljoin 56 from urllib2 import urlopen 57 except ImportError: 58 # Python 3 59 from urllib.parse import urljoin 60 from urllib.request import urlopen 61 62 XINCLUDE = "{http://www.w3.org/2001/XInclude}" 63 64 XINCLUDE_INCLUDE = XINCLUDE + "include" 65 XINCLUDE_FALLBACK = XINCLUDE + "fallback" 66 XINCLUDE_ITER_TAG = XINCLUDE + "*" 67 68 # For security reasons, the inclusion depth is limited to this read-only value by default. 69 DEFAULT_MAX_INCLUSION_DEPTH = 6 70 71 72 ## 73 # Fatal include error. 74 77 78 81 82 83 ## 84 # ET compatible default loader. 85 # This loader reads an included resource from disk. 86 # 87 # @param href Resource reference. 88 # @param parse Parse mode. Either "xml" or "text". 89 # @param encoding Optional text encoding. 90 # @return The expanded resource. If the parse mode is "xml", this 91 # is an ElementTree instance. If the parse mode is "text", this 92 # is a Unicode string. If the loader fails, it can return None 93 # or raise an IOError exception. 94 # @throws IOError If the loader fails to load the resource. 
# NOTE(review): the ``def`` line of this function was missing from the
# listing; the signature is reconstructed from the ``@param`` documentation
# that precedes it (href, parse, optional encoding) -- confirm.
def default_loader(href, parse, encoding=None):
    """Read an included resource from disk (ET compatible loader).

    :param href: Resource reference, passed to ``open()`` as a file path.
    :param parse: Parse mode.  ``"xml"`` parses the file with
        ``etree.parse()`` and returns the root element; any other value
        reads the file as text.
    :param encoding: Optional text encoding; a false value means UTF-8.
    :return: The root element for ``"xml"``, otherwise the decoded text.
    :raises IOError: If the file cannot be opened or read.
    """
    # The ``with`` block guarantees the handle is released even when
    # parsing or reading fails part-way through.
    with open(href, 'rb') as stream:
        if parse == "xml":
            return etree.parse(stream).getroot()
        data = stream.read()
    return data.decode(encoding or 'utf-8')


##
# Default loader used by lxml.etree - handles custom resolvers properly
#

# NOTE(review): the ``def`` line was missing from the listing; the signature
# is reconstructed from the names the body reads and from the keyword
# arguments used by the callers in ``_include`` -- confirm.
def _lxml_default_loader(href, parse, encoding=None, parser=None):
    """Load an included resource, honouring the document's parser.

    :param href: Resource reference.  For text includes, a value containing
        ``"://"`` is fetched with ``urlopen()``; anything else is opened as
        a local file.
    :param parse: Parse mode.  ``"xml"`` hands *href* and *parser* to
        ``etree.parse()``; any other value reads the resource as text.
    :param encoding: Optional text encoding; a false value means UTF-8.
    :param parser: Parser passed to ``etree.parse()`` in ``"xml"`` mode.
    :return: The root element for ``"xml"``, otherwise the decoded text.
    """
    if parse == "xml":
        return etree.parse(href, parser).getroot()

    if "://" in href:
        stream = urlopen(href)
    else:
        stream = open(href, 'rb')
    # try/finally rather than ``with``: the Python 2 ``urllib2`` response
    # object imported at the top of the file is not a context manager.
    try:
        data = stream.read()
    finally:
        stream.close()
    return data.decode(encoding or 'utf-8')


##
# Wrapper for ET compatibility - drops the parser

# NOTE(review): only ``return load`` survived in the listing; the header and
# the inner function are reconstructed from the comment above ("drops the
# parser") and from how ``_include`` calls the wrapped loader -- confirm.
def _wrap_et_loader(loader):
    """Adapt an ET-style ``loader(href, parse, encoding)`` callable.

    The returned function additionally accepts a ``parser`` keyword, which
    it ignores, so it can be called the same way as
    ``_lxml_default_loader``.
    """
    def load(href, parse, encoding=None, parser=None):
        return loader(href, parse, encoding)
    return load


##
# Expand XInclude directives.
#
# @param elem Root element.
# @param loader Optional resource loader. If omitted, it defaults
#     to {@link default_loader}. If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @param base_url The base URL of the original file, to resolve
#     relative include file references.
# @param max_depth The maximum number of recursive inclusions.
#     Limited to reduce the risk of malicious content explosion.
#     Pass None to disable the limitation.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
# @returns the node or its replacement if it was an XInclude node

# NOTE(review): the ``def`` line was missing from the listing; the signature
# is reconstructed from the ``@param`` documentation above and from the
# arguments forwarded to ``_include`` -- confirm.
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives found below *elem*.

    :param elem: Root element, or a tree object exposing ``getroot()``.
    :param loader: Optional ET-style resource loader; when omitted the
        lxml default loader is used.
    :param base_url: Base URL for resolving relative ``href`` values.  When
        ``None`` it is taken from ``docinfo.URL`` of the tree, if the tree
        has a ``docinfo`` attribute.
    :param max_depth: Maximum number of nested inclusions, or ``None`` to
        disable the limit.
    :return: The processed element, or its replacement if the element
        itself was an ``xi:include``.
    :raises ValueError: If *max_depth* is negative.
    """
    if max_depth is None:
        # -1 never counts down to the 0 that triggers the depth error.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    if base_url is None:
        if hasattr(elem, 'getroot'):
            tree = elem
            elem = elem.getroot()
        else:
            tree = elem.getroottree()
        if hasattr(tree, 'docinfo'):
            base_url = tree.docinfo.URL
    elif hasattr(elem, 'getroot'):
        elem = elem.getroot()
    # Hand the result back: when the root itself is an xi:include, the
    # replacement is only reachable through this return value.
    return _include(elem, loader, base_url, max_depth)


def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Recursively expand XInclude elements below *elem* (in place).

    :param elem: Element whose subtree is searched for elements in the
        XInclude namespace.
    :param loader: Optional ET-style loader; wrapped so it tolerates the
        ``parser`` keyword.  Defaults to ``_lxml_default_loader``.
    :param base_url: Base URL that each ``href`` attribute is joined to.
    :param max_depth: Remaining inclusion depth; an XML include is refused
        when this is exactly 0.
    :param _parent_hrefs: Set of hrefs currently being included, used to
        detect recursive includes.
    :return: *elem*, or the loaded replacement (element or text) when
        *elem* itself is an ``xi:include`` without a parent.
    :raises FatalIncludeError: On recursive includes, loaders returning
        ``None``, unknown ``parse`` values, misplaced ``xi:fallback`` or
        unknown elements in the XInclude namespace.
    :raises LimitedRecursiveIncludeError: If *max_depth* is exhausted.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Snapshot the matches first: the loop below modifies the tree.
    include_elements = list(
        elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                # Check for the root case first: a root-level directive has
                # no parent to splice text into, even if it has a preceding
                # sibling.
                if parent is None:
                    return text  # replaced the root node!
                # Keep the directive's tail in both branches below.
                # NOTE(review): lxml's remove() is understood to detach the
                # element together with its tail text -- confirm.
                text += e.tail or ""
                predecessor = e.getprevious()
                if predecessor is not None:
                    predecessor.tail = (predecessor.tail or "") + text
                else:
                    parent.text = (parent.text or "") + text
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem
Home | Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0.1 on Thu Jul 9 18:29:53 2020 | http://epydoc.sourceforge.net |