Package lxml :: Module _elementpath
[hide private]
[frames] | no frames]

Source Code for Module lxml._elementpath

  1  # This file is taken from ElementTree directly, unchanged beyond this line. 
  2  # 
  3  # limited xpath support for element trees 
  4  # 
  5  # history: 
  6  # 2003-05-23 fl   created 
  7  # 2003-05-28 fl   added support for // etc 
  8  # 2003-08-27 fl   fixed parsing of periods in element names 
  9  # 
 10  # Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved. 
 11  # 
 12  # fredrik@pythonware.com 
 13  # http://www.pythonware.com 
 14  # 
 15  # -------------------------------------------------------------------- 
 16  # The ElementTree toolkit is 
 17  # 
 18  # Copyright (c) 1999-2004 by Fredrik Lundh 
 19  # 
 20  # By obtaining, using, and/or copying this software and/or its 
 21  # associated documentation, you agree that you have read, understood, 
 22  # and will comply with the following terms and conditions: 
 23  # 
 24  # Permission to use, copy, modify, and distribute this software and 
 25  # its associated documentation for any purpose and without fee is 
 26  # hereby granted, provided that the above copyright notice appears in 
 27  # all copies, and that both that copyright notice and this permission 
 28  # notice appear in supporting documentation, and that the name of 
 29  # Secret Labs AB or the author not be used in advertising or publicity 
 30  # pertaining to distribution of the software without specific, written 
 31  # prior permission. 
 32  # 
 33  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
 34  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
 35  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
 36  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
 37  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
 38  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
 39  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
 40  # OF THIS SOFTWARE. 
 41  # -------------------------------------------------------------------- 
 42   
 43  ## 
 44  # Implementation module for XPath support.  There's usually no reason 
 45  # to import this module directly; the <b>ElementTree</b> does this for 
 46  # you, if needed. 
 47  ## 
 48   
 49  import re 
 50   
 51  xpath_tokenizer = re.compile( 
 52      "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+" 
 53      ).findall 
 54   
class xpath_descendant_or_self:
    """Marker type: an instance inside a compiled path stands for a '//' step."""
##
# Wrapper for a compiled XPath.
# <p>
# Only 'path/path'-style expressions are supported: tag names, the "*"
# wildcard, "." (the current element) and "//" (descendants).

class Path:

    ##
    # Create a Path instance from an XPath expression.
    #
    # @param path The XPath expression to compile.
    # @exception SyntaxError If the expression is absolute, ends with
    #     "//", lacks a "/" between two steps, or uses unsupported syntax.

    def __init__(self, path):
        tokens = xpath_tokenizer(path)
        # the current version supports 'path/path'-style expressions only
        self.path = []
        self.tag = None
        if tokens and tokens[0][0] == "/":
            raise SyntaxError("cannot use absolute path on element")
        # Consume tokens from the tail of a reversed list: list.pop() is
        # O(1), whereas list.pop(0) shifts every remaining token.
        tokens.reverse()
        while tokens:
            op, tag = tokens.pop()
            if tag or op == "*":
                self.path.append(tag or op)
            elif op == ".":
                pass
            elif op == "/":
                # a "/" in step position is the second half of "//"
                self.path.append(xpath_descendant_or_self())
                continue
            else:
                raise SyntaxError("unsupported path syntax (%s)" % op)
            if tokens:
                # every step must be followed by a separator
                op, tag = tokens.pop()
                if op != "/":
                    raise SyntaxError(
                        "expected path separator (%s)" % (op or tag)
                        )
        if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
            raise SyntaxError("path cannot end with //")
        # single-step paths get a fast path in find() and findtext()
        if len(self.path) == 1 and isinstance(self.path[0], type("")):
            self.tag = self.path[0]

    ##
    # (Internal) Iterate over a node and its descendants.
    #
    # Prefers the element's iter() method and only falls back to
    # getiterator(), which xml.etree.ElementTree dropped in Python 3.9.
    #
    # @param node The element to walk.
    # @param tag Tag to filter on, or None for no filter.
    # @return Whatever the element's iteration method returns.

    def _iter_tree(self, node, tag):
        walk = getattr(node, "iter", None)
        if walk is None:
            walk = node.getiterator
        return walk(tag)

    ##
    # (Internal) Locate the first matching element.
    #
    # @param element The element to search from.
    # @return The first match, or None if nothing matched.

    def _find_first(self, element):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return None
            return nodeset[0]
        for elem in element:
            # "*" must match any child here, exactly as it does in
            # findall(); comparing elem.tag against the literal "*"
            # would never succeed.
            if tag == "*" or elem.tag == tag:
                return elem
        return None

    ##
    # Find first matching object.
    #
    # @param element The element to search from.
    # @return The first matching element, or None if there is no match.

    def find(self, element):
        return self._find_first(element)

    ##
    # Find text for first matching object.
    #
    # @param element The element to search from.
    # @param default Value returned when nothing matches.
    # @return The text of the first match (an empty string if the match
    #     has no text), or the default value if there is no match.

    def findtext(self, element, default=None):
        elem = self._find_first(element)
        # compare against None explicitly: an element's truth value is
        # not a reliable "found" signal
        if elem is None:
            return default
        return elem.text or ""

    ##
    # Find all matching objects.
    #
    # @param element The element to search from.
    # @return A list of matching elements; empty if nothing matched.  An
    #     empty path yields a list holding just the given element.

    def findall(self, element):
        nodeset = [element]
        steps = self.path
        count = len(steps)
        index = 0
        while index < count:
            step = steps[index]
            index = index + 1
            matches = []
            if isinstance(step, xpath_descendant_or_self):
                # "//" consumes the following tag step, if there is one,
                # and uses it as the descendant filter
                tag = None
                if index < count and isinstance(steps[index], type("")):
                    tag = steps[index]
                    index = index + 1
                for node in nodeset:
                    found = list(self._iter_tree(node, tag))
                    # the walk includes the start node itself; drop it so
                    # that only descendants are reported
                    if found and found[0] is node:
                        matches.extend(found[1:])
                    else:
                        matches.extend(found)
            else:
                for node in nodeset:
                    for child in node:
                        if step == "*" or child.tag == step:
                            matches.append(child)
            if not matches:
                return []
            nodeset = matches
        return nodeset
# compiled Path objects, keyed by the path expression they were built from
_cache = {}

##
# (Internal) Compile path.
#
# Returns the cached Path for the expression when there is one.  Otherwise
# a new Path is built and stored; the cache is emptied wholesale first if
# it already holds 100 or more entries.
#
# @param path The path expression to compile.
# @return A Path instance for the expression.

def _compile(path):
    try:
        return _cache[path]
    except KeyError:
        pass
    compiled = Path(path)
    if len(_cache) >= 100:
        _cache.clear()
    _cache[path] = compiled
    return compiled
##
# Find first matching object.
#
# @param element The element to search from.
# @param path The path expression to evaluate.
# @return Whatever the compiled path's find() returns for the element.

def find(element, path):
    compiled = _compile(path)
    return compiled.find(element)
##
# Find text for first matching object.
#
# @param element The element to search from.
# @param path The path expression to evaluate.
# @param default Value handed on to the compiled path's findtext().
# @return Whatever the compiled path's findtext() returns.

def findtext(element, path, default=None):
    compiled = _compile(path)
    return compiled.findtext(element, default)
##
# Find all matching objects.
#
# @param element The element to search from.
# @param path The path expression to evaluate.
# @return Whatever the compiled path's findall() returns for the element.

def findall(element, path):
    compiled = _compile(path)
    return compiled.findall(element)
194