Package lxml :: Module _elementpath
[hide private]
[frames] | no frames]

Source Code for Module lxml._elementpath

  1  # 
  2  # ElementTree 
  3  # $Id: ElementPath.py 3276 2007-09-12 06:52:30Z fredrik $ 
  4  # 
  5  # limited xpath support for element trees 
  6  # 
  7  # history: 
  8  # 2003-05-23 fl   created 
  9  # 2003-05-28 fl   added support for // etc 
 10  # 2003-08-27 fl   fixed parsing of periods in element names 
 11  # 2007-09-10 fl   new selection engine 
 12  # 
 13  # Copyright (c) 2003-2007 by Fredrik Lundh.  All rights reserved. 
 14  # 
 15  # fredrik@pythonware.com 
 16  # http://www.pythonware.com 
 17  # 
 18  # -------------------------------------------------------------------- 
 19  # The ElementTree toolkit is 
 20  # 
 21  # Copyright (c) 1999-2007 by Fredrik Lundh 
 22  # 
 23  # By obtaining, using, and/or copying this software and/or its 
 24  # associated documentation, you agree that you have read, understood, 
 25  # and will comply with the following terms and conditions: 
 26  # 
 27  # Permission to use, copy, modify, and distribute this software and 
 28  # its associated documentation for any purpose and without fee is 
 29  # hereby granted, provided that the above copyright notice appears in 
 30  # all copies, and that both that copyright notice and this permission 
 31  # notice appear in supporting documentation, and that the name of 
 32  # Secret Labs AB or the author not be used in advertising or publicity 
 33  # pertaining to distribution of the software without specific, written 
 34  # prior permission. 
 35  # 
 36  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
 37  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
 38  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
 39  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
 40  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
 41  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
 42  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
 43  # OF THIS SOFTWARE. 
 44  # -------------------------------------------------------------------- 
 45   
 46  ## 
 47  # Implementation module for XPath support.  There's usually no reason 
 48  # to import this module directly; the <b>ElementTree</b> does this for 
 49  # you, if needed. 
 50  ## 
 51   
 52  import re 
 53   
 54  xpath_tokenizer = re.compile( 
 55      "(" 
 56      "'[^']*'|\"[^\"]*\"|" 
 57      "::|" 
 58      "//?|" 
 59      "\.\.|" 
 60      "\(\)|" 
 61      "[/.*:\[\]\(\)@=])|" 
 62      "((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|" 
 63      "\s+" 
 64      ).findall 
 65   
##
# Builds the selector for a plain tag-name step.
#
# @param next Callable returning the next token; not used by this step.
# @param token The current token pair; its second item is the tag name.
# @return A generator function that takes an iterable of elements and
#     yields, for each of them, whatever iterchildren(tag=name) produces.

def prepare_tag(next, token):
    wanted = token[1]

    def select(result):
        for node in result:
            matching_children = node.iterchildren(tag=wanted)
            for child in matching_children:
                yield child

    return select

##
# Builds the selector for the "*" step.
#
# @param next Callable returning the next token; not used by this step.
# @param token The current token pair; not used by this step.
# @return A generator function that takes an iterable of elements and
#     yields every item obtained by iterating over each of them in turn.

def prepare_star(next, token):
    def select(result):
        for node in result:
            for child in node:
                yield child

    return select

##
# Builds the selector for the "." step, which leaves the current
# selection untouched.
#
# @param next Callable returning the next token; not used by this step.
# @param token The current token pair; not used by this step.
# @return A function that returns its argument unchanged.

def prepare_dot(next, token):
    # the very same object is handed back, not a copy or a new iterator
    return lambda result: result

##
# Builds the selector for the "//" step.  The token that follows "//"
# is consumed here and decides what is searched for: "*" or a tag name.
#
# @param next Callable returning the next token; called exactly once.
# @param token The current ("//") token pair; its value is not used.
# @return A generator function that takes an iterable of elements and
#     yields, for each of them, whatever iterdescendants(tag=...)
#     produces.
# @exception SyntaxError If "//" is followed by anything other than
#     "*" or a name.

def prepare_iter(next, token):
    following = next()
    operator = following[0]
    if operator == "*":
        tag = "*"
    elif operator:
        # any other operator after "//" is not supported
        raise SyntaxError
    else:
        # empty operator slot: the token is a name
        tag = following[1]

    def select(result):
        for node in result:
            for descendant in node.iterdescendants(tag=tag):
                yield descendant

    return select

##
# Builds the selector for the ".." step.
#
# @param next Callable returning the next token; not used by this step.
# @param token The current token pair; not used by this step.
# @return A generator function that takes an iterable of elements and
#     yields getparent() of each one, skipping elements for which
#     getparent() returns None.  A parent shared by several input
#     elements is yielded once per input element.

def prepare_dot_dot(next, token):
    def select(result):
        for node in result:
            owner = node.getparent()
            if owner is None:
                continue
            yield owner

    return select

##
# Builds the selector for a "[...]" predicate.  Three forms are
# accepted; the tokens up to and including the closing "]" are consumed:
#
#   [@key]           keep elements for which get(key) is not None
#   [@key='value']   keep elements for which get(key) == value
#   [tag]            keep elements that have at least one child
#                    returned by iterchildren(tag=tag)
#
# @param next Callable returning the next token.
# @param token The current ("[") token pair; its value is not used.
# @return A generator function that takes an iterable of elements and
#     yields those that satisfy the predicate.
# @exception SyntaxError If the predicate is not one of the forms above.

def prepare_predicate(next, token):
    # this one should probably be refactored...
    token = next()
    if token[0] == "@":
        # attribute predicate; "@" must be followed by a name token
        token = next()
        if token[0]:
            raise SyntaxError("invalid attribute predicate")
        key = token[1]
        token = next()
        if token[0] == "]":
            def select(result):
                for elem in result:
                    if elem.get(key) is not None:
                        yield elem
        elif token[0] == "=":
            value = next()[0]
            if value[:1] in ("'", '"'):
                # strip the surrounding quote characters
                value = value[1:-1]
            else:
                raise SyntaxError("invalid comparison target")
            token = next()

            def select(result):
                for elem in result:
                    if elem.get(key) == value:
                        yield elem
        # also rejects anything other than "]" or "=" after the name
        if token[0] != "]":
            raise SyntaxError("invalid attribute predicate")
    elif not token[0]:
        # node predicate: the token is a tag name
        tag = token[1]
        token = next()
        if token[0] != "]":
            raise SyntaxError("invalid node predicate")

        def select(result):
            for elem in result:
                # [tag] tests for a *child* named tag, not for any
                # descendant.  Looping with an immediate break also avoids
                # the Python-2-only iterator.next() method.
                for _ in elem.iterchildren(tag=tag):
                    yield elem
                    break
    else:
        raise SyntaxError("invalid predicate")
    return select


# Maps the first item of a token (empty for names) to the function that
# builds the selector for that step.
ops = {
    "": prepare_tag,
    "*": prepare_star,
    ".": prepare_dot,
    "..": prepare_dot_dot,
    "//": prepare_iter,
    "[": prepare_predicate,
    }

# Looked up by path string in _build_path_iterator.
_cache = {}

# --------------------------------------------------------------------

##
# Compiles a path expression into a list of selector functions, one per
# step.  Compiled selectors are kept in _cache, keyed by the path
# string; the cache is emptied once it holds more than 100 entries.
#
# @param path The path expression.
# @return A list of functions; each takes an iterable of elements and
#     returns an iterable of elements.
# @exception SyntaxError If the path starts with "/" or ends in the
#     middle of a step.
# @exception KeyError If a step starts with a token that has no entry
#     in ops.
# @exception StopIteration If the path contains no tokens at all (for
#     example, the empty string).

def _build_path_iterator(path):
    # compile selector pattern
    try:
        return _cache[path]
    except KeyError:
        pass
    if len(_cache) > 100:
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")
    stream = iter(xpath_tokenizer(path))

    def next_token():
        # builtin next() works on Python 2.6+ and 3.x alike, unlike the
        # Python-2-only stream.next bound method
        return next(stream)

    # Deliberately unguarded: for a path without tokens the
    # StopIteration propagates to the caller unchanged.
    token = next_token()
    selector = []
    while 1:
        try:
            selector.append(ops[token[0]](next_token, token))
        except StopIteration:
            # the step handler ran out of tokens half-way through
            raise SyntaxError("invalid path")
        try:
            token = next_token()
            if token[0] == "/":
                # a single "/" only separates steps; skip it
                token = next_token()
        except StopIteration:
            break
    # Remember the result; without this the lookup at the top of the
    # function could never succeed.
    _cache[path] = selector
    return selector

##
# Iterate over the matching nodes
#
# @param elem The element the path is applied to.
# @param path The path expression.
# @return The iterable produced by feeding a one-element iterator over
#     elem through each selector of the compiled path in order.

def iterfind(elem, path):
    # execute selector pattern
    pipeline = _build_path_iterator(path)
    matches = iter((elem,))
    for step in pipeline:
        matches = step(matches)
    return matches

##
# Find first matching object.
#
# @param elem The element the path is applied to.
# @param path The path expression.
# @return The first item produced by iterfind(elem, path), or None if
#     StopIteration is raised while building or advancing the iterator.

def find(elem, path):
    try:
        # builtin next() instead of the Python-2-only .next() method
        return next(iterfind(elem, path))
    except StopIteration:
        return None

##
# Find all matching objects.
#
# @param elem The element the path is applied to.
# @param path The path expression.
# @return A list holding everything iterfind(elem, path) produces, in
#     the order it is produced.

def findall(elem, path):
    matches = iterfind(elem, path)
    return list(matches)

##
# Find text for first matching object.
#
# @param elem The element the path is applied to.
# @param path The path expression.
# @param default Value returned when nothing matches.
# @return The text attribute of the first item produced by
#     iterfind(elem, path), or default if StopIteration is raised while
#     building or advancing the iterator.

def findtext(elem, path, default=None):
    try:
        # builtin next() instead of the Python-2-only .next() method
        match = next(iterfind(elem, path))
    except StopIteration:
        return default
    return match.text
