Package lxml :: Module _elementpath
[hide private]
[frames | no frames]

Source Code for Module lxml._elementpath

  1  # 
  2  # ElementTree 
  3  # $Id: ElementPath.py 3276 2007-09-12 06:52:30Z fredrik $ 
  4  # 
  5  # limited xpath support for element trees 
  6  # 
  7  # history: 
  8  # 2003-05-23 fl   created 
  9  # 2003-05-28 fl   added support for // etc 
 10  # 2003-08-27 fl   fixed parsing of periods in element names 
 11  # 2007-09-10 fl   new selection engine 
 12  # 
 13  # Copyright (c) 2003-2007 by Fredrik Lundh.  All rights reserved. 
 14  # 
 15  # fredrik@pythonware.com 
 16  # http://www.pythonware.com 
 17  # 
 18  # -------------------------------------------------------------------- 
 19  # The ElementTree toolkit is 
 20  # 
 21  # Copyright (c) 1999-2007 by Fredrik Lundh 
 22  # 
 23  # By obtaining, using, and/or copying this software and/or its 
 24  # associated documentation, you agree that you have read, understood, 
 25  # and will comply with the following terms and conditions: 
 26  # 
 27  # Permission to use, copy, modify, and distribute this software and 
 28  # its associated documentation for any purpose and without fee is 
 29  # hereby granted, provided that the above copyright notice appears in 
 30  # all copies, and that both that copyright notice and this permission 
 31  # notice appear in supporting documentation, and that the name of 
 32  # Secret Labs AB or the author not be used in advertising or publicity 
 33  # pertaining to distribution of the software without specific, written 
 34  # prior permission. 
 35  # 
 36  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
 37  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
 38  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
 39  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
 40  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
 41  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
 42  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
 43  # OF THIS SOFTWARE. 
 44  # -------------------------------------------------------------------- 
 45   
 46  ## 
 47  # Implementation module for XPath support.  There's usually no reason 
 48  # to import this module directly; the <b>ElementTree</b> does this for 
 49  # you, if needed. 
 50  ## 
 51   
 52  import re 
 53   
 54  xpath_tokenizer = re.compile( 
 55      "(" 
 56      "'[^']*'|\"[^\"]*\"|" 
 57      "::|" 
 58      "//?|" 
 59      "\.\.|" 
 60      "\(\)|" 
 61      "[/.*:\[\]\(\)@=])|" 
 62      "((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|" 
 63      "\s+" 
 64      ).findall 
 65   
def prepare_tag(next, token):
    """Build a selector for a plain tag-name step.

    token is the (operator, name) pair from the tokenizer; the name part
    is the tag to match.  next is accepted only so that every prepare_*
    function can be called the same way; it is not used here.

    The returned select(context, result) is a generator that walks each
    element of result and yields its direct children whose .tag equals
    the requested name.
    """
    wanted = token[1]

    def select(context, result):
        for parent in result:
            for child in parent:
                if child.tag != wanted:
                    continue
                yield child

    return select

def prepare_star(next, token):
    """Build a selector for the "*" step.

    Neither next nor token is used.  The returned select(context, result)
    is a generator yielding every direct child of every element in
    result, in order.
    """
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child

    return select

def prepare_dot(next, token):
    """Build a selector for the "." step.

    Neither next nor token is used.  The returned select(context, result)
    is a generator that passes every element of result through unchanged.
    """
    def select(context, result):
        for current in result:
            yield current

    return select

def prepare_iter(next, token):
    """Build a selector for the "//" (descendant) step.

    The token following "//" is read with next(); it must be either "*"
    or a plain name.  Anything else raises SyntaxError.  If the token
    stream is exhausted, the StopIteration raised by next() propagates
    to the caller.

    The returned select(context, result) is a generator that, for each
    element of result, yields what elem.iter(tag) produces except the
    element itself.
    """
    token = next()
    if token[0] == "*":
        tag = "*"
    elif not token[0]:
        tag = token[1]
    else:
        # give the error a message, like every other syntax error
        # raised by the prepare_* functions
        raise SyntaxError("invalid descendant")

    def select(context, result):
        for elem in result:
            for descendant in elem.iter(tag):
                # iter() may start with the element itself; skip it
                if descendant is not elem:
                    yield descendant

    return select

def prepare_dot_dot(next, token):
    """Build a selector for the ".." (parent) step.

    Neither next nor token is used.  The returned select(context, result)
    is a generator yielding the parent of each element in result.

    The child -> parent mapping is built on first use by walking
    context.root.iter() and is stored on context.parent_map, so later
    ".." steps that share the context reuse it.  Elements that have no
    entry in the mapping (such as the context root) produce nothing.
    Each parent is yielded only once, in first-seen order, even when
    several of its children appear in result.
    """
    def select(context, result):
        parent_map = context.parent_map
        if parent_map is None:
            context.parent_map = parent_map = {}
            for parent in context.root.iter():
                for child in parent:
                    parent_map[child] = parent
        # siblings share a parent; without this a path such as "a/.."
        # would report the same element once per matching child
        seen = set()
        for elem in result:
            if elem in parent_map:
                parent = parent_map[elem]
                if parent not in seen:
                    seen.add(parent)
                    yield parent

    return select

def prepare_predicate(next, token):
    """Build a selector for a bracketed predicate.

    Called for the "[" token; the rest of the predicate is read with
    next().  Three forms are accepted:

      [@key]          keep elements where elem.get(key) is not None
      [@key='value']  keep elements where elem.get(key) == value
                      (the value may use single or double quotes)
      [tag]           keep elements where elem.find(tag) is not None

    Any other shape raises SyntaxError.  If the token stream runs out
    mid-predicate, the StopIteration raised by next() propagates to the
    caller.  The returned select(context, result) is a generator that
    filters result.
    """
    # this one should probably be refactored...
    head = next()

    if head[0] == "@":
        # attribute predicate: the name must be a plain name token
        name = next()
        if name[0]:
            raise SyntaxError("invalid attribute predicate")
        key = name[1]
        follow = next()

        if follow[0] == "]":
            # [@key] -- attribute presence test
            def select(context, result):
                for elem in result:
                    if elem.get(key) is not None:
                        yield elem
            return select

        if follow[0] != "=":
            raise SyntaxError("invalid attribute predicate")

        # [@key='value'] -- attribute equality test
        value = next()[0]
        if value[:1] != "'" and value[:1] != '"':
            raise SyntaxError("invalid comparision target")
        value = value[1:-1]  # strip the surrounding quotes
        closing = next()
        if closing[0] != "]":
            raise SyntaxError("invalid attribute predicate")

        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select

    if head[0]:
        # some operator other than "@" directly after "["
        raise SyntaxError("invalid predicate")

    # [tag] -- child existence test
    tag = head[1]
    closing = next()
    if closing[0] != "]":
        raise SyntaxError("invalid node predicate")

    def select(context, result):
        for elem in result:
            if elem.find(tag) is not None:
                yield elem
    return select

# dispatch table: first item of a token tuple -> function that compiles
# the corresponding path step ("" is the key for plain name tokens)
ops = {
    "": prepare_tag,
    "*": prepare_star,
    ".": prepare_dot,
    "..": prepare_dot_dot,
    "//": prepare_iter,
    "[": prepare_predicate,
    }

# compiled selector lists, keyed by path string
_cache = {}

class _SelectorContext:
    """State shared by the selectors during one path evaluation."""

    # child -> parent mapping; stays None until a ".." step builds it
    parent_map = None

    def __init__(self, root):
        # element the path is evaluated against
        self.root = root

# --------------------------------------------------------------------

##
# Find first matching object.

def find(elem, path):
    """Return the first object matching path below elem, or None.

    Exceptions other than StopIteration raised by iterfind() (for
    example SyntaxError for a bad path) propagate to the caller.
    """
    try:
        # the builtin next() works on both Python 2.6+ and Python 3,
        # unlike the Python-2-only iterator method .next()
        return next(iterfind(elem, path))
    except StopIteration:
        return None

##
# Find all matching objects.

def findall(elem, path):
    """Return a list of every object matching path below elem."""
    matches = iterfind(elem, path)
    return list(matches)

def iterfind(elem, path):
    """Return an iterator over the objects matching path below elem.

    The path is tokenized and compiled into a list of selector
    functions, one per step; the list is stored in _cache under the
    path string.  The cache is emptied once it holds more than 100
    entries.

    Raises SyntaxError for a path starting with "/" and for a path that
    ends in the middle of a step.  A path that produces no tokens at all
    (the empty string) matches nothing and yields an empty iterator.
    """
    # compile selector pattern
    try:
        selector = _cache[path]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        stream = iter(xpath_tokenizer(path))

        def read_token():
            # builtin next() instead of the Python-2-only stream.next
            return next(stream)

        try:
            token = read_token()
        except StopIteration:
            # no tokens at all: report "no matches" rather than leaking
            # StopIteration to the caller
            return iter(())
        selector = []
        while True:
            try:
                selector.append(ops[token[0]](read_token, token))
            except StopIteration:
                # the step needed more tokens than the path supplied
                raise SyntaxError("invalid path")
            try:
                token = read_token()
                if token[0] == "/":
                    # "/" only separates steps; move on to the next one
                    token = read_token()
            except StopIteration:
                break
        _cache[path] = selector
    # execute selector pattern: each selector consumes the previous
    # step's output, starting from the element itself
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result

##
# Find text for first matching object.

def findtext(elem, path, default=None):
    """Return the .text of the first object matching path below elem.

    Returns default when nothing matches.  The text of a match is
    returned as-is, so it may itself be None.
    """
    try:
        # the builtin next() works on both Python 2.6+ and Python 3,
        # unlike the Python-2-only iterator method .next()
        match = next(iterfind(elem, path))
    except StopIteration:
        return default
    return match.text
