lxml._elementpath

Source Code for Module lxml._elementpath

# This file is taken from ElementTree directly, unchanged beyond this line.
#
# limited xpath support for element trees
#
# history:
# 2003-05-23 fl created
# 2003-05-28 fl added support for // etc
# 2003-08-27 fl fixed parsing of periods in element names
#
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2004 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

##
# Implementation module for XPath support. There's usually no reason
# to import this module directly; the <b>ElementTree</b> does this for
# you, if needed.
##

import re

##
# Split a path expression into a list of (op, tag) tuples.
# <p>
# Exactly one member of each tuple is non-empty: "op" holds an operator
# or punctuation token ("/", ".", "*", "::", "..", "()", "[", ...), and
# "tag" holds an element name, optionally prefixed by a "{namespace}"
# part. A run of whitespace produces the tuple ("", "").
# <p>
# The pattern is a raw string so that the regex escapes (\., \[, \s, ...)
# reach the re module untouched instead of being parsed as (invalid)
# string-literal escapes, which newer Python versions warn about.

xpath_tokenizer = re.compile(
    r"(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+"
    ).findall

##
# Marker type for a descendant-or-self ("//") step.
# <p>
# Instances carry no state; a compiled path stores one of them in its
# step list wherever the expression contained "//", and the matching
# code recognises the step with isinstance().

class xpath_descendant_or_self:
    pass

##
# Wrapper for a compiled XPath.
# <p>
# Only simple relative location paths are supported: element names, the
# "*" wildcard, ".", and the "/" and "//" separators.

class Path:

    ##
    # Create a Path instance from an XPath expression.
    #
    # @param path The path expression to compile.
    # @exception SyntaxError If the expression starts with "/", ends
    #     with "//", lacks a "/" between two steps, or uses any syntax
    #     other than names, "*", "." and "/".

    def __init__(self, path):
        tokens = xpath_tokenizer(path)
        # the current version supports 'path/path'-style expressions only
        # self.path: list of steps; each is a tag name, "*", or an
        # xpath_descendant_or_self marker (for "//")
        self.path = []
        # self.tag: the single step of a one-step path (enables the fast
        # path in find/findtext), otherwise None
        self.tag = None
        if tokens and tokens[0][0] == "/":
            raise SyntaxError("cannot use absolute path on element")
        # walk the token list by index instead of repeatedly popping the
        # front of the list
        position = 0
        count = len(tokens)
        while position < count:
            op, tag = tokens[position]
            position += 1
            if tag or op == "*":
                self.path.append(tag or op)
            elif op == ".":
                pass
            elif op == "/":
                # a "/" in step position is the second half of "//"
                self.path.append(xpath_descendant_or_self())
                continue
            else:
                raise SyntaxError("unsupported path syntax (%s)" % op)
            # a step must be followed by a separator or the end of input
            if position < count:
                op, tag = tokens[position]
                position += 1
                if op != "/":
                    raise SyntaxError(
                        "expected path separator (%s)" % (op or tag)
                        )
        if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
            raise SyntaxError("path cannot end with //")
        if len(self.path) == 1 and isinstance(self.path[0], str):
            self.tag = self.path[0]

    ##
    # Find first matching object.
    #
    # @param element The element to search from.
    # @return The first matching element, or None if nothing matched.

    def find(self, element):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return None
            return nodeset[0]
        for elem in element:
            # "*" has to match every child here, exactly as it does in
            # findall(); comparing elem.tag to "*" would never succeed
            if tag == "*" or elem.tag == tag:
                return elem
        return None

    ##
    # Find text for first matching object.
    #
    # @param element The element to search from.
    # @param default Value to return if nothing matched.
    # @return The text of the first matching element ("" if it has no
    #     text), or the default value if nothing matched.

    def findtext(self, element, default=None):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return default
            return nodeset[0].text or ""
        for elem in element:
            # same wildcard handling as in find()
            if tag == "*" or elem.tag == tag:
                return elem.text or ""
        return default

    ##
    # Find all matching objects.
    #
    # @param element The element to search from.
    # @return A list of matching elements; empty if nothing matched.
    #     For a path without steps (e.g. ".") the list holds the
    #     element itself.

    def findall(self, element):
        steps = self.path
        nodeset = [element]
        index = 0
        while index < len(steps):
            step = steps[index]
            index += 1
            matches = []
            if isinstance(step, xpath_descendant_or_self):
                # "//": search all descendants, filtered by the following
                # tag step if there is one (that step is consumed here)
                tag = None
                if index < len(steps) and isinstance(steps[index], str):
                    tag = steps[index]
                    index += 1
                for node in nodeset:
                    # prefer iter(); getiterator() is only a fallback for
                    # element types that do not provide iter()
                    iterate = getattr(node, "iter", None) or node.getiterator
                    found = list(iterate(tag))
                    # the iterator may yield the start node itself; skip it
                    if found and found[0] is node:
                        matches.extend(found[1:])
                    else:
                        matches.extend(found)
            else:
                for node in nodeset:
                    for child in node:
                        if step == "*" or child.tag == step:
                            matches.append(child)
            if not matches:
                return []
            nodeset = matches
        return nodeset

# Compiled Path objects, keyed by the path expression they were built from.
_cache = {}

##
# (Internal) Compile path.
# <p>
# Returns the cached Path for this expression if there is one; otherwise
# builds a new Path, stores it and returns it. Once the cache holds 100
# entries it is emptied before the new entry is stored.
#
# @param path The path expression.
# @return A Path instance.

def _compile(path):
    try:
        return _cache[path]
    except KeyError:
        pass
    # build first, so a failing expression leaves the cache untouched
    compiled = Path(path)
    if len(_cache) >= 100:
        _cache.clear()
    _cache[path] = compiled
    return compiled

##
# Find first matching object.
#
# @param element The element to search from.
# @param path The path expression.
# @return The first matching element, or None if nothing matched.

def find(element, path):
    compiled = _compile(path)
    return compiled.find(element)

##
# Find text for first matching object.
#
# @param element The element to search from.
# @param path The path expression.
# @param default Value to return if nothing matched.
# @return The text of the first matching element ("" if it has no
#     text), or the default value if nothing matched.

def findtext(element, path, default=None):
    compiled = _compile(path)
    return compiled.findtext(element, default)

##
# Find all matching objects.
#
# @param element The element to search from.
# @param path The path expression.
# @return A list of matching elements; empty if nothing matched.

def findall(element, path):
    compiled = _compile(path)
    return compiled.findall(element)

194