1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 import re
50
# Tokenizer for the path mini-language.  Calling it on a path string returns
# a list with one ``(op, tag)`` pair per match:
#   * ``op``  (group 1) - an operator/punctuation token: "::", "..", "()",
#     or one of the single characters  / . * : [ ] ( ) @ =
#   * ``tag`` (group 2) - a name, optionally prefixed by a ``{...}`` part
#   * a run of whitespace matches the third alternative, which has no group,
#     so it comes back as ("", "")
# The pattern is a raw string so that the regex escapes are passed to ``re``
# verbatim instead of being parsed as (invalid) string escape sequences.
xpath_tokenizer = re.compile(
    r"(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+"
).findall
54
57
58
59
60
62
63
64
65
# NOTE(review): the enclosing ``def`` line is not visible in this listing.
# The statements below read ``path`` and populate ``self.path`` / ``self.tag``,
# so this is presumably the body of the Path constructor - confirm.
#
# Split the path expression into (op, tag) token pairs.
67 tokens = xpath_tokenizer(path)
68
# self.path collects the parsed steps: tag-name strings, "*", or
# xpath_descendant_or_self marker instances.
# self.tag stays None unless the whole path is one plain string step.
69 self.path = []
70 self.tag = None
# A path that begins with "/" is rejected outright.
71 if tokens and tokens[0][0] == "/":
72 raise SyntaxError("cannot use absolute path on element")
73 while tokens:
74 op, tag = tokens.pop(0)
# A tag name or the "*" wildcard becomes a step.
75 if tag or op == "*":
76 self.path.append(tag or op)
# "." adds no step.
77 elif op == ".":
78 pass
# A "/" found where a step is expected (the preceding separator was already
# consumed below) records a descendant-or-self marker; ``continue`` skips the
# separator check so the next token is parsed as a step.
79 elif op == "/":
80 self.path.append(xpath_descendant_or_self())
81 continue
# Any other token (including whitespace, which tokenizes to ("", "")) is
# rejected.
82 else:
83 raise SyntaxError("unsupported path syntax (%s)" % op)
# After a step, the next token (if any) must be the "/" separator.
84 if tokens:
85 op, tag = tokens.pop(0)
86 if op != "/":
87 raise SyntaxError(
88 "expected path separator (%s)" % (op or tag)
89 )
# A descendant-or-self marker must be followed by something.
90 if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
91 raise SyntaxError("path cannot end with //")
# Exactly one string step: remember it in self.tag, which find() and
# findtext() use to scan direct children without calling findall().
92 if len(self.path) == 1 and isinstance(self.path[0], type("")):
93 self.tag = self.path[0]
94
95
96
97
def find(self, element):
    """Return the first element matching this path, or None if none does.

    When ``self.tag`` is set, only the direct children of *element* are
    compared against it.  Otherwise the first entry of
    ``self.findall(element)`` is returned.
    """
    wanted = self.tag
    if wanted is not None:
        # Single-tag case: scan the immediate children only.
        for child in element:
            if child.tag == wanted:
                return child
        return None
    matches = self.findall(element)
    return matches[0] if matches else None
109
110
111
112
def findtext(self, element, default=None):
    """Return the text of the first element matching this path.

    An element whose ``text`` is empty or None yields ``""``.  *default* is
    returned only when nothing matches at all.  When ``self.tag`` is set,
    only the direct children of *element* are compared against it; otherwise
    the first entry of ``self.findall(element)`` is used.
    """
    wanted = self.tag
    if wanted is not None:
        # Single-tag case: scan the immediate children only.
        for child in element:
            if child.tag == wanted:
                return child.text or ""
        return default
    matches = self.findall(element)
    if matches:
        return matches[0].text or ""
    return default
124
125
126
127
# NOTE(review): the enclosing ``def`` line is not visible in this listing.
# The body reads ``self.path`` and ``element`` and returns a list, and
# find()/findtext() call ``self.findall(element)``, so this is presumably
# that method's body - confirm.
#
# Evaluate the parsed steps one at a time; ``nodeset`` holds the nodes matched
# so far, starting with the context element itself.
129 nodeset = [element]
130 index = 0
131 while 1:
# Fetch the next step; running off the end of self.path means every step has
# been applied, so the current nodeset is the result.
132 try:
133 path = self.path[index]
134 index = index + 1
135 except IndexError:
136 return nodeset
# NOTE(review): ``set`` shadows the builtin of the same name within this body.
137 set = []
138 if isinstance(path, xpath_descendant_or_self):
# Descendant step: look ahead one step.  If it is a string it is consumed here
# and passed to getiterator() as the tag argument; otherwise (or if there is
# no further step) None is passed.
139 try:
140 tag = self.path[index]
141 if not isinstance(tag, type("")):
142 tag = None
143 else:
144 index = index + 1
145 except IndexError:
146 tag = None
147 for node in nodeset:
# NOTE(review): if these nodes are xml.etree.ElementTree elements,
# getiterator() is not available on Python 3.9+ (iter() replaces it) -
# confirm the target Python version.
148 new = list(node.getiterator(tag))
# Leave out the starting node when the iterator yields it first.
149 if new and new[0] is node:
150 set.extend(new[1:])
151 else:
152 set.extend(new)
153 else:
# Child step: keep children whose tag equals the step, or all children when
# the step is "*".  The inner loop rebinds ``node`` to each child.
154 for node in nodeset:
155 for node in node:
156 if path == "*" or node.tag == path:
157 set.append(node)
# Nothing matched this step, so later steps cannot match either.
158 if not set:
159 return []
160 nodeset = set
161
# Module-level cache mapping a path string to the Path object built from it.
162 _cache = {}
163
164
165
166
# NOTE(review): the enclosing ``def`` line is not visible in this listing.
# The body reads a ``path`` argument and returns a Path, and the module-level
# findtext() calls ``_compile(path)``, so this is presumably that function's
# body - confirm.
#
# Reuse the Path already built for this path string, if any.
168 p = _cache.get(path)
169 if p is not None:
170 return p
# Otherwise parse the path now.
171 p = Path(path)
# Once the cache holds 100 entries it is emptied completely before the new
# entry is stored.
172 if len(_cache) >= 100:
173 _cache.clear()
174 _cache[path] = p
175 return p
176
177
178
179
180 -def find(element, path):
182
183
184
185
def findtext(element, path, default=None):
    """Return the text of the first element under *element* matching *path*.

    The path string is turned into a compiled object via ``_compile`` and the
    lookup is delegated to that object's ``findtext``; *default* is passed
    through unchanged.
    """
    compiled = _compile(path)
    return compiled.findtext(element, default)
188
189
190
191
194