Package lxml :: Module cssselect
[hide private]
[frames | no frames]

Source Code for Module lxml.cssselect

  1  import re 
  2  from lxml import etree 
  3   
# Public names of this module: the two exception types and the selector class.
__all__ = ['SelectorSyntaxError', 'ExpressionError',
           'CSSSelector']
  6   
class SelectorSyntaxError(SyntaxError):
    """Raised when a CSS selector string cannot be tokenized or parsed."""
9
class ExpressionError(RuntimeError):
    """Raised when a parsed selector cannot be translated to XPath."""
12
class CSSSelector(etree.XPath):
    """
    An ``etree.XPath`` built from a CSS selector.

    The selector is translated with ``css_to_xpath()``; the original CSS
    text stays available as the ``css`` attribute.
    """

    def __init__(self, css):
        etree.XPath.__init__(self, css_to_xpath(css))
        self.css = css

    def __repr__(self):
        # Hexadecimal object id with the leading '0x' removed.
        ident = hex(abs(id(self)))[2:]
        return '<%s %s for %r>' % (self.__class__.__name__, ident, self.css)
25 26 ############################## 27 ## Token objects: 28
class _UniToken(unicode):
    """A unicode string that also carries a ``pos`` attribute."""

    def __new__(cls, contents, pos):
        token = unicode.__new__(cls, contents)
        token.pos = pos
        return token

    def __repr__(self):
        text = unicode.__repr__(self)
        return '%s(%s, %r)' % (self.__class__.__name__, text, self.pos)
40
class Symbol(_UniToken):
    """Token type that ``tokenize()`` yields for symbols and count expressions."""
43
class String(_UniToken):
    """Token type that ``tokenize()`` yields for quoted strings."""
46
class Token(_UniToken):
    """Token type that ``tokenize()`` yields for operators and punctuation."""
49 50 ############################################################ 51 ## Parsing 52 ############################################################ 53 54 ############################## 55 ## Syntax objects: 56
class Class(object):
    """
    Syntax node for ``selector.class_name``.
    """

    def __init__(self, selector, class_name):
        self.selector, self.class_name = selector, class_name

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '%s[%r.%s]' % (cls_name, self.selector, self.class_name)

    def xpath(self):
        """Return the selector's XPath with a whole-word @class test added."""
        path = self.selector.xpath()
        # Pad with spaces so only a complete class token can match.
        needle = xpath_repr(' ' + self.class_name + ' ')
        path.add_condition(
            "contains(concat(' ', normalize-space(@class), ' '), %s)" % needle)
        return path
77
class Function(object):
    """
    Represents selector:name(expr)

    ``xpath()`` dispatches to the ``_xpath_<name>`` method, with dashes in
    the pseudo-class name replaced by underscores.
    """

    # Functional pseudo-classes that are rejected explicitly in xpath().
    unsupported = [
        'target', 'lang', 'enabled', 'disabled',]

    def __init__(self, selector, type, name, expr):
        self.selector = selector
        self.type = type
        self.name = name
        self.expr = expr

    def __repr__(self):
        return '%s[%r%s%s(%r)]' % (
            self.__class__.__name__,
            self.selector,
            self.type, self.name, self.expr)

    def xpath(self):
        """
        Translate the wrapped selector, then apply this pseudo-class to it.

        Raises ExpressionError when the pseudo-class is listed in
        ``unsupported`` or has no ``_xpath_*`` handler.
        """
        sel_path = self.selector.xpath()
        if self.name in self.unsupported:
            raise ExpressionError(
                "The psuedo-class %r is not supported" % self.name)
        method = '_xpath_' + self.name.replace('-', '_')
        if not hasattr(self, method):
            raise ExpressionError(
                "The psuedo-class %r is unknown" % self.name)
        method = getattr(self, method)
        return method(sel_path, self.expr)

    def _xpath_nth_child(self, xpath, expr, last=False,
                         add_name_test=True):
        """
        Add a positional condition for an ``an+b`` series to *xpath*.

        *last* selects the counting-from-the-end variants; *add_name_test*
        moves the element name into a ``name()`` condition before the
        ``*/`` prefix is added.
        """
        a, b = parse_series(expr)
        if not a and not b and not last:
            # a=0 means nothing is returned...
            xpath.add_condition('false() and position() = 0')
            return xpath
        if add_name_test:
            xpath.add_name_test()
        xpath.add_star_prefix()
        if a == 0:
            if last:
                # NOTE(review): for b == 1 this yields 'last() - 1', which
                # looks off by one for :nth-last-child(1) -- confirm.
                b = 'last() - %s' % b
            xpath.add_condition('position() = %s' % b)
            return xpath
        if last:
            # FIXME: I'm not sure if this is right
            a = -a
            b = -b
        if b > 0:
            b_neg = str(-b)
        else:
            b_neg = '+%s' % (-b)
        if a != 1:
            expr = ['(position() %s) mod %s = 0' % (b_neg, a)]
        else:
            expr = []
        if b >= 0:
            expr.append('position() >= %s' % b)
        elif b < 0 and last:
            expr.append('position() < (last() %s)' % b)
        expr = ' and '.join(expr)
        if expr:
            xpath.add_condition(expr)
        return xpath
        # FIXME: handle an+b, odd, even
        # an+b means every-a, plus b, e.g., 2n+1 means odd
        # 0n+b means b
        # n+0 means a=1, i.e., all elements
        # an means every a elements, i.e., 2n means even
        # -n means -1n
        # -1n+6 means elements 6 and previous

    def _xpath_nth_last_child(self, xpath, expr):
        return self._xpath_nth_child(xpath, expr, last=True)

    def _xpath_nth_of_type(self, xpath, expr):
        if xpath.element == '*':
            raise NotImplementedError(
                "*:nth-of-type() is not implemented")
        return self._xpath_nth_child(xpath, expr, add_name_test=False)

    def _xpath_nth_last_of_type(self, xpath, expr):
        # Same guard as the other *-of-type handlers: without an element
        # name the positional test would count every sibling.
        if xpath.element == '*':
            raise NotImplementedError(
                "*:nth-last-of-type() is not implemented")
        return self._xpath_nth_child(xpath, expr, last=True, add_name_test=False)

    def _xpath_contains(self, xpath, expr):
        # text content, minus tags, must contain expr
        if isinstance(expr, Element):
            expr = expr._format_element()
        xpath.add_condition('contains(css:lower-case(string(.)), %s)'
                            % xpath_repr(expr.lower()))
        # FIXME: Currently case insensitive matching doesn't seem to be happening
        return xpath

    def _xpath_not(self, xpath, expr):
        # everything for which not expr applies
        expr = expr.xpath()
        cond = expr.condition
        # FIXME: should I do something about element_path?
        xpath.add_condition('not(%s)' % cond)
        return xpath
182 -def _make_lower_case(context, s):
183 return s.lower()

# Register _make_lower_case under the name 'lower-case' in a function
# namespace whose prefix is 'css' (used as css:lower-case in _xpath_contains).
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = 'css'
ns['lower-case'] = _make_lower_case

class Pseudo(object):
    """
    Syntax node for ``selector:ident`` (a pseudo-class without arguments).

    ``xpath()`` looks up a ``_xpath_<ident>`` handler, with dashes in the
    identifier replaced by underscores.
    """

    # Pseudo-classes/elements that are rejected explicitly in xpath().
    unsupported = ['indeterminate', 'first-line', 'first-letter',
                   'selection', 'before', 'after', 'link', 'visited',
                   'active', 'focus', 'hover']

    def __init__(self, element, type, ident):
        self.element = element
        assert type in (':', '::')
        self.type = type
        self.ident = ident

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '%s[%r%s%s]' % (cls_name, self.element, self.type, self.ident)

    def xpath(self):
        """Translate the wrapped element, then apply this pseudo-class."""
        base = self.element.xpath()
        if self.ident in self.unsupported:
            raise ExpressionError(
                "The psuedo-class %r is unsupported" % self.ident)
        handler = getattr(self, '_xpath_' + self.ident.replace('-', '_'), None)
        if handler is None:
            raise ExpressionError(
                "The psuedo-class %r is unknown" % self.ident)
        return handler(base)

    def _xpath_checked(self, xpath):
        # FIXME: is this really all the elements?
        xpath.add_condition(
            "(@selected or @checked) and (name(.) = 'input' or name(.) = 'option')")
        return xpath

    def _xpath_root(self, xpath):
        # Would match only the root element; not implemented.
        raise NotImplementedError

    def _xpath_first_child(self, xpath):
        xpath.add_star_prefix()
        xpath.add_name_test()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_last_child(self, xpath):
        xpath.add_star_prefix()
        xpath.add_name_test()
        xpath.add_condition('position() = last()')
        return xpath

    def _xpath_first_of_type(self, xpath):
        if xpath.element == '*':
            raise NotImplementedError(
                "*:first-of-type is not implemented")
        xpath.add_star_prefix()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_last_of_type(self, xpath):
        if xpath.element == '*':
            raise NotImplementedError(
                "*:last-of-type is not implemented")
        xpath.add_star_prefix()
        xpath.add_condition('position() = last()')
        return xpath

    def _xpath_only_child(self, xpath):
        xpath.add_name_test()
        xpath.add_star_prefix()
        xpath.add_condition('last() = 1')
        return xpath

    def _xpath_only_of_type(self, xpath):
        if xpath.element == '*':
            raise NotImplementedError(
                "*:only-of-type is not implemented")
        xpath.add_condition('last() = 1')
        return xpath

    def _xpath_empty(self, xpath):
        # No child elements and no non-whitespace text.
        xpath.add_condition("not(*) and not(normalize-space())")
        return xpath
276
class Attrib(object):
    """
    Syntax node for ``selector[namespace|attrib operator value]``.

    The operator ``'exists'`` stands for a bare ``[attrib]`` test.
    """

    def __init__(self, selector, namespace, attrib, operator, value):
        self.selector = selector
        self.namespace = namespace
        self.attrib = attrib
        self.operator = operator
        self.value = value

    def __repr__(self):
        cls_name = self.__class__.__name__
        if self.operator == 'exists':
            return '%s[%r[%s]]' % (
                cls_name, self.selector, self._format_attrib())
        return '%s[%r[%s %s %r]]' % (
            cls_name, self.selector, self._format_attrib(),
            self.operator, self.value)

    def _format_attrib(self):
        # CSS spelling of the attribute name.
        if self.namespace != '*':
            return '%s|%s' % (self.namespace, self.attrib)
        return self.attrib

    def _xpath_attrib(self):
        # XPath spelling of the attribute name.
        # FIXME: if attrib is *?
        if self.namespace != '*':
            return '@%s:%s' % (self.namespace, self.attrib)
        return '@' + self.attrib

    def xpath(self):
        """Return the selector's XPath with this attribute test added."""
        path = self.selector.xpath()
        attrib = self._xpath_attrib()
        value = self.value
        op = self.operator
        if op == 'exists':
            assert not value
            path.add_condition(attrib)
        elif op == '=':
            path.add_condition('%s = %s' % (attrib, xpath_repr(value)))
        elif op == '!=':
            # FIXME: this seems like a weird hack...
            if value:
                path.add_condition(
                    'not(%s) or %s != %s'
                    % (attrib, attrib, xpath_repr(value)))
            else:
                path.add_condition(
                    '%s != %s' % (attrib, xpath_repr(value)))
        elif op == '~=':
            # Whitespace-separated word match.
            path.add_condition(
                "contains(concat(' ', normalize-space(%s), ' '), %s)"
                % (attrib, xpath_repr(' ' + value + ' ')))
        elif op == '|=':
            # Exact value, or value followed by a dash.
            path.add_condition(
                '%s = %s or starts-with(%s, %s)'
                % (attrib, xpath_repr(value),
                   attrib, xpath_repr(value + '-')))
        elif op == '^=':
            path.add_condition(
                'starts-with(%s, %s)' % (attrib, xpath_repr(value)))
        elif op == '$=':
            # XPath 1.0 has starts-with() but no ends-with(), so compare
            # the trailing substring instead.
            path.add_condition(
                'substring(%s, string-length(%s)-%s) = %s'
                % (attrib, attrib, len(value) - 1, xpath_repr(value)))
        elif op == '*=':
            # FIXME: case sensitive?
            path.add_condition(
                'contains(%s, %s)' % (attrib, xpath_repr(value)))
        else:
            assert 0, ("Unknown operator: %r" % self.operator)
        return path
356
class Element(object):
    """
    Syntax node for ``namespace|element``; ``'*'`` means "any".
    """

    def __init__(self, namespace, element):
        self.namespace, self.element = namespace, element

    def __repr__(self):
        return '%s[%s]' % (self.__class__.__name__, self._format_element())

    def _format_element(self):
        # CSS spelling of the element name.
        if self.namespace != '*':
            return '%s|%s' % (self.namespace, self.element)
        return self.element

    def xpath(self):
        """Return an XPathExpr whose element test is this element."""
        if self.namespace != '*':
            # FIXME: Should we lowercase here?
            name = '%s:%s' % (self.namespace, self.element)
        else:
            name = self.element.lower()
        return XPathExpr(element=name)
384
class Hash(object):
    """
    Syntax node for ``selector#id``.
    """

    def __init__(self, selector, id):
        self.selector, self.id = selector, id

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '%s[%r#%s]' % (cls_name, self.selector, self.id)

    def xpath(self):
        """Return the selector's XPath with an ``@id`` equality test added."""
        result = self.selector.xpath()
        result.add_condition('@id = %s' % xpath_repr(self.id))
        return result
403
class Or(object):
    """Syntax node for a comma-separated group of selectors."""

    def __init__(self, items):
        self.items = items

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.items)

    def xpath(self):
        """Translate every alternative and wrap them in an XPathExprOr."""
        translated = []
        for item in self.items:
            translated.append(item.xpath())
        return XPathExprOr(translated)
416
class CombinedSelector(object):
    """Syntax node for two selectors joined by a combinator."""

    # Combinator token -> suffix of the _xpath_* method that handles it.
    _method_mapping = {
        ' ': 'descendant',
        '>': 'child',
        '+': 'direct_adjacent',
        '~': 'indirect_adjacent',
        }

    def __init__(self, selector, combinator, subselector):
        assert selector is not None
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self):
        comb = self.combinator
        if comb == ' ':
            # A bare space would be invisible in the repr.
            comb = '<followed>'
        return '%s[%r %s %r]' % (
            self.__class__.__name__, self.selector, comb, self.subselector)

    def xpath(self):
        """
        Translate the left selector and join the right one onto it.

        Raises ExpressionError for a combinator without a mapping entry.
        """
        suffix = self._method_mapping.get(self.combinator)
        if suffix is None:
            raise ExpressionError(
                "Unknown combinator: %r" % self.combinator)
        handler = getattr(self, '_xpath_' + suffix)
        left = self.selector.xpath()
        return handler(left, self.subselector)

    def _xpath_descendant(self, xpath, sub):
        # sub is a descendant, at any depth, of xpath
        xpath.join('/descendant::', sub.xpath())
        return xpath

    def _xpath_child(self, xpath, sub):
        # sub is an immediate child of xpath
        xpath.join('/', sub.xpath())
        return xpath

    def _xpath_direct_adjacent(self, xpath, sub):
        # sub is the sibling immediately following xpath
        xpath.join('/following-sibling::', sub.xpath())
        xpath.add_name_test()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_indirect_adjacent(self, xpath, sub):
        # sub is any later sibling of xpath
        xpath.join('/following-sibling::', sub.xpath())
        return xpath

##############################
## XPathExpr objects:

# Shortcut patterns for the simplest selectors: 'el', 'el#id', 'el.class'.
_el_re = re.compile(r'^\w+\s*$')
_id_re = re.compile(r'^(\w*)#(\w+)\s*$')
_class_re = re.compile(r'^(\w*)\.(\w+)\s*$')

def css_to_xpath(css_expr, prefix='descendant-or-self::'):
    """
    Translate *css_expr* into an XPath string.

    *css_expr* is either a selector string or an already parsed object
    with an ``xpath()`` method. Strings matching one of the shortcut
    patterns are translated directly; other strings go through ``parse()``.
    *prefix* is put in front of the resulting expression.
    """
    if isinstance(css_expr, basestring):
        found = _el_re.search(css_expr)
        if found is not None:
            return '%s%s' % (prefix, found.group(0).strip())
        shortcuts = (
            (_id_re, "%s%s[@id = '%s']"),
            (_class_re,
             "%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]"),
        )
        for pattern, template in shortcuts:
            found = pattern.search(css_expr)
            if found is not None:
                # A missing element name means "any element".
                return template % (
                    prefix, found.group(1) or '*', found.group(2))
        css_expr = parse(css_expr)
    expr = css_expr.xpath()
    assert expr is not None, (
        "Got None for xpath expression from %s" % repr(css_expr))
    if prefix:
        expr.add_prefix(prefix)
    return str(expr)
501
class XPathExpr(object):
    """
    A mutable XPath step, rendered by ``str()`` as
    ``prefix + path + element + '[condition]'``.

    Parts that are None (or an empty condition) are left out.
    """

    def __init__(self, prefix=None, path=None, element='*', condition=None,
                 star_prefix=False):
        self.prefix = prefix
        self.path = path
        self.element = element
        self.condition = condition
        self.star_prefix = star_prefix

    def __str__(self):
        path = ''
        if self.prefix is not None:
            path += str(self.prefix)
        if self.path is not None:
            path += str(self.path)
        path += str(self.element)
        if self.condition:
            path += '[%s]' % self.condition
        return path

    def __repr__(self):
        return '%s[%s]' % (
            self.__class__.__name__, self)

    def add_condition(self, condition):
        """
        And *condition* onto the current condition.

        Both operands are parenthesised: XPath ``and`` binds tighter than
        ``or``, so an existing condition such as ``not(@a) or @a != 'x'``
        would otherwise be regrouped by the appended test.
        """
        if self.condition:
            self.condition = '(%s) and (%s)' % (self.condition, condition)
        else:
            self.condition = condition

    def add_path(self, part):
        """Move the current element onto the path and make *part* the element."""
        if self.path is None:
            self.path = self.element
        else:
            self.path += self.element
        self.element = part

    def add_prefix(self, prefix):
        """Put *prefix* in front of any existing prefix."""
        if self.prefix:
            self.prefix = prefix + self.prefix
        else:
            self.prefix = prefix

    def add_name_test(self):
        """Replace a named element test by ``*`` plus a ``name()`` condition."""
        if self.element == '*':
            # We weren't doing a test anyway
            return
        self.add_condition("name() = %s" % xpath_repr(self.element))
        self.element = '*'

    def add_star_prefix(self):
        """
        Append ``*/`` to the path and set ``star_prefix``.

        This is used when positional tests need the context constrained
        to a single parent.
        """
        if self.path:
            self.path += '*/'
        else:
            self.path = '*/'
        self.star_prefix = True

    def join(self, combiner, other):
        """
        Continue this expression with *other*, separated by *combiner*.

        The current rendering plus *combiner* becomes the new prefix;
        path, element and condition are taken from *other*.
        """
        prefix = str(self)
        prefix += combiner
        path = (other.prefix or '') + (other.path or '')
        # We don't need a star prefix if we are joining to this other
        # prefix; so we'll get rid of it
        if other.star_prefix and path == '*/':
            path = ''
        self.prefix = prefix
        self.path = path
        self.element = other.element
        self.condition = other.condition
class XPathExprOr(XPathExpr):

    """
    Holds several alternative expressions; ``str()`` renders each one
    with the optional prefix in front and joins them with `` | ``.
    """

    def __init__(self, items, prefix=None):
        for entry in items:
            assert entry is not None
        self.items = items
        self.prefix = prefix

    def __str__(self):
        lead = self.prefix or ''
        rendered = []
        for entry in self.items:
            rendered.append(lead + str(entry))
        return ' | '.join(rendered)
593
def xpath_repr(s):
    """
    Return *s* as an XPath 1.0 string literal.

    XPath literals are delimited by single or double quotes and have no
    escape mechanism, so Python's ``repr()`` is only right for simple
    strings. The delimiter is chosen to avoid the quotes in *s*; when *s*
    contains both kinds, the result is a ``concat(...)`` call.

    An ``Element`` is first converted with ``_format_element()``.
    """
    if isinstance(s, Element):
        # This is probably a symbol that looks like an expression...
        s = s._format_element()
    s = str(s)
    if "'" not in s:
        return "'%s'" % s
    if '"' not in s:
        return '"%s"' % s
    # Both quote characters occur: quote the runs between apostrophes with
    # single quotes and each apostrophe with double quotes.
    pieces = []
    for index, chunk in enumerate(s.split("'")):
        if index:
            pieces.append('"\'"')
        if chunk:
            pieces.append("'%s'" % chunk)
    return 'concat(%s)' % ', '.join(pieces)
602 603 ############################## 604 ## Parsing functions 605
606 -def parse(string):
607 stream = TokenStream(tokenize(string)) 608 stream.source = string 609 try: 610 return parse_selector_group(stream) 611 except SelectorSyntaxError, e: 612 e.args = tuple(["%s at %s -> %s" % ( 613 e, stream.used, list(stream))]) 614 raise
615
def parse_selector_group(stream):
    """
    Parse one or more comma-separated selectors from *stream*.

    Returns the selector itself when there is only one, otherwise an
    ``Or`` over all of them.
    """
    selectors = [parse_selector(stream)]
    while stream.peek() == ',':
        stream.next()
        selectors.append(parse_selector(stream))
    if len(selectors) == 1:
        return selectors[0]
    return Or(selectors)
628
def parse_selector(stream):
    """
    Parse simple selectors joined by combinators from *stream*.

    Parsing stops at ``,``, ``)`` or the end of the stream. Simple
    selectors with no explicit ``+``, ``>`` or ``~`` between them are
    combined with the descendant combinator ``' '``.

    Raises SelectorSyntaxError when the next simple selector consumes no
    token (for example on a stray ``]``); without this check the loop
    would never advance.
    """
    result = parse_simple_selector(stream)
    while 1:
        peek = stream.peek()
        if peek == ',' or peek == ')' or peek is None:
            return result
        if peek in ('+', '>', '~'):
            # A combinator
            combinator = stream.next()
        else:
            combinator = ' '
        consumed = len(stream.used)
        next_selector = parse_simple_selector(stream)
        if consumed == len(stream.used):
            # Nothing was read, so the pending token can never be parsed.
            raise SelectorSyntaxError(
                "Expected selector, got %r" % stream.peek())
        result = CombinedSelector(result, combinator, next_selector)
643
def parse_simple_selector(stream):
    """
    Parse one simple selector: an optional ``namespace|element`` followed
    by any number of ``#id``, ``.class``, ``[attrib]`` and ``:pseudo``
    parts.

    Returns the resulting syntax node; with nothing to parse this is
    ``Element('*', '*')``. Raises SelectorSyntaxError for malformed input.
    """
    peek = stream.peek()
    if peek != '*' and not isinstance(peek, Symbol):
        element = namespace = '*'
    else:
        token = stream.next()
        if token != '*' and not isinstance(token, Symbol):
            raise SelectorSyntaxError(
                "Expected symbol, got %r" % token)
        if stream.peek() == '|':
            namespace = token
            stream.next()
            element = stream.next()
            # Validate the element name itself (the namespace token was
            # already checked above).
            if element != '*' and not isinstance(element, Symbol):
                raise SelectorSyntaxError(
                    "Expected symbol, got %r" % element)
        else:
            namespace = '*'
            element = token
    result = Element(namespace, element)
    has_hash = False
    while 1:
        peek = stream.peek()
        if peek == '#':
            if has_hash:
                # You can't have two hashes
                # (FIXME: is there some more general rule I'm missing?)
                break
            stream.next()
            result = Hash(result, stream.next())
            has_hash = True
            continue
        elif peek == '.':
            stream.next()
            result = Class(result, stream.next())
            continue
        elif peek == '[':
            stream.next()
            result = parse_attrib(result, stream)
            token = stream.next()
            if not token == ']':
                raise SelectorSyntaxError(
                    "] expected, got %r" % token)
            continue
        elif peek == ':' or peek == '::':
            pseudo_type = stream.next()
            ident = stream.next()
            if not isinstance(ident, Symbol):
                raise SelectorSyntaxError(
                    "Expected symbol, got %r" % ident)
            if stream.peek() == '(':
                stream.next()
                peek = stream.peek()
                if isinstance(peek, String):
                    selector = stream.next()
                elif isinstance(peek, Symbol) and is_int(peek):
                    selector = int(stream.next())
                else:
                    # FIXME: parse_simple_selector, or selector, or...?
                    selector = parse_simple_selector(stream)
                token = stream.next()
                if not token == ')':
                    raise SelectorSyntaxError(
                        "Expected ), got %r and %r"
                        % (token, selector))
                result = Function(result, pseudo_type, ident, selector)
            else:
                result = Pseudo(result, pseudo_type, ident)
            continue
        else:
            break
    # FIXME: not sure what "negation" is
    return result
717
def is_int(v):
    """Return True when ``int(v)`` succeeds, False on ValueError."""
    try:
        int(v)
        return True
    except ValueError:
        return False
725
def parse_attrib(selector, stream):
    """
    Parse the inside of ``[...]`` from *stream*, up to but not including
    the closing ``]``, and return an ``Attrib`` wrapping *selector*.

    Raises SelectorSyntaxError for an unknown operator or a value that is
    neither a Symbol nor a String.
    """
    namespace = '*'
    attrib = stream.next()
    if stream.peek() == '|':
        # What was read so far is the namespace; the name follows the bar.
        namespace = attrib
        stream.next()
        attrib = stream.next()
    if stream.peek() == ']':
        return Attrib(selector, namespace, attrib, 'exists', None)
    op = stream.next()
    if op not in ('^=', '$=', '*=', '=', '~=', '|=', '!='):
        raise SelectorSyntaxError(
            "Operator expected, got %r" % op)
    value = stream.next()
    if not isinstance(value, (Symbol, String)):
        raise SelectorSyntaxError(
            "Expected string or symbol, got %r" % value)
    return Attrib(selector, namespace, attrib, op, value)
745
def parse_series(s):
    """
    Parse an ``an+b`` series such as '1n+2', returning ``(a, b)``.

    Also accepts an ``Element``, an int (meaning just *b*), and the
    keywords 'odd', 'even' and 'n'. Nothing, or '*', gives ``(0, 0)``.
    """
    def coefficient(text, default):
        # Empty -> default; a lone sign -> +1 / -1; otherwise the number.
        if not text:
            return default
        if text == '-' or text == '+':
            return int(text + '1')
        return int(text)

    if isinstance(s, Element):
        s = s._format_element()
    if not s or s == '*':
        # Happens when there's nothing, which the CSS parser thinks of as *
        return (0, 0)
    if isinstance(s, int):
        # Happens when you just get a number
        return (0, s)
    if s == 'odd':
        return (2, 1)
    if s == 'even':
        return (2, 0)
    if s == 'n':
        return (1, 0)
    if 'n' not in s:
        # Just a b
        return (0, int(s))
    step, offset = s.split('n', 1)
    return (coefficient(step, 1), coefficient(offset, 0))


############################################################
## Tokenizing
############################################################

_whitespace_re = re.compile(r'\s+')

_comment_re = re.compile(r'/\*.*?\*/', re.S)

# 'an+b' style count expressions such as '2n+1' or '-n'.
_count_re = re.compile(r'[+-]?\d*n(?:[+-]\d+)?')

def tokenize(s):
    """
    Generate the tokens of the CSS selector string *s*.

    Comments are removed and whitespace is skipped. Count expressions and
    other symbols are yielded as ``Symbol``, operators and punctuation as
    ``Token``, quoted strings as ``String``; each token records the
    position it started at.
    """
    s = _comment_re.sub('', s)
    pos = 0
    while 1:
        blank = _whitespace_re.match(s, pos)
        if blank:
            pos = blank.end()
        if pos >= len(s):
            return
        start = pos
        count = _count_re.match(s, pos)
        # A lone 'n' is left to the ordinary symbol branch below.
        if count and count.group() != 'n':
            pos = count.end()
            yield Symbol(s[start:pos], start)
            continue
        char = s[pos]
        pair = s[pos:pos+2]
        if pair in ('~=', '|=', '^=', '$=', '*=', '::', '!='):
            pos += 2
            yield Token(pair, start)
        elif char in '>+~,.*=[]()|:#':
            pos += 1
            yield Token(char, start)
        elif char == '"' or char == "'":
            # Quoted string
            text, pos = tokenize_escaped_string(s, pos)
            yield String(text, start)
        else:
            text, pos = tokenize_symbol(s, pos)
            yield Symbol(text, start)
828
def tokenize_escaped_string(s, pos):
    """
    Read the quoted string starting at ``s[pos]``.

    Returns ``(contents, end)`` where *contents* is the text between the
    quotes decoded with 'unicode_escape' and *end* is the index just
    after the closing quote. Raises SelectorSyntaxError when no usable
    closing quote exists.
    """
    quote = s[pos]
    assert quote in ('"', "'")
    start = pos + 1
    search_from = start
    while 1:
        end = s.find(quote, search_from)
        if end == -1:
            raise SelectorSyntaxError(
                "Expected closing %s for string in: %r"
                % (quote, s[start:]))
        try:
            decoded = s[start:end].decode('unicode_escape')
        except UnicodeDecodeError:
            # Probably a hanging \ in front of this quote; try the next one.
            search_from = end + 1
            continue
        return decoded, end + 1
848 849 _illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE) 850
851 -def tokenize_symbol(s, pos):
852 start = pos 853 match = _illegal_symbol.search(s, pos=pos) 854 if not match: 855 # Goes to end of s 856 return s[start:], len(s) 857 if match.start() == pos: 858 assert 0, ( 859 "Unexpected symbol: %r at %s" % (s[pos], pos)) 860 if not match: 861 result = s[start:] 862 pos = len(s) 863 else: 864 result = s[start:match.start()] 865 pos = match.start() 866 try: 867 result = result.decode('unicode_escape') 868 except UnicodeDecodeError, e: 869 raise SelectorSyntaxError( 870 "Bad symbol %r: %s" % (result, e)) 871 return result, pos
872
class TokenStream(object):
    """
    Wraps a token iterable with one-token lookahead.

    ``used`` lists every token handed out by ``next()``; a peeked token
    is added only once it is actually consumed. ``None`` marks the end of
    the stream for both ``peek()`` and ``next()``.
    """

    def __init__(self, tokens, source=None):
        self.used = []
        self.tokens = iter(tokens)
        self.source = source
        self.peeked = None
        self._peeking = False

    def next(self):
        """Consume and return the next token, or None when exhausted."""
        if self._peeking:
            # Hand out the token that peek() already fetched.
            self._peeking = False
            token = self.peeked
        else:
            try:
                token = self.tokens.next()
            except StopIteration:
                return None
        self.used.append(token)
        return token

    def __iter__(self):
        # Iterate by calling next() until it reports the end with None.
        return iter(self.next, None)

    def peek(self):
        """Return the next token without consuming it, or None at the end."""
        if self._peeking:
            return self.peeked
        try:
            self.peeked = self.tokens.next()
        except StopIteration:
            return None
        self._peeking = True
        return self.peeked
906