1 import re
2 from lxml import etree
3
4 __all__ = ['SelectorSyntaxError', 'ExpressionError',
5 'CSSSelector']
6
9
12
14
19
21 return '<%s %s for %r>' % (
22 self.__class__.__name__,
23 hex(abs(id(self)))[2:],
24 self.css)
25
26
27
28
31 obj = unicode.__new__(cls, contents)
32 obj.pos = pos
33 return obj
34
36 return '%s(%s, %r)' % (
37 self.__class__.__name__,
38 unicode.__repr__(self),
39 self.pos)
40
43
46
49
50
51
52
53
54
55
56
58 """
59 Represents selector.class_name
60 """
61
def __init__(self, selector, class_name):
    """Store the two halves of a ``selector.class_name`` expression.

    :param selector: the selector node the class test is attached to.
    :param class_name: the class name written after the dot.
    """
    self.selector, self.class_name = selector, class_name
65
67 return '%s[%r.%s]' % (
68 self.__class__.__name__,
69 self.selector,
70 self.class_name)
71
73 sel_xpath = self.selector.xpath()
74 sel_xpath.add_condition(
75 "contains(concat(' ', normalize-space(@class), ' '), %s)" % xpath_repr(' '+self.class_name+' '))
76 return sel_xpath
77
79 """
80 Represents selector:name(expr)
81 """
82
83 unsupported = [
84 'target', 'lang', 'enabled', 'disabled',]
85
def __init__(self, selector, type, name, expr):
    """Store the parts of a ``selector:name(expr)`` expression.

    :param selector: the selector node the function applies to.
    :param type: the separator token that introduced the function.
    :param name: the function name.
    :param expr: the argument found between the parentheses.
    """
    self.selector, self.type = selector, type
    self.name, self.expr = name, expr
91
93 return '%s[%r%s%s(%r)]' % (
94 self.__class__.__name__,
95 self.selector,
96 self.type, self.name, self.expr)
97
109
112 a, b = parse_series(expr)
113 if not a and not b and not last:
114
115 xpath.add_condition('false() and position() = 0')
116 return xpath
117 if add_name_test:
118 xpath.add_name_test()
119 xpath.add_star_prefix()
120 if a == 0:
121 if last:
122 b = 'last() - %s' % b
123 xpath.add_condition('position() = %s' % b)
124 return xpath
125 if last:
126
127 a = -a
128 b = -b
129 if b > 0:
130 b_neg = str(-b)
131 else:
132 b_neg = '+%s' % (-b)
133 if a != 1:
134 expr = ['(position() %s) mod %s = 0' % (b_neg, a)]
135 else:
136 expr = []
137 if b >= 0:
138 expr.append('position() >= %s' % b)
139 elif b < 0 and last:
140 expr.append('position() < (last() %s)' % b)
141 expr = ' and '.join(expr)
142 if expr:
143 xpath.add_condition(expr)
144 return xpath
145
146
147
148
149
150
151
152
155
161
164
173
181
184
185 ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
186 ns.prefix = 'css'
187 ns['lower-case'] = _make_lower_case
188
190 """
191 Represents selector:ident
192 """
193
194 unsupported = ['indeterminate', 'first-line', 'first-letter',
195 'selection', 'before', 'after', 'link', 'visited',
196 'active', 'focus', 'hover']
197
def __init__(self, element, type, ident):
    """Store the parts of a ``selector:ident`` expression.

    :param element: the selector node the pseudo-class applies to.
    :param type: the separator, which must be ``':'`` or ``'::'``.
    :param ident: the pseudo-class / pseudo-element identifier.
    """
    self.element = element
    # Checked after ``element`` is stored, before the remaining attributes.
    assert type in (':', '::')
    self.type, self.ident = type, ident
203
205 return '%s[%r%s%s]' % (
206 self.__class__.__name__,
207 self.element,
208 self.type, self.ident)
209
222
224
225 xpath.add_condition("(@selected or @checked) and (name(.) = 'input' or name(.) = 'option')")
226 return xpath
227
229
230 raise NotImplementedError
231
237
243
251
259
265
267 if xpath.element == '*':
268 raise NotImplementedError(
269 "*:only-of-type is not implemented")
270 xpath.add_condition('last() = 1')
271 return xpath
272
276
278 """
279 Represents selector[namespace|attrib operator value]
280 """
281
def __init__(self, selector, namespace, attrib, operator, value):
    """Store the parts of a ``selector[namespace|attrib operator value]`` test.

    :param selector: the selector node the attribute test applies to.
    :param namespace: the attribute namespace (``'*'`` when none was given).
    :param attrib: the attribute name.
    :param operator: the comparison operator, or ``'exists'`` for a bare
        ``[attrib]`` test.
    :param value: the value to compare against (``None`` for ``'exists'``).
    """
    self.selector = selector
    self.namespace, self.attrib = namespace, attrib
    self.operator, self.value = operator, value
288
290 if self.operator == 'exists':
291 return '%s[%r[%s]]' % (
292 self.__class__.__name__,
293 self.selector,
294 self._format_attrib())
295 else:
296 return '%s[%r[%s %s %r]]' % (
297 self.__class__.__name__,
298 self.selector,
299 self._format_attrib(),
300 self.operator,
301 self.value)
302
308
310
311 if self.namespace == '*':
312 return '@' + self.attrib
313 else:
314 return '@%s:%s' % (self.namespace, self.attrib)
315
317 path = self.selector.xpath()
318 attrib = self._xpath_attrib()
319 value = self.value
320 if self.operator == 'exists':
321 assert not value
322 path.add_condition(attrib)
323 elif self.operator == '=':
324 path.add_condition('%s = %s' % (attrib,
325 xpath_repr(value)))
326 elif self.operator == '!=':
327
328 if value:
329 path.add_condition('not(%s) or %s != %s'
330 % (attrib, attrib, xpath_repr(value)))
331 else:
332 path.add_condition('%s != %s'
333 % (attrib, xpath_repr(value)))
334
335 elif self.operator == '~=':
336 path.add_condition("contains(concat(' ', normalize-space(%s), ' '), %s)" % (attrib, xpath_repr(' '+value+' ')))
337 elif self.operator == '|=':
338
339 path.add_condition('%s = %s or starts-with(%s, %s)' % (
340 attrib, xpath_repr(value),
341 attrib, xpath_repr(value + '-')))
342 elif self.operator == '^=':
343 path.add_condition('starts-with(%s, %s)' % (
344 attrib, xpath_repr(value)))
345 elif self.operator == '$=':
346
347 path.add_condition('substring(%s, string-length(%s)-%s) = %s'
348 % (attrib, attrib, len(value)-1, xpath_repr(value)))
349 elif self.operator == '*=':
350
351 path.add_condition('contains(%s, %s)' % (
352 attrib, xpath_repr(value)))
353 else:
354 assert 0, ("Unknown operator: %r" % self.operator)
355 return path
356
358 """
359 Represents namespace|element
360 """
361
def __init__(self, namespace, element):
    """Store the two halves of a ``namespace|element`` expression.

    :param namespace: the namespace prefix (``'*'`` when none was given).
    :param element: the element name.
    """
    self.namespace, self.element = namespace, element
365
367 return '%s[%s]' % (
368 self.__class__.__name__,
369 self._format_element())
370
376
378 if self.namespace == '*':
379 el = self.element.lower()
380 else:
381
382 el = '%s:%s' % (self.namespace, self.element)
383 return XPathExpr(element=el)
384
386 """
387 Represents selector#id
388 """
389
391 self.selector = selector
392 self.id = id
393
395 return '%s[%r#%s]' % (
396 self.__class__.__name__,
397 self.selector, self.id)
398
403
405
409 return '%s(%r)' % (
410 self.__class__.__name__,
411 self.items)
412
416
418
419 _method_mapping = {
420 ' ': 'descendant',
421 '>': 'child',
422 '+': 'direct_adjacent',
423 '~': 'indirect_adjacent',
424 }
425
def __init__(self, selector, combinator, subselector):
    """Store a ``selector <combinator> subselector`` pair.

    :param selector: the left-hand selector; must not be ``None``.
    :param combinator: the combinator token joining the two selectors.
    :param subselector: the right-hand selector.
    """
    # The left-hand side is mandatory; checked before anything is stored.
    assert selector is not None
    self.selector, self.combinator = selector, combinator
    self.subselector = subselector
431
433 if self.combinator == ' ':
434 comb = '<followed>'
435 else:
436 comb = self.combinator
437 return '%s[%r %s %r]' % (
438 self.__class__.__name__,
439 self.selector,
440 comb,
441 self.subselector)
442
451
456
461
468
473
474
475
476
477 _el_re = re.compile(r'^\w+\s*$')
478 _id_re = re.compile(r'^(\w*)#(\w+)\s*$')
479 _class_re = re.compile(r'^(\w*)\.(\w+)\s*$')
480
482 if isinstance(css_expr, basestring):
483 match = _el_re.search(css_expr)
484 if match is not None:
485 return '%s%s' % (prefix, match.group(0).strip())
486 match = _id_re.search(css_expr)
487 if match is not None:
488 return "%s%s[@id = '%s']" % (
489 prefix, match.group(1) or '*', match.group(2))
490 match = _class_re.search(css_expr)
491 if match is not None:
492 return "%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]" % (
493 prefix, match.group(1) or '*', match.group(2))
494 css_expr = parse(css_expr)
495 expr = css_expr.xpath()
496 assert expr is not None, (
497 "Got None for xpath expression from %s" % repr(css_expr))
498 if prefix:
499 expr.add_prefix(prefix)
500 return str(expr)
501
503
def __init__(self, prefix=None, path=None, element='*', condition=None,
             star_prefix=False):
    """Initialise the pieces an XPath expression is assembled from.

    :param prefix: text placed before everything else, or ``None``.
    :param path: the path leading up to ``element``, or ``None``.
    :param element: the element test; defaults to ``'*'``.
    :param condition: the predicate text, or ``None`` for no predicate.
    :param star_prefix: flag recording whether a ``*/`` step was added.
    """
    self.prefix, self.path = prefix, path
    self.element, self.condition = element, condition
    self.star_prefix = star_prefix
511
513 path = ''
514 if self.prefix is not None:
515 path += str(self.prefix)
516 if self.path is not None:
517 path += str(self.path)
518 path += str(self.element)
519 if self.condition:
520 path += '[%s]' % self.condition
521 return path
522
524 return '%s[%s]' % (
525 self.__class__.__name__, self)
526
528 if self.condition:
529 self.condition = '%s and (%s)' % (self.condition, condition)
530 else:
531 self.condition = condition
532
534 if self.path is None:
535 self.path = self.element
536 else:
537 self.path += self.element
538 self.element = part
539
545
547 if self.element == '*':
548
549 return
550 self.add_condition("name() = %s" % xpath_repr(self.element))
551 self.element = '*'
552
554 """
Appends a */ step to the path (or starts the path with it when there
is none). This is for when you need to keep contexts constrained to a single parent.
557 """
558 if self.path:
559 self.path += '*/'
560 else:
561 self.path = '*/'
562 self.star_prefix = True
563
def join(self, combiner, other):
    """Extend this expression in place with ``other``.

    The current expression is rendered to text, ``combiner`` is appended,
    and the result becomes the new prefix. The path, element and condition
    are then taken over from ``other``.

    :param combiner: the text inserted between the two expressions.
    :param other: the expression to continue with.
    """
    joined_prefix = str(self) + combiner
    tail = (other.prefix or '') + (other.path or '')
    # A path that consists only of the step added by a star prefix is
    # dropped rather than carried over.
    if other.star_prefix and tail == '*/':
        tail = ''
    self.prefix = joined_prefix
    self.path = tail
    self.element = other.element
    self.condition = other.condition
576
578
579 """
Represents |'d expressions. Note that unfortunately the result isn't
the union, it's the sum, so duplicate elements will appear.
582 """
583
584 - def __init__(self, items, prefix=None):
589
593
602
603
604
605
615
617 result = []
618 while 1:
619 result.append(parse_selector(stream))
620 if stream.peek() == ',':
621 stream.next()
622 else:
623 break
624 if len(result) == 1:
625 return result[0]
626 else:
627 return Or(result)
628
643
645 peek = stream.peek()
646 if peek != '*' and not isinstance(peek, Symbol):
647 element = namespace = '*'
648 else:
649 next = stream.next()
650 if next != '*' and not isinstance(next, Symbol):
651 raise SelectorSyntaxError(
652 "Expected symbol, got %r" % next)
653 if stream.peek() == '|':
654 namespace = next
655 stream.next()
656 element = stream.next()
657 if element != '*' and not isinstance(next, Symbol):
658 raise SelectorSyntaxError(
659 "Expected symbol, got %r" % next)
660 else:
661 namespace = '*'
662 element = next
663 result = Element(namespace, element)
664 has_hash = False
665 while 1:
666 peek = stream.peek()
667 if peek == '#':
668 if has_hash:
669
670
671 break
672 stream.next()
673 result = Hash(result, stream.next())
674 has_hash = True
675 continue
676 elif peek == '.':
677 stream.next()
678 result = Class(result, stream.next())
679 continue
680 elif peek == '[':
681 stream.next()
682 result = parse_attrib(result, stream)
683 next = stream.next()
684 if not next == ']':
685 raise SelectorSyntaxError(
686 "] expected, got %r" % next)
687 continue
688 elif peek == ':' or peek == '::':
689 type = stream.next()
690 ident = stream.next()
691 if not isinstance(ident, Symbol):
692 raise SelectorSyntaxError(
693 "Expected symbol, got %r" % ident)
694 if stream.peek() == '(':
695 stream.next()
696 peek = stream.peek()
697 if isinstance(peek, String):
698 selector = stream.next()
699 elif isinstance(peek, Symbol) and is_int(peek):
700 selector = int(stream.next())
701 else:
702
703 selector = parse_simple_selector(stream)
704 next = stream.next()
705 if not next == ')':
706 raise SelectorSyntaxError(
707 "Expected ), got %r and %r"
708 % (next, selector))
709 result = Function(result, type, ident, selector)
710 else:
711 result = Pseudo(result, type, ident)
712 continue
713 else:
714 break
715
716 return result
717
719 try:
720 int(v)
721 except ValueError:
722 return False
723 else:
724 return True
725
727 attrib = stream.next()
728 if stream.peek() == '|':
729 namespace = attrib
730 stream.next()
731 attrib = stream.next()
732 else:
733 namespace = '*'
734 if stream.peek() == ']':
735 return Attrib(selector, namespace, attrib, 'exists', None)
736 op = stream.next()
737 if not op in ('^=', '$=', '*=', '=', '~=', '|=', '!='):
738 raise SelectorSyntaxError(
739 "Operator expected, got %r" % op)
740 value = stream.next()
741 if not isinstance(value, (Symbol, String)):
742 raise SelectorSyntaxError(
743 "Expected string or symbol, got %r" % value)
744 return Attrib(selector, namespace, attrib, op, value)
745
747 """
748 Parses things like '1n+2', or 'an+b' generally, returning (a, b)
749 """
750 if isinstance(s, Element):
751 s = s._format_element()
752 if not s or s == '*':
753
754 return (0, 0)
755 if isinstance(s, int):
756
757 return (0, s)
758 if s == 'odd':
759 return (2, 1)
760 elif s == 'even':
761 return (2, 0)
762 elif s == 'n':
763 return (1, 0)
764 if 'n' not in s:
765
766 return (0, int(s))
767 a, b = s.split('n', 1)
768 if not a:
769 a = 1
770 elif a == '-' or a == '+':
771 a = int(a+'1')
772 else:
773 a = int(a)
774 if not b:
775 b = 0
776 elif b == '-' or b == '+':
777 b = int(b+'1')
778 else:
779 b = int(b)
780 return (a, b)
781
782
783
784
785
786
787 _whitespace_re = re.compile(r'\s+')
788
789 _comment_re = re.compile(r'/\*.*?\*/', re.S)
790
791 _count_re = re.compile(r'[+-]?\d*n(?:[+-]\d+)?')
792
794 pos = 0
795 s = _comment_re.sub('', s)
796 while 1:
797 match = _whitespace_re.match(s, pos=pos)
798 if match:
799 pos = match.end()
800 if pos >= len(s):
801 return
802 match = _count_re.match(s, pos=pos)
803 if match and match.group() != 'n':
804 sym = s[pos:match.end()]
805 yield Symbol(sym, pos)
806 pos = match.end()
807 continue
808 c = s[pos]
809 c2 = s[pos:pos+2]
810 if c2 in ('~=', '|=', '^=', '$=', '*=', '::', '!='):
811 yield Token(c2, pos)
812 pos += 2
813 continue
814 if c in '>+~,.*=[]()|:#':
815 yield Token(c, pos)
816 pos += 1
817 continue
818 if c == '"' or c == "'":
819
820 old_pos = pos
821 sym, pos = tokenize_escaped_string(s, pos)
822 yield String(sym, old_pos)
823 continue
824 old_pos = pos
825 sym, pos = tokenize_symbol(s, pos)
826 yield Symbol(sym, old_pos)
827 continue
828
830 quote = s[pos]
831 assert quote in ('"', "'")
832 pos = pos+1
833 start = pos
834 while 1:
835 next = s.find(quote, pos)
836 if next == -1:
837 raise SelectorSyntaxError(
838 "Expected closing %s for string in: %r"
839 % (quote, s[start:]))
840 result = s[start:next]
841 try:
842 result = result.decode('unicode_escape')
843 except UnicodeDecodeError:
844
845 pos = next+1
846 else:
847 return result, next+1
848
849 _illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE)
850
852 start = pos
853 match = _illegal_symbol.search(s, pos=pos)
854 if not match:
855
856 return s[start:], len(s)
857 if match.start() == pos:
858 assert 0, (
859 "Unexpected symbol: %r at %s" % (s[pos], pos))
860 if not match:
861 result = s[start:]
862 pos = len(s)
863 else:
864 result = s[start:match.start()]
865 pos = match.start()
866 try:
867 result = result.decode('unicode_escape')
868 except UnicodeDecodeError, e:
869 raise SelectorSyntaxError(
870 "Bad symbol %r: %s" % (result, e))
871 return result, pos
872
874
def __init__(self, tokens, source=None):
    """Wrap an iterable of tokens for one-token look-ahead reading.

    :param tokens: any iterable of tokens; it is consumed lazily.
    :param source: optional value kept on the stream as ``source``.
    """
    self.used = []  # tokens already handed out
    self.tokens = iter(tokens)
    # Look-ahead slot: ``peeked`` is only meaningful while ``_peeking``.
    self.peeked, self._peeking = None, False
    self.source = source
881
883 if self._peeking:
884 self._peeking = False
885 self.used.append(self.peeked)
886 return self.peeked
887 else:
888 try:
889 next = self.tokens.next()
890 self.used.append(next)
891 return next
892 except StopIteration:
893 return None
894
897
899 if not self._peeking:
900 try:
901 self.peeked = self.tokens.next()
902 except StopIteration:
903 return None
904 self._peeking = True
905 return self.peeked
906