Package lxml :: Module cssselect
[hide private]
[frames | no frames]

Source Code for Module lxml.cssselect

  1  """CSS Selectors based on XPath. 
  2   
  3  This module supports selecting XML/HTML tags based on CSS selectors. 
  4  See the `CSSSelector` class for details. 
  5   
  6  This is a thin wrapper around cssselect 0.7 or later. 
  7  """ 
  8   
  9  import sys 
 10  from lxml import etree 
 11   
 12  ## Work-around the lack of absolute import in Python 2.4 
 13  #from __future__ import absolute_import 
 14  #from cssselect import ... 
 15  try: 
 16      external_cssselect = __import__('cssselect') 
 17  except ImportError: 
 18      raise ImportError('cssselect seems not to be installed. ' 
 19                        'See http://packages.python.org/cssselect/') 
 20   
 21  SelectorSyntaxError = external_cssselect.SelectorSyntaxError 
 22  ExpressionError = external_cssselect.ExpressionError 
 23  SelectorError = external_cssselect.SelectorError 
 24   
 25   
 26  __all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError', 
 27             'CSSSelector'] 
 28   
 29   
class LxmlTranslator(external_cssselect.GenericTranslator):
    """
    A custom CSS selector to XPath translator with lxml-specific extensions.
    """
    def xpath_contains_function(self, xpath, function):
        """Translate the ``:contains()`` pseudo-class into an XPath condition.

        The condition compares the lower-cased string value of the node
        with the lower-cased argument, so the match is case-insensitive.

        Raises ``ExpressionError`` unless ``function`` has exactly one
        argument of type STRING or IDENT.
        """
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        if function.argument_types() not in (['STRING'], ['IDENT']):
            # Wrap in a 1-tuple so that the whole argument sequence is
            # formatted with %r even if it happens to be a tuple itself.
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r"
                % (function.arguments,))
        value = function.arguments[0].value
        # ``__lxml_internal_css:lower-case`` is the XPath extension function
        # registered at module level in this file.
        return xpath.add_condition(
            'contains(__lxml_internal_css:lower-case(string(.)), %s)'
            % self.xpath_literal(value.lower()))
45 46
class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    lxml extensions + HTML support.

    Combines ``LxmlTranslator`` with the external ``HTMLTranslator`` through
    multiple inheritance; it adds no members of its own.
    """
51 52
53 -def _make_lower_case(context, s):
54 return s.lower()
# Expose ``_make_lower_case`` to XPath as ``__lxml_internal_css:lower-case``.
# The prefix must match the one used in the XPath expression built by
# ``LxmlTranslator.xpath_contains_function``.
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = '__lxml_internal_css'
ns['lower-case'] = _make_lower_case
class CSSSelector(etree.XPath):
    """A CSS selector.

    Usage::

        >>> from lxml import etree, cssselect
        >>> select = cssselect.CSSSelector("a tag > child")

        >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
        >>> [ el.tag for el in select(root) ]
        ['child']

    To use CSS namespaces, you need to pass a prefix-to-namespace
    mapping as ``namespaces`` keyword argument::

        >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
        ...                                   namespaces={'rdf': rdfns})

        >>> rdf = etree.XML((
        ...     '<root xmlns:rdf="%s">'
        ...       '<rdf:Description>blah</rdf:Description>'
        ...     '</root>') % rdfns)
        >>> [(el.tag, el.text) for el in select_ns(rdf)]
        [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]

    The ``translator`` keyword argument selects how the CSS expression is
    converted to XPath: ``'xml'`` (the default) uses ``LxmlTranslator()``,
    ``'html'`` uses ``LxmlHTMLTranslator()`` and ``'xhtml'`` uses
    ``LxmlHTMLTranslator(xhtml=True)``.  Any other value is used as the
    translator object itself and must provide a ``css_to_xpath()`` method.

    """
    def __init__(self, css, namespaces=None, translator='xml'):
        """Compile the CSS expression ``css`` into an XPath evaluator.

        The original expression is kept in the ``css`` attribute.
        """
        if translator == 'xml':
            translator = LxmlTranslator()
        elif translator == 'html':
            translator = LxmlHTMLTranslator()
        elif translator == 'xhtml':
            translator = LxmlHTMLTranslator(xhtml=True)
        # Anything else is taken to be a ready-made translator instance.
        path = translator.css_to_xpath(css)
        etree.XPath.__init__(self, path, namespaces=namespaces)
        self.css = css

    def __repr__(self):
        """Return ``<ClassName hex-id for 'css expression'>``."""
        # '%x' is used instead of hex()[2:] because hex() of a Python 2
        # long appends an 'L' suffix that would end up in the output.
        return '<%s %s for %r>' % (
            self.__class__.__name__,
            '%x' % abs(id(self)),
            self.css)
104