Package lxml :: Module cssselect
[hide private]
[frames | no frames]

Source Code for Module lxml.cssselect

  1  """CSS Selectors based on XPath. 
  2   
  3  This module supports selecting XML/HTML tags based on CSS selectors. 
  4  See the `CSSSelector` class for details. 
  5   
  6  This is a thin wrapper around cssselect 0.7 or later. 
  7  """ 
  8   
  9  from __future__ import absolute_import 
 10   
 11  from . import etree 
 12  try: 
 13      import cssselect as external_cssselect 
 14  except ImportError: 
 15      raise ImportError( 
 16          'cssselect does not seem to be installed. ' 
 17          'See http://packages.python.org/cssselect/') 
 18   
 19   
 20  SelectorSyntaxError = external_cssselect.SelectorSyntaxError 
 21  ExpressionError = external_cssselect.ExpressionError 
 22  SelectorError = external_cssselect.SelectorError 
 23   
 24   
 25  __all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError', 
 26             'CSSSelector'] 
 27   
 28   
class LxmlTranslator(external_cssselect.GenericTranslator):
    """
    CSS selector to XPath translator that layers lxml-specific extensions
    on top of the generic cssselect translator.
    """

    def xpath_contains_function(self, xpath, function):
        """Translate the ``:contains()`` pseudo-class into an XPath condition.

        The single argument must be a string or an identifier.  It is
        lower-cased here and compared against the element's string value
        after that value has been passed through the
        ``__lxml_internal_css:lower-case`` XPath function.

        Raises ``ExpressionError`` when the argument list is anything other
        than exactly one STRING or one IDENT.
        """
        # :contains() was defined in an early CSS3 selectors draft and
        # removed in later ones:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        accepted_signatures = (['STRING'], ['IDENT'])
        if function.argument_types() in accepted_signatures:
            needle = function.arguments[0].value
            condition = (
                'contains(__lxml_internal_css:lower-case(string(.)), %s)'
                % self.xpath_literal(needle.lower()))
            return xpath.add_condition(condition)
        raise ExpressionError(
            "Expected a single string or ident for :contains(), got %r"
            % function.arguments)
44 45
class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    Translator combining the lxml extensions of ``LxmlTranslator`` with the
    HTML support of cssselect's ``HTMLTranslator``.
    """
50 51
52 -def _make_lower_case(context, s):
53 return s.lower()
# Register ``_make_lower_case`` as the XPath extension function
# ``__lxml_internal_css:lower-case``.
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
# Prefix under which the namespace is addressed in XPath expressions.
ns.prefix = '__lxml_internal_css'
ns['lower-case'] = _make_lower_case


class CSSSelector(etree.XPath):
    """A CSS selector, compiled to XPath.

    Usage::

        >>> from lxml import etree, cssselect
        >>> select = cssselect.CSSSelector("a tag > child")

        >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
        >>> [ el.tag for el in select(root) ]
        ['child']

    To use CSS namespaces, you need to pass a prefix-to-namespace
    mapping as ``namespaces`` keyword argument::

        >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
        ...                                   namespaces={'rdf': rdfns})

        >>> rdf = etree.XML((
        ...     '<root xmlns:rdf="%s">'
        ...       '<rdf:Description>blah</rdf:Description>'
        ...     '</root>') % rdfns)
        >>> [(el.tag, el.text) for el in select_ns(rdf)]
        [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]

    The ``translator`` argument selects how the CSS is translated:
    ``'xml'`` (the default), ``'html'`` or ``'xhtml'``.  Any other value
    is used as the translator itself and must provide ``css_to_xpath()``.
    """

    def __init__(self, css, namespaces=None, translator='xml'):
        # Map the well-known translator names to translator instances;
        # anything else is taken to be a translator object already.
        if translator == 'xml':
            css_translator = LxmlTranslator()
        elif translator == 'html':
            css_translator = LxmlHTMLTranslator()
        elif translator == 'xhtml':
            css_translator = LxmlHTMLTranslator(xhtml=True)
        else:
            css_translator = translator
        xpath_expression = css_translator.css_to_xpath(css)
        etree.XPath.__init__(self, xpath_expression, namespaces=namespaces)
        # Keep the original CSS text around; __repr__ displays it.
        self.css = css

    def __repr__(self):
        class_name = self.__class__.__name__
        # Hexadecimal object id with the leading '0x' stripped.
        hex_id = hex(abs(id(self)))[2:]
        return '<%s %s for %r>' % (class_name, hex_id, self.css)
103