Package lxml :: Package tests :: Module selftest
[hide private]
[frames | no frames]

Module selftest

source code

Functions [hide private]
 
stdout() source code
 
fix_compatibility(xml_data) source code
 
serialize(elem, **options) source code
 
summarize(elem) source code
 
summarize_list(seq) source code
 
normalize_crlf(tree) source code
 
check_string(string) source code
 
check_string_or_none(value) source code
 
check_mapping(mapping) source code
 
check_element(element) source code
 
check_element_tree(tree) source code
 
interface()
Test element tree interface.
source code
 
simpleops() source code
 
find()
Test find methods (including xpath syntax).
source code
 
bad_find()
Check bad or unsupported path expressions.
source code
 
parsefile()
Test parsing from file.
source code
 
parseliteral()
## >>> sequence = ["<html><body>", "text</bo", "dy></html>"] ## >>> element = ElementTree.fromstringlist(sequence) ## >>> ElementTree.ElementTree(element).write(stdout()) ## <html><body>text</body></html>
source code
 
iterparse()
Test iterparse interface.
source code
 
writefile()
## Test tag suppression ## >>> elem.tag = None ## >>> serialize(elem) ## 'text<subtag>subtext</subtag>'
source code
 
writestring() source code
 
encoding()
Test encoding issues.
source code
 
iterators()
Test iterators.
source code
 
namespace()
Test namespace issues.
source code
 
qname()
Test QName handling.
source code
 
xpath_tokenizer(p)
Test the XPath tokenizer.
source code
 
xinclude_loader(href, parse='xml', encoding=None) source code
 
xinclude()
Basic inclusion example (XInclude C.1)
source code
 
xinclude_default() source code
 
bug_xmltoolkit25()
typo in ElementTree.findtext
source code
 
bug_xmltoolkit28()
.//tag causes exceptions
source code
Variables [hide private]
  SAMPLE_XML = ElementTree.XML(...
  ENTITY_XML = '<!DOCTYPE points [\n<!ENTITY % user-entities SYS...
  XINCLUDE = {'C1.xml': '<?xml version=\'1.0\'?>\n<document xmln...
  __package__ = 'lxml.tests'
Function Details [hide private]

find()

source code 

Test find methods (including xpath syntax).

>>> elem = SAMPLE_XML
>>> elem.find("tag").tag
'tag'
>>> ElementTree.ElementTree(elem).find("tag").tag
'tag'
>>> elem.find("section/tag").tag
'tag'
>>> ElementTree.ElementTree(elem).find("section/tag").tag
'tag'
>>> elem.findtext("tag")
'text'
>>> elem.findtext("tog")
>>> elem.findtext("tog", "default")
'default'
>>> ElementTree.ElementTree(elem).findtext("tag")
'text'
>>> elem.findtext("section/tag")
'subtext'
>>> ElementTree.ElementTree(elem).findtext("section/tag")
'subtext'
>>> summarize_list(elem.findall("tag"))
['tag', 'tag']
>>> summarize_list(elem.findall("*"))
['tag', 'tag', 'section']
>>> summarize_list(elem.findall(".//tag"))
['tag', 'tag', 'tag']
>>> summarize_list(elem.findall("section/tag"))
['tag']
>>> summarize_list(elem.findall("section//tag"))
['tag']
>>> summarize_list(elem.findall("section/*"))
['tag']
>>> summarize_list(elem.findall("section//*"))
['tag']
>>> summarize_list(elem.findall("section/.//*"))
['tag']
>>> summarize_list(elem.findall("*/*"))
['tag']
>>> summarize_list(elem.findall("*//*"))
['tag']
>>> summarize_list(elem.findall("*/tag"))
['tag']
>>> summarize_list(elem.findall("*/./tag"))
['tag']
>>> summarize_list(elem.findall("./tag"))
['tag', 'tag']
>>> summarize_list(elem.findall(".//tag"))
['tag', 'tag', 'tag']
>>> summarize_list(elem.findall("././tag"))
['tag', 'tag']
>>> summarize_list(elem.findall(".//tag[@class]"))
['tag', 'tag', 'tag']
>>> summarize_list(elem.findall(".//tag[@class='a']"))
['tag']
>>> summarize_list(elem.findall(".//tag[@class='b']"))
['tag', 'tag']
>>> summarize_list(elem.findall(".//tag[@id]"))
['tag']
>>> summarize_list(elem.findall(".//section[tag]"))
['section']
>>> summarize_list(elem.findall(".//section[element]"))
[]
>>> summarize_list(elem.findall("../tag"))
[]
>>> summarize_list(elem.findall("section/../tag"))
['tag', 'tag']
>>> summarize_list(ElementTree.ElementTree(elem).findall("./tag"))
['tag', 'tag']

FIXME: ET's Path module handles this case incorrectly; this gives a warning in 1.3, and the behaviour will be modified in 1.4.

>>> summarize_list(ElementTree.ElementTree(elem).findall("/tag"))
['tag', 'tag']

bad_find()

source code 

Check bad or unsupported path expressions.

>>> elem = SAMPLE_XML
>>> elem.findall("/tag")
Traceback (most recent call last):
SyntaxError: cannot use absolute path on element

# this is supported in ET 1.3: #>>> elem.findall("section//") #Traceback (most recent call last): #SyntaxError: invalid path

parsefile()

source code 

Test parsing from file.

>>> tree = ElementTree.parse("samples/simple.xml")
>>> normalize_crlf(tree)
>>> tree.write(stdout())
<root>
   <element key="value">text</element>
   <element>text</element>tail
   <empty-element/>
</root>
>>> tree = ElementTree.parse("samples/simple-ns.xml")
>>> normalize_crlf(tree)
>>> tree.write(stdout())
<root xmlns="http://namespace/">
   <element key="value">text</element>
   <element>text</element>tail
   <empty-element/>
</root>

## <ns0:root xmlns:ns0="http://namespace/"> ## <ns0:element key="value">text</ns0:element> ## <ns0:element>text</ns0:element>tail ## <ns0:empty-element/> ## </ns0:root>

parseliteral()

source code 
>>> element = ElementTree.XML("<html><body>text</body></html>")
>>> ElementTree.ElementTree(element).write(stdout())
<html><body>text</body></html>
>>> element = ElementTree.fromstring("<html><body>text</body></html>")
>>> ElementTree.ElementTree(element).write(stdout())
<html><body>text</body></html>

## >>> sequence = ["<html><body>", "text</bo", "dy></html>"] ## >>> element = ElementTree.fromstringlist(sequence) ## >>> ElementTree.ElementTree(element).write(stdout()) ## <html><body>text</body></html>

>>> print(repr(ElementTree.tostring(element)).lstrip('b'))
'<html><body>text</body></html>'

# looks different in lxml # >>> print(ElementTree.tostring(element, "ascii")) # <?xml version='1.0' encoding='ascii'?> # <html><body>text</body></html>

>>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
>>> len(ids)
0
>>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
>>> len(ids)
1
>>> ids["body"].tag
'body'

namespace()

source code 

Test namespace issues.

  1. xml namespace
>>> elem = ElementTree.XML("<tag xml:lang='en' />")
>>> serialize(elem) # 1.1
'<tag xml:lang="en"/>'
  2. other "well-known" namespaces
>>> elem = ElementTree.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
>>> serialize(elem) # 2.1
'<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>'
>>> elem = ElementTree.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
>>> serialize(elem) # 2.2
'<html:html xmlns:html="http://www.w3.org/1999/xhtml"/>'
>>> elem = ElementTree.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
>>> serialize(elem) # 2.3
'<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope"/>'
  3. unknown namespaces

qname()

source code 

Test QName handling.

  1. decorated tags
>>> elem = ElementTree.Element("{uri}tag")
>>> serialize(elem) # 1.1
'<ns0:tag xmlns:ns0="uri"/>'
>>> elem = ElementTree.Element(ElementTree.QName("{uri}tag"))
>>> serialize(elem) # 1.2
'<ns0:tag xmlns:ns0="uri"/>'
>>> elem = ElementTree.Element(ElementTree.QName("uri", "tag"))
>>> serialize(elem) # 1.3
'<ns0:tag xmlns:ns0="uri"/>'

# ns/attribute order ...

## 2) decorated attributes

## >>> elem.clear() ## >>> elem.attrib["{uri}key"] = "value" ## >>> serialize(elem) # 2.1 ## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'

## >>> elem.clear() ## >>> elem.attrib[ElementTree.QName("{uri}key")] = "value" ## >>> serialize(elem) # 2.2 ## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'

## 3) decorated values are not converted by default, but the ## QName wrapper can be used for values

## >>> elem.clear() ## >>> elem.attrib["{uri}key"] = "{uri}value" ## >>> serialize(elem) # 3.1 ## '<ns0:tag ns0:key="{uri}value" xmlns:ns0="uri"/>'

## >>> elem.clear() ## >>> elem.attrib["{uri}key"] = ElementTree.QName("{uri}value") ## >>> serialize(elem) # 3.2 ## '<ns0:tag ns0:key="ns0:value" xmlns:ns0="uri"/>'

## >>> elem.clear() ## >>> subelem = ElementTree.Element("tag") ## >>> subelem.attrib["{uri1}key"] = ElementTree.QName("{uri2}value") ## >>> elem.append(subelem) ## >>> elem.append(subelem) ## >>> serialize(elem) # 3.3 ## '<ns0:tag xmlns:ns0="uri"><tag ns1:key="ns2:value" xmlns:ns1="uri1" xmlns:ns2="uri2"/><tag ns1:key="ns2:value" xmlns:ns1="uri1" xmlns:ns2="uri2"/></ns0:tag>'

xinclude()

source code 

Basic inclusion example (XInclude C.1)

>>> document = xinclude_loader("C1.xml")
>>> ElementInclude.include(document, xinclude_loader)
>>> print(serialize(document)) # C1
<document>
  <p>120 Mz is adequate for an average home user.</p>
  <disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
</document>

Textual inclusion example (XInclude C.2)

>>> document = xinclude_loader("C2.xml")
>>> ElementInclude.include(document, xinclude_loader)
>>> print(serialize(document)) # C2
<document>
  <p>This document has been accessed
  324387 times.</p>
</document>

Textual inclusion of XML example (XInclude C.3)

>>> document = xinclude_loader("C3.xml")
>>> ElementInclude.include(document, xinclude_loader)
>>> print(serialize(document)) # C3
<document>
  <p>The following is the source of the "data.xml" resource:</p>
  <example>&lt;?xml version='1.0'?&gt;
&lt;data&gt;
  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
&lt;/data&gt;
</example>
</document>

## Fallback example (XInclude C.5) ## Note! Fallback support is not yet implemented

## >>> document = xinclude_loader("C5.xml") ## >>> ElementInclude.include(document, xinclude_loader) ## Traceback (most recent call last): ## IOError: resource not found ## >>> # print(serialize(document)) # C5


Variables Details [hide private]

SAMPLE_XML

Value:
ElementTree.XML("""
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
   <section>
    <tag class='b' id='inner'>subtext</tag>
   </section>
</body>
...

ENTITY_XML

Value:
'''<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM \'user-entities.xml\'>
%user-entities;
]>
<document>&entity;</document>
'''

XINCLUDE

Value:
{'C1.xml': '''<?xml version=\'1.0\'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
''',
 'C2.xml': '''<?xml version=\'1.0\'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
...