Package lxml :: Package tests :: Module test_unicode
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_unicode

 1  # -*- coding: utf-8 -*- 
 2  import unittest, doctest, sys, os.path 
 3   
 4  this_dir = os.path.dirname(__file__) 
 5  if this_dir not in sys.path: 
 6      sys.path.insert(0, this_dir) # needed for Py3 
 7   
 8  from common_imports import StringIO, etree, SillyFileLike, HelperTestCase 
 9  from common_imports import _str, _bytes 
10   
11  try: 
12      unicode 
13  except NameError: 
14      unicode = str 
15   
16  ascii_uni = _str('a') 
17   
18  klingon = _bytes("\\uF8D2").decode("unicode_escape") # not valid for XML names 
19   
20  invalid_tag = _str("test") + klingon 
21   
22  uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape") # some non-ASCII characters 
23   
24  uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>" 
25                ).decode("unicode_escape") 
26   
class UnicodeTestCase(HelperTestCase):
    """Checks how ``etree`` handles unicode text, tag names and namespaces.

    Every test uses the module-level fixtures ``uni`` (non-ASCII text) and
    ``invalid_tag`` (a name the tests expect ``etree`` to reject).

    NOTE(review): the source listing this class was recovered from skips one
    numbered line in front of four of the tests below, i.e. their ``def``
    lines were lost.  Those headers are reconstructed here and marked
    individually; confirm the method names against upstream lxml.
    """

    def test_unicode_xml(self):
        # Unicode text inside a unicode document is returned unchanged.
        tree = etree.XML(_str('<p>%s</p>') % uni)
        self.assertEqual(uni, tree.text)

    def test_unicode_xml_broken(self):
        # A unicode string that carries its own encoding declaration is
        # expected to be refused with ValueError.
        uxml = _str('<?xml version="1.0" encoding="UTF-8"?>') + \
               _str('<p>%s</p>') % uni
        self.assertRaises(ValueError, etree.XML, uxml)

    def test_unicode_tag(self):
        # A non-ASCII tag name round-trips through Element.
        el = etree.Element(uni)
        self.assertEqual(uni, el.tag)

    # NOTE(review): reconstructed header -- confirm the name.
    def test_unicode_tag_invalid(self):
        # sadly, Klingon is not well-formed
        self.assertRaises(ValueError, etree.Element, invalid_tag)

    def test_unicode_nstag(self):
        # A non-ASCII local name inside a namespace round-trips as well.
        tag = _str("{http://abc/}%s") % uni
        el = etree.Element(tag)
        self.assertEqual(tag, el.tag)

    def test_unicode_ns_invalid(self):
        # namespace URIs must conform to RFC 3986
        tag = _str("{http://%s/}abc") % uni
        self.assertRaises(ValueError, etree.Element, tag)

    # NOTE(review): reconstructed header -- confirm the name.
    def test_unicode_nstag_invalid(self):
        # sadly, Klingon is not well-formed
        tag = _str("{http://abc/}%s") % invalid_tag
        self.assertRaises(ValueError, etree.Element, tag)

    def test_unicode_qname(self):
        # QName(namespace, name) exposes "{namespace}name" both as ``.text``
        # and through ``unicode()``.
        qname = etree.QName(uni, uni)
        tag = _str("{%s}%s") % (uni, uni)
        self.assertEqual(qname.text, tag)
        self.assertEqual(unicode(qname), tag)

    # NOTE(review): reconstructed header -- confirm the name.
    def test_unicode_qname_invalid(self):
        self.assertRaises(ValueError, etree.QName, invalid_tag)

    def test_unicode_attr(self):
        # Non-ASCII attribute values are returned unchanged via ``attrib``.
        el = etree.Element('foo', {'bar': uni})
        self.assertEqual(uni, el.attrib['bar'])

    def test_unicode_comment(self):
        # Non-ASCII comment text is returned unchanged.
        el = etree.Comment(uni)
        self.assertEqual(uni, el.text)

    # NOTE(review): reconstructed header -- confirm the name.
    def test_unicode_parse_stringio(self):
        # Parsing a unicode document from a StringIO object keeps the text.
        el = etree.parse(StringIO(_str('<p>%s</p>') % uni)).getroot()
        self.assertEqual(uni, el.text)

##     def test_parse_fileobject_unicode(self):
##         # parse unicode from unnamed file object (not supported by ElementTree)
##         f = SillyFileLike(uxml)
##         root = etree.parse(f).getroot()
##         self.assertEqual(unicode(etree.tostring(root, 'UTF-8'), 'UTF-8'),
##                          uxml)

def test_suite():
    """Build the unittest suite for this module.

    Returns:
        A ``unittest.TestSuite`` holding the tests collected from
        ``UnicodeTestCase``.
    """
    # ``unittest.makeSuite`` was deprecated in Python 3.11 and removed in
    # 3.13.  The default loader's ``loadTestsFromTestCase`` collects the same
    # ``test*`` methods and is also available on the older Python versions
    # this file supports.
    suite = unittest.TestSuite()
    suite.addTests(
        [unittest.defaultTestLoader.loadTestsFromTestCase(UnicodeTestCase)])
    return suite
