1
2 import unittest
3 import sys
4 import os.path
5
# Put the directory containing this file at the front of sys.path
# (presumably so the sibling ``common_imports`` module imported below
# resolves when the tests are run in place).
this_dir = os.path.split(__file__)[0]
if not sys.path.count(this_dir):
    sys.path.insert(0, this_dir)
9
10 from common_imports import StringIO, etree, SillyFileLike, HelperTestCase
11 from common_imports import _str, _bytes, _chr
12
# Python 3 has no ``unicode`` builtin; alias it to ``str`` there so the
# rest of the module can refer to ``unicode`` on either major version.
if sys.version_info[0] >= 3:
    unicode = str
17
18 ascii_uni = _bytes('a').decode('utf8')
19
20 klingon = _bytes("\\uF8D2").decode("unicode_escape")
21
22 invalid_tag = _bytes("test").decode('utf8') + klingon
23
24 uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape")
25
26 uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>"
27 ).decode("unicode_escape")
28
29
32
# Sanity-check the ``_str`` helper itself: different escape spellings of the
# same code point must yield equal strings.
equivalent_spellings = (
    ('\x10', '\u0010'),
    ('\x10', '\U00000010'),
    ('\u1234', '\U00001234'),
)
for short_form, long_form in equivalent_spellings:
    self.assertEqual(_str(short_form), _str(long_form))
36
40
42 if sys.maxunicode < 1114111:
43 return
44 tree = etree.XML(_bytes('<p>\\U00026007</p>').decode('unicode_escape'))
45 self.assertEqual(1, len(tree.text))
46 self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'),
47 tree.text)
48
# A text (non-bytes) document that carries an XML encoding declaration is
# expected to make etree.XML raise ValueError.
declaration = '<?xml version="1.0" encoding="UTF-8"?>'
payload = '<p>%s</p>' % uni
uxml = declaration + payload
self.assertRaises(ValueError, etree.XML, uxml)
53
57
61
66
71
76
82
85
89
93
98
102
# Smoke test: building a processing instruction from non-ASCII target and
# content, then taking its repr(), must not raise.
pi_target = _str('Å')
pi_content = _str('\u0131')
x = etree.ProcessingInstruction(pi_target, pi_content)
repr(x)
106
110
# Checks which values are accepted when assigned to an element's ``.text``.
e = etree.Element('e')


def settext(text):
    # Wrap the attribute assignment in a callable so assertRaises can invoke it.
    e.text = text


# Text containing U+FFFE / U+FFFF, surrogate code points or NUL must be
# rejected with ValueError.
self.assertRaises(ValueError, settext, _str('ab\ufffe'))
# Fixed typo: this was 'ö\ffff' (form feed + "fff"), which never exercised
# U+FFFF; it is now the counterpart of the U+FFFE case above.
self.assertRaises(ValueError, settext, _str('ö\uffff'))
self.assertRaises(ValueError, settext, _str('\u0123\ud800'))
self.assertRaises(ValueError, settext, _str('x\ud8ff'))
self.assertRaises(ValueError, settext, _str('\U00010000\udfff'))
self.assertRaises(ValueError, settext, _str('abd\x00def'))

# Code points directly adjacent to the surrogate range (U+D7FF, U+E000) and
# the first/last supplementary code points must be accepted without error.
settext(_str('\ud7ff\ue000\U00010000\U0010FFFFäöas'))

# Every code point in the surrogate range U+D800..U+DFFF must be rejected,
# whether it appears at the end, alone, or at the start of the text.
for char_val in range(0xD800, 0xDFFF+1):
    self.assertRaises(ValueError, settext, 'abc' + _chr(char_val))
    self.assertRaises(ValueError, settext, _chr(char_val))
    self.assertRaises(ValueError, settext, _chr(char_val) + 'abc')

# Byte strings built from non-ASCII characters, control characters or NUL
# are expected to be rejected as well.
self.assertRaises(ValueError, settext, _bytes('\xe4'))
self.assertRaises(ValueError, settext, _bytes('\x80'))
self.assertRaises(ValueError, settext, _bytes('\xff'))
self.assertRaises(ValueError, settext, _bytes('\x08'))
self.assertRaises(ValueError, settext, _bytes('\x19'))
self.assertRaises(ValueError, settext, _bytes('\x20\x00'))

# Tab, LF, CR, space, backtick and DEL in a byte string must be accepted.
settext(_bytes('\x09\x0A\x0D\x20\x60\x7f'))
139
144
# Tag names that the element factory ``el`` is expected to reject.
for rejected_name in (':', '0a', _str('\u203f')):
    self.assertRaises(ValueError, el, rejected_name)

# A non-ASCII letter (U+0132) must be accepted as a tag name without raising.
el(_str('\u0132'))
150
151
152
156
157
158
159
160
161
162
163
164
169
174
185
188
191
194
197
200
203
206
209
210
212 suite = unittest.TestSuite()
213 suite.addTests([unittest.makeSuite(UnicodeTestCase)])
214 suite.addTests([unittest.makeSuite(EncodingsTestCase)])
215 return suite
216