1
2 from __future__ import absolute_import
3
4 import unittest
5 import sys
6
7 from .common_imports import StringIO, etree, HelperTestCase, _str, _bytes, _chr
8
# Where the builtin ``unicode`` does not exist (NameError), bind the name to
# ``str`` so the rest of the module can refer to ``unicode`` either way.
try:
    unicode
except NameError:
    unicode = str

# Text fixtures.  Each one is produced by decoding the result of
# ``_bytes(...)``; the escapes are written with doubled backslashes and are
# only expanded by the ``unicode_escape`` codec.
# NOTE(review): presumably ``_bytes`` returns its argument as a byte string
# without touching the backslashes - confirm in common_imports.

# The single ASCII letter "a", decoded from UTF-8.
ascii_uni = _bytes('a').decode('utf8')

# The single code point U+F8D2 (inside the Private Use Area U+E000-U+F8FF).
klingon = _bytes("\\uF8D2").decode("unicode_escape")

# "test" followed by the private-use character above.
# NOTE(review): the name suggests this is expected to be rejected as a tag
# name - confirm against the tests that use it.
invalid_tag = _bytes("test").decode('utf8') + klingon

# Three non-ASCII code points: U+00C3, U+0680 and U+3120.
uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape")

# A small XML document with non-ASCII characters in both child elements.
uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>"
              ).decode("unicode_escape")
24
25
28
# Sanity check of the ``_str`` helper itself: one code point written with
# different escape forms has to come out as equal strings.
equivalent_spellings = (
    ('\x10', '\u0010'),
    ('\x10', '\U00000010'),
    ('\u1234', '\U00001234'),
)
for first, second in equivalent_spellings:
    self.assertEqual(_str(first), _str(second))
32
36
38 if sys.maxunicode < 1114111:
39 return
40 tree = etree.XML(_bytes('<p>\\U00026007</p>').decode('unicode_escape'))
41 self.assertEqual(1, len(tree.text))
42 self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'),
43 tree.text)
44
# Prefix the non-ASCII payload with an XML declaration that names an
# encoding, then check that etree.XML() refuses the resulting string with
# ValueError.
xml_declaration = '<?xml version="1.0" encoding="UTF-8"?>'
uxml = xml_declaration + '<p>%s</p>' % uni
self.assertRaises(ValueError, etree.XML, uxml)
49
53
57
62
67
72
78
81
85
89
94
98
# Only repr() is exercised: the call has to complete without raising for a
# processing instruction whose target and text are both non-ASCII.  The
# returned string is deliberately discarded.
pi = etree.ProcessingInstruction(_str('Å'), _str('\u0131'))
repr(pi)
102
106
# Element whose ``.text`` is assigned repeatedly below.
e = etree.Element('e')

def settext(text):
    # Wrap the attribute assignment in a callable so it can be handed to
    # assertRaises().
    e.text = text

# --- text strings that must be rejected with ValueError ---------------
# U+FFFE and U+FFFF.
self.assertRaises(ValueError, settext, _str('ab\ufffe'))
# This literal used to read 'ö\ffff'.  "\f" is the form-feed escape, so
# the string was "ö" + form feed + "fff" and U+FFFF was never exercised.
self.assertRaises(ValueError, settext, _str('ö\uffff'))
# Keep the input the old literal actually produced (a form-feed control
# character in the text) as an explicit case of its own.
self.assertRaises(ValueError, settext, _str('ö\x0cfff'))
# Lone surrogate code points, on their own or after other characters.
self.assertRaises(ValueError, settext, _str('\u0123\ud800'))
self.assertRaises(ValueError, settext, _str('x\ud8ff'))
self.assertRaises(ValueError, settext, _str('\U00010000\udfff'))
# Embedded NUL.
self.assertRaises(ValueError, settext, _str('abd\x00def'))

# The code points directly around the surrogate range (U+D7FF, U+E000) and
# the first and last code points above U+FFFF must be accepted.
settext(_str('\ud7ff\ue000\U00010000\U0010FFFFäöas'))

# Every code point in the surrogate range U+D800..U+DFFF is rejected at
# the end, alone, and at the start of the text.
for char_val in range(0xD800, 0xDFFF + 1):
    self.assertRaises(ValueError, settext, 'abc' + _chr(char_val))
    self.assertRaises(ValueError, settext, _chr(char_val))
    self.assertRaises(ValueError, settext, _chr(char_val) + 'abc')

# --- byte strings that must be rejected with ValueError ---------------
# Values above 0x7F.
self.assertRaises(ValueError, settext, _bytes('\xe4'))
self.assertRaises(ValueError, settext, _bytes('\x80'))
self.assertRaises(ValueError, settext, _bytes('\xff'))
# Control characters 0x08 and 0x19, and a NUL after a space.
self.assertRaises(ValueError, settext, _bytes('\x08'))
self.assertRaises(ValueError, settext, _bytes('\x19'))
self.assertRaises(ValueError, settext, _bytes('\x20\x00'))

# Tab, LF, CR, space, backtick and DEL are accepted in a byte string.
settext(_bytes('\x09\x0A\x0D\x20\x60\x7f'))
135
140
# ``el`` is defined outside the lines visible here.
# NOTE(review): presumably it builds an element from the given tag name -
# confirm at its definition.
# Each of these names has to be refused with ValueError.
for bad_name in (':', '0a', _str('\u203f')):
    self.assertRaises(ValueError, el, bad_name)

# U+0132 must be accepted: the call has to return without raising.
el(_str('\u0132'))
146
147
148
152
153
154
155
156
157
158
159
160
165
170
181
184
187
190
193
196
199
202
205
206
208 suite = unittest.TestSuite()
209 suite.addTests([unittest.makeSuite(UnicodeTestCase)])
210 suite.addTests([unittest.makeSuite(EncodingsTestCase)])
211 return suite
212