1
2
3 """
4 HTML parser test cases for etree
5 """
6
7 import unittest
8 import tempfile, os, os.path, sys
9
10 this_dir = os.path.dirname(__file__)
11 if this_dir not in sys.path:
12 sys.path.insert(0, this_dir)
13
14 from common_imports import etree, StringIO, BytesIO, fileInTestDir, _bytes, _str
15 from common_imports import SillyFileLike, HelperTestCase
16
# Python 2/3 compatibility alias: ``unicode`` is the text type on both.
#
# The name is looked up directly instead of subscripting ``__builtins__``:
# ``__builtins__`` is a dict in imported modules but the ``builtins`` module
# itself when this file is executed as ``__main__``, in which case the
# subscript raised an uncaught TypeError.
try:
    unicode = unicode  # Python 2: rebind the builtin as a module global
except NameError:
    unicode = str  # Python 3: ``str`` is the text type
21
23 """HTML parser test cases
24 """
# Re-export the imported etree module under the same name so the test
# code can reach it as ``self.etree``.
etree = etree

# Well-formed sample document, compact form.
html_str = _bytes(
    "<html><head><title>test</title></head>"
    "<body><h1>page title</h1></body></html>")

# The same document, one element per line.
html_str_pretty = _bytes("""\
<html>
<head><title>test</title></head>
<body><h1>page title</h1></body>
</html>
""")

# Malformed input: unclosed <title>, <h1> closed by </h3>, stray </p>.
broken_html_str = _bytes(
    "<html><head><title>test<body><h1>page title</h3></p></html>")

# Sample document containing non-ASCII characters.
uhtml_str = _str("<html><head><title>test á\uF8D2</title></head><body><h1>page á\uF8D2 title</h1></body></html>")
36
40
45
51
56
63
# A tag name made up of nothing but a namespace part must be rejected with
# ValueError, both when creating an element through the HTML parser's
# factory and when assigning to ``tag`` on an existing element.
html_parser = self.etree.HTMLParser()
make_element = html_parser.makeelement
existing = make_element('name')

for namespace_only in ('{}', '{test}'):
    self.assertRaises(ValueError, make_element, namespace_only)
    self.assertRaises(ValueError, setattr, existing, 'tag', namespace_only)
74
88
# Tag names containing a single or double quote must be rejected with
# ValueError, with or without a namespace prefix.
parser = self.etree.HTMLParser()
Element = parser.makeelement

self.assertRaises(ValueError, Element, 'p"name')
self.assertRaises(ValueError, Element, "na'me")
self.assertRaises(ValueError, Element, '{test}"name')
self.assertRaises(ValueError, Element, "{test}name'")

# Assigning an invalid name to ``tag`` must fail as well ...
el = Element('name')
self.assertRaises(ValueError, setattr, el, 'tag', "pname'")
self.assertRaises(ValueError, setattr, el, 'tag', '"pname')
# ... and must leave the original tag untouched.
# assertEqual replaces the deprecated alias assertEquals, which was removed
# from unittest.TestCase in Python 3.12.
self.assertEqual(el.tag, "name")
102
# Tag names containing whitespace (leading, trailing, embedded, or right
# after a namespace) must be rejected with ValueError.
parser = self.etree.HTMLParser()
Element = parser.makeelement

self.assertRaises(ValueError, Element, ' name ')
self.assertRaises(ValueError, Element, 'na me')
self.assertRaises(ValueError, Element, '{test} name')

# A rejected assignment to ``tag`` must leave the original tag untouched.
el = Element('name')
self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
# assertEqual replaces the deprecated alias assertEquals, which was removed
# from unittest.TestCase in Python 3.12.
self.assertEqual(el.tag, "name")
114
124
136
# SubElement() must refuse child tag names that contain a quote character,
# with or without a namespace prefix, by raising ValueError.
html_parser = self.etree.HTMLParser()
make_subelement = self.etree.SubElement
parent = html_parser.makeelement('name')

quoted_names = ("name'", 'na"me', "{test}na'me", '{test}"name')
for quoted in quoted_names:
    self.assertRaises(ValueError, make_subelement, parent, quoted)
147
157
164
174
# The document declares UTF-8 in its <meta> tag but is actually encoded as
# ISO-8859-1, and its body text contains non-ASCII characters.
text = _str('Søk på nettet')
wrong_head = _str('''
<head>
<meta http-equiv="Content-Type"
content="text/html; charset=UTF-8" />
</head>''')
html_latin1 = (_str('<html>%s<body><p>%s</p></body></html>') % (wrong_head,
                                                               text)
               ).encode('iso-8859-1')

# Parsing without an explicit parser is expected to fail.
self.assertRaises(self.etree.ParseError,
                  self.etree.parse,
                  BytesIO(html_latin1))

# An HTML parser with an explicit encoding override must recover the text.
tree = self.etree.parse(
    BytesIO(html_latin1),
    self.etree.HTMLParser(encoding="iso-8859-1"))
p = tree.find("//p")
# assertEqual replaces the deprecated alias assertEquals, which was removed
# from unittest.TestCase in Python 3.12.
self.assertEqual(p.text, text)
195
200
202
# The content of a <style> element must be exposed as the element's text.
html = _bytes('<html><head><style>foo</style></head></html>')
element = self.etree.HTML(html)
# element[0] is <head>, element[0][0] is <style>.
# assertEqual replaces the deprecated alias assertEquals, which was removed
# from unittest.TestCase in Python 3.12.
self.assertEqual(element[0][0].text, "foo")
206
210
223
231
232
233
234
235
236
237
238
245
260
# iterparse(..., html=True) over an in-memory document: the root is not
# available before iteration, and iterating yields one 'end' event per
# element in closing order (title, head, p, body, html).
iterparse = self.etree.iterparse
# NOTE(review): BytesIO is the helper imported from common_imports and is
# handed a native str here - presumably it encodes on Python 3; confirm.
f = BytesIO(
    '<html><head><title>TITLE</title><body><p>P</p></body></html>')

iterator = iterparse(f, html=True)
# assertEqual / assertTrue replace the deprecated aliases assertEquals and
# assert_, which were removed from unittest.TestCase in Python 3.12.
self.assertEqual(None, iterator.root)

events = list(iterator)
root = iterator.root
self.assertTrue(root is not None)
self.assertEqual(
    [('end', root[0][0]), ('end', root[0]), ('end', root[1][0]),
     ('end', root[1]), ('end', root)],
    events)
276
# iterparse(..., html=True) over a file from the test directory: the root
# is not available before iteration, and the complete run is expected to
# produce 249 events, every one of them an 'end' event.
iterparse = self.etree.iterparse
iterator = iterparse(fileInTestDir("css_shakespear.html"),
                     html=True)

# assertEqual / assertTrue replace the deprecated aliases assertEquals and
# assert_, which were removed from unittest.TestCase in Python 3.12.
self.assertEqual(None, iterator.root)
events = list(iterator)
root = iterator.root
self.assertTrue(root is not None)
self.assertEqual(249, len(events))
self.assertEqual(
    [],
    [ event for (event, element) in events if event != 'end' ])
290
292 suite = unittest.TestSuite()
293 suite.addTests([unittest.makeSuite(HtmlParserTestCase)])
294 return suite
295
if __name__ == '__main__':
    # Running this file directly executes no tests; it only prints a hint
    # pointing at the test.py runner.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)
298