1
2
3 """
4 HTML parser test cases for etree
5 """
6
7 import unittest
8 import tempfile, os, os.path, sys
9
# Put the directory that contains this file on sys.path, unless it is already
# listed. Presumably this lets the `common_imports` module imported right
# below be found regardless of the current working directory -- confirm.
10 this_dir = os.path.dirname(__file__)
11 if this_dir not in sys.path:
12 sys.path.insert(0, this_dir)
13
14 from common_imports import etree, StringIO, BytesIO, fileInTestDir, _bytes, _str
15 from common_imports import SillyFileLike, HelperTestCase
16
# Python 2/3 compatibility: make sure a module-level ``unicode`` name exists.
# The bare name is probed instead of subscripting ``__builtins__``, because
# ``__builtins__`` is the builtins *module* (not a dict) when this file runs
# as ``__main__``; subscripting it raises a TypeError that the old handler
# (NameError, KeyError) did not catch.
try:
    unicode = unicode  # Python 2: re-export the builtin as a module global
except NameError:
    unicode = str  # Python 3: ``str`` is the text type
21
23 """HTML parser test cases
24 """
# Re-bind the imported `etree` name in this scope; the test code further down
# reaches it as `self.etree` (presumably this is a class attribute -- the
# enclosing class header is not visible here).
25 etree = etree
26
# A complete, properly nested HTML document on a single line.
27 html_str = _bytes("<html><head><title>test</title></head><body><h1>page title</h1></body></html>")
# The same document laid out over several lines.
28 html_str_pretty = _bytes("""\
29 <html>
30 <head><title>test</title></head>
31 <body><h1>page title</h1></body>
32 </html>
33 """)
# Deliberately malformed markup: <title> is never closed, <h1> is closed by
# </h3>, and there is a stray </p>.
34 broken_html_str = _bytes("<html><head><title>test<body><h1>page title</h3></p></html>")
# Text (not bytes) variant whose title and heading contain non-ASCII characters.
35 uhtml_str = _str("<html><head><title>test á\uF8D2</title></head><body><h1>page á\uF8D2 title</h1></body></html>")
36
40
45
50
55
62
# A tag name that consists only of a namespace part -- empty ('{}') or not
# ('{test}') -- must be rejected with ValueError, both when an element is
# created through the parser's makeelement and when the name is assigned to
# the `tag` attribute of an existing element.
64 parser = self.etree.HTMLParser()
65 Element = parser.makeelement
66
67 el = Element('name')
68 self.assertRaises(ValueError, Element, '{}')
69 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
70
71 self.assertRaises(ValueError, Element, '{test}')
72 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
73
87
# Tag names containing a single or a double quote must raise ValueError on
# element creation, with or without a '{test}' namespace prefix.
89 parser = self.etree.HTMLParser()
90 Element = parser.makeelement
91
92 self.assertRaises(ValueError, Element, 'p"name')
93 self.assertRaises(ValueError, Element, "na'me")
94 self.assertRaises(ValueError, Element, '{test}"name')
95 self.assertRaises(ValueError, Element, "{test}name'")
96
# The same names must also be refused when assigned to `tag`, and the failed
# assignments must leave the original tag in place.
97 el = Element('name')
98 self.assertRaises(ValueError, setattr, el, 'tag', "pname'")
99 self.assertRaises(ValueError, setattr, el, 'tag', '"pname')
# NOTE(review): assertEquals is a deprecated alias of assertEqual that plain
# unittest.TestCase no longer has as of Python 3.12 -- confirm the base test
# class still provides it.
100 self.assertEquals(el.tag, "name")
101
# Whitespace in a tag name -- leading/trailing, embedded, or between a
# namespace and the local name -- must raise ValueError on element creation.
103 parser = self.etree.HTMLParser()
104 Element = parser.makeelement
105
106 self.assertRaises(ValueError, Element, ' name ')
107 self.assertRaises(ValueError, Element, 'na me')
108 self.assertRaises(ValueError, Element, '{test} name')
109
# Assigning such a name to `tag` must fail as well and keep the old tag.
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
112 self.assertEquals(el.tag, "name")
113
123
135
# SubElement() must reject child tag names that contain a single or double
# quote, with or without a '{test}' namespace prefix. The parent element is
# created through the HTML parser's makeelement.
137 parser = self.etree.HTMLParser()
138 Element = parser.makeelement
139 SubElement = self.etree.SubElement
140
141 el = Element('name')
142 self.assertRaises(ValueError, SubElement, el, "name'")
143 self.assertRaises(ValueError, SubElement, el, 'na"me')
144 self.assertRaises(ValueError, SubElement, el, "{test}na'me")
145 self.assertRaises(ValueError, SubElement, el, '{test}"name')
146
156
163
173
# Build a document whose bytes are ISO-8859-1 encoded while its <meta> tag
# declares UTF-8, i.e. the in-document encoding declaration is wrong.
175 text = _str('Søk på nettet')
176 wrong_head = _str('''
177 <head>
178 <meta http-equiv="Content-Type"
179 content="text/html; charset=UTF-8" />
180 </head>''')
181 html_latin1 = (_str('<html>%s<body><p>%s</p></body></html>') % (wrong_head,
182 text)
183 ).encode('iso-8859-1')
184
# Parsing these bytes without passing a parser is expected to fail with
# ParseError.
185 self.assertRaises(self.etree.ParseError,
186 self.etree.parse,
187 BytesIO(html_latin1))
188
# An HTMLParser constructed with encoding="iso-8859-1" must parse the same
# bytes, and the paragraph text must round-trip to the original string.
189 tree = self.etree.parse(
190 BytesIO(html_latin1),
191 self.etree.HTMLParser(encoding="iso-8859-1"))
192 p = tree.find("//p")
193 self.assertEquals(p.text, text)
194
199
201
# The content of a <style> element must be exposed as that element's `.text`.
# element[0][0] is the first child of the root's first child, i.e.
# html -> head -> style for this input.
202 html = _bytes('<html><head><style>foo</style></head></html>')
203 element = self.etree.HTML(html)
204 self.assertEquals(element[0][0].text, "foo")
205
209
221
228
229
230
231
232
233
234
235
242
257
# iterparse(..., html=True) over an in-memory document.
259 iterparse = self.etree.iterparse
260 f = BytesIO(
261 '<html><head><title>TITLE</title><body><p>P</p></body></html>')
262
# Before the iterator is consumed, no root element is available yet.
263 iterator = iterparse(f, html=True)
264 self.assertEquals(None, iterator.root)
265
# Consuming the iterator must set `root` and yield exactly one 'end' event per
# element, innermost first: title, head, p, body, html.
266 events = list(iterator)
267 root = iterator.root
# NOTE(review): assert_ / assertEquals are deprecated aliases of assertTrue /
# assertEqual that plain unittest.TestCase no longer has as of Python 3.12 --
# confirm the base test class still provides them.
268 self.assert_(root is not None)
269 self.assertEquals(
270 [('end', root[0][0]), ('end', root[0]), ('end', root[1][0]),
271 ('end', root[1]), ('end', root)],
272 events)
273
# iterparse(..., html=True) over the "css_shakespear.html" file located via
# the fileInTestDir helper.
275 iterparse = self.etree.iterparse
276 iterator = iterparse(fileInTestDir("css_shakespear.html"),
277 html=True)
278
# As with in-memory input, `root` is None until the iterator has been consumed.
279 self.assertEquals(None, iterator.root)
280 events = list(iterator)
281 root = iterator.root
282 self.assert_(root is not None)
# The fixture is expected to produce 249 events ...
283 self.assertEquals(249, len(events))
# ... and none of them may be anything other than an 'end' event.
284 self.assertEquals(
285 [],
286 [ event for (event, element) in events if event != 'end' ])
287
# Build a TestSuite holding the tests of HtmlParserTestCase and return it.
# NOTE(review): unittest.makeSuite is deprecated since Python 3.11 and removed
# in 3.13; unittest.TestLoader().loadTestsFromTestCase is the documented
# replacement -- confirm which Python versions this file must support.
289 suite = unittest.TestSuite()
290 suite.addTests([unittest.makeSuite(HtmlParserTestCase)])
291 return suite
292
if __name__ == '__main__':
    # Running this file directly executes no tests; it only prints a hint
    # that points at the test.py runner.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)
295