1
2
3 """
4 Tests for thread usage in lxml.etree.
5 """
6
7 import re
8 import sys
9 import os.path
10 import unittest
11 import threading
12
# Put the directory containing this file at the front of sys.path (once),
# so that modules living next to it can be imported.
this_dir = os.path.dirname(__file__)
if sys.path.count(this_dir) == 0:
    sys.path.insert(0, this_dir)
16
17 from common_imports import etree, HelperTestCase, BytesIO, _bytes
18
19 try:
20 from Queue import Queue
21 except ImportError:
22 from queue import Queue
23
24
26 """Threading tests"""
27 etree = etree
28
30 thread = threading.Thread(target=func)
31 thread.start()
32 thread.join()
33
35 sync = threading.Event()
36 lock = threading.Lock()
37 counter = dict(started=0, finished=0, failed=0)
38
def sync_start(func):
    """Register the caller as started, synchronise, then run ``func``.

    The shared ``counter`` dict is updated under ``lock``.  Every caller
    except the last expected one waits up to 4 seconds on the ``sync``
    event and asserts that it was set; each caller then sets the event
    itself and invokes ``func``.  The outcome is recorded in
    ``counter['finished']`` or ``counter['failed']``; an exception raised
    by ``func`` propagates to the caller after being counted.
    """
    # One extra participant is expected when a main-thread function is used.
    expected = count + (1 if main_func is not None else 0)

    with lock:
        counter['started'] += 1
        started = counter['started']

    if started < expected:
        sync.wait(4)
        assert sync.is_set()
    sync.set()

    # Assume failure until func() has returned normally.
    outcome = 'failed'
    try:
        func()
        outcome = 'finished'
    finally:
        with lock:
            counter[outcome] += 1
56
57 threads = [threading.Thread(target=sync_start, args=(func,)) for _ in range(count)]
58 for thread in threads:
59 thread.start()
60 if main_func is not None:
61 sync_start(main_func)
62 for thread in threads:
63 thread.join()
64
65 self.assertEqual(0, counter['failed'])
66 self.assertEqual(counter['finished'], counter['started'])
67
78
79 self._run_thread(run_thread)
80 self.assertEqual(xml, tostring(main_root))
81
83 XML = self.etree.XML
84 style = XML(_bytes('''\
85 <xsl:stylesheet version="1.0"
86 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
87 <xsl:template match="*">
88 <foo><xsl:copy><xsl:value-of select="/a/b/text()" /></xsl:copy></foo>
89 </xsl:template>
90 </xsl:stylesheet>'''))
91 st = etree.XSLT(style)
92
93 result = []
94
def run_thread():
    """Parse a small document and apply the XSLT object ``st`` to it.

    The transformation result is appended to the enclosing ``result`` list.
    """
    document = XML(_bytes('<a><b>B</b><c>C</c></a>'))
    transformed = st(document)
    result.append(transformed)
98
99 self._run_thread(run_thread)
100 self.assertEqual('''\
101 <?xml version="1.0"?>
102 <foo><a>B</a></foo>
103 ''',
104 str(result[0]))
105
121
122 self._run_thread(run_thread)
123 self.assertEqual(_bytes('<a><b>B</b><c>C</c><foo><a>B</a></foo></a>'),
124 tostring(root))
125
127 style = self.parse('''\
128 <xsl:stylesheet version="1.0"
129 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
130 <xsl:template match="tag" />
131 <!-- extend time for parsing + transform -->
132 ''' + '\n'.join('<xsl:template match="tag%x" />' % i for i in range(200)) + '''
133 <xsl:foo />
134 </xsl:stylesheet>''')
135 self.assertRaises(etree.XSLTParseError,
136 etree.XSLT, style)
137
138 error_logs = []
139
def run_thread():
    """Compile ``style`` and collect the error log of the expected failure.

    ``etree.XSLT(style)`` is expected to raise ``etree.XSLTParseError``;
    the exception's ``error_log`` is appended to the enclosing
    ``error_logs`` list.  If no error is raised, the test case is failed
    explicitly.
    """
    try:
        etree.XSLT(style)
    except etree.XSLTParseError as e:
        error_logs.append(e.error_log)
    else:
        # fail() states the intent directly instead of asserting that a
        # constant is false.
        self.fail("XSLT parsing should have failed but didn't")
147
148 self._run_threads(16, run_thread)
149
150 self.assertEqual(16, len(error_logs))
151 last_log = None
152 for log in error_logs:
153 self.assertTrue(len(log))
154 if last_log is not None:
155 self.assertEqual(len(last_log), len(log))
156 self.assertEqual(4, len(log))
157 for error in log:
158 self.assertTrue(':ERROR:XSLT:' in str(error))
159 last_log = log
160
162 tree = self.parse('<tagFF/>')
163 style = self.parse('''\
164 <xsl:stylesheet version="1.0"
165 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
166 <xsl:template name="tag0">
167 <xsl:message terminate="yes">FAIL</xsl:message>
168 </xsl:template>
169 <!-- extend time for parsing + transform -->
170 ''' + '\n'.join('<xsl:template match="tag%X" name="tag%x"> <xsl:call-template name="tag%x" /> </xsl:template>' % (i, i, i-1)
171 for i in range(1, 256)) + '''
172 </xsl:stylesheet>''')
173 self.assertRaises(etree.XSLTApplyError,
174 etree.XSLT(style), tree)
175
176 error_logs = []
177
def run_thread():
    """Apply a freshly compiled ``style`` to ``tree`` and collect its log.

    Calling the transform is expected to raise ``etree.XSLTApplyError``;
    the transform's ``error_log`` is then appended to the enclosing
    ``error_logs`` list.  If no error is raised, the test case is failed
    explicitly.
    """
    transform = etree.XSLT(style)
    try:
        transform(tree)
    except etree.XSLTApplyError:
        error_logs.append(transform.error_log)
    else:
        # The stylesheet compiles fine here; it is the application of the
        # transform that must fail, so the message says so.
        self.fail("XSLT transformation should have failed but didn't")
186
187 self._run_threads(16, run_thread)
188
189 self.assertEqual(16, len(error_logs))
190 last_log = None
191 for log in error_logs:
192 self.assertTrue(len(log))
193 if last_log is not None:
194 self.assertEqual(len(last_log), len(log))
195 self.assertEqual(1, len(log))
196 for error in log:
197 self.assertTrue(':ERROR:XSLT:' in str(error))
198 last_log = log
199
201
202
203 XML = self.etree.XML
204 tostring = self.etree.tostring
205 style = self.etree.XSLT(XML(_bytes('''\
206 <xsl:stylesheet version="1.0"
207 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
208 <xsl:template match="*">
209 <root class="abc">
210 <xsl:copy-of select="@class" />
211 <xsl:attribute name="class">xyz</xsl:attribute>
212 </root>
213 </xsl:template>
214 </xsl:stylesheet>''')))
215
216 result = []
def run_thread():
    """Transform a one-element document with ``style``.

    The root element of the transformation result is appended to the
    enclosing ``result`` list.
    """
    source_root = XML(_bytes('<ROOT class="ABC" />'))
    transformed_tree = style(source_root)
    result.append(transformed_tree.getroot())
220
221 self._run_thread(run_thread)
222 self.assertEqual(_bytes('<root class="xyz"/>'),
223 tostring(result[0]))
224
226 XML = self.etree.XML
227 tostring = self.etree.tostring
228 root = XML(_bytes('<a><b>B</b><c>C</c></a>'))
229
230 stylesheets = []
231
def run_thread():
    """Parse a stylesheet, compile it and store the XSLT object.

    The compiled ``etree.XSLT`` instance is appended to the enclosing
    ``stylesheets`` list.
    """
    stylesheet_source = _bytes('''\
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:output method="xml" />
<xsl:template match="/">
<div id="test">
<xsl:apply-templates/>
</div>
</xsl:template>
</xsl:stylesheet>''')
    stylesheet_doc = XML(stylesheet_source)
    compiled = etree.XSLT(stylesheet_doc)
    stylesheets.append(compiled)
245
246 self._run_thread(run_thread)
247
248 st = stylesheets[0]
249 result = tostring( st(root) )
250
251 self.assertEqual(_bytes('<div id="test">BC</div>'),
252 result)
253
276
277 self.etree.clear_error_log()
278 threads = []
279 for thread_no in range(1, 10):
280 t = threading.Thread(target=parse_error_test,
281 args=(thread_no,))
282 threads.append(t)
283 t.start()
284
285 parse_error_test(0)
286
287 for t in threads:
288 t.join()
289
305
def run_parse():
    """Parse ``xml`` and append its first and last child to ``result``.

    The children are looked up and appended one after the other, first
    index 0 and then index -1.
    """
    parsed_root = self.etree.parse(BytesIO(xml)).getroot()
    # Look up each child only right before appending it, in this order.
    for position in (0, -1):
        result.append(parsed_root[position])
310
def run_move_main():
    """Append the first item of ``fragment`` to ``result``."""
    first_item = fragment[0]
    result.append(first_item)
313
def run_build():
    """Add two newly created, namespaced elements to ``result``.

    A ``{myns}foo`` element carrying a ``{test}attr`` attribute is appended
    first, then a ``{otherns}tasty`` element is created via ``SubElement``.
    """
    built = Element("{myns}foo", attrib={'{test}attr': 'val'})
    result.append(built)
    SubElement(result, "{otherns}tasty")
318
def run_xslt():
    """Compile a stylesheet, apply it to ``root`` and keep the result root.

    The root element of the transformation result is appended to the
    enclosing ``result`` element.
    """
    stylesheet_source = _bytes('''\
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="*">
<xsl:copy><foo><xsl:value-of select="/a/b/text()" /></foo></xsl:copy>
</xsl:template>
</xsl:stylesheet>''')
    transform = etree.XSLT(XML(stylesheet_source))
    transformed_tree = transform(root)
    result.append(transformed_tree.getroot())
329
330 for test in (run_XML, run_parse, run_move_main, run_xslt, run_build):
331 tostring(result)
332 self._run_thread(test)
333
334 self.assertEqual(
335 _bytes('<ns0:root xmlns:ns0="myns" att="someval"><b>B</b>'
336 '<c xmlns="test">C</c><b>B</b><c xmlns="test">C</c><tags/>'
337 '<a><foo>B</foo></a>'
338 '<ns0:foo xmlns:ns1="test" ns1:attr="val"/>'
339 '<ns1:tasty xmlns:ns1="otherns"/></ns0:root>'),
340 tostring(result))
341
def strip_first():
    """Append the first child of ``result`` to a throw-away new element.

    The new parent element is local to this call and is dropped when the
    function returns.
    """
    temporary_parent = Element("newroot")
    first_child = result[0]
    temporary_parent.append(first_child)
345
346 while len(result):
347 self._run_thread(strip_first)
348
349 self.assertEqual(
350 _bytes('<ns0:root xmlns:ns0="myns" att="someval"/>'),
351 tostring(result))
352
354 SubElement = self.etree.SubElement
355 names = list('abcdefghijklmnop')
356 runs_per_name = range(50)
357 result_matches = re.compile(
358 br'<thread_root>'
359 br'(?:<[a-p]{5} thread_attr_[a-p]="value" thread_attr2_[a-p]="value2"\s?/>)+'
360 br'</thread_root>').match
361
def testrun():
    """Build and serialise a tree of attributed children, three times.

    For every entry in ``names`` a batch of children (one per item in
    ``runs_per_name``) is created under a fresh ``thread_root`` element
    with a first attribute; a second attribute is then set on each child
    of the batch.  The serialised tree must match ``result_matches``.
    """
    for _round in range(3):
        root = self.etree.Element('thread_root')
        for name in names:
            tag_name = name * 5
            # Create the whole batch first ...
            batch = [
                SubElement(root, tag_name, {'thread_attr_' + name: 'value'})
                for _ in runs_per_name
            ]
            # ... then revisit every element to add the second attribute.
            for child in batch:
                child.set('thread_attr2_' + name, 'value2')
        serialised = etree.tostring(root)
        self.assertTrue(result_matches(serialised))
375
376
377 self._run_threads(10, testrun)
378
379
380 self._run_threads(10, testrun, main_func=testrun)
381
383 XML = self.etree.XML
384 root = XML(_bytes('<root><a>A</a><b xmlns="test">B</b><c/></root>'))
385 child_count = len(root)
def testrun():
    """Look up children of ``root`` 10000 times, dropping each reference.

    The child index cycles through ``range(child_count)``.
    """
    for step in range(10000):
        index = step % child_count
        child = root[index]
        # Release the reference right away.
        del child
390 self._run_threads(10, testrun)
391
393 XML = self.etree.XML
394
class TestElement(etree.ElementBase):
    """Element subclass without additions; returned by ``MyLookup.lookup``."""
397
class MyLookup(etree.CustomElementClassLookup):
    """Element class lookup that always answers with ``TestElement``.

    Before returning, ``lookup`` runs a short counting loop over
    ``repeat`` (presumably to spend some time in Python code so that
    other threads get a chance to run -- confirm against the test intent).
    """
    # Iterable that drives the counting loop in lookup().
    repeat = range(100)

    def lookup(self, t, d, ns, name):
        """Return ``TestElement`` regardless of the arguments."""
        ticks = 0
        for _step in self.repeat:
            ticks = ticks + 1
        return TestElement
406
407 parser = self.etree.XMLParser()
408 parser.set_element_class_lookup(MyLookup())
409
410 root = XML(_bytes('<root><a>A</a><b xmlns="test">B</b><c/></root>'),
411 parser)
412
413 child_count = len(root)
def testrun():
    """Look up children of ``root`` 1000 times, dropping each reference.

    The child index cycles through ``range(child_count)``.
    """
    for step in range(1000):
        index = step % child_count
        child = root[index]
        # Release the reference right away.
        del child
418 self._run_threads(10, testrun)
419
420
422 """Threading tests based on a thread worker pipeline.
423 """
424 etree = etree
425 item_count = 40
426
427 - class Worker(threading.Thread):
def __init__(self, in_queue, in_count, **kwargs):
    """Set up the worker's queues and item count.

    in_queue  -- queue the worker reads its input items from
    in_count  -- number of items; also used as the size bound of the
                 newly created ``out_queue``
    kwargs    -- written straight into the instance ``__dict__`` after
                 the attributes above, so they may override them
    """
    threading.Thread.__init__(self)
    self.in_queue, self.in_count = in_queue, in_count
    self.out_queue = Queue(in_count)
    # Update the instance dict directly, exactly like self.__dict__.update().
    vars(self).update(kwargs)
434
436 get, put = self.in_queue.get, self.out_queue.put
437 handle = self.handle
438 for _ in range(self.in_count):
439 put(handle(get()))
440
442 raise NotImplementedError()
443
446 return _fromstring(xml)
447
454
459
464
470
475
479
480 xml = (b'''\
481 <!DOCTYPE threadtest [
482 <!ELEMENT threadtest (thread-tag1,thread-tag2)+>
483 <!ATTLIST threadtest
484 version CDATA "1.0"
485 >
486 <!ELEMENT thread-tag1 EMPTY>
487 <!ELEMENT thread-tag2 (div)>
488 <!ELEMENT div (threaded)>
489 <!ATTLIST div
490 huhu CDATA #IMPLIED
491 >
492 <!ELEMENT threaded EMPTY>
493 <!ATTLIST threaded
494 host CDATA #REQUIRED
495 >
496 ]>
497 <threadtest version="123">
498 ''' + (b'''
499 <thread-tag1 />
500 <thread-tag2>
501 <div huhu="true">
502 <threaded host="here" />
503 </div>
504 </thread-tag2>
505 ''') * 20 + b'''
506 </threadtest>''')
507
517
519 item_count = self.item_count
520 xml = self.xml.replace(b'thread', b'THREAD')
521
522
523 in_queue, start, last = self._build_pipeline(
524 item_count,
525 self.ParseWorker,
526 self.RotateWorker,
527 self.ReverseWorker,
528 self.ParseAndExtendWorker,
529 self.Validate,
530 self.ParseAndInjectWorker,
531 self.SerialiseWorker,
532 xml=xml)
533
534
535 put = start.in_queue.put
536 for _ in range(item_count):
537 put(xml)
538
539
540 start.start()
541
542 last.join(60)
543 self.assertEqual(item_count, last.out_queue.qsize())
544
545 get = last.out_queue.get
546 results = [get() for _ in range(item_count)]
547
548 comparison = results[0]
549 for i, result in enumerate(results[1:]):
550 self.assertEqual(comparison, result)
551
553 item_count = self.item_count
554 xml = self.xml.replace(b'thread', b'GLOBAL')
555 XML = self.etree.XML
556
557 in_queue, start, last = self._build_pipeline(
558 item_count,
559 self.RotateWorker,
560 self.ReverseWorker,
561 self.ParseAndExtendWorker,
562 self.Validate,
563 self.SerialiseWorker,
564 xml=xml)
565
566
567 put = start.in_queue.put
568 for _ in range(item_count):
569 put(XML(xml))
570
571
572 start.start()
573
574 last.join(60)
575 self.assertEqual(item_count, last.out_queue.qsize())
576
577 get = last.out_queue.get
578 results = [get() for _ in range(item_count)]
579
580 comparison = results[0]
581 for i, result in enumerate(results[1:]):
582 self.assertEqual(comparison, result)
583
584
590
# Running this file directly only prints a hint on how to run the tests.
if '__main__' == __name__:
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)
593