1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48 import copy, etree
49 from urlparse import urljoin
50
# Compatibility shim: if the interpreter has no built-in `set` name, bind
# `set` to `sets.Set` so the rest of the module can use `set()` uniformly.
51 try:
52 set
53 except NameError:
54 from sets import Set as set
55
# XInclude namespace in "{uri}" form; prepended to local names to build
# fully qualified element tags.
56 XINCLUDE = "{http://www.w3.org/2001/XInclude}"
57
# Qualified tag names that the include processing compares against `e.tag`.
58 XINCLUDE_INCLUDE = XINCLUDE + "include"
59 XINCLUDE_FALLBACK = XINCLUDE + "fallback"
60
61
62
63
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# NOTE(review): the `def` header for this loader body is not visible in this
# view; the body reads the names `href`, `parse` and `encoding`.
#
# Opens `href` with the built-in open() and returns either
#   * the root element of the parsed document when parse == "xml", or
#   * the raw file contents for any other `parse` value, decoded with
#     `encoding` when a truthy encoding is given.
#
# NOTE(review): the handle is only closed on the success path; if
# etree.parse(), read() or decode() raises, `file` is left open. Wrapping the
# body in try/finally would close it on every path.
81 file = open(href)
82 if parse == "xml":
83 data = etree.parse(file).getroot()
84 else:
85 data = file.read()
86 if encoding:
87 data = data.decode(encoding)
88 file.close()
89 return data
90
91
92
93
94
# NOTE(review): the `def` header for this loader body is not visible in this
# view; the body reads the names `href`, `parse`, `encoding` and `parser`.
#
# For parse == "xml" the href is handed straight to etree.parse() together
# with `parser`, and the root element of the result is returned.
# For any other `parse` value the file is read with the built-in open() and
# decoded with `encoding` when a truthy encoding is given.
#
# NOTE(review): in the text branch the file object returned by open() is
# never explicitly closed.
96 if parse == "xml":
97 data = etree.parse(href, parser).getroot()
98 else:
99 data = open(href).read()
100 if encoding:
101 data = data.decode(encoding)
102 return data
103
104
105
106
# Adapter around `loader` (a name taken from the enclosing scope, whose `def`
# header is not visible in this view). It accepts an extra `parser` keyword
# but does not forward it: `loader` is always called with exactly
# (href, parse, encoding).
108 def load(href, parse, encoding=None, parser=None):
109 return loader(href, parse, encoding)
# The adapter function itself is what the enclosing function returns.
110 return load
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# NOTE(review): the `def` header for this body is not visible in this view;
# the body reads the names `elem`, `loader` and `base_url`.
#
# Accept either a tree-like object or an element: anything exposing
# `getroot` is treated as the tree and its root becomes `elem`; otherwise
# the tree is obtained from the element via getroottree().
126 if hasattr(elem, 'getroot'):
127 tree = elem
128 elem = elem.getroot()
129 else:
130 tree = elem.getroottree()
# When the tree exposes `docinfo`, its URL replaces whatever `base_url` held
# before; otherwise `base_url` is used as it was on entry.
131 if hasattr(tree, 'docinfo'):
132 base_url = tree.docinfo.URL
# The return value of _include() is discarded here.
# NOTE(review): _include() can return a replacement node/text when the
# processed element has no parent; that result is not propagated -- confirm
# this is intended.
133 _include(elem, loader, base_url=base_url)
134
def _include(elem, loader=None, _parent_hrefs=None, base_url=None):
    """Expand the XInclude directives found at or below *elem*, in place.

    Every element in the XInclude namespace reachable from *elem* is
    visited.  ``xi:include`` elements are replaced by the resource they
    reference; ``xi:fallback`` elements are only validated.

    Parameters:
        elem: Element whose subtree is processed.  It must provide
            ``getroottree()`` and ``getiterator()``.
        loader: Optional loader callable taking ``(href, parse, encoding)``.
            When given it is adapted with ``_wrap_et_loader``; otherwise
            ``_lxml_default_loader`` is used.
        _parent_hrefs: Internal.  Set of hrefs of the documents currently
            being expanded along the inclusion chain that led to *elem*;
            used to detect include cycles.  It is not modified.
        base_url: URL against which the ``href`` attributes are resolved
            with ``urljoin``.

    Returns:
        *elem* in the normal case.  If an ``xi:include`` element without a
        parent was processed, the loaded node (``parse="xml"``) or the
        loaded text (``parse="text"``) is returned instead, since there is
        no parent in which to substitute it.

    Raises:
        FatalIncludeError: On a recursive include, when the loader returns
            ``None``, on an unknown ``parse`` value, on an ``xi:fallback``
            whose parent is not ``xi:include``, or on any other element in
            the XInclude namespace.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Materialise the matches up front: the tree is modified while they
    # are being processed.
    include_elements = list(
        elem.getiterator('{http://www.w3.org/2001/XInclude}*'))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # Process an xi:include directive.
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                # Track ancestry per inclusion chain, on a copy: a shared,
                # ever-growing set would flag a document that is merely
                # included twice (e.g. by two siblings) as recursive.
                child_hrefs = set(_parent_hrefs)
                child_hrefs.add(href)
                # Relative hrefs inside the included document are resolved
                # against that document's own URL.
                node = _include(node, loader, _parent_hrefs=child_hrefs,
                                base_url=href)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    # The include element had no parent to substitute into.
                    return node
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                predecessor = e.getprevious()
                if predecessor is not None:
                    # NOTE(review): e.tail is not appended on this path;
                    # confirm whether parent.remove(e) keeps it in the tree.
                    predecessor.tail = (predecessor.tail or "") + text
                elif parent is None:
                    # The include element had no parent to substitute into.
                    return text
                else:
                    parent.text = (parent.text or "") + text + (e.tail or "")
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                    )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem
201