Mercurial > traipse_dev
comparison orpg/xmltramp.py @ 0:4385a7d0efd1 grumpy-goblin
Deleted and repushed it with the 'grumpy-goblin' branch. I forgot a y
author | sirebral |
---|---|
date | Tue, 14 Jul 2009 16:41:58 -0500 |
parents | |
children | 551cd440acce |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4385a7d0efd1 |
---|---|
"""xmltramp: Make XML documents easily accessible."""

__version__ = "2.16"
__author__ = "Aaron Swartz"
__credits__ = "Many thanks to pjz, bitsko, and DanC."
__copyright__ = "(C) 2003 Aaron Swartz. GNU GPL 2."

# Very old interpreters have no True/False builtins.  Probe for the name
# itself instead of calling hasattr() on __builtins__: that object may be a
# plain dict rather than a module, in which case the hasattr() test fails and
# True/False get rebound to 1/0 even though they exist.  The fallback is
# installed through globals() because a literal "True, False = 1, 0" does not
# compile on Python 3.
try:
    True
except NameError:
    globals().update({'True': 1, 'False': 0})
def isstr(f):
    """Return True when *f* is an instance of either string-literal type."""
    string_types = (type(''), type(u''))
    return isinstance(f, string_types)
def islst(f):
    """Return True when *f* is a tuple or a list."""
    sequence_types = (type(()), type([]))
    return isinstance(f, sequence_types)
11 | |
# Namespace URI -> local names of the elements that Element.__repr__ writes
# as a self-closing tag ("<br />") when they have no children.
empty = {
    'http://www.w3.org/1999/xhtml': [
        'img', 'br', 'hr', 'meta', 'link',
        'base', 'param', 'input', 'col', 'area',
    ],
}
13 | |
def quote(x, elt=True):
    """Escape the string *x* for inclusion in serialised XML.

    x   -- the text to escape.
    elt -- true (the default) when the text is element content, false when
           it is going inside a double-quoted attribute value.

    Element content that contains '<', is longer than 24 characters and has
    no ']]>' in it is wrapped in a single CDATA section instead of being
    escaped.  Everything else gets '&', '<' and the ']]>' terminator replaced
    by entity references; attribute values additionally get '"' replaced.
    A lone '>' is left as it is.
    """
    if elt and '<' in x and len(x) > 24 and ']]>' not in x:
        return "<![CDATA[" + x + "]]>"
    # '&' goes first so the ampersands introduced by the later replacements
    # are not escaped a second time.
    x = x.replace('&', '&amp;').replace('<', '&lt;').replace(']]>', ']]&gt;')
    if not elt:
        x = x.replace('"', '&quot;')
    return x
19 | |
class Element:
    """A node of an XML document tree.

    State (all names start with '_' so they never clash with child names):
      _name     -- tag name: a plain string, or a (namespace-uri, local-name)
                   pair for a namespaced element.
      _attrs    -- dict of attribute name -> value; names follow the same
                   string-or-pair convention.
      _dir      -- ordered list of children (strings and Elements).
      _prefixes -- dict of namespace-uri -> prefix as seen by this element.
      _dNS      -- the uri bound to prefix None (default namespace), or None.

    Access patterns:
      e.foo, e['foo']      first child element named foo
      e['foo':]            list of all child elements named foo
      e[1], e[1:], e[:]    positional access to the children
      e.foo = v            same as e['foo'] = v
      e('a')               value of attribute a
      e('a', 'v'), e(a='v')   set attributes
      e()                  the attribute dict
    Bare names are looked up in the default namespace when there is one.
    """

    def __init__(self, name, attrs=None, children=None, prefixes=None):
        """Create an element.

        name     -- string or (uri, local) pair; a pair whose uri is None
                    collapses to the bare local name.
        attrs    -- optional attribute dict; keys are normalised like name.
        children -- optional list of children; it is used as-is, not copied.
        prefixes -- optional dict of prefix -> namespace uri.
        """
        if islst(name) and name[0] is None:
            name = name[1]
        if attrs:
            normalised = {}
            for key in attrs.keys():
                if islst(key) and key[0] is None:
                    normalised[key[1]] = attrs[key]
                else:
                    normalised[key] = attrs[key]
            attrs = normalised
        self._name = name
        self._attrs = attrs or {}
        self._dir = children or []
        prefixes = prefixes or {}
        # Kept inverted (uri -> prefix) because serialising looks names up by uri.
        self._prefixes = dict((uri, prefix) for prefix, uri in prefixes.items())
        self._dNS = prefixes.get(None)

    def _qualify(self, n):
        # Put a bare name into the default namespace; pairs pass through.
        if self._dNS and not islst(n):
            return (self._dNS, n)
        return n

    def __repr__(self, recursive=0, multiline=0, inprefixes=None):
        """Serialise the element as XML text.

        recursive  -- 0 (the default) renders only this tag: children are
                      abbreviated to '...' and no xmlns declarations are
                      written.  A positive value renders the whole subtree
                      and doubles as the nesting depth used for indentation.
        multiline  -- when true (and recursive is set), each child of an
                      element that has element children goes on its own
                      tab-indented line.
        inprefixes -- dict of namespace-uri -> prefix already declared by the
                      ancestors; a non-empty dict is updated in place.

        Raises TypeError if a child is neither a string nor an Element.
        """
        def qname(name, inprefixes):
            # Render a string-or-pair name as 'prefix:local' or 'local'.
            if not islst(name):
                return name
            prefix = inprefixes[name[0]]
            if prefix is None:
                return name[1]
            return prefix + ':' + name[1]

        def arep(a, inprefixes, addns=1):
            # Render the namespace declarations (only when addns is set) and
            # the attributes, registering this element's namespaces in
            # inprefixes along the way.
            out = ''
            for uri in self._prefixes.keys():
                if uri in inprefixes:
                    continue
                prefix = self._prefixes[uri]
                if addns:
                    out += ' xmlns'
                    if prefix:
                        out += ':' + prefix
                    out += '="' + quote(uri, False) + '"'
                inprefixes[uri] = prefix
            for k in a.keys():
                out += ' ' + qname(k, inprefixes) + '="' + quote(a[k], False) + '"'
            return out

        inprefixes = inprefixes or {u'http://www.w3.org/XML/1998/namespace': 'xml'}

        # arep() has to run before qname() is applied to the tag name: it is
        # what registers this element's prefixes in inprefixes.
        attributes = arep(self._attrs, inprefixes, recursive)
        out = '<' + qname(self._name, inprefixes) + attributes
        if not self._dir and (self._name[0] in empty.keys()
                              and self._name[1] in empty[self._name[0]]):
            out += ' />'
            return out
        out += '>'
        if recursive:
            has_elements = False
            for child in self._dir:
                if isinstance(child, Element):
                    has_elements = True
                    break
            pad = '\n' + ('\t' * recursive)
            for child in self._dir:
                if multiline and has_elements:
                    out += pad
                if isstr(child):
                    out += quote(child)
                elif isinstance(child, Element):
                    # Each child gets its own copy so sibling declarations
                    # do not leak into one another.
                    out += child.__repr__(recursive + 1, multiline, inprefixes.copy())
                else:
                    raise TypeError("I wasn't expecting " + repr(child) + ".")
            if multiline and has_elements:
                out += '\n' + ('\t' * (recursive - 1))
        elif self._dir:
            out += '...'
        out += '</' + qname(self._name, inprefixes) + '>'
        return out

    def __unicode__(self):
        """Return the text of the subtree with runs of whitespace collapsed."""
        text_type = type(u'')  # unicode on Python 2, str on Python 3
        text = ''
        for x in self._dir:
            text += text_type(x)
        return ' '.join(text.split())

    def __str__(self):
        """Return the subtree text as a native string.

        Where the native string type is a byte string (Python 2) the text is
        encoded as UTF-8; otherwise it is returned unchanged.
        """
        text = self.__unicode__()
        if type('') is type(u''):
            return text
        return text.encode('utf-8')

    def __getattr__(self, n):
        """Return the first child element named *n* (the e.foo form).

        Names starting with '_' never resolve to children.  Raises
        AttributeError when no such child exists.
        """
        if n.startswith('_'):
            raise AttributeError("Use foo['" + n + "'] to access the child element.")
        if self._dNS:
            n = (self._dNS, n)
        for x in self._dir:
            if isinstance(x, Element) and x._name == n:
                return x
        # repr() rather than string concatenation: n may have been turned
        # into a (uri, local) pair above.
        raise AttributeError('No child element named ' + repr(n))

    def __hasattr__(self, n):
        """Return True if some child element's name equals *n* exactly.

        This is an ordinary method that has to be called explicitly; the
        builtin hasattr() goes through __getattr__ instead.
        """
        for x in self._dir:
            if isinstance(x, Element) and x._name == n:
                return True
        return False

    def __setattr__(self, n, v):
        """Store '_' names as instance state; treat e.foo = v as e['foo'] = v."""
        if n[0] == '_':
            self.__dict__[n] = v
        else:
            self[n] = v

    def __getitem__(self, n):
        """Positional or by-name child lookup.

        e[1]        child at that position (IndexError when out of range)
        e[1:], e[:] positional slice of the children (the step is ignored)
        e['foo':]   list of all child elements named foo
        e['foo']    first child element named foo (KeyError when absent)
        """
        if isinstance(n, int):
            return self._dir[n]
        if isinstance(n, slice):
            # A missing start (e[:]) arrives as None on interpreters that
            # pass the slice through untouched; that is still positional.
            if n.start is None or isinstance(n.start, int):
                return self._dir[n.start:n.stop]
            name = self._qualify(n.start)
            return [x for x in self._dir
                    if isinstance(x, Element) and x._name == name]
        name = self._qualify(n)
        for x in self._dir:
            if isinstance(x, Element) and x._name == name:
                return x
        raise KeyError(n)

    def __setitem__(self, n, v):
        """Positional or by-name child assignment.

        e[1] = v       replace the child at that position
        e['foo':] = v  append a new, empty <foo>
        e['foo'] = v   replace the first <foo> with <foo>v</foo> and drop
                       every other <foo>; append when there was none
        """
        if isinstance(n, int):
            self._dir[n] = v
        elif isinstance(n, slice):
            # NOTE(review): v is not used in this form; the new element is
            # appended empty.  Kept that way so existing callers see no change.
            self._dir.append(Element(self._qualify(n.start)))
        else:
            name = self._qualify(n)
            nv = Element(name)
            nv._dir.append(v)
            kept = []
            replaced = False
            # Rebuild the child list in one pass: text children are skipped
            # safely and no index shifts under us while duplicates go away.
            for child in self._dir:
                if isinstance(child, Element) and child._name == name:
                    if replaced:
                        continue
                    child = nv
                    replaced = True
                kept.append(child)
            if not replaced:
                kept.append(nv)
            self._dir[:] = kept

    def __delitem__(self, n):
        """Positional or by-name child removal.

        del e[1]       remove the child at that position
        del e['foo':]  remove every child element named foo
        del e['foo']   remove the first child element named foo, if any
        """
        if isinstance(n, int):
            del self._dir[n]
        elif isinstance(n, slice):
            name = self._qualify(n.start)
            self._dir[:] = [x for x in self._dir
                            if not (isinstance(x, Element) and x._name == name)]
        else:
            name = self._qualify(n)
            for i, x in enumerate(self._dir):
                if isinstance(x, Element) and x._name == name:
                    del self._dir[i]
                    break

    def __call__(self, *_pos, **_set):
        """Read or write attributes.

        e()                    -> the attribute dict itself
        e('name')              -> value of that attribute (KeyError if absent)
        e('n1', v1, 'n2', v2)  -> set attributes pairwise; returns None
        e(name=value)          -> set attributes by keyword; can be combined
                                  with any of the positional forms
        """
        if _set:
            for k in _set.keys():
                self._attrs[k] = _set[k]
        if len(_pos) > 1:
            for i in range(0, len(_pos), 2):
                self._attrs[_pos[i]] = _pos[i + 1]
        if len(_pos) == 1:
            return self._attrs[_pos[0]]
        if len(_pos) == 0:
            return self._attrs

    def __len__(self):
        """Return the number of children (text and elements)."""
        return len(self._dir)
181 | |
class Namespace:
    """Shorthand for building (uri, local-name) pairs.

    With ns = Namespace(uri), both ns.foo and ns['foo'] give (uri, 'foo').
    """

    def __init__(self, uri):
        self.__uri = uri

    def __getattr__(self, n):
        # Reached only for names that normal attribute lookup did not find.
        pair = (self.__uri, n)
        return pair

    def __getitem__(self, n):
        # Subscript form, for local names that are not valid identifiers.
        pair = (self.__uri, n)
        return pair
186 | |
187 from xml.sax.handler import EntityResolver, DTDHandler, ContentHandler, ErrorHandler | |
188 | |
class Seeder(EntityResolver, DTDHandler, ContentHandler, ErrorHandler):
    """SAX handler that assembles the parsed document into an Element tree.

    Once the outermost element has been closed, the root Element is
    available as the .result attribute.
    """

    def __init__(self):
        self.stack = []      # Elements that are currently open, outermost first
        self.ch = ''         # character data collected since the last tag
        self.prefixes = {}   # prefix -> list of uris; the last one is in scope
        self.result = None   # root Element; set when the outermost tag closes
        ContentHandler.__init__(self)

    def startPrefixMapping(self, prefix, uri):
        """Push *uri* as the current binding of *prefix*."""
        self.prefixes.setdefault(prefix, []).append(uri)

    def endPrefixMapping(self, prefix):
        """Pop the current binding of *prefix*."""
        uris = self.prefixes[prefix]
        uris.pop()
        if not uris:
            # Forget prefixes that have no binding left; otherwise the next
            # startElementNS would index into an empty list.
            del self.prefixes[prefix]

    def _flush_text(self):
        # Attach pending character data to the innermost open element,
        # dropping runs that are whitespace only.
        ch = self.ch
        self.ch = ''
        if ch and not ch.isspace():
            self.stack[-1]._dir.append(ch)

    def startElementNS(self, name, qname, attrs):
        """Open a new Element carrying the prefixes currently in scope."""
        self._flush_text()
        attrs = dict(attrs)
        newprefixes = {}
        for prefix in self.prefixes.keys():
            newprefixes[prefix] = self.prefixes[prefix][-1]
        self.stack.append(Element(name, attrs, prefixes=newprefixes))

    def characters(self, ch):
        """Accumulate character data until the next tag boundary."""
        self.ch += ch

    def endElementNS(self, name, qname):
        """Close the innermost Element and hang it on its parent."""
        self._flush_text()
        element = self.stack.pop()
        if self.stack:
            self.stack[-1]._dir.append(element)
        else:
            self.result = element
221 | |
222 from xml.sax import make_parser | |
223 from xml.sax.handler import feature_namespaces | |
224 | |
def seed(fileobj):
    """Parse *fileobj* with a namespace-aware SAX parser; return the root Element.

    fileobj is handed straight to the parser's parse() method.
    """
    handler = Seeder()
    reader = make_parser()
    reader.setContentHandler(handler)
    # Namespace mode makes the parser call startElementNS/endElementNS,
    # which are the callbacks Seeder implements.
    reader.setFeature(feature_namespaces, 1)
    reader.parse(fileobj)
    return handler.result
232 | |
def parse(text):
    """Parse an XML document held in a string and return its root Element.

    text -- the document.  Where the StringIO module is not available
            (Python 3), both str and bytes are accepted.
    """
    try:
        from StringIO import StringIO
    except ImportError:
        # No StringIO module: use io, which has separate text and byte streams.
        import io
        if isinstance(text, bytes):
            return seed(io.BytesIO(text))
        return seed(io.StringIO(text))
    return seed(StringIO(text))
236 | |
def load(url):
    """Fetch *url* and return the root Element of the XML document found there.

    The connection is closed once parsing has finished or failed.
    """
    try:
        from urllib.request import urlopen
    except ImportError:
        # Interpreters without urllib.request keep urlopen in urllib itself.
        from urllib import urlopen
    # NOTE(review): the URL is opened exactly as given; callers should not
    # pass untrusted URLs.
    stream = urlopen(url)
    try:
        return seed(stream)
    finally:
        stream.close()
240 | |
def unittest():
    """Self-test for the module; raises AssertionError on the first failure."""
    # NOTE(review): this comparison was never asserted and its result is
    # discarded; it only checks that multiline serialisation runs.  The
    # expected string does not look like what __repr__(1, 1) produces (every
    # child gets its own line), so it is deliberately left unasserted.
    parse('<doc>a<baz>f<b>o</b>ob<b>a</b>r</baz>a</doc>').__repr__(1, 1) == \
        '<doc>\n\ta<baz>\n\t\tf<b>o</b>ob<b>a</b>r\n\t</baz>a\n</doc>'
    assert str(parse("<doc />")) == ""
    assert str(parse("<doc>I <b>love</b> you.</doc>")) == "I love you."
    assert parse("<doc>\nmom\nwow\n</doc>")[0].strip() == "mom\nwow"
    assert str(parse('<bing>  <bang> <bong>center</bong> </bang>  </bing>')) == "center"
    assert str(parse('<doc>\xcf\x80</doc>')) == '\xcf\x80'
    d = Element('foo', attrs={'foo': 'bar'}, children=['hit with a', Element('bar'), Element('bar')])

    # '_' names must never resolve to children.
    try:
        d._doesnotexist
    except AttributeError:
        pass
    else:
        raise AssertionError("ExpectedError: but found success. Damn.")
    assert d.bar._name == 'bar'
    try:
        d.doesnotexist
    except AttributeError:
        pass
    else:
        raise AssertionError("ExpectedError: but found success. Damn.")
    assert hasattr(d, 'bar') == True
    assert d('foo') == 'bar'
    d(silly='yes')
    assert d('silly') == 'yes'
    assert d() == d._attrs
    assert d[0] == 'hit with a'
    d[0] = 'ice cream'
    assert d[0] == 'ice cream'
    del d[0]
    assert d[0]._name == "bar"
    assert len(d[:]) == len(d._dir)
    assert len(d[1:]) == len(d._dir) - 1
    assert len(d['bar':]) == 2
    d['bar':] = 'baz'
    assert len(d['bar':]) == 3
    assert d['bar']._name == 'bar'
    d = Element('foo')
    doc = Namespace("http://example.org/bar")
    bbc = Namespace("http://example.org/bbc")
    dc = Namespace("http://purl.org/dc/elements/1.1/")
    d = parse("""<doc version="2.7182818284590451"
        xmlns="http://example.org/bar"
        xmlns:dc="http://purl.org/dc/elements/1.1/"
        xmlns:bbc="http://example.org/bbc">
        <author>John Polk and John Palfrey</author>
        <dc:creator>John Polk</dc:creator>
        <dc:creator>John Palfrey</dc:creator>
        <bbc:show bbc:station="4">Buffy</bbc:show>
    </doc>""")
    assert repr(d) == '<doc version="2.7182818284590451">...</doc>'
    # NOTE(review): the order of the xmlns declarations in the next two
    # expected strings follows the iteration order of a dict inside
    # __repr__; confirm it on the interpreter this is run with.
    assert d.__repr__(1) == '<doc xmlns:bbc="http://example.org/bbc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://example.org/bar" version="2.7182818284590451"><author>John Polk and John Palfrey</author><dc:creator>John Polk</dc:creator><dc:creator>John Palfrey</dc:creator><bbc:show bbc:station="4">Buffy</bbc:show></doc>'
    assert d.__repr__(1, 1) == '<doc xmlns:bbc="http://example.org/bbc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://example.org/bar" version="2.7182818284590451">\n\t<author>John Polk and John Palfrey</author>\n\t<dc:creator>John Polk</dc:creator>\n\t<dc:creator>John Palfrey</dc:creator>\n\t<bbc:show bbc:station="4">Buffy</bbc:show>\n</doc>'
    assert repr(parse("<doc xml:lang='en' />")) == '<doc xml:lang="en"></doc>'
    assert str(d.author) == str(d['author']) == "John Polk and John Palfrey"
    assert d.author._name == doc.author
    assert str(d[dc.creator]) == "John Polk"
    assert d[dc.creator]._name == dc.creator
    assert str(d[dc.creator:][1]) == "John Palfrey"
    d[dc.creator] = "Me!!!"
    assert str(d[dc.creator]) == "Me!!!"
    assert len(d[dc.creator:]) == 1
    d[dc.creator:] = "You!!!"
    assert len(d[dc.creator:]) == 2
    assert d[bbc.show](bbc.station) == "4"
    d[bbc.show](bbc.station, "5")
    assert d[bbc.show](bbc.station) == "5"
    e = Element('e')
    e.c = '<img src="foo">'
    assert e.__repr__(1) == '<e><c>&lt;img src="foo"></c></e>'
    e.c = '2 > 4'
    assert e.__repr__(1) == '<e><c>2 > 4</c></e>'
    e.c = 'CDATA sections are <em>closed</em> with ]]>.'
    assert e.__repr__(1) == '<e><c>CDATA sections are &lt;em>closed&lt;/em> with ]]&gt;.</c></e>'
    e.c = parse('<div xmlns="http://www.w3.org/1999/xhtml">i<br /><span></span>love<br />you</div>')
    assert e.__repr__(1) == '<e><c><div xmlns="http://www.w3.org/1999/xhtml">i<br /><span></span>love<br />you</div></c></e>'
    e = Element('e')
    e('c', 'that "sucks"')
    assert e.__repr__(1) == '<e c="that &quot;sucks&quot;"></e>'
    assert quote("]]>") == "]]&gt;"
    assert quote('< dkdkdsd dkd sksdksdfsd fsdfdsf]]> kfdfkg >') == '&lt; dkdkdsd dkd sksdksdfsd fsdfdsf]]&gt; kfdfkg >'
    assert parse('<x a="&lt;"></x>').__repr__(1) == '<x a="&lt;"></x>'
    assert parse('<a xmlns="http://a"><b xmlns="http://b"/></a>').__repr__(1) == '<a xmlns="http://a"><b xmlns="http://b"></b></a>'
322 | |
# Run the self-test when the file is executed as a script.
if __name__ == '__main__':
    unittest()