comparison orpg/xmltramp.py @ 0:4385a7d0efd1 grumpy-goblin

Deleted and repushed it with the 'grumpy-goblin' branch. I forgot a y
author sirebral
date Tue, 14 Jul 2009 16:41:58 -0500
parents
children 551cd440acce
comparison
equal deleted inserted replaced
-1:000000000000 0:4385a7d0efd1
1 """xmltramp: Make XML documents easily accessible."""
2
# Module metadata.
__version__ = "2.16"
__author__ = "Aaron Swartz"
__credits__ = "Many thanks to pjz, bitsko, and DanC."
__copyright__ = "(C) 2003 Aaron Swartz. GNU GPL 2."

# Compatibility shim for interpreters that predate the True/False builtins.
# The probe is a plain name lookup rather than hasattr(__builtins__, 'True'):
# __builtins__ is the builtins module only inside __main__ and is normally a
# plain dict inside an imported module, so the hasattr() form always failed
# there and rebound True/False to the ints 1/0 on every import.
# The names are installed through globals() because a literal assignment to
# True is a syntax error on interpreters where it is a keyword.
try:
    True
except NameError:
    globals().update({'True': 1, 'False': 0})
def isstr(f):
    """Return whether f is an instance of the type of '' or of u''."""
    return isinstance(f, (type(''), type(u'')))
def islst(f):
    """Return whether f is a tuple or a list (or an instance of a subclass)."""
    return isinstance(f, (tuple, list))
11
# Maps a namespace URI to the local names of elements that Element.__repr__
# writes in the self-closing ' />' form when they have no children.
empty = {
    'http://www.w3.org/1999/xhtml': [
        'img', 'br', 'hr', 'meta', 'link',
        'base', 'param', 'input', 'col', 'area',
    ],
}
13
def quote(x, elt=True):
    """Escape the string x for inclusion in serialized XML.

    With elt true (element content), text that contains '<', is longer than
    24 characters and has no ']]>' in it is wrapped whole in a CDATA section.
    Otherwise '&', '<' and ']]>' are replaced by entity forms.  With elt
    false (attribute values) no CDATA section is produced and double quotes
    are escaped as well.
    """
    if elt and '<' in x and len(x) > 24 and x.find(']]>') < 0:
        return "<![CDATA[" + x + "]]>"

    escaped = x
    # '&' has to go first so the entities introduced below are not re-escaped.
    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), (']]>', ']]&gt;')):
        escaped = escaped.replace(raw, entity)
    if not elt:
        escaped = escaped.replace('"', '&quot;')
    return escaped
19
class Element:
    """One XML element: a name, an attribute dict and an ordered child list.

    Names are either plain strings or (namespace-uri, local-name) pairs.
    State lives in underscore-prefixed attributes, because every other
    attribute name is routed to child-element access:

    _name      the element name
    _attrs     dict of attribute name -> value
    _dir       list of children (strings and Element objects)
    _prefixes  dict of namespace URI -> prefix declared on this element
    _dNS       the default namespace URI (the URI bound to prefix None), or None
    """

    def __init__(self, name, attrs=None, children=None, prefixes=None):
        """Build an element.

        name and the keys of attrs may be (uri, local) pairs; a pair whose
        uri is None is collapsed to the bare local name.  prefixes maps
        prefix -> URI and is stored inverted (URI -> prefix).
        """
        if islst(name) and name[0] is None: name = name[1]
        if attrs:
            na = {}
            for k in attrs.keys():
                if islst(k) and k[0] is None: na[k[1]] = attrs[k]
                else: na[k] = attrs[k]
            attrs = na

        self._name = name
        self._attrs = attrs or {}
        self._dir = children or []

        prefixes = prefixes or {}
        self._prefixes = dict(zip(prefixes.values(), prefixes.keys()))
        self._dNS = prefixes.get(None, None)

    def __repr__(self, recursive=0, multiline=0, inprefixes=None):
        """Serialize the element as XML text.

        recursive   0 writes '...' in place of any children; a positive value
                    serializes children too and doubles as the nesting depth
                    used for indentation.
        multiline   when true, and at least one child is an Element, each
                    child is put on its own tab-indented line.
        inprefixes  URI -> prefix map of namespaces already declared by the
                    ancestors; namespaces missing from it are declared here
                    (the xmlns attributes are only written when recursive).

        Raises TypeError for a child that is neither a string nor an Element.
        """
        def qname(name, inprefixes):
            if islst(name):
                if inprefixes[name[0]] is not None:
                    return inprefixes[name[0]] + ':' + name[1]
                else:
                    return name[1]
            else:
                return name

        def arep(a, inprefixes, addns=1):
            out = ''
            for uri in self._prefixes.keys():
                if uri in inprefixes: continue
                prefix = self._prefixes[uri]
                if addns:
                    out += ' xmlns'
                    if prefix: out += ':' + prefix
                    out += '="' + quote(uri, False) + '"'
                # Recorded even when not written, so qname() can resolve it.
                inprefixes[uri] = prefix
            for k in a.keys():
                out += ' ' + qname(k, inprefixes) + '="' + quote(a[k], False) + '"'
            return out

        inprefixes = inprefixes or {u'http://www.w3.org/XML/1998/namespace': 'xml'}

        # need to call first to set inprefixes:
        attributes = arep(self._attrs, inprefixes, recursive)
        out = '<' + qname(self._name, inprefixes) + attributes

        if not self._dir and (self._name[0] in empty
          and self._name[1] in empty[self._name[0]]):
            out += ' />'
            return out

        out += '>'

        if recursive:
            content = 0
            for x in self._dir:
                if isinstance(x, Element): content = 1

            pad = '\n' + ('\t' * recursive)
            for x in self._dir:
                if multiline and content: out += pad
                if isstr(x): out += quote(x)
                elif isinstance(x, Element):
                    # Each child gets its own copy so that sibling subtrees
                    # do not see each other's namespace declarations.
                    out += x.__repr__(recursive + 1, multiline, inprefixes.copy())
                else:
                    raise TypeError("I wasn't expecting " + repr(x) + ".")
            if multiline and content: out += '\n' + ('\t' * (recursive - 1))
        else:
            if self._dir: out += '...'

        out += '</' + qname(self._name, inprefixes) + '>'

        return out

    def __unicode__(self):
        """Return the text of all children concatenated, whitespace-normalized."""
        text = ''
        for x in self._dir:
            text += unicode(x)
        return ' '.join(text.split())

    def __str__(self):
        """Return the element's text (see __unicode__) encoded as UTF-8."""
        return self.__unicode__().encode('utf-8')

    def __getattr__(self, n):
        """Return the first child element named n (d.foo).

        n is looked up in the default namespace when one is set.  Raises
        AttributeError for underscore-prefixed names and for names that match
        no child.
        """
        if n[0] == '_':
            raise AttributeError("Use foo['" + n + "'] to access the child element.")
        local = n
        if self._dNS: n = (self._dNS, n)
        for x in self._dir:
            if isinstance(x, Element) and x._name == n: return x
        # Built from the local name: n may be a (uri, name) pair by now, and
        # concatenating that raised TypeError instead of AttributeError.
        raise AttributeError('No child element named \'' + local + "'")

    def __hasattr__(self, n):
        """Return whether a child element is named exactly n.

        This is not a hook Python calls: the builtin hasattr() goes through
        __getattr__.  It only runs when invoked explicitly.
        """
        for x in self._dir:
            if isinstance(x, Element) and x._name == n: return True
        return False

    def __setattr__(self, n, v):
        """Store underscore-prefixed names on the instance; treat any other
        name as child assignment (d.foo = v is d['foo'] = v)."""
        if n[0] == '_': self.__dict__[n] = v
        else: self[n] = v

    def __getitem__(self, n):
        """Index the children.

        d[1]       child at position 1
        d[1:3]     children at positions 1..2; a slice with no start (d[:2])
                   is positional as well
        d['foo':]  list of all child elements named foo
        d['foo']   first child element named foo; raises KeyError if none

        String names are resolved in the default namespace when one is set.
        """
        if isinstance(n, type(0)):  # d[1] == d._dir[1]
            return self._dir[n]
        elif isinstance(n, slice(0).__class__):
            # numerical slices
            if n.start is None or isinstance(n.start, type(0)):
                return self._dir[n.start:n.stop]

            # d['foo':] == all <foo>s
            n = n.start
            if self._dNS and not islst(n): n = (self._dNS, n)
            out = []
            for x in self._dir:
                if isinstance(x, Element) and x._name == n: out.append(x)
            return out
        else:  # d['foo'] == first <foo>
            if self._dNS and not islst(n): n = (self._dNS, n)
            for x in self._dir:
                if isinstance(x, Element) and x._name == n: return x
            raise KeyError

    def __setitem__(self, n, v):
        """Assign children.

        d[1] = v       replace the child at position 1 with v
        d['foo':] = v  append a new <foo> element containing v
        d['foo'] = v   replace the first <foo> with a new <foo> containing v
                       and drop every other <foo>; appends when none exists
        """
        if isinstance(n, type(0)):  # d[1]
            self._dir[n] = v
        elif isinstance(n, slice(0).__class__):
            # d['foo':] adds a new foo
            n = n.start
            if self._dNS and not islst(n): n = (self._dNS, n)

            nv = Element(n)
            nv._dir.append(v)  # the value used to be silently discarded
            self._dir.append(nv)

        else:  # d["foo"] replaces first <foo> and dels rest
            if self._dNS and not islst(n): n = (self._dNS, n)

            nv = Element(n)
            nv._dir.append(v)
            # Rebuild the child list in one pass.  Text children have no
            # _name, and deleting duplicates by ascending index would shift
            # the later indices and remove the wrong nodes.
            kept = []
            replaced = False
            for x in self._dir:
                if isinstance(x, Element) and x._name == n:
                    if not replaced:
                        kept.append(nv)
                        replaced = True
                    continue
                kept.append(x)
            if not replaced: kept.append(nv)
            self._dir[:] = kept

    def __delitem__(self, n):
        """Delete children.

        del d[1]       remove the child at position 1
        del d[1:3]     remove children by position, mirroring __getitem__
        del d['foo':]  remove every child element named foo
        del d['foo']   remove the first child element named foo; no error is
                       raised when there is none
        """
        if isinstance(n, type(0)):
            del self._dir[n]
            return

        if isinstance(n, slice(0).__class__):
            if n.start is None or isinstance(n.start, type(0)):
                del self._dir[n.start:n.stop]
                return
            # delete all <foo>s
            n = n.start
            if self._dNS and not islst(n): n = (self._dNS, n)
            # Filtered in one step: deleting while walking a precomputed index
            # range runs off the end of the shrinking list.
            self._dir[:] = [x for x in self._dir
                            if not (isinstance(x, Element) and x._name == n)]
            return

        # delete first foo
        if self._dNS and not islst(n): n = (self._dNS, n)
        for i, x in enumerate(self._dir):
            if isinstance(x, Element) and x._name == n:
                del self._dir[i]
                break

    def __call__(self, *_pos, **_set):
        """Read or write attributes.

        d()                  return the attribute dict itself
        d('name')            return one attribute value (KeyError if absent)
        d('name', v, ...)    set attributes from alternating name/value pairs
        d(name=v, ...)       set attributes from keywords

        Returns None after a positional set.
        """
        if _set:
            for k in _set.keys(): self._attrs[k] = _set[k]
        if len(_pos) > 1:
            for i in range(0, len(_pos), 2):
                self._attrs[_pos[i]] = _pos[i + 1]
        if len(_pos) == 1:
            return self._attrs[_pos[0]]
        if len(_pos) == 0:
            return self._attrs

    def __len__(self):
        """Return the number of children (text nodes included)."""
        return len(self._dir)
181
class Namespace:
    """Factory for qualified names within one namespace URI.

    Both ns.local and ns['local'] return the pair (uri, 'local'), the form
    in which Element stores namespaced names.
    """

    def __init__(self, uri):
        self.__uri = uri

    def __getattr__(self, n):
        # Reached for every attribute name that is not found normally.
        qualified = (self.__uri, n)
        return qualified

    # Subscript access builds exactly the same pair as attribute access.
    __getitem__ = __getattr__
186
187 from xml.sax.handler import EntityResolver, DTDHandler, ContentHandler, ErrorHandler
188
class Seeder(EntityResolver, DTDHandler, ContentHandler, ErrorHandler):
    """SAX handler that assembles namespace-aware parse events into a tree
    of Element objects; the finished root is left in self.result."""

    def __init__(self):
        self.stack = []      # elements whose end tag has not been seen yet
        self.ch = ''         # character data buffered since the last tag
        self.prefixes = {}   # prefix -> stack of URIs currently bound to it
        self.result = None   # root element, set when the outermost tag closes
        ContentHandler.__init__(self)

    def startPrefixMapping(self, prefix, uri):
        """Push uri as the current binding of prefix."""
        self.prefixes.setdefault(prefix, []).append(uri)

    def endPrefixMapping(self, prefix):
        """Pop the innermost binding of prefix."""
        bindings = self.prefixes[prefix]
        bindings.pop()
        # Forget a prefix that has gone out of scope entirely.  An empty
        # stack left behind made startElementNS fail with IndexError on the
        # next element, e.g. for <a><b xmlns:x="u"/><c/></a>.
        if not bindings: del self.prefixes[prefix]

    def _flush_text(self):
        """Append buffered character data to the innermost open element,
        dropping it when it is empty or whitespace only."""
        ch = self.ch; self.ch = ''
        if ch and not ch.isspace(): self.stack[-1]._dir.append(ch)

    def startElementNS(self, name, qname, attrs):
        """Open a new element carrying the prefix bindings now in scope."""
        self._flush_text()
        attrs = dict(attrs)
        newprefixes = {}
        for k in self.prefixes.keys(): newprefixes[k] = self.prefixes[k][-1]
        self.stack.append(Element(name, attrs, prefixes=newprefixes))

    def characters(self, ch):
        """Buffer character data; it may arrive in several chunks."""
        self.ch += ch

    def endElementNS(self, name, qname):
        """Close the innermost element and attach it to its parent, or
        publish it as the result when it is the root."""
        self._flush_text()
        element = self.stack.pop()
        if self.stack:
            self.stack[-1]._dir.append(element)
        else:
            self.result = element
221
222 from xml.sax import make_parser
223 from xml.sax.handler import feature_namespaces
224
def seed(fileobj):
    """Parse XML from fileobj with namespace processing enabled and return
    the result collected by a Seeder handler."""
    handler = Seeder()
    reader = make_parser()
    reader.setContentHandler(handler)
    reader.setFeature(feature_namespaces, 1)
    reader.parse(fileobj)
    return handler.result
232
def parse(text):
    """Parse the XML document held in the string text; see seed()."""
    import StringIO
    buf = StringIO.StringIO(text)
    return seed(buf)
236
def load(url):
    """Fetch url with urllib.urlopen and parse the response; see seed().

    The response handle is closed once parsing finishes or fails; it used
    to be left open.
    """
    import urllib
    stream = urllib.urlopen(url)
    try:
        return seed(stream)
    finally:
        stream.close()
240
def unittest():
    """Self-test run when the module is executed as a script.

    Exercises parse(), quote(), Namespace and the Element access protocols
    through plain assert statements; returns None when everything passes.
    """
    # NOTE: this comparison is evaluated but never asserted, so it checks
    # nothing.
    # NOTE(review): adding `assert` appears to fail, because multiline mode
    # pads before every child of <doc> -- confirm before enabling it.
    parse('<doc>a<baz>f<b>o</b>ob<b>a</b>r</baz>a</doc>').__repr__(1,1) == \
      '<doc>\n\ta<baz>\n\t\tf<b>o</b>ob<b>a</b>r\n\t</baz>a\n</doc>'

    assert str(parse("<doc />")) == ""
    assert str(parse("<doc>I <b>love</b> you.</doc>")) == "I love you."
    assert parse("<doc>\nmom\nwow\n</doc>")[0].strip() == "mom\nwow"
    assert str(parse('<bing> <bang> <bong>center</bong> </bang> </bing>')) == "center"
    assert str(parse('<doc>\xcf\x80</doc>')) == '\xcf\x80'

    d = Element('foo', attrs={'foo':'bar'}, children=['hit with a', Element('bar'), Element('bar')])

    # The sentinels below used to be string exceptions (raise "ExpectedError",
    # ...), which newer Python 2 releases reject with a TypeError of their own
    # instead of reporting the real failure.
    try:
        d._doesnotexist
        raise AssertionError("ExpectedError: but found success. Damn.")
    except AttributeError: pass
    assert d.bar._name == 'bar'
    try:
        d.doesnotexist
        raise AssertionError("ExpectedError: but found success. Damn.")
    except AttributeError: pass

    assert hasattr(d, 'bar') == True

    assert d('foo') == 'bar'
    d(silly='yes')
    assert d('silly') == 'yes'
    assert d() == d._attrs

    assert d[0] == 'hit with a'
    d[0] = 'ice cream'
    assert d[0] == 'ice cream'
    del d[0]
    assert d[0]._name == "bar"
    assert len(d[:]) == len(d._dir)
    assert len(d[1:]) == len(d._dir) - 1
    assert len(d['bar':]) == 2
    d['bar':] = 'baz'
    assert len(d['bar':]) == 3
    assert d['bar']._name == 'bar'

    d = Element('foo')

    doc = Namespace("http://example.org/bar")
    bbc = Namespace("http://example.org/bbc")
    dc = Namespace("http://purl.org/dc/elements/1.1/")
    d = parse("""<doc version="2.7182818284590451"
xmlns="http://example.org/bar"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:bbc="http://example.org/bbc">
<author>John Polk and John Palfrey</author>
<dc:creator>John Polk</dc:creator>
<dc:creator>John Palfrey</dc:creator>
<bbc:show bbc:station="4">Buffy</bbc:show>
</doc>""")

    assert repr(d) == '<doc version="2.7182818284590451">...</doc>'
    assert d.__repr__(1) == '<doc xmlns:bbc="http://example.org/bbc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://example.org/bar" version="2.7182818284590451"><author>John Polk and John Palfrey</author><dc:creator>John Polk</dc:creator><dc:creator>John Palfrey</dc:creator><bbc:show bbc:station="4">Buffy</bbc:show></doc>'
    assert d.__repr__(1,1) == '<doc xmlns:bbc="http://example.org/bbc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://example.org/bar" version="2.7182818284590451">\n\t<author>John Polk and John Palfrey</author>\n\t<dc:creator>John Polk</dc:creator>\n\t<dc:creator>John Palfrey</dc:creator>\n\t<bbc:show bbc:station="4">Buffy</bbc:show>\n</doc>'

    assert repr(parse("<doc xml:lang='en' />")) == '<doc xml:lang="en"></doc>'

    assert str(d.author) == str(d['author']) == "John Polk and John Palfrey"
    assert d.author._name == doc.author
    assert str(d[dc.creator]) == "John Polk"
    assert d[dc.creator]._name == dc.creator
    assert str(d[dc.creator:][1]) == "John Palfrey"
    d[dc.creator] = "Me!!!"
    assert str(d[dc.creator]) == "Me!!!"
    assert len(d[dc.creator:]) == 1
    d[dc.creator:] = "You!!!"
    assert len(d[dc.creator:]) == 2

    assert d[bbc.show](bbc.station) == "4"
    d[bbc.show](bbc.station, "5")
    assert d[bbc.show](bbc.station) == "5"

    e = Element('e')
    e.c = '<img src="foo">'
    assert e.__repr__(1) == '<e><c>&lt;img src="foo"></c></e>'
    e.c = '2 > 4'
    assert e.__repr__(1) == '<e><c>2 > 4</c></e>'
    e.c = 'CDATA sections are <em>closed</em> with ]]>.'
    assert e.__repr__(1) == '<e><c>CDATA sections are &lt;em>closed&lt;/em> with ]]&gt;.</c></e>'
    e.c = parse('<div xmlns="http://www.w3.org/1999/xhtml">i<br /><span></span>love<br />you</div>')
    assert e.__repr__(1) == '<e><c><div xmlns="http://www.w3.org/1999/xhtml">i<br /><span></span>love<br />you</div></c></e>'

    e = Element('e')
    e('c', 'that "sucks"')
    assert e.__repr__(1) == '<e c="that &quot;sucks&quot;"></e>'

    assert quote("]]>") == "]]&gt;"
    assert quote('< dkdkdsd dkd sksdksdfsd fsdfdsf]]> kfdfkg >') == '&lt; dkdkdsd dkd sksdksdfsd fsdfdsf]]&gt; kfdfkg >'

    assert parse('<x a="&lt;"></x>').__repr__(1) == '<x a="&lt;"></x>'
    assert parse('<a xmlns="http://a"><b xmlns="http://b"/></a>').__repr__(1) == '<a xmlns="http://a"><b xmlns="http://b"></b></a>'
322
# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    unittest()