Mercurial > traipse_dev
view orpg/xmltramp.py @ 31:3769c8d6431e traipse_dev
And then I overwrite my edits. Mostly I cleaned up, only lost some small notes.
author | sirebral |
---|---|
date | Sun, 02 Aug 2009 01:00:33 -0500 |
parents | c88a794d9a22 |
children |
line wrap: on
line source
"""xmltramp: Make XML documents easily accessible.""" __version__ = "2.16" __author__ = "Aaron Swartz" __credits__ = "Many thanks to pjz, bitsko, and DanC." __copyright__ = "(C) 2003 Aaron Swartz. GNU GPL 2." if not hasattr(__builtins__, 'True'): True, False = 1, 0 def isstr(f): return isinstance(f, type('')) or isinstance(f, type(u'')) def islst(f): return isinstance(f, type(())) or isinstance(f, type([])) empty = {'http://www.w3.org/1999/xhtml': ['img', 'br', 'hr', 'meta', 'link', 'base', 'param', 'input', 'col', 'area']} def quote(x, elt=True): if elt and '<' in x and len(x) > 24 and x.find(']]>') == -1: return "<![CDATA["+x+"]]>" else: x = x.replace('&', '&').replace('<', '<').replace(']]>', ']]>') if not elt: x = x.replace('"', '"') return x class Element: def __init__(self, name, attrs=None, children=None, prefixes=None): if islst(name) and name[0] == None: name = name[1] if attrs: na = {} for k in attrs.keys(): if islst(k) and k[0] == None: na[k[1]] = attrs[k] else: na[k] = attrs[k] attrs = na self._name = name self._attrs = attrs or {} self._dir = children or [] prefixes = prefixes or {} self._prefixes = dict(zip(prefixes.values(), prefixes.keys())) if prefixes: self._dNS = prefixes.get(None, None) else: self._dNS = None def __repr__(self, recursive=0, multiline=0, inprefixes=None): def qname(name, inprefixes): if islst(name): if inprefixes[name[0]] is not None: return inprefixes[name[0]]+':'+name[1] else: return name[1] else: return name def arep(a, inprefixes, addns=1): out = '' for p in self._prefixes.keys(): if not p in inprefixes.keys(): if addns: out += ' xmlns' if addns and self._prefixes[p]: out += ':'+self._prefixes[p] if addns: out += '="'+quote(p, False)+'"' inprefixes[p] = self._prefixes[p] for k in a.keys(): out += ' ' + qname(k, inprefixes)+ '="' + quote(a[k], False) + '"' return out inprefixes = inprefixes or {u'http://www.w3.org/XML/1998/namespace':'xml'} # need to call first to set inprefixes: attributes = arep(self._attrs, inprefixes, recursive) out = '<' + qname(self._name, inprefixes) + attributes if not self._dir and (self._name[0] in empty.keys() and self._name[1] in empty[self._name[0]]): out += ' />' return out out += '>' if recursive: content = 0 for x in self._dir: if isinstance(x, Element): content = 1 pad = '\n' + ('\t' * recursive) for x in self._dir: if multiline and content: out += pad if isstr(x): out += quote(x) elif isinstance(x, Element): out += x.__repr__(recursive+1, multiline, inprefixes.copy()) else: raise TypeError, "I wasn't expecting "+`x`+"." if multiline and content: out += '\n' + ('\t' * (recursive-1)) else: if self._dir: out += '...' out += '</'+qname(self._name, inprefixes)+'>' return out def __unicode__(self): text = '' for x in self._dir: text += unicode(x) return ' '.join(text.split()) def __str__(self): return self.__unicode__().encode('utf-8') def __getattr__(self, n): if n[0] == '_': raise AttributeError, "Use foo['"+n+"'] to access the child element." if self._dNS: n = (self._dNS, n) for x in self._dir: if isinstance(x, Element) and x._name == n: return x raise AttributeError, 'No child element named \''+n+"'" def __hasattr__(self, n): for x in self._dir: if isinstance(x, Element) and x._name == n: return True return False def __setattr__(self, n, v): if n[0] == '_': self.__dict__[n] = v else: self[n] = v def __getitem__(self, n): if isinstance(n, type(0)): # d[1] == d._dir[1] return self._dir[n] elif isinstance(n, slice(0).__class__): # numerical slices if isinstance(n.start, type(0)): return self._dir[n.start:n.stop] # d['foo':] == all <foo>s n = n.start if self._dNS and not islst(n): n = (self._dNS, n) out = [] for x in self._dir: if isinstance(x, Element) and x._name == n: out.append(x) return out else: # d['foo'] == first <foo> if self._dNS and not islst(n): n = (self._dNS, n) for x in self._dir: if isinstance(x, Element) and x._name == n: return x raise KeyError def __setitem__(self, n, v): if isinstance(n, type(0)): # d[1] self._dir[n] = v elif isinstance(n, slice(0).__class__): # d['foo':] adds a new foo n = n.start if self._dNS and not islst(n): n = (self._dNS, n) nv = Element(n) self._dir.append(nv) else: # d["foo"] replaces first <foo> and dels rest if self._dNS and not islst(n): n = (self._dNS, n) nv = Element(n); nv._dir.append(v) replaced = False todel = [] for i in range(len(self)): if self[i]._name == n: if replaced: todel.append(i) else: self[i] = nv replaced = True if not replaced: self._dir.append(nv) for i in todel: del self[i] def __delitem__(self, n): if isinstance(n, type(0)): del self._dir[n] elif isinstance(n, slice(0).__class__): # delete all <foo>s n = n.start if self._dNS and not islst(n): n = (self._dNS, n) for i in range(len(self)): if self[i]._name == n: del self[i] else: # delete first foo for i in range(len(self)): if self[i]._name == n: del self[i] break def __call__(self, *_pos, **_set): if _set: for k in _set.keys(): self._attrs[k] = _set[k] if len(_pos) > 1: for i in range(0, len(_pos), 2): self._attrs[_pos[i]] = _pos[i+1] if len(_pos) == 1 is not None: return self._attrs[_pos[0]] if len(_pos) == 0: return self._attrs def __len__(self): return len(self._dir) class Namespace: def __init__(self, uri): self.__uri = uri def __getattr__(self, n): return (self.__uri, n) def __getitem__(self, n): return (self.__uri, n) from xml.sax.handler import EntityResolver, DTDHandler, ContentHandler, ErrorHandler class Seeder(EntityResolver, DTDHandler, ContentHandler, ErrorHandler): def __init__(self): self.stack = [] self.ch = '' self.prefixes = {} ContentHandler.__init__(self) def startPrefixMapping(self, prefix, uri): if not self.prefixes.has_key(prefix): self.prefixes[prefix] = [] self.prefixes[prefix].append(uri) def endPrefixMapping(self, prefix): self.prefixes[prefix].pop() def startElementNS(self, name, qname, attrs): ch = self.ch; self.ch = '' if ch and not ch.isspace(): self.stack[-1]._dir.append(ch) attrs = dict(attrs) newprefixes = {} for k in self.prefixes.keys(): newprefixes[k] = self.prefixes[k][-1] self.stack.append(Element(name, attrs, prefixes=newprefixes.copy())) def characters(self, ch): self.ch += ch def endElementNS(self, name, qname): ch = self.ch; self.ch = '' if ch and not ch.isspace(): self.stack[-1]._dir.append(ch) element = self.stack.pop() if self.stack: self.stack[-1]._dir.append(element) else: self.result = element from xml.sax import make_parser from xml.sax.handler import feature_namespaces def seed(fileobj): seeder = Seeder() parser = make_parser() parser.setFeature(feature_namespaces, 1) parser.setContentHandler(seeder) parser.parse(fileobj) return seeder.result def parse(text): from StringIO import StringIO return seed(StringIO(text)) def load(url): import urllib return seed(urllib.urlopen(url)) def unittest(): parse('<doc>a<baz>f<b>o</b>ob<b>a</b>r</baz>a</doc>').__repr__(1,1) == \ '<doc>\n\ta<baz>\n\t\tf<b>o</b>ob<b>a</b>r\n\t</baz>a\n</doc>' assert str(parse("<doc />")) == "" assert str(parse("<doc>I <b>love</b> you.</doc>")) == "I love you." assert parse("<doc>\nmom\nwow\n</doc>")[0].strip() == "mom\nwow" assert str(parse('<bing> <bang> <bong>center</bong> </bang> </bing>')) == "center" assert str(parse('<doc>\xcf\x80</doc>')) == '\xcf\x80' d = Element('foo', attrs={'foo':'bar'}, children=['hit with a', Element('bar'), Element('bar')]) try: d._doesnotexist raise "ExpectedError", "but found success. Damn." except AttributeError: pass assert d.bar._name == 'bar' try: d.doesnotexist raise "ExpectedError", "but found success. Damn." except AttributeError: pass assert hasattr(d, 'bar') == True assert d('foo') == 'bar' d(silly='yes') assert d('silly') == 'yes' assert d() == d._attrs assert d[0] == 'hit with a' d[0] = 'ice cream' assert d[0] == 'ice cream' del d[0] assert d[0]._name == "bar" assert len(d[:]) == len(d._dir) assert len(d[1:]) == len(d._dir) - 1 assert len(d['bar':]) == 2 d['bar':] = 'baz' assert len(d['bar':]) == 3 assert d['bar']._name == 'bar' d = Element('foo') doc = Namespace("http://example.org/bar") bbc = Namespace("http://example.org/bbc") dc = Namespace("http://purl.org/dc/elements/1.1/") d = parse("""<doc version="2.7182818284590451" xmlns="http://example.org/bar" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:bbc="http://example.org/bbc"> <author>John Polk and John Palfrey</author> <dc:creator>John Polk</dc:creator> <dc:creator>John Palfrey</dc:creator> <bbc:show bbc:station="4">Buffy</bbc:show> </doc>""") assert repr(d) == '<doc version="2.7182818284590451">...</doc>' assert d.__repr__(1) == '<doc xmlns:bbc="http://example.org/bbc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://example.org/bar" version="2.7182818284590451"><author>John Polk and John Palfrey</author><dc:creator>John Polk</dc:creator><dc:creator>John Palfrey</dc:creator><bbc:show bbc:station="4">Buffy</bbc:show></doc>' assert d.__repr__(1,1) == '<doc xmlns:bbc="http://example.org/bbc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://example.org/bar" version="2.7182818284590451">\n\t<author>John Polk and John Palfrey</author>\n\t<dc:creator>John Polk</dc:creator>\n\t<dc:creator>John Palfrey</dc:creator>\n\t<bbc:show bbc:station="4">Buffy</bbc:show>\n</doc>' assert repr(parse("<doc xml:lang='en' />")) == '<doc xml:lang="en"></doc>' assert str(d.author) == str(d['author']) == "John Polk and John Palfrey" assert d.author._name == doc.author assert str(d[dc.creator]) == "John Polk" assert d[dc.creator]._name == dc.creator assert str(d[dc.creator:][1]) == "John Palfrey" d[dc.creator] = "Me!!!" assert str(d[dc.creator]) == "Me!!!" assert len(d[dc.creator:]) == 1 d[dc.creator:] = "You!!!" assert len(d[dc.creator:]) == 2 assert d[bbc.show](bbc.station) == "4" d[bbc.show](bbc.station, "5") assert d[bbc.show](bbc.station) == "5" e = Element('e') e.c = '<img src="foo">' assert e.__repr__(1) == '<e><c><img src="foo"></c></e>' e.c = '2 > 4' assert e.__repr__(1) == '<e><c>2 > 4</c></e>' e.c = 'CDATA sections are <em>closed</em> with ]]>.' assert e.__repr__(1) == '<e><c>CDATA sections are <em>closed</em> with ]]>.</c></e>' e.c = parse('<div xmlns="http://www.w3.org/1999/xhtml">i<br /><span></span>love<br />you</div>') assert e.__repr__(1) == '<e><c><div xmlns="http://www.w3.org/1999/xhtml">i<br /><span></span>love<br />you</div></c></e>' e = Element('e') e('c', 'that "sucks"') assert e.__repr__(1) == '<e c="that "sucks""></e>' assert quote("]]>") == "]]>" assert quote('< dkdkdsd dkd sksdksdfsd fsdfdsf]]> kfdfkg >') == '< dkdkdsd dkd sksdksdfsd fsdfdsf]]> kfdfkg >' assert parse('<x a="<"></x>').__repr__(1) == '<x a="<"></x>' assert parse('<a xmlns="http://a"><b xmlns="http://b"/></a>').__repr__(1) == '<a xmlns="http://a"><b xmlns="http://b"></b></a>' if __name__ == '__main__': unittest()