view orpg/xmltramp.py @ 15:cab94a90f9dc traipse_dev

Added 1.8.0 fix to log. Fixed problem with images that do not load the first time. Problem was in the fetching dict. del path instead.
author sirebral
date Tue, 21 Jul 2009 02:34:21 -0500
parents 4385a7d0efd1
children 551cd440acce
line wrap: on
line source

"""xmltramp: Make XML documents easily accessible."""

__version__ = "2.16"
__author__ = "Aaron Swartz"
__credits__ = "Many thanks to pjz, bitsko, and DanC."
__copyright__ = "(C) 2003 Aaron Swartz. GNU GPL 2."

if not hasattr(__builtins__, 'True'): True, False = 1, 0
def isstr(f): return isinstance(f, type('')) or isinstance(f, type(u''))
def islst(f): return isinstance(f, type(())) or isinstance(f, type([]))

empty = {'http://www.w3.org/1999/xhtml': ['img', 'br', 'hr', 'meta', 'link', 'base', 'param', 'input', 'col', 'area']}

def quote(x, elt=True):
    if elt and '<' in x and len(x) > 24 and x.find(']]>') == -1: return "<![CDATA["+x+"]]>"
    else: x = x.replace('&', '&amp;').replace('<', '&lt;').replace(']]>', ']]&gt;')
    if not elt: x = x.replace('"', '&quot;')
    return x

class Element:
    def __init__(self, name, attrs=None, children=None, prefixes=None):
        if islst(name) and name[0] == None: name = name[1]
        if attrs:
            na = {}
            for k in attrs.keys():
                if islst(k) and k[0] == None: na[k[1]] = attrs[k]
                else: na[k] = attrs[k]
            attrs = na
        self._name = name
        self._attrs = attrs or {}
        self._dir = children or []
        prefixes = prefixes or {}
        self._prefixes = dict(zip(prefixes.values(), prefixes.keys()))
        if prefixes: self._dNS = prefixes.get(None, None)
        else: self._dNS = None

    def __repr__(self, recursive=0, multiline=0, inprefixes=None):
        def qname(name, inprefixes):
            if islst(name):
                if inprefixes[name[0]] is not None:
                    return inprefixes[name[0]]+':'+name[1]
                else:
                    return name[1]
            else:
                return name

        def arep(a, inprefixes, addns=1):
            out = ''
            for p in self._prefixes.keys():
                if not p in inprefixes.keys():
                    if addns: out += ' xmlns'
                    if addns and self._prefixes[p]: out += ':'+self._prefixes[p]
                    if addns: out += '="'+quote(p, False)+'"'
                    inprefixes[p] = self._prefixes[p]
            for k in a.keys():
                out += ' ' + qname(k, inprefixes)+ '="' + quote(a[k], False) + '"'
            return out
        inprefixes = inprefixes or {u'http://www.w3.org/XML/1998/namespace':'xml'}

        # need to call first to set inprefixes:
        attributes = arep(self._attrs, inprefixes, recursive)
        out = '<' + qname(self._name, inprefixes)  + attributes
        if not self._dir and (self._name[0] in empty.keys()
          and self._name[1] in empty[self._name[0]]):
            out += ' />'
            return out
        out += '>'
        if recursive:
            content = 0
            for x in self._dir:
                if isinstance(x, Element): content = 1
            pad = '\n' + ('\t' * recursive)
            for x in self._dir:
                if multiline and content: out +=  pad
                if isstr(x): out += quote(x)
                elif isinstance(x, Element):
                    out += x.__repr__(recursive+1, multiline, inprefixes.copy())
                else:
                    raise TypeError, "I wasn't expecting "+`x`+"."
            if multiline and content: out += '\n' + ('\t' * (recursive-1))
        else:
            if self._dir: out += '...'
        out += '</'+qname(self._name, inprefixes)+'>'
        return out

    def __unicode__(self):
        text = ''
        for x in self._dir:
            text += unicode(x)
        return ' '.join(text.split())

    def __str__(self):
        return self.__unicode__().encode('utf-8')

    def __getattr__(self, n):
        if n[0] == '_': raise AttributeError, "Use foo['"+n+"'] to access the child element."
        if self._dNS: n = (self._dNS, n)
        for x in self._dir:
            if isinstance(x, Element) and x._name == n: return x
        raise AttributeError, 'No child element named \''+n+"'"

    def __hasattr__(self, n):
        for x in self._dir:
            if isinstance(x, Element) and x._name == n: return True
        return False

    def __setattr__(self, n, v):
        if n[0] == '_': self.__dict__[n] = v
        else: self[n] = v

    def __getitem__(self, n):
        if isinstance(n, type(0)): # d[1] == d._dir[1]
            return self._dir[n]
        elif isinstance(n, slice(0).__class__):
            # numerical slices
            if isinstance(n.start, type(0)): return self._dir[n.start:n.stop]
            # d['foo':] == all <foo>s
            n = n.start
            if self._dNS and not islst(n): n = (self._dNS, n)
            out = []
            for x in self._dir:
                if isinstance(x, Element) and x._name == n: out.append(x)
            return out
        else: # d['foo'] == first <foo>
            if self._dNS and not islst(n): n = (self._dNS, n)
            for x in self._dir:
                if isinstance(x, Element) and x._name == n: return x
            raise KeyError

    def __setitem__(self, n, v):
        if isinstance(n, type(0)): # d[1]
            self._dir[n] = v
        elif isinstance(n, slice(0).__class__):
            # d['foo':] adds a new foo
            n = n.start
            if self._dNS and not islst(n): n = (self._dNS, n)
            nv = Element(n)
            self._dir.append(nv)

        else: # d["foo"] replaces first <foo> and dels rest
            if self._dNS and not islst(n): n = (self._dNS, n)
            nv = Element(n); nv._dir.append(v)
            replaced = False
            todel = []
            for i in range(len(self)):
                if self[i]._name == n:
                    if replaced:
                        todel.append(i)
                    else:
                        self[i] = nv
                        replaced = True
            if not replaced: self._dir.append(nv)
            for i in todel: del self[i]

    def __delitem__(self, n):
        if isinstance(n, type(0)): del self._dir[n]
        elif isinstance(n, slice(0).__class__):
            # delete all <foo>s
            n = n.start
            if self._dNS and not islst(n): n = (self._dNS, n)
            for i in range(len(self)):
                if self[i]._name == n: del self[i]
        else:
            # delete first foo
            for i in range(len(self)):
                if self[i]._name == n: del self[i]
                break

    def __call__(self, *_pos, **_set):
        if _set:
            for k in _set.keys(): self._attrs[k] = _set[k]
        if len(_pos) > 1:
            for i in range(0, len(_pos), 2):
                self._attrs[_pos[i]] = _pos[i+1]
        if len(_pos) == 1 is not None:
            return self._attrs[_pos[0]]
        if len(_pos) == 0:
            return self._attrs

    def __len__(self): return len(self._dir)

class Namespace:
    def __init__(self, uri): self.__uri = uri
    def __getattr__(self, n): return (self.__uri, n)
    def __getitem__(self, n): return (self.__uri, n)

from xml.sax.handler import EntityResolver, DTDHandler, ContentHandler, ErrorHandler

class Seeder(EntityResolver, DTDHandler, ContentHandler, ErrorHandler):
    def __init__(self):
        self.stack = []
        self.ch = ''
        self.prefixes = {}
        ContentHandler.__init__(self)

    def startPrefixMapping(self, prefix, uri):
        if not self.prefixes.has_key(prefix): self.prefixes[prefix] = []
        self.prefixes[prefix].append(uri)
    def endPrefixMapping(self, prefix):
        self.prefixes[prefix].pop()

    def startElementNS(self, name, qname, attrs):
        ch = self.ch; self.ch = ''
        if ch and not ch.isspace(): self.stack[-1]._dir.append(ch)
        attrs = dict(attrs)
        newprefixes = {}
        for k in self.prefixes.keys(): newprefixes[k] = self.prefixes[k][-1]
        self.stack.append(Element(name, attrs, prefixes=newprefixes.copy()))

    def characters(self, ch):
        self.ch += ch

    def endElementNS(self, name, qname):
        ch = self.ch; self.ch = ''
        if ch and not ch.isspace(): self.stack[-1]._dir.append(ch)
        element = self.stack.pop()
        if self.stack:
            self.stack[-1]._dir.append(element)
        else:
            self.result = element

from xml.sax import make_parser
from xml.sax.handler import feature_namespaces

def seed(fileobj):
    seeder = Seeder()
    parser = make_parser()
    parser.setFeature(feature_namespaces, 1)
    parser.setContentHandler(seeder)
    parser.parse(fileobj)
    return seeder.result

def parse(text):
    from StringIO import StringIO
    return seed(StringIO(text))

def load(url):
    import urllib
    return seed(urllib.urlopen(url))

def unittest():
    parse('<doc>a<baz>f<b>o</b>ob<b>a</b>r</baz>a</doc>').__repr__(1,1) == \
      '<doc>\n\ta<baz>\n\t\tf<b>o</b>ob<b>a</b>r\n\t</baz>a\n</doc>'
    assert str(parse("<doc />")) == ""
    assert str(parse("<doc>I <b>love</b> you.</doc>")) == "I love you."
    assert parse("<doc>\nmom\nwow\n</doc>")[0].strip() == "mom\nwow"
    assert str(parse('<bing>  <bang> <bong>center</bong> </bang>  </bing>')) == "center"
    assert str(parse('<doc>\xcf\x80</doc>')) == '\xcf\x80'
    d = Element('foo', attrs={'foo':'bar'}, children=['hit with a', Element('bar'), Element('bar')])

    try:
        d._doesnotexist
        raise "ExpectedError", "but found success. Damn."
    except AttributeError: pass
    assert d.bar._name == 'bar'
    try:
        d.doesnotexist
        raise "ExpectedError", "but found success. Damn."
    except AttributeError: pass
    assert hasattr(d, 'bar') == True
    assert d('foo') == 'bar'
    d(silly='yes')
    assert d('silly') == 'yes'
    assert d() == d._attrs
    assert d[0] == 'hit with a'
    d[0] = 'ice cream'
    assert d[0] == 'ice cream'
    del d[0]
    assert d[0]._name == "bar"
    assert len(d[:]) == len(d._dir)
    assert len(d[1:]) == len(d._dir) - 1
    assert len(d['bar':]) == 2
    d['bar':] = 'baz'
    assert len(d['bar':]) == 3
    assert d['bar']._name == 'bar'
    d = Element('foo')
    doc = Namespace("http://example.org/bar")
    bbc = Namespace("http://example.org/bbc")
    dc = Namespace("http://purl.org/dc/elements/1.1/")
    d = parse("""<doc version="2.7182818284590451"
      xmlns="http://example.org/bar"
      xmlns:dc="http://purl.org/dc/elements/1.1/"
      xmlns:bbc="http://example.org/bbc">
            <author>John Polk and John Palfrey</author>
            <dc:creator>John Polk</dc:creator>
            <dc:creator>John Palfrey</dc:creator>
            <bbc:show bbc:station="4">Buffy</bbc:show>
    </doc>""")
    assert repr(d) == '<doc version="2.7182818284590451">...</doc>'
    assert d.__repr__(1) == '<doc xmlns:bbc="http://example.org/bbc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://example.org/bar" version="2.7182818284590451"><author>John Polk and John Palfrey</author><dc:creator>John Polk</dc:creator><dc:creator>John Palfrey</dc:creator><bbc:show bbc:station="4">Buffy</bbc:show></doc>'
    assert d.__repr__(1,1) == '<doc xmlns:bbc="http://example.org/bbc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://example.org/bar" version="2.7182818284590451">\n\t<author>John Polk and John Palfrey</author>\n\t<dc:creator>John Polk</dc:creator>\n\t<dc:creator>John Palfrey</dc:creator>\n\t<bbc:show bbc:station="4">Buffy</bbc:show>\n</doc>'
    assert repr(parse("<doc xml:lang='en' />")) == '<doc xml:lang="en"></doc>'
    assert str(d.author) == str(d['author']) == "John Polk and John Palfrey"
    assert d.author._name == doc.author
    assert str(d[dc.creator]) == "John Polk"
    assert d[dc.creator]._name == dc.creator
    assert str(d[dc.creator:][1]) == "John Palfrey"
    d[dc.creator] = "Me!!!"
    assert str(d[dc.creator]) == "Me!!!"
    assert len(d[dc.creator:]) == 1
    d[dc.creator:] = "You!!!"
    assert len(d[dc.creator:]) == 2
    assert d[bbc.show](bbc.station) == "4"
    d[bbc.show](bbc.station, "5")
    assert d[bbc.show](bbc.station) == "5"
    e = Element('e')
    e.c = '<img src="foo">'
    assert e.__repr__(1) == '<e><c>&lt;img src="foo"></c></e>'
    e.c = '2 > 4'
    assert e.__repr__(1) == '<e><c>2 > 4</c></e>'
    e.c = 'CDATA sections are <em>closed</em> with ]]>.'
    assert e.__repr__(1) == '<e><c>CDATA sections are &lt;em>closed&lt;/em> with ]]&gt;.</c></e>'
    e.c = parse('<div xmlns="http://www.w3.org/1999/xhtml">i<br /><span></span>love<br />you</div>')
    assert e.__repr__(1) == '<e><c><div xmlns="http://www.w3.org/1999/xhtml">i<br /><span></span>love<br />you</div></c></e>'
    e = Element('e')
    e('c', 'that "sucks"')
    assert e.__repr__(1) == '<e c="that &quot;sucks&quot;"></e>'
    assert quote("]]>") == "]]&gt;"
    assert quote('< dkdkdsd dkd sksdksdfsd fsdfdsf]]> kfdfkg >') == '&lt; dkdkdsd dkd sksdksdfsd fsdfdsf]]&gt; kfdfkg >'
    assert parse('<x a="&lt;"></x>').__repr__(1) == '<x a="&lt;"></x>'
    assert parse('<a xmlns="http://a"><b xmlns="http://b"/></a>').__repr__(1) == '<a xmlns="http://a"><b xmlns="http://b"></b></a>'

if __name__ == '__main__': unittest()