view orpg/minidom.py @ 5:5a2c95067daf grumpy-goblin

This update causes the software to remove approot.py and approot.pyc These files will be created the next time you run but if they remain they may call an old directory.
author sirebral
date Wed, 15 Jul 2009 03:59:50 -0500
parents 4385a7d0efd1
children 551cd440acce
line wrap: on
line source

"""\
minidom.py -- a lightweight DOM implementation based on SAX.

parse( "foo.xml" )

parseString( "<foo><bar/></foo>" )

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
        interface
 * SAX 2 namespaces
"""

from orpg import pulldom
import string
from StringIO import StringIO
import types

class Node:
    ELEMENT_NODE                = 1
    ATTRIBUTE_NODE              = 2
    TEXT_NODE                   = 3
    CDATA_SECTION_NODE          = 4
    ENTITY_REFERENCE_NODE       = 5
    ENTITY_NODE                 = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE                = 8
    DOCUMENT_NODE               = 9
    DOCUMENT_TYPE_NODE          = 10
    DOCUMENT_FRAGMENT_NODE      = 11
    NOTATION_NODE               = 12
    allnodes = {}
    _debug = 0
    _makeParentNodes = 1
    debug = None

    def __init__(self):
        self.childNodes = []
        self.ownerDocument = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        if key[0:2] == "__":
            raise AttributeError
        # getattr should never call getattr!
        if self.__dict__.has_key("inGetAttr"):
            del self.inGetAttr
            raise AttributeError, key
        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            if hasattr(self, attrname):
                del self.inGetAttr
                return (lambda self=self, attrname=attrname:
                                getattr(self, attrname))
            else:
                del self.inGetAttr
                raise AttributeError, key
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                raise AttributeError, key
            del self.inGetAttr
            return func()

    def __nonzero__(self):
        return 1

    def toxml(self,pretty=0):
        writer = StringIO()
        self.writexml(writer,pretty)
        return str(writer.getvalue())

    def hasChildNodes(self):
        if self.childNodes:
            return 1
        else:
            return 0

    def getChildren(self):
        return self.childNodes

    def _get_firstChild(self):
        if self.hasChildNodes():
            return self.childNodes[0]
        else:
            return None

    def _get_lastChild(self):
        return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        if refChild == None:
            return self.appendChild(newChild)
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        if self._makeParentNodes:
            newChild.parentNode = self
            newChild.ownerDocument = self.ownerDocument
        return newChild

    def appendChild(self, node):
        if self.childNodes:
            last = self.lastChild
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        node.ownerDocument = self.ownerDocument
        node.parentNode = self
        self.childNodes.append(node)
        return node

    def replaceChild(self, newChild, oldChild):
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        newChild.ownerDocument = self.ownerDocument
        return oldChild

    def removeChild(self, oldChild):
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        return oldChild

    def cloneNode(self, deep=0):
        newNode = Node()
        if deep:
            self.deep_clone(newNode)
        return newNode

    def deep_clone(self, newNode):
        for child in self.childNodes:
            new_child = child.cloneNode(1)
            newNode.appendChild(new_child)

    def normalize(self):
        """Join adjacent Text nodes and delete empty Text nodes
        in the full depth of the sub-tree underneath this Node.
        """
        i = 0
        while i < len(self.childNodes):
            cn = self.childNodes[i]
            if cn.nodeType == Node.TEXT_NODE:
                i = i + 1
                # join adjacent Text nodes
                while i < len(self.childNodes) and self.childNodes[i].nodeType == Node.TEXT_NODE:
                    cn.nodeValue = cn.data = cn.data + self.childNodes[i].data
                    del self.childNodes[i]
                # delete empty nodes
                if cn.nodeValue.strip() == "":
                    i = i - 1
                    del self.childNodes[i]
                continue
            elif cn.nodeType == Node.ELEMENT_NODE:
                cn.normalize()
            i = i + 1

    def unlink(self):
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if self.attributes:
            for attr in self._attrs.values():
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]

def _write_data(writer, data):
    "Writes datachars to writer."
    data = string.replace(data, "&", "&amp;")
    data = string.replace(data, "<", "&lt;")
    data = string.replace(data, "\"", "&quot;")
    data = string.replace(data, ">", "&gt;")
    writer.write(data)

def _getElementsByTagNameHelper(parent, name, rc):
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE and \
            (name == "*" or node.tagName == name):
            rc.append(node)
        _getElementsByTagNameHelper(node, name, rc)
    return rc

def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if ((localName == "*" or node.tagName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, name, rc)

class Attr(Node):
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # skip setattr for performance
        self.__dict__["localName"] = localName or qName
        self.__dict__["nodeName"] = self.__dict__["name"] = qName
        self.__dict__["namespaceURI"] = namespaceURI
        self.__dict__["prefix"] = prefix
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        if name in ("value", "nodeValue"):
            self.__dict__["value"] = self.__dict__["nodeValue"] = value
        else:
            self.__dict__[name] = value

    def cloneNode(self, deep=0):
        newNode = Attr(self.__dict__["name"],self.__dict__["namespaceURI"],
                        self.__dict__["localName"],self.__dict__["prefix"])
        newNode.__dict__["value"] = newNode.__dict__["nodeValue"] = self.value
        if deep:
            self.deep_clone(newNode)
        return newNode

class AttributeList:
    """the attribute list is a transient interface to the underlying
    dictionaries.  mutations here will change the underlying element's
    dictionary"""
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS
        self.length = len(self._attrs.keys())

    def copy(self):
        clone = AttributeList(self._attrs.copy(),self._attrsNS.copy())
        return clone

    def item(self, index):
        try:
            return self[self.keys()[index]]
        except IndexError:
            return None

    def items(self):
        return map(lambda node: (node.tagName, node.value),
                   self._attrs.values())

    def itemsNS(self):
        return map(lambda node: ((node.URI, node.localName), node.value),
                   self._attrs.values())

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        if type(attname_or_tuple) is types.TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        if type(value) is types.StringType:
            node = Attr(attname)
            node.value=value
        else:
            assert isinstance(value, Attr) or type(value) is types.StringType
            node = value
        old = self._attrs.get(attname, None)
        if old:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

class Element(Node):
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None
        self._attrs={}  # attributes are double-indexed:
        self._attrsNS={}#    tagName -> Attribute
                #    URI,localName -> Attribute
                # in the future: consider lazy generation of attribute objects
                #                this is too tricky for now because of headaches
                #                with namespaces.

    def cloneNode(self, deep=0):
        newNode = Element(self.tagName,self.namespaceURI,self.prefix,self.localName )
        keys = self._attrs.keys()
        for k in keys:
            attr = self._attrs[k].cloneNode(1)
            newNode.setAttributeNode(attr)
        if deep:
            self.deep_clone(newNode)
        return newNode

    def _get_tagName(self):
        return str(self.tagName)

    def getAttributeKeys(self):
        result = []
        if self._attrs:
            return self._attrs.keys()
        else:
            return None

    def getAttribute(self, attname):
        if self.hasAttribute(attname):
            return str(self._attrs[attname].value)
        else:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)
        # FIXME: return original node if something changed.

    def getAttributeNode(self, attrname):
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        old = self._attrs.get(attr.name, None)
        if old:
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
        # FIXME: return old value if something changed

    def removeAttribute(self, name):
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def hasAttribute(self, name):
        return self._attrs.has_key(name)

    def hasAttributeNS(self, namespaceURI, localName):
        return self._attrsNS.has_key((namespaceURI, localName))

    def getElementsByTagName(self, name):
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, [])

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    # undocumented
    def writexml(self, writer, tabs=0):
        tab_str = ""
        if tabs:
            tab_str = "\n" + ("  "*(tabs-1))
            tabs += 1
        writer.write(tab_str + "<" + self.tagName)
        a_names = self._get_attributes().keys()
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, self._get_attributes()[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer,tabs)
            if self.childNodes[0].nodeType == Node.TEXT_NODE:
                tab_str = ""
            writer.write(tab_str + "</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        return AttributeList(self._attrs, self._attrsNS)

class Comment(Node):
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.data = self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None

    def writexml(self, writer, tabs=0):
        writer.write("<!--%s-->" % self.data)

    def cloneNode(self, deep=0):
        newNode = Comment(self.data)
        if deep:
            self.deep_clone(newNode)
        return newNode

class ProcessingInstruction(Node):
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.target = self.nodeName = target
        self.data = self.nodeValue = data
        self.attributes = None

    def writexml(self, writer, tabs=0):
        writer.write("<?%s %s?>" % (self.target, self.data))

    def cloneNode(self, deep=0):
        newNode = ProcessingInstruction(self.target, self.data)
        if deep:
            self.deep_clone(newNode)
        return newNode

class Text(Node):
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.data = self.nodeValue = data
        self.attributes = None

    def __repr__(self):
        if len(self.data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM Text node \"%s%s\">" % (self.data[0:10], dotdotdot)

    def writexml(self, writer, tabs=0):
        _write_data(writer, self.data)

    def _get_nodeValue(self):
        return str(self.nodeValue)

    def _set_nodeValue(self,data):
        self.data = self.nodeValue = data

    def cloneNode(self, deep=0):
        newNode = Text(self.data)
        if deep:
            self.deep_clone(newNode)
        return newNode

def _nssplit(qualifiedName):
    fields = string.split(qualifiedName,':', 1)
    if len(fields) == 2:
        return fields
    elif len(fields) == 1:
        return ('', fields[0])

class Document(Node):
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None
        self.ownerDocument = self

    def appendChild(self, node):
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError, "Two document elements disallowed"
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node
    createElement = Element
    createTextNode = Text
    createComment = Comment
    createProcessingInstruction = ProcessingInstruction
    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix,localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix,localName = _nssplit(qualifiedName)
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        _getElementsByTagNameNSHelper(self, namespaceURI, localName)

    def unlink(self):
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        for node in self.childNodes:
            node.writexml(writer)

def _doparse(func, args, kwargs):
    events = apply(func, args, kwargs)
    toktype, rootNode = events.getEvent()
    events.expandNode(rootNode)
    return rootNode

def parse(*args, **kwargs):
    "Parse a file into a DOM by filename or file object"
    return _doparse(pulldom.parse, args, kwargs)

def parseString(*args, **kwargs):
    "Parse a file into a DOM from a string"
    return _doparse(pulldom.parseString, args, kwargs)