comparison orpg/minidom.py @ 0:4385a7d0efd1 grumpy-goblin

Deleted and repushed it with the 'grumpy-goblin' branch. I forgot a y
author sirebral
date Tue, 14 Jul 2009 16:41:58 -0500
parents
children 551cd440acce
comparison
equal deleted inserted replaced
-1:000000000000 0:4385a7d0efd1
1 """\
2 minidom.py -- a lightweight DOM implementation based on SAX.
3
4 parse( "foo.xml" )
5
6 parseString( "<foo><bar/></foo>" )
7
8 Todo:
9 =====
10 * convenience methods for getting elements and text.
11 * more testing
12 * bring some of the writer and linearizer code into conformance with this
13 interface
14 * SAX 2 namespaces
15 """
16
17 from orpg import pulldom
18 import string
19 from StringIO import StringIO
20 import types
21
class Node:
    """Base class shared by every node type in this DOM implementation.

    The integer class constants are the node-type codes that subclasses
    expose through their ``nodeType`` attribute.

    Attribute access is extended by ``__getattr__``: reading ``node.foo``
    falls back to calling ``node._get_foo()`` (this is how ``firstChild``,
    ``lastChild`` and ``Element.attributes`` work), and reading
    ``node._get_foo`` for a plain attribute ``foo`` yields a zero-argument
    callable returning that attribute.

    Class attributes:
        allnodes -- debug registry of live nodes, only filled when _debug
        _debug -- set to a true value to record node creation/deletion
        _makeParentNodes -- when true, insertBefore/replaceChild set the
            parentNode of the node they add
        debug -- stream receiving debug messages, created on first use
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12
    allnodes = {}
    _debug = 0
    _makeParentNodes = 1
    debug = None

    def __init__(self):
        """Create a node with no children and no owner document."""
        self.childNodes = []
        self.ownerDocument = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve attributes that are not stored on the instance.

        ``key`` is served by calling ``_get_<key>()`` when such an accessor
        exists; ``_get_<name>`` is served by a callable returning the plain
        attribute ``<name>``.  Raises AttributeError otherwise.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        # getattr should never call getattr!  The inGetAttr flag marks a
        # lookup that is already in progress; a nested miss clears the flag
        # and fails immediately instead of recursing forever.
        if "inGetAttr" in self.__dict__:
            del self.inGetAttr
            raise AttributeError(key)
        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            if hasattr(self, attrname):
                del self.inGetAttr
                return (lambda self=self, attrname=attrname:
                        getattr(self, attrname))
            # The failed nested lookup has normally removed the flag
            # already; only delete it if it is still there so that the
            # error raised names the attribute that was asked for.
            if "inGetAttr" in self.__dict__:
                del self.inGetAttr
            raise AttributeError(key)
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                # the nested lookup cleared inGetAttr before failing
                raise AttributeError(key)
            del self.inGetAttr
            return func()

    def __nonzero__(self):
        """A node is always true, even when it has no children."""
        return 1

    def toxml(self, pretty=0):
        """Return the node serialised by its ``writexml`` method as a str.

        ``pretty`` is forwarded as the second argument of ``writexml``.
        """
        writer = StringIO()
        self.writexml(writer, pretty)
        return str(writer.getvalue())

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def getChildren(self):
        """Return the live list of child nodes."""
        return self.childNodes

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]
        return None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]
        return None

    def _relinkSiblings(self):
        """Recompute previousSibling/nextSibling for every child."""
        previous = None
        for child in self.childNodes:
            child.previousSibling = previous
            if previous is not None:
                previous.nextSibling = child
            previous = child
        if previous is not None:
            previous.nextSibling = None

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild and return newChild.

        A refChild of None appends newChild.  Raises ValueError when
        refChild is not a child of this node.
        """
        if refChild is None:
            return self.appendChild(newChild)
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        if self._makeParentNodes:
            newChild.parentNode = self
        newChild.ownerDocument = self.ownerDocument
        self._relinkSiblings()
        return newChild

    def appendChild(self, node):
        """Add node as the last child, link it in, and return it."""
        if self.childNodes:
            last = self.lastChild
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        node.ownerDocument = self.ownerDocument
        node.parentNode = self
        self.childNodes.append(node)
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in the place of oldChild and return oldChild.

        Raises ValueError when oldChild is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        newChild.ownerDocument = self.ownerDocument
        if self._makeParentNodes:
            newChild.parentNode = self
        self._relinkSiblings()
        return oldChild

    def removeChild(self, oldChild):
        """Remove oldChild from the children and return it.

        Raises ValueError when oldChild is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        self._relinkSiblings()
        return oldChild

    def cloneNode(self, deep=0):
        """Return a new bare Node; with deep, the children are cloned too."""
        newNode = Node()
        if deep:
            self.deep_clone(newNode)
        return newNode

    def deep_clone(self, newNode):
        """Append a deep clone of each of this node's children to newNode."""
        for child in self.childNodes:
            new_child = child.cloneNode(1)
            newNode.appendChild(new_child)

    def normalize(self):
        """Join adjacent Text nodes and delete empty Text nodes
        in the full depth of the sub-tree underneath this Node.

        A Text node counts as empty when it holds only whitespace.
        """
        i = 0
        while i < len(self.childNodes):
            cn = self.childNodes[i]
            if cn.nodeType == Node.TEXT_NODE:
                i = i + 1
                # join adjacent Text nodes
                while (i < len(self.childNodes) and
                       self.childNodes[i].nodeType == Node.TEXT_NODE):
                    cn.nodeValue = cn.data = cn.data + self.childNodes[i].data
                    del self.childNodes[i]
                # delete empty nodes
                if cn.nodeValue.strip() == "":
                    i = i - 1
                    del self.childNodes[i]
                continue
            elif cn.nodeType == Node.ELEMENT_NODE:
                cn.normalize()
            i = i + 1
        # nodes may have been merged or dropped above
        self._relinkSiblings()

    def unlink(self):
        """Break the references held by this node and its whole sub-tree.

        Afterwards ``childNodes`` is None, so the node is no longer usable
        as a tree node.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        # Not every node defines "attributes"; treat a missing one as None.
        if getattr(self, "attributes", None):
            # copy the values: removeAttributeNode mutates self._attrs
            for attr in list(self._attrs.values()):
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]
187
def _write_data(writer, data):
    """Write data to writer with XML markup characters escaped.

    Replaces ``&``, ``<``, ``"`` and ``>`` by their entity references and
    passes the result to ``writer.write``.
    """
    # "&" has to go first, otherwise the "&" of the entities produced by
    # the later replacements would be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace("\"", "&quot;")
    data = data.replace(">", "&gt;")
    writer.write(data)
195
def _getElementsByTagNameHelper(parent, name, rc):
    """Collect, in document order, the matching elements below parent.

    Every descendant whose nodeType is ELEMENT_NODE and whose tagName
    equals name (or any element when name is "*") is appended to rc.
    Returns rc.
    """
    match_any = (name == "*")
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if match_any or child.tagName == name:
                rc.append(child)
        # descend into every child, whatever its type
        _getElementsByTagNameHelper(child, name, rc)
    return rc
203
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect, in document order, namespace-matching elements below parent.

    An element matches when its localName equals localName (or localName
    is "*") and its namespaceURI equals nsURI (or nsURI is "*").  Matches
    are appended to rc, which is also returned.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare against localName, not tagName: tagName is the
            # qualified name and still carries the prefix.
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
211
class Attr(Node):
    """An attribute node.

    ``value`` and ``nodeValue`` are kept identical: assigning either one
    stores the value under both names.  All state lives directly in the
    instance ``__dict__``.
    """
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        """Create the attribute qName; localName defaults to qName."""
        # write straight into __dict__ to skip __setattr__ for performance
        fields = self.__dict__
        fields["localName"] = localName or qName
        fields["nodeName"] = fields["name"] = qName
        fields["namespaceURI"] = namespaceURI
        fields["prefix"] = prefix
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store value under name, mirroring value and nodeValue."""
        if name == "value" or name == "nodeValue":
            self.__dict__["value"] = self.__dict__["nodeValue"] = value
            return
        self.__dict__[name] = value

    def cloneNode(self, deep=0):
        """Return a copy of this attribute carrying the same value."""
        source = self.__dict__
        clone = Attr(source["name"], source["namespaceURI"],
                     source["localName"], source["prefix"])
        clone.__dict__["value"] = clone.__dict__["nodeValue"] = self.value
        if deep:
            self.deep_clone(clone)
        return clone
238
class AttributeList:
    """the attribute list is a transient interface to the underlying
    dictionaries.  mutations here will change the underlying element's
    dictionary

    attrs maps a qualified attribute name to its attribute node; attrsNS
    maps a (namespaceURI, localName) tuple to the same node.
    """
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS
        # kept as a plain attribute for callers that read it directly;
        # refreshed whenever this object changes the dictionaries
        self.length = len(self._attrs)

    def copy(self):
        """Return an AttributeList over shallow copies of both dicts."""
        return AttributeList(self._attrs.copy(), self._attrsNS.copy())

    def item(self, index):
        """Return the index-th attribute node, or None if out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (name, value) pairs, one per attribute."""
        return [(node.nodeName, node.value)
                for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        """Return the qualified attribute names."""
        return self._attrs.keys()

    def keysNS(self):
        """Return the (namespaceURI, localName) keys."""
        return self._attrsNS.keys()

    def values(self):
        """Return the attribute nodes."""
        return self._attrs.values()

    def __len__(self):
        # computed on demand so it can never disagree with the dictionary
        return len(self._attrs)

    def __cmp__(self, other):
        # two lists over the very same dictionary compare equal
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by name, or by (namespaceURI, localName) tuple.

        Raises KeyError when there is no such attribute.
        """
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set attribute attname from a string or from an Attr node.

        A string creates a new Attr named attname holding that value.
        Raises TypeError for any other kind of value.
        """
        # (str, unicode) on Python 2, (str, str) on Python 3
        if isinstance(value, (type(""), type(u""))):
            node = Attr(attname)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
        old = self._attrs.get(attname, None)
        if old:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        self.length = len(self._attrs)

    def __delitem__(self, attname_or_tuple):
        """Unlink the attribute and remove it from both dictionaries."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        self.length = len(self._attrs)
310
class Element(Node):
    """An element node with attributes and child nodes.

    Attributes are stored in two dictionaries holding the same Attr
    nodes: ``_attrs`` keyed by qualified name and ``_attrsNS`` keyed by
    (namespaceURI, localName).  Reading ``element.attributes`` yields an
    AttributeList over those dictionaries.
    """
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        """Create the element tagName; localName defaults to tagName."""
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None
        self._attrs = {}    # attributes are double-indexed:
        self._attrsNS = {}  #    tagName -> Attribute
                            #    URI,localName -> Attribute
        # in the future: consider lazy generation of attribute objects
        #                this is too tricky for now because of headaches
        #                with namespaces.

    def cloneNode(self, deep=0):
        """Return a copy with cloned attributes; with deep, children too."""
        newNode = Element(self.tagName, self.namespaceURI, self.prefix,
                          self.localName)
        for k in list(self._attrs.keys()):
            attr = self._attrs[k].cloneNode(1)
            newNode.setAttributeNode(attr)
        if deep:
            self.deep_clone(newNode)
        return newNode

    def _get_tagName(self):
        """Return the tag name converted with str()."""
        return str(self.tagName)

    def getAttributeKeys(self):
        """Return the attribute names, or None if there are no attributes."""
        if self._attrs:
            return self._attrs.keys()
        else:
            return None

    def getAttribute(self, attname):
        """Return the value of attname as a str, or "" if it is not set."""
        if self.hasAttribute(attname):
            return str(self._attrs[attname].value)
        else:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the namespaced attribute.

        Raises KeyError when the attribute does not exist.
        """
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Set attribute attname to value, replacing any existing one."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Set a namespaced attribute given as "prefix:local" or "local"."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)
        # FIXME: return original node if something changed.

    def getAttributeNode(self, attrname):
        """Return the Attr node named attrname, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the namespaced Attr node; raises KeyError if missing."""
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Store attr under both indexes, unlinking a same-named old node."""
        old = self._attrs.get(attr.name, None)
        if old:
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
        # FIXME: return old value if something changed

    def removeAttribute(self, name):
        """Remove the attribute name; raises KeyError if it is not set."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; raises KeyError if not set."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink node and drop it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def hasAttribute(self, name):
        """Return a true value if the attribute name is set."""
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        """Return a true value if the namespaced attribute is set."""
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Return the descendant elements named name ("*" matches all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the descendant elements matching namespace and local name.

        "*" is a wildcard for either argument.
        """
        # Keep our own reference to the list so that the result does not
        # depend on what the helper returns.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    # undocumented
    def writexml(self, writer, tabs=0):
        """Write this element and its sub-tree to writer as XML.

        A non-zero tabs turns on pretty printing: the start tag is put on
        a new line followed by tabs-1 indent units and the children are
        written with tabs+1.  With tabs of 0 no whitespace is added.
        Attributes are written sorted by name.
        """
        tab_str = ""
        if tabs:
            tab_str = "\n" + (" "*(tabs-1))
            tabs += 1
        writer.write(tab_str + "<" + self.tagName)
        # read the dictionary directly instead of building a fresh
        # AttributeList for every attribute
        a_names = list(self._attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, self._attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer, tabs)
            # keep the end tag on the same line as leading text content
            if self.childNodes[0].nodeType == Node.TEXT_NODE:
                tab_str = ""
            writer.write(tab_str + "</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return a new AttributeList over this element's attributes."""
        return AttributeList(self._attrs, self._attrsNS)
440
class Comment(Node):
    """A comment node; its text is held in both data and nodeValue."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        """Create a comment containing data."""
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None

    def writexml(self, writer, tabs=0):
        """Write the comment markup to writer; tabs is not used."""
        markup = "<!--%s-->" % self.data
        writer.write(markup)

    def cloneNode(self, deep=0):
        """Return a new Comment with the same data."""
        duplicate = Comment(self.data)
        if not deep:
            return duplicate
        self.deep_clone(duplicate)
        return duplicate
458
class ProcessingInstruction(Node):
    """A processing-instruction node made of a target and its data."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        """Create the instruction; target doubles as the nodeName."""
        Node.__init__(self)
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def writexml(self, writer, tabs=0):
        """Write the instruction markup to writer; tabs is not used."""
        markup = "<?%s %s?>" % (self.target, self.data)
        writer.write(markup)

    def cloneNode(self, deep=0):
        """Return a new ProcessingInstruction with the same target/data."""
        duplicate = ProcessingInstruction(self.target, self.data)
        if not deep:
            return duplicate
        self.deep_clone(duplicate)
        return duplicate
476
class Text(Node):
    """A text node; its content is held in both data and nodeValue."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        """Create a text node containing data."""
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def __repr__(self):
        # show at most the first ten characters of the content
        preview = self.data[0:10]
        suffix = ""
        if len(self.data) > 10:
            suffix = "..."
        return "<DOM Text node \"%s%s\">" % (preview, suffix)

    def writexml(self, writer, tabs=0):
        """Write the escaped text to writer; tabs is not used."""
        _write_data(writer, self.data)

    def _get_nodeValue(self):
        """Return nodeValue converted with str()."""
        value = self.nodeValue
        return str(value)

    def _set_nodeValue(self, data):
        """Store data as both data and nodeValue."""
        self.data = data
        self.nodeValue = data

    def cloneNode(self, deep=0):
        """Return a new Text node with the same data."""
        duplicate = Text(self.data)
        if not deep:
            return duplicate
        self.deep_clone(duplicate)
        return duplicate
507
def _nssplit(qualifiedName):
    """Split a qualified name at its first ":" into prefix and local name.

    Returns the two-item list [prefix, localName] when a colon is present
    and the tuple ('', qualifiedName) otherwise; either unpacks into two
    values.
    """
    fields = qualifiedName.split(':', 1)
    if len(fields) == 2:
        return fields
    # no colon: split() returned the whole name as the only field
    return ('', fields[0])
514
class Document(Node):
    """The root node of a DOM tree.

    A document is its own ownerDocument and accepts at most one element
    child, which it exposes as ``documentElement``.  The ``create*``
    attributes are the node classes themselves, so calling them builds
    the corresponding node.
    """
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None
        self.ownerDocument = self

    def appendChild(self, node):
        """Append node and return it.

        The first element appended becomes documentElement.  Raises
        TypeError when a second element is appended.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError("Two document elements disallowed")
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node

    createElement = Element
    createTextNode = Text
    createComment = Comment
    createProcessingInstruction = ProcessingInstruction
    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for the namespaced qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for the namespaced qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the elements matching namespace and local name.

        "*" is a wildcard for either argument.
        """
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        """Drop documentElement, then unlink the whole tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return the elements named name ("*" matches all)."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer, tabs=0):
        """Write every child to writer, forwarding tabs to each of them.

        tabs is accepted because toxml() always passes its pretty flag as
        a second argument to writexml.
        """
        for node in self.childNodes:
            node.writexml(writer, tabs)
563
def _doparse(func, args, kwargs):
    """Build a DOM tree from the event stream that func produces.

    func is called with the positional arguments args and the keyword
    arguments kwargs.  The node of the first event it yields is expanded
    through the stream's expandNode and returned.
    """
    events = func(*args, **kwargs)
    toktype, rootNode = events.getEvent()
    events.expandNode(rootNode)
    return rootNode
569
def parse(*args, **kwargs):
    """Build a DOM tree from a file.

    All arguments are handed unchanged to pulldom.parse.
    """
    document = _doparse(pulldom.parse, args, kwargs)
    return document
573
def parseString(*args, **kwargs):
    """Build a DOM tree from XML held in a string.

    All arguments are handed unchanged to pulldom.parseString.
    """
    document = _doparse(pulldom.parseString, args, kwargs)
    return document