Mercurial > traipse_dev
comparison orpg/minidom.py @ 0:4385a7d0efd1 grumpy-goblin
Deleted and repushed it with the 'grumpy-goblin' branch. I forgot a y
author | sirebral |
---|---|
date | Tue, 14 Jul 2009 16:41:58 -0500 |
parents | |
children | 551cd440acce |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4385a7d0efd1 |
---|---|
1 """\ | |
2 minidom.py -- a lightweight DOM implementation based on SAX. | |
3 | |
4 parse( "foo.xml" ) | |
5 | |
6 parseString( "<foo><bar/></foo>" ) | |
7 | |
8 Todo: | |
9 ===== | |
10 * convenience methods for getting elements and text. | |
11 * more testing | |
12 * bring some of the writer and linearizer code into conformance with this | |
13 interface | |
14 * SAX 2 namespaces | |
15 """ | |
16 | |
17 from orpg import pulldom | |
18 import string | |
19 from StringIO import StringIO | |
20 import types | |
21 | |
class Node:
    """Base class for every DOM node type defined in this module.

    A node keeps its children in the ``childNodes`` list and remembers the
    document that owns it in ``ownerDocument``.  Attributes that are not
    found the normal way are resolved through ``__getattr__``: reading
    ``node.foo`` calls ``node._get_foo()`` when such a method exists, and
    reading ``node._get_foo`` yields a getter for an existing ``foo``.
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12
    # Bookkeeping that is only filled in while Node._debug is true.
    allnodes = {}
    _debug = 0
    _makeParentNodes = 1
    debug = None

    def __init__(self):
        self.childNodes = []
        self.ownerDocument = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve ``foo`` via ``_get_foo()`` and ``_get_foo`` via ``foo``.

        Raises AttributeError naming *key* when neither form exists.  The
        instance attribute ``inGetAttr`` is a re-entrancy flag: a lookup
        made while it is set fails immediately, so the two resolution
        rules cannot call each other forever.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        # getattr should never call getattr!
        if "inGetAttr" in self.__dict__:
            del self.inGetAttr
            raise AttributeError(key)
        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            found = hasattr(self, attrname)
            # The nested lookup may already have removed the flag, so drop
            # it without assuming it is still there.
            self.__dict__.pop("inGetAttr", None)
            if found:
                return (lambda self=self, attrname=attrname:
                        getattr(self, attrname))
            raise AttributeError(key)
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                self.__dict__.pop("inGetAttr", None)
                raise AttributeError(key)
            self.__dict__.pop("inGetAttr", None)
            return func()

    def __nonzero__(self):
        # A node is always true, even when it has no children.
        return 1

    def toxml(self,pretty=0):
        """Return the node serialised by ``writexml`` as a string.

        *pretty* is handed to ``writexml`` as its second argument.
        """
        writer = StringIO()
        self.writexml(writer,pretty)
        return str(writer.getvalue())

    def hasChildNodes(self):
        """Return 1 when the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def getChildren(self):
        """Return the live list of child nodes."""
        return self.childNodes

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.hasChildNodes():
            return self.childNodes[0]
        else:
            return None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.hasChildNodes():
            return self.childNodes[-1]
        else:
            return None

    def _relinkSiblings(self, index=None):
        """Recompute previousSibling/nextSibling from ``childNodes``.

        With *index* given, only that slot and its two neighbours are
        refreshed; with None every child is refreshed.
        """
        kids = self.childNodes
        count = len(kids)
        if index is None:
            positions = range(count)
        else:
            positions = range(max(index - 1, 0), min(index + 2, count))
        for pos in positions:
            child = kids[pos]
            if pos > 0:
                child.previousSibling = kids[pos - 1]
            else:
                child.previousSibling = None
            if pos + 1 < count:
                child.nextSibling = kids[pos + 1]
            else:
                child.nextSibling = None

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* in front of *refChild* and return *newChild*.

        A *refChild* of None appends instead.  Raises ValueError when
        *refChild* is not a child of this node.
        """
        if refChild is None:
            return self.appendChild(newChild)
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        # Keep the sibling chain in step with the list, as appendChild does.
        self._relinkSiblings(index)
        if self._makeParentNodes:
            newChild.parentNode = self
        newChild.ownerDocument = self.ownerDocument
        return newChild

    def appendChild(self, node):
        """Add *node* as the last child and return it."""
        if self.childNodes:
            last = self.lastChild
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        node.ownerDocument = self.ownerDocument
        node.parentNode = self
        self.childNodes.append(node)
        return node

    def replaceChild(self, newChild, oldChild):
        """Put *newChild* in the place of *oldChild* and return *oldChild*.

        Raises ValueError when *oldChild* is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        self._relinkSiblings(index)
        newChild.parentNode = self
        newChild.ownerDocument = self.ownerDocument
        if oldChild is not newChild:
            # The replaced node no longer belongs to this tree.
            oldChild.parentNode = None
            oldChild.previousSibling = None
            oldChild.nextSibling = None
        return oldChild

    def removeChild(self, oldChild):
        """Detach *oldChild* from this node and return it.

        Raises ValueError when *oldChild* is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        # Close the gap so the former neighbours point at each other.
        self._relinkSiblings(index)
        oldChild.parentNode = None
        oldChild.previousSibling = None
        oldChild.nextSibling = None
        return oldChild

    def cloneNode(self, deep=0):
        """Return a new bare Node; with *deep* true the children are cloned too."""
        newNode = Node()
        if deep:
            self.deep_clone(newNode)
        return newNode

    def deep_clone(self, newNode):
        """Append a deep clone of every child of this node to *newNode*."""
        for child in self.childNodes:
            new_child = child.cloneNode(1)
            newNode.appendChild(new_child)

    def normalize(self):
        """Join adjacent Text nodes and delete empty Text nodes
        in the full depth of the sub-tree underneath this Node.

        A Text node whose value is only whitespace counts as empty.
        """
        i = 0
        changed = 0
        while i < len(self.childNodes):
            cn = self.childNodes[i]
            if cn.nodeType == Node.TEXT_NODE:
                i = i + 1
                # join adjacent Text nodes
                while i < len(self.childNodes) and self.childNodes[i].nodeType == Node.TEXT_NODE:
                    cn.nodeValue = cn.data = cn.data + self.childNodes[i].data
                    del self.childNodes[i]
                    changed = 1
                # delete empty nodes
                if cn.nodeValue.strip() == "":
                    i = i - 1
                    del self.childNodes[i]
                    changed = 1
                continue
            elif cn.nodeType == Node.ELEMENT_NODE:
                cn.normalize()
            i = i + 1
        if changed:
            # Children were dropped, so the sibling chain has to be rebuilt.
            self._relinkSiblings()

    def unlink(self):
        """Break the references held by this node and its whole sub-tree.

        Afterwards ``childNodes`` is None and the node must not be used
        as part of a tree any more.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        # A plain Node never had "attributes" assigned; treat that as none.
        if getattr(self, "attributes", None):
            # Iterate over a copy: removeAttributeNode edits the dictionary.
            for attr in list(self._attrs.values()):
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            if Node.debug is not None:
                Node.debug.write("Deleting: %s\n" % index)
            # The node may predate the moment debugging was switched on.
            Node.allnodes.pop(index, None)
187 | |
188 def _write_data(writer, data): | |
189 "Writes datachars to writer." | |
190 data = string.replace(data, "&", "&") | |
191 data = string.replace(data, "<", "<") | |
192 data = string.replace(data, "\"", """) | |
193 data = string.replace(data, ">", ">") | |
194 writer.write(data) | |
195 | |
def _getElementsByTagNameHelper(parent, name, rc):
    """Collect the element descendants of *parent* named *name* into *rc*.

    The name "*" matches every element.  Matches are appended in document
    order and *rc* itself is returned.
    """
    # Explicit stack; children are pushed reversed so they pop in order.
    pending = list(reversed(parent.childNodes))
    while pending:
        current = pending.pop()
        if current.nodeType == Node.ELEMENT_NODE:
            if name == "*" or current.tagName == name:
                rc.append(current)
        pending.extend(reversed(current.childNodes))
    return rc
203 | |
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect namespace-qualified element descendants of *parent* into *rc*.

    An element matches when its ``localName`` equals *localName* and its
    ``namespaceURI`` equals *nsURI*; "*" is a wildcard for either value.
    Matches are appended in document order and *rc* itself is returned.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare the local part: tagName carries the prefix as well.
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
211 | |
class Attr(Node):
    """An attribute node; ``value`` and ``nodeValue`` always stay equal."""
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # Write straight into the instance dictionary: this bypasses
        # __setattr__, which the original does for performance.
        fields = self.__dict__
        fields["localName"] = localName or qName
        fields["nodeName"] = fields["name"] = qName
        fields["namespaceURI"] = namespaceURI
        fields["prefix"] = prefix
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store *value*, mirroring writes to ``value``/``nodeValue``."""
        fields = self.__dict__
        if name == "value" or name == "nodeValue":
            fields["value"] = value
            fields["nodeValue"] = value
        else:
            fields[name] = value

    def cloneNode(self, deep=0):
        """Return a copy of this attribute; *deep* also clones children."""
        source = self.__dict__
        duplicate = Attr(source["name"], source["namespaceURI"],
                         source["localName"], source["prefix"])
        current = self.value
        duplicate.__dict__["value"] = current
        duplicate.__dict__["nodeValue"] = current
        if deep:
            self.deep_clone(duplicate)
        return duplicate
238 | |
class AttributeList:
    """the attribute list is a transient interface to the underlying
    dictionaries. mutations here will change the underlying element's
    dictionary

    ``attrs`` maps an attribute name to its node, ``attrsNS`` maps a
    ``(namespaceURI, localName)`` tuple to the same node.
    """
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        # "length" is computed on demand so it can never go stale after the
        # dictionaries have been mutated.
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def copy(self):
        """Return a new list working on shallow copies of both dictionaries."""
        clone = AttributeList(self._attrs.copy(),self._attrsNS.copy())
        return clone

    def item(self, index):
        """Return the attribute node at position *index*, or None if out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of ``(name, value)`` pairs."""
        # Attribute nodes carry nodeName/name; they have no tagName.
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ``((namespaceURI, localName), value)`` pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        """Return the attribute names as a list."""
        return list(self._attrs.keys())

    def keysNS(self):
        """Return the ``(namespaceURI, localName)`` keys as a list."""
        return list(self._attrsNS.keys())

    def values(self):
        """Return the attribute nodes as a list."""
        return list(self._attrs.values())

    def __len__(self):
        return len(self._attrs)

    def __cmp__(self, other):
        # Two lists are equal when they wrap the very same dictionary;
        # anything else is ordered by object identity.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            mine, theirs = id(self), id(other)
            return (mine > theirs) - (mine < theirs)

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look a node up by name, or by ``(namespaceURI, localName)`` tuple.

        Raises KeyError when no such attribute exists.
        """
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store *value* under *attname*.

        *value* may be an Attr node, which is stored as it is; anything
        else is taken as the text value of a freshly created Attr named
        *attname*.  A node previously stored under *attname* is unlinked.
        """
        if isinstance(value, Attr):
            node = value
        else:
            node = Attr(attname)
            node.value=value
        old = self._attrs.get(attname, None)
        if old is not None:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node

    def __delitem__(self, attname_or_tuple):
        """Unlink the addressed node and drop it from both dictionaries."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
310 | |
class Element(Node):
    """An element node with a tag name, child nodes and attributes."""
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None
        self._attrs={}   # attributes are double-indexed:
        self._attrsNS={} #    tagName -> Attribute
                         #    URI,localName -> Attribute
                         # in the future: consider lazy generation of attribute objects
                         #                this is too tricky for now because of headaches
                         #                with namespaces.

    def cloneNode(self, deep=0):
        """Return a copy of this element including clones of its attributes.

        With *deep* true the child nodes are cloned as well.
        """
        newNode = Element(self.tagName,self.namespaceURI,self.prefix,self.localName )
        for k in list(self._attrs.keys()):
            attr = self._attrs[k].cloneNode(1)
            newNode.setAttributeNode(attr)
        if deep:
            self.deep_clone(newNode)
        return newNode

    def _get_tagName(self):
        return str(self.tagName)

    def getAttributeKeys(self):
        """Return the attribute names, or None when there are no attributes."""
        if self._attrs:
            return self._attrs.keys()
        else:
            return None

    def getAttribute(self, attname):
        """Return the value of *attname* as a string, "" when it is absent."""
        if self.hasAttribute(attname):
            return str(self._attrs[attname].value)
        else:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the namespaced attribute.

        Raises KeyError when the attribute does not exist.
        """
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Create an attribute *attname* holding *value*, replacing any old one."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create a namespaced attribute from *qualifiedName* holding *value*."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)
        # FIXME: return original node if something changed.

    def getAttributeNode(self, attrname):
        """Return the attribute node named *attrname*, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the namespaced attribute node; raises KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Store *attr* under both indexes, unlinking a same-named old node."""
        old = self._attrs.get(attr.name, None)
        if old is not None:
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
        # FIXME: return old value if something changed

    def removeAttribute(self, name):
        """Remove the attribute *name*; raises KeyError when it is absent."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; raises KeyError when it is absent."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink *node* and delete it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def hasAttribute(self, name):
        """Return a true value when an attribute called *name* exists."""
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        """Return a true value when the namespaced attribute exists."""
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Return the descendant elements named *name* ("*" for all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the descendant elements matching namespace and local name."""
        # The helper fills the list it is given; hand that list back.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    # undocumented
    def writexml(self, writer, tabs=0):
        """Serialise this element and its sub-tree to *writer*.

        A non-zero *tabs* starts the element on a new line indented by
        ``tabs - 1`` spaces and passes ``tabs + 1`` on to the children.
        Attributes are written in sorted name order.
        """
        tab_str = ""
        if tabs:
            tab_str = "\n" + (" "*(tabs-1))
            tabs += 1
        writer.write(tab_str + "<" + self.tagName)
        attrs = self._get_attributes()

        for a_name in sorted(attrs.keys()):
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer,tabs)
            # Keep the end tag on the same line when the content starts
            # with text.
            if self.childNodes[0].nodeType == Node.TEXT_NODE:
                tab_str = ""
            writer.write(tab_str + "</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return an AttributeList view over this element's attributes."""
        return AttributeList(self._attrs, self._attrsNS)
440 | |
class Comment(Node):
    """A comment node; the comment text is kept in ``data``/``nodeValue``."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#comment"
        self.data = self.nodeValue = data

    def writexml(self, writer, tabs=0):
        """Write the comment in ``<!-- -->`` markup; *tabs* is not used."""
        markup = "<!--%s-->" % self.data
        writer.write(markup)

    def cloneNode(self, deep=0):
        """Return a new Comment with the same text; *deep* clones children too."""
        duplicate = Comment(self.data)
        if not deep:
            return duplicate
        self.deep_clone(duplicate)
        return duplicate
458 | |
class ProcessingInstruction(Node):
    """A processing instruction node made of a target and its data."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.attributes = None
        self.target = self.nodeName = target
        self.data = self.nodeValue = data

    def writexml(self, writer, tabs=0):
        """Write the instruction in ``<? ?>`` markup; *tabs* is not used."""
        parts = (self.target, self.data)
        writer.write("<?%s %s?>" % parts)

    def cloneNode(self, deep=0):
        """Return a copy of this instruction; *deep* clones children too."""
        duplicate = ProcessingInstruction(self.target, self.data)
        if not deep:
            return duplicate
        self.deep_clone(duplicate)
        return duplicate
476 | |
class Text(Node):
    """A text node; the character data is kept in ``data``/``nodeValue``."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.data = self.nodeValue = data

    def __repr__(self):
        # Show at most ten characters and flag the cut with an ellipsis.
        suffix = ""
        if len(self.data) > 10:
            suffix = "..."
        return "<DOM Text node \"%s%s\">" % (self.data[0:10], suffix)

    def writexml(self, writer, tabs=0):
        """Write the text through ``_write_data``; *tabs* is not used."""
        _write_data(writer, self.data)

    def _get_nodeValue(self):
        return str(self.nodeValue)

    def _set_nodeValue(self,data):
        self.data = self.nodeValue = data

    def cloneNode(self, deep=0):
        """Return a new Text node with the same data; *deep* clones children too."""
        duplicate = Text(self.data)
        if not deep:
            return duplicate
        self.deep_clone(duplicate)
        return duplicate
507 | |
508 def _nssplit(qualifiedName): | |
509 fields = string.split(qualifiedName,':', 1) | |
510 if len(fields) == 2: | |
511 return fields | |
512 elif len(fields) == 1: | |
513 return ('', fields[0]) | |
514 | |
class Document(Node):
    """The document node: root of the tree and owner of its nodes.

    At most one element child is allowed; it is exposed as
    ``documentElement``.
    """
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None
        self.ownerDocument = self

    def appendChild(self, node):
        """Append *node* and return it.

        Raises TypeError when *node* is an element and the document
        already has a document element.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError("Two document elements disallowed")
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node

    def removeChild(self, oldChild):
        """Remove *oldChild*, forgetting it as document element if it was one."""
        removed = Node.removeChild(self, oldChild)
        if removed is self.documentElement:
            # Otherwise a new root element could never be appended again.
            self.documentElement = None
        return removed

    # Node factories: each name is simply the node class itself.
    createElement = Element
    createTextNode = Text
    createComment = Comment
    createProcessingInstruction = ProcessingInstruction
    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for *qualifiedName* in *namespaceURI*."""
        prefix,localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for *qualifiedName* in *namespaceURI*."""
        prefix,localName = _nssplit(qualifiedName)
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the elements matching namespace and local name."""
        # The helper fills the list it is given; hand that list back.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        """Drop the document element reference, then unlink the whole tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return the elements named *name* ("*" for all) in the document."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer, tabs=0):
        """Serialise every child to *writer*, passing *tabs* on to each.

        *tabs* defaults to 0 so the two-argument call made by
        ``Node.toxml`` works for documents as well.
        """
        for node in self.childNodes:
            node.writexml(writer, tabs)
563 | |
564 def _doparse(func, args, kwargs): | |
565 events = apply(func, args, kwargs) | |
566 toktype, rootNode = events.getEvent() | |
567 events.expandNode(rootNode) | |
568 return rootNode | |
569 | |
def parse(*args, **kwargs):
    """Build a DOM from a filename or file object.

    All arguments are forwarded unchanged to ``pulldom.parse``.
    """
    factory = pulldom.parse
    return _doparse(factory, args, kwargs)
573 | |
def parseString(*args, **kwargs):
    """Build a DOM from XML held in a string.

    All arguments are forwarded unchanged to ``pulldom.parseString``.
    """
    factory = pulldom.parseString
    return _doparse(factory, args, kwargs)