Mercurial > python-cmd2
comparison pyparsing_py3.py @ 342:12010fcc4e38
pyparsing_py3'
author | catherine@Drou |
---|---|
date | Tue, 16 Feb 2010 12:57:05 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
341:9e593c480782 | 342:12010fcc4e38 |
---|---|
1 # module pyparsing.py | |
2 # | |
3 # Copyright (c) 2003-2009 Paul T. McGuire | |
4 # | |
5 # Permission is hereby granted, free of charge, to any person obtaining | |
6 # a copy of this software and associated documentation files (the | |
7 # "Software"), to deal in the Software without restriction, including | |
8 # without limitation the rights to use, copy, modify, merge, publish, | |
9 # distribute, sublicense, and/or sell copies of the Software, and to | |
10 # permit persons to whom the Software is furnished to do so, subject to | |
11 # the following conditions: | |
12 # | |
13 # The above copyright notice and this permission notice shall be | |
14 # included in all copies or substantial portions of the Software. | |
15 # | |
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
23 # | |
24 #from __future__ import generators | |
25 | |
26 __doc__ = \ | |
27 """ | |
28 pyparsing module - Classes and methods to define and execute parsing grammars | |
29 | |
30 The pyparsing module is an alternative approach to creating and executing simple grammars, | |
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you | |
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module | |
33 provides a library of classes that you use to construct the grammar directly in Python. | |
34 | |
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: | |
36 | |
37 from pyparsing_py3 import Word, alphas | |
38 | |
39 # define grammar of a greeting | |
40 greet = Word( alphas ) + "," + Word( alphas ) + "!" | |
41 | |
42 hello = "Hello, World!" | |
43 print(hello, "->", greet.parseString(hello)) | |
44 | |
45 The program outputs the following:: | |
46 | |
47 Hello, World! -> ['Hello', ',', 'World', '!'] | |
48 | |
49 The Python representation of the grammar is quite readable, owing to the self-explanatory | |
50 class names, and the use of '+', '|' and '^' operators. | |
51 | |
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an | |
53 object with named attributes. | |
54 | |
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers: | |
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) | |
57 - quoted strings | |
58 - embedded comments | |
59 """ | |
60 | |
# Release metadata for this Python 3 port of pyparsing.
__version__ = '1.5.2.Py3'
__versionTime__ = '9 April 2009 12:21'
__author__ = 'Paul McGuire <ptmcg@users.sourceforge.net>'
64 | |
65 import string | |
66 from weakref import ref as wkref | |
67 import copy | |
68 import sys | |
69 import warnings | |
70 import re | |
71 import sre_constants | |
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) | |
73 | |
# Public names exported by "from pyparsing_py3 import *".
__all__ = [
    # parser element, result and exception classes
    "And", "CaselessKeyword", "CaselessLiteral", "CharsNotIn", "Combine", "Dict", "Each", "Empty",
    "FollowedBy", "Forward", "GoToColumn", "Group", "Keyword", "LineEnd", "LineStart", "Literal",
    "MatchFirst", "NoMatch", "NotAny", "OneOrMore", "OnlyOnce", "Optional", "Or",
    "ParseBaseException", "ParseElementEnhance", "ParseException", "ParseExpression", "ParseFatalException",
    "ParseResults", "ParseSyntaxException", "ParserElement", "QuotedString", "RecursiveGrammarException",
    "Regex", "SkipTo", "StringEnd", "StringStart", "Suppress", "Token", "TokenConverter", "Upcase",
    "White", "Word", "WordEnd", "WordStart", "ZeroOrMore",
    # lower-case helpers, constants and predefined expressions
    "alphanums", "alphas", "alphas8bit", "anyCloseTag", "anyOpenTag", "cStyleComment", "col",
    "commaSeparatedList", "commonHTMLEntity", "countedArray", "cppStyleComment", "dblQuotedString",
    "dblSlashComment", "delimitedList", "dictOf", "downcaseTokens", "empty", "getTokensEndLoc", "hexnums",
    "htmlComment", "javaStyleComment", "keepOriginalText", "line", "lineEnd", "lineStart", "lineno",
    "makeHTMLTags", "makeXMLTags", "matchOnlyAtCol", "matchPreviousExpr", "matchPreviousLiteral",
    "nestedExpr", "nullDebugAction", "nums", "oneOf", "opAssoc", "operatorPrecedence", "printables",
    "punc8bit", "pythonStyleComment", "quotedString", "removeQuotes", "replaceHTMLEntity",
    "replaceWith", "restOfLine", "sglQuotedString", "srange", "stringEnd",
    "stringStart", "traceParseAction", "unicodeString", "upcaseTokens", "withAttribute",
    "indentedBlock", "originalTextFor",
]
93 | |
94 """ | |
95 Detect if we are running version 3.X and make appropriate changes | |
96 Robert A. Clark | |
97 """ | |
_PY3K = sys.version_info[0] > 2
if not _PY3K:
    # Python 2: the builtins basestring/unichr already exist, only the
    # helpers below have to be supplied.
    _MAX_INT = sys.maxint

    def _ustr(obj):
        """Unicode-tolerant stand-in for str(obj).

        A unicode object is handed back untouched.  Otherwise str(obj) is
        tried first, so existing code keeps seeing plain strings; only when
        that raises UnicodeEncodeError is unicode(obj) returned instead.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # same result as str(obj) whenever str(obj) works
            return str(obj)
        except UnicodeEncodeError:
            # Returning a unicode object (rather than encoding it with some
            # arbitrary codec/error policy) is the choice made here.
            return unicode(obj)

    def _str2dict(strg):
        """Build a dict with one key per character of strg (all values 0)."""
        return dict.fromkeys(strg, 0)

    alphas = string.lowercase + string.uppercase
else:
    # Python 3: map the Python 2 names used throughout this module onto
    # their Python 3 equivalents.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str
    _str2dict = set
    alphas = string.ascii_lowercase + string.ascii_uppercase
140 | |
141 | |
def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data."""
    # '&' has to be handled first; otherwise the ampersands introduced by
    # the other entities would themselves be escaped again
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
151 | |
class _Constants(object):
    """Empty class; it declares no members of its own."""
    pass
154 | |
# Character-set constants used when building Word-style expressions.
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)    # a single backslash character
# every printable character except whitespace
printables = "".join( ch for ch in string.printable if ch not in string.whitespace )
160 | |
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Store the failure location and message.

        When msg is omitted, the first argument is taken to be the message
        and the parsed string is recorded as empty.
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
              (also available as "column")
            - line - returns the line containing the exception text
        """
        # these values are computed lazily from loc/pstr instead of being
        # stored, which keeps the constructor cheap
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        """List the stored attributes plus the computed ones from __getattr__."""
        # "markInputline" must be spelled exactly like the method above, and
        # "column" is listed because __getattr__ accepts it as an alias of "col"
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
208 | |
class ParseException(ParseBaseException):
    """Exception thrown when parse expressions don't match class.

    Attributes supported by name (computed by the base class):
      - lineno - the line number of the exception text
      - col - the column number of the exception text
      - line - the line containing the exception text
    """
217 | |
class ParseFatalException(ParseBaseException):
    """User-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately."""
222 | |
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # rebuild this exception from the fields of the one that triggered it
        source_text, location = pe.pstr, pe.loc
        message, element = pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(
                                    source_text, location, message, element)
230 | |
231 #~ class ReparseException(ParseBaseException): | |
232 #~ """Experimental class - parse actions can raise this exception to cause | |
233 #~ pyparsing to reparse the input string: | |
234 #~ - with a modified input string, and/or | |
235 #~ - with a modified start location | |
236 #~ Set the values of the ReparseException in the constructor, and raise the | |
237 #~ exception in a parse action to cause pyparsing to use the new string/location. | |
238 #~ Setting the values as None causes no change to be made. | |
239 #~ """ | |
240 #~ def __init_( self, newstring, restartLoc ): | |
241 #~ self.newParseText = newstring | |
242 #~ self.reparseLoc = restartLoc | |
243 | |
class RecursiveGrammarException(Exception):
    """Raised by validate() when the grammar could be improperly recursive."""

    def __init__( self, parseElementList ):
        # keep the chain of elements that led to the recursion for reporting
        self.parseElementTrace = parseElementList

    def __str__( self ):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
251 | |
class _ParseResultsWithOffset(object):
    """Pairs a value with an offset; index 0 is the value, index 1 the offset."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self, i):
        # tuples are immutable, so build a new pair that keeps the value
        value = self.tup[0]
        self.tup = (value, i)
261 | |
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults hands back that same object;
        # __init__ then finds __doinit already cleared and keeps its contents
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match instead of the last one
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index and shifts the stored
           offsets of named results located after index."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # only reached when normal attribute lookup fails: results names
        # resolve to their value, any other unknown name yields ""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            # negative offsets are replaced by the new base offset, all others
            # are shifted past the tokens already held by self
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens;
           nested ParseResults become nested lists, other tokens are kept as they are."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token offset -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so take a list first
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        for k,v in sorted(self.items()):
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   # store the parent object itself, not the weak reference
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # keys() is a dict view on Python 3 and cannot be added to a list directly
        return dir(super(ParseResults,self)) + list(self.keys())
609 | |
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A location that sits on a newline character is reported as the column just
   past the last character of that line, which matches L{lineno} still counting
   the newline as part of the line it terminates.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # the character right after a newline starts a fresh line
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # otherwise: distance from the previous newline (rfind gives -1 on the first line)
    return loc - strg.rfind("\n", 0, loc)
621 | |
def lineno(loc,strg):
    """Returns the 1-based line number of position loc within strg, where
   lines are separated by newline characters.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # every newline found before loc means one more completed line
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
633 | |
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "not found" as -1; index 0 is a genuine match (a string
    # that starts with a newline), so the test has to be >= 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
643 | |
def _defaultStartDebugAction( instring, loc, expr ):
    """Print which expression is about to be tried, with loc and its (line,col)."""
    message = "Match %s at loc %s(%d,%d)" % (
        _ustr(expr), _ustr(loc), lineno(loc,instring), col(loc,instring) )
    print (message)
646 | |
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched together with the tokens it produced."""
    message = "Matched %s -> %s" % ( _ustr(expr), str(toks.asList()) )
    print (message)
649 | |
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised while trying to match."""
    message = "Exception raised:%s" % _ustr(exc)
    print (message)
652 | |
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
656 | |
657 class ParserElement(object): | |
658 """Abstract base level parser element class.""" | |
659 DEFAULT_WHITE_CHARS = " \n\t\r" | |
660 | |
def setDefaultWhitespaceChars( chars ):
    """Replace the class-wide default set of whitespace characters
    (ParserElement.DEFAULT_WHITE_CHARS) with chars.
    """
    setattr( ParserElement, "DEFAULT_WHITE_CHARS", chars )
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
666 | |
def __init__( self, savelist=False ):
    """Set every per-element setting to its default value.

    savelist is stored as saveAsList.
    """
    self.parseAction = []
    self.failAction = None
    # self.name is deliberately left undefined here, so that subclasses
    # can try/except around it
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    # consulted when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs = []
    self.debug = False
    self.streamlined = False
    # lets subclasses that don't advance the parse index optimize exception handling
    self.mayIndexError = True
    self.errmsg = ""
    # results names are modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions
    self.debugActions = ( None, None, None )
    self.re = None
    # avoids redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
689 | |
def copy( self ):
    """Return a shallow copy of this ParserElement whose parse-action and
       ignore-expression lists are independent of the original's.  Handy for
       attaching different parse actions to the same parsing pattern."""
    duplicate = copy.copy( self )
    # give the duplicate its own lists so later changes do not leak back
    duplicate.parseAction = list( self.parseAction )
    duplicate.ignoreExprs = list( self.ignoreExprs )
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
699 | |
def setName( self, name ):
    """Give this expression a name, for use in debugging; returns self."""
    self.name = name
    self.errmsg = "Expected " + self.name
    # keep an already existing exception object in step with the new message
    if hasattr(self, "exception"):
        self.exception.msg = self.errmsg
    return self
707 | |
def setResultsName( self, name, listAllMatches=False ):
    """Define name for referencing matching tokens as a nested attribute
       of the returned parse results.
       NOTE: a *copy* of the original ParserElement object is returned, and
       the original is left unnamed; this lets the client define a basic
       element, such as an integer, and reference it in multiple places
       with different names.
    """
    named = self.copy()
    named.resultsName = name
    # listing all matches is the opposite of modal (last match only) results
    named.modalResults = not listAllMatches
    return named
719 | |
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set breakFlag to True to enable, False to
       disable.
    """
    if not breakFlag:
        # put the saved parse method back, if a breaker is currently installed
        wrapped = self._parse
        if hasattr(wrapped, "_originalParseMethod"):
            self._parse = wrapped._originalParseMethod
        return self

    _parseMethod = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return _parseMethod( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that setBreak(False) can restore it
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self
737 | |
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       f may be a plain function, a bound method, a class (its __init__ is
       inspected) or an object with a __call__ method.  The implicit self of
       methods, classes and callable objects is not counted.  Callables that
       declare *args, or that accept three or more arguments, are returned
       unchanged.  Otherwise a three-argument wrapper is returned that calls
       f(l,t), f(t) or f() and carries over f's __name__, __doc__ and
       __dict__ where possible.

       Raises AttributeError when no code object can be found for f
       (for example for builtin callables).
       """
    STAR_ARGS = 4   # code-object flag bit that is set when *args is declared

    def code_of(func):
        # code objects are exposed as __code__ (Python 3) or func_code (Python 2)
        code = getattr(func, "__code__", None)
        if code is None:
            code = getattr(func, "func_code", None)
        return code

    def has_implicit_self(func):
        # methods expose __self__ (Python 3) or im_self (Python 2);
        # plain functions have neither
        return hasattr(func, "__self__") or hasattr(func, "im_self")

    if isinstance(f, type):
        # a class is called like a function; its arguments are those of
        # __init__ minus the instance being constructed
        target = f.__init__
        implicit_self = True
    elif code_of(f) is not None:
        # plain function or bound method
        target = f
        implicit_self = has_implicit_self(f)
    else:
        # not a function, must be a callable object: inspect its __call__
        target = f.__call__
        implicit_self = has_implicit_self(target)

    code = code_of(target)
    if code is None:
        raise AttributeError(
            "cannot determine the argument count of parse action %r" % (f,))

    if code.co_flags & STAR_ARGS:
        return f
    numargs = code.co_argcount
    if implicit_self:
        numargs -= 1

    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs >= 3:
        return f

    if numargs == 2:
        def tmp(s,l,t):
            return f(l,t)
    elif numargs == 1:
        def tmp(s,l,t):
            return f(t)
    else: #~ numargs == 0:
        def tmp(s,l,t):
            return f()

    # make the wrapper look like the wrapped callable; any of these
    # attributes may be missing or read-only, in which case it is skipped
    try:
        tmp.__name__ = f.__name__
    except (AttributeError,TypeError):
        pass
    try:
        tmp.__doc__ = f.__doc__
    except (AttributeError,TypeError):
        pass
    try:
        tmp.__dict__.update(f.__dict__)
    except (AttributeError,TypeError):
        pass
    return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
831 | |
def setParseAction(self, *fns, **kwargs):
    """Replace this expression's parse actions with the given callables.

    Each parse action fn may accept 0-3 arguments and is invoked as
    fn(s,loc,toks), fn(loc,toks), fn(toks), or fn(), where:
     - s    = the original string being parsed (see note below)
     - loc  = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a ParseResults object
    A parse action that modifies the tokens may return them; the returned
    value then replaces the original token list.  Otherwise no return value
    is needed.

    The optional keyword argument callDuringTry is stored on the expression
    as self.callDuringTry (False when not supplied).

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>}
    for more information on parsing strings containing <TAB>s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Returns self, so calls can be chained.
    """
    normalize = self._normalizeParseActionArgs
    self.parseAction = [normalize(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
852 | |
def addParseAction(self, *fns, **kwargs):
    """Append parse actions to this expression's existing list.

    Accepts the same callables and callDuringTry keyword as
    L{I{setParseAction}<setParseAction>}.  callDuringTry stays set once it
    has been enabled by any call.  Returns self.
    """
    normalize = self._normalizeParseActionArgs
    self.parseAction += [normalize(fn) for fn in fns]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
858 | |
def setFailAction(self, fn):
    """Register the callable to run when parsing fails at this expression.

    The fail action is called as fn(s,loc,expr,err) where:
     - s    = string being parsed
     - loc  = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown
    Its return value is ignored.  It may raise ParseFatalException to stop
    parsing immediately.

    Returns self.
    """
    self.failAction = fn
    return self
871 | |
def _skipIgnorables(self, instring, loc):
    """Advance loc past every run of ignorable expressions.

    Each expression in self.ignoreExprs is matched repeatedly at loc until it
    raises ParseException.  The whole list is retried for as long as any
    expression matched during the previous pass.  Returns the new location.
    """
    skipped_any = True
    while skipped_any:
        skipped_any = False
        for ignorable in self.ignoreExprs:
            try:
                # loop ends only when the ignorable stops matching
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    skipped_any = True
            except ParseException:
                pass
    return loc
884 | |
def preParse(self, instring, loc):
    """Return loc advanced past ignorable expressions and leading whitespace.

    Ignorables are skipped first (only when self.ignoreExprs is non-empty).
    Then, if self.skipWhitespace is set, characters found in self.whiteChars
    are skipped.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
896 | |
def parseImpl(self, instring, loc, doActions=True):
    """Default match implementation: consume nothing and produce no tokens."""
    no_tokens = []
    return loc, no_tokens
899 | |
def postParse(self, instring, loc, tokenlist):
    """Hook applied to the matched tokens; the default returns them unchanged."""
    return tokenlist
902 | |
903 #~ @profile | |
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Match this expression at loc and return (new_loc, ParseResults).

    Steps, in order:
     1. call the start debug action (only when debugging or a fail action is set)
     2. pre-parse (skip ignorables/whitespace) when callPreParse and
        self.callPreparse are both set
     3. call parseImpl, converting IndexError into ParseException
     4. run postParse and wrap the tokens in a ParseResults
     5. run the parse actions when doActions or self.callDuringTry is set
     6. call the success debug action when debugging

    When debugging or a fail action is set, a ParseBaseException raised by
    parseImpl is reported to the exception debug action and the fail action
    before being re-raised.
    """
    debugging = self.debug  # and doActions
    traced = debugging or self.failAction

    if traced and self.debugActions[0]:
        self.debugActions[0](instring, loc, self)

    if callPreParse and self.callPreparse:
        preloc = self.preParse(instring, loc)
    else:
        preloc = loc
    # locations reported to actions refer to the position before pre-parsing
    tokensStart = loc

    if traced:
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException:
            err = None
            if self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                if err is None:
                    err = sys.exc_info()[1]
                self.failAction(instring, tokensStart, self, err)
            raise
    elif self.mayIndexError or loc >= len(instring):
        # only pay for the IndexError guard when running off the end is possible
        try:
            loc, tokens = self.parseImpl(instring, preloc, doActions)
        except IndexError:
            raise ParseException(instring, len(instring), self.errmsg, self)
    else:
        loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName,
                             asList=self.saveAsList, modal=self.modalResults)

    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for action in self.parseAction:
                tokens = action(instring, tokensStart, retTokens)
                if tokens is not None:
                    # a parse action returned replacement tokens
                    retTokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults)
        except ParseBaseException:
            # failures inside user parse actions are reported only when debugging
            if debugging and self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2](instring, tokensStart, self, err)
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
980 | |
def tryParse(self, instring, loc):
    """Attempt a match at loc without running parse actions.

    Returns the location following the match.  A ParseFatalException is
    downgraded to an ordinary ParseException at loc.
    """
    try:
        end_loc = self._parse(instring, loc, doActions=False)[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
    return end_loc
986 | |
987 # this method gets repeatedly called during backtracking with the same arguments - | |
988 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression | |
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing wrapper around _parseNoCache.

    Results are stored in ParserElement._exprArgCache, keyed by
    (self, instring, loc, callPreParse, doActions).  A successful parse stores
    (loc, copy of the ParseResults); a ParseBaseException is stored as-is and
    raised again on a later hit.
    """
    key = (self, instring, loc, callPreParse, doActions)
    if key in ParserElement._exprArgCache:
        cached = ParserElement._exprArgCache[key]
        if isinstance(cached, Exception):
            raise cached
        return cached

    try:
        result = self._parseNoCache(instring, loc, doActions, callPreParse)
        # cache a copy of the tokens; the caller gets the original result
        ParserElement._exprArgCache[key] = (result[0], result[1].copy())
        return result
    except ParseBaseException:
        ParserElement._exprArgCache[key] = sys.exc_info()[1]
        raise
1005 | |
# parsing is uncached unless enablePackrat() swaps in _parseCache
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}

@staticmethod
def resetCache():
    """Empty the shared memoization cache in place."""
    ParserElement._exprArgCache.clear()
1013 | |
_packratEnabled = False

@staticmethod
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.

    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both valid results
    and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing_py3 as pyparsing.  To activate the packrat
    feature, your program must call the static method
    ParserElement.enablePackrat().  If your program uses psyco to "compile
    as you go", you must call enablePackrat before calling psyco.full().
    If you do not do this, Python will crash.  For best results, call
    enablePackrat() immediately after importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1035 | |
def parseString(self, instring, parseAll=False):
    """Parse instring from its beginning with this expression.

    This is the main interface to the client code, once the complete
    expression has been built.  Returns the matched tokens.

    Set parseAll to True to require that the entire input string be
    successfully parsed (equivalent to ending the grammar with StringEnd()).

    Note: parseString implicitly calls expandtabs() on the input string,
    in order to report proper column numbers in parse actions.  If the input
    string contains tabs and the grammar uses parse actions that use the loc
    argument to index into the string being parsed, you can ensure you have
    a consistent view of the input string by:
     - calling parseWithTabs on your grammar before calling parseString
       (see L{I{parseWithTabs}<parseWithTabs>})
     - defining your parse action using the full (s,loc,toks) signature, and
       referencing the input string using the parse action's s argument
     - explicitly expanding the tabs in your input string before calling
       parseString
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            loc = self.preParse(instring, loc)
            StringEnd()._parse(instring, loc)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
    return tokens
1077 | |
def scanString(self, instring, maxMatches=_MAX_INT):
    """Generate (tokens, start, end) for each match found while scanning instring.

    Scanning stops after maxMatches matches when that optional argument is
    given.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on
    parsing strings with embedded tabs.
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    end_of_input = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every scan step
    skip = self.preParse
    match = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while loc <= end_of_input and found < maxMatches:
            try:
                preloc = skip(instring, loc)
                nextLoc, tokens = match(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character further on
                loc = preloc + 1
            else:
                if nextLoc <= loc:
                    # match made no progress; step forward to avoid looping in place
                    loc = preloc + 1
                    continue
                found += 1
                yield tokens, preloc, nextLoc
                loc = nextLoc
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1116 | |
def transformString(self, instring):
    """Return instring with every match replaced by its (possibly modified) tokens.

    Extension to scanString.  To use transformString, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Text between matches is copied through unchanged.  Each match is replaced
    by the tokens it produced; a match whose tokens are empty is dropped.

    Side effect: sets self.keepTabs to True.
    """
    pieces = []
    copied_upto = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            pieces.append(instring[copied_upto:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            copied_upto = end
        pieces.append(instring[copied_upto:])
        return "".join([_ustr(piece) for piece in pieces])
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1145 | |
def searchString(self, instring, maxMatches=_MAX_INT):
    """Return a ParseResults holding the tokens of every match found by scanString.

    Another extension to scanString, simplifying access to the tokens found
    to match the given parse expression.  The optional maxMatches argument
    clips searching after that many matches.
    """
    try:
        found = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1156 | |
def __add__(self, other):
    """Implementation of + operator - returns And.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1166 | |
def __radd__(self, other):
    """Implementation of + operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1176 | |
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1186 | |
def __rsub__(self, other):
    """Implementation of - operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1196 | |
def __mul__(self, other):
    """Implementation of * operator - repeats this expression.

    other may be:
     - an int n: exactly n repetitions
     - a tuple (min, max) of ints: at least min and at most max repetitions
     - a tuple (min, None) or (min,): min or more repetitions
     - a tuple (None, max): treated as (0, max)

    Raises TypeError for any other operand type, and ValueError for a
    negative count, max < min, or a count of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements counts the optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # apply the format arguments; previously they were passed as
            # extra exception args and the message was never formatted
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each further repetition is tried only if the previous matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1245 | |
def __rmul__(self, other):
    """Implementation of * operator when the left operand is not a ParserElement.

    Repetition does not depend on operand order, so this defers to __mul__.
    """
    repeated = self.__mul__(other)
    return repeated
1248 | |
def __or__(self, other):
    """Implementation of | operator - returns MatchFirst.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1258 | |
def __ror__(self, other):
    """Implementation of | operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1268 | |
def __xor__(self, other):
    """Implementation of ^ operator - returns Or.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1278 | |
def __rxor__(self, other):
    """Implementation of ^ operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1288 | |
def __and__(self, other):
    """Implementation of & operator - returns Each.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1298 | |
def __rand__(self, other):
    """Implementation of & operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  Any other operand that
    is not a ParserElement produces a SyntaxWarning and a None result.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1308 | |
def __invert__(self):
    """Implementation of ~ operator - wraps this expression in NotAny."""
    negated = NotAny(self)
    return negated
1312 | |
def __call__(self, name):
    """Shortcut for setResultsName(name), leaving listAllMatches at its default.

    For example::
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
    could be written as::
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    named = self.setResultsName(name)
    return named
1320 | |
def suppress(self):
    """Return this expression wrapped in Suppress, so its matched tokens are
    left out of the results; useful to keep punctuation from cluttering up
    returned output.
    """
    silenced = Suppress(self)
    return silenced
1326 | |
def leaveWhitespace(self):
    """Turn off whitespace skipping ahead of this expression's pattern.

    This is normally only used internally by the pyparsing module, but may be
    needed in some whitespace-sensitive grammars.  Returns self.
    """
    self.skipWhitespace = False
    return self
1334 | |
def setWhitespaceChars(self, chars):
    """Use chars as the whitespace set for this expression.

    Re-enables whitespace skipping and clears copyDefaultWhiteChars.
    Returns self.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1342 | |
def parseWithTabs(self):
    """Keep <TAB>s in the input instead of expanding them to spaces before parsing.

    Must be called before parseString when the input grammar contains
    elements that match <TAB> characters.  Returns self.
    """
    self.keepTabs = True
    return self
1349 | |
def ignore(self, other):
    """Register an expression to be skipped (e.g., comments) during matching.

    May be called repeatedly to define multiple comment or other ignorable
    patterns.  An expression that is not already a Suppress is wrapped in one
    and always appended.  An existing Suppress is appended only if it is not
    already registered.  Returns self.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
1361 | |
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Enable debugging and install the start/success/exception callbacks.

    Any callback passed as a false value is replaced by the module's
    corresponding default debug action.  Returns self.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
1369 | |
def setDebug(self, flag=True):
    """Switch debugging messages during pattern matching on or off.

    A true flag installs the default debug actions; a false flag only clears
    self.debug.  Returns self.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
1378 | |
def __str__(self):
    """Return the expression's name."""
    label = self.name
    return label
1381 | |
def __repr__(self):
    """Return the same text as the string conversion of this expression."""
    text = _ustr(self)
    return text
1384 | |
def streamline(self):
    """Mark this expression as streamlined and drop any cached string form.

    Returns self.
    """
    self.strRepr = None
    self.streamlined = True
    return self
1389 | |
def checkRecursion(self, parseElementList):
    """Recursion check hook; the base implementation does nothing."""
    return None
1392 | |
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    The base implementation ignores validateTrace and starts a recursion
    check from an empty element list.
    """
    seen = []
    self.checkRecursion(seen)
1396 | |
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.

    file_or_filename is either an object with a read() method, whose contents
    are used directly, or a filename.  A named file is opened in text mode,
    read in full, and closed before parsing.

    parseAll is passed through to parseString.  Returns whatever parseString
    returns; a ParseBaseException from parsing is re-raised from here.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # Not a file object, so treat it as a filename.  Text mode gives the
        # parser str data to compare against its str match characters, and
        # the with-block closes the file even if read() raises.
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1414 | |
def getException(self):
    """Build a ParseException carrying this expression's error message,
    with an empty string and location 0 as placeholders.
    """
    placeholder = ParseException("", 0, self.errmsg, self)
    return placeholder
1417 | |
def __getattr__(self, aname):
    """Create the myException attribute on first access.

    The exception is built with getException() and stored on the instance
    before being returned.  Any other attribute name raises AttributeError.
    """
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    exc = self.getException()
    self.myException = exc
    return exc
1424 | |
def __eq__(self, other):
    """Compare this expression with another expression or with a string.

     - another ParserElement: equal when it is the same object or has an
       equal __dict__
     - a string: equal when this expression parses the whole string
     - anything else: falls back to comparing the super() proxy with other
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1436 | |
def __ne__(self, other):
    """Return the negation of the == comparison."""
    same = (self == other)
    return not same
1439 | |
def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return hash(identity)
1442 | |
def __req__(self, other):
    """Return the result of the == comparison between self and other."""
    same = (self == other)
    return same
1445 | |
def __rne__(self, other):
    """Return the negation of the == comparison between self and other."""
    same = (self == other)
    return not same
1448 | |
1449 | |
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the expression name and refresh the "Expected ..." error message."""
        renamed = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1461 | |
1462 | |
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1470 | |
1471 | |
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail, by raising the expression's myException updated
        with the current location and input string.
        """
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
1487 | |
1488 | |
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # an empty literal becomes an Empty expression
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Return (end location, matched string) when instring holds the
        literal at loc; otherwise raise the expression's myException.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
1521 | |
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with Literal::
        Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
        Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
    Accepts two optional constructor arguments in addition to the keyword string:
    identChars is a string of characters that would be valid identifier characters,
    defaulting to the current Keyword.DEFAULT_KEYWORD_CHARS (initially all
    alphanumerics + "_" and "$"); caseless allows case-insensitive
    matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            # Resolve the default at call time.  A default bound in the
            # signature is captured once at class creation, which made
            # setDefaultKeywordChars() have no effect on new keywords.
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc.

        The keyword text must be present, must not be followed by an
        identifier character, and must not be preceded by one.  Returns
        (end location, keyword string) or raises the expression's myException.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy whose identChars is reset to the current default."""
        c = super(Keyword,self).copy()
        # NOTE(review): this discards any custom identChars given to the
        # constructor and stores the plain default string rather than the
        # _str2dict() form used by __init__ - confirm this is intended.
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1582 | |
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the inherited match string is kept upper-cased for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Return (end location, defining literal) when the upper-cased input
        at loc equals the upper-cased literal; otherwise raise myException.
        """
        end = loc + self.matchLen
        if instring[loc:end].upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1604 | |
class CaselessKeyword(Keyword):
    """Keyword variant constructed with caseless=True, matching its text
    without regard to letter case.
    """
    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, self.match) when the upper-cased input at
        loc equals self.caselessmatch and the character following the match
        (if any) is not in self.identChars; otherwise raise self.myException.

        NOTE(review): only the character after the match is checked here;
        the character before loc is not examined.
        """
        matchLen = self.matchLen
        end = loc + matchLen
        if instring[loc:end].upper() == self.caselessmatch:
            # Either the keyword runs to the end of the input, or the next
            # character must not be an identifier character.
            if loc >= len(instring) - matchLen or instring[end].upper() not in self.identChars:
                return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1618 | |
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.

       When asKeyword is true, a match is additionally rejected if it is
       directly adjacent to another body character (the regex fast path
       expresses this with \\b anchors).
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        """Raises ValueError if min < 1."""
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            # no (or empty) body set given: body characters default to the initial set
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with default length limits and no space in either
        # character set, the word is matched with a compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single initial character can be emitted as an escaped
                # literal.  This must test initCharsOrig: emitting a
                # multi-character initial set through re.escape would demand
                # that literal sequence instead of any one of its characters.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character scan in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (new location, matched text) for a word starting at loc.

        Uses the compiled regex when one is available, otherwise scans
        character by character.  On failure raises self.myException after
        stamping it with the failing location and input string.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond maxLen characters or past the end of the input
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that continues past max is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # keyword mode: no body character directly before or after the match
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the superclass string form if that succeeds; otherwise a
        cached "W:(...)" summary showing up to four characters of each set.
        """
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1745 | |
1746 | |
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__(self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

        An empty pattern only triggers a SyntaxWarning.  A pattern that
        fails to compile triggers a SyntaxWarning and the compile error is
        re-raised.
        """
        super(Regex, self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the compiled pattern at loc.

        Returns (end of match, ParseResults) where the results hold the
        whole matched text plus one named entry per named group.  Raises
        self.myException when the pattern does not match at loc.
        """
        found = self.re.match(instring, loc)
        if found is None:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        # copy every named group into the results under its group name
        for groupName, groupText in found.groupdict().items():
            tokens[groupName] = groupText
        return found.end(), tokens

    def __str__(self):
        """Return the superclass string form if that succeeds; otherwise a
        cached "Re:(...)" form built from repr() of the pattern.
        """
        try:
            return super(Regex, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1802 | |
1803 | |
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # escape character, prepared for use inside a [^...] character class
        escCharRange = ''
        if escChar is not None:
            escCharRange = _escapeRegexRangeChars(escChar) or ''

        # Opening quote, then a repeated group whose first alternative is any
        # character that is not the first end-quote character or the escape
        # character (and, unless multiline, not a line break).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharRange )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharRange )
        if len(self.endQuoteChar) > 1:
            # multi-character end quote: also accept any proper prefix of the
            # end quote followed by a character that breaks the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used in parseImpl to drop the escape character from results
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end of match, text) for a quoted string starting at loc.

        With unquoteResults set, the delimiters are stripped, escaped
        characters are replaced by the character itself, and escQuote
        sequences are replaced by the end quote.  Raises self.myException
        when no quoted string starts at loc.
        """
        # cheap first-character test before running the regex
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a normal literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the superclass string form if that succeeds; otherwise a
        cached description naming the start and end quote strings.
        """
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1918 | |
1919 | |
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        """Raises ValueError if min < 1."""
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        # an exact length, when given, overrides both bounds
        if exact > 0:
            shortest = longest = exact
        else:
            shortest = min
            longest = max if max > 0 else _MAX_INT
        self.minLen = shortest
        self.maxLen = longest

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, matched text) for the run of characters at
        loc that are not in notChars, limited to maxLen characters.  Raises
        self.myException if the first character is disallowed or the run is
        shorter than minLen.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]

    def __str__(self):
        """Return the superclass string form if that succeeds; otherwise a
        cached "!W:(...)" summary showing at most four excluded characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
1990 | |
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display names used to build the element's name from its characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # the characters to be matched must not be treated as skippable whitespace
        skippable = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars("".join(skippable))
        self.name = "".join([White.whiteStrs[ch] for ch in self.matchWhite])
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        # an exact length, when given, overrides both bounds
        if exact > 0:
            shortest = longest = exact
        else:
            shortest = min
            longest = max if max > 0 else _MAX_INT
        self.minLen = shortest
        self.maxLen = longest

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, matched text) for the run of matchWhite
        characters at loc, limited to maxLen characters.  Raises
        self.myException if the first character is not in matchWhite or
        the run is shorter than minLen.
        """
        if instring[loc] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]
2047 | |
2048 | |
class _PositionToken(Token):
    """Base class for the position-testing tokens defined below it; names
    the element after its concrete class and marks it as possibly returning
    an empty match.
    """
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
2055 | |
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and then whitespace characters until
        the target column is reached, a non-space character is found, or the
        input ends.  Returns the resulting location.
        """
        if col(loc, instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return (location of the target column, text skipped to get there).
        Raises ParseException if loc is already past the target column.
        """
        currentCol = col(loc, instring)
        if currentCol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - currentCol
        return target, instring[loc:target]
2078 | |
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__(self):
        super(LineStart, self).__init__()
        # newline must stay significant, so it is removed from the skipped whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Return loc advanced by one if the superclass pre-parse position
        holds a newline character; otherwise return loc unchanged.
        """
        afterSkip = super(LineStart, self).preParse(instring, loc)
        return loc + 1 if instring[afterSkip] == "\n" else loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) when loc is 0, equals the pre-parse position of
        the start of the input, or directly follows a newline; otherwise
        raise self.myException.
        """
        atLineStart = (loc == 0
                       or loc == self.preParse(instring, 0)
                       or instring[loc - 1] == "\n")
        if atLineStart:
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2103 | |
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__(self):
        super(LineEnd, self).__init__()
        # newline must stay significant, so it is removed from the skipped whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc+1, "\\n") when a newline is at loc, or (loc+1, [])
        when loc is exactly the end of the input; otherwise raise
        self.myException.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == instrlen:
            return loc + 1, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2129 | |
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) when loc is 0 or equals the pre-parse position
        of the start of the input; otherwise raise self.myException.
        """
        # a non-zero loc still counts as the start if everything before it
        # is skipped by preParse from position 0
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2147 | |
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc+1, []) when loc is exactly the end of the input, or
        (loc, []) when loc is already beyond it; raise self.myException
        when input remains at loc.
        """
        instrlen = len(instring)
        if loc == instrlen:
            return loc + 1, []
        if loc > instrlen:
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2171 | |
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    # The docstring is raw so that "\b" is documented literally instead of
    # being turned into a backspace character.
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) when loc is 0, or when the character at loc is
        a word character and the one before it is not; otherwise raise
        self.myException.
        """
        if loc != 0:
            if (instring[loc-1] in self.wordChars or
                instring[loc] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2193 | |
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    # The docstring is raw so that "\b" is documented literally instead of
    # being turned into a backspace character.
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) when loc is at or past the end of the input, or
        when the character at loc is not a word character and the one
        before it is; otherwise raise self.myException.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # NOTE(review): at loc == 0, instring[loc-1] indexes the LAST
            # character of the input (negative index) - confirm whether a
            # match at position 0 is intended in that case.
            if (instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2218 | |
2219 | |
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__(self, exprs, savelist = False):
        """Store exprs as self.exprs: a list is kept as-is, a string is
        wrapped in a Literal, any other iterable is converted to a list,
        and a non-iterable is wrapped in a one-element list.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            contained = exprs
        elif isinstance(exprs, basestring):
            contained = [Literal(exprs)]
        else:
            try:
                contained = list(exprs)
            except TypeError:
                contained = [exprs]
        self.exprs = contained
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add other to the contained expressions, invalidate the cached
        string form, and return self.
        """
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the originally supplied expressions are not modified
        copies = [expr.copy() for expr in self.exprs]
        self.exprs = copies
        for expr in copies:
            expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable on this element and on every
        contained expression.  A Suppress that is already registered is
        not added again.  Returns self.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        for expr in self.exprs:
            expr.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """Return the superclass string form if that succeeds; otherwise a
        cached "ClassName:(exprs)" form.
        """
        try:
            return super(ParseExpression, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline this element and all contained expressions, then
        flatten a nested expression of the same class.  Returns self.
        """
        super(ParseExpression, self).streamline()

        for expr in self.exprs:
            expr.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        def canAbsorb(candidate):
            return (isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug)

        if len(self.exprs) == 2:
            head = self.exprs[0]
            if canAbsorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # re-read the last element: the list may just have been rebuilt
            tail = self.exprs[-1]
            if canAbsorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate unchanged to the superclass setResultsName."""
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression, passing a copy of the
        trace extended with self, then run checkRecursion from an empty list.
        """
        # the default list is only ever copied, never mutated
        trace = validateTrace[:] + [self]
        for expr in self.exprs:
            expr.validate(trace)
        self.checkRecursion([])
2315 | |
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once parseImpl meets one, failures of the later
        expressions are raised as ParseSyntaxException."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the lookup after Empty,
            # so Empty.__init__ itself is not run - confirm this is intended.
            super(Empty, self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        super(And, self).__init__(exprs, savelist)
        # the sequence may be empty only if every member may be empty
        self.mayReturnEmpty = all(expr.mayReturnEmpty for expr in self.exprs)
        # whitespace handling is taken from the first expression
        leading = exprs[0]
        self.setWhitespaceChars(leading.whiteChars)
        self.skipWhitespace = leading.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each contained expression in order, starting at loc.

        Returns (final location, accumulated results).  After an _ErrorStop
        marker, a ParseBaseException or IndexError from a later expression
        is raised as ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, collected = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        pastErrorStop = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                pastErrorStop = True
                continue
            if not pastErrorStop:
                loc, tokens = expr._parse(instring, loc, doActions)
            else:
                try:
                    loc, tokens = expr._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self))
            if tokens or tokens.keys():
                collected += tokens
        return loc, collected

    def __iadd__(self, other):
        """Append other (a string is first wrapped in a Literal) and return self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def checkRecursion(self, parseElementList):
        """Run checkRecursion on the contained expressions, stopping after
        the first one that may not return empty.
        """
        visited = parseElementList[:] + [self]
        for expr in self.exprs:
            expr.checkRecursion(visited)
            if not expr.mayReturnEmpty:
                break

    def __str__(self):
        """Return self.name if set; otherwise a cached "{a b c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join([_ustr(expr) for expr in self.exprs]) + "}"

        return self.strRepr
2383 | |
2384 | |
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """
    def __init__(self, exprs, savelist = False):
        super(Or, self).__init__(exprs, savelist)
        # an alternation may be empty if any alternative may be empty
        self.mayReturnEmpty = any(expr.mayReturnEmpty for expr in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative at loc and parse with the one whose trial
        match ends furthest into the input (the first such alternative on a
        tie).

        If none matches, raise the failure recorded at the furthest
        location, or a ParseException saying there are no defined
        alternatives when no failure was recorded.
        """
        furthestFailLoc = -1
        furthestFailure = None
        longestMatchLoc = -1
        for alternative in self.exprs:
            try:
                trialEnd = alternative.tryParse(instring, loc)
            except ParseException as err:
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end of the input counts as failing at its end
                if len(instring) > furthestFailLoc:
                    furthestFailure = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthestFailLoc = len(instring)
            else:
                if trialEnd > longestMatchLoc:
                    longestMatchLoc = trialEnd
                    bestAlternative = alternative

        if longestMatchLoc < 0:
            if furthestFailure is not None:
                raise furthestFailure
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        # re-run the winner for real, this time honouring doActions
        return bestAlternative._parse(instring, loc, doActions)

    def __ixor__(self, other):
        """Append other (a string is first wrapped in a Literal) and return self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return self.name if set; otherwise a cached "{a ^ b ^ c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join([_ustr(expr) for expr in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run checkRecursion on every contained expression."""
        visited = parseElementList[:] + [self]
        for expr in self.exprs:
            expr.checkRecursion(visited)
2445 | |
2446 | |
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__(self, exprs, savelist = False):
        super(MatchFirst, self).__init__(exprs, savelist)
        if exprs:
            # may be empty if any alternative may be empty
            self.mayReturnEmpty = any(expr.mayReturnEmpty for expr in self.exprs)
        else:
            self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at loc.

        If none matches, raise the failure recorded at the furthest
        location, or a ParseException saying there are no defined
        alternatives when no failure was recorded.
        """
        furthestFailLoc = -1
        furthestFailure = None
        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end of the input counts as failing at its end
                if len(instring) > furthestFailLoc:
                    furthestFailure = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthestFailLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestFailure is not None:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Append other (a string is first wrapped in a Literal) and return self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return self.name if set; otherwise a cached "{a | b | c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join([_ustr(expr) for expr in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run checkRecursion on every contained expression."""
        visited = parseElementList[:] + [self]
        for expr in self.exprs:
            expr.checkRecursion(visited)
2504 | |
2505 | |
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # may match empty only if every contained expression may match empty
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built lazily on the first parse
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expressions in whatever order they occur.

        A first pass uses tryParse to discover the order in which expressions
        match; a second pass re-parses in that order with _parse (honoring
        doActions) and merges the results.  Raises ParseException if a
        required expression never matched.
        """
        if self.initExprGroups:
            # classify contained expressions by wrapper type; for the wrappers
            # the inner .expr is stored rather than the wrapper itself
            self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        # working copies, consumed as expressions are matched
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # required/optional expressions are removed once matched;
                    # the multi* lists are never shrunk, so those can match again
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt)

        # second pass: parse for real, in the discovered order, starting from the original loc
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # keys present in both the accumulated results and r get a combined
            # value, re-assigned after r has been merged in
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if there is one, else a cached '{a & b & ...}' rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Forward the recursion check to every contained expression, with self added to the trail."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2590 | |
2591 | |
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

    Wraps a single contained expression, self.expr (which may be None), and
    forwards parsing, whitespace, ignore, streamline and validation requests to it.
    """
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the parsing characteristics of the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a private copy of the wrapped expression.

        Returns self.
        """
        self.skipWhitespace = False
        # Guard before copying: the wrapped expression may be None, in which
        # case the unconditional self.expr.copy() raised AttributeError.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on the wrapped expression.

        A Suppress instance that is already registered is not added again.
        Returns self.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # pass along the entry the base class just appended
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression; returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already in parseElementList,
        otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run the recursion check from this element."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string if available, else a cached 'ClassName:(expr)' rendering."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # fall through to the generated representation; narrowed from a
            # bare except so KeyboardInterrupt/SystemExit are not swallowed
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2662 | |
2663 | |
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.

    FollowedBy does *not* advance the parsing position within the input
    string; it only verifies that the given expression matches at the
    current position.  It always returns a null token list.
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # tryParse raises if the lookahead fails; its return value is discarded
        self.expr.tryParse(instring, loc)
        return loc, []
2676 | |
2677 | |
class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.

    NotAny does *not* advance the parsing position within the input string;
    it only verifies that the given expression does *not* match at the
    current position.  NotAny also does *not* skip over leading whitespace,
    and always returns a null token list.  May be constructed using the
    '~' operator.
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        #~ self.leaveWhitespace()
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl(self, instring, loc, doActions=True):
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression is absent: succeed without consuming input
            return loc, []

        # the unwanted expression matched: fail at the current location
        #~ raise ParseException(instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__(self):
        """Return the assigned name if there is one, else a cached '~{expr}' rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2713 | |
2714 | |
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression as many times as it will match, possibly none."""
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = bool(self.ignoreExprs)
            while True:
                if skipIgnorables:
                    nextLoc = self._skipIgnorables(instring, loc)
                else:
                    nextLoc = loc
                loc, moreTokens = self.expr._parse(instring, nextLoc, doActions)
                # accumulate only results that carry tokens or named values
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            # the first failure simply ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name if there is one, else a cached '[expr]...' rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Name the results as in the base class, and mark the returned element saveAsList."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2752 | |
2753 | |
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression once (failure propagates), then as often as it repeats."""
        # must be at least one
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skipIgnorables = bool(self.ignoreExprs)
            while True:
                if skipIgnorables:
                    nextLoc = self._skipIgnorables(instring, loc)
                else:
                    nextLoc = loc
                loc, moreTokens = self.expr._parse(instring, nextLoc, doActions)
                # accumulate only results that carry tokens or named values
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            # a failure after the first match simply ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name if there is one, else a cached '{expr}...' rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Name the results as in the base class, and mark the returned element saveAsList."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2787 | |
class _NullToken(object):
    """Placeholder object that is always falsy and renders as an empty string."""

    def __bool__(self):
        return False

    # Python 2 looks up __nonzero__ for truth testing
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# sentinel used as the default for Optional's 'default' argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
    A default return string can also be specified, if the optional expression
    is not found.
    """
    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression; on failure return the default (if any) at the same loc."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                # no default was supplied: contribute nothing
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name too
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        """Return the assigned name if there is one, else a cached '[expr]' rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
2828 | |
2829 | |
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
    If include is set to true, the matched expression is also parsed (the skipped text
    and matched expression are returned as a 2-element list).  The ignore
    argument is used to define grammars (typically quoted strings and comments) that
    might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a string failOn is promoted to a Literal; anything else (including None) is stored as given
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from loc until the target expression matches.

        Returns the text between the starting loc and the match position; when
        includeMatch is set, the target is also parsed and its tokens (if any)
        are appended to the skipped text.  Raises ParseException if failOn
        matches first, or if the end of the input is passed without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # set only while the failOn exception is propagating, so the handler
        # below re-raises it instead of advancing
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # jump over any run of ignorable expressions at the current position
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                            # print("found ignoreExpr, advance to", loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    # re-parse the target for real, consuming it
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    # no match here; try again one character further on
                    loc += 1
        # ran past the end of the input without finding the target
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2895 | |
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the Forward variable using the '<<' operator.

    Note: take care when assigning to Forward not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the Forward::
        fwdExpr << (a | b | c)
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install other as the wrapped expression and adopt its parsing characteristics.

        A plain string is promoted to a Literal.  Returns None.
        """
        if isinstance(other, basestring):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = other.mayIndexError
        self.mayReturnEmpty = other.mayReturnEmpty
        self.setWhitespaceChars(other.whiteChars)
        self.skipWhitespace = other.skipWhitespace
        self.saveAsList = other.saveAsList
        self.ignoreExprs.extend(other.ignoreExprs)
        return None

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the wrapped expression once; the flag is set first so repeat calls do nothing."""
        if self.streamlined:
            return self
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression unless self is already in the trace, then check recursion."""
        if self not in validateTrace and self.expr is not None:
            self.expr.validate(validateTrace[:] + [self])
        self.checkRecursion([])

    def __str__(self):
        """Return the assigned name, or 'ClassName: <wrapped expr>'.

        While the wrapped expression is being rendered, this instance's class
        is temporarily switched to _ForwardNoRecurse (whose __str__ is "...")
        and restored afterwards.
        """
        if hasattr(self, "name"):
            return self.name

        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            retString = _ustr(self.expr) if self.expr is not None else "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy via the base class when defined; otherwise return a new Forward wrapping self."""
        if self.expr is None:
            ret = Forward()
            ret << self
            return ret
        return super(Forward, self).copy()
2967 | |
class _ForwardNoRecurse(Forward):
    """Stand-in class swapped onto a Forward instance by Forward.__str__ while
    its wrapped expression is being rendered, so it prints as "..."."""
    def __str__( self ):
        return "..."
2971 | |
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # NOTE: savelist is accepted but not forwarded to the base class
        super(TokenConverter,self).__init__( expr )#, savelist )
        self.saveAsList = False
2977 | |
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated: constructing an instance emits a DeprecationWarning pointing
    to the upcaseTokens parse action.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list holding the upper-cased form of every token."""
        # string.upper() was removed in Python 3; call the str method directly
        return [ tok.upper() for tok in tokenlist ]
2987 | |
2988 | |
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString

    def ignore(self, other):
        """Register an ignorable expression; when adjacent, only on the Combine itself."""
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Collapse the matched tokens into a single string token."""
        # start from an emptied copy of the incoming results, then add the joined text
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
3019 | |
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of ZeroOrMore and OneOrMore expressions."""

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # wrap the matched tokens in a one-element list
        return [tokenlist]
3028 | |
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to tokenlist for each non-empty element, keyed by its first token."""
        for i,tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            # integer keys are converted to their stripped string form
            if isinstance(ikey,int):
                ikey = _ustr(tok[0]).strip()
            if len(tok)==1:
                # key only: the value is an empty string
                tokenlist[ikey] = _ParseResultsWithOffset("",i)
            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
                # key plus one plain value
                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
            else:
                # the value is everything after the key
                dictvalue = tok.copy() #ParseResults(i)
                del dictvalue[0]
                # a lone remaining item with no named results is stored unwrapped
                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
                else:
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)

        if self.resultsName:
            return [ tokenlist ]
        else:
            return tokenlist
3061 | |
3062 | |
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        return []

    def suppress(self):
        # already suppressing, so there is nothing further to wrap
        return self
3070 | |
3071 | |
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action on first use; raise ParseException on any later
        call until reset() is called."""
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # only marked as called if the action returned without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run once more."""
        self.called = False
3085 | |
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an ">>entering" line to sys.stderr before
    calling the parse action, and a "<<leaving" line afterwards showing
    either the return value or the exception (which is re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    # func_name exists only on Python 2 functions; prefer __name__, and fall
    # back to repr() for callables that expose neither attribute
    funcName = getattr(f, "__name__", None) or getattr(f, "func_name", None) or repr(f)
    def z(*paArgs):
        thisFunc = funcName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is taken to be the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3108 | |
3109 # | |
3110 # global helpers | |
3111 # | |
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing 'combine=True' in the constructor.
    If combine is set to True, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    delimText = _ustr(delim)
    dlName = exprText + " [" + delimText + " " + exprText + "]..."
    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
3125 | |
def countedArray(expr):
    """Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # redefine the array expression once the count has been read
        n = int(t[0])
        if n:
            arrayExpr << Group(And([expr] * n))
        else:
            arrayExpr << Group(empty)
        return []

    countExpr = Word(nums).setName("arrayLen")
    countExpr = countExpr.setParseAction(countFieldParseAction, callDuringTry=True)
    return (countExpr + arrayExpr)
3139 | |
def _flatten(L):
    """Return a flat list of the non-list items found in L, in order.

    A value that is not exactly a list (including tuples and list
    subclasses) is treated as a single item and returned wrapped in a
    one-element list.  Nested plain lists are expanded to any depth.

    Implemented iteratively: the previous version recursed on L[1:], which
    went one level deep per element (RecursionError on long token lists)
    and copied the remaining list at every step.
    """
    if type(L) is not list:
        return [L]
    flat = []
    # stack of iterators, one per list currently being walked
    pending = [iter(L)]
    while pending:
        for item in pending[-1]:
            if type(item) is list:
                # descend into the nested list; resume this iterator afterwards
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # innermost iterator is exhausted
            pending.pop()
    return flat
3144 | |
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches a
    previous literal, will also match the leading "1:1" in "1:10".
    If this is not desired, use matchPreviousExpr.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            literals = [Literal(tt) for tt in _flatten(t.asList())]
            rep << And(literals)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3170 | |
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches by
    expressions, will *not* match the leading "1:1" in "1:10";
    the expressions are evaluated first, and then compared, so
    "1" is compared with "10".
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        # remember what expr matched, and make the repeater insist on the same tokens
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            if _flatten(t.asList()) != expected:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3196 | |
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters of s that are special inside a regex
    '[...]' range, and spell newline and tab as the two-character escapes."""
    #~ escape these chars: ^-]
    # the backslash itself goes first so later insertions are not escaped again
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    for raw, spelled in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, spelled)
    return _ustr(s)
3204 | |
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if caseless=True, or
       if creating a Regex raises an exception)

    An argument that is neither a string nor a list/tuple produces a
    SyntaxWarning and is treated as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # previously left unbound, so the warning was followed by an UnboundLocalError
        symbols = []

    # drop duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix ahead of that prefix
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    # with no symbols there is no pattern to compile; go straight to MatchFirst
    if symbols and not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3263 | |
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the Dict results can include named token
    fields.
    """
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
3273 | |
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. Simpler to use than the parse action keepOriginalText, and does not
    require the inspect module to chase up the call stack.  By default, returns a
    string containing the original parsed text.

    If the optional asString argument is passed as False, then the return value is a
    ParseResults containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to originalTextFor contains expressions with defined
    results names, you must set asString to False if you want to preserve those
    results name values."""
    # an empty match whose parse action returns the current location
    marker = Empty().setParseAction(lambda s,loc,t: loc)
    bracketed = marker("_original_start") + expr + marker("_original_end")
    if not asString:
        def extractText(s,l,t):
            # replace all tokens with the original slice, then drop the bookkeeping names
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    bracketed.setParseAction(extractText)
    return bracketed
3300 | |
# convenience constants for positional expressions
# Each is a single shared module-level instance of the corresponding
# positional class, given a readable name via setName.
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
3307 | |
# Internal grammar used by srange() to parse regex-style "[...]" character sets.

# a backslash followed by exactly one of the listed punctuation characters;
# the parse action keeps only the escaped character itself
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# despite the name, this excludes both the backslash and ']' from printables
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# backslash + "0x" + hex digits -> the character with that code point
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits -> the character with that code point
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# NOTE(review): both actions above call unichr, which is not a Python 3 builtin -
# confirm it is aliased earlier in this file
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "a-z" style range: grouped pair of single characters with the dash suppressed
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# full bracket expression; the optional leading "^" is recorded as "negate" and the
# list of ranges / single characters as "body" (srange reads only "body")
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3315 | |
def _expanded(p):
    """Expand one parsed srange part into its characters.

       A ParseResults part is treated as a (first, last) character pair and is
       expanded to every character from first through last inclusive.  Any other
       part is returned unchanged.  If the expansion is empty (first sorts after
       last) the part itself is returned rather than the empty string.
    """
    if isinstance(p,ParseResults):
        firstCode = ord(p[0])
        lastCode = ord(p[1])
        chars = ''.join([ unichr(c) for c in range(firstCode,lastCode+1) ])
        if chars:
            return chars
    return p
3317 | |
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Parsing is best-effort: if the string cannot be parsed or expanded, the
       empty string is returned instead of raising.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Deliberately forgiving, but only for ordinary errors: a bare "except:"
        # would also swallow KeyboardInterrupt and SystemExit.
        return ""
3338 | |
def matchOnlyAtCol(n):
    """Helper method that builds a parse action which only accepts a match
       located at column n of the input text.

       The returned action computes the column of the match location with col()
       and raises ParseException when it differs from n; otherwise it returns
       nothing, leaving the tokens untouched.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3347 | |
def replaceWith(replStr):
    """Helper method that builds a parse action which ignores whatever was
       matched and returns a one-element list containing replStr.  Especially
       useful when used with transformString().
    """
    def _replFunc(*ignoredArgs):
        # a new list on every call, so callers never share the same list object
        replacement = [replStr]
        return replacement
    return _replFunc
3355 | |
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the
       quotation marks) from the first parsed token.
       To use, add this parse action to a quoted string using::
          quotedString.setParseAction( removeQuotes )
    """
    quotedToken = t[0]
    return quotedToken[1:-1]
3362 | |
def upcaseTokens(s,l,t):
    """Helper parse action that returns a list of the tokens, each converted
       to a string with _ustr and then to upper case."""
    return [ _ustr(token).upper() for token in t ]
3366 | |
def downcaseTokens(s,l,t):
    """Helper parse action that returns a list of the tokens, each converted
       to a string with _ustr and then to lower case."""
    return [ _ustr(token).lower() for token in t ]
3370 | |
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve the original parsed text, overriding
       the results of any nested parse actions.

       The end of the match is obtained from getTokensEndLoc(); the tokens are
       then replaced in place by the slice of the input string from startLoc
       to that end location, and the same results object is returned.
    """
    try:
        endLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    else:
        originalText = s[startLoc:endLoc]
        del t[:]
        t += ParseResults(originalText)
        return t
3381 | |
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks the call stack, skipping this function and its immediate caller,
       looking for a frame whose function is named "_parseNoCache", and returns
       the value of that frame's local variable "loc".  Raises
       ParseFatalException if no such frame is found.
    """
    import inspect
    stackRecords = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for record in stackRecords[2:]:
            if record[3] != "_parseNoCache":
                continue
            return record[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references held by the stack listing
        del stackRecords
3397 | |
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr may be a string, which is turned into a Keyword (caseless unless
       xml is true), or an existing expression, whose .name is used to build the
       results names.  Returns the tuple (openTag, closeTag).

       In XML mode every attribute must have the form name="double quoted value".
       Otherwise attribute names are lower-cased, the value is optional, and it
       may be a quoted string or a run of printable characters other than '>'.
       In both modes an optional "/" before the closing ">" is reported under the
       results name "empty" as True or False.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) +
                         Optional( Suppress("=") + tagAttrValue ) )

    # when the "/" is absent the default token False is used, so the action yields False
    selfClosingFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(tagAttr)) + selfClosingFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # "ns:tag" -> "NsTag", giving results names such as "startNsTag" / "endNsTag"
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3425 | |
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given
       a tag name; returns the (openTag, closeTag) pair built by _makeTags with
       xml set to False."""
    return _makeTags( tagStr, xml=False )
3429 | |
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given
       a tag name; returns the (openTag, closeTag) pair built by _makeTags with
       xml set to True."""
    return _makeTags( tagStr, xml=True )
3433 | |
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Specify the required attribute names and values as either:
        - keyword arguments, as in (class="Customer",align="right"), or
        - positional name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       Attribute names with a namespace prefix are not valid keyword names, so they
       must use the second form.  If any positional tuples are given, the keyword
       arguments are ignored.

       To verify that an attribute exists without constraining its value, pass
       withAttribute.ANY_VALUE as the value.

       The returned parse action raises ParseException if a required attribute is
       missing from the tokens or has a different value.
    """
    pairSource = args if args else attrDict.items()
    # materialize now so the check does not depend on later changes to the inputs
    requiredAttrs = [ (name, value) for name, value in pairSource ]

    def pa(s,l,tokens):
        for attrName, attrValue in requiredAttrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                if tokens[attrName] != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, tokens[attrName], attrValue))
    return pa
# unique sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3464 | |
# Associativity markers for operatorPrecedence: LEFT and RIGHT are two distinct
# sentinel objects stored as attributes of a _Constants instance, intended to be
# passed as the rightLeftAssoc member of each operator definition.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3468 | |
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          expression grammar; a parenthesized full expression is also accepted
          wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for an unsupported numTerms, an unrecognized
       associativity, or a ternary opExpr that is not a pair.
    """
    ret = Forward()
    # operand of the first level: the base expression or a parenthesized full expression
    prevLevel = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so that the parse action member may be left out
        opExpr, arity, assoc, action = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            firstOp, secondOp = opExpr
        curLevel = Forward()

        # For each supported combination choose the lookahead (wrapped in FollowedBy
        # below) and the grouped body for this precedence level.
        if assoc == opAssoc.LEFT:
            if arity == 1:
                lookahead = prevLevel + opExpr
                body = prevLevel + OneOrMore( opExpr )
            elif arity == 2:
                if opExpr is not None:
                    lookahead = prevLevel + opExpr + prevLevel
                    body = prevLevel + OneOrMore( opExpr + prevLevel )
                else:
                    # no operator expression: operands are simply adjacent
                    lookahead = prevLevel+prevLevel
                    body = prevLevel + OneOrMore(prevLevel)
            elif arity == 3:
                lookahead = prevLevel + firstOp + prevLevel + secondOp + prevLevel
                body = prevLevel + firstOp + prevLevel + secondOp + prevLevel
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif assoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = opExpr.expr + curLevel
                body = opExpr + curLevel
            elif arity == 2:
                if opExpr is not None:
                    lookahead = prevLevel + opExpr + curLevel
                    body = prevLevel + OneOrMore( opExpr + curLevel )
                else:
                    lookahead = prevLevel + curLevel
                    body = prevLevel + OneOrMore( curLevel )
            elif arity == 3:
                lookahead = prevLevel + firstOp + curLevel + secondOp + curLevel
                body = prevLevel + firstOp + curLevel + secondOp + curLevel
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")

        levelMatch = FollowedBy(lookahead) + Group( body )
        if action:
            levelMatch.setParseAction( action )
        # fall back to the tighter-binding level when no operator of this level is present
        curLevel << ( levelMatch | prevLevel )
        prevLevel = curLevel
    ret << prevLevel
    return ret
3539 | |
# Predefined quoted-string expressions.  Between the delimiting quotes each regex
# accepts, repeatedly: any character other than the quote itself, newline, carriage
# return or backslash; a doubled quote character; a \x hex escape; or a backslash
# followed by any single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, as a single alternation
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string prefixed with 'u'; built on a copy so quotedString itself is not shared
unicodeString = Combine(_L('u') + quotedString.copy())
3544 | |
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If no content expression is given, opener and closer must both be strings,
       and the nested expression captures all whitespace-delimited content between
       the delimiters as a list of separate values.

       Use ignoreExpr for expressions that may themselves contain opening or
       closing characters which must not be treated as nesting delimiters, such as
       quotedString or a comment expression.  Specify multiple expressions using an
       Or or MatchFirst.  Pass None if nothing is to be ignored.

       Raises ValueError if opener equals closer, or if content is omitted and
       opener or closer is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        def stripToken(t):
            return t[0].strip()

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer)==1
        if singleCharDelims and ignoreExpr is not None:
            # delimiters can be excluded character by character
            content = Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(stripToken)
        elif singleCharDelims:
            # the parse action is attached to the CharsNotIn, not to the sum
            content = empty+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(stripToken)
        elif ignoreExpr is not None:
            # multi-character delimiters: use negative lookahead before each character
            content = Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3596 | |
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr itself is modified: it is told to ignore a
       backslash followed by a line end.
    """
    def checkPeerIndent(strg,loc,toks):
        # a statement must start in the column on top of the indent stack
        if loc >= len(strg):
            return
        column = col(loc,strg)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            raise ParseFatalException(strg,loc,"illegal nesting")
        raise ParseException(strg,loc,"not a peer entry")

    def checkSubIndent(strg,loc,toks):
        # entering a block: the column must be deeper than the current level
        column = col(loc,strg)
        if column <= indentStack[-1]:
            raise ParseException(strg,loc,"not a subentry")
        indentStack.append( column )

    def checkUnindent(strg,loc,toks):
        # leaving a block: the column must be back at or left of the enclosing level
        if loc >= len(strg):
            return
        column = col(loc,strg)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(strg,loc,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + statements + UNDENT)
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3648 | |
# 8-bit letter and punctuation character sets, expanded from range specifications.
# srange returns "" when it cannot parse or expand its argument, so these are
# empty strings if that expansion fails.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic open/close tag expressions matching any tag name of letters, digits, "_" and ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&name;" for the five listed entity names; the name is available as results name "entity"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# entity name -> replacement character, paired positionally (nbsp maps to a plain space)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: the mapped character for a known entity, otherwise None
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
# "//" comment; a backslash-newline pair is consumed as part of the comment
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ comment or a // comment
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment, under a second name
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# every printable character except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one unquoted list item: runs of non-comma characters, each optionally followed by
# spaces/tabs as long as a comma or line end does not come next
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# comma-delimited list of quoted strings or plain items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
3672 | |
3673 | |
3674 if __name__ == "__main__": | |
3675 | |
3676 def test( teststring ): | |
3677 try: | |
3678 tokens = simpleSQL.parseString( teststring ) | |
3679 tokenlist = tokens.asList() | |
3680 print (teststring + "->" + str(tokenlist)) | |
3681 print ("tokens = " + str(tokens)) | |
3682 print ("tokens.columns = " + str(tokens.columns)) | |
3683 print ("tokens.tables = " + str(tokens.tables)) | |
3684 print (tokens.asXML("SQL",True)) | |
3685 except ParseBaseException: | |
3686 err = sys.exc_info()[1] | |
3687 print (teststring + "->") | |
3688 print (err.line) | |
3689 print (" "*(err.column-1) + "^") | |
3690 print (err) | |
3691 print() | |
3692 | |
3693 selectToken = CaselessLiteral( "select" ) | |
3694 fromToken = CaselessLiteral( "from" ) | |
3695 | |
3696 ident = Word( alphas, alphanums + "_$" ) | |
3697 columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) | |
3698 columnNameList = Group( delimitedList( columnName ) )#.setName("columns") | |
3699 tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) | |
3700 tableNameList = Group( delimitedList( tableName ) )#.setName("tables") | |
3701 simpleSQL = ( selectToken + \ | |
3702 ( '*' | columnNameList ).setResultsName( "columns" ) + \ | |
3703 fromToken + \ | |
3704 tableNameList.setResultsName( "tables" ) ) | |
3705 | |
3706 test( "SELECT * from XYZZY, ABC" ) | |
3707 test( "select * from SYS.XYZZY" ) | |
3708 test( "Select A from Sys.dual" ) | |
3709 test( "Select AA,BB,CC from Sys.dual" ) | |
3710 test( "Select A, B, C from Sys.dual" ) | |
3711 test( "Select A, B, C from Sys.dual" ) | |
3712 test( "Xelect A, B, C from Sys.dual" ) | |
3713 test( "Select A, B, C frox Sys.dual" ) | |
3714 test( "Select" ) | |
3715 test( "Select ^^^ frox Sys.dual" ) | |
3716 test( "Select A, B, C from Sys.dual, Table2 " ) |