comparison pyparsing_py3.py @ 342:12010fcc4e38

pyparsing_py3'
author catherine@Drou
date Tue, 16 Feb 2010 12:57:05 -0500
parents
children
comparison
equal deleted inserted replaced
341:9e593c480782 342:12010fcc4e38
1 # module pyparsing.py
2 #
3 # Copyright (c) 2003-2009 Paul T. McGuire
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 #
24 #from __future__ import generators
25
# Module documentation.  The usage example calls print() as a function so that
# it is valid under Python 3, which this variant of the module targets.
__doc__ = \
"""
pyparsing module - Classes and methods to define and execute parsing grammars

The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.

Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::

    from pyparsing_py3 import Word, alphas

    # define grammar of a greeting
    greet = Word( alphas ) + "," + Word( alphas ) + "!"

    hello = "Hello, World!"
    print( hello, "->", greet.parseString( hello ) )

The program outputs the following::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.

The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
object with named attributes.

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
 - quoted strings
 - embedded comments
"""
60
# module release metadata
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
__version__ = "1.5.2.Py3"
__versionTime__ = "9 April 2009 12:21"
64
65 import string
66 from weakref import ref as wkref
67 import copy
68 import sys
69 import warnings
70 import re
71 import sre_constants
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
73
# Names exported by "from <module> import *": the parser element classes come
# first, followed by the module-level helper functions and predefined expressions.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
]
93
94 """
95 Detect if we are running version 3.X and make appropriate changes
96 Robert A. Clark
97 """
# True when running on a 3.x interpreter; selects which set of
# version-dependent names is defined below.
_PY3K = sys.version_info >= (3,)
if not _PY3K:
    _MAX_INT = sys.maxint

    def _ustr(obj):
        """Unicode-tolerant stand-in for str(obj).

        A unicode object is returned unchanged.  Otherwise str(obj) is tried
        first, so the result matches str() whenever str() succeeds; only when
        that raises UnicodeEncodeError is unicode(obj) returned instead.
        """
        if isinstance(obj, unicode):
            return obj
        try:
            return str(obj)
        except UnicodeEncodeError:
            # The unicode object is returned as-is rather than encoded; which
            # encoding / error policy to use otherwise was left undecided.
            return unicode(obj)

    def _str2dict(strg):
        # map every character of strg to 0, for fast membership tests
        return dict.fromkeys(strg, 0)

    alphas = string.lowercase + string.uppercase
else:
    _MAX_INT = sys.maxsize
    # names that no longer exist on 3.x are aliased to their replacements
    basestring = str
    unichr = chr
    _ustr = str
    _str2dict = set
    alphas = string.ascii_lowercase + string.ascii_uppercase
140
141
def _xml_escape(data):
    """Return data with the characters &, >, <, " and ' replaced by XML entities."""
    # '&' comes first so that the ampersands introduced by the later
    # replacements are not themselves escaped again
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
151
class _Constants(object):
    """Empty placeholder class; it defines no attributes or methods of its own."""
154
# character-class string constants
nums = string.digits
hexnums = string.digits + "ABCDEFabcdef"
alphanums = alphas + string.digits
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join( c for c in string.printable if c not in string.whitespace )
160
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Store the failure details.

        When msg is omitted, the first argument is taken to be the message
        itself and the parsed string is recorded as empty.
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        # these are computed on demand from loc/pstr instead of being stored
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # the method is spelled markInputline (lower-case 'l'); list it under
        # the name it can actually be looked up by
        return "loc msg pstr parserElement lineno col line " \
               "markInputline __str__ __repr__".split()
208
class ParseException(ParseBaseException):
    """Exception thrown when parse expressions don't match class.

    Attributes supported by name:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text
    """
217
class ParseFatalException(ParseBaseException):
    """User-throwable exception, thrown when inconsistent parse content
       is found; stops all parsing immediately."""
222
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # carry over the details of the exception being promoted
        text, where, message, element = pe.pstr, pe.loc, pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(text, where, message, element)
230
231 #~ class ReparseException(ParseBaseException):
232 #~ """Experimental class - parse actions can raise this exception to cause
233 #~ pyparsing to reparse the input string:
234 #~ - with a modified input string, and/or
235 #~ - with a modified start location
236 #~ Set the values of the ReparseException in the constructor, and raise the
237 #~ exception in a parse action to cause pyparsing to use the new string/location.
238 #~ Setting the values as None causes no change to be made.
239 #~ """
240 #~ def __init_( self, newstring, restartLoc ):
241 #~ self.newParseText = newstring
242 #~ self.reparseLoc = restartLoc
243
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the value is stored as given and only used when formatting the message
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple: a bare tuple on the right of '%' would be
        # unpacked as multiple format arguments and raise TypeError
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
251
class _ParseResultsWithOffset(object):
    """Pair of values held in the tuple 'tup'; index 0 is the first value and
    index 1 the offset, which setOffset() can replace."""

    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        # delegate indexing (and therefore tuple unpacking) to the stored pair
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        value, _ = self.tup
        self.tup = (value, i)
261
262 class ParseResults(object):
263 """Structured parse results, to provide multiple means of access to the parsed data:
264 - as a list (len(results))
265 - by list index (results[0], results[1], etc.)
266 - by attribute (results.<resultsName>)
267 """
268 __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
269 def __new__(cls, toklist, name=None, asList=True, modal=True ):
270 if isinstance(toklist, cls):
271 return toklist
272 retobj = object.__new__(cls)
273 retobj.__doinit = True
274 return retobj
275
276 # Performance tuning: we construct a *lot* of these, so keep this
277 # constructor as small and fast as possible
278 def __init__( self, toklist, name=None, asList=True, modal=True ):
279 if self.__doinit:
280 self.__doinit = False
281 self.__name = None
282 self.__parent = None
283 self.__accumNames = {}
284 if isinstance(toklist, list):
285 self.__toklist = toklist[:]
286 else:
287 self.__toklist = [toklist]
288 self.__tokdict = dict()
289
290 if name:
291 if not modal:
292 self.__accumNames[name] = 0
293 if isinstance(name,int):
294 name = _ustr(name) # will always return a str, but use _ustr for consistency
295 self.__name = name
296 if not toklist in (None,'',[]):
297 if isinstance(toklist,basestring):
298 toklist = [ toklist ]
299 if asList:
300 if isinstance(toklist,ParseResults):
301 self[name] = _ParseResultsWithOffset(toklist.copy(),0)
302 else:
303 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
304 self[name].__name = name
305 else:
306 try:
307 self[name] = toklist[0]
308 except (KeyError,TypeError,IndexError):
309 self[name] = toklist
310
311 def __getitem__( self, i ):
312 if isinstance( i, (int,slice) ):
313 return self.__toklist[i]
314 else:
315 if i not in self.__accumNames:
316 return self.__tokdict[i][-1][0]
317 else:
318 return ParseResults([ v[0] for v in self.__tokdict[i] ])
319
320 def __setitem__( self, k, v ):
321 if isinstance(v,_ParseResultsWithOffset):
322 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
323 sub = v[0]
324 elif isinstance(k,int):
325 self.__toklist[k] = v
326 sub = v
327 else:
328 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
329 sub = v
330 if isinstance(sub,ParseResults):
331 sub.__parent = wkref(self)
332
333 def __delitem__( self, i ):
334 if isinstance(i,(int,slice)):
335 mylen = len( self.__toklist )
336 del self.__toklist[i]
337
338 # convert int to slice
339 if isinstance(i, int):
340 if i < 0:
341 i += mylen
342 i = slice(i, i+1)
343 # get removed indices
344 removed = list(range(*i.indices(mylen)))
345 removed.reverse()
346 # fixup indices in token dictionary
347 for name in self.__tokdict:
348 occurrences = self.__tokdict[name]
349 for j in removed:
350 for k, (value, position) in enumerate(occurrences):
351 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
352 else:
353 del self.__tokdict[i]
354
355 def __contains__( self, k ):
356 return k in self.__tokdict
357
358 def __len__( self ): return len( self.__toklist )
359 def __bool__(self): return len( self.__toklist ) > 0
360 __nonzero__ = __bool__
361 def __iter__( self ): return iter( self.__toklist )
362 def __reversed__( self ): return iter( reversed(self.__toklist) )
363 def keys( self ):
364 """Returns all named result keys."""
365 return self.__tokdict.keys()
366
367 def pop( self, index=-1 ):
368 """Removes and returns item at specified index (default=last).
369 Will work with either numeric indices or dict-key indicies."""
370 ret = self[index]
371 del self[index]
372 return ret
373
374 def get(self, key, defaultValue=None):
375 """Returns named result matching the given key, or if there is no
376 such name, then returns the given defaultValue or None if no
377 defaultValue is specified."""
378 if key in self:
379 return self[key]
380 else:
381 return defaultValue
382
383 def insert( self, index, insStr ):
384 self.__toklist.insert(index, insStr)
385 # fixup indices in token dictionary
386 for name in self.__tokdict:
387 occurrences = self.__tokdict[name]
388 for k, (value, position) in enumerate(occurrences):
389 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
390
391 def items( self ):
392 """Returns all named result keys and values as a list of tuples."""
393 return [(k,self[k]) for k in self.__tokdict]
394
395 def values( self ):
396 """Returns all named result values."""
397 return [ v[-1][0] for v in self.__tokdict.values() ]
398
399 def __getattr__( self, name ):
400 if name not in self.__slots__:
401 if name in self.__tokdict:
402 if name not in self.__accumNames:
403 return self.__tokdict[name][-1][0]
404 else:
405 return ParseResults([ v[0] for v in self.__tokdict[name] ])
406 else:
407 return ""
408 return None
409
410 def __add__( self, other ):
411 ret = self.copy()
412 ret += other
413 return ret
414
415 def __iadd__( self, other ):
416 if other.__tokdict:
417 offset = len(self.__toklist)
418 addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
419 otheritems = other.__tokdict.items()
420 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
421 for (k,vlist) in otheritems for v in vlist]
422 for k,v in otherdictitems:
423 self[k] = v
424 if isinstance(v[0],ParseResults):
425 v[0].__parent = wkref(self)
426
427 self.__toklist += other.__toklist
428 self.__accumNames.update( other.__accumNames )
429 del other
430 return self
431
432 def __repr__( self ):
433 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
434
435 def __str__( self ):
436 out = "["
437 sep = ""
438 for i in self.__toklist:
439 if isinstance(i, ParseResults):
440 out += sep + _ustr(i)
441 else:
442 out += sep + repr(i)
443 sep = ", "
444 out += "]"
445 return out
446
447 def _asStringList( self, sep='' ):
448 out = []
449 for item in self.__toklist:
450 if out and sep:
451 out.append(sep)
452 if isinstance( item, ParseResults ):
453 out += item._asStringList()
454 else:
455 out.append( _ustr(item) )
456 return out
457
458 def asList( self ):
459 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
460 out = []
461 for res in self.__toklist:
462 if isinstance(res,ParseResults):
463 out.append( res.asList() )
464 else:
465 out.append( res )
466 return out
467
468 def asDict( self ):
469 """Returns the named parse results as dictionary."""
470 return dict( self.items() )
471
472 def copy( self ):
473 """Returns a new copy of a ParseResults object."""
474 ret = ParseResults( self.__toklist )
475 ret.__tokdict = self.__tokdict.copy()
476 ret.__parent = self.__parent
477 ret.__accumNames.update( self.__accumNames )
478 ret.__name = self.__name
479 return ret
480
481 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
482 """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
483 nl = "\n"
484 out = []
485 namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
486 for v in vlist ] )
487 nextLevelIndent = indent + " "
488
489 # collapse out indents if formatting is not desired
490 if not formatted:
491 indent = ""
492 nextLevelIndent = ""
493 nl = ""
494
495 selfTag = None
496 if doctag is not None:
497 selfTag = doctag
498 else:
499 if self.__name:
500 selfTag = self.__name
501
502 if not selfTag:
503 if namedItemsOnly:
504 return ""
505 else:
506 selfTag = "ITEM"
507
508 out += [ nl, indent, "<", selfTag, ">" ]
509
510 worklist = self.__toklist
511 for i,res in enumerate(worklist):
512 if isinstance(res,ParseResults):
513 if i in namedItems:
514 out += [ res.asXML(namedItems[i],
515 namedItemsOnly and doctag is None,
516 nextLevelIndent,
517 formatted)]
518 else:
519 out += [ res.asXML(None,
520 namedItemsOnly and doctag is None,
521 nextLevelIndent,
522 formatted)]
523 else:
524 # individual token, see if there is a name for it
525 resTag = None
526 if i in namedItems:
527 resTag = namedItems[i]
528 if not resTag:
529 if namedItemsOnly:
530 continue
531 else:
532 resTag = "ITEM"
533 xmlBodyText = _xml_escape(_ustr(res))
534 out += [ nl, nextLevelIndent, "<", resTag, ">",
535 xmlBodyText,
536 "</", resTag, ">" ]
537
538 out += [ nl, indent, "</", selfTag, ">" ]
539 return "".join(out)
540
541 def __lookup(self,sub):
542 for k,vlist in self.__tokdict.items():
543 for v,loc in vlist:
544 if sub is v:
545 return k
546 return None
547
548 def getName(self):
549 """Returns the results name for this token expression."""
550 if self.__name:
551 return self.__name
552 elif self.__parent:
553 par = self.__parent()
554 if par:
555 return par.__lookup(self)
556 else:
557 return None
558 elif (len(self) == 1 and
559 len(self.__tokdict) == 1 and
560 self.__tokdict.values()[0][0][1] in (0,-1)):
561 return self.__tokdict.keys()[0]
562 else:
563 return None
564
565 def dump(self,indent='',depth=0):
566 """Diagnostic method for listing out the contents of a ParseResults.
567 Accepts an optional indent argument so that this string can be embedded
568 in a nested display of other data."""
569 out = []
570 out.append( indent+_ustr(self.asList()) )
571 keys = self.items()
572 keys.sort()
573 for k,v in keys:
574 if out:
575 out.append('\n')
576 out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
577 if isinstance(v,ParseResults):
578 if v.keys():
579 out.append( v.dump(indent,depth+1) )
580 else:
581 out.append(_ustr(v))
582 else:
583 out.append(_ustr(v))
584 return "".join(out)
585
586 # add support for pickle protocol
587 def __getstate__(self):
588 return ( self.__toklist,
589 ( self.__tokdict.copy(),
590 self.__parent is not None and self.__parent() or None,
591 self.__accumNames,
592 self.__name ) )
593
594 def __setstate__(self,state):
595 self.__toklist = state[0]
596 self.__tokdict, \
597 par, \
598 inAccumNames, \
599 self.__name = state[1]
600 self.__accumNames = {}
601 self.__accumNames.update(inAccumNames)
602 if par is not None:
603 self.__parent = wkref(par)
604 else:
605 self.__parent = None
606
607 def __dir__(self):
608 return dir(super(ParseResults,self)) + self.keys()
609
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location that sits on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise: distance from the closest preceding newline (-1 when there is none)
    return loc - strg.rfind("\n", 0, loc)
621
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # each newline found before loc starts one further line
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
633
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # str.find reports "not found" as -1; index 0 is a legitimate match (the
    # string starts with a newline), so the test must be >= 0, not > 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
643
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched, with its location and (line,col)."""
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    message += "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (message)
646
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched together with its tokens as a list."""
    matched = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("Matched " + matched + " -> " + tokens_text)
649
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised; the other arguments are not used."""
    report = "Exception raised:" + _ustr(exc)
    print (report)
652
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    # accepts any positional arguments and ignores them all
    return None
656
657 class ParserElement(object):
658 """Abstract base level parser element class."""
659 DEFAULT_WHITE_CHARS = " \n\t\r"
660
@staticmethod
def setDefaultWhitespaceChars( chars ):
    """Overrides the default whitespace chars by rebinding the class-level
    ParserElement.DEFAULT_WHITE_CHARS to chars.
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
666
def __init__( self, savelist=False ):
    # naming and representation
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.errmsg = ""
    self.re = None

    # results handling
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)

    # callbacks
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # grammar analysis / optimization flags
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
689
def copy( self ):
    """Make a copy of this ParserElement.  Useful for defining different parse actions
       for the same parsing pattern, using copies of the original parse element."""
    duplicate = copy.copy( self )
    # give the copy its own lists so later changes do not leak back to self
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        # pick up the class-wide default as it is at copy time
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
699
def setName( self, name ):
    """Define name for this expression, for use in debugging.
       Also rebuilds the "Expected ..." error message from the new name and
       returns self."""
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    # an already-existing exception object is kept in sync with the new message
    if hasattr(self,"exception"):
        self.exception.msg = expected
    return self
707
def setResultsName( self, name, listAllMatches=False ):
    """Define name for referencing matching tokens as a nested attribute
       of the returned parse results.
       NOTE: this returns a *copy* of the original ParserElement object;
       this is so that the client can define a basic element, such as an
       integer, and reference it in multiple places with different names.
    """
    named = self.copy()
    named.resultsName = name
    # results are modal unless every match is to be listed
    named.modalResults = not listAllMatches
    return named
719
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set breakFlag to True to enable, False to
       disable.
    """
    if not breakFlag:
        # disabling: put back the wrapped parse method, if one was installed
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember what was wrapped so that setBreak(False) can undo it
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
737
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       f may be a function, a bound method, a class (its __init__ is inspected)
       or an object with a __call__ method.  f itself is returned when it
       accepts *args or exactly 3 arguments; otherwise a 3-argument wrapper is
       returned that forwards only the trailing arguments f expects.
       AttributeError propagates when no code object can be found for f."""
    STAR_ARGS = 4   # bit tested in co_flags to detect a *args parameter

    try:
        restore = None
        if isinstance(f,type):
            # a class used as parse action: count the constructor's arguments
            restore = f
            f = f.__init__
        if not _PY3K:
            codeObj = f.func_code
        else:
            codeObj = f.__code__
        if codeObj.co_flags & STAR_ARGS:
            # hand back the class itself, not its __init__
            if restore is not None:
                return restore
            return f
        numargs = codeObj.co_argcount
        if not _PY3K:
            if hasattr(f,"im_self"):
                numargs -= 1
        else:
            # Python 3 has no unbound methods: an __init__ looked up on the class
            # is a plain function, so its 'self' must be discounted explicitly
            if hasattr(f,"__self__") or restore is not None:
                numargs -= 1
        if restore:
            f = restore
    except AttributeError:
        try:
            if not _PY3K:
                call_im_func_code = f.__call__.im_func.func_code
            else:
                call_im_func_code = f.__call__.__func__.__code__

            # not a function, must be a callable object, get info from the
            # im_func binding of its bound __call__ method
            if call_im_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_im_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    numargs -= 1
        except AttributeError:
            if not _PY3K:
                call_func_code = f.__call__.func_code
            else:
                call_func_code = f.__call__.__code__
            # not a bound method, get info directly from __call__ method
            if call_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    numargs -= 1


    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f
    else:
        if numargs == 2:
            def tmp(s,l,t):
                return f(l,t)
        elif numargs == 1:
            def tmp(s,l,t):
                return f(t)
        else: #~ numargs == 0 (any other argument count also ends up here)
            def tmp(s,l,t):
                return f()
        # make the wrapper look like the wrapped callable where possible
        try:
            tmp.__name__ = f.__name__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__doc__ = f.__doc__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__dict__.update(f.__dict__)
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
831
def setParseAction( self, *fns, **kwargs ):
    """Define action to perform when successfully matching parse element definition.
       Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
       fn(loc,toks), fn(toks), or just fn(), where:
        - s = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process. See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    # any previously registered actions are replaced
    self.parseAction = [ self._normalizeParseActionArgs(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
852
def addParseAction( self, *fns, **kwargs ):
    """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
    self.parseAction += [ self._normalizeParseActionArgs(fn) for fn in fns ]
    # once enabled, callDuringTry stays enabled
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
858
def setFailAction( self, fn ):
    """Define action to perform if parsing fails at this expression.
       Fail action fn is a callable function that takes the arguments
       fn(s,loc,expr,err) where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw ParseFatalException
       if it is desired to stop parsing immediately.

       Returns self."""
    self.failAction = fn
    return self
871
def _skipIgnorables( self, instring, loc ):
    """Advance loc past every leading match of the expressions in
    self.ignoreExprs and return the new location."""
    progressed = True
    # keep making passes until a full pass over the ignorables matches nothing
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches; the ParseException raised by the
                # first failed attempt ends this inner loop
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                pass
    return loc
884
def preParse( self, instring, loc ):
    """Return loc advanced past ignorable expressions and then, when
    skipWhitespace is set, past any characters found in self.whiteChars."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if self.skipWhitespace:
        whitespace = self.whiteChars
        end = len(instring)
        while loc < end and instring[loc] in whitespace:
            loc += 1

    return loc
896
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: consumes nothing and produces no tokens,
    returning loc unchanged together with a new empty list."""
    no_tokens = []
    return loc, no_tokens
899
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: returns tokenlist unchanged."""
    return tokenlist
902
903 #~ @profile
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Match this expression at loc without memoization.

    Runs preParse (when callPreParse and self.callPreparse are both set),
    then parseImpl and postParse, wraps the tokens in a ParseResults, and
    applies the parse actions when doActions or self.callDuringTry is set.
    Debug actions and the fail action are invoked when configured.

    Returns a (new location, ParseResults) pair.  An IndexError raised by
    parseImpl is converted to a ParseException located at the end of
    instring.
    """
    debugging = self.debug
    # debug actions and the fail action share the instrumented path
    watched = debugging or self.failAction

    if watched and self.debugActions[0]:
        self.debugActions[0](instring, loc, self)

    if callPreParse and self.callPreparse:
        preloc = self.preParse(instring, loc)
    else:
        preloc = loc
    # parse actions and debug hooks are given the pre-preParse location
    tokensStart = loc

    if watched:
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException:
            err = sys.exc_info()[1]
            if self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    elif self.mayIndexError or loc >= len(instring):
        # only guard against IndexError when the matcher may run off the end
        try:
            loc, tokens = self.parseImpl(instring, preloc, doActions)
        except IndexError:
            raise ParseException(instring, len(instring), self.errmsg, self)
    else:
        loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName,
                             asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for fn in self.parseAction:
                tokens = fn(instring, tokensStart, retTokens)
                if tokens is not None:
                    # a parse action that returns a value replaces the results
                    retTokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults)
        except ParseBaseException:
            # the exception debug action is reported only while debugging
            if debugging and self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, sys.exc_info()[1])
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
980
def tryParse(self, instring, loc):
    """Attempt a match at loc with parse actions disabled; return the end location.

    A ParseFatalException raised during the attempt is converted into an
    ordinary ParseException at loc.
    """
    try:
        result = self._parse(instring, loc, doActions=False)
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
    return result[0]
986
987 # this method gets repeatedly called during backtracking with the same arguments -
988 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing wrapper around _parseNoCache.

    Results are stored in ParserElement._exprArgCache under the key
    (self, instring, loc, callPreParse, doActions).  Successful parses are
    stored as (loc, copy of the results); failures are stored as the
    exception object and re-raised on a later hit.
    """
    cache = ParserElement._exprArgCache
    key = (self, instring, loc, callPreParse, doActions)

    if key in cache:
        cached = cache[key]
        if isinstance(cached, Exception):
            raise cached
        return cached

    try:
        outcome = self._parseNoCache(instring, loc, doActions, callPreParse)
        # cache a copy so later mutation of the returned results
        # does not alter the memoized value
        cache[key] = (outcome[0], outcome[1].copy())
        return outcome
    except ParseBaseException:
        cache[key] = sys.exc_info()[1]
        raise
1005
# parse entry point used by the public methods; uncached unless
# enablePackrat() rebinds it to _parseCache
_parse = _parseNoCache

# memo table for _parseCache, speeding up repeated calls when backtracking
# through recursive expressions
_exprArgCache = {}

@staticmethod
def resetCache():
    """Discard every memoized result held in ParserElement._exprArgCache."""
    ParserElement._exprArgCache.clear()
1013
_packratEnabled = False

@staticmethod
def enablePackrat():
    """Turn on "packrat" parsing, which memoizes the parsing logic.

       Repeated parse attempts at the same string location (common in
       complex grammars) then return a cached value immediately instead of
       re-running the parsing/validating code.  Both successful results and
       parsing exceptions are memoized.

       This speedup may break existing programs whose parse actions have
       side effects, so packrat parsing is off when pyparsing_py3 is first
       imported.  To activate it, call ParserElement.enablePackrat().  If
       your program uses psyco to "compile as you go", call enablePackrat
       before psyco.full(); otherwise Python will crash.  For best results,
       call enablePackrat() immediately after importing pyparsing.

       Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1035
def parseString(self, instring, parseAll=False):
    """Parse instring with this expression, starting at location 0.

       This is the main entry point for client code once the complete
       expression has been built.  Returns the matched tokens.

       Set parseAll to True to require that the whole input string be
       consumed (equivalent to ending the grammar with StringEnd()).

       Note: unless parseWithTabs has been called, parseString calls
       expandtabs() on the input so that parse actions see proper column
       numbers.  If the input contains tabs and your parse actions use the
       loc argument to index into the string, keep a consistent view of the
       input by doing one of the following:
        - call parseWithTabs on your grammar before calling parseString
        - define your parse action with the full (s,loc,toks) signature and
          index into the parse action's s argument
        - explicitly expand the tabs in your input before calling parseString
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        endLoc, tokens = self._parse(instring, 0)
        if parseAll:
            endLoc = self.preParse(instring, endLoc)
            StringEnd()._parse(instring, endLoc)
    except ParseBaseException:
        # re-raise from here to clear out pyparsing's internal stack trace
        exc = sys.exc_info()[1]
        raise exc
    return tokens
1077
def scanString(self, instring, maxMatches=_MAX_INT):
    """Generate every match of this expression found in instring.

       Each item yielded is (tokens, start location, end location).  Pass
       maxMatches to stop scanning after that many matches.

       Start and end locations are relative to the string actually parsed;
       see parseString for how embedded tabs are handled.
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    end = len(instring)
    pos = 0
    # bind the bound methods once; they are called on every scan step
    skip = self.preParse
    parse = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while pos <= end and found < maxMatches:
            try:
                start = skip(instring, pos)
                stop, tokens = parse(instring, start, callPreParse=False)
            except ParseException:
                pos = start + 1
            else:
                if stop > pos:
                    found += 1
                    yield tokens, start, stop
                    pos = stop
                else:
                    # zero-width match: step forward to guarantee progress
                    pos = start + 1
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1116
def transformString(self, instring):
    """Return instring with each match replaced by its (possibly modified) tokens.

       Built on scanString: define a grammar, attach a parse action that
       modifies the returned token list, then call transformString() on the
       target text.  Text between matches is copied through unchanged.

       Note: this sets self.keepTabs to True and leaves it set.
    """
    pieces = []
    copiedUpTo = 0
    # keep tabs as-is, to minimize unwanted transformation of the string and
    # to keep locations consistent between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, stop in self.scanString(instring):
            pieces.append(instring[copiedUpTo:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            copiedUpTo = stop
        pieces.append(instring[copiedUpTo:])
        return "".join(map(_ustr, pieces))
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1145
def searchString(self, instring, maxMatches=_MAX_INT):
    """Return a ParseResults holding the tokens of every match in instring.

       A convenience wrapper over scanString that drops the start and end
       locations.  Pass maxMatches to stop after that many matches.
    """
    try:
        found = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1156
def __add__(self, other):
    """Implement self + other: return an And of the two expressions.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1166
def __radd__(self, other):
    """Implement other + self when the left operand is not a ParserElement.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1176
def __sub__(self, other):
    """Implement self - other: return an And with an error stop between the operands.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1186
def __rsub__(self, other):
    """Implement other - self when the left operand is not a ParserElement.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1196
def __mul__(self, other):
    """Implement self * other: repeat this expression.

    Accepted multipliers:
     - expr*n with an int n: exactly n repetitions
     - expr*(lo, hi) with two ints: at least lo and at most hi repetitions
     - expr*(None, hi): same as expr*(0, hi)
     - expr*(n, None) or expr*(n,): at least n repetitions
       (ZeroOrMore for n == 0, OneOrMore for n == 1)

    Raises TypeError for any other multiplier type, and ValueError for a
    negative count, for hi < lo, and for a multiplier of 0 or (0, 0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad to exactly two entries, so (n,) becomes (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            # the arguments were previously passed as extra exception args,
            # leaving the %s placeholders unformatted in the message
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n deep
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1245
def __rmul__(self, other):
    """Implement other * self by delegating to __mul__ with the same multiplier."""
    product = self.__mul__(other)
    return product
1248
def __or__(self, other):
    """Implement self | other: return a MatchFirst of the two expressions.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1258
def __ror__(self, other):
    """Implement other | self when the left operand is not a ParserElement.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1268
def __xor__(self, other):
    """Implement self ^ other: return an Or of the two expressions.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1278
def __rxor__(self, other):
    """Implement other ^ self when the left operand is not a ParserElement.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1288
def __and__(self, other):
    """Implement self & other: return an Each of the two expressions.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1298
def __rand__(self, other):
    """Implement other & self when the left operand is not a ParserElement.

    A string operand is wrapped in Literal.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1308
def __invert__(self):
    """Implement ~self: return this expression wrapped in NotAny."""
    negated = NotAny(self)
    return negated
1312
def __call__(self, name):
    """Shortcut for setResultsName(name), using its default listAllMatches::

         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")

       can be written as::

         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    named = self.setResultsName(name)
    return named
1320
def suppress(self):
    """Return this expression wrapped in Suppress, so its tokens are left
       out of the results; useful for keeping punctuation from cluttering
       up the returned output.
    """
    suppressed = Suppress(self)
    return suppressed
1326
def leaveWhitespace(self):
    """Turn off whitespace skipping before this expression's pattern is matched.

       Normally only used internally by the pyparsing module, but may be
       needed in some whitespace-sensitive grammars.  Returns self.
    """
    setattr(self, "skipWhitespace", False)
    return self
1334
def setWhitespaceChars(self, chars):
    """Replace the default whitespace characters with chars for this expression.

       Also re-enables whitespace skipping and clears copyDefaultWhiteChars.
       Returns self.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1342
def parseWithTabs(self):
    """Keep TAB characters in the input instead of expanding them to spaces.

       Overrides the default behavior of expanding tabs before parsing.
       Must be called before parseString when the grammar contains elements
       that match TAB characters.  Returns self.
    """
    setattr(self, "keepTabs", True)
    return self
1349
def ignore(self, other):
    """Register an expression to be skipped (e.g., comments) while matching.

       May be called repeatedly to define several ignorable patterns.  An
       expression that is not already a Suppress is wrapped in one; a
       Suppress is added only if it is not already registered.
       Returns self.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
1361
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Turn on debugging and install the three debug callbacks.

       Any callback given as a false value is replaced by the module's
       default action of the matching kind.  Returns self.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1369
def setDebug(self, flag=True):
    """Turn the display of debugging messages during matching on or off.

       A true flag installs the default debug actions; a false flag only
       clears self.debug.  Returns self.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
1378
def __str__(self):
    """Return this expression's name attribute."""
    label = self.name
    return label
1381
def __repr__(self):
    """Return the same text as the string form, converted with _ustr."""
    text = _ustr(self)
    return text
1384
def streamline(self):
    """Mark this expression as streamlined and clear its cached string form.

    Returns self.
    """
    self.strRepr = None
    self.streamlined = True
    return self
1389
def checkRecursion(self, parseElementList):
    """Base recursion check: does nothing."""
    return None
1392
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure and for infinitely
       recursive definitions.

       This base implementation ignores validateTrace and starts
       checkRecursion with an empty element list.
    """
    visited = []
    self.checkRecursion(visited)
1396
def parseFile(self, file_or_filename, parseAll=False):
    """Run parseString on the contents of a file object or a named file.

       If file_or_filename has a read() method it is used as-is and is not
       closed here.  Otherwise it is treated as a filename: the file is
       opened in text mode, read completely, and closed before parsing.
       Text mode is required because parsing compares against str values.

       parseAll is passed through to parseString.  Returns the parsed tokens.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object, so treat it as a path; the with-block
        # closes the file even if read() fails
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # re-raise from here to clear out pyparsing's internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1414
def getException(self):
    """Build a new ParseException for this expression, carrying self.errmsg,
    an empty source string and location 0."""
    exc = ParseException("", 0, self.errmsg, self)
    return exc
1417
def __getattr__(self, aname):
    """Create the myException attribute lazily on first access.

    The exception comes from getException() and is stored on the instance,
    so later lookups bypass __getattr__.  Any other missing attribute
    raises AttributeError.
    """
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    exc = self.getException()
    self.myException = exc
    return exc
1424
def __eq__(self, other):
    """Compare this expression with another expression or with a string.

    Two ParserElements are equal when they are the same object or have
    equal __dict__ contents.  A string is equal when this expression parses
    it completely (parseAll=True).  Anything else is deferred to the
    superclass comparison.
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement, self) == other
1436
def __ne__(self, other):
    """Return the negation of self == other."""
    same = (self == other)
    return not same
1439
def __hash__(self):
    """Hash by object identity, not by the contents compared in __eq__."""
    identity = id(self)
    return hash(identity)
1442
def __req__(self, other):
    """Reflected equality: return the result of self == other."""
    same = (self == other)
    return same
1445
def __rne__(self, other):
    """Reflected inequality: return the negation of self == other."""
    same = (self == other)
    return not same
1448
1449
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name via the superclass, then rebuild errmsg as
        "Expected " plus the new name.  Returns the superclass's result."""
        renamed = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1461
1462
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # matching consumes no input and never indexes into the string
        self.mayIndexError = False
        self.mayReturnEmpty = True
1470
1471
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise this element's exception, updated with the
        current location and input string."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1487
1488
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # a null literal is re-classed as Empty
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*.  Comparing the
    # first character before anything else lets a mismatch (or a
    # single-character literal) be decided without calling startswith.
    def parseImpl(self, instring, loc, doActions=True):
        """Return (end location, matched string) if the literal occurs at
        loc; otherwise raise this element's exception."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
1521
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must
       not be immediately preceded or followed by a keyword character.
       Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to the current Keyword.DEFAULT_KEYWORD_CHARS (initially all
       alphanumerics + "_" and "$"); caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            # resolved at call time so that setDefaultKeywordChars() affects
            # Keywords created afterwards; a default bound in the signature
            # would be frozen when the class is defined
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) when the keyword occurs at loc and
        is not adjacent to an identifier character on either side; otherwise
        raise this element's exception."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy whose identChars is reset to the current
        Keyword.DEFAULT_KEYWORD_CHARS string."""
        # NOTE(review): this discards any custom identChars given to the
        # constructor -- confirm that is intended
        c = super(Keyword,self).copy()
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1582
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the superclass stores the upper-cased form as self.match
        super(CaselessLiteral, self).__init__(matchString.upper())
        # keep the defining literal, to be returned on a match
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Return (end location, defining literal) when the upper-cased
        input at loc equals the upper-cased literal; otherwise raise this
        element's exception."""
        candidate = instring[loc:loc + self.matchLen]
        if candidate.upper() == self.match:
            return loc + self.matchLen, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1604
class CaselessKeyword(Keyword):
    """Caseless version of Keyword: matches the keyword string regardless of
       letter case.  identChars defaults to the current
       Keyword.DEFAULT_KEYWORD_CHARS.
    """
    def __init__( self, matchString, identChars=None ):
        if identChars is None:
            # resolved at call time so that Keyword.setDefaultKeywordChars()
            # affects instances created afterwards
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) when the upper-cased input at loc
        equals the upper-cased keyword and is not followed by an identifier
        character; otherwise raise this element's exception."""
        # NOTE(review): unlike Keyword.parseImpl with caseless=True, the
        # character before loc is not checked here -- confirm intended
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1618
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with no length restrictions and no space in either
        # character set, the word can be matched by one compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single initial character can be emitted as an escaped
                # literal.  This must test initCharsOrig: testing
                # bodyCharsOrig would turn a multi-character initial set
                # into a literal sequence, e.g. Word("abc","x") -> "abc[x]*".
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, matched word) for a word starting at loc;
        otherwise raise this element's exception."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past it is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # a keyword-style word may not touch body characters on either side
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a short
        "W:(...)" summary of the character sets (cached in strRepr)."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # no name available yet; fall through to the generated summary
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four chars
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1745
1746
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """

    def __init__(self, pattern, flags=0):
        """Compile pattern with flags; both are passed to re.compile() as-is.
        See the Python re module for the acceptable patterns and flags.
        An invalid pattern issues a SyntaxWarning and re-raises the error."""
        super(Regex, self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Return (end location, ParseResults) for a regex match at loc.

        The results hold the whole matched text, plus one named entry per
        named group in the pattern.  Raises this element's exception when
        the pattern does not match at loc.
        """
        found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        named = found.groupdict()
        tokens = ParseResults(found.group())
        for key, value in named.items():
            tokens[key] = value
        return found.end(), tokens

    def __str__(self):
        """Return the assigned name if there is one, otherwise a
        "Re:(...)" summary of the pattern (cached in strRepr)."""
        try:
            return super(Regex, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)
        return self.strRepr
1802
1803
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters."""

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                 unquoteResults=True, endQuoteChar=None):
        """
        Defined with the following parameters:
         - quoteChar - string of one or more characters defining the quote delimiting string
         - escChar - character to escape quotes, typically backslash (default=None)
         - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
         - multiline - boolean indicating whether quotes can span multiple lines (default=False)
         - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
         - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

        Raises SyntaxError (after a SyntaxWarning) when quoteChar or
        endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Characters excluded from the "ordinary body character" class: the
        # first char of the closing quote and, when given, the escape char.
        escRange = _escapeRegexRangeChars(escChar) if escChar is not None else ''
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                (re.escape(self.quoteChar),
                 _escapeRegexRangeChars(self.endQuoteChar[0]),
                 escRange)
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                (re.escape(self.quoteChar),
                 _escapeRegexRangeChars(self.endQuoteChar[0]),
                 escRange)
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote that are
            # not followed by the next character of the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                    _escapeRegexRangeChars(self.endQuoteChar[i]))
                                       for i in range(len(self.endQuoteChar) - 1, 0, -1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at loc.

        Returns (end of match, text).  When unquoteResults is set the
        delimiters are removed, escChar-escaped characters are replaced by
        the escaped character, and escQuote sequences are replaced by
        endQuoteChar.  Raises the element's reusable exception on no match.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        else:
            result = None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped characters; the template must be a raw
                # string, "\g" is not a valid escape in a normal literal
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            # fall through to the generated representation
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1918
1919
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.

    Defined with a string containing all disallowed characters, and an
    optional minimum, maximum, and/or exact length.  min defaults to 1 and
    values below 1 are rejected; max and exact default to 0, meaning no
    maximum or exact length restriction.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from loc until a disallowed one or maxLen is hit.

        Returns (end position, matched text); raises the element's reusable
        exception when the first character is disallowed or fewer than
        minLen characters were consumed.
        """
        if instring[loc] in self.notChars:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        begin = loc
        forbidden = self.notChars
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] not in forbidden:
            loc += 1

        if loc - begin < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[begin:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except:
            # any failure falls through to the generated representation
            pass

        if self.strRepr is None:
            # show at most the first four disallowed characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
1990
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""

    # display names used to build the element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # characters we are asked to match must not be skipped as leading whitespace
        self.setWhitespaceChars("".join([c for c in self.whiteChars if c not in self.matchWhite]))
        # characters without a display name fall back to their repr instead
        # of aborting construction with a KeyError
        self.name = "".join([White.whiteStrs.get(c, repr(c)) for c in self.matchWhite])
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of matchWhite characters starting at loc.

        Returns (end position, matched text); raises the element's reusable
        exception when the first character is not in matchWhite or fewer
        than minLen characters were consumed.
        """
        if not (instring[loc] in self.matchWhite):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
2047
2048
class _PositionToken(Token):
    """Base for the position-testing tokens defined below it.

    Names the element after its concrete class and marks it as able to
    return empty and as not raising IndexError.
    """

    def __init__(self):
        super(_PositionToken, self).__init__()
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2055
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""

    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorables, then whitespace, until the target column is reached."""
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (loc < end
               and instring[loc].isspace()
               and col(loc, instring) != self.col):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between loc and the target column.

        Raises ParseException when loc is already past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
2078
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""

    def __init__(self):
        super(LineStart, self).__init__()
        # newline is significant here, so it must not be skipped as whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Step over one newline if that is what follows the skipped whitespace.

        Note that the returned position is loc or loc + 1, not the position
        returned by the inherited preParse.
        """
        after_ws = super(LineStart, self).preParse(instring, loc)
        # NOTE(review): indexing raises IndexError if after_ws == len(instring);
        # confirm callers tolerate that at end of input.
        return loc + 1 if instring[after_ws] == "\n" else loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at position 0, at the position preParse
        yields from 0, or right after a newline; otherwise raise."""
        at_line_start = (loc == 0
                         or loc == self.preParse(instring, 0)
                         or instring[loc - 1] == "\n")
        if not at_line_start:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2103
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        super(LineEnd, self).__init__()
        # newline is significant here, so it must not be skipped as whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a newline at loc, or match emptily exactly at end of input.

        Returns (loc + 1, "\\n") for a newline and (loc + 1, []) at the end
        of the string; any other position raises the reusable exception.
        """
        end = len(instring)
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == end:
            return loc + 1, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2129
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at position 0, or at the position preParse
        reaches from 0 (only whitespace and ignorables precede loc)."""
        if loc != 0 and loc != self.preParse(instring, 0):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2147
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or beyond the end of instring.

        Exactly at the end the returned position is loc + 1; beyond the end
        it is loc unchanged.  Before the end the reusable exception is raised.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc > end:
            return loc, []

        # loc < end (or not comparable to it): not at end of text
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2171
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """

    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at position 0, or where the previous
        character is not a word character and the current one is."""
        word_chars = self.wordChars
        if loc != 0 and (instring[loc - 1] in word_chars
                         or instring[loc] not in word_chars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2193
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """

    def __init__(self, wordChars=printables):
        super(WordEnd, self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or past the end of a non-empty string or
        on an empty string, or where the current character is not a word
        character and the previous one is."""
        size = len(instring)
        inside_string = size > 0 and loc < size
        word_chars = self.wordChars
        if inside_string and (instring[loc] in word_chars
                              or instring[loc - 1] not in word_chars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2218
2219
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__(self, exprs, savelist=False):
        """Store exprs as the list self.exprs.

        A list is kept as-is (same object), a string becomes a single
        Literal, any other iterable is copied into a list, and a
        non-iterable is wrapped in a one-element list.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            members = exprs
        elif isinstance(exprs, basestring):
            members = [Literal(exprs)]
        else:
            try:
                members = list(exprs)
            except TypeError:
                members = [exprs]
        self.exprs = members
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add other to the contained expressions and drop the cached repr."""
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions (on copies of them, which replace the originals)."""
        self.skipWhitespace = False
        self.exprs = [member.copy() for member in self.exprs]
        for member in self.exprs:
            member.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on every contained expression.

        A Suppress that is already registered is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for member in self.exprs:
            member.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        try:
            return super(ParseExpression, self).__str__()
        except:
            # any failure falls through to the generated representation
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline the members, then flatten a same-class nested pair.

        Collapses nested And's of the form And( And( And( a,b), c), d) to
        And( a,b,c,d ), but only if there are no parse actions, results
        names or debug flags on the nested And's (likewise for Or's and
        MatchFirst's).
        """
        super(ParseExpression, self).streamline()

        for member in self.exprs:
            member.streamline()

        def absorbable(candidate):
            return (isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug)

        if len(self.exprs) == 2:
            first = self.exprs[0]
            if absorbable(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if absorbable(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression, then check for recursion."""
        # the default list is only copied, never mutated
        trace = validateTrace[:] + [self]
        for member in self.exprs:
            member.validate(trace)
        self.checkRecursion([])
2315
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once seen by And.parseImpl, failures of the
        following expressions are raised as ParseSyntaxException."""

        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the lookup *after*
            # Empty, so Empty.__init__ is skipped - confirm this is intended.
            super(Empty, self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        super(And, self).__init__(exprs, savelist)
        # the sequence may match emptily only if every member may
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # Whitespace handling follows the first member.  Use the normalised
        # self.exprs rather than the raw argument, which may be a string or
        # a non-indexable iterable.
        self.setWhitespaceChars(self.exprs[0].whiteChars)
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each contained expression in order, accumulating tokens.

        After an _ErrorStop member, ParseBaseException and IndexError from
        later members are converted to ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    pe = sys.exc_info()[1]
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that carry tokens or named results
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)  # And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        """Check members for recursion up to and including the first one
        that cannot match emptily."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr
2383
2384
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """

    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # the alternation may match emptily if any alternative may
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and parse with the one that got furthest.

        Ties go to the earliest alternative.  When none matches, the failure
        that occurred furthest into the input is raised; with no
        alternatives at all a ParseException is raised.
        """
        furthest_error = None
        furthest_error_loc = -1
        longest_end = -1
        for alt in self.exprs:
            try:
                end = alt.tryParse(instring, loc)
            except ParseException as err:
                if err.loc > furthest_error_loc:
                    furthest_error = err
                    furthest_error_loc = err.loc
            except IndexError:
                # ran off the end of the input: report it at end of string
                if len(instring) > furthest_error_loc:
                    furthest_error = ParseException(instring, len(instring), alt.errmsg, self)
                    furthest_error_loc = len(instring)
            else:
                if end > longest_end:
                    longest_end = end
                    winner = alt

        if longest_end < 0:
            if furthest_error is not None:
                raise furthest_error
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        # re-parse with the winner, this time honouring doActions
        return winner._parse(instring, loc, doActions)

    def __ixor__(self, other):
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)  # Or( [ self, other ] )

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join([_ustr(alt) for alt in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        nested = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(nested)
2445
2446
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """

    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # Decide on the normalised self.exprs, not the raw argument: a
        # generator is always truthy and an empty string is falsy even
        # though it becomes one Literal.
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        When none matches, the failure that occurred furthest into the
        input is raised; with no alternatives a ParseException is raised.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # ran off the end of the input: report it at end of string
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)  # MatchFirst( [ self, other ] )

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
2504
2505
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """

    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        # may match emptily only if every member may
        self.mayReturnEmpty = all(member.mayReturnEmpty for member in self.exprs)
        self.skipWhitespace = True
        # member classification is deferred to the first parseImpl call
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Find an order in which the members match, then parse in that order.

        A trial pass (tryParse) repeatedly attempts the outstanding members
        until a whole round fails; the real pass then re-parses in the
        discovered order and merges the results.  Raises ParseException
        when required members remain unmatched.
        """
        if self.initExprGroups:
            optionals, multioptionals, multirequired, required = [], [], [], []
            for member in self.exprs:
                if isinstance(member, Optional):
                    optionals.append(member.expr)
                if isinstance(member, ZeroOrMore):
                    multioptionals.append(member.expr)
                if isinstance(member, OneOrMore):
                    multirequired.append(member.expr)
                if not isinstance(member, (Optional, ZeroOrMore, OneOrMore)):
                    required.append(member)
            self.optionals = optionals
            self.multioptionals = multioptionals
            self.multirequired = multirequired
            self.required = required + multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        match_order = []

        while True:
            candidates = (pending_required + pending_optional
                          + self.multioptionals + self.multirequired)
            failures = 0
            for cand in candidates:
                try:
                    probe_loc = cand.tryParse(instring, probe_loc)
                except ParseException:
                    failures += 1
                else:
                    match_order.append(cand)
                    if cand in pending_required:
                        pending_required.remove(cand)
                    elif cand in pending_optional:
                        pending_optional.remove(cand)
            # stop once a full round produced no match at all
            if failures == len(candidates):
                break

        if pending_required:
            missing = ", ".join([_ustr(cand) for cand in pending_required])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        match_order += [member for member in self.exprs
                        if isinstance(member, Optional) and member.expr in pending_optional]

        parsed = []
        for cand in match_order:
            loc, results = cand._parse(instring, loc, doActions)
            parsed.append(results)

        merged = ParseResults([])
        for part in parsed:
            # names already present get the old and new values concatenated
            repeated = {}
            for key in part.keys():
                if key in merged.keys():
                    combined = ParseResults(merged[key])
                    combined += ParseResults(part[key])
                    repeated[key] = combined
            merged += ParseResults(part)
            for key, value in repeated.items():
                merged[key] = value
        return loc, merged

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join([_ustr(member) for member in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        nested = parseElementList[:] + [self]
        for member in self.exprs:
            member.checkRecursion(nested)
2590
2591
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__(self, expr, savelist=False):
        """Wrap expr (a string becomes a Literal; None is allowed).

        When expr is not None its parsing attributes (index-error and
        empty-match flags, whitespace settings, saveAsList, callPreparse
        and ignore expressions) are copied onto this element.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a copy of the wrapped expression."""
        self.skipWhitespace = False
        # copy only when there is something to copy; an element without a
        # wrapped expression must not fail here
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on the wrapped expression.

        A Suppress that is already registered is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if this element is already on the path."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression, then check for recursion."""
        # the default list is only copied, never mutated
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # fall through to the generated representation
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
2662
2663
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""

    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # only the success of the trial parse matters; its end position is discarded
        self.expr.tryParse(instring, loc)
        no_tokens = []
        return loc, no_tokens
2676
2677
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""

    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens only when the wrapped expression fails at loc."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            return loc, []

        # the unwanted expression matched
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2713
2714
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression repeatedly until it fails.

        Returns the position after the last successful match and the
        accumulated tokens (an empty list when nothing matched).
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skip_ignorables = len(self.ignoreExprs) > 0
            while True:
                start = self._skipIgnorables(instring, loc) if skip_ignorables else loc
                loc, more = self.expr._parse(instring, start, doActions)
                # keep results that carry tokens or named results
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failure ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2752
2753
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression once, then repeatedly until it fails.

        A failure of the first match propagates to the caller; later
        failures just end the repetition.
        """
        # must be at least one
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skip_ignorables = len(self.ignoreExprs) > 0
            while True:
                start = self._skipIgnorables(instring, loc) if skip_ignorables else loc
                loc, more = self.expr._parse(instring, start, doActions)
                # keep results that carry tokens or named results
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2787
class _NullToken(object):
    """Placeholder type whose instances are always falsy and print as ""."""
    def __str__(self):
        return ""
    def __bool__(self):
        return False
    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__

# module-wide sentinel used as the "no default supplied" marker
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.mayReturnEmpty = True
        self.defaultValue = default

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression; on failure fall back to the default.

        When the wrapped expression raises ParseException or IndexError the
        location is left unchanged and the tokens are: an empty list if no
        default was given, otherwise the default value (also stored under
        the wrapped expression's results name when it has one).
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else the cached "[expr]" representation."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "".join([ "[", _ustr(self.expr), "]" ])
            return self.strRepr
        return self.name
2828
2829
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If failOn is given (an expression, or a string that
       is converted to a Literal), the skip is aborted with a ParseException as soon as
       failOn matches at a scanned position.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to a Literal expression
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)
        # NOTE(review): parseImpl raises self.myException, which is not assigned here -
        # presumably it is set up by a base class; confirm.
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from loc, one character at a time, until the target expression matches.

        Returns (loc, [tokens]) where the single token is the skipped text
        instring[startLoc:loc], or - when includeMatch is set and the target
        produced tokens - a ParseResults of the skipped text plus those tokens.
        Raises ParseException if failOn matches first, or self.myException if
        the end of the string is reached without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # set just before raising for a failOn hit, so the handler below re-raises
        # instead of advancing to the next character
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        # failOn does not match here - keep scanning
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # jump over every consecutive match of the ignore expression
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                            # print("found ignoreExpr, advance to", loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    # consume the target for real, honoring doActions
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    # target not found at this position - try the next character
                    loc += 1
        # ran off the end of the input without finding the target
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2895
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install other as the wrapped expression and copy its parsing settings.

        A plain string is converted to a Literal first.  Returns None.
        """
        if isinstance( other, basestring ):
            other = Literal(other)
        target = self.expr = other
        self.strRepr = None
        self.mayIndexError = target.mayIndexError
        self.mayReturnEmpty = target.mayReturnEmpty
        # setWhitespaceChars must run before skipWhitespace is copied over
        self.setWhitespaceChars( target.whiteChars )
        self.skipWhitespace = target.skipWhitespace
        self.saveAsList = target.saveAsList
        self.ignoreExprs.extend(target.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once; later calls are no-ops. Returns self."""
        if self.streamlined:
            return self
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression (unless already in the trace), then check recursion."""
        if self not in validateTrace and self.expr is not None:
            # pass a fresh list so the caller's trace is never modified
            self.expr.validate(validateTrace[:]+[self])
        self.checkRecursion([])

    def __str__( self ):
        """Return self.name if set, else "<ClassName>: <wrapped expression>".

        While the wrapped expression is being rendered, this object's class
        is temporarily switched to _ForwardNoRecurse so that a recursive
        reference back to it prints as "..." instead of recursing forever.
        """
        if hasattr(self,"name"):
            return self.name

        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            retString = "None" if self.expr is None else _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy this element; an undefined Forward yields a new Forward that wraps this one."""
        if self.expr is None:
            duplicate = Forward()
            duplicate << self
            return duplicate
        return super(Forward,self).copy()
2967
class _ForwardNoRecurse(Forward):
    """Forward variant whose string form is always the placeholder "..."."""
    def __str__( self ):
        placeholder = "..."
        return placeholder
2971
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted in the signature but is not forwarded to the
        # base class; saveAsList is switched off unconditionally below
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
2977
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

       Deprecated: constructing an instance emits a DeprecationWarning; use the
       upcaseTokens parse action instead.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a new list containing the upper-cased form of each token in tokenlist."""
        # The string-module function string.upper() no longer exists in
        # Python 3; calling the method on each token works on both Python 2
        # (str/unicode) and Python 3.
        return [ tok.upper() for tok in tokenlist ]
2987
2988
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.joinString = joinString
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True

    def ignore( self, other ):
        """Register an ignorable expression; returns self.

        In adjacent mode the ParserElement implementation is called directly,
        otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the list contents of the results with one concatenated string token.

        The work is done on a copy of tokenlist whose list items are cleared
        first.  The result is wrapped in a one-element list when this element
        has a results name and the combined results carry any keys.
        """
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined = tokenlist.copy()
        del combined[:]
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(combined.keys())>0:
            return [ combined ]
        return combined
3019
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Nest the whole token list inside a new one-element list."""
        nested = [ tokenlist ]
        return nested
3028
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to tokenlist for every non-empty item it contains.

        The key is the item's first token (integer keys are converted to a
        stripped string).  The value is "" for a one-token item, the second
        token for a two-token item whose second token is not a ParseResults,
        and otherwise the remaining tokens (unwrapped when exactly one remains
        and the remainder carries no keys).
        """
        for index,entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue
            key = entry[0]
            if isinstance(key,int):
                key = _ustr(key).strip()
            if size == 1:
                value = ""
            elif size == 2 and not isinstance(entry[1],ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                if len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value,index)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3061
3062
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def suppress( self ):
        """Already suppressing - return self unchanged."""
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Discard whatever was matched by returning a new empty list."""
        return list()
3070
3071
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.called = False
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)

    def __call__(self,s,l,t):
        """Run the wrapped action the first time; raise ParseException on later calls.

        The wrapper is only marked as called after the action returns, so an
        action that raises can be attempted again.
        """
        if self.called:
            raise ParseException(s,l,"")
        outcome = self.callable(s,l,t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to run once more."""
        self.called = False
3085
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Wraps f (after argument normalization) so that each call writes an
    ">>entering" line to stderr before the action runs, and a "<<leaving"
    line afterwards showing either the return value or the exception raised.
    Exceptions are re-raised unchanged.
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        # func_name exists only on Python 2 functions; __name__ is available
        # on both Python 2 and 3.  Fall back to the type name for callables
        # that carry no __name__ at all.
        thisFunc = getattr(f, "__name__", f.__class__.__name__)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is taken to be the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3108
3109 #
3110 # global helpers
3111 #
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = "".join([ exprText, " [", _ustr(delim), " ", exprText, "]..." ])
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3125
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # redefine the Forward to expect exactly as many items as the count says
        count = int(t[0])
        if count:
            body = Group(And([expr]*count))
        else:
            body = Group(empty)
        arrayExpr << body
        return []
    countExpr = Word(nums).setName("arrayLen")
    countExpr.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countExpr + arrayExpr )
3139
def _flatten(L):
    """Return a flat list of the non-list leaves of an arbitrarily nested list.

    A value that is not exactly of type list is returned wrapped in a
    one-element list.  Only exact list instances are descended into.
    The traversal is iterative, so neither the length nor the nesting depth
    of the input is limited by the interpreter's recursion limit.
    """
    if type(L) is not list:
        return [L]
    flat = []
    # stack of iterators, one per list currently being walked
    pending = [iter(L)]
    while pending:
        for item in pending[-1]:
            if type(item) is list:
                # descend; the outer iterator resumes after the nested list
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # innermost list exhausted
            pending.pop()
    return flat
3144
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine rep to match exactly what expr just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            rep << And( [ Literal(piece) for piece in _flatten(t.asList()) ] )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3170
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make rep insist on the same tokens
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3196
def _escapeRegexRangeChars(s):
    """Escape s for use inside a regex character class.

    Backslash, '^', '-' and ']' get a leading backslash (backslash itself is
    handled first), and literal newline/tab characters are rewritten as the
    two-character sequences \\n and \\t.
    """
    #~ escape these chars: ^-]
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special,_bslash+special)
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)
3204
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       If strs is neither a string nor a list/tuple, a SyntaxWarning is issued and
       the argument is treated as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # previously left unbound, which turned the warning into a NameError below
        symbols = []

    # remove duplicates, and move any symbol that is masked by an earlier
    # prefix of itself in front of that prefix (longest-first testing)
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - a character class will do
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are allowed to propagate
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3263
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3273
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action keepOriginalText, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional asString argument is passed as False, then the return value is a
       ParseResults containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to originalTextFor contains expressions with defined
       results names, you must set asString to False if you want to preserve those
       results name values."""
    def recordLocation(s,loc,t):
        # the marker's only token is the position at which it matched
        return loc

    def textOnly(s,l,t):
        return s[t._original_start:t._original_end]

    def textKeepingNames(s,l,t):
        # swap the list items for the raw text, then drop the two marker names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    locMarker = Empty().setParseAction(recordLocation)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
    if asString:
        matchExpr.setParseAction(textOnly)
    else:
        matchExpr.setParseAction(textKeepingNames)
    return matchExpr
3300
# convenience constants for positional expressions:
# one shared, named instance of each positional/empty expression class
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
3307
# grammar for the bracketed character-set strings accepted by srange()

# backslash followed by one punctuation character -> that character alone
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# every printable character except backslash and ']'
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# backslash + "0x" + hex digits -> the character with that code point
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits -> the character with that code point
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "a-z" style range, kept as a two-item group (the dash is suppressed)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[" optional "^" (named "negate"), one or more ranges/chars (named "body"), "]"
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a ParseResults part is a range pair and expands to every character from its
# first to its second item inclusive; any other part is returned unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3317
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns the empty string if s cannot be parsed or expanded.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # malformed input yields "" by design; a bare except here would also
        # have swallowed KeyboardInterrupt and SystemExit
        return ""
3338
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.
    """
    def verifyCol(strg,locn,toks):
        # accept silently when the match sits at column n
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3347
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().
    """
    def _replFunc(*_unused):
        # whatever was matched is ignored; a fresh one-item list is built per call
        replacement = [replStr]
        return replacement
    return _replFunc
3355
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    # drop the first and last character of the first token
    quoted = t[0]
    return quoted[1:-1]
3362
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ _ustr(tok).upper() for tok in t ]
3366
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ _ustr(tok).lower() for tok in t ]
3370
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # empty the token list, then refill it with the raw slice of the input
    originalText = s[startLoc:stopLoc]
    del t[:]
    t += ParseResults(originalText)
    return t
3381
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    frameRecords = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for record in frameRecords[2:]:
            if record[3] == "_parseNoCache":
                return record[0].f_locals["loc"]
        # no enclosing _parseNoCache frame was found
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        del frameRecords
3397
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr,basestring):
        resname = tagStr
        # tag names are matched caselessly unless building XML tags
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrEntry = Group( tagAttrName + Suppress("=") + tagAttrValue )
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(attrEntry)) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # HTML: attribute names are lower-cased, values are optional and may be unquoted
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attrEntry = Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) )
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(attrEntry)) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name in title case with ':' removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3425
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    tagPair = _makeTags( tagStr, False )
    return tagPair
3429
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    tagPair = _makeTags( tagStr, True )
    return tagPair
3433
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
       """
    # positional name/value pairs take precedence over keyword arguments
    source = args[:] if args else attrDict.items()
    attrs = [(k,v) for k,v in source]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                found = tokens[attrName]
                if found != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, found, attrValue))
    return pa
# unique marker meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()
3464
# associativity markers for operatorPrecedence(); each is a unique object
# that operatorPrecedence compares against the rightLeftAssoc tuple member
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3468
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element (operand) of the
          nested expression grammar; a parenthesized full expression is also accepted
          wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Returns a Forward expression for the complete grammar.  Raises ValueError for
       a malformed ternary opExpr, an unsupported numTerms, or an unrecognized
       associativity value.
    """
    ret = Forward()
    # an operand is either the base expression or a parenthesized full expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so the parse action member may be left out of the tuple
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms simply follow one another
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    # no operator expression: terms simply follow one another
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or falls through to the previous level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3539
# Quoted-string expressions.  Inside the quotes each regex accepts: any character
# other than the quote, newline, carriage return or backslash; a doubled quote;
# a \x hex escape; or a backslash followed by any character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u' prefix, returned as one token
unicodeString = Combine(_L('u') + quotedString.copy())
3544
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = ( len(opener) == 1 and len(closer)==1 )
        if singleCharDelims and ignoreExpr is not None:
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
        elif singleCharDelims:
            content = (empty+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(lambda t:t[0].strip()))
        elif ignoreExpr is not None:
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
    ret = Forward()
    if ignoreExpr is None:
        ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    else:
        ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    return ret
3596
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper that builds an expression for a block of statements delimited by
       indentation, in the manner of block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression for the statement that is repeated
          inside the block
        - indentStack - list supplied by the caller to track the indentation
          columns (several indentedBlock expressions in one grammar should
          share the same list)
        - indent - True if the block must be indented beyond the current level;
          False for a block of left-most statements (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr is modified in place: it is told to ignore
       a backslash followed by a line end.
    """
    def checkPeerIndent(instring, loc, toks):
        # Nothing to verify once the end of the input has been reached.
        if loc >= len(instring):
            return
        column = col(loc, instring)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(instring, loc, "illegal nesting")
        if column != expected:
            raise ParseException(instring, loc, "not a peer entry")

    def checkSubIndent(instring, loc, toks):
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        # Deeper than the enclosing block: this column becomes the current level.
        indentStack.append(column)

    def checkUnindent(instring, loc, toks):
        if loc >= len(instring):
            return
        column = col(loc, instring)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(instring, loc, "not an unindent")
        indentStack.pop()

    lineBreaks = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    peerCheck = Empty().setParseAction(checkPeerIndent)
    # One or more statements, each required to start at the current block column.
    statements = OneOrMore(peerCheck + Group(blockStatementExpr) + Optional(lineBreaks))

    if indent:
        indentCheck = Empty() + Empty().setParseAction(checkSubIndent)
        unindentCheck = Empty().setParseAction(checkUnindent)
        smExpr = Group(Optional(lineBreaks) +
                       FollowedBy(blockStatementExpr) +
                       indentCheck + statements + unindentCheck)
    else:
        smExpr = Group(Optional(lineBreaks) + statements)

    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3648
# Character sets built with srange from code points in the 0xa1-0xff range;
# as the names indicate, one set is for alphabetic characters and the other
# for punctuation.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Opening/closing tag expressions for any tag whose name is a Word starting
# with a letter and continuing with alphanumerics, "_" or ":".
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&name;" for a handful of common entities; the name is stored under the
# results name "entity".
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))

def replaceHTMLEntity(t):
    """Parse action that maps a matched entity to its replacement character.

    Looks up t.entity in the entity map and returns the mapped character, or
    None when the entity name is not in the map.

    A plain dictionary lookup is used instead of the "cond and a or b" idiom,
    which would wrongly return None for any replacement value that happens to
    be falsy (for example an empty string).
    """
    return _htmlEntityMap.get(t.entity)
3656
# Comment expressions are needed all the time and are easy to get subtly
# wrong, so ready-made versions are provided here.

# "/* ... */", ending at the first "*/".
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# "<!-- ... -->", which may span several lines.
htmlComment = Regex(r"<!--[\s\S]*?-->")
# Everything up to the end of the current line, without skipping leading whitespace.
restOfLine = Regex(r".*").leaveWhitespace()
# "// ..." where a backslash-newline continues the comment on the next line.
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# Either of the two comment forms above in a single expression.
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# The Java comment expression is the very same object as the C++ one.
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# Every printable character except the comma.
_noncomma = printables.replace(",", "")
# One list item: runs of non-comma characters, optionally followed by blanks
# or tabs as long as those are not directly followed by a comma or a line end.
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    )
).streamline().setName("commaItem")
# Items may be quoted strings or bare items; a missing item yields "".
commaSeparatedList = delimitedList(
    Optional(quotedString | _commasepitem, default="")
).setName("commaSeparatedList")
3672
3673
if __name__ == "__main__":
    # Self-test: a tiny "SELECT <columns> FROM <tables>" grammar run against a
    # mix of valid and invalid statements.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")
    # Dotted names are combined into one token and upper-cased.
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName))
    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName))
    simpleSQL = (selectToken +
                 ('*' | columnNameList).setResultsName("columns") +
                 fromToken +
                 tableNameList.setResultsName("tables"))

    def test(teststring):
        """Parse teststring with simpleSQL and print either the results or
        the failing line with a caret under the error column."""
        try:
            tokens = simpleSQL.parseString(teststring)
            tokenlist = tokens.asList()
            print(teststring + "->" + str(tokenlist))
            print("tokens = " + str(tokens))
            print("tokens.columns = " + str(tokens.columns))
            print("tokens.tables = " + str(tokens.tables))
            print(tokens.asXML("SQL", True))
        except ParseBaseException as err:
            print(teststring + "->")
            print(err.line)
            print(" " * (err.column - 1) + "^")
            print(err)
        print()

    # The repeated entry is intentional: it keeps the output identical to
    # running each statement individually, in this order.
    for sample in (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
    ):
        test(sample)