342
|
1 # module pyparsing.py
|
|
2 #
|
|
3 # Copyright (c) 2003-2009 Paul T. McGuire
|
|
4 #
|
|
5 # Permission is hereby granted, free of charge, to any person obtaining
|
|
6 # a copy of this software and associated documentation files (the
|
|
7 # "Software"), to deal in the Software without restriction, including
|
|
8 # without limitation the rights to use, copy, modify, merge, publish,
|
|
9 # distribute, sublicense, and/or sell copies of the Software, and to
|
|
10 # permit persons to whom the Software is furnished to do so, subject to
|
|
11 # the following conditions:
|
|
12 #
|
|
13 # The above copyright notice and this permission notice shall be
|
|
14 # included in all copies or substantial portions of the Software.
|
|
15 #
|
|
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
23 #
|
|
24 #from __future__ import generators
|
|
25
|
|
26 __doc__ = \
|
|
27 """
|
|
28 pyparsing module - Classes and methods to define and execute parsing grammars
|
|
29
|
|
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
|
|
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
|
|
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
|
|
33 provides a library of classes that you use to construct the grammar directly in Python.
|
|
34
|
|
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
|
|
36
|
|
37 from pyparsing_py3 import Word, alphas
|
|
38
|
|
39 # define grammar of a greeting
|
|
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
|
|
41
|
|
42 hello = "Hello, World!"
|
|
43 print hello, "->", greet.parseString( hello )
|
|
44
|
|
45 The program outputs the following::
|
|
46
|
|
47 Hello, World! -> ['Hello', ',', 'World', '!']
|
|
48
|
|
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
|
|
50 class names, and the use of '+', '|' and '^' operators.
|
|
51
|
|
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
|
|
53 object with named attributes.
|
|
54
|
|
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
|
|
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
|
|
57 - quoted strings
|
|
58 - embedded comments
|
|
59 """
|
|
60
|
|
61 __version__ = "1.5.2.Py3"
|
|
62 __versionTime__ = "9 April 2009 12:21"
|
|
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
|
|
64
|
|
65 import string
|
|
66 from weakref import ref as wkref
|
|
67 import copy
|
|
68 import sys
|
|
69 import warnings
|
|
70 import re
|
|
71 import sre_constants
|
|
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
|
|
73
|
|
74 __all__ = [
|
|
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
|
|
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
|
|
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
|
|
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
|
|
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
|
|
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
|
|
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
|
|
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
|
|
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
|
|
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
|
|
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
|
|
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
|
|
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
|
|
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
|
|
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
|
|
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
|
|
91 'indentedBlock', 'originalTextFor',
|
|
92 ]
|
|
93
|
|
94 """
|
|
95 Detect if we are running version 3.X and make appropriate changes
|
|
96 Robert A. Clark
|
|
97 """
|
|
_PY3K = sys.version_info[0] > 2
if not _PY3K:
    _MAX_INT = sys.maxint

    def _ustr(obj):
        """Unicode-tolerant stand-in for str(obj).

           Unicode objects are returned untouched.  Anything else goes through
           str(obj) first, so existing callers see exactly what str() would give;
           only when that raises UnicodeEncodeError is unicode(obj) returned instead.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            converted = str(obj)
        except UnicodeEncodeError:
            # str() could not encode the text; hand back the unicode object
            # rather than choosing an arbitrary lossy encoding
            converted = unicode(obj)
        return converted

    def _str2dict(strg):
        """Build a dict keyed by each character of strg (all values 0), for fast membership tests."""
        return dict.fromkeys( strg, 0 )

    alphas = string.lowercase + string.uppercase
else:
    # Python 3.x: the 2.x text helpers no longer exist, so alias them to
    # their 3.x counterparts
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str
    _str2dict = set
    alphas = string.ascii_lowercase + string.ascii_uppercase
|
|
140
|
|
141
|
|
def _xml_escape(data):
    """Return data with the five XML special characters replaced by entity references."""

    # '&' has to be handled before the others, since every replacement
    # text itself starts with an ampersand
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
        )
    for plain, entity in replacements:
        data = data.replace(plain, entity)
    return data
|
|
151
|
|
class _Constants(object):
    """Empty attribute container; instances are used as simple namespaces."""
|
|
154
|
|
# character sets commonly used when building Word expressions
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)   # a single backslash character
# every printable character except whitespace
printables = "".join( c for c in string.printable if c not in string.whitespace )
|
|
160
|
|
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

       Attributes:
        - loc - character offset into pstr at which the failure occurred
        - msg - the error message
        - pstr - the string being parsed ("" when only a message was given)
        - parserElement - the element passed in as elem, or None
       The computed attributes lineno, col (alias column) and line are
       derived from loc and pstr on demand.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # single-argument form: the only string given is the message
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )
    def __repr__( self ):
        return _ustr(self)
    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
    def __dir__(self):
        # names must match what the class really offers: the method is spelled
        # markInputline, and "column" is accepted by __getattr__ as an alias of "col"
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
|
|
208
|
|
class ParseException(ParseBaseException):
    """Raised when a parse expression fails to match the input.

       In addition to the stored attributes, these are computed on request:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """
|
|
217
|
|
class ParseFatalException(ParseBaseException):
    """User-throwable exception for inconsistent parse content; raising it
       stops all parsing immediately."""
|
|
222
|
|
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an ErrorStop
       signals that an unbacktrackable syntax error was found and parsing
       must stop at once."""
    def __init__(self, pe):
        # rebuild this exception from the fields of the one that triggered it
        parsed_text, location = pe.pstr, pe.loc
        message, element = pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(
                                    parsed_text, location, message, element)
|
|
230
|
|
231 #~ class ReparseException(ParseBaseException):
|
|
232 #~ """Experimental class - parse actions can raise this exception to cause
|
|
233 #~ pyparsing to reparse the input string:
|
|
234 #~ - with a modified input string, and/or
|
|
235 #~ - with a modified start location
|
|
236 #~ Set the values of the ReparseException in the constructor, and raise the
|
|
237 #~ exception in a parse action to cause pyparsing to use the new string/location.
|
|
238 #~ Setting the values as None causes no change to be made.
|
|
239 #~ """
|
|
240 #~ def __init_( self, newstring, restartLoc ):
|
|
241 #~ self.newParseText = newstring
|
|
242 #~ self.reparseLoc = restartLoc
|
|
243
|
|
class RecursiveGrammarException(Exception):
    """Raised by validate() when the grammar could be improperly recursive.

       parseElementTrace holds the list of elements given to the constructor.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
|
|
251
|
|
class _ParseResultsWithOffset(object):
    """Pairs a value with an offset; indexable like the 2-tuple (value, offset)."""
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        """Keep the stored value and replace the offset with i."""
        value = self.tup[0]
        self.tup = (value,i)
|
|
261
|
|
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

       Internally the matched tokens live in a list, and the named results in a
       dict mapping each name to a list of (value, position) entries.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults returns that same object; __init__
        # then sees __doinit already False and skips re-initialisation
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match instead of keeping the last
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer/slice keys index the token list; any other key is a results name."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a nested result does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index, shifting the recorded
           positions of named results that lie beyond index."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # only reached when normal attribute lookup fails: unknown results
        # names yield "" rather than raising AttributeError
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            # positions from other are shifted past the tokens already held here
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens; nested
           ParseResults become nested lists, other tokens are returned as-is."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views are not subscriptable on Python 3, so materialise them
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   # store the parent object itself (or None), not the weak reference
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # keys() is a dict view on Python 3 and cannot be added to a list directly
        return dir(super(ParseResults,self)) + list(self.keys())
|
|
609
|
|
def col (loc,strg):
    """Returns the 1-based column of position loc within strg, treating
       newlines as line separators.  A position that sits on a newline
       character is reported as column 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the preceding newline (rfind gives -1 when there is none)
    previousNewline = strg.rfind("\n", 0, loc)
    return loc - previousNewline
|
|
621
|
|
def lineno(loc,strg):
    """Returns the 1-based line number of position loc within strg, treating
       newlines as line separators.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    newlinesBefore = strg.count("\n",0,loc)
    return 1 + newlinesBefore
|
|
633
|
|
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() returns -1 when nothing is found; index 0 is a real match
    # (the string starts with a newline) and must end the line there
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
|
|
643
|
|
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug hook: print the expression and the location (with line
       and column) at which a match is about to be attempted."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
|
|
646
|
|
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug hook: print the expression and the tokens it matched."""
    matched = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched)
|
|
649
|
|
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug hook: print the exception passed in as exc."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
|
|
652
|
|
def nullDebugAction(*args):
    """No-op debug action: accepts any arguments and does nothing, so that
       debugging output can be suppressed during parsing."""
    return None
|
|
656
|
|
657 class ParserElement(object):
|
|
658 """Abstract base level parser element class."""
|
|
659 DEFAULT_WHITE_CHARS = " \n\t\r"
|
|
660
|
|
def setDefaultWhitespaceChars( chars ):
    """Replaces ParserElement.DEFAULT_WHITE_CHARS, the class-level default
       set of whitespace characters, with chars.
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
|
|
666
|
|
def __init__( self, savelist=False ):
    """Sets every parser element attribute to its default value."""
    # parse actions, failure action and debugging hooks
    self.parseAction = []
    self.failAction = None
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions
    self.callDuringTry = False

    # naming and result handling
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.errmsg = ""

    # whitespace and ignorable-expression handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # parsing state and optimisation flags
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.streamlined = False
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.re = None
|
|
689
|
|
def copy( self ):
    """Returns a shallow copy of this ParserElement whose parse-action and
       ignore-expression lists are independent of the original's.  Useful for
       attaching different parse actions to the same parsing pattern."""
    duplicate = copy.copy( self )
    duplicate.parseAction = list( self.parseAction )
    duplicate.ignoreExprs = list( self.ignoreExprs )
    if self.copyDefaultWhiteChars:
        # pick up the class-wide default as it stands at copy time
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
|
|
699
|
|
def setName( self, name ):
    """Assigns a name to this expression, for use in debugging, and sets the
       error message to "Expected <name>".  Returns self."""
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    if hasattr(self,"exception"):
        # keep an already-built exception in step with the new message
        self.exception.msg = expected
    return self
|
|
707
|
|
def setResultsName( self, name, listAllMatches=False ):
    """Returns a *copy* of this element whose matching tokens are stored under
       the given results name in the returned parse results.
       The original element is left untouched, so that the client can define a
       basic element, such as an integer, and reference it in multiple places
       with different names.  When listAllMatches is true the copy is marked
       as non-modal.
    """
    named = self.copy()
    named.modalResults = not listAllMatches
    named.resultsName = name
    return named
|
|
719
|
|
def setBreak(self,breakFlag = True):
    """Enables (breakFlag true) or disables (breakFlag false) a pdb breakpoint
       that fires just before this element is parsed.  Returns self.
    """
    if not breakFlag:
        # restore the saved parse method, if a breakpoint wrapper is installed
        current = self._parse
        if hasattr(current,"_originalParseMethod"):
            self._parse = current._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that it can be restored later
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
|
|
737
|
|
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       f may be a plain function, a bound method, a class (its __init__ is
       inspected and the class itself is what gets called), or an object with a
       __call__ method.  Callables declaring *args, and callables that already
       take exactly 3 arguments, are returned unchanged; otherwise a wrapper
       with signature (s,l,t) is returned that forwards only the trailing
       arguments f accepts.  AttributeError is raised when no code object can
       be found for f (for example, callables implemented in C)."""
    STAR_ARGS = 4   # bit set in co_flags when the callable declares *args

    def codeOf(obj):
        # __code__ on Python 2.6+/3.x, func_code on older 2.x; None if neither exists
        code = getattr(obj, "__code__", None)
        if code is None:
            code = getattr(obj, "func_code", None)
        return code

    def isMethod(obj):
        # method objects expose __self__ (2.6+/3.x) or im_self (older 2.x)
        return hasattr(obj, "__self__") or hasattr(obj, "im_self")

    if isinstance(f,type):
        # a class used as a parse action: arity comes from __init__, whose
        # leading 'self' is supplied by instantiation and so never counted
        target = f.__init__
        dropSelf = True
    else:
        target = f
        if codeOf(target) is None:
            # not a function or method, must be a callable object; inspect
            # its __call__ instead
            target = f.__call__
        dropSelf = isMethod(target)

    codeObj = codeOf(target)
    if codeObj is None:
        raise AttributeError("cannot determine argument count of parse action %r" % (f,))
    if codeObj.co_flags & STAR_ARGS:
        return f
    numargs = codeObj.co_argcount
    if dropSelf:
        numargs -= 1

    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f
    else:
        if numargs == 2:
            def tmp(s,l,t):
                return f(l,t)
        elif numargs == 1:
            def tmp(s,l,t):
                return f(t)
        else: #~ numargs == 0:
            def tmp(s,l,t):
                return f()
        # make the wrapper look like the wrapped callable where possible
        try:
            tmp.__name__ = f.__name__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__doc__ = f.__doc__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__dict__.update(f.__dict__)
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
|
|
831
|
|
def setParseAction( self, *fns, **kwargs ):
    """Replaces this element's parse actions with fns, the actions to perform
       when the element matches successfully.  Returns self.
       Each parse action is a callable taking 0-3 arguments, called as fn(s,loc,toks),
       fn(loc,toks), fn(toks), or just fn(), where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       A parse action that modifies the tokens can return them, and the
       returned list of tokens will replace the original.  Otherwise, it does
       not need to return any value.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    normalize = self._normalizeParseActionArgs
    self.parseAction = [ normalize(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
|
|
852
|
|
def addParseAction( self, *fns, **kwargs ):
    """Append fns to this expression's existing parse actions.

    Accepts the same callables and the same callDuringTry keyword as
    setParseAction; callDuringTry stays truthy once it has been set.
    Returns self.
    """
    normalize = self._normalizeParseActionArgs
    self.parseAction += [ normalize(fn) for fn in fns ]
    self.callDuringTry = self.callDuringTry or kwargs.get( "callDuringTry", False )
    return self
|
|
858
|
|
def setFailAction( self, fn ):
    """Register the action to perform if parsing fails at this expression.

    The fail action fn is a callable invoked as fn(s,loc,expr,err) where:
     - s    = string being parsed
     - loc  = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown
    The function returns no value.  It may raise ParseFatalException
    if it is desired to stop parsing immediately.

    Returns self.
    """
    self.failAction = fn
    return self
|
|
871
|
|
def _skipIgnorables( self, instring, loc ):
    """Advance loc past every leading match of the expressions in self.ignoreExprs.

    The ignore expressions are tried in order, each one repeatedly until it
    raises ParseException; the whole sweep is repeated until a complete pass
    matches nothing.  Returns the resulting location.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                # this ignorable no longer matches here; try the next one
                continue
    return loc
|
|
884
|
|
def preParse( self, instring, loc ):
    """Return the location at which matching should start.

    Skips ignorable expressions first (when any are defined), then, if
    self.skipWhitespace is set, any run of characters found in
    self.whiteChars.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
|
|
896
|
|
def parseImpl( self, instring, loc, doActions=True ):
    """Default matcher: consume nothing and produce no tokens.

    Returns (loc, []) with loc unchanged.
    """
    noTokens = []
    return loc, noTokens
|
|
899
|
|
def postParse( self, instring, loc, tokenlist ):
    """Default post-processing step: hand tokenlist back untouched."""
    unchanged = tokenlist
    return unchanged
|
|
902
|
|
903 #~ @profile
|
|
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this expression at loc and return (newLoc, ParseResults).

    Steps, in order:
     - optionally call preParse (only when callPreParse and self.callPreparse
       are both set);
     - call parseImpl; an IndexError raised by it is converted into a
       ParseException located at the end of instring;
     - call postParse and wrap the tokens in a ParseResults;
     - run the parse actions when doActions or self.callDuringTry is set;
       a non-None return value from an action replaces the results.

    When self.debug is set or a fail action is defined, the start debug action
    (if any) is called before matching, and on a ParseBaseException the
    exception debug action and then the fail action are called before the
    exception is re-raised.  The success debug action and the exception debug
    action for errors raised by parse actions are only used when self.debug
    is set.
    """
    debugging = self.debug

    if debugging or self.failAction:
        if self.debugActions[0]:
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        try:
            try:
                loc, tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # the same exception object is handed to both handlers
            err = sys.exc_info()[1]
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        if self.mayIndexError or loc >= len(instring):
            try:
                loc, tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # fast path: parseImpl is not expected to index past the end here
            loc, tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for action in self.parseAction:
                replacement = action( instring, tokensStart, retTokens )
                if replacement is not None:
                    retTokens = ParseResults( replacement,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(replacement,(ParseResults,list)),
                                              modal=self.modalResults )
        except ParseBaseException:
            # failures inside user parse actions are only reported in debug mode
            if debugging and self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
|
|
980
|
|
def tryParse( self, instring, loc ):
    """Attempt a match at loc without running parse actions.

    Returns the location following the match.  A ParseFatalException raised
    during the attempt is replaced by an ordinary ParseException at loc.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return outcome[0]
|
|
986
|
|
987 # this method gets repeatedly called during backtracking with the same arguments -
|
|
988 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
|
|
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around _parseNoCache.

    Results are stored in ParserElement._exprArgCache, keyed by
    (self, instring, loc, callPreParse, doActions).  Both successful results
    and ParseBaseException instances are cached; a cached exception is
    re-raised on the next lookup.

    Returns (newLoc, ParseResults), exactly like _parseNoCache.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in ParserElement._exprArgCache:
        value = ParserElement._exprArgCache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # Hand out a copy: the caller is free to modify the returned
        # ParseResults, and that must not alter the memoized entry.
        return (value[0],value[1].copy())
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            # store a private copy for the same reason
            ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException:
            pe = sys.exc_info()[1]
            ParserElement._exprArgCache[ lookup ] = pe
            raise
|
|
1005
|
|
# parse entry point used by default (no memoizing)
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}

@staticmethod
def resetCache():
    """Empty the class-wide cache of memoized parse results."""
    ParserElement._exprArgCache.clear()
|
|
1013
|
|
_packratEnabled = False

@staticmethod
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.

    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both valid results
    and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing_py3 as pyparsing.  To activate the packrat
    feature, your program must call ParserElement.enablePackrat().  If
    your program uses psyco to "compile as you go", you must call
    enablePackrat before calling psyco.full().  If you do not do this,
    Python will crash.  For best results, call enablePackrat() immediately
    after importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
|
|
1035
|
|
def parseString( self, instring, parseAll=False ):
    """Run this parse expression against instring, starting at location 0.

    This is the main interface to the client code, once the complete
    expression has been built.  Returns the matched tokens.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set parseAll to True (equivalent to ending
    the grammar with StringEnd()).

    Note: unless parseWithTabs has been called on this expression,
    parseString calls expandtabs() on the input string, in order to report
    proper column numbers in parse actions.  If the input string contains
    tabs and the grammar uses parse actions that use the loc argument to
    index into the string being parsed, you can ensure you have a consistent
    view of the input string by:
     - calling parseWithTabs on your grammar before calling parseString
     - defining your parse action using the full (s,loc,toks) signature, and
       referencing the input string using the parse action's s argument
     - explicitly expanding the tabs in your input string before calling
       parseString
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            endLoc = self.preParse( instring, loc )
            StringEnd()._parse( instring, endLoc )
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
    return tokens
|
|
1077
|
|
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Generator that scans instring for matches of this expression.

    Yields a (tokens, startLoc, endLoc) tuple for every match that advances
    past the current scan position.  The optional maxMatches argument clips
    scanning after that many matches have been yielded.

    Note that the start and end locations are reported relative to the string
    being parsed.  See parseString for more information on parsing strings
    with embedded tabs.
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    skipLeading = self.preParse
    matchAt = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while loc <= instrlen and found < maxMatches:
            try:
                preloc = skipLeading( instring, loc )
                nextLoc, tokens = matchAt( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here; move one character on and retry
                loc = preloc + 1
                continue
            if nextLoc > loc:
                found += 1
                yield tokens, preloc, nextLoc
                loc = nextLoc
            else:
                # match did not advance; step forward
                loc = preloc + 1
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
|
|
1116
|
|
def transformString( self, instring ):
    """Return instring with each match replaced by the tokens produced for it.

    Built on scanString.  To use it, define a grammar and attach a parse
    action that modifies the returned token list; the text of every match is
    then replaced by the (possibly modified) tokens, and the text between
    matches is copied through unchanged.

    Side effect: sets self.keepTabs to True.
    """
    pieces = []
    lastEnd = 0
    # force preservation of TABs, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            pieces.append( instring[lastEnd:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces += toks.asList()
                elif isinstance(toks,list):
                    pieces += toks
                else:
                    pieces.append(toks)
            lastEnd = end
        pieces.append( instring[lastEnd:] )
        return "".join([ _ustr(piece) for piece in pieces ])
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
|
|
1145
|
|
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Collect the tokens of every match reported by scanString.

    Returns a ParseResults built from the list of token groups, one per
    match.  The optional maxMatches argument is passed on to scanString to
    clip searching after that many matches are found.
    """
    try:
        found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults( found )
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
|
|
1156
|
|
def __add__(self, other ):
    """Implementation of + operator - returns And.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    rhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( rhs, ParserElement ):
        return And( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1166
|
|
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    lhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( lhs, ParserElement ):
        return lhs + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1176
|
|
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    rhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( rhs, ParserElement ):
        return And( [ self, And._ErrorStop(), rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1186
|
|
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    lhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( lhs, ParserElement ):
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1196
|
|
def __mul__(self,other):
    """Implementation of * operator - repeats this expression.

    other may be:
     - an int n: exactly n repetitions (n == 1 returns self unchanged);
     - a tuple (min, max) of ints: between min and max repetitions;
     - a tuple (min, None) or (min,): min or more repetitions, built with
       ZeroOrMore / OneOrMore;
     - a tuple (None, max): same as (0, max).

    Raises TypeError for any other operand type, and ValueError for a
    negative count, for max < min, and for 0 or (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad to two entries so (n,) is treated as (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts the optional repetitions only
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
|
|
1245
|
|
def __rmul__(self, other):
    """Implementation of * operator when this expression is the right operand; same result as self * other."""
    product = self.__mul__(other)
    return product
|
|
1248
|
|
def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    rhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( rhs, ParserElement ):
        return MatchFirst( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1258
|
|
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    lhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( lhs, ParserElement ):
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1268
|
|
def __xor__(self, other ):
    """Implementation of ^ operator - returns Or.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    rhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( rhs, ParserElement ):
        return Or( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1278
|
|
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    lhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( lhs, ParserElement ):
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1288
|
|
def __and__(self, other ):
    """Implementation of & operator - returns Each.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    rhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( rhs, ParserElement ):
        return Each( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1298
|
|
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None.
    """
    lhs = Literal( other ) if isinstance( other, basestring ) else other
    if isinstance( lhs, ParserElement ):
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
|
|
1308
|
|
def __invert__( self ):
    """Implementation of ~ operator - wraps this expression in a NotAny."""
    negated = NotAny( self )
    return negated
|
|
1312
|
|
def __call__(self, name):
    """Shortcut for setResultsName(name), leaving its other arguments at their defaults::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    named = self.setResultsName(name)
    return named
|
|
1320
|
|
def suppress( self ):
    """Return this expression wrapped in Suppress, so its tokens are left out
    of the returned output; useful to keep punctuation from cluttering results.
    """
    hidden = Suppress( self )
    return hidden
|
|
1326
|
|
def leaveWhitespace( self ):
    """Turn off whitespace skipping for this expression and return self.

    With skipping disabled, leading whitespace is no longer consumed before
    the expression's pattern is matched.  This is normally only used
    internally by the pyparsing module, but may be needed in some
    whitespace-sensitive grammars.
    """
    setattr( self, "skipWhitespace", False )
    return self
|
|
1334
|
|
def setWhitespaceChars( self, chars ):
    """Use chars as this expression's whitespace characters and return self.

    Also turns whitespace skipping on and clears copyDefaultWhiteChars.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
|
|
1342
|
|
def parseWithTabs( self ):
    """Keep TAB characters in the input instead of expanding them to spaces.

    Must be called before parseString when the input grammar contains
    elements that match TAB characters.  Returns self.
    """
    setattr( self, "keepTabs", True )
    return self
|
|
1349
|
|
def ignore( self, other ):
    """Add an expression to be ignored (e.g., comments) while matching.

    May be called repeatedly to define multiple ignorable patterns.  A
    Suppress instance is added as-is unless it is already in the list; any
    other expression is wrapped in Suppress and appended.  Returns self.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self
|
|
1361
|
|
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Turn debugging on with the given start / success / exception actions.

    Any action passed as a false value (such as None) is replaced by the
    module's corresponding default debug action.  Returns self.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
|
|
1369
|
|
def setDebug( self, flag=True ):
    """Switch display of debugging messages during pattern matching on or off.

    A true flag installs the default debug actions through setDebugActions;
    a false flag only clears self.debug.  Returns self.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
|
|
1378
|
|
def __str__( self ):
    """Return the expression's name attribute."""
    label = self.name
    return label
|
|
1381
|
|
def __repr__( self ):
    """Return the result of applying the module's _ustr helper to this expression."""
    text = _ustr(self)
    return text
|
|
1384
|
|
def streamline( self ):
    """Mark this expression as streamlined, reset strRepr to None, and return self."""
    self.strRepr = None
    self.streamlined = True
    return self
|
|
1389
|
|
def checkRecursion( self, parseElementList ):
    """Recursion-check hook; this base implementation does nothing."""
    return None
|
|
1392
|
|
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation ignores validateTrace and starts checkRecursion
    with a fresh, empty list.
    """
    seen = []
    self.checkRecursion( seen )
|
|
1396
|
|
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.

    file_or_filename is either an object with a read() method, whose whole
    contents are read, or a filename, in which case the file is opened in
    binary mode, read completely, and closed before parsing.  parseAll is
    passed through to parseString.

    Returns whatever parseString returns.  A ParseBaseException from
    parseString is re-raised from here.
    """
    try:
        reader = file_or_filename.read
    except AttributeError:
        # no read(): treat the argument as a filename
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # close the handle even when reading fails
            f.close()
    else:
        # call read() outside the try block, so an AttributeError raised
        # inside it is not mistaken for "this is a filename"
        file_contents = reader()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
|
|
1414
|
|
def getException(self):
    """Build a ParseException carrying this expression's errmsg, with an empty string and location 0."""
    failure = ParseException( "", 0, self.errmsg, self )
    return failure
|
|
1417
|
|
def __getattr__(self,aname):
    """Create the myException attribute on first access.

    The exception comes from getException() and is stored on the instance
    before being returned.  Any other missing attribute raises AttributeError.
    """
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    ret = self.getException()
    self.myException = ret
    return ret
|
|
1424
|
|
def __eq__(self,other):
    """Compare this expression with another expression or with a string.

     - another ParserElement: equal when it is the same object or when both
       instance dictionaries compare equal;
     - a string: equal when this expression parses the entire string
       (parseString with parseAll=True) without a ParseBaseException;
     - anything else: the result of comparing super(ParserElement,self)
       with other.
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
|
|
1436
|
|
def __ne__(self,other):
    """Return the negation of self == other."""
    same = (self == other)
    return not same
|
|
1439
|
|
def __hash__(self):
    """Hash on the object's id rather than on its contents."""
    identity = id(self)
    return hash(identity)
|
|
1442
|
|
def __req__(self,other):
    """Return the result of self == other."""
    same = (self == other)
    return same
|
|
1445
|
|
def __rne__(self,other):
    """Return the negation of self == other."""
    same = (self == other)
    return not same
|
|
1448
|
|
1449
|
|
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name through the base class, then rebuild errmsg as
        "Expected " followed by the resulting self.name.  Returns whatever
        the base class setName returned."""
        named = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return named
|
|
1461
|
|
1462
|
|
class Empty(Token):
    """An empty token, will always match."""

    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # flags read by the parsing machinery
        self.mayIndexError = False
        self.mayReturnEmpty = True
|
|
1470
|
|
1471
|
|
class NoMatch(Token):
    """A token that will never match."""

    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this element's myException, updated with the
        current string and location."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
|
|
1487
|
|
1488
|
|
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string turns this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc+matchLen, match) when instring has the match string at
        loc; otherwise raise myException updated with loc and instring."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith( self.match, loc ):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
|
|
1521
|
|
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must not be
       immediately preceded or followed by a keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to Keyword.DEFAULT_KEYWORD_CHARS (initially all alphanumerics + "_" and "$");
       caseless allows case-insensitive matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            # look the default up at call time, so that a prior call to
            # setDefaultKeywordChars() is honoured by new keywords
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc+matchLen, match) when the keyword is found at loc and is
        neither preceded nor followed by a character from identChars;
        otherwise raise myException updated with loc and instring."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Copy this keyword through the base class copy().

        NOTE(review): the copy's identChars is reset to the current
        Keyword.DEFAULT_KEYWORD_CHARS string, so identChars passed to the
        constructor are not carried over - confirm this is intended.
        """
        c = super(Keyword,self).copy()
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
|
|
1582
|
|
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__( self, matchString ):
        # the base class stores the upper-cased form as self.match
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (endLoc, returnString) when the upper-cased text at loc
        equals the stored match string; otherwise raise myException updated
        with loc and instring."""
        end = loc + self.matchLen
        if instring[ loc:end ].upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
|
|
1604
|
|
class CaselessKeyword(Keyword):
    """Keyword that is always matched case-insensitively.

    identChars defaults to the value of Keyword.DEFAULT_KEYWORD_CHARS at the
    time the keyword is constructed.
    """
    def __init__( self, matchString, identChars=None ):
        if identChars is None:
            # resolve the default at call time rather than at class definition
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc+matchLen, match) when the keyword is found at loc,
        ignoring case, and is neither preceded nor followed by a character
        from identChars; otherwise raise myException updated with loc and
        instring.  The checks mirror the caseless branch of Keyword.parseImpl."""
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
|
|
1618
|
|
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        """Build the matcher.

        - initChars - characters allowed in the first position
        - bodyChars - characters allowed after the first (falsy => initChars)
        - min, max, exact - length constraints; ``exact`` overrides both others
        - asKeyword - if True, the word must not be adjacent to body characters

        Raises ValueError if ``min`` is less than 1.
        """
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with default length limits and no space in either
        # character set, the word can be matched by one compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single initial character may be emitted as an escaped
                # literal.  The guard must be on initCharsOrig: escaping a
                # multi-character initChars would demand that whole sequence
                # instead of any one of its characters.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # Best effort: fall back to the character-by-character scan.
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at ``loc``.

        Returns ``(new_loc, matched_text)``; otherwise raises the instance's
        shared ``myException`` stamped with the location and input string.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # With an explicit max, a word that keeps going past the limit fails.
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the superclass string form if available, else a cached
        ``W:(...)`` summary showing at most four characters of each set."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # Fall through to the generated representation.
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
|
|
1745
|
|
1746
|
|
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(self.pattern, self.flags)
            self.re = compiled
            self.reString = self.pattern
        except sre_constants.error:
            # Warn about the bad pattern, then let the compile error propagate.
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled pattern at ``loc``.

        On success returns ``(end_of_match, tokens)`` where ``tokens`` is a
        ParseResults built from the full match, with one keyed entry per
        named group of the pattern.  On failure raises the instance's shared
        ``myException`` stamped with the location and input string.
        """
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        for groupName, groupText in found.groupdict().items():
            tokens[groupName] = groupText
        return found.end(), tokens

    def __str__( self ):
        """Return the superclass string form if available, else a cached
        ``Re:(<repr of pattern>)`` description."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
|
|
1802
|
|
1803
|
|
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError if quoteChar or endQuoteChar is empty after
           stripping whitespace; re-raises the regex compile error if the
           generated pattern is invalid.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError("quoteChar cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError("endQuoteChar cannot be the empty string")

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # The escape character is excluded from the "ordinary character"
        # class so that it can only be consumed by the escChar alternative.
        escapedEscChar = _escapeRegexRangeChars(escChar) if escChar is not None else ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escapedEscChar )
        else:
            self.flags = 0
            # single-line strings additionally stop at line breaks
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escapedEscChar )
        if len(self.endQuoteChar) > 1:
            # Allow proper prefixes of a multi-character end quote inside the
            # body, as long as the next character breaks the end-quote sequence.
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at ``loc``.

        Returns ``(end_of_match, text)``.  When ``unquoteResults`` is set the
        delimiters are stripped, escChar-escaped characters are replaced by
        the bare character, and escQuote sequences are replaced by the end
        quote.  On failure raises the instance's shared ``myException``
        stamped with the location and input string.
        """
        # Cheap first-character test before running the regex.
        result = self.re.match(instring,loc) if instring[loc] == self.firstQuoteChar else None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid Python string escape
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the superclass string form if available, else a cached
        description naming the start and end quote strings."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # Fall through to the generated representation.
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
|
|
1918
|
|
1919
|
|
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and length limits.

        Raises ValueError if ``min`` is less than 1.  Leading whitespace is
        not skipped by this token (``skipWhitespace`` is turned off).
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of characters that are not in ``notChars``.

        Returns ``(new_loc, matched_text)``; raises the instance's shared
        ``myException`` if the first character is excluded or the run is
        shorter than ``minLen``.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        limit = min( start+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the superclass string form if available, else a cached
        ``!W:(...)`` summary showing at most four excluded characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                shown = "%s..." % self.notChars[:4]
            else:
                shown = self.notChars
            self.strRepr = "!W:(%s)" % shown

        return self.strRepr
|
|
1990
|
|
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # printable labels used to build the token's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Configure which whitespace characters to match and the length limits.

        The characters being matched are removed from this element's own
        skippable-whitespace set.  Each character of ``ws`` is looked up in
        ``whiteStrs`` to build the name.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        stillSkipped = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(stillSkipped) )
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of characters from ``matchWhite``.

        Returns ``(new_loc, matched_text)``; raises the instance's shared
        ``myException`` if the first character is not matchable whitespace
        or the run is shorter than ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        limit = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
|
|
2047
|
|
2048
|
|
class _PositionToken(Token):
    """Common base for the position-testing tokens defined below it.

    Names the element after its concrete class and flags it as able to
    match without consuming input.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__
|
|
2055
|
|
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column.

        Returns ``loc`` unchanged when already at the target column;
        otherwise advances over whitespace until the column is reached, a
        non-space character is met, or the input ends.
        """
        if col(loc,instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance to the target column, returning the text skipped over.

        Raises ParseException if ``loc`` is already past the target column.
        """
        currentCol = col( loc, instring )
        if currentCol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        targetLoc = loc + self.col - currentCol
        return targetLoc, instring[ loc: targetLoc ]
|
|
2078
|
|
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline must stay significant, so drop it from the skippable set
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return ``loc + 1`` if the superclass pre-parse lands on a newline,
        else ``loc`` unchanged."""
        landed = super(LineStart,self).preParse(instring,loc)
        return loc + 1 if instring[landed] == "\n" else loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when ``loc`` is 0, equals the pre-parsed
        start of the input, or directly follows a newline; otherwise raise
        the instance's shared ``myException``."""
        atLineStart = ( loc==0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
|
|
2103
|
|
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline must stay significant, so drop it from the skippable set
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at ``loc`` or the very end of the input.

        Returns ``(loc+1, "\\n")`` on a newline and ``(loc+1, [])`` when
        ``loc`` equals the input length; in every other case raises the
        instance's shared ``myException``.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
|
|
2129
|
|
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when ``loc`` is 0 or is where pre-parsing
        from position 0 ends (i.e. only skippable text precedes it);
        otherwise raise the instance's shared ``myException``."""
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
|
|
2147
|
|
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed only when no input remains at ``loc``.

        Returns ``(loc+1, [])`` when ``loc`` is exactly the input length and
        ``(loc, [])`` when it is already past the end; raises the instance's
        shared ``myException`` when input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen is the only remaining case
        return loc, []
|
|
2171
|
|
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or when the previous
        character is not a word character and the current one is;
        otherwise raise the instance's shared ``myException``."""
        if loc == 0:
            return loc, []
        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
|
|
2193
|
|
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        # whitespace before the position must not be skipped
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens when ``loc`` is at or past the end of the
        input, or when the current character is not a word character and the
        previous one is; otherwise raise the instance's shared
        ``myException``."""
        instrlen = len(instring)
        if not (instrlen>0 and loc<instrlen):
            return loc, []
        wordChars = self.wordChars
        if instring[loc] in wordChars or instring[loc-1] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
|
|
2218
|
|
2219
|
|
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Normalise ``exprs`` into the list ``self.exprs``.

        A list is stored as-is, a string becomes a single Literal, any other
        iterable is converted with ``list()``, and a non-iterable is wrapped
        in a one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        elif isinstance( exprs, list ):
            self.exprs = exprs
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Index into the contained expressions."""
        return self.exprs[i]

    def append( self, other ):
        """Add ``other`` to the contained expressions, drop the cached
        string form, and return self."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the originals keep their whitespace handling
        copies = [ e.copy() for e in self.exprs ]
        self.exprs = copies
        for duplicate in copies:
            duplicate.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register ``other`` as ignorable here and on every contained
        expression; a Suppress already present in ``ignoreExprs`` is not
        registered again.  Returns self."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the superclass string form if available, else a cached
        ``ClassName:(exprs)`` description."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline self and all contained expressions, then flatten one
        level of same-class nesting in a two-element expression.  Returns
        self."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def canAbsorb( nested ):
            # only nested expressions of our own class that carry no parse
            # action, results name or debug flag may be merged away
            return ( isinstance( nested, self.__class__ ) and
                     not(nested.parseAction) and
                     nested.resultsName is None and
                     not nested.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if canAbsorb( head ):
                self.exprs = head.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError  |= head.mayIndexError

            tail = self.exprs[-1]
            if canAbsorb( tail ):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the superclass ``setResultsName`` and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression with this element appended to
        the trace, then run ``checkRecursion`` from an empty list."""
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )
|
|
2315
|
|
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once parseImpl passes one of these, failures of
        later expressions are raised as ParseSyntaxException."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the lookup after Empty,
            # so Empty.__init__ itself is not run - confirm this is intended.
            super(Empty,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence; it may return empty only if every contained
        expression may.  Whitespace handling is taken from the first
        expression."""
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all( e.mayReturnEmpty for e in self.exprs )
        # Use the normalised self.exprs, not the raw argument: the base class
        # accepts strings and arbitrary iterables, which cannot be indexed
        # into expression objects.
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order, accumulating tokens.

        Returns ``(new_loc, tokens)``.  After an _ErrorStop marker, a
        ParseBaseException or IndexError from a later expression is
        converted to ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append ``other`` (a string is wrapped in a Literal) in place."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the
        first one that cannot return empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if any, else a cached ``{a b c}`` form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
|
|
2383
|
|
2384
|
|
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any alternative may."""
        super(Or,self).__init__(exprs, savelist)
        self.mayReturnEmpty = any( e.mayReturnEmpty for e in self.exprs )

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative with ``tryParse`` and re-parse with the one
        that reached furthest.

        If none matched, raises the failure with the greatest location, or a
        ParseException saying there are no defined alternatives.
        """
        bestMatchLoc = -1
        bestMatchExpr = None
        furthestFailLoc = -1
        furthestFailure = None
        for candidate in self.exprs:
            try:
                endLoc = candidate.tryParse( instring, loc )
            except ParseException as err:
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end counts as a failure at end of input
                if len(instring) > furthestFailLoc:
                    furthestFailure = ParseException(instring,len(instring),candidate.errmsg,self)
                    furthestFailLoc = len(instring)
            else:
                if endLoc > bestMatchLoc:
                    bestMatchLoc = endLoc
                    bestMatchExpr = candidate

        if bestMatchLoc >= 0:
            return bestMatchExpr._parse( instring, loc, doActions )
        if furthestFailure is not None:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other ):
        """Append ``other`` (a string is wrapped in a Literal) in place."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached ``{a ^ b}`` form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " ^ ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
|
|
2445
|
|
2446
|
|
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any alternative may,
        or if ``exprs`` is falsy."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            self.mayReturnEmpty = any( e.mayReturnEmpty for e in self.exprs )
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses.

        If none matches, raises the failure with the greatest location, or a
        ParseException saying there are no defined alternatives.
        """
        furthestFailLoc = -1
        furthestFailure = None
        for candidate in self.exprs:
            try:
                return candidate._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end counts as a failure at end of input
                if len(instring) > furthestFailLoc:
                    furthestFailure = ParseException(instring,len(instring),candidate.errmsg,self)
                    furthestFailLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestFailure is not None:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Append ``other`` (a string is wrapped in a Literal) in place."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached ``{a | b}`` form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " | ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
|
|
2504
|
|
2505
|
|
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__(self, exprs, savelist=True):
        """Store the expressions; mayReturnEmpty holds only if all of them allow it."""
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # The expression groups are computed lazily on the first parse.
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A first pass uses tryParse to discover the order in which the
        expressions match; a second pass parses them for real in that
        order and merges the results.  Raises ParseException when a
        required expression never matched.
        """
        if self.initExprGroups:
            # Classify the sub-expressions once, by wrapper type.
            self.optionals = [e.expr for e in self.exprs if isinstance(e, Optional)]
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs
                             if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # Keep sweeping over the candidates until a sweep matches nothing.
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for cand in candidates:
                try:
                    scanLoc = cand.tryParse(instring, scanLoc)
                except ParseException:
                    failures += 1
                    continue
                matchOrder.append(cand)
                if cand in pendingReqd:
                    pendingReqd.remove(cand)
                elif cand in pendingOpt:
                    pendingOpt.remove(cand)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join([_ustr(e) for e in pendingReqd])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs
                       if isinstance(e, Optional) and e.expr in pendingOpt]

        parsedPieces = []
        for matched in matchOrder:
            loc, piece = matched._parse(instring, loc, doActions)
            parsedPieces.append(piece)

        finalResults = ParseResults([])
        for piece in parsedPieces:
            # Names already present get the old and new values concatenated.
            merged = {}
            for key in piece.keys():
                if key in finalResults.keys():
                    combined = ParseResults(finalResults[key])
                    combined += ParseResults(piece[key])
                    merged[key] = combined
            finalResults += ParseResults(piece)
            for key, value in merged.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__(self):
        """Return self.name when set, otherwise a cached "{a & b & ...}" rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                rendered = " & ".join([_ustr(e) for e in self.exprs])
                self.strRepr = "{" + rendered + "}"
            return self.strRepr
        return self.name

    def checkRecursion(self, parseElementList):
        """Run the recursion check on each expression with this element appended to the trail."""
        visited = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(visited)
|
|
2590
|
|
2591
|
|
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as self.expr (which may be
    None), and forwards parsing, whitespace, ignore, streamline and
    validation requests to it.
    """
    def __init__(self, expr, savelist=False):
        """Store expr (a plain string is wrapped in a Literal) and copy its parsing flags."""
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the contained expression; raise ParseException if there is none."""
        if self.expr is None:
            raise ParseException("", loc, self.errmsg, self)
        return self.expr._parse(instring, loc, doActions, callPreParse=False)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a private copy of the contained expression.

        Returns self.
        """
        self.skipWhitespace = False
        # The copy must happen inside the None check: the original called
        # self.expr.copy() unconditionally and raised AttributeError when
        # no expression had been assigned yet.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on the contained expression.

        A Suppress instance that is already registered is not added again.
        Returns self.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            # Pass on the entry the base class just appended.
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and then the contained expression; returns self."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if this element is already on the trail.

        Otherwise continue the check in the contained expression with this
        element appended to a copy of the trail.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    # NOTE: the list default is only ever copied, never mutated, so sharing
    # it between calls is harmless; it is kept to leave the signature as is.
    def validate(self, validateTrace=[]):
        """Validate the contained expression, then check this element for recursion."""
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the base class string if available, else a cached "Class:(expr)" form.

        May return None when the base class string is unavailable and no
        expression is set, because strRepr then stays None.
        """
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # Base rendering unavailable; fall back to the generated form.
            # (Was a bare except, which also swallowed KeyboardInterrupt.)
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
|
|
2662
|
|
2663
|
|
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the contained expression at loc; on success return loc unchanged with no tokens."""
        # tryParse raises if the lookahead does not match.
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
|
|
2676
|
|
2677
|
|
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # Set the flag directly rather than calling self.leaveWhitespace(),
        # which would propagate the setting to the contained expression.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when the contained expression fails at loc; fail otherwise."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # The unwanted expression is absent: the lookahead succeeds.
            return loc, []
        # The unwanted expression matched: report through the stored exception.
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__(self):
        """Return self.name when set, otherwise a cached "~{expr}" rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
|
|
2713
|
|
2714
|
|
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the contained expression repeatedly until it stops matching.

        Returns the location after the last successful match and the
        accumulated tokens; with no match at all, loc is returned
        unchanged together with an empty list.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # The first failure ends the repetition.
            pass

        return loc, tokens

    def __str__(self):
        """Return self.name when set, otherwise a cached "[expr]..." rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the returned element saveAsList."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
|
|
2752
|
|
2753
|
|
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl(self, instring, loc, doActions=True):
        """Parse the contained expression once, then repeatedly until it stops matching.

        A failure of the first, mandatory match propagates to the caller;
        failures of later repetitions simply end the loop.
        """
        # must be at least one
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        """Return self.name when set, otherwise a cached "{expr}..." rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the returned element saveAsList."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
|
|
2787
|
|
class _NullToken(object):
    """Placeholder object that is falsy and renders as the empty string."""
    def __str__(self):
        return ""

    def __nonzero__(self):
        return False
    # Python 3 consults __bool__; Python 2 consults __nonzero__.
    __bool__ = __nonzero__
|
|
2794
|
|
# Sentinel meaning "no default value was supplied to Optional".
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the contained expression, substituting the default when it fails.

        On failure loc is returned unchanged.  The tokens are then empty
        when no default was given; otherwise they hold the default, which
        is also stored under the contained expression's results name when
        it has one.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        """Return self.name when set, otherwise a cached "[expr]" rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
|
|
2828
|
|
2829
|
|
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # A plain-string failOn is wrapped in a Literal; anything else
        # (including None) is stored as given.
        self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one character at a time until the target expression matches.

        Returns the location and a one-element list holding the skipped
        text (or, with includeMatch and a non-empty match, a ParseResults
        of the skipped text plus the match).  Raises a ParseException as
        soon as failOn matches, and the stored exception when the end of
        the input is passed without a match.
        """
        startLoc = loc
        endLimit = len(instring)
        target = self.expr
        abortScan = False
        while loc <= endLimit:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # Flag the failure so the handler below re-raises
                        # instead of advancing.
                        abortScan = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    abortScan = False
                if self.ignoreExpr is not None:
                    # Step over every consecutive ignorable match.
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring, loc)
                        except ParseBaseException:
                            break
                # Probe without parse actions; a failure moves on by one character.
                target._parse(instring, loc, doActions=False, callPreParse=False)
                skipText = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [skipText]
                loc, mat = target._parse(instring, loc, doActions, callPreParse=False)
                if not mat:
                    return loc, [skipText]
                skipRes = ParseResults(skipText)
                skipRes += mat
                return loc, [skipRes]
            except (ParseException, IndexError):
                if abortScan:
                    raise
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
|
|
2895
|
|
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Assign other as the contained expression and copy its parsing flags.

        A plain string is wrapped in a Literal.  Returns None.
        """
        if isinstance(other, basestring):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        inner = self.expr
        self.mayIndexError = inner.mayIndexError
        self.mayReturnEmpty = inner.mayReturnEmpty
        self.setWhitespaceChars(inner.whiteChars)
        self.skipWhitespace = inner.skipWhitespace
        self.saveAsList = inner.saveAsList
        self.ignoreExprs.extend(inner.ignoreExprs)
        return None

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the contained expression once, guarded by the streamlined flag."""
        if self.streamlined:
            return self
        # Set the flag before descending so a recursive grammar terminates.
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the contained expression unless this element is already on the trace."""
        if self not in validateTrace:
            trail = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trail)
        self.checkRecursion([])

    def __str__(self):
        """Return self.name when set, otherwise "<ClassName>: <contained expression>".

        While the contained expression is rendered, this instance's class
        is temporarily switched to _ForwardNoRecurse so that a recursive
        reference to it renders as "..." instead of recursing.
        """
        if hasattr(self, "name"):
            return self.name

        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            retString = _ustr(self.expr) if self.expr is not None else "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy this element; an undefined Forward yields a new Forward pointing at self."""
        if self.expr is None:
            ret = Forward()
            ret << self
            return ret
        return super(Forward, self).copy()
|
|
2967
|
|
class _ForwardNoRecurse(Forward):
    """Forward variant whose string form is a fixed ellipsis."""
    def __str__(self):
        ellipsis = "..."
        return ellipsis
|
|
2971
|
|
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not passed on to the base class;
        # saveAsList is always reset to False afterwards.
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
|
|
2977
|
|
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""
    def __init__(self, *args):
        """Forward args to TokenConverter and emit a DeprecationWarning."""
        super(Upcase, self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a list holding the upper-cased form of every token.

        Uses the tokens' own upper() method: the string.upper function
        called by the original does not exist in Python 3, so it raised
        AttributeError on every use.
        """
        return [tok.upper() for tok in tokenlist]
|
|
2987
|
|
2988
|
|
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore(self, other):
        """Register an ignorable expression; returns self.

        In adjacent mode only this element is told (ParserElement.ignore);
        otherwise the request goes through the normal superclass chain.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the token list with a single joined string, keeping named results."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
|
|
3019
|
|
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the token list in a one-element list."""
        grouped = [tokenlist]
        return grouped
|
|
3028
|
|
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to tokenlist for every non-empty element.

        The key is the element's first token (integer keys are converted
        to stripped strings); the value is built from the remaining tokens.
        Returns tokenlist, wrapped in a list when a results name is set.
        """
        for idx, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            key = tok[0]
            if isinstance(key, int):
                key = _ustr(tok[0]).strip()

            if len(tok) == 1:
                # Key only: store an empty string.
                value = ""
            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
                # Key plus one plain token: store that token.
                value = tok[1]
            else:
                rest = tok.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest, ParseResults) and rest.keys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value, idx)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
|
|
3061
|
|
3062
|
|
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse(self, instring, loc, tokenlist):
        """Discard the parsed tokens by returning a new empty list."""
        return list()

    def suppress(self):
        """Return self unchanged."""
        return self
|
|
3070
|
|
3071
|
|
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action on first use; raise ParseException on later calls.

        The called flag is set only after the wrapped action has returned.
        """
        if self.called:
            raise ParseException(s, l, "")
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
|
|
3085
|
|
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Returns a wrapper that writes an ">>entering" line to sys.stderr
    before calling the normalized parse action and a "<<leaving" line
    afterwards, showing either the return value or the exception (which
    is then re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    # Python 3 functions have no func_name attribute (the original read
    # f.func_name and raised AttributeError on every traced call); use
    # __name__, falling back to the class name for callables without one.
    try:
        baseName = f.__name__
    except AttributeError:
        baseName = f.__class__.__name__

    def z(*paArgs):
        thisFunc = baseName
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # An extra leading argument is taken to be the bound instance.
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (thisFunc, ret))
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
|
|
3108
|
|
3109 #
|
|
3110 # global helpers
|
|
3111 #
|
|
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..."
    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
|
|
3125
|
|
def countedArray(expr):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # Redefine the array expression according to the count just parsed.
        n = int(t[0])
        arrayExpr << (n and Group(And([expr] * n)) or Group(empty))
        return []

    countField = Word(nums).setName("arrayLen")
    countField = countField.setParseAction(countFieldParseAction, callDuringTry=True)
    return (countField + arrayExpr)
|
|
3139
|
|
def _flatten(L):
    """Return a flat list of the non-list items found in L, in order.

    A value whose type is not exactly ``list`` is returned wrapped in a
    one-element list; nested lists (exact ``list`` type only) are expanded
    depth-first.  The walk is iterative: the original recursed once per
    element, which exceeded the interpreter's recursion limit on lists of
    roughly a thousand items and copied the tail of the list at every step.
    """
    if type(L) is not list:
        return [L]
    flat = []
    pending = [iter(L)]  # stack of iterators, innermost list last
    while pending:
        for item in pending[-1]:
            if type(item) is list:
                # Descend; the outer iterator resumes once this one is done.
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            pending.pop()
    return flat
|
|
3144
|
|
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # Point the repeater at whatever the original expression just matched.
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            literals = [Literal(tt) for tt in _flatten(t.asList())]
            rep << And(literals)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
|
|
3170
|
|
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        # Remember the tokens of the original match ...
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # ... and reject a repeat whose tokens differ from them.
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
|
|
3196
|
|
def _escapeRegexRangeChars(s):
    """Escape s for use inside a regex character class.

    Backslash, '^', '-' and ']' are prefixed with _bslash (backslash is
    handled first), and newline and tab become the two-character
    sequences \\n and \\t.
    """
    escaped = s
    for special in r"\^-]":
        escaped = escaped.replace(special, _bslash + special)
    escaped = escaped.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(escaped)
|
|
3204
|
|
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       SyntaxWarning and is treated as an empty set of literals.
    """
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    if isinstance(strs, (list, tuple)):
        symbols = list(strs[:])
    elif isinstance(strs, basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                      SyntaxWarning, stacklevel=2)
        # The original left symbols unbound here and then crashed with a
        # NameError; carry on with no literals instead.
        symbols = []

    # Drop duplicates and move any literal that is masked by an earlier,
    # shorter prefix in front of that prefix.
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1:]):
            if isequal(other, cur):
                del symbols[i + j + 1]
                break
            elif masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            i += 1

    # With no symbols there is nothing to compile; skip straight to the
    # MatchFirst fallback rather than attempting an empty "[]" pattern.
    if symbols and not caseless and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # All single characters: a character class is enough.
                return Regex("[%s]" % "".join([_escapeRegexRangeChars(sym) for sym in symbols]))
            else:
                return Regex("|".join([re.escape(sym) for sym in symbols]))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst([parseElementClass(sym) for sym in symbols])
|
|
3263
|
|
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    entry = Group(key + value)
    entries = ZeroOrMore(entry)
    return Dict(entries)
|
|
3273
|
|
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action keepOriginalText, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional asString argument is passed as False, then the return value is a
       ParseResults containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to originalTextFor contains expressions with defined
       results names, you must set asString to False if you want to preserve those
       results name values."""
    # Empty markers on both sides of expr record the start and end locations.
    locMarker = Empty().setParseAction(lambda s, loc, t: loc)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # Replace the tokens with the raw text and drop the two marker names.
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    matchExpr.setParseAction(extractText)
    return matchExpr
|
|
3300
|
|
# convenience constants for positional expressions
# Ready-made module-level instances of the positional expression classes, each
# given a display name via setName.
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
|
|
3307
|
|
# Private grammar for the regexp-style "[...]" character-set strings.
# _reBracketExpr is the expression that srange() parses its argument with.

# a backslash followed by one of the listed punctuation characters; the parse
# action keeps only the second character (the one being escaped)
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# every printable character except backslash and ']'
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# backslash + "0x" + hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# any one character of the set, in escaped or literal form
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# two single characters joined by '-', grouped so that a range can be told apart from a lone character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[" + optional "^" (results name "negate") + one or more ranges/characters (results name "body") + "]"
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
|
|
3315
|
|
def _expanded(p):
    """Expand one element of a parsed bracket expression into its characters.

       A ParseResults element is a grouped character range (first, last) and is
       expanded to every character from first through last inclusive.  Anything
       else is a single character and is returned unchanged.  A reversed range
       (first > last) expands to the empty string.
       """
    # An explicit if/else is used instead of "cond and a or b": with that idiom
    # an empty expansion is falsy and the ParseResults itself would be returned.
    if isinstance(p,ParseResults):
        return ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
    return p

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Only the body of the bracket expression is used; a leading '^' is accepted
       by the grammar but does not alter the returned string.

       Returns the empty string if s cannot be parsed or expanded.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Best-effort by design: a malformed range string yields "".  Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate instead of being silently swallowed.
        return ""
|
|
3338
|
|
def matchOnlyAtCol(n):
    """Return a parse action that accepts a match only when it starts at column n.

       The returned function raises ParseException if the column of the match
       location differs from n, and otherwise returns None so the tokens are
       left untouched.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
|
|
3347
|
|
def replaceWith(replStr):
    """Return a parse action that ignores its arguments and yields [replStr].

       Convenient for the common case of substituting a fixed value for the
       matched tokens, particularly together with transformString().
    """
    return lambda *args: [replStr]
|
|
3355
|
|
def removeQuotes(s,l,t):
    """Parse action that drops the first and last character of the first token.

       Intended for quoted strings, where those two characters are the
       quotation marks.  Attach it with::
          quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
|
|
3362
|
|
def upcaseTokens(s,l,t):
    """Parse action returning every token, converted to a string, in upper case."""
    return [ _ustr(tok).upper() for tok in t ]
|
|
3366
|
|
def downcaseTokens(s,l,t):
    """Parse action returning every token, converted to a string, in lower case."""
    return [ _ustr(tok).lower() for tok in t ]
|
|
3370
|
|
def keepOriginalText(s,startLoc,t):
    """Parse action that replaces the matched tokens with the original input text.

       The end of the match is obtained from getTokensEndLoc(); the token list
       is then emptied and refilled with the slice of s running from startLoc
       to that end location, overriding the output of any nested parse actions.
       Returns the modified token object."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
|
|
3381
|
|
def getTokensEndLoc():
    """Return the end location of the tokens currently being handled by a parse action.

       Must be called from within a parse action.  The call stack is searched,
       starting two frames above this one, for a function named
       "_parseNoCache"; the value of its local variable "loc" is returned.
       Raises ParseFatalException if no such frame is found."""
    import inspect
    fstack = inspect.stack()
    try:
        # skip this frame and its direct caller, then look upward (through any
        # intervening argument normalizers) for the calling parse routine
        for frameRecord in fstack[2:]:
            funcName = frameRecord[3]
            if funcName == "_parseNoCache":
                return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # release the captured stack explicitly
        del fstack
|
|
3397
|
|
def _makeTags(tagStr, xml):
    """Internal helper building the (openTag, closeTag) expression pair for a tag.

       tagStr is either a tag name string or an expression matching the tag
       name.  xml selects the XML form (case-sensitive tag keyword, attributes
       must have double-quoted values) or the HTML form (caseless tag keyword,
       attribute names lower-cased, values optional and possibly unquoted).
       The open tag gets the results name "start<Name>", the close tag
       "end<Name>"; the open tag also reports a boolean "empty" field that is
       True when the tag ends in "/>".
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # an unquoted HTML attribute value may be any run of printables other than '>'
        printablesLessRAbrack = printables.replace(">", "")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) +
                         Optional( Suppress("=") + tagAttrValue ) )

    # optional trailing "/" marking a self-closing tag, reported as a boolean
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')

    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(tagAttr)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # "ns:tag" -> "NsTag": title-case the colon-separated parts and join them
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
|
|
3425
|
|
def makeHTMLTags(tagStr):
    """Return the (openTag, closeTag) expressions for the given tag name, using HTML rules."""
    return _makeTags( tagStr, xml=False )
|
|
3429
|
|
def makeXMLTags(tagStr):
    """Return the (openTag, closeTag) expressions for the given tag name, using XML rules."""
    return _makeTags( tagStr, xml=True )
|
|
3433
|
|
def withAttribute(*args,**attrDict):
    """Create a validating parse action for start tags built with makeXMLTags
       or makeHTMLTags, so that a tag only matches when it carries the required
       attributes.  Useful for avoiding false matches on common tags such as
       <TD> or <DIV>.

       The required attributes may be given as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       Names containing a namespace prefix must use the tuple form.  If any
       positional tuples are given, keyword arguments are ignored.

       Names are looked up in the parsed tokens exactly as given; tags made by
       makeHTMLTags store their attribute names in lower case.

       To require only that an attribute is present, whatever its value, pass
       withAttribute.ANY_VALUE as the value.

       The returned parse action raises ParseException when an attribute is
       missing or has a different value.
       """
    required = [(k,v) for k,v in (args[:] if args else attrDict.items())]

    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actualValue = tokens[attrName]
                if actualValue != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actualValue, attrValue))
    return pa
# sentinel meaning "the attribute must exist, any value is acceptable"
withAttribute.ANY_VALUE = object()
|
|
3464
|
|
# associativity markers for operatorPrecedence; each is a unique sentinel object
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
|
|
3468
|
|
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary,
       binary or ternary, left- or right-associative.  Parse actions can also
       be attached to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression grammar; a parenthesized sub-expression is
          accepted wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar, listed from the tightest-binding level to the
          loosest; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Returns a Forward expression matching the complete expression grammar.

       Raises ValueError if numTerms is not 1, 2 or 3, if numTerms is 3 and
       opExpr is not a pair of expressions, or if rightLeftAssoc is neither
       opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    # the innermost operand: the base expression, or the whole grammar in parentheses
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        # expression for this precedence level; defined below in terms of the previous level
        thisExpr = Forward()#.setName("expr%d" % i)
        # each matchExpr starts with a FollowedBy lookahead, so the Group (and its
        # parse action) only applies when an operator of this level is really present
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # opExpr of None: operands are simply adjacent, with no operator token
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            # right-associative forms recurse into thisExpr on the right-hand side
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or falls through to the previous level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    # the outermost (loosest-binding) level is the complete expression
    ret << lastExpr
    return ret
|
|
3539
|
|
# Quoted-string expressions.  Inside the quotes each regex accepts: any character
# other than the quote, newline, carriage return or backslash; a doubled quote
# character; a \x hex escape; or a backslash followed by any single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, as a single regex
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a 'u' prefix, combined into one token
unicodeString = Combine(_L('u') + quotedString.copy())
|
|
3544
|
|
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Build an expression for nested lists enclosed in opening and closing
       delimiters ("(" and ")" unless told otherwise).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); may also be a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); may also be a pyparsing expression
        - content - expression for the items inside the nested lists (default=None)
        - ignoreExpr - expression whose matches may contain delimiter characters
          that must not count as nesting (default=quotedString)

       When content is not supplied, everything between the delimiters is
       captured as a list of whitespace-delimited values; in that case opener
       and closer must both be strings.

       ignoreExpr is meant for things like quoted strings or comments.  Combine
       several such expressions with Or or MatchFirst.  Pass None when nothing
       should be ignored.

       Raises ValueError if opener equals closer, or if content is omitted and
       the delimiters are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    def stripToken(t):
        # parse action shared by every default content expression
        return t[0].strip()

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1
        if singleCharDelims and ignoreExpr is not None:
            content = Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(stripToken)
        elif singleCharDelims:
            content = empty+CharsNotIn(opener+closer+whiteChars).setParseAction(stripToken)
        elif ignoreExpr is not None:
            # multi-character delimiters: exclude them by negative lookahead
            content = Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
|
|
3596
|
|
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr is modified: it is set to ignore a
       backslash followed by a line end.
    """
    def checkPeerIndent(s,l,t):
        # statement must start in the column on top of the indent stack
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                # deeper than the current block without an introducing statement
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: must be deeper than the current level; push the new column
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: must be shallower than the current level and no deeper
        # than the enclosing one; pop the current level
        if l >= len(s): return
        curCol = col(l,s)
        # NOTE(review): indentStack[-2] needs at least two entries; this appears to
        # rely on checkSubIndent having pushed a level first - confirm with callers
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more line ends; only tabs and spaces are skipped as whitespace before each
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # a backslash followed by a line end is ignored inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
|
|
3648
|
|
# 8-bit character sets, expanded from ranges of character codes by srange():
# codes 0xC0-0xFF except 0xD7 and 0xF7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# codes 0xA1-0xBF, plus the two codes excluded above (0xD7 and 0xF7)
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
|
|
3651
|
|
# open/close tag expressions matching any tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&name;" for the handful of common entities; the name is stored under "entity"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = {
    "gt": ">",
    "lt": "<",
    "amp": "&",
    "nbsp": " ",
    "quot": '"',
}

def replaceHTMLEntity(t):
    """Parse action returning the character for a matched common HTML entity,
       or None when the entity name is not one of the known ones."""
    return _htmlEntityMap.get(t.entity)
|
|
3656
|
|
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# /* ... */ comment
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... --> comment; [\s\S] lets the body run across line breaks
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to the end of the current line, leading whitespace included
restOfLine = Regex(r".*").leaveWhitespace()
# // comment; a backslash immediately before the newline continues it onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either comment form (/* ... */ or // ...) in a single regex
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same object as cppStyleComment, under a second name
javaStyleComment = cppStyleComment
# '#' through the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# every printable character except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one unquoted list item: words of non-comma characters, keeping interior runs of
# spaces/tabs unless they are followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# comma-delimited list of quoted strings or unquoted items; an empty field yields ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
|
|
3672
|
|
3673
|
|
if __name__ == "__main__":
    # Small self-test: parse a few SELECT statements with a toy SQL grammar and
    # print either the parsed results or the location of the parse error.

    def test( teststring ):
        """Parse teststring with simpleSQL and print the outcome."""
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            err = sys.exc_info()[1]
            # show the offending line with a caret under the error column
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    # keywords
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    # dotted identifiers, upper-cased
    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )

    columnsSpec = ( '*' | columnNameList ).setResultsName( "columns" )
    tablesSpec = tableNameList.setResultsName( "tables" )
    simpleSQL = ( selectToken + columnsSpec + fromToken + tablesSpec )

    for _sampleSQL in (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
    ):
        test( _sampleSQL )
|