comparison python/pyyacc.py @ 194:b01429a5d695

Fixed test
author Windel Bouwman
date Wed, 29 May 2013 22:36:37 +0200
parents 6cd6260789a1
children 37ac6c016e0f
comparison
equal deleted inserted replaced
193:f091e7d70996 194:b01429a5d695
26 self.terminals = terminals 26 self.terminals = terminals
27 self.nonterminals = [] 27 self.nonterminals = []
28 self.productions = [] 28 self.productions = []
29 self._first = None # Cached first set 29 self._first = None # Cached first set
30 30
31 def add_production(self, name, symbols): 31 def add_production(self, name, symbols, f=None):
32 """ Add a production rule to the grammar """ 32 """ Add a production rule to the grammar """
33 production = Production(name, symbols) 33 production = Production(name, symbols, f)
34 self.productions.append(production) 34 self.productions.append(production)
35 if name in self.terminals: 35 if name in self.terminals:
36 raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) 36 raise ParserGenerationException("Cannot redefine terminal {0}".format(name))
37 if not name in self.nonterminals: 37 if not name in self.nonterminals:
38 self.nonterminals.append(name) 38 self.nonterminals.append(name)
39 self._first = None # Invalidate cached version
39 40
40 def productionsForName(self, name): 41 def productionsForName(self, name):
41 """ Retrieve all productions for a non terminal """ 42 """ Retrieve all productions for a non terminal """
42 return [p for p in self.productions if p.name == name] 43 return [p for p in self.productions if p.name == name]
43 44
154 155
155 def checkSymbols(self): 156 def checkSymbols(self):
156 """ Checks no symbols are undefined """ 157 """ Checks no symbols are undefined """
157 for production in self.productions: 158 for production in self.productions:
158 for symbol in production.symbols: 159 for symbol in production.symbols:
159 if symbol not in self.Symbols: 160 if symbol not in self.Symbols + [EPS]:
160 raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) 161 raise ParserGenerationException('Symbol {0} undefined'.format(symbol))
161 162
162 163
163 def genParser(self): 164 def genParser(self):
164 """ Generates a parser from the grammar """ 165 """ Generates a parser from the grammar """
167 goto_table = {} 168 goto_table = {}
168 iis = self.initialItemSet() 169 iis = self.initialItemSet()
169 170
170 # First generate all item sets by using the nextItemset function: 171 # First generate all item sets by using the nextItemset function:
171 states, transitions = self.genCanonicalSet(iis) 172 states, transitions = self.genCanonicalSet(iis)
173
174 def action_str(act):
175 a, p = act
176 if a is SHIFT:
177 return 'Shift {0}'.format(0)
178 elif a is REDUCE:
179 return 'Reduce {0}'.format(p)
180 return 'Other'
172 181
173 def setAction(state, t, action): 182 def setAction(state, t, action):
174 key = (state, t) 183 key = (state, t)
175 if key in action_table: 184 if key in action_table:
176 action2 = action_table[key] 185 action2 = action_table[key]
177 if action != action2: 186 if action != action2:
178 raise ParserGenerationException('LR construction conflict') 187 if (action2[0] == REDUCE) and (action[0] == SHIFT):
188 # Automatically resolve and do the shift action!
189 # Simple, but almost always what you want!!
190 action_table[key] = action
191 else:
192 if (action2[0] == SHIFT) and (action[0] == REDUCE):
193 pass
194 else:
195 a1 = action_str(action)
196 a2 = action_str(action2)
197 raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2))
179 else: 198 else:
180 action_table[key] = action 199 action_table[key] = action
181 200
182 # Fill action table: 201 # Fill action table:
183 for state in states: 202 for state in states:
202 return LRParser(action_table, goto_table) 221 return LRParser(action_table, goto_table)
203 222
204 223
205 class Production: 224 class Production:
206 """ Production rule for a grammar """ 225 """ Production rule for a grammar """
207 def __init__(self, name, symbols): 226 def __init__(self, name, symbols, f=None):
208 self.name = name 227 self.name = name
209 self.symbols = symbols 228 self.symbols = symbols
229 self.f = f
210 230
211 def __repr__(self): 231 def __repr__(self):
212 return '{0} -> {1}'.format(self.name, self.symbols) 232 return '{0} -> {1}'.format(self.name, self.symbols)
213 233
214 234
285 305
286 def parse(self, toks): 306 def parse(self, toks):
287 """ Parse an iterable with tokens """ 307 """ Parse an iterable with tokens """
288 assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks)) 308 assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks))
289 stack = [0] 309 stack = [0]
290 look_ahead = toks.__next__() 310 try:
311 look_ahead = toks.__next__()
312 except StopIteration:
313 look_ahead = Token(EOF, EOF)
291 assert type(look_ahead) is Token 314 assert type(look_ahead) is Token
292 while True: 315 while True:
293 state = stack[-1] # top of stack 316 state = stack[-1] # top of stack
294 key = (state, look_ahead.typ) 317 key = (state, look_ahead.typ)
295 if not key in self.action_table: 318 if not key in self.action_table:
296 raise Exception('Error parsing at character {0}'.format(look_ahead)) 319 raise ParserException('Error parsing at character {0}'.format(look_ahead))
297 action, param = self.action_table[key] 320 action, param = self.action_table[key]
298 if action == REDUCE: 321 if action == REDUCE:
322 #print('reduce', param)
323 f_args = []
299 for s in param.symbols: 324 for s in param.symbols:
300 stack.pop() 325 stack.pop()
301 stack.pop() 326 stack.pop()
327 f_args.append(0)
328 if param.f:
329 param.f(*f_args)
302 state = stack[-1] 330 state = stack[-1]
303 stack.append(param.name) 331 stack.append(param.name)
304 stack.append(self.goto_table[(state, param.name)]) 332 stack.append(self.goto_table[(state, param.name)])
305 elif action == SHIFT: 333 elif action == SHIFT:
306 stack.append(look_ahead.typ) 334 stack.append(look_ahead.typ)
307 stack.append(param) 335 stack.append(param)
308 try: 336 try:
309 look_ahead = toks.__next__() 337 look_ahead = toks.__next__()
310 except StopIteration: 338 except StopIteration:
311 look_ahead = Token(EOF, EOF, 0) 339 look_ahead = Token(EOF, EOF)
312 assert type(look_ahead) is Token 340 assert type(look_ahead) is Token
313 elif action == ACCEPT: 341 elif action == ACCEPT:
314 break 342 break
315 343
316
317 def testSimpleGrammar():
318 # 1. define a simple grammar:
319 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
320 g.add_production('input', ['expression'])
321 g.add_production('expression', ['term'])
322 g.add_production('expression', ['expression', '+', 'term'])
323 g.add_production('term', ['factor'])
324 g.add_production('term', ['term', '*', 'factor'])
325 g.add_production('factor', ['(', 'expression', ')'])
326 g.add_production('factor', ['identifier'])
327 g.start_symbol = 'input'
328 # 2. define input:
329 tokens = ['identifier', '+', 'identifier', '+', 'identifier']
330 # 3. build parser:
331 p = g.genParser()
332 # 4. feed input:
333 def genTokens(lst):
334 for t in lst:
335 yield Token(t, t, 0)
336 p.parse(genTokens(tokens))
337
338
339 if __name__ == '__main__':
340 testSimpleGrammar()
341