Mercurial > lcfOS
comparison python/pyyacc.py @ 194:b01429a5d695
Fixed test
author | Windel Bouwman |
---|---|
date | Wed, 29 May 2013 22:36:37 +0200 |
parents | 6cd6260789a1 |
children | 37ac6c016e0f |
comparison
equal
deleted
inserted
replaced
193:f091e7d70996 | 194:b01429a5d695 |
---|---|
26 self.terminals = terminals | 26 self.terminals = terminals |
27 self.nonterminals = [] | 27 self.nonterminals = [] |
28 self.productions = [] | 28 self.productions = [] |
29 self._first = None # Cached first set | 29 self._first = None # Cached first set |
30 | 30 |
31 def add_production(self, name, symbols): | 31 def add_production(self, name, symbols, f=None): |
32 """ Add a production rule to the grammar """ | 32 """ Add a production rule to the grammar """ |
33 production = Production(name, symbols) | 33 production = Production(name, symbols, f) |
34 self.productions.append(production) | 34 self.productions.append(production) |
35 if name in self.terminals: | 35 if name in self.terminals: |
36 raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) | 36 raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) |
37 if not name in self.nonterminals: | 37 if not name in self.nonterminals: |
38 self.nonterminals.append(name) | 38 self.nonterminals.append(name) |
39 self._first = None # Invalidate cached version | |
39 | 40 |
40 def productionsForName(self, name): | 41 def productionsForName(self, name): |
41 """ Retrieve all productions for a non terminal """ | 42 """ Retrieve all productions for a non terminal """ |
42 return [p for p in self.productions if p.name == name] | 43 return [p for p in self.productions if p.name == name] |
43 | 44 |
154 | 155 |
155 def checkSymbols(self): | 156 def checkSymbols(self): |
156 """ Checks no symbols are undefined """ | 157 """ Checks no symbols are undefined """ |
157 for production in self.productions: | 158 for production in self.productions: |
158 for symbol in production.symbols: | 159 for symbol in production.symbols: |
159 if symbol not in self.Symbols: | 160 if symbol not in self.Symbols + [EPS]: |
160 raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) | 161 raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) |
161 | 162 |
162 | 163 |
163 def genParser(self): | 164 def genParser(self): |
164 """ Generates a parser from the grammar """ | 165 """ Generates a parser from the grammar """ |
167 goto_table = {} | 168 goto_table = {} |
168 iis = self.initialItemSet() | 169 iis = self.initialItemSet() |
169 | 170 |
170 # First generate all item sets by using the nextItemset function: | 171 # First generate all item sets by using the nextItemset function: |
171 states, transitions = self.genCanonicalSet(iis) | 172 states, transitions = self.genCanonicalSet(iis) |
173 | |
174 def action_str(act): | |
175 a, p = act | |
176 if a is SHIFT: | |
177 return 'Shift {0}'.format(0) | |
178 elif a is REDUCE: | |
179 return 'Reduce {0}'.format(p) | |
180 return 'Other' | |
172 | 181 |
173 def setAction(state, t, action): | 182 def setAction(state, t, action): |
174 key = (state, t) | 183 key = (state, t) |
175 if key in action_table: | 184 if key in action_table: |
176 action2 = action_table[key] | 185 action2 = action_table[key] |
177 if action != action2: | 186 if action != action2: |
178 raise ParserGenerationException('LR construction conflict') | 187 if (action2[0] == REDUCE) and (action[0] == SHIFT): |
188 # Automatically resolve and do the shift action! | |
189 # Simple, but almost always what you want!! | |
190 action_table[key] = action | |
191 else: | |
192 if (action2[0] == SHIFT) and (action[0] == REDUCE): | |
193 pass | |
194 else: | |
195 a1 = action_str(action) | |
196 a2 = action_str(action2) | |
197 raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2)) | |
179 else: | 198 else: |
180 action_table[key] = action | 199 action_table[key] = action |
181 | 200 |
182 # Fill action table: | 201 # Fill action table: |
183 for state in states: | 202 for state in states: |
202 return LRParser(action_table, goto_table) | 221 return LRParser(action_table, goto_table) |
203 | 222 |
204 | 223 |
205 class Production: | 224 class Production: |
206 """ Production rule for a grammar """ | 225 """ Production rule for a grammar """ |
207 def __init__(self, name, symbols): | 226 def __init__(self, name, symbols, f=None): |
208 self.name = name | 227 self.name = name |
209 self.symbols = symbols | 228 self.symbols = symbols |
229 self.f = f | |
210 | 230 |
211 def __repr__(self): | 231 def __repr__(self): |
212 return '{0} -> {1}'.format(self.name, self.symbols) | 232 return '{0} -> {1}'.format(self.name, self.symbols) |
213 | 233 |
214 | 234 |
285 | 305 |
286 def parse(self, toks): | 306 def parse(self, toks): |
287 """ Parse an iterable with tokens """ | 307 """ Parse an iterable with tokens """ |
288 assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks)) | 308 assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks)) |
289 stack = [0] | 309 stack = [0] |
290 look_ahead = toks.__next__() | 310 try: |
311 look_ahead = toks.__next__() | |
312 except StopIteration: | |
313 look_ahead = Token(EOF, EOF) | |
291 assert type(look_ahead) is Token | 314 assert type(look_ahead) is Token |
292 while True: | 315 while True: |
293 state = stack[-1] # top of stack | 316 state = stack[-1] # top of stack |
294 key = (state, look_ahead.typ) | 317 key = (state, look_ahead.typ) |
295 if not key in self.action_table: | 318 if not key in self.action_table: |
296 raise Exception('Error parsing at character {0}'.format(look_ahead)) | 319 raise ParserException('Error parsing at character {0}'.format(look_ahead)) |
297 action, param = self.action_table[key] | 320 action, param = self.action_table[key] |
298 if action == REDUCE: | 321 if action == REDUCE: |
322 #print('reduce', param) | |
323 f_args = [] | |
299 for s in param.symbols: | 324 for s in param.symbols: |
300 stack.pop() | 325 stack.pop() |
301 stack.pop() | 326 stack.pop() |
327 f_args.append(0) | |
328 if param.f: | |
329 param.f(*f_args) | |
302 state = stack[-1] | 330 state = stack[-1] |
303 stack.append(param.name) | 331 stack.append(param.name) |
304 stack.append(self.goto_table[(state, param.name)]) | 332 stack.append(self.goto_table[(state, param.name)]) |
305 elif action == SHIFT: | 333 elif action == SHIFT: |
306 stack.append(look_ahead.typ) | 334 stack.append(look_ahead.typ) |
307 stack.append(param) | 335 stack.append(param) |
308 try: | 336 try: |
309 look_ahead = toks.__next__() | 337 look_ahead = toks.__next__() |
310 except StopIteration: | 338 except StopIteration: |
311 look_ahead = Token(EOF, EOF, 0) | 339 look_ahead = Token(EOF, EOF) |
312 assert type(look_ahead) is Token | 340 assert type(look_ahead) is Token |
313 elif action == ACCEPT: | 341 elif action == ACCEPT: |
314 break | 342 break |
315 | 343 |
316 | |
317 def testSimpleGrammar(): | |
318 # 1. define a simple grammar: | |
319 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) | |
320 g.add_production('input', ['expression']) | |
321 g.add_production('expression', ['term']) | |
322 g.add_production('expression', ['expression', '+', 'term']) | |
323 g.add_production('term', ['factor']) | |
324 g.add_production('term', ['term', '*', 'factor']) | |
325 g.add_production('factor', ['(', 'expression', ')']) | |
326 g.add_production('factor', ['identifier']) | |
327 g.start_symbol = 'input' | |
328 # 2. define input: | |
329 tokens = ['identifier', '+', 'identifier', '+', 'identifier'] | |
330 # 3. build parser: | |
331 p = g.genParser() | |
332 # 4. feed input: | |
333 def genTokens(lst): | |
334 for t in lst: | |
335 yield Token(t, t, 0) | |
336 p.parse(genTokens(tokens)) | |
337 | |
338 | |
339 if __name__ == '__main__': | |
340 testSimpleGrammar() | |
341 |