# HG changeset patch # User Windel Bouwman # Date 1369421103 -7200 # Node ID 51a6440d6398e3e4112f5f953ca84980ed427a1a # Parent fe2b72381a83d8a0de5089bdb61159bb9c56ed57 Fixed LR(1) parser diff -r fe2b72381a83 -r 51a6440d6398 python/pyyacc.py --- a/python/pyyacc.py Fri May 24 16:13:23 2013 +0200 +++ b/python/pyyacc.py Fri May 24 20:45:03 2013 +0200 @@ -8,6 +8,13 @@ REDUCE = 2 ACCEPT = 3 +class ParserGenerationException(Exception): + pass + + +class ParserException(Exception): + pass + class Grammar: """ Defines a grammar of a language """ @@ -15,6 +22,7 @@ self.terminals = terminals self.nonterminals = [] self.productions = [] + self._first = None # Cached first set def add_production(self, name, symbols): """ Add a production rule to the grammar """ @@ -33,6 +41,12 @@ """ Get all the symbols defined by this grammar """ return self.nonterminals + self.terminals + @property + def first(self): + if not self._first: + self._first = self.calcFirstSets() + return self._first + def calcFirstSets(self): """ Calculate first sets for each grammar symbol @@ -45,7 +59,7 @@ first[t] = set([t]) for nt in self.nonterminals: first[nt] = set() - epsset = set([EPS]) + epsset = {EPS} while True: some_change = False for p in self.productions: @@ -78,7 +92,7 @@ f.discard(EPS) f.add(item.look_ahead) return f - + # Start of algorithm: while worklist: item = worklist.pop(0) if not item.IsShift: @@ -110,13 +124,13 @@ return self.closure(next_set) def genCanonicalSet(self, iis): - states = set() + states = [] worklist = [] - goto_table = {} + transitions = {} def addSt(s): if not (s in states): worklist.append(s) - states.add(s) + states.append(s) addSt(iis) while len(worklist) > 0: itemset = worklist.pop(0) @@ -124,41 +138,50 @@ nis = self.nextItemSet(itemset, symbol) if not nis: continue - goto_table[(itemset, symbol)] = nis addSt(nis) - return states, goto_table + transitions[(states.index(itemset), symbol)] = states.index(nis) + return states, transitions def genParser(self): """ Generates a parser from the grammar """ action_table = {} - self.first = self.calcFirstSets() + goto_table = {} iis = self.initialItemSet() # First generate all item sets by using the nextItemset function: - states, goto_table = self.genCanonicalSet(iis) - - # Number the states: - number_states = {} - for s, i in zip(states, range(len(states))): - number_states[s] = i + states, transitions = self.genCanonicalSet(iis) + + def setAction(state, t, action): + key = (state, t) + if key in action_table: + action2 = action_table[key] + if action != action2: + raise ParserGenerationException('LR construction conflict') + else: + action_table[key] = action # Fill action table: for state in states: + # Detect conflicts: for item in state: if item.IsShift and item.Next in self.terminals: - action_table[(state, item.Next)] = (SHIFT, 0) - elif item.IsReduce: - if item.look_ahead == EOF: - action_table[(state, item.look_ahead)] = (ACCEPT, 0) + # Rule 1, a shift item: + nextstate = transitions[(states.index(state), item.Next)] + setAction(states.index(state), item.Next, (SHIFT, nextstate)) + if item.IsReduce: + if item.production.name == self.start_symbol and item.look_ahead == EOF: + # Rule 3: accept: + setAction(states.index(state), item.look_ahead, (ACCEPT, None)) else: - action_table[(state, item.look_ahead)] = (REDUCE, item.production) - else: - pass + # Rule 2, reduce item: + setAction(states.index(state), item.look_ahead, (REDUCE, item.production)) + for nt in self.nonterminals: + key = (states.index(state), nt) + if key in transitions: + goto_table[key] = transitions[key] - p = LRParser(action_table) - p.goto_table = goto_table - p.s0 = iis - return p + return LRParser(action_table, goto_table) + class Production: """ Production rule for a grammar """ @@ -222,21 +245,23 @@ postdot = ' '.join(prod.symbols[self.dotpos:]) nt = prod.name args = (nt, predot, postdot, self.look_ahead) - return '[{0} -> {1} . {2}, {3}]'.format(*args) + return '[{0} -> {1} . {2} -> {3}]'.format(*args) class LRParser: """ LR parser """ - def __init__(self, action_table): + def __init__(self, action_table, goto_table): self.action_table = action_table + self.goto_table = goto_table def parse(self, toks): - stack = [EOF, self.s0] + stack = [0] look_ahead = toks.pop(0) while True: state = stack[-1] # top of stack key = (state, look_ahead) if not key in self.action_table: + print(key) raise Exception('Error parsing') action, param = self.action_table[(state, look_ahead)] if action == REDUCE: @@ -248,8 +273,7 @@ stack.append(self.goto_table[(state, param.name)]) elif action == SHIFT: stack.append(look_ahead) - s, i = stack[-2:] - stack.append(self.goto_table[(s, i)]) + stack.append(param) look_ahead = toks.pop(0) elif action == ACCEPT: break diff -r fe2b72381a83 -r 51a6440d6398 python/testpyy.py --- a/python/testpyy.py Fri May 24 16:13:23 2013 +0200 +++ b/python/testpyy.py Fri May 24 20:45:03 2013 +0200 @@ -24,6 +24,32 @@ # 4. feed input: p.parse(tokens) +class testExpressionGrammar(unittest.TestCase): + def setUp(self): + g = Grammar(['EOF', 'identifier', '(', ')', '+', '*', 'num']) + g.add_production('input', ['expression']) + g.add_production('expression', ['term']) + g.add_production('expression', ['expression', '+', 'term']) + g.add_production('term', ['factor']) + g.add_production('term', ['term', '*', 'factor']) + g.add_production('factor', ['(', 'expression', ')']) + g.add_production('factor', ['identifier']) + g.add_production('factor', ['num']) + g.start_symbol = 'input' + self.g = g + + def testFirstSimpleGrammar(self): + # 1. define a simple grammar: + first = self.g.calcFirstSets() + self.assertEqual(first['input'], {'identifier', '(', 'num'}) + self.assertEqual(first['term'], {'identifier', '(', 'num'}) + + def testCanonical(self): + s0 = self.g.initialItemSet() + s, gt = self.g.genCanonicalSet(s0) + # Must result in 12 sets: + self.assertEqual(len(s), 24) + class testPG(unittest.TestCase): """ Tests several parts of the parser generator """ def setUp(self): @@ -34,7 +60,6 @@ g.add_production('pair', ['(', 'pair', ')']) g.add_production('pair', ['(', ')']) g.start_symbol = 'goal' - g.first = g.calcFirstSets() self.g = g def testFirstSet(self): @@ -60,15 +85,13 @@ def testCanonical(self): s0 = self.g.initialItemSet() s, gt = self.g.genCanonicalSet(s0) - pprint.pprint(s) # Must result in 12 sets: self.assertEqual(len(s), 12) def testClosure(self): p0, p1, p2, p3, p4 = self.g.productions s0 = set() - for p in self.g.productionsForName(self.g.start_symbol): - s0.add(Item(p, 0, EOF)) + s0.add(Item(p0, 0, EOF)) self.assertEqual(len(s0), 1) # 1 rule self.assertIn(Item(p0, 0, EOF), s0) @@ -85,9 +108,12 @@ self.assertIn(Item(p4, 0, '('), s0) def testParser(self): - tokens = ['(', '(', ')', '(', ')', ')', 'EOF'] + tokens = ['(', '(', ')', ')', '(', ')', EOF] # 3. build parser: p = self.g.genParser() + self.assertEqual(len(p.goto_table), 5) + self.assertEqual(len(p.action_table), 19) + # 4. feed input: p.parse(tokens)