# HG changeset patch # User Windel Bouwman # Date 1369404803 -7200 # Node ID fe2b72381a83d8a0de5089bdb61159bb9c56ed57 # Parent 4fd075e8259c873bd8919f3d77c820e913234055 Added testset for pyy diff -r 4fd075e8259c -r fe2b72381a83 python/pyyacc.py --- a/python/pyyacc.py Sat May 18 18:30:26 2013 +0200 +++ b/python/pyyacc.py Fri May 24 16:13:23 2013 +0200 @@ -3,6 +3,10 @@ """ EPS = 'EPS' +EOF = 'EOF' +SHIFT = 1 +REDUCE = 2 +ACCEPT = 3 class Grammar: @@ -30,9 +34,14 @@ return self.nonterminals + self.terminals def calcFirstSets(self): - """ Calculate first sets for each grammar symbol """ + """ + Calculate first sets for each grammar symbol + This is a dictionary which maps each grammar symbol + to a set of terminals that can be encountered first + when looking for the symbol. + """ first = {} - for t in self.terminals + ['EOF', EPS]: + for t in self.terminals + [EOF, EPS]: first[t] = set([t]) for nt in self.nonterminals: first[nt] = set() @@ -55,6 +64,101 @@ break return first + def closure(self, itemset): + """ Expand itemset by using epsilon moves """ + worklist = list(itemset) + def addIt(itm): + if not itm in itemset: + itemset.add(itm) + worklist.append(itm) + def first2(itm, la): + # When using the first sets, create a copy: + f = set(self.first[item.NextNext]) + if EPS in f: + f.discard(EPS) + f.add(item.look_ahead) + return f + + while worklist: + item = worklist.pop(0) + if not item.IsShift: + continue + if not (item.Next in self.nonterminals): + continue + C = item.Next + for add_p in self.productionsForName(C): + for b in first2(item, item.look_ahead): + addIt(Item(add_p, 0, b)) + return frozenset(itemset) + + def initialItemSet(self): + """ Calculates the initial item set """ + iis = set() + for p in self.productionsForName(self.start_symbol): + iis.add(Item(p, 0, EOF)) + return self.closure(iis) + + def nextItemSet(self, itemset, symbol): + """ + Determines the next itemset for the current set and a symbol + This is the goto procedure + """ + next_set = set() + for item in itemset: + if item.can_shift_over(symbol): + next_set.add(item.shifted()) + return self.closure(next_set) + + def genCanonicalSet(self, iis): + states = set() + worklist = [] + goto_table = {} + def addSt(s): + if not (s in states): + worklist.append(s) + states.add(s) + addSt(iis) + while len(worklist) > 0: + itemset = worklist.pop(0) + for symbol in self.Symbols: + nis = self.nextItemSet(itemset, symbol) + if not nis: + continue + goto_table[(itemset, symbol)] = nis + addSt(nis) + return states, goto_table + + def genParser(self): + """ Generates a parser from the grammar """ + action_table = {} + self.first = self.calcFirstSets() + iis = self.initialItemSet() + + # First generate all item sets by using the nextItemset function: + states, goto_table = self.genCanonicalSet(iis) + + # Number the states: + number_states = {} + for s, i in zip(states, range(len(states))): + number_states[s] = i + + # Fill action table: + for state in states: + for item in state: + if item.IsShift and item.Next in self.terminals: + action_table[(state, item.Next)] = (SHIFT, 0) + elif item.IsReduce: + if item.look_ahead == EOF: + action_table[(state, item.look_ahead)] = (ACCEPT, 0) + else: + action_table[(state, item.look_ahead)] = (REDUCE, item.production) + else: + pass + + p = LRParser(action_table) + p.goto_table = goto_table + p.s0 = iis + return p class Production: """ Production rule for a grammar """ @@ -70,6 +174,7 @@ def __init__(self, production, dotpos, look_ahead): self.production = production self.dotpos = dotpos + assert self.dotpos <= len(self.production.symbols) self.look_ahead = look_ahead def getdata(self): @@ -96,10 +201,18 @@ def Next(self): return self.production.symbols[self.dotpos] + def can_shift_over(self, symbol): + """ Determines if this item can shift over the given symbol """ + return self.IsShift and self.Next == symbol + + def shifted(self): + """ Creates a new item that is shifted one position """ + return Item(self.production, self.dotpos + 1, self.look_ahead) + @property def NextNext(self): if self.dotpos + 1 >= len(self.production.symbols): - return 'EPS' + return EPS else: return self.production.symbols[self.dotpos + 1] @@ -109,81 +222,16 @@ postdot = ' '.join(prod.symbols[self.dotpos:]) nt = prod.name args = (nt, predot, postdot, self.look_ahead) - return '[{0} -> {1} _dot_ {2},{3}]'.format(*args) - - -class LRgenerator: - """ Parser generator """ - def __init__(self, g): - self.grammar = g - - def closure(self, itemset): - """ Expand itemset by using epsilon moves """ - worklist = list(itemset) - while worklist: - item = worklist.pop(0) - if item.IsShift and item.Next in self.grammar.nonterminals: - for add_p in self.grammar.productionsForName(item.Next): - f = self.first[item.NextNext] - if 'EPS' in f: - f.discard('EPS') - f.add(item.look_ahead) - for b in f: - i = Item(add_p, 0, b) - if not i in itemset: - itemset.add(i) - worklist.append(i) - return frozenset(itemset) - - def initialItemSet(self): - """ Calculates the initial item set """ - iis = set() - for p in self.grammar.productionsForName(self.grammar.start_symbol): - iis.add(Item(p, 0, 'EOF')) - return self.closure(iis) - - def nextItemSet(self, itemset, symbol): - """ Determines the next itemset for the current set and a symbol """ - nis = set() - for item in itemset: - if item.IsShift and item.Next == symbol: - nis.add(Item(item.production, item.dotpos + 1, item.look_ahead)) - return self.closure(nis) - - def genParser(self): - states = set() - goto_table = {} - action_table = {} - self.first = self.grammar.calcFirstSets() - iis = self.initialItemSet() - states.add(iis) - - # First generate all item sets by using the nextItemset function: - worklist = [iis] - while len(worklist) > 0: - itemset = worklist.pop(0) - for symbol in self.grammar.Symbols: - nis = self.nextItemSet(itemset, symbol) - if nis: - goto_table[(itemset, symbol)] = nis - if not nis in states: - worklist.append(nis) - states.add(nis) - - # Number the states: - print('States:') - for s, i in zip(states, range(len(states))): - print(i, ', '.join(str(x) for x in s)) - p = LRParser() - p.goto_table = goto_table - p.action_table = action_table - p.s0 = iis - return p + return '[{0} -> {1} . {2}, {3}]'.format(*args) class LRParser: + """ LR parser """ + def __init__(self, action_table): + self.action_table = action_table + def parse(self, toks): - stack = ['EOF', self.s0] + stack = [EOF, self.s0] look_ahead = toks.pop(0) while True: state = stack[-1] # top of stack @@ -191,23 +239,22 @@ if not key in self.action_table: raise Exception('Error parsing') action, param = self.action_table[(state, look_ahead)] - if action == 'reduce': + if action == REDUCE: for s in param.symbols: stack.pop() stack.pop() state = stack[-1] stack.append(param.name) stack.append(self.goto_table[(state, param.name)]) - elif action == 'shift': + elif action == SHIFT: stack.append(look_ahead) s, i = stack[-2:] stack.append(self.goto_table[(s, i)]) look_ahead = toks.pop(0) - elif action == 'accept': + elif action == ACCEPT: break -# Test it: -def test(): +def testSimpleGrammar(): # 1. define a simple grammar: g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) g.add_production('input', ['expression']) @@ -221,12 +268,10 @@ # 2. define input: tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] # 3. build parser: - p = LRgenerator(g).genParser() + p = g.genParser() # 4. feed input: p.parse(tokens) -print('Hallo') + if __name__ == '__main__': - print('Hallo') - test() - + testSimpleGrammar() diff -r 4fd075e8259c -r fe2b72381a83 python/testpyy.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/testpyy.py Fri May 24 16:13:23 2013 +0200 @@ -0,0 +1,97 @@ +import unittest, pprint +from pyyacc import Grammar, Item, EOF + + +class testLR(unittest.TestCase): + def setUp(self): + pass + + def testSimpleGrammar(self): + # 1. define a simple grammar: + g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) + g.add_production('input', ['expression']) + g.add_production('expression', ['term']) + g.add_production('expression', ['expression', '+', 'term']) + g.add_production('term', ['factor']) + g.add_production('term', ['term', '*', 'factor']) + g.add_production('factor', ['(', 'expression', ')']) + g.add_production('factor', ['identifier']) + g.start_symbol = 'input' + # 2. define input: + tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] + # 3. build parser: + p = g.genParser() + # 4. feed input: + p.parse(tokens) + +class testPG(unittest.TestCase): + """ Tests several parts of the parser generator """ + def setUp(self): + g = Grammar(['(', ')']) + g.add_production('goal', ['list']) + g.add_production('list', ['list', 'pair']) + g.add_production('list', ['pair']) + g.add_production('pair', ['(', 'pair', ')']) + g.add_production('pair', ['(', ')']) + g.start_symbol = 'goal' + g.first = g.calcFirstSets() + self.g = g + + def testFirstSet(self): + for a in ['(', ')', EOF, 'EPS']: + self.assertEqual(self.g.first[a], {a}) + for nt in ['list', 'pair', 'goal']: + self.assertEqual(self.g.first[nt], {'('}) + + def testInitItemSet(self): + p0, p1, p2, p3, p4 = self.g.productions + s0 = self.g.initialItemSet() + self.assertEqual(len(s0), 9) # 9 with the goal rule included! + self.assertIn(Item(p0, 0, EOF), s0) + self.assertIn(Item(p1, 0, EOF), s0) + self.assertIn(Item(p1, 0, '('), s0) + self.assertIn(Item(p2, 0, EOF), s0) + self.assertIn(Item(p2, 0, '('), s0) + self.assertIn(Item(p3, 0, EOF), s0) + self.assertIn(Item(p3, 0, '('), s0) + self.assertIn(Item(p4, 0, EOF), s0) + self.assertIn(Item(p4, 0, '('), s0) + + def testCanonical(self): + s0 = self.g.initialItemSet() + s, gt = self.g.genCanonicalSet(s0) + pprint.pprint(s) + # Must result in 12 sets: + self.assertEqual(len(s), 12) + + def testClosure(self): + p0, p1, p2, p3, p4 = self.g.productions + s0 = set() + for p in self.g.productionsForName(self.g.start_symbol): + s0.add(Item(p, 0, EOF)) + self.assertEqual(len(s0), 1) # 1 rule + self.assertIn(Item(p0, 0, EOF), s0) + + # Invoke closure on set: + s0 = self.g.closure(s0) + self.assertIn(Item(p0, 0, EOF), s0) + self.assertIn(Item(p1, 0, EOF), s0) + self.assertIn(Item(p1, 0, '('), s0) + self.assertIn(Item(p2, 0, EOF), s0) + self.assertIn(Item(p2, 0, '('), s0) + self.assertIn(Item(p3, 0, EOF), s0) + self.assertIn(Item(p3, 0, '('), s0) + self.assertIn(Item(p4, 0, EOF), s0) + self.assertIn(Item(p4, 0, '('), s0) + + def testParser(self): + tokens = ['(', '(', ')', '(', ')', ')', 'EOF'] + # 3. build parser: + p = self.g.genParser() + # 4. feed input: + p.parse(tokens) + +if __name__ == '__main__': + unittest.main() + +