Mercurial > lcfOS
view python/pyyacc.py @ 181:216da5e46efc
Changed indent to 4 spaces to comply to convention
author | Windel Bouwman |
---|---|
date | Sat, 18 May 2013 17:45:26 +0200 |
parents | 25a0753da4cf |
children | fe2b72381a83 |
line wrap: on
line source
""" Parser generator script """ EPS = 'EPS' class Grammar: """ Defines a grammar of a language """ def __init__(self, terminals): self.terminals = terminals self.nonterminals = [] self.productions = [] def add_production(self, name, symbols): """ Add a production rule to the grammar """ production = Production(name, symbols) self.productions.append(production) assert not name in self.terminals, "Cannot redefine terminal" if not name in self.nonterminals: self.nonterminals.append(name) def productionsForName(self, name): """ Retrieve all productions for a non terminal """ return [p for p in self.productions if p.name == name] @property def Symbols(self): """ Get all the symbols defined by this grammar """ return self.nonterminals + self.terminals def calcFirstSets(self): """ Calculate first sets for each grammar symbol """ first = {} for t in self.terminals + ['EOF', EPS]: first[t] = set([t]) for nt in self.nonterminals: first[nt] = set() epsset = set([EPS]) while True: some_change = False for p in self.productions: rhs = set() for beta in p.symbols: rhs = rhs | (first[beta] - epsset) if not EPS in first[beta]: break else: if EPS in first[beta]: rhs.add(EPS) if rhs - first[p.name]: first[p.name] |= rhs some_change = True if not some_change: break return first class Production: """ Production rule for a grammar """ def __init__(self, name, symbols): self.name = name self.symbols = symbols def __repr__(self): return '{0} -> {1}'.format(self.name, self.symbols) class Item: def __init__(self, production, dotpos, look_ahead): self.production = production self.dotpos = dotpos self.look_ahead = look_ahead def getdata(self): """ Gets the members as a tuple """ return (self.production, self.dotpos, self.look_ahead) def __eq__(self, other): if type(other) is type(self): return self.getdata() == other.getdata() return False def __hash__(self): return self.getdata().__hash__() @property def IsReduce(self): return self.dotpos == len(self.production.symbols) @property def IsShift(self): return not self.IsReduce @property def Next(self): return self.production.symbols[self.dotpos] @property def NextNext(self): if self.dotpos + 1 >= len(self.production.symbols): return 'EPS' else: return self.production.symbols[self.dotpos + 1] def __repr__(self): prod = self.production predot = ' '.join(prod.symbols[0:self.dotpos]) postdot = ' '.join(prod.symbols[self.dotpos:]) nt = prod.name args = (nt, predot, postdot, self.look_ahead) return '[{0} -> {1} _dot_ {2},{3}]'.format(*args) class LRgenerator: """ Parser generator """ def __init__(self, g): self.grammar = g def closure(self, itemset): """ Expand itemset by using epsilon moves """ worklist = list(itemset) while worklist: item = worklist.pop(0) if item.IsShift and item.Next in self.grammar.nonterminals: for add_p in self.grammar.productionsForName(item.Next): f = self.first[item.NextNext] if 'EPS' in f: f.discard('EPS') f.add(item.look_ahead) for b in f: i = Item(add_p, 0, b) if not i in itemset: itemset.add(i) worklist.append(i) return frozenset(itemset) def initialItemSet(self): """ Calculates the initial item set """ iis = set() for p in self.grammar.productionsForName(self.grammar.start_symbol): iis.add(Item(p, 0, 'EOF')) return self.closure(iis) def nextItemSet(self, itemset, symbol): """ Determines the next itemset for the current set and a symbol """ nis = set() for item in itemset: if item.IsShift and item.Next == symbol: nis.add(Item(item.production, item.dotpos + 1, item.look_ahead)) return self.closure(nis) def genParser(self): states = set() goto_table = {} action_table = {} self.first = self.grammar.calcFirstSets() iis = self.initialItemSet() states.add(iis) # First generate all item sets by using the nextItemset function: worklist = [iis] while len(worklist) > 0: itemset = worklist.pop(0) for symbol in self.grammar.Symbols: nis = self.nextItemSet(itemset, symbol) if nis: goto_table[(itemset, symbol)] = nis if not nis in states: worklist.append(nis) states.add(nis) # Number the states: print('States:') for s, i in zip(states, range(len(states))): print(i, ', '.join(str(x) for x in s)) p = LRParser() p.goto_table = goto_table p.action_table = action_table p.s0 = iis return p class LRParser: def parse(self, toks): stack = ['EOF', self.s0] look_ahead = toks.pop(0) while True: state = stack[-1] # top of stack key = (state, look_ahead) if not key in self.action_table: raise Exception('Error parsing') action, param = self.action_table[(state, look_ahead)] if action == 'reduce': for s in param.symbols: stack.pop() stack.pop() state = stack[-1] stack.append(param.name) stack.append(self.goto_table[(state, param.name)]) elif action == 'shift': stack.append(look_ahead) s, i = stack[-2:] stack.append(self.goto_table[(s, i)]) look_ahead = toks.pop(0) elif action == 'accept': break # Test it: def test(): # 1. define a simple grammar: g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) g.add_production('input', ['expression']) g.add_production('expression', ['term']) g.add_production('expression', ['expression', '+', 'term']) g.add_production('term', ['factor']) g.add_production('term', ['term', '*', 'factor']) g.add_production('factor', ['(', 'expression', ')']) g.add_production('factor', ['identifier']) g.start_symbol = 'input' # 2. define input: tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] # 3. build parser: p = LRgenerator(g).genParser() # 4. feed input: p.parse(tokens) print('Hallo') if __name__ == '__main__': print('Hallo') test()