Mercurial > lcfOS
diff python/pyyacc.py @ 178:c694ec551f34
Added lex yacc test scripts
author | Windel Bouwman |
---|---|
date | Sat, 04 May 2013 12:07:17 +0200 |
parents | |
children | 0f3b1adfd416 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/pyyacc.py Sat May 04 12:07:17 2013 +0200 @@ -0,0 +1,160 @@ + +class GrammarError(Exception): + pass + +class Grammar: + def __init__(self, terminals): + self.terminals = terminals + self.nonterminals = [] + self.productions = [] + self.states = None + self.start_symbol = None + def add_production(self, name, symbols): + p = Production(name, symbols) + self.productions.append(p) + if not name in self.nonterminals: + self.nonterminals.append(name) + def set_start(self, start): + self.start_symbol = start + def get_start(self): + return self.start_symbol + StartSymbol = property(get_start, set_start) + def productionsForName(self, name): + return [p for p in self.productions if p.name == name] + @property + def Symbols(self): + return self.nonterminals + self.terminals + def calcFollow(self, name): + """ Determines the follow set for a given name """ + f = set() + # TODO + return f + + def e_closure(self, S): + something_changed = True + while something_changed: + something_changed = False + worklist = list(S) + for item in worklist: + if item.IsShift and item.Next in self.nonterminals: + n = item.Next + for add_p in self.productionsForName(n): + i = Item(add_p, 0) + if not i in S: + S.add(i) + something_changed = True + return frozenset(S) + def calcInitialItemSet(self): + nis = set() + for p in self.productionsForName(self.StartSymbol): + nis.add(Item(p, 0)) + return self.e_closure(nis) + def calcNextItemSet(self, itemset, symbol): + nis = set() + for item in itemset: + if item.IsShift and item.Next == symbol: + nis.add(Item(item.production, item.dotpos + 1)) + return self.e_closure(nis) + def calcTables(self): + print(self.nonterminals + self.terminals) + self.states = set() + self.goto_table = {} + self.action_table = {} + iis = self.calcInitialItemSet() + self.s0 = iis + print(len(iis), iis) + worklist = [iis] + self.states.add(iis) + while len(worklist) > 0: + itemset = worklist.pop(0) + has_goto = False + for symbol in self.Symbols: + nis = self.calcNextItemSet(itemset, symbol) + if nis: + self.goto_table[(itemset, symbol)] = nis + has_goto = True + if not nis in self.states: + worklist.append(nis) + self.states.add(nis) + if has_goto: + self.action_table[itemset] = 'shift', 's' + for item in itemset: + assert item.IsShift, item + else: + # Check for reduce-reduce conflicts: + assert len(itemset) == 1 + item = list(itemset)[0] + assert item.IsReduce + self.action_table[itemset] = 'reduce', item.production + print(self.Symbols) + print(len(nis), nis, self.states) + for s, i in zip(self.states, range(len(self.states))): + print(i, s) + + def parse(self, toks): + stack = [self.s0] + while stack[-1] != 'input': + action, p = self.action_table[stack[-1]] + print(action, p) + if action == 'shift': + stack.append(toks.pop(0)) + s, i = stack[-2:] + stack.append(self.goto_table[(s, i)]) + else: + for s in p.symbols: + stack.pop() + stack.pop() + stack.append(p.name) + s, i = stack[-2:] + stack.append(self.goto_table[(s, i)]) + +class Production: + def __init__(self, name, symbols): + self.name = name + self.symbols = symbols + def __repr__(self): + return '{0} -> {1}'.format(self.name, self.symbols) + +class Item: + def __init__(self, production, dotpos): + self.production = production + self.dotpos = dotpos + def __eq__(self, other): + if type(other) is type(self): + return (self.production, self.dotpos) == (other.production, other.dotpos) + return False + def __hash__(self): + return (self.production, self.dotpos).__hash__() + @property + def IsReduce(self): + return self.dotpos == len(self.production.symbols) + @property + def IsShift(self): + return not self.IsReduce + @property + def Next(self): + return self.production.symbols[self.dotpos] + def matches(self, symbol): + return self.Next == symbol + def __repr__(self): + return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) + +# Test it: +# 1. define a simple grammar: +g = Grammar(['EOF', 'identifier', '(', ')', '+']) +g.add_production('input', ['expression', 'EOF']) +g.add_production('expression', ['term']) +g.add_production('expression', ['expression', '+', 'term']) +g.add_production('term', ['identifier']) +g.add_production('term', ['(', 'expression', ')']) +g.StartSymbol = 'input' + +# 2. define input: +tokens = ['identifier', '+', 'identifier', 'EOF'] + +# 3. build tables +g.calcTables() + +# 4. feed input +g.parse(tokens) +