Mercurial > lcfOS
diff python/pyyacc.py @ 179:0f3b1adfd416
LR(0) parser
author | Windel Bouwman |
---|---|
date | Sat, 04 May 2013 18:50:36 +0200 |
parents | c694ec551f34 |
children | 25a0753da4cf |
line wrap: on
line diff
--- a/python/pyyacc.py Sat May 04 12:07:17 2013 +0200 +++ b/python/pyyacc.py Sat May 04 18:50:36 2013 +0200 @@ -1,112 +1,18 @@ - -class GrammarError(Exception): - pass - class Grammar: def __init__(self, terminals): self.terminals = terminals self.nonterminals = [] self.productions = [] - self.states = None - self.start_symbol = None def add_production(self, name, symbols): p = Production(name, symbols) self.productions.append(p) if not name in self.nonterminals: self.nonterminals.append(name) - def set_start(self, start): - self.start_symbol = start - def get_start(self): - return self.start_symbol - StartSymbol = property(get_start, set_start) def productionsForName(self, name): return [p for p in self.productions if p.name == name] @property def Symbols(self): return self.nonterminals + self.terminals - def calcFollow(self, name): - """ Determines the follow set for a given name """ - f = set() - # TODO - return f - - def e_closure(self, S): - something_changed = True - while something_changed: - something_changed = False - worklist = list(S) - for item in worklist: - if item.IsShift and item.Next in self.nonterminals: - n = item.Next - for add_p in self.productionsForName(n): - i = Item(add_p, 0) - if not i in S: - S.add(i) - something_changed = True - return frozenset(S) - def calcInitialItemSet(self): - nis = set() - for p in self.productionsForName(self.StartSymbol): - nis.add(Item(p, 0)) - return self.e_closure(nis) - def calcNextItemSet(self, itemset, symbol): - nis = set() - for item in itemset: - if item.IsShift and item.Next == symbol: - nis.add(Item(item.production, item.dotpos + 1)) - return self.e_closure(nis) - def calcTables(self): - print(self.nonterminals + self.terminals) - self.states = set() - self.goto_table = {} - self.action_table = {} - iis = self.calcInitialItemSet() - self.s0 = iis - print(len(iis), iis) - worklist = [iis] - self.states.add(iis) - while len(worklist) > 0: - itemset = worklist.pop(0) - has_goto = False - for symbol in self.Symbols: - nis = self.calcNextItemSet(itemset, symbol) - if nis: - self.goto_table[(itemset, symbol)] = nis - has_goto = True - if not nis in self.states: - worklist.append(nis) - self.states.add(nis) - if has_goto: - self.action_table[itemset] = 'shift', 's' - for item in itemset: - assert item.IsShift, item - else: - # Check for reduce-reduce conflicts: - assert len(itemset) == 1 - item = list(itemset)[0] - assert item.IsReduce - self.action_table[itemset] = 'reduce', item.production - print(self.Symbols) - print(len(nis), nis, self.states) - for s, i in zip(self.states, range(len(self.states))): - print(i, s) - - def parse(self, toks): - stack = [self.s0] - while stack[-1] != 'input': - action, p = self.action_table[stack[-1]] - print(action, p) - if action == 'shift': - stack.append(toks.pop(0)) - s, i = stack[-2:] - stack.append(self.goto_table[(s, i)]) - else: - for s in p.symbols: - stack.pop() - stack.pop() - stack.append(p.name) - s, i = stack[-2:] - stack.append(self.goto_table[(s, i)]) class Production: def __init__(self, name, symbols): @@ -114,7 +20,7 @@ self.symbols = symbols def __repr__(self): return '{0} -> {1}'.format(self.name, self.symbols) - + class Item: def __init__(self, production, dotpos): self.production = production @@ -137,24 +43,108 @@ def matches(self, symbol): return self.Next == symbol def __repr__(self): - return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) + return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) + +class LRgenerator: + def __init__(self, g): + self.grammar = g + def e_closure(self, S): + """ Expand itemset by using epsilon moves """ + something_changed = True + while something_changed: + something_changed = False + worklist = list(S) + for item in worklist: + if item.IsShift and item.Next in self.grammar.nonterminals: + n = item.Next + for add_p in self.grammar.productionsForName(n): + i = Item(add_p, 0) + if not i in S: + S.add(i) + something_changed = True + return frozenset(S) + def initialItemSet(self): + nis = set() + for p in self.grammar.productionsForName(self.grammar.start_symbol): + nis.add(Item(p, 0)) + return self.e_closure(nis) + def nextItemSet(self, itemset, symbol): + nis = set() + for item in itemset: + if item.IsShift and item.Next == symbol: + nis.add(Item(item.production, item.dotpos + 1)) + return self.e_closure(nis) + def genParser(self): + states = set() + goto_table = {} + action_table = {} + iis = self.initialItemSet() + worklist = [iis] + states.add(iis) + while len(worklist) > 0: + itemset = worklist.pop(0) + has_goto = False + for symbol in self.grammar.Symbols: + nis = self.nextItemSet(itemset, symbol) + if nis: + goto_table[(itemset, symbol)] = nis + has_goto = True + if not nis in states: + worklist.append(nis) + states.add(nis) + if has_goto: + action_table[itemset] = 'shift', 's' + for item in itemset: + assert item.IsShift, item + else: + # Check for reduce-reduce conflicts: + assert len(itemset) == 1 + item = list(itemset)[0] + assert item.IsReduce + action_table[itemset] = 'reduce', item.production + for s, i in zip(states, range(len(states))): + print(i, ' || '.join(str(x) for x in s)) + p = LRParser() + p.goto_table = goto_table + p.action_table = action_table + p.s0 = iis + return p + +class LRParser: + def parse(self, toks): + stack = [self.s0] + while stack[-1] != 'input': + action, p = self.action_table[stack[-1]] + print(action, p) + if action == 'shift': + stack.append(toks.pop(0)) + s, i = stack[-2:] + stack.append(self.goto_table[(s, i)]) + elif action == 'reduce': + for s in p.symbols: + stack.pop() + stack.pop() + stack.append(p.name) + s, i = stack[-2:] + if stack[-1] == 'input': + break + stack.append(self.goto_table[(s, i)]) # Test it: # 1. define a simple grammar: -g = Grammar(['EOF', 'identifier', '(', ')', '+']) +g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) g.add_production('input', ['expression', 'EOF']) g.add_production('expression', ['term']) g.add_production('expression', ['expression', '+', 'term']) -g.add_production('term', ['identifier']) +g.add_production('term', ['factor']) +#g.add_production('term', ['term', '*', 'factor']) g.add_production('term', ['(', 'expression', ')']) -g.StartSymbol = 'input' - +g.add_production('factor', ['identifier']) +g.start_symbol = 'input' # 2. define input: -tokens = ['identifier', '+', 'identifier', 'EOF'] +tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] +# 3. build parser: +p = LRgenerator(g).genParser() +# 4. feed input: +p.parse(tokens) -# 3. build tables -g.calcTables() - -# 4. feed input -g.parse(tokens) -