Mercurial > lcfOS
view python/pyyacc.py @ 179:0f3b1adfd416
LR(0) parser
author | Windel Bouwman |
---|---|
date | Sat, 04 May 2013 18:50:36 +0200 |
parents | c694ec551f34 |
children | 25a0753da4cf |
line wrap: on
line source
class Grammar: def __init__(self, terminals): self.terminals = terminals self.nonterminals = [] self.productions = [] def add_production(self, name, symbols): p = Production(name, symbols) self.productions.append(p) if not name in self.nonterminals: self.nonterminals.append(name) def productionsForName(self, name): return [p for p in self.productions if p.name == name] @property def Symbols(self): return self.nonterminals + self.terminals class Production: def __init__(self, name, symbols): self.name = name self.symbols = symbols def __repr__(self): return '{0} -> {1}'.format(self.name, self.symbols) class Item: def __init__(self, production, dotpos): self.production = production self.dotpos = dotpos def __eq__(self, other): if type(other) is type(self): return (self.production, self.dotpos) == (other.production, other.dotpos) return False def __hash__(self): return (self.production, self.dotpos).__hash__() @property def IsReduce(self): return self.dotpos == len(self.production.symbols) @property def IsShift(self): return not self.IsReduce @property def Next(self): return self.production.symbols[self.dotpos] def matches(self, symbol): return self.Next == symbol def __repr__(self): return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) class LRgenerator: def __init__(self, g): self.grammar = g def e_closure(self, S): """ Expand itemset by using epsilon moves """ something_changed = True while something_changed: something_changed = False worklist = list(S) for item in worklist: if item.IsShift and item.Next in self.grammar.nonterminals: n = item.Next for add_p in self.grammar.productionsForName(n): i = Item(add_p, 0) if not i in S: S.add(i) something_changed = True return frozenset(S) def initialItemSet(self): nis = set() for p in self.grammar.productionsForName(self.grammar.start_symbol): nis.add(Item(p, 0)) return self.e_closure(nis) def nextItemSet(self, itemset, symbol): nis = set() for item in itemset: if item.IsShift and item.Next == symbol: nis.add(Item(item.production, item.dotpos + 1)) return self.e_closure(nis) def genParser(self): states = set() goto_table = {} action_table = {} iis = self.initialItemSet() worklist = [iis] states.add(iis) while len(worklist) > 0: itemset = worklist.pop(0) has_goto = False for symbol in self.grammar.Symbols: nis = self.nextItemSet(itemset, symbol) if nis: goto_table[(itemset, symbol)] = nis has_goto = True if not nis in states: worklist.append(nis) states.add(nis) if has_goto: action_table[itemset] = 'shift', 's' for item in itemset: assert item.IsShift, item else: # Check for reduce-reduce conflicts: assert len(itemset) == 1 item = list(itemset)[0] assert item.IsReduce action_table[itemset] = 'reduce', item.production for s, i in zip(states, range(len(states))): print(i, ' || '.join(str(x) for x in s)) p = LRParser() p.goto_table = goto_table p.action_table = action_table p.s0 = iis return p class LRParser: def parse(self, toks): stack = [self.s0] while stack[-1] != 'input': action, p = self.action_table[stack[-1]] print(action, p) if action == 'shift': stack.append(toks.pop(0)) s, i = stack[-2:] stack.append(self.goto_table[(s, i)]) elif action == 'reduce': for s in p.symbols: stack.pop() stack.pop() stack.append(p.name) s, i = stack[-2:] if stack[-1] == 'input': break stack.append(self.goto_table[(s, i)]) # Test it: # 1. define a simple grammar: g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) g.add_production('input', ['expression', 'EOF']) g.add_production('expression', ['term']) g.add_production('expression', ['expression', '+', 'term']) g.add_production('term', ['factor']) #g.add_production('term', ['term', '*', 'factor']) g.add_production('term', ['(', 'expression', ')']) g.add_production('factor', ['identifier']) g.start_symbol = 'input' # 2. define input: tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] # 3. build parser: p = LRgenerator(g).genParser() # 4. feed input: p.parse(tokens)