Mercurial > lcfOS
view python/pyyacc.py @ 178:c694ec551f34
Added lex yacc test scripts
author | Windel Bouwman |
---|---|
date | Sat, 04 May 2013 12:07:17 +0200 |
parents | |
children | 0f3b1adfd416 |
line wrap: on
line source
class GrammarError(Exception): pass class Grammar: def __init__(self, terminals): self.terminals = terminals self.nonterminals = [] self.productions = [] self.states = None self.start_symbol = None def add_production(self, name, symbols): p = Production(name, symbols) self.productions.append(p) if not name in self.nonterminals: self.nonterminals.append(name) def set_start(self, start): self.start_symbol = start def get_start(self): return self.start_symbol StartSymbol = property(get_start, set_start) def productionsForName(self, name): return [p for p in self.productions if p.name == name] @property def Symbols(self): return self.nonterminals + self.terminals def calcFollow(self, name): """ Determines the follow set for a given name """ f = set() # TODO return f def e_closure(self, S): something_changed = True while something_changed: something_changed = False worklist = list(S) for item in worklist: if item.IsShift and item.Next in self.nonterminals: n = item.Next for add_p in self.productionsForName(n): i = Item(add_p, 0) if not i in S: S.add(i) something_changed = True return frozenset(S) def calcInitialItemSet(self): nis = set() for p in self.productionsForName(self.StartSymbol): nis.add(Item(p, 0)) return self.e_closure(nis) def calcNextItemSet(self, itemset, symbol): nis = set() for item in itemset: if item.IsShift and item.Next == symbol: nis.add(Item(item.production, item.dotpos + 1)) return self.e_closure(nis) def calcTables(self): print(self.nonterminals + self.terminals) self.states = set() self.goto_table = {} self.action_table = {} iis = self.calcInitialItemSet() self.s0 = iis print(len(iis), iis) worklist = [iis] self.states.add(iis) while len(worklist) > 0: itemset = worklist.pop(0) has_goto = False for symbol in self.Symbols: nis = self.calcNextItemSet(itemset, symbol) if nis: self.goto_table[(itemset, symbol)] = nis has_goto = True if not nis in self.states: worklist.append(nis) self.states.add(nis) if has_goto: self.action_table[itemset] = 'shift', 's' for item in itemset: assert item.IsShift, item else: # Check for reduce-reduce conflicts: assert len(itemset) == 1 item = list(itemset)[0] assert item.IsReduce self.action_table[itemset] = 'reduce', item.production print(self.Symbols) print(len(nis), nis, self.states) for s, i in zip(self.states, range(len(self.states))): print(i, s) def parse(self, toks): stack = [self.s0] while stack[-1] != 'input': action, p = self.action_table[stack[-1]] print(action, p) if action == 'shift': stack.append(toks.pop(0)) s, i = stack[-2:] stack.append(self.goto_table[(s, i)]) else: for s in p.symbols: stack.pop() stack.pop() stack.append(p.name) s, i = stack[-2:] stack.append(self.goto_table[(s, i)]) class Production: def __init__(self, name, symbols): self.name = name self.symbols = symbols def __repr__(self): return '{0} -> {1}'.format(self.name, self.symbols) class Item: def __init__(self, production, dotpos): self.production = production self.dotpos = dotpos def __eq__(self, other): if type(other) is type(self): return (self.production, self.dotpos) == (other.production, other.dotpos) return False def __hash__(self): return (self.production, self.dotpos).__hash__() @property def IsReduce(self): return self.dotpos == len(self.production.symbols) @property def IsShift(self): return not self.IsReduce @property def Next(self): return self.production.symbols[self.dotpos] def matches(self, symbol): return self.Next == symbol def __repr__(self): return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) # Test it: # 1. define a simple grammar: g = Grammar(['EOF', 'identifier', '(', ')', '+']) g.add_production('input', ['expression', 'EOF']) g.add_production('expression', ['term']) g.add_production('expression', ['expression', '+', 'term']) g.add_production('term', ['identifier']) g.add_production('term', ['(', 'expression', ')']) g.StartSymbol = 'input' # 2. define input: tokens = ['identifier', '+', 'identifier', 'EOF'] # 3. build tables g.calcTables() # 4. feed input g.parse(tokens)