Mercurial > lcfOS
diff python/pyyacc.py @ 185:51a6440d6398
Fixed LR(1) parser
author | Windel Bouwman |
---|---|
date | Fri, 24 May 2013 20:45:03 +0200 |
parents | fe2b72381a83 |
children | 46d62dadd61b |
line wrap: on
line diff
--- a/python/pyyacc.py Fri May 24 16:13:23 2013 +0200 +++ b/python/pyyacc.py Fri May 24 20:45:03 2013 +0200 @@ -8,6 +8,13 @@ REDUCE = 2 ACCEPT = 3 +class ParserGenerationException(Exception): + pass + + +class ParserException(Exception): + pass + class Grammar: """ Defines a grammar of a language """ @@ -15,6 +22,7 @@ self.terminals = terminals self.nonterminals = [] self.productions = [] + self._first = None # Cached first set def add_production(self, name, symbols): """ Add a production rule to the grammar """ @@ -33,6 +41,12 @@ """ Get all the symbols defined by this grammar """ return self.nonterminals + self.terminals + @property + def first(self): + if not self._first: + self._first = self.calcFirstSets() + return self._first + def calcFirstSets(self): """ Calculate first sets for each grammar symbol @@ -45,7 +59,7 @@ first[t] = set([t]) for nt in self.nonterminals: first[nt] = set() - epsset = set([EPS]) + epsset = {EPS} while True: some_change = False for p in self.productions: @@ -78,7 +92,7 @@ f.discard(EPS) f.add(item.look_ahead) return f - + # Start of algorithm: while worklist: item = worklist.pop(0) if not item.IsShift: @@ -110,13 +124,13 @@ return self.closure(next_set) def genCanonicalSet(self, iis): - states = set() + states = [] worklist = [] - goto_table = {} + transitions = {} def addSt(s): if not (s in states): worklist.append(s) - states.add(s) + states.append(s) addSt(iis) while len(worklist) > 0: itemset = worklist.pop(0) @@ -124,41 +138,50 @@ nis = self.nextItemSet(itemset, symbol) if not nis: continue - goto_table[(itemset, symbol)] = nis addSt(nis) - return states, goto_table + transitions[(states.index(itemset), symbol)] = states.index(nis) + return states, transitions def genParser(self): """ Generates a parser from the grammar """ action_table = {} - self.first = self.calcFirstSets() + goto_table = {} iis = self.initialItemSet() # First generate all item sets by using the nextItemset function: - states, goto_table = self.genCanonicalSet(iis) - - # Number the states: - number_states = {} - for s, i in zip(states, range(len(states))): - number_states[s] = i + states, transitions = self.genCanonicalSet(iis) + + def setAction(state, t, action): + key = (state, t) + if key in action_table: + action2 = action_table[key] + if action != action2: + raise ParserGenerationException('LR construction conflict') + else: + action_table[key] = action # Fill action table: for state in states: + # Detect conflicts: for item in state: if item.IsShift and item.Next in self.terminals: - action_table[(state, item.Next)] = (SHIFT, 0) - elif item.IsReduce: - if item.look_ahead == EOF: - action_table[(state, item.look_ahead)] = (ACCEPT, 0) + # Rule 1, a shift item: + nextstate = transitions[(states.index(state), item.Next)] + setAction(states.index(state), item.Next, (SHIFT, nextstate)) + if item.IsReduce: + if item.production.name == self.start_symbol and item.look_ahead == EOF: + # Rule 3: accept: + setAction(states.index(state), item.look_ahead, (ACCEPT, None)) else: - action_table[(state, item.look_ahead)] = (REDUCE, item.production) - else: - pass + # Rule 2, reduce item: + setAction(states.index(state), item.look_ahead, (REDUCE, item.production)) + for nt in self.nonterminals: + key = (states.index(state), nt) + if key in transitions: + goto_table[key] = transitions[key] - p = LRParser(action_table) - p.goto_table = goto_table - p.s0 = iis - return p + return LRParser(action_table, goto_table) + class Production: """ Production rule for a grammar """ @@ -222,21 +245,23 @@ postdot = ' '.join(prod.symbols[self.dotpos:]) nt = prod.name args = (nt, predot, postdot, self.look_ahead) - return '[{0} -> {1} . {2}, {3}]'.format(*args) + return '[{0} -> {1} . {2} -> {3}]'.format(*args) class LRParser: """ LR parser """ - def __init__(self, action_table): + def __init__(self, action_table, goto_table): self.action_table = action_table + self.goto_table = goto_table def parse(self, toks): - stack = [EOF, self.s0] + stack = [0] look_ahead = toks.pop(0) while True: state = stack[-1] # top of stack key = (state, look_ahead) if not key in self.action_table: + print(key) raise Exception('Error parsing') action, param = self.action_table[(state, look_ahead)] if action == REDUCE: @@ -248,8 +273,7 @@ stack.append(self.goto_table[(state, param.name)]) elif action == SHIFT: stack.append(look_ahead) - s, i = stack[-2:] - stack.append(self.goto_table[(s, i)]) + stack.append(param) look_ahead = toks.pop(0) elif action == ACCEPT: break