lcfOS: python/pyyacc.py comparison

comparison python/pyyacc.py @ 185:51a6440d6398

Fixed LR(1) parser

author	Windel Bouwman
date	Fri, 24 May 2013 20:45:03 +0200
parents	fe2b72381a83
children	46d62dadd61b

comparison

Legend: equal | deleted | inserted | replaced

-:fe2b72381a83
+:51a6440d6398
 EPS = 'EPS'
 EOF = 'EOF'
 SHIFT = 1
 REDUCE = 2
 ACCEPT = 3
+class ParserGenerationException(Exception):
+pass
+class ParserException(Exception):
+pass
 class Grammar:
 """ Defines a grammar of a language """
 def __init__(self, terminals):
 self.terminals = terminals
 self.nonterminals = []
 self.productions = []
+self._first = None  # Cached first set
 def add_production(self, name, symbols):
 """ Add a production rule to the grammar """
 production = Production(name, symbols)
 self.productions.append(production)
 @property
 def Symbols(self):
 """ Get all the symbols defined by this grammar """
 return self.nonterminals + self.terminals
+@property
+def first(self):
+if not self._first:
+self._first = self.calcFirstSets()
+return self._first
 def calcFirstSets(self):
 """
 Calculate first sets for each grammar symbol
 This is a dictionary which maps each grammar symbol
 first = {}
 for t in self.terminals + [EOF, EPS]:
 first[t] = set([t])
 for nt in self.nonterminals:
 first[nt] = set()
-epsset = set([EPS])
+epsset = {EPS}
 while True:
 some_change = False
 for p in self.productions:
 rhs = set()
 for beta in p.symbols:
 f = set(self.first[item.NextNext])
 if EPS in f:
 f.discard(EPS)
 f.add(item.look_ahead)
 return f
+# Start of algorithm:
 while worklist:
 item = worklist.pop(0)
 if not item.IsShift:
 continue
 if not (item.Next in self.nonterminals):
 if item.can_shift_over(symbol):
 next_set.add(item.shifted())
 return self.closure(next_set)
 def genCanonicalSet(self, iis):
-states = set()
+states = []
 worklist = []
-goto_table = {}
+transitions = {}
 def addSt(s):
 if not (s in states):
 worklist.append(s)
-states.add(s)
+states.append(s)
 addSt(iis)
 while len(worklist) > 0:
 itemset = worklist.pop(0)
 for symbol in self.Symbols:
 nis = self.nextItemSet(itemset, symbol)
 if not nis:
 continue
-goto_table[(itemset, symbol)] = nis
 addSt(nis)
-return states, goto_table
+transitions[(states.index(itemset), symbol)] = states.index(nis)
+return states, transitions
 def genParser(self):
 """ Generates a parser from the grammar """
 action_table = {}
-self.first = self.calcFirstSets()
+goto_table = {}
 iis = self.initialItemSet()
 # First generate all item sets by using the nextItemset function:
-states, goto_table = self.genCanonicalSet(iis)
+states, transitions = self.genCanonicalSet(iis)
-# Number the states:
+def setAction(state, t, action):
-number_states = {}
+key = (state, t)
-for s, i in zip(states, range(len(states))):
+if key in action_table:
-number_states[s] = i
+action2 = action_table[key]
+if action != action2:
+raise ParserGenerationException('LR construction conflict')
+else:
+action_table[key] = action
 # Fill action table:
 for state in states:
+# Detect conflicts:
 for item in state:
 if item.IsShift and item.Next in self.terminals:
-action_table[(state, item.Next)] = (SHIFT, 0)
+# Rule 1, a shift item:
-elif item.IsReduce:
+nextstate = transitions[(states.index(state), item.Next)]
-if item.look_ahead == EOF:
+setAction(states.index(state), item.Next, (SHIFT, nextstate))
-action_table[(state, item.look_ahead)] = (ACCEPT, 0)
+if item.IsReduce:
+if item.production.name == self.start_symbol and item.look_ahead == EOF:
+# Rule 3: accept:
+setAction(states.index(state), item.look_ahead, (ACCEPT, None))
 else:
-action_table[(state, item.look_ahead)] = (REDUCE, item.production)
+# Rule 2, reduce item:
-else:
+setAction(states.index(state), item.look_ahead, (REDUCE, item.production))
-pass
+for nt in self.nonterminals:
+key = (states.index(state), nt)
-p = LRParser(action_table)
+if key in transitions:
-p.goto_table = goto_table
+goto_table[key] = transitions[key]
-p.s0 = iis
-return p
+return LRParser(action_table, goto_table)
 class Production:
 """ Production rule for a grammar """
 def __init__(self, name, symbols):
 self.name = name
 prod = self.production
 predot = ' '.join(prod.symbols[0:self.dotpos])
 postdot = ' '.join(prod.symbols[self.dotpos:])
 nt = prod.name
 args = (nt, predot, postdot, self.look_ahead)
-return '[{0} -> {1} . {2}, {3}]'.format(*args)
+return '[{0} -> {1} . {2} -> {3}]'.format(*args)
 class LRParser:
 """ LR parser """
-def __init__(self, action_table):
+def __init__(self, action_table, goto_table):
 self.action_table = action_table
+self.goto_table = goto_table
 def parse(self, toks):
-stack = [EOF, self.s0]
+stack = [0]
 look_ahead = toks.pop(0)
 while True:
 state = stack[-1]   # top of stack
 key = (state, look_ahead)
 if not key in self.action_table:
+print(key)
 raise Exception('Error parsing')
 action, param = self.action_table[(state, look_ahead)]
 if action == REDUCE:
 for s in param.symbols:
 stack.pop()
 state = stack[-1]
 stack.append(param.name)
 stack.append(self.goto_table[(state, param.name)])
 elif action == SHIFT:
 stack.append(look_ahead)
-s, i = stack[-2:]
+stack.append(param)
-stack.append(self.goto_table[(s, i)])
 look_ahead = toks.pop(0)
 elif action == ACCEPT:
 break
 def testSimpleGrammar():

Mercurial > lcfOS

comparison python/pyyacc.py @ 185:51a6440d6398