comparison python/pyyacc.py @ 185:51a6440d6398

Fixed LR(1) parser
author Windel Bouwman
date Fri, 24 May 2013 20:45:03 +0200
parents fe2b72381a83
children 46d62dadd61b
comparison
equal deleted inserted replaced
184:fe2b72381a83 185:51a6440d6398
5 EPS = 'EPS' 5 EPS = 'EPS'
6 EOF = 'EOF' 6 EOF = 'EOF'
7 SHIFT = 1 7 SHIFT = 1
8 REDUCE = 2 8 REDUCE = 2
9 ACCEPT = 3 9 ACCEPT = 3
10
11 class ParserGenerationException(Exception):
12 pass
13
14
15 class ParserException(Exception):
16 pass
10 17
11 18
12 class Grammar: 19 class Grammar:
13 """ Defines a grammar of a language """ 20 """ Defines a grammar of a language """
14 def __init__(self, terminals): 21 def __init__(self, terminals):
15 self.terminals = terminals 22 self.terminals = terminals
16 self.nonterminals = [] 23 self.nonterminals = []
17 self.productions = [] 24 self.productions = []
25 self._first = None # Cached first set
18 26
19 def add_production(self, name, symbols): 27 def add_production(self, name, symbols):
20 """ Add a production rule to the grammar """ 28 """ Add a production rule to the grammar """
21 production = Production(name, symbols) 29 production = Production(name, symbols)
22 self.productions.append(production) 30 self.productions.append(production)
30 38
31 @property 39 @property
32 def Symbols(self): 40 def Symbols(self):
33 """ Get all the symbols defined by this grammar """ 41 """ Get all the symbols defined by this grammar """
34 return self.nonterminals + self.terminals 42 return self.nonterminals + self.terminals
43
44 @property
45 def first(self):
46 if not self._first:
47 self._first = self.calcFirstSets()
48 return self._first
35 49
36 def calcFirstSets(self): 50 def calcFirstSets(self):
37 """ 51 """
38 Calculate first sets for each grammar symbol 52 Calculate first sets for each grammar symbol
39 This is a dictionary which maps each grammar symbol 53 This is a dictionary which maps each grammar symbol
43 first = {} 57 first = {}
44 for t in self.terminals + [EOF, EPS]: 58 for t in self.terminals + [EOF, EPS]:
45 first[t] = set([t]) 59 first[t] = set([t])
46 for nt in self.nonterminals: 60 for nt in self.nonterminals:
47 first[nt] = set() 61 first[nt] = set()
48 epsset = set([EPS]) 62 epsset = {EPS}
49 while True: 63 while True:
50 some_change = False 64 some_change = False
51 for p in self.productions: 65 for p in self.productions:
52 rhs = set() 66 rhs = set()
53 for beta in p.symbols: 67 for beta in p.symbols:
76 f = set(self.first[item.NextNext]) 90 f = set(self.first[item.NextNext])
77 if EPS in f: 91 if EPS in f:
78 f.discard(EPS) 92 f.discard(EPS)
79 f.add(item.look_ahead) 93 f.add(item.look_ahead)
80 return f 94 return f
81 95 # Start of algorithm:
82 while worklist: 96 while worklist:
83 item = worklist.pop(0) 97 item = worklist.pop(0)
84 if not item.IsShift: 98 if not item.IsShift:
85 continue 99 continue
86 if not (item.Next in self.nonterminals): 100 if not (item.Next in self.nonterminals):
108 if item.can_shift_over(symbol): 122 if item.can_shift_over(symbol):
109 next_set.add(item.shifted()) 123 next_set.add(item.shifted())
110 return self.closure(next_set) 124 return self.closure(next_set)
111 125
112 def genCanonicalSet(self, iis): 126 def genCanonicalSet(self, iis):
113 states = set() 127 states = []
114 worklist = [] 128 worklist = []
115 goto_table = {} 129 transitions = {}
116 def addSt(s): 130 def addSt(s):
117 if not (s in states): 131 if not (s in states):
118 worklist.append(s) 132 worklist.append(s)
119 states.add(s) 133 states.append(s)
120 addSt(iis) 134 addSt(iis)
121 while len(worklist) > 0: 135 while len(worklist) > 0:
122 itemset = worklist.pop(0) 136 itemset = worklist.pop(0)
123 for symbol in self.Symbols: 137 for symbol in self.Symbols:
124 nis = self.nextItemSet(itemset, symbol) 138 nis = self.nextItemSet(itemset, symbol)
125 if not nis: 139 if not nis:
126 continue 140 continue
127 goto_table[(itemset, symbol)] = nis
128 addSt(nis) 141 addSt(nis)
129 return states, goto_table 142 transitions[(states.index(itemset), symbol)] = states.index(nis)
143 return states, transitions
130 144
131 def genParser(self): 145 def genParser(self):
132 """ Generates a parser from the grammar """ 146 """ Generates a parser from the grammar """
133 action_table = {} 147 action_table = {}
134 self.first = self.calcFirstSets() 148 goto_table = {}
135 iis = self.initialItemSet() 149 iis = self.initialItemSet()
136 150
137 # First generate all item sets by using the nextItemset function: 151 # First generate all item sets by using the nextItemset function:
138 states, goto_table = self.genCanonicalSet(iis) 152 states, transitions = self.genCanonicalSet(iis)
139 153
140 # Number the states: 154 def setAction(state, t, action):
141 number_states = {} 155 key = (state, t)
142 for s, i in zip(states, range(len(states))): 156 if key in action_table:
143 number_states[s] = i 157 action2 = action_table[key]
158 if action != action2:
159 raise ParserGenerationException('LR construction conflict')
160 else:
161 action_table[key] = action
144 162
145 # Fill action table: 163 # Fill action table:
146 for state in states: 164 for state in states:
165 # Detect conflicts:
147 for item in state: 166 for item in state:
148 if item.IsShift and item.Next in self.terminals: 167 if item.IsShift and item.Next in self.terminals:
149 action_table[(state, item.Next)] = (SHIFT, 0) 168 # Rule 1, a shift item:
150 elif item.IsReduce: 169 nextstate = transitions[(states.index(state), item.Next)]
151 if item.look_ahead == EOF: 170 setAction(states.index(state), item.Next, (SHIFT, nextstate))
152 action_table[(state, item.look_ahead)] = (ACCEPT, 0) 171 if item.IsReduce:
172 if item.production.name == self.start_symbol and item.look_ahead == EOF:
173 # Rule 3: accept:
174 setAction(states.index(state), item.look_ahead, (ACCEPT, None))
153 else: 175 else:
154 action_table[(state, item.look_ahead)] = (REDUCE, item.production) 176 # Rule 2, reduce item:
155 else: 177 setAction(states.index(state), item.look_ahead, (REDUCE, item.production))
156 pass 178 for nt in self.nonterminals:
157 179 key = (states.index(state), nt)
158 p = LRParser(action_table) 180 if key in transitions:
159 p.goto_table = goto_table 181 goto_table[key] = transitions[key]
160 p.s0 = iis 182
161 return p 183 return LRParser(action_table, goto_table)
184
162 185
163 class Production: 186 class Production:
164 """ Production rule for a grammar """ 187 """ Production rule for a grammar """
165 def __init__(self, name, symbols): 188 def __init__(self, name, symbols):
166 self.name = name 189 self.name = name
220 prod = self.production 243 prod = self.production
221 predot = ' '.join(prod.symbols[0:self.dotpos]) 244 predot = ' '.join(prod.symbols[0:self.dotpos])
222 postdot = ' '.join(prod.symbols[self.dotpos:]) 245 postdot = ' '.join(prod.symbols[self.dotpos:])
223 nt = prod.name 246 nt = prod.name
224 args = (nt, predot, postdot, self.look_ahead) 247 args = (nt, predot, postdot, self.look_ahead)
225 return '[{0} -> {1} . {2}, {3}]'.format(*args) 248 return '[{0} -> {1} . {2} -> {3}]'.format(*args)
226 249
227 250
228 class LRParser: 251 class LRParser:
229 """ LR parser """ 252 """ LR parser """
230 def __init__(self, action_table): 253 def __init__(self, action_table, goto_table):
231 self.action_table = action_table 254 self.action_table = action_table
255 self.goto_table = goto_table
232 256
233 def parse(self, toks): 257 def parse(self, toks):
234 stack = [EOF, self.s0] 258 stack = [0]
235 look_ahead = toks.pop(0) 259 look_ahead = toks.pop(0)
236 while True: 260 while True:
237 state = stack[-1] # top of stack 261 state = stack[-1] # top of stack
238 key = (state, look_ahead) 262 key = (state, look_ahead)
239 if not key in self.action_table: 263 if not key in self.action_table:
264 print(key)
240 raise Exception('Error parsing') 265 raise Exception('Error parsing')
241 action, param = self.action_table[(state, look_ahead)] 266 action, param = self.action_table[(state, look_ahead)]
242 if action == REDUCE: 267 if action == REDUCE:
243 for s in param.symbols: 268 for s in param.symbols:
244 stack.pop() 269 stack.pop()
246 state = stack[-1] 271 state = stack[-1]
247 stack.append(param.name) 272 stack.append(param.name)
248 stack.append(self.goto_table[(state, param.name)]) 273 stack.append(self.goto_table[(state, param.name)])
249 elif action == SHIFT: 274 elif action == SHIFT:
250 stack.append(look_ahead) 275 stack.append(look_ahead)
251 s, i = stack[-2:] 276 stack.append(param)
252 stack.append(self.goto_table[(s, i)])
253 look_ahead = toks.pop(0) 277 look_ahead = toks.pop(0)
254 elif action == ACCEPT: 278 elif action == ACCEPT:
255 break 279 break
256 280
257 def testSimpleGrammar(): 281 def testSimpleGrammar():