Mercurial > lcfOS
comparison python/pyyacc.py @ 185:51a6440d6398
Fixed LR(1) parser
author | Windel Bouwman |
---|---|
date | Fri, 24 May 2013 20:45:03 +0200 |
parents | fe2b72381a83 |
children | 46d62dadd61b |
comparison
equal
deleted
inserted
replaced
184:fe2b72381a83 | 185:51a6440d6398 |
---|---|
5 EPS = 'EPS' | 5 EPS = 'EPS' |
6 EOF = 'EOF' | 6 EOF = 'EOF' |
7 SHIFT = 1 | 7 SHIFT = 1 |
8 REDUCE = 2 | 8 REDUCE = 2 |
9 ACCEPT = 3 | 9 ACCEPT = 3 |
10 | |
11 class ParserGenerationException(Exception): | |
12 pass | |
13 | |
14 | |
15 class ParserException(Exception): | |
16 pass | |
10 | 17 |
11 | 18 |
12 class Grammar: | 19 class Grammar: |
13 """ Defines a grammar of a language """ | 20 """ Defines a grammar of a language """ |
14 def __init__(self, terminals): | 21 def __init__(self, terminals): |
15 self.terminals = terminals | 22 self.terminals = terminals |
16 self.nonterminals = [] | 23 self.nonterminals = [] |
17 self.productions = [] | 24 self.productions = [] |
25 self._first = None # Cached first set | |
18 | 26 |
19 def add_production(self, name, symbols): | 27 def add_production(self, name, symbols): |
20 """ Add a production rule to the grammar """ | 28 """ Add a production rule to the grammar """ |
21 production = Production(name, symbols) | 29 production = Production(name, symbols) |
22 self.productions.append(production) | 30 self.productions.append(production) |
30 | 38 |
31 @property | 39 @property |
32 def Symbols(self): | 40 def Symbols(self): |
33 """ Get all the symbols defined by this grammar """ | 41 """ Get all the symbols defined by this grammar """ |
34 return self.nonterminals + self.terminals | 42 return self.nonterminals + self.terminals |
43 | |
44 @property | |
45 def first(self): | |
46 if not self._first: | |
47 self._first = self.calcFirstSets() | |
48 return self._first | |
35 | 49 |
36 def calcFirstSets(self): | 50 def calcFirstSets(self): |
37 """ | 51 """ |
38 Calculate first sets for each grammar symbol | 52 Calculate first sets for each grammar symbol |
39 This is a dictionary which maps each grammar symbol | 53 This is a dictionary which maps each grammar symbol |
43 first = {} | 57 first = {} |
44 for t in self.terminals + [EOF, EPS]: | 58 for t in self.terminals + [EOF, EPS]: |
45 first[t] = set([t]) | 59 first[t] = set([t]) |
46 for nt in self.nonterminals: | 60 for nt in self.nonterminals: |
47 first[nt] = set() | 61 first[nt] = set() |
48 epsset = set([EPS]) | 62 epsset = {EPS} |
49 while True: | 63 while True: |
50 some_change = False | 64 some_change = False |
51 for p in self.productions: | 65 for p in self.productions: |
52 rhs = set() | 66 rhs = set() |
53 for beta in p.symbols: | 67 for beta in p.symbols: |
76 f = set(self.first[item.NextNext]) | 90 f = set(self.first[item.NextNext]) |
77 if EPS in f: | 91 if EPS in f: |
78 f.discard(EPS) | 92 f.discard(EPS) |
79 f.add(item.look_ahead) | 93 f.add(item.look_ahead) |
80 return f | 94 return f |
81 | 95 # Start of algorithm: |
82 while worklist: | 96 while worklist: |
83 item = worklist.pop(0) | 97 item = worklist.pop(0) |
84 if not item.IsShift: | 98 if not item.IsShift: |
85 continue | 99 continue |
86 if not (item.Next in self.nonterminals): | 100 if not (item.Next in self.nonterminals): |
108 if item.can_shift_over(symbol): | 122 if item.can_shift_over(symbol): |
109 next_set.add(item.shifted()) | 123 next_set.add(item.shifted()) |
110 return self.closure(next_set) | 124 return self.closure(next_set) |
111 | 125 |
112 def genCanonicalSet(self, iis): | 126 def genCanonicalSet(self, iis): |
113 states = set() | 127 states = [] |
114 worklist = [] | 128 worklist = [] |
115 goto_table = {} | 129 transitions = {} |
116 def addSt(s): | 130 def addSt(s): |
117 if not (s in states): | 131 if not (s in states): |
118 worklist.append(s) | 132 worklist.append(s) |
119 states.add(s) | 133 states.append(s) |
120 addSt(iis) | 134 addSt(iis) |
121 while len(worklist) > 0: | 135 while len(worklist) > 0: |
122 itemset = worklist.pop(0) | 136 itemset = worklist.pop(0) |
123 for symbol in self.Symbols: | 137 for symbol in self.Symbols: |
124 nis = self.nextItemSet(itemset, symbol) | 138 nis = self.nextItemSet(itemset, symbol) |
125 if not nis: | 139 if not nis: |
126 continue | 140 continue |
127 goto_table[(itemset, symbol)] = nis | |
128 addSt(nis) | 141 addSt(nis) |
129 return states, goto_table | 142 transitions[(states.index(itemset), symbol)] = states.index(nis) |
143 return states, transitions | |
130 | 144 |
131 def genParser(self): | 145 def genParser(self): |
132 """ Generates a parser from the grammar """ | 146 """ Generates a parser from the grammar """ |
133 action_table = {} | 147 action_table = {} |
134 self.first = self.calcFirstSets() | 148 goto_table = {} |
135 iis = self.initialItemSet() | 149 iis = self.initialItemSet() |
136 | 150 |
137 # First generate all item sets by using the nextItemset function: | 151 # First generate all item sets by using the nextItemset function: |
138 states, goto_table = self.genCanonicalSet(iis) | 152 states, transitions = self.genCanonicalSet(iis) |
139 | 153 |
140 # Number the states: | 154 def setAction(state, t, action): |
141 number_states = {} | 155 key = (state, t) |
142 for s, i in zip(states, range(len(states))): | 156 if key in action_table: |
143 number_states[s] = i | 157 action2 = action_table[key] |
158 if action != action2: | |
159 raise ParserGenerationException('LR construction conflict') | |
160 else: | |
161 action_table[key] = action | |
144 | 162 |
145 # Fill action table: | 163 # Fill action table: |
146 for state in states: | 164 for state in states: |
165 # Detect conflicts: | |
147 for item in state: | 166 for item in state: |
148 if item.IsShift and item.Next in self.terminals: | 167 if item.IsShift and item.Next in self.terminals: |
149 action_table[(state, item.Next)] = (SHIFT, 0) | 168 # Rule 1, a shift item: |
150 elif item.IsReduce: | 169 nextstate = transitions[(states.index(state), item.Next)] |
151 if item.look_ahead == EOF: | 170 setAction(states.index(state), item.Next, (SHIFT, nextstate)) |
152 action_table[(state, item.look_ahead)] = (ACCEPT, 0) | 171 if item.IsReduce: |
172 if item.production.name == self.start_symbol and item.look_ahead == EOF: | |
173 # Rule 3: accept: | |
174 setAction(states.index(state), item.look_ahead, (ACCEPT, None)) | |
153 else: | 175 else: |
154 action_table[(state, item.look_ahead)] = (REDUCE, item.production) | 176 # Rule 2, reduce item: |
155 else: | 177 setAction(states.index(state), item.look_ahead, (REDUCE, item.production)) |
156 pass | 178 for nt in self.nonterminals: |
157 | 179 key = (states.index(state), nt) |
158 p = LRParser(action_table) | 180 if key in transitions: |
159 p.goto_table = goto_table | 181 goto_table[key] = transitions[key] |
160 p.s0 = iis | 182 |
161 return p | 183 return LRParser(action_table, goto_table) |
184 | |
162 | 185 |
163 class Production: | 186 class Production: |
164 """ Production rule for a grammar """ | 187 """ Production rule for a grammar """ |
165 def __init__(self, name, symbols): | 188 def __init__(self, name, symbols): |
166 self.name = name | 189 self.name = name |
220 prod = self.production | 243 prod = self.production |
221 predot = ' '.join(prod.symbols[0:self.dotpos]) | 244 predot = ' '.join(prod.symbols[0:self.dotpos]) |
222 postdot = ' '.join(prod.symbols[self.dotpos:]) | 245 postdot = ' '.join(prod.symbols[self.dotpos:]) |
223 nt = prod.name | 246 nt = prod.name |
224 args = (nt, predot, postdot, self.look_ahead) | 247 args = (nt, predot, postdot, self.look_ahead) |
225 return '[{0} -> {1} . {2}, {3}]'.format(*args) | 248 return '[{0} -> {1} . {2} -> {3}]'.format(*args) |
226 | 249 |
227 | 250 |
228 class LRParser: | 251 class LRParser: |
229 """ LR parser """ | 252 """ LR parser """ |
230 def __init__(self, action_table): | 253 def __init__(self, action_table, goto_table): |
231 self.action_table = action_table | 254 self.action_table = action_table |
255 self.goto_table = goto_table | |
232 | 256 |
233 def parse(self, toks): | 257 def parse(self, toks): |
234 stack = [EOF, self.s0] | 258 stack = [0] |
235 look_ahead = toks.pop(0) | 259 look_ahead = toks.pop(0) |
236 while True: | 260 while True: |
237 state = stack[-1] # top of stack | 261 state = stack[-1] # top of stack |
238 key = (state, look_ahead) | 262 key = (state, look_ahead) |
239 if not key in self.action_table: | 263 if not key in self.action_table: |
264 print(key) | |
240 raise Exception('Error parsing') | 265 raise Exception('Error parsing') |
241 action, param = self.action_table[(state, look_ahead)] | 266 action, param = self.action_table[(state, look_ahead)] |
242 if action == REDUCE: | 267 if action == REDUCE: |
243 for s in param.symbols: | 268 for s in param.symbols: |
244 stack.pop() | 269 stack.pop() |
246 state = stack[-1] | 271 state = stack[-1] |
247 stack.append(param.name) | 272 stack.append(param.name) |
248 stack.append(self.goto_table[(state, param.name)]) | 273 stack.append(self.goto_table[(state, param.name)]) |
249 elif action == SHIFT: | 274 elif action == SHIFT: |
250 stack.append(look_ahead) | 275 stack.append(look_ahead) |
251 s, i = stack[-2:] | 276 stack.append(param) |
252 stack.append(self.goto_table[(s, i)]) | |
253 look_ahead = toks.pop(0) | 277 look_ahead = toks.pop(0) |
254 elif action == ACCEPT: | 278 elif action == ACCEPT: |
255 break | 279 break |
256 | 280 |
257 def testSimpleGrammar(): | 281 def testSimpleGrammar(): |