Mercurial > lcfOS
annotate python/pyyacc.py @ 229:51d5ed1bd503
Added testrunner
author | Windel Bouwman |
---|---|
date | Sat, 13 Jul 2013 11:13:01 +0200 |
parents | 494828a7adf1 |
children | 6259856841a0 |
rev | line source |
---|---|
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
1 """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
2 Parser generator script |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
3 """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
4 |
218 | 5 import shelve |
6 import hashlib | |
191 | 7 from ppci import Token |
8 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
9 EPS = 'EPS' |
184 | 10 EOF = 'EOF' |
11 SHIFT = 1 | |
12 REDUCE = 2 | |
13 ACCEPT = 3 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
14 |
185 | 15 class ParserGenerationException(Exception): |
186 | 16 """ Raised when something goes wrong during parser generation """ |
185 | 17 pass |
18 | |
19 | |
20 class ParserException(Exception): | |
186 | 21 """ Raised during a failure in the parsing process """ |
185 | 22 pass |
23 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
24 |
178 | 25 class Grammar: |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
26 """ Defines a grammar of a language """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
27 def __init__(self, terminals): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
28 self.terminals = terminals |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
29 self.nonterminals = [] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
30 self.productions = [] |
185 | 31 self._first = None # Cached first set |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
32 |
194 | 33 def add_production(self, name, symbols, f=None): |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
34 """ Add a production rule to the grammar """ |
194 | 35 production = Production(name, symbols, f) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
36 self.productions.append(production) |
192 | 37 if name in self.terminals: |
38 raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
39 if not name in self.nonterminals: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
40 self.nonterminals.append(name) |
194 | 41 self._first = None # Invalidate cached version |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
42 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
43 def productionsForName(self, name): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
44 """ Retrieve all productions for a non terminal """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
45 return [p for p in self.productions if p.name == name] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
46 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
47 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
48 def Symbols(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
49 """ Get all the symbols defined by this grammar """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
50 return self.nonterminals + self.terminals |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
51 |
185 | 52 @property |
53 def first(self): | |
186 | 54 """ |
55 The first set is a mapping from a grammar symbol to a set of | |
56 set of all terminal symbols that can be the first terminal when | |
57 looking for the grammar symbol | |
58 """ | |
185 | 59 if not self._first: |
60 self._first = self.calcFirstSets() | |
61 return self._first | |
62 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
63 def calcFirstSets(self): |
184 | 64 """ |
65 Calculate first sets for each grammar symbol | |
66 This is a dictionary which maps each grammar symbol | |
67 to a set of terminals that can be encountered first | |
68 when looking for the symbol. | |
69 """ | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
70 first = {} |
184 | 71 for t in self.terminals + [EOF, EPS]: |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
72 first[t] = set([t]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
73 for nt in self.nonterminals: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
74 first[nt] = set() |
185 | 75 epsset = {EPS} |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
76 while True: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
77 some_change = False |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
78 for p in self.productions: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
79 rhs = set() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
80 for beta in p.symbols: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
81 rhs = rhs | (first[beta] - epsset) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
82 if not EPS in first[beta]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
83 break |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
84 else: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
85 if EPS in first[beta]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
86 rhs.add(EPS) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
87 if rhs - first[p.name]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
88 first[p.name] |= rhs |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
89 some_change = True |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
90 if not some_change: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
91 break |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
92 return first |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
93 |
184 | 94 def closure(self, itemset): |
95 """ Expand itemset by using epsilon moves """ | |
96 worklist = list(itemset) | |
97 def addIt(itm): | |
98 if not itm in itemset: | |
99 itemset.add(itm) | |
100 worklist.append(itm) | |
186 | 101 def first2(itm): |
184 | 102 # When using the first sets, create a copy: |
186 | 103 f = set(self.first[itm.NextNext]) |
184 | 104 if EPS in f: |
105 f.discard(EPS) | |
186 | 106 f.add(itm.look_ahead) |
184 | 107 return f |
185 | 108 # Start of algorithm: |
184 | 109 while worklist: |
110 item = worklist.pop(0) | |
111 if not item.IsShift: | |
112 continue | |
113 if not (item.Next in self.nonterminals): | |
114 continue | |
115 C = item.Next | |
116 for add_p in self.productionsForName(C): | |
186 | 117 for b in first2(item): |
184 | 118 addIt(Item(add_p, 0, b)) |
119 return frozenset(itemset) | |
120 | |
121 def initialItemSet(self): | |
122 """ Calculates the initial item set """ | |
123 iis = set() | |
124 for p in self.productionsForName(self.start_symbol): | |
125 iis.add(Item(p, 0, EOF)) | |
126 return self.closure(iis) | |
127 | |
128 def nextItemSet(self, itemset, symbol): | |
129 """ | |
130 Determines the next itemset for the current set and a symbol | |
131 This is the goto procedure | |
132 """ | |
133 next_set = set() | |
134 for item in itemset: | |
135 if item.can_shift_over(symbol): | |
136 next_set.add(item.shifted()) | |
137 return self.closure(next_set) | |
138 | |
139 def genCanonicalSet(self, iis): | |
185 | 140 states = [] |
184 | 141 worklist = [] |
185 | 142 transitions = {} |
184 | 143 def addSt(s): |
144 if not (s in states): | |
145 worklist.append(s) | |
185 | 146 states.append(s) |
184 | 147 addSt(iis) |
148 while len(worklist) > 0: | |
149 itemset = worklist.pop(0) | |
150 for symbol in self.Symbols: | |
151 nis = self.nextItemSet(itemset, symbol) | |
152 if not nis: | |
153 continue | |
154 addSt(nis) | |
185 | 155 transitions[(states.index(itemset), symbol)] = states.index(nis) |
156 return states, transitions | |
192 | 157 |
158 def checkSymbols(self): | |
159 """ Checks no symbols are undefined """ | |
160 for production in self.productions: | |
161 for symbol in production.symbols: | |
194 | 162 if symbol not in self.Symbols + [EPS]: |
192 | 163 raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) |
164 | |
218 | 165 def getSignature(self): |
166 m = hashlib.md5() | |
167 m.update((str(self.productions) + str(self.start_symbol)).encode('ascii')) | |
168 signature = m.hexdigest() | |
169 | |
184 | 170 def genParser(self): |
218 | 171 """ Generates a parser from the grammar (using a caching algorithm) """ |
172 signature = self.getSignature() | |
173 cache = shelve.open('__grammar_cache__.shelve') | |
174 # TODO: fix caching. | |
175 if ('signature1' in cache) and cache['signature'] == signature: | |
176 goto_table = cache['goto_table'] | |
177 action_table = cache['action_table'] | |
178 else: | |
179 action_table, goto_table = self.doGenerate() | |
180 cache['goto_table'] = goto_table | |
181 cache['action_table'] = action_table | |
182 cache['signature'] = signature | |
183 cache.close() | |
184 p = LRParser(action_table, goto_table, self.start_symbol) | |
185 p.grammar = self | |
186 return p | |
187 | |
188 def doGenerate(self): | |
192 | 189 self.checkSymbols() |
184 | 190 action_table = {} |
185 | 191 goto_table = {} |
184 | 192 iis = self.initialItemSet() |
193 | |
194 # First generate all item sets by using the nextItemset function: | |
185 | 195 states, transitions = self.genCanonicalSet(iis) |
194 | 196 |
197 def action_str(act): | |
198 a, p = act | |
199 if a is SHIFT: | |
200 return 'Shift {0}'.format(0) | |
201 elif a is REDUCE: | |
202 return 'Reduce {0}'.format(p) | |
203 return 'Other' | |
185 | 204 |
205 def setAction(state, t, action): | |
206 key = (state, t) | |
218 | 207 assert type(state) is int |
208 assert type(t) is str | |
185 | 209 if key in action_table: |
210 action2 = action_table[key] | |
211 if action != action2: | |
194 | 212 if (action2[0] == REDUCE) and (action[0] == SHIFT): |
213 # Automatically resolve and do the shift action! | |
214 # Simple, but almost always what you want!! | |
215 action_table[key] = action | |
216 else: | |
217 if (action2[0] == SHIFT) and (action[0] == REDUCE): | |
218 pass | |
219 else: | |
220 a1 = action_str(action) | |
221 a2 = action_str(action2) | |
222 raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2)) | |
185 | 223 else: |
224 action_table[key] = action | |
184 | 225 |
226 # Fill action table: | |
227 for state in states: | |
185 | 228 # Detect conflicts: |
184 | 229 for item in state: |
230 if item.IsShift and item.Next in self.terminals: | |
185 | 231 # Rule 1, a shift item: |
232 nextstate = transitions[(states.index(state), item.Next)] | |
233 setAction(states.index(state), item.Next, (SHIFT, nextstate)) | |
234 if item.IsReduce: | |
235 if item.production.name == self.start_symbol and item.look_ahead == EOF: | |
236 # Rule 3: accept: | |
218 | 237 setAction(states.index(state), item.look_ahead, (ACCEPT, self.productions.index(item.production))) |
184 | 238 else: |
185 | 239 # Rule 2, reduce item: |
218 | 240 setAction(states.index(state), item.look_ahead, (REDUCE, self.productions.index(item.production))) |
185 | 241 for nt in self.nonterminals: |
242 key = (states.index(state), nt) | |
243 if key in transitions: | |
244 goto_table[key] = transitions[key] | |
218 | 245 return action_table, goto_table |
185 | 246 |
178 | 247 |
248 class Production: | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
249 """ Production rule for a grammar """ |
194 | 250 def __init__(self, name, symbols, f=None): |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
251 self.name = name |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
252 self.symbols = symbols |
194 | 253 self.f = f |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
254 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
255 def __repr__(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
256 return '{0} -> {1}'.format(self.name, self.symbols) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
257 |
179 | 258 |
178 | 259 class Item: |
186 | 260 """ |
261 Represents a partially parsed item | |
262 It has a production it is looking for, a position | |
263 in this production called the 'dot' and a look ahead | |
264 symbol that must follow this item. | |
265 """ | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
266 def __init__(self, production, dotpos, look_ahead): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
267 self.production = production |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
268 self.dotpos = dotpos |
184 | 269 assert self.dotpos <= len(self.production.symbols) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
270 self.look_ahead = look_ahead |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
271 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
272 def getdata(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
273 """ Gets the members as a tuple """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
274 return (self.production, self.dotpos, self.look_ahead) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
275 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
276 def __eq__(self, other): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
277 if type(other) is type(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
278 return self.getdata() == other.getdata() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
279 return False |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
280 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
281 def __hash__(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
282 return self.getdata().__hash__() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
283 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
284 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
285 def IsReduce(self): |
186 | 286 """ Check if this item has the dot at the end """ |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
287 return self.dotpos == len(self.production.symbols) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
288 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
289 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
290 def IsShift(self): |
186 | 291 """ Check if this item is a shift item, i.e. the dot can proceed """ |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
292 return not self.IsReduce |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
293 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
294 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
295 def Next(self): |
186 | 296 """ Returns the symbol after the dot """ |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
297 return self.production.symbols[self.dotpos] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
298 |
184 | 299 def can_shift_over(self, symbol): |
300 """ Determines if this item can shift over the given symbol """ | |
301 return self.IsShift and self.Next == symbol | |
302 | |
303 def shifted(self): | |
304 """ Creates a new item that is shifted one position """ | |
305 return Item(self.production, self.dotpos + 1, self.look_ahead) | |
306 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
307 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
308 def NextNext(self): |
186 | 309 """ Gets the symbol after the next symbol, or EPS if at the end """ |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
310 if self.dotpos + 1 >= len(self.production.symbols): |
184 | 311 return EPS |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
312 else: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
313 return self.production.symbols[self.dotpos + 1] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
314 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
315 def __repr__(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
316 prod = self.production |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
317 predot = ' '.join(prod.symbols[0:self.dotpos]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
318 postdot = ' '.join(prod.symbols[self.dotpos:]) |
186 | 319 name = prod.name |
320 args = (name, predot, postdot, self.look_ahead) | |
185 | 321 return '[{0} -> {1} . {2} -> {3}]'.format(*args) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
322 |
179 | 323 |
324 class LRParser: | |
184 | 325 """ LR parser """ |
195 | 326 def __init__(self, action_table, goto_table, start_symbol): |
184 | 327 self.action_table = action_table |
185 | 328 self.goto_table = goto_table |
195 | 329 self.start_symbol = start_symbol |
184 | 330 |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
331 def parse(self, toks): |
191 | 332 """ Parse an iterable with tokens """ |
333 assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks)) | |
185 | 334 stack = [0] |
195 | 335 r_data_stack = [] |
194 | 336 try: |
337 look_ahead = toks.__next__() | |
338 except StopIteration: | |
339 look_ahead = Token(EOF, EOF) | |
191 | 340 assert type(look_ahead) is Token |
195 | 341 # TODO: exit on this condition: |
342 while stack != [0, self.start_symbol, 2222]: | |
343 #print(stack) | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
344 state = stack[-1] # top of stack |
191 | 345 key = (state, look_ahead.typ) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
346 if not key in self.action_table: |
194 | 347 raise ParserException('Error parsing at character {0}'.format(look_ahead)) |
191 | 348 action, param = self.action_table[key] |
184 | 349 if action == REDUCE: |
194 | 350 f_args = [] |
218 | 351 param = self.grammar.productions[param] |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
352 for s in param.symbols: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
353 stack.pop() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
354 stack.pop() |
195 | 355 f_args.append(r_data_stack.pop()) |
356 f_args.reverse() | |
357 r_data = None | |
194 | 358 if param.f: |
195 | 359 r_data = param.f(*f_args) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
360 state = stack[-1] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
361 stack.append(param.name) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
362 stack.append(self.goto_table[(state, param.name)]) |
195 | 363 r_data_stack.append(r_data) |
184 | 364 elif action == SHIFT: |
191 | 365 stack.append(look_ahead.typ) |
185 | 366 stack.append(param) |
195 | 367 r_data_stack.append(look_ahead.val) |
191 | 368 try: |
369 look_ahead = toks.__next__() | |
370 except StopIteration: | |
194 | 371 look_ahead = Token(EOF, EOF) |
191 | 372 assert type(look_ahead) is Token |
184 | 373 elif action == ACCEPT: |
195 | 374 # Pop last rule data off the stack: |
375 f_args = [] | |
218 | 376 param = self.grammar.productions[param] |
195 | 377 for s in param.symbols: |
378 stack.pop() | |
379 stack.pop() | |
380 f_args.append(r_data_stack.pop()) | |
381 f_args.reverse() | |
382 if param.f: | |
383 param.f(*f_args) | |
384 # Break out! | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
385 break |
195 | 386 # At exit, the stack must be 1 long |
387 # TODO: fix that this holds: | |
388 #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) | |
178 | 389 |