Mercurial > lcfOS
annotate python/pyyacc.py @ 318:e84047f29c78
Add burg and yacc initial attempts
author | Windel Bouwman |
---|---|
date | Tue, 31 Dec 2013 12:38:15 +0100 |
parents | 6259856841a0 |
children | 8d07a4254f04 |
rev | line source |
---|---|
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
1 """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
2 Parser generator script |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
3 """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
4 |
191 | 5 from ppci import Token |
6 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
7 EPS = 'EPS' |
184 | 8 EOF = 'EOF' |
318 | 9 |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
10 |
185 | 11 class ParserGenerationException(Exception): |
186 | 12 """ Raised when something goes wrong during parser generation """ |
185 | 13 pass |
14 | |
15 | |
16 class ParserException(Exception): | |
186 | 17 """ Raised during a failure in the parsing process """ |
185 | 18 pass |
19 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
20 |
318 | 21 class Action: |
22 def __repr__(self): | |
23 return 'Action' | |
24 | |
25 def __eq__(self, other): | |
26 return str(self) == str(other) | |
27 | |
28 | |
29 class Shift(Action): | |
30 def __init__(self, to_state): | |
31 self.to_state = to_state | |
32 | |
33 def __repr__(self): | |
34 return 'Shift({})'.format(self.to_state) | |
35 | |
36 | |
37 class Reduce(Action): | |
38 def __init__(self, rule): | |
39 self.rule = rule | |
40 | |
41 def __repr__(self): | |
42 return 'Reduce({})'.format(self.rule) | |
43 | |
44 | |
45 class Accept(Reduce): | |
46 def __repr__(self): | |
47 return 'Accept({})'.format(self.rule) | |
48 | |
49 | |
50 def print_grammar(g): | |
51 """ Pretty print a grammar """ | |
52 print(g) | |
53 for production in g.productions: | |
54 print(production) | |
55 | |
178 | 56 class Grammar: |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
57 """ Defines a grammar of a language """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
58 def __init__(self, terminals): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
59 self.terminals = terminals |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
60 self.nonterminals = [] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
61 self.productions = [] |
185 | 62 self._first = None # Cached first set |
318 | 63 self.start_symbol = None |
64 | |
65 def __repr__(self): | |
66 return 'Grammar with {} rules'.format(len(self.productions)) | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
67 |
194 | 68 def add_production(self, name, symbols, f=None): |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
69 """ Add a production rule to the grammar """ |
194 | 70 production = Production(name, symbols, f) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
71 self.productions.append(production) |
192 | 72 if name in self.terminals: |
73 raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
74 if not name in self.nonterminals: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
75 self.nonterminals.append(name) |
194 | 76 self._first = None # Invalidate cached version |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
77 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
78 def productionsForName(self, name): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
79 """ Retrieve all productions for a non terminal """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
80 return [p for p in self.productions if p.name == name] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
81 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
82 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
83 def Symbols(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
84 """ Get all the symbols defined by this grammar """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
85 return self.nonterminals + self.terminals |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
86 |
185 | 87 @property |
88 def first(self): | |
318 | 89 """ |
186 | 90 The first set is a mapping from a grammar symbol to a set of |
91 set of all terminal symbols that can be the first terminal when | |
318 | 92 looking for the grammar symbol |
186 | 93 """ |
185 | 94 if not self._first: |
95 self._first = self.calcFirstSets() | |
96 return self._first | |
97 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
98 def calcFirstSets(self): |
184 | 99 """ |
100 Calculate first sets for each grammar symbol | |
101 This is a dictionary which maps each grammar symbol | |
102 to a set of terminals that can be encountered first | |
103 when looking for the symbol. | |
104 """ | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
105 first = {} |
184 | 106 for t in self.terminals + [EOF, EPS]: |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
107 first[t] = set([t]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
108 for nt in self.nonterminals: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
109 first[nt] = set() |
185 | 110 epsset = {EPS} |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
111 while True: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
112 some_change = False |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
113 for p in self.productions: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
114 rhs = set() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
115 for beta in p.symbols: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
116 rhs = rhs | (first[beta] - epsset) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
117 if not EPS in first[beta]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
118 break |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
119 else: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
120 if EPS in first[beta]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
121 rhs.add(EPS) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
122 if rhs - first[p.name]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
123 first[p.name] |= rhs |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
124 some_change = True |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
125 if not some_change: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
126 break |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
127 return first |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
128 |
184 | 129 def closure(self, itemset): |
130 """ Expand itemset by using epsilon moves """ | |
131 worklist = list(itemset) | |
132 def addIt(itm): | |
133 if not itm in itemset: | |
134 itemset.add(itm) | |
135 worklist.append(itm) | |
186 | 136 def first2(itm): |
184 | 137 # When using the first sets, create a copy: |
186 | 138 f = set(self.first[itm.NextNext]) |
184 | 139 if EPS in f: |
140 f.discard(EPS) | |
186 | 141 f.add(itm.look_ahead) |
184 | 142 return f |
185 | 143 # Start of algorithm: |
184 | 144 while worklist: |
145 item = worklist.pop(0) | |
146 if not item.IsShift: | |
147 continue | |
148 if not (item.Next in self.nonterminals): | |
149 continue | |
150 C = item.Next | |
151 for add_p in self.productionsForName(C): | |
186 | 152 for b in first2(item): |
184 | 153 addIt(Item(add_p, 0, b)) |
154 return frozenset(itemset) | |
155 | |
156 def initialItemSet(self): | |
157 """ Calculates the initial item set """ | |
158 iis = set() | |
159 for p in self.productionsForName(self.start_symbol): | |
160 iis.add(Item(p, 0, EOF)) | |
161 return self.closure(iis) | |
162 | |
163 def nextItemSet(self, itemset, symbol): | |
318 | 164 """ |
184 | 165 Determines the next itemset for the current set and a symbol |
166 This is the goto procedure | |
167 """ | |
168 next_set = set() | |
169 for item in itemset: | |
170 if item.can_shift_over(symbol): | |
171 next_set.add(item.shifted()) | |
172 return self.closure(next_set) | |
318 | 173 |
184 | 174 def genCanonicalSet(self, iis): |
185 | 175 states = [] |
184 | 176 worklist = [] |
185 | 177 transitions = {} |
184 | 178 def addSt(s): |
179 if not (s in states): | |
180 worklist.append(s) | |
185 | 181 states.append(s) |
184 | 182 addSt(iis) |
183 while len(worklist) > 0: | |
184 itemset = worklist.pop(0) | |
185 for symbol in self.Symbols: | |
186 nis = self.nextItemSet(itemset, symbol) | |
187 if not nis: | |
188 continue | |
189 addSt(nis) | |
185 | 190 transitions[(states.index(itemset), symbol)] = states.index(nis) |
191 return states, transitions | |
318 | 192 |
192 | 193 def checkSymbols(self): |
194 """ Checks no symbols are undefined """ | |
195 for production in self.productions: | |
196 for symbol in production.symbols: | |
194 | 197 if symbol not in self.Symbols + [EPS]: |
192 | 198 raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) |
199 | |
184 | 200 def genParser(self): |
218 | 201 """ Generates a parser from the grammar (using a caching algorithm) """ |
240 | 202 action_table, goto_table = self.doGenerate() |
218 | 203 p = LRParser(action_table, goto_table, self.start_symbol) |
204 p.grammar = self | |
205 return p | |
206 | |
207 def doGenerate(self): | |
318 | 208 if not self.start_symbol: |
209 self.start_symbol = self.productions[0].name | |
192 | 210 self.checkSymbols() |
184 | 211 action_table = {} |
185 | 212 goto_table = {} |
184 | 213 iis = self.initialItemSet() |
214 | |
215 # First generate all item sets by using the nextItemset function: | |
185 | 216 states, transitions = self.genCanonicalSet(iis) |
194 | 217 |
185 | 218 def setAction(state, t, action): |
318 | 219 assert isinstance(action, Action) |
185 | 220 key = (state, t) |
218 | 221 assert type(state) is int |
222 assert type(t) is str | |
185 | 223 if key in action_table: |
224 action2 = action_table[key] | |
225 if action != action2: | |
318 | 226 if (type(action2) is Reduce) and (type(action) is Shift): |
194 | 227 # Automatically resolve and do the shift action! |
228 # Simple, but almost always what you want!! | |
229 action_table[key] = action | |
318 | 230 elif isinstance(action2, Shift) and isinstance(action, Reduce): |
231 pass | |
194 | 232 else: |
318 | 233 a1 = str(action) |
234 a2 = str(action2) | |
235 raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2)) | |
185 | 236 else: |
237 action_table[key] = action | |
184 | 238 |
239 # Fill action table: | |
240 for state in states: | |
185 | 241 # Detect conflicts: |
184 | 242 for item in state: |
243 if item.IsShift and item.Next in self.terminals: | |
185 | 244 # Rule 1, a shift item: |
245 nextstate = transitions[(states.index(state), item.Next)] | |
318 | 246 setAction(states.index(state), item.Next, Shift(nextstate)) |
185 | 247 if item.IsReduce: |
248 if item.production.name == self.start_symbol and item.look_ahead == EOF: | |
249 # Rule 3: accept: | |
318 | 250 act = Accept(self.productions.index(item.production)) |
184 | 251 else: |
185 | 252 # Rule 2, reduce item: |
318 | 253 act = Reduce(self.productions.index(item.production)) |
254 setAction(states.index(state), item.look_ahead, act) | |
185 | 255 for nt in self.nonterminals: |
256 key = (states.index(state), nt) | |
257 if key in transitions: | |
258 goto_table[key] = transitions[key] | |
218 | 259 return action_table, goto_table |
185 | 260 |
178 | 261 |
262 class Production: | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
263 """ Production rule for a grammar """ |
194 | 264 def __init__(self, name, symbols, f=None): |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
265 self.name = name |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
266 self.symbols = symbols |
194 | 267 self.f = f |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
268 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
269 def __repr__(self): |
318 | 270 action = ' ' + str(self.f) if self.f else '' |
271 return '{0} -> {1}'.format(self.name, self.symbols) + action | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
272 |
179 | 273 |
178 | 274 class Item: |
318 | 275 """ |
276 Represents a partially parsed item | |
186 | 277 It has a production it is looking for, a position |
278 in this production called the 'dot' and a look ahead | |
279 symbol that must follow this item. | |
280 """ | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
281 def __init__(self, production, dotpos, look_ahead): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
282 self.production = production |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
283 self.dotpos = dotpos |
184 | 284 assert self.dotpos <= len(self.production.symbols) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
285 self.look_ahead = look_ahead |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
286 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
287 def getdata(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
288 """ Gets the members as a tuple """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
289 return (self.production, self.dotpos, self.look_ahead) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
290 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
291 def __eq__(self, other): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
292 if type(other) is type(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
293 return self.getdata() == other.getdata() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
294 return False |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
295 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
296 def __hash__(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
297 return self.getdata().__hash__() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
298 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
299 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
300 def IsReduce(self): |
186 | 301 """ Check if this item has the dot at the end """ |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
302 return self.dotpos == len(self.production.symbols) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
303 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
304 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
305 def IsShift(self): |
186 | 306 """ Check if this item is a shift item, i.e. the dot can proceed """ |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
307 return not self.IsReduce |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
308 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
309 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
310 def Next(self): |
186 | 311 """ Returns the symbol after the dot """ |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
312 return self.production.symbols[self.dotpos] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
313 |
184 | 314 def can_shift_over(self, symbol): |
315 """ Determines if this item can shift over the given symbol """ | |
316 return self.IsShift and self.Next == symbol | |
317 | |
318 def shifted(self): | |
319 """ Creates a new item that is shifted one position """ | |
320 return Item(self.production, self.dotpos + 1, self.look_ahead) | |
321 | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
322 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
323 def NextNext(self): |
186 | 324 """ Gets the symbol after the next symbol, or EPS if at the end """ |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
325 if self.dotpos + 1 >= len(self.production.symbols): |
184 | 326 return EPS |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
327 else: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
328 return self.production.symbols[self.dotpos + 1] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
329 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
330 def __repr__(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
331 prod = self.production |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
332 predot = ' '.join(prod.symbols[0:self.dotpos]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
333 postdot = ' '.join(prod.symbols[self.dotpos:]) |
186 | 334 name = prod.name |
335 args = (name, predot, postdot, self.look_ahead) | |
185 | 336 return '[{0} -> {1} . {2} -> {3}]'.format(*args) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
337 |
179 | 338 |
339 class LRParser: | |
318 | 340 """ LR parser automata """ |
195 | 341 def __init__(self, action_table, goto_table, start_symbol): |
184 | 342 self.action_table = action_table |
185 | 343 self.goto_table = goto_table |
195 | 344 self.start_symbol = start_symbol |
184 | 345 |
318 | 346 def parse(self, lexer): |
191 | 347 """ Parse an iterable with tokens """ |
318 | 348 assert hasattr(lexer, 'next_token'), '{0} is no lexer'.format(type(lexer)) |
185 | 349 stack = [0] |
195 | 350 r_data_stack = [] |
318 | 351 look_ahead = lexer.next_token() |
191 | 352 assert type(look_ahead) is Token |
195 | 353 # TODO: exit on this condition: |
354 while stack != [0, self.start_symbol, 2222]: | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
355 state = stack[-1] # top of stack |
191 | 356 key = (state, look_ahead.typ) |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
357 if not key in self.action_table: |
194 | 358 raise ParserException('Error parsing at character {0}'.format(look_ahead)) |
318 | 359 action = self.action_table[key] |
360 if type(action) is Reduce: | |
194 | 361 f_args = [] |
318 | 362 prod = self.grammar.productions[action.rule] |
363 for s in prod.symbols: | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
364 stack.pop() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
365 stack.pop() |
195 | 366 f_args.append(r_data_stack.pop()) |
367 f_args.reverse() | |
368 r_data = None | |
318 | 369 if prod.f: |
370 r_data = prod.f(*f_args) | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
371 state = stack[-1] |
318 | 372 stack.append(prod.name) |
373 stack.append(self.goto_table[(state, prod.name)]) | |
195 | 374 r_data_stack.append(r_data) |
318 | 375 elif type(action) is Shift: |
191 | 376 stack.append(look_ahead.typ) |
318 | 377 stack.append(action.to_state) |
378 r_data_stack.append(look_ahead) | |
379 look_ahead = lexer.next_token() | |
191 | 380 assert type(look_ahead) is Token |
318 | 381 elif type(action) is Accept: |
195 | 382 # Pop last rule data off the stack: |
383 f_args = [] | |
318 | 384 param = self.grammar.productions[action.rule] |
195 | 385 for s in param.symbols: |
386 stack.pop() | |
387 stack.pop() | |
388 f_args.append(r_data_stack.pop()) | |
389 f_args.reverse() | |
390 if param.f: | |
318 | 391 ret_val = param.f(*f_args) |
392 else: | |
393 ret_val = None | |
195 | 394 # Break out! |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
395 break |
195 | 396 # At exit, the stack must be 1 long |
397 # TODO: fix that this holds: | |
398 #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) | |
318 | 399 return ret_val |