Mercurial > lcfOS
comparison python/pyyacc.py @ 186:46d62dadd61b
Improved testsuite
author | Windel Bouwman |
---|---|
date | Sat, 25 May 2013 14:26:25 +0200 |
parents | 51a6440d6398 |
children | 6b2bec5653f1 |
comparison
equal
deleted
inserted
replaced
185:51a6440d6398 | 186:46d62dadd61b |
---|---|
7 SHIFT = 1 | 7 SHIFT = 1 |
8 REDUCE = 2 | 8 REDUCE = 2 |
9 ACCEPT = 3 | 9 ACCEPT = 3 |
10 | 10 |
11 class ParserGenerationException(Exception): | 11 class ParserGenerationException(Exception): |
12 """ Raised when something goes wrong during parser generation """ | |
12 pass | 13 pass |
13 | 14 |
14 | 15 |
15 class ParserException(Exception): | 16 class ParserException(Exception): |
17 """ Raised during a failure in the parsing process """ | |
16 pass | 18 pass |
17 | 19 |
18 | 20 |
19 class Grammar: | 21 class Grammar: |
20 """ Defines a grammar of a language """ | 22 """ Defines a grammar of a language """ |
41 """ Get all the symbols defined by this grammar """ | 43 """ Get all the symbols defined by this grammar """ |
42 return self.nonterminals + self.terminals | 44 return self.nonterminals + self.terminals |
43 | 45 |
44 @property | 46 @property |
45 def first(self): | 47 def first(self): |
48 """ | |
49 The first set is a mapping from a grammar symbol to a set of | |
50 set of all terminal symbols that can be the first terminal when | |
51 looking for the grammar symbol | |
52 """ | |
46 if not self._first: | 53 if not self._first: |
47 self._first = self.calcFirstSets() | 54 self._first = self.calcFirstSets() |
48 return self._first | 55 return self._first |
49 | 56 |
50 def calcFirstSets(self): | 57 def calcFirstSets(self): |
83 worklist = list(itemset) | 90 worklist = list(itemset) |
84 def addIt(itm): | 91 def addIt(itm): |
85 if not itm in itemset: | 92 if not itm in itemset: |
86 itemset.add(itm) | 93 itemset.add(itm) |
87 worklist.append(itm) | 94 worklist.append(itm) |
88 def first2(itm, la): | 95 def first2(itm): |
89 # When using the first sets, create a copy: | 96 # When using the first sets, create a copy: |
90 f = set(self.first[item.NextNext]) | 97 f = set(self.first[itm.NextNext]) |
91 if EPS in f: | 98 if EPS in f: |
92 f.discard(EPS) | 99 f.discard(EPS) |
93 f.add(item.look_ahead) | 100 f.add(itm.look_ahead) |
94 return f | 101 return f |
95 # Start of algorithm: | 102 # Start of algorithm: |
96 while worklist: | 103 while worklist: |
97 item = worklist.pop(0) | 104 item = worklist.pop(0) |
98 if not item.IsShift: | 105 if not item.IsShift: |
99 continue | 106 continue |
100 if not (item.Next in self.nonterminals): | 107 if not (item.Next in self.nonterminals): |
101 continue | 108 continue |
102 C = item.Next | 109 C = item.Next |
103 for add_p in self.productionsForName(C): | 110 for add_p in self.productionsForName(C): |
104 for b in first2(item, item.look_ahead): | 111 for b in first2(item): |
105 addIt(Item(add_p, 0, b)) | 112 addIt(Item(add_p, 0, b)) |
106 return frozenset(itemset) | 113 return frozenset(itemset) |
107 | 114 |
108 def initialItemSet(self): | 115 def initialItemSet(self): |
109 """ Calculates the initial item set """ | 116 """ Calculates the initial item set """ |
192 def __repr__(self): | 199 def __repr__(self): |
193 return '{0} -> {1}'.format(self.name, self.symbols) | 200 return '{0} -> {1}'.format(self.name, self.symbols) |
194 | 201 |
195 | 202 |
196 class Item: | 203 class Item: |
204 """ | |
205 Represents a partially parsed item | |
206 It has a production it is looking for, a position | |
207 in this production called the 'dot' and a look ahead | |
208 symbol that must follow this item. | |
209 """ | |
197 def __init__(self, production, dotpos, look_ahead): | 210 def __init__(self, production, dotpos, look_ahead): |
198 self.production = production | 211 self.production = production |
199 self.dotpos = dotpos | 212 self.dotpos = dotpos |
200 assert self.dotpos <= len(self.production.symbols) | 213 assert self.dotpos <= len(self.production.symbols) |
201 self.look_ahead = look_ahead | 214 self.look_ahead = look_ahead |
212 def __hash__(self): | 225 def __hash__(self): |
213 return self.getdata().__hash__() | 226 return self.getdata().__hash__() |
214 | 227 |
215 @property | 228 @property |
216 def IsReduce(self): | 229 def IsReduce(self): |
230 """ Check if this item has the dot at the end """ | |
217 return self.dotpos == len(self.production.symbols) | 231 return self.dotpos == len(self.production.symbols) |
218 | 232 |
219 @property | 233 @property |
220 def IsShift(self): | 234 def IsShift(self): |
235 """ Check if this item is a shift item, i.e. the dot can proceed """ | |
221 return not self.IsReduce | 236 return not self.IsReduce |
222 | 237 |
223 @property | 238 @property |
224 def Next(self): | 239 def Next(self): |
240 """ Returns the symbol after the dot """ | |
225 return self.production.symbols[self.dotpos] | 241 return self.production.symbols[self.dotpos] |
226 | 242 |
227 def can_shift_over(self, symbol): | 243 def can_shift_over(self, symbol): |
228 """ Determines if this item can shift over the given symbol """ | 244 """ Determines if this item can shift over the given symbol """ |
229 return self.IsShift and self.Next == symbol | 245 return self.IsShift and self.Next == symbol |
232 """ Creates a new item that is shifted one position """ | 248 """ Creates a new item that is shifted one position """ |
233 return Item(self.production, self.dotpos + 1, self.look_ahead) | 249 return Item(self.production, self.dotpos + 1, self.look_ahead) |
234 | 250 |
235 @property | 251 @property |
236 def NextNext(self): | 252 def NextNext(self): |
253 """ Gets the symbol after the next symbol, or EPS if at the end """ | |
237 if self.dotpos + 1 >= len(self.production.symbols): | 254 if self.dotpos + 1 >= len(self.production.symbols): |
238 return EPS | 255 return EPS |
239 else: | 256 else: |
240 return self.production.symbols[self.dotpos + 1] | 257 return self.production.symbols[self.dotpos + 1] |
241 | 258 |
242 def __repr__(self): | 259 def __repr__(self): |
243 prod = self.production | 260 prod = self.production |
244 predot = ' '.join(prod.symbols[0:self.dotpos]) | 261 predot = ' '.join(prod.symbols[0:self.dotpos]) |
245 postdot = ' '.join(prod.symbols[self.dotpos:]) | 262 postdot = ' '.join(prod.symbols[self.dotpos:]) |
246 nt = prod.name | 263 name = prod.name |
247 args = (nt, predot, postdot, self.look_ahead) | 264 args = (name, predot, postdot, self.look_ahead) |
248 return '[{0} -> {1} . {2} -> {3}]'.format(*args) | 265 return '[{0} -> {1} . {2} -> {3}]'.format(*args) |
249 | 266 |
250 | 267 |
251 class LRParser: | 268 class LRParser: |
252 """ LR parser """ | 269 """ LR parser """ |