Mercurial > lcfOS
comparison python/pyyacc.py @ 184:fe2b72381a83
Added testset for pyy
author | Windel Bouwman |
---|---|
date | Fri, 24 May 2013 16:13:23 +0200 |
parents | 216da5e46efc |
children | 51a6440d6398 |
comparison
equal
deleted
inserted
replaced
183:4fd075e8259c | 184:fe2b72381a83 |
---|---|
1 """ | 1 """ |
2 Parser generator script | 2 Parser generator script |
3 """ | 3 """ |
4 | 4 |
5 EPS = 'EPS' | 5 EPS = 'EPS' |
6 EOF = 'EOF' | |
7 SHIFT = 1 | |
8 REDUCE = 2 | |
9 ACCEPT = 3 | |
6 | 10 |
7 | 11 |
8 class Grammar: | 12 class Grammar: |
9 """ Defines a grammar of a language """ | 13 """ Defines a grammar of a language """ |
10 def __init__(self, terminals): | 14 def __init__(self, terminals): |
28 def Symbols(self): | 32 def Symbols(self): |
29 """ Get all the symbols defined by this grammar """ | 33 """ Get all the symbols defined by this grammar """ |
30 return self.nonterminals + self.terminals | 34 return self.nonterminals + self.terminals |
31 | 35 |
32 def calcFirstSets(self): | 36 def calcFirstSets(self): |
33 """ Calculate first sets for each grammar symbol """ | 37 """ |
38 Calculate first sets for each grammar symbol | |
39 This is a dictionary which maps each grammar symbol | |
40 to a set of terminals that can be encountered first | |
41 when looking for the symbol. | |
42 """ | |
34 first = {} | 43 first = {} |
35 for t in self.terminals + ['EOF', EPS]: | 44 for t in self.terminals + [EOF, EPS]: |
36 first[t] = set([t]) | 45 first[t] = set([t]) |
37 for nt in self.nonterminals: | 46 for nt in self.nonterminals: |
38 first[nt] = set() | 47 first[nt] = set() |
39 epsset = set([EPS]) | 48 epsset = set([EPS]) |
40 while True: | 49 while True: |
53 some_change = True | 62 some_change = True |
54 if not some_change: | 63 if not some_change: |
55 break | 64 break |
56 return first | 65 return first |
57 | 66 |
67 def closure(self, itemset): | |
68 """ Expand itemset by using epsilon moves """ | |
69 worklist = list(itemset) | |
70 def addIt(itm): | |
71 if not itm in itemset: | |
72 itemset.add(itm) | |
73 worklist.append(itm) | |
74 def first2(itm, la): | |
75 # When using the first sets, create a copy: | |
76 f = set(self.first[item.NextNext]) | |
77 if EPS in f: | |
78 f.discard(EPS) | |
79 f.add(item.look_ahead) | |
80 return f | |
81 | |
82 while worklist: | |
83 item = worklist.pop(0) | |
84 if not item.IsShift: | |
85 continue | |
86 if not (item.Next in self.nonterminals): | |
87 continue | |
88 C = item.Next | |
89 for add_p in self.productionsForName(C): | |
90 for b in first2(item, item.look_ahead): | |
91 addIt(Item(add_p, 0, b)) | |
92 return frozenset(itemset) | |
93 | |
94 def initialItemSet(self): | |
95 """ Calculates the initial item set """ | |
96 iis = set() | |
97 for p in self.productionsForName(self.start_symbol): | |
98 iis.add(Item(p, 0, EOF)) | |
99 return self.closure(iis) | |
100 | |
101 def nextItemSet(self, itemset, symbol): | |
102 """ | |
103 Determines the next itemset for the current set and a symbol | |
104 This is the goto procedure | |
105 """ | |
106 next_set = set() | |
107 for item in itemset: | |
108 if item.can_shift_over(symbol): | |
109 next_set.add(item.shifted()) | |
110 return self.closure(next_set) | |
111 | |
112 def genCanonicalSet(self, iis): | |
113 states = set() | |
114 worklist = [] | |
115 goto_table = {} | |
116 def addSt(s): | |
117 if not (s in states): | |
118 worklist.append(s) | |
119 states.add(s) | |
120 addSt(iis) | |
121 while len(worklist) > 0: | |
122 itemset = worklist.pop(0) | |
123 for symbol in self.Symbols: | |
124 nis = self.nextItemSet(itemset, symbol) | |
125 if not nis: | |
126 continue | |
127 goto_table[(itemset, symbol)] = nis | |
128 addSt(nis) | |
129 return states, goto_table | |
130 | |
131 def genParser(self): | |
132 """ Generates a parser from the grammar """ | |
133 action_table = {} | |
134 self.first = self.calcFirstSets() | |
135 iis = self.initialItemSet() | |
136 | |
137 # First generate all item sets by using the nextItemset function: | |
138 states, goto_table = self.genCanonicalSet(iis) | |
139 | |
140 # Number the states: | |
141 number_states = {} | |
142 for s, i in zip(states, range(len(states))): | |
143 number_states[s] = i | |
144 | |
145 # Fill action table: | |
146 for state in states: | |
147 for item in state: | |
148 if item.IsShift and item.Next in self.terminals: | |
149 action_table[(state, item.Next)] = (SHIFT, 0) | |
150 elif item.IsReduce: | |
151 if item.look_ahead == EOF: | |
152 action_table[(state, item.look_ahead)] = (ACCEPT, 0) | |
153 else: | |
154 action_table[(state, item.look_ahead)] = (REDUCE, item.production) | |
155 else: | |
156 pass | |
157 | |
158 p = LRParser(action_table) | |
159 p.goto_table = goto_table | |
160 p.s0 = iis | |
161 return p | |
58 | 162 |
59 class Production: | 163 class Production: |
60 """ Production rule for a grammar """ | 164 """ Production rule for a grammar """ |
61 def __init__(self, name, symbols): | 165 def __init__(self, name, symbols): |
62 self.name = name | 166 self.name = name |
68 | 172 |
69 class Item: | 173 class Item: |
70 def __init__(self, production, dotpos, look_ahead): | 174 def __init__(self, production, dotpos, look_ahead): |
71 self.production = production | 175 self.production = production |
72 self.dotpos = dotpos | 176 self.dotpos = dotpos |
177 assert self.dotpos <= len(self.production.symbols) | |
73 self.look_ahead = look_ahead | 178 self.look_ahead = look_ahead |
74 | 179 |
75 def getdata(self): | 180 def getdata(self): |
76 """ Gets the members as a tuple """ | 181 """ Gets the members as a tuple """ |
77 return (self.production, self.dotpos, self.look_ahead) | 182 return (self.production, self.dotpos, self.look_ahead) |
94 | 199 |
95 @property | 200 @property |
96 def Next(self): | 201 def Next(self): |
97 return self.production.symbols[self.dotpos] | 202 return self.production.symbols[self.dotpos] |
98 | 203 |
204 def can_shift_over(self, symbol): | |
205 """ Determines if this item can shift over the given symbol """ | |
206 return self.IsShift and self.Next == symbol | |
207 | |
208 def shifted(self): | |
209 """ Creates a new item that is shifted one position """ | |
210 return Item(self.production, self.dotpos + 1, self.look_ahead) | |
211 | |
99 @property | 212 @property |
100 def NextNext(self): | 213 def NextNext(self): |
101 if self.dotpos + 1 >= len(self.production.symbols): | 214 if self.dotpos + 1 >= len(self.production.symbols): |
102 return 'EPS' | 215 return EPS |
103 else: | 216 else: |
104 return self.production.symbols[self.dotpos + 1] | 217 return self.production.symbols[self.dotpos + 1] |
105 | 218 |
106 def __repr__(self): | 219 def __repr__(self): |
107 prod = self.production | 220 prod = self.production |
108 predot = ' '.join(prod.symbols[0:self.dotpos]) | 221 predot = ' '.join(prod.symbols[0:self.dotpos]) |
109 postdot = ' '.join(prod.symbols[self.dotpos:]) | 222 postdot = ' '.join(prod.symbols[self.dotpos:]) |
110 nt = prod.name | 223 nt = prod.name |
111 args = (nt, predot, postdot, self.look_ahead) | 224 args = (nt, predot, postdot, self.look_ahead) |
112 return '[{0} -> {1} _dot_ {2},{3}]'.format(*args) | 225 return '[{0} -> {1} . {2}, {3}]'.format(*args) |
113 | |
114 | |
115 class LRgenerator: | |
116 """ Parser generator """ | |
117 def __init__(self, g): | |
118 self.grammar = g | |
119 | |
120 def closure(self, itemset): | |
121 """ Expand itemset by using epsilon moves """ | |
122 worklist = list(itemset) | |
123 while worklist: | |
124 item = worklist.pop(0) | |
125 if item.IsShift and item.Next in self.grammar.nonterminals: | |
126 for add_p in self.grammar.productionsForName(item.Next): | |
127 f = self.first[item.NextNext] | |
128 if 'EPS' in f: | |
129 f.discard('EPS') | |
130 f.add(item.look_ahead) | |
131 for b in f: | |
132 i = Item(add_p, 0, b) | |
133 if not i in itemset: | |
134 itemset.add(i) | |
135 worklist.append(i) | |
136 return frozenset(itemset) | |
137 | |
138 def initialItemSet(self): | |
139 """ Calculates the initial item set """ | |
140 iis = set() | |
141 for p in self.grammar.productionsForName(self.grammar.start_symbol): | |
142 iis.add(Item(p, 0, 'EOF')) | |
143 return self.closure(iis) | |
144 | |
145 def nextItemSet(self, itemset, symbol): | |
146 """ Determines the next itemset for the current set and a symbol """ | |
147 nis = set() | |
148 for item in itemset: | |
149 if item.IsShift and item.Next == symbol: | |
150 nis.add(Item(item.production, item.dotpos + 1, item.look_ahead)) | |
151 return self.closure(nis) | |
152 | |
153 def genParser(self): | |
154 states = set() | |
155 goto_table = {} | |
156 action_table = {} | |
157 self.first = self.grammar.calcFirstSets() | |
158 iis = self.initialItemSet() | |
159 states.add(iis) | |
160 | |
161 # First generate all item sets by using the nextItemset function: | |
162 worklist = [iis] | |
163 while len(worklist) > 0: | |
164 itemset = worklist.pop(0) | |
165 for symbol in self.grammar.Symbols: | |
166 nis = self.nextItemSet(itemset, symbol) | |
167 if nis: | |
168 goto_table[(itemset, symbol)] = nis | |
169 if not nis in states: | |
170 worklist.append(nis) | |
171 states.add(nis) | |
172 | |
173 # Number the states: | |
174 print('States:') | |
175 for s, i in zip(states, range(len(states))): | |
176 print(i, ', '.join(str(x) for x in s)) | |
177 p = LRParser() | |
178 p.goto_table = goto_table | |
179 p.action_table = action_table | |
180 p.s0 = iis | |
181 return p | |
182 | 226 |
183 | 227 |
184 class LRParser: | 228 class LRParser: |
229 """ LR parser """ | |
230 def __init__(self, action_table): | |
231 self.action_table = action_table | |
232 | |
185 def parse(self, toks): | 233 def parse(self, toks): |
186 stack = ['EOF', self.s0] | 234 stack = [EOF, self.s0] |
187 look_ahead = toks.pop(0) | 235 look_ahead = toks.pop(0) |
188 while True: | 236 while True: |
189 state = stack[-1] # top of stack | 237 state = stack[-1] # top of stack |
190 key = (state, look_ahead) | 238 key = (state, look_ahead) |
191 if not key in self.action_table: | 239 if not key in self.action_table: |
192 raise Exception('Error parsing') | 240 raise Exception('Error parsing') |
193 action, param = self.action_table[(state, look_ahead)] | 241 action, param = self.action_table[(state, look_ahead)] |
194 if action == 'reduce': | 242 if action == REDUCE: |
195 for s in param.symbols: | 243 for s in param.symbols: |
196 stack.pop() | 244 stack.pop() |
197 stack.pop() | 245 stack.pop() |
198 state = stack[-1] | 246 state = stack[-1] |
199 stack.append(param.name) | 247 stack.append(param.name) |
200 stack.append(self.goto_table[(state, param.name)]) | 248 stack.append(self.goto_table[(state, param.name)]) |
201 elif action == 'shift': | 249 elif action == SHIFT: |
202 stack.append(look_ahead) | 250 stack.append(look_ahead) |
203 s, i = stack[-2:] | 251 s, i = stack[-2:] |
204 stack.append(self.goto_table[(s, i)]) | 252 stack.append(self.goto_table[(s, i)]) |
205 look_ahead = toks.pop(0) | 253 look_ahead = toks.pop(0) |
206 elif action == 'accept': | 254 elif action == ACCEPT: |
207 break | 255 break |
208 | 256 |
209 # Test it: | 257 def testSimpleGrammar(): |
210 def test(): | |
211 # 1. define a simple grammar: | 258 # 1. define a simple grammar: |
212 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) | 259 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) |
213 g.add_production('input', ['expression']) | 260 g.add_production('input', ['expression']) |
214 g.add_production('expression', ['term']) | 261 g.add_production('expression', ['term']) |
215 g.add_production('expression', ['expression', '+', 'term']) | 262 g.add_production('expression', ['expression', '+', 'term']) |
219 g.add_production('factor', ['identifier']) | 266 g.add_production('factor', ['identifier']) |
220 g.start_symbol = 'input' | 267 g.start_symbol = 'input' |
221 # 2. define input: | 268 # 2. define input: |
222 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] | 269 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] |
223 # 3. build parser: | 270 # 3. build parser: |
224 p = LRgenerator(g).genParser() | 271 p = g.genParser() |
225 # 4. feed input: | 272 # 4. feed input: |
226 p.parse(tokens) | 273 p.parse(tokens) |
227 | 274 |
228 print('Hallo') | 275 |
229 if __name__ == '__main__': | 276 if __name__ == '__main__': |
230 print('Hallo') | 277 testSimpleGrammar() |
231 test() | |
232 |