comparison python/pyyacc.py @ 184:fe2b72381a83

Added testset for pyy
author Windel Bouwman
date Fri, 24 May 2013 16:13:23 +0200
parents 216da5e46efc
children 51a6440d6398
comparison
equal deleted inserted replaced
183:4fd075e8259c 184:fe2b72381a83
1 """ 1 """
2 Parser generator script 2 Parser generator script
3 """ 3 """
4 4
5 EPS = 'EPS' 5 EPS = 'EPS'
6 EOF = 'EOF'
7 SHIFT = 1
8 REDUCE = 2
9 ACCEPT = 3
6 10
7 11
8 class Grammar: 12 class Grammar:
9 """ Defines a grammar of a language """ 13 """ Defines a grammar of a language """
10 def __init__(self, terminals): 14 def __init__(self, terminals):
28 def Symbols(self): 32 def Symbols(self):
29 """ Get all the symbols defined by this grammar """ 33 """ Get all the symbols defined by this grammar """
30 return self.nonterminals + self.terminals 34 return self.nonterminals + self.terminals
31 35
32 def calcFirstSets(self): 36 def calcFirstSets(self):
33 """ Calculate first sets for each grammar symbol """ 37 """
38 Calculate first sets for each grammar symbol
39 This is a dictionary which maps each grammar symbol
40 to a set of terminals that can be encountered first
41 when looking for the symbol.
42 """
34 first = {} 43 first = {}
35 for t in self.terminals + ['EOF', EPS]: 44 for t in self.terminals + [EOF, EPS]:
36 first[t] = set([t]) 45 first[t] = set([t])
37 for nt in self.nonterminals: 46 for nt in self.nonterminals:
38 first[nt] = set() 47 first[nt] = set()
39 epsset = set([EPS]) 48 epsset = set([EPS])
40 while True: 49 while True:
53 some_change = True 62 some_change = True
54 if not some_change: 63 if not some_change:
55 break 64 break
56 return first 65 return first
57 66
67 def closure(self, itemset):
68 """ Expand itemset by using epsilon moves """
69 worklist = list(itemset)
70 def addIt(itm):
71 if not itm in itemset:
72 itemset.add(itm)
73 worklist.append(itm)
74 def first2(itm, la):
75 # When using the first sets, create a copy:
76 f = set(self.first[item.NextNext])
77 if EPS in f:
78 f.discard(EPS)
79 f.add(item.look_ahead)
80 return f
81
82 while worklist:
83 item = worklist.pop(0)
84 if not item.IsShift:
85 continue
86 if not (item.Next in self.nonterminals):
87 continue
88 C = item.Next
89 for add_p in self.productionsForName(C):
90 for b in first2(item, item.look_ahead):
91 addIt(Item(add_p, 0, b))
92 return frozenset(itemset)
93
94 def initialItemSet(self):
95 """ Calculates the initial item set """
96 iis = set()
97 for p in self.productionsForName(self.start_symbol):
98 iis.add(Item(p, 0, EOF))
99 return self.closure(iis)
100
101 def nextItemSet(self, itemset, symbol):
102 """
103 Determines the next itemset for the current set and a symbol
104 This is the goto procedure
105 """
106 next_set = set()
107 for item in itemset:
108 if item.can_shift_over(symbol):
109 next_set.add(item.shifted())
110 return self.closure(next_set)
111
112 def genCanonicalSet(self, iis):
113 states = set()
114 worklist = []
115 goto_table = {}
116 def addSt(s):
117 if not (s in states):
118 worklist.append(s)
119 states.add(s)
120 addSt(iis)
121 while len(worklist) > 0:
122 itemset = worklist.pop(0)
123 for symbol in self.Symbols:
124 nis = self.nextItemSet(itemset, symbol)
125 if not nis:
126 continue
127 goto_table[(itemset, symbol)] = nis
128 addSt(nis)
129 return states, goto_table
130
131 def genParser(self):
132 """ Generates a parser from the grammar """
133 action_table = {}
134 self.first = self.calcFirstSets()
135 iis = self.initialItemSet()
136
137 # First generate all item sets by using the nextItemset function:
138 states, goto_table = self.genCanonicalSet(iis)
139
140 # Number the states:
141 number_states = {}
142 for s, i in zip(states, range(len(states))):
143 number_states[s] = i
144
145 # Fill action table:
146 for state in states:
147 for item in state:
148 if item.IsShift and item.Next in self.terminals:
149 action_table[(state, item.Next)] = (SHIFT, 0)
150 elif item.IsReduce:
151 if item.look_ahead == EOF:
152 action_table[(state, item.look_ahead)] = (ACCEPT, 0)
153 else:
154 action_table[(state, item.look_ahead)] = (REDUCE, item.production)
155 else:
156 pass
157
158 p = LRParser(action_table)
159 p.goto_table = goto_table
160 p.s0 = iis
161 return p
58 162
59 class Production: 163 class Production:
60 """ Production rule for a grammar """ 164 """ Production rule for a grammar """
61 def __init__(self, name, symbols): 165 def __init__(self, name, symbols):
62 self.name = name 166 self.name = name
68 172
69 class Item: 173 class Item:
70 def __init__(self, production, dotpos, look_ahead): 174 def __init__(self, production, dotpos, look_ahead):
71 self.production = production 175 self.production = production
72 self.dotpos = dotpos 176 self.dotpos = dotpos
177 assert self.dotpos <= len(self.production.symbols)
73 self.look_ahead = look_ahead 178 self.look_ahead = look_ahead
74 179
75 def getdata(self): 180 def getdata(self):
76 """ Gets the members as a tuple """ 181 """ Gets the members as a tuple """
77 return (self.production, self.dotpos, self.look_ahead) 182 return (self.production, self.dotpos, self.look_ahead)
94 199
95 @property 200 @property
96 def Next(self): 201 def Next(self):
97 return self.production.symbols[self.dotpos] 202 return self.production.symbols[self.dotpos]
98 203
204 def can_shift_over(self, symbol):
205 """ Determines if this item can shift over the given symbol """
206 return self.IsShift and self.Next == symbol
207
208 def shifted(self):
209 """ Creates a new item that is shifted one position """
210 return Item(self.production, self.dotpos + 1, self.look_ahead)
211
99 @property 212 @property
100 def NextNext(self): 213 def NextNext(self):
101 if self.dotpos + 1 >= len(self.production.symbols): 214 if self.dotpos + 1 >= len(self.production.symbols):
102 return 'EPS' 215 return EPS
103 else: 216 else:
104 return self.production.symbols[self.dotpos + 1] 217 return self.production.symbols[self.dotpos + 1]
105 218
106 def __repr__(self): 219 def __repr__(self):
107 prod = self.production 220 prod = self.production
108 predot = ' '.join(prod.symbols[0:self.dotpos]) 221 predot = ' '.join(prod.symbols[0:self.dotpos])
109 postdot = ' '.join(prod.symbols[self.dotpos:]) 222 postdot = ' '.join(prod.symbols[self.dotpos:])
110 nt = prod.name 223 nt = prod.name
111 args = (nt, predot, postdot, self.look_ahead) 224 args = (nt, predot, postdot, self.look_ahead)
112 return '[{0} -> {1} _dot_ {2},{3}]'.format(*args) 225 return '[{0} -> {1} . {2}, {3}]'.format(*args)
113
114
115 class LRgenerator:
116 """ Parser generator """
117 def __init__(self, g):
118 self.grammar = g
119
120 def closure(self, itemset):
121 """ Expand itemset by using epsilon moves """
122 worklist = list(itemset)
123 while worklist:
124 item = worklist.pop(0)
125 if item.IsShift and item.Next in self.grammar.nonterminals:
126 for add_p in self.grammar.productionsForName(item.Next):
127 f = self.first[item.NextNext]
128 if 'EPS' in f:
129 f.discard('EPS')
130 f.add(item.look_ahead)
131 for b in f:
132 i = Item(add_p, 0, b)
133 if not i in itemset:
134 itemset.add(i)
135 worklist.append(i)
136 return frozenset(itemset)
137
138 def initialItemSet(self):
139 """ Calculates the initial item set """
140 iis = set()
141 for p in self.grammar.productionsForName(self.grammar.start_symbol):
142 iis.add(Item(p, 0, 'EOF'))
143 return self.closure(iis)
144
145 def nextItemSet(self, itemset, symbol):
146 """ Determines the next itemset for the current set and a symbol """
147 nis = set()
148 for item in itemset:
149 if item.IsShift and item.Next == symbol:
150 nis.add(Item(item.production, item.dotpos + 1, item.look_ahead))
151 return self.closure(nis)
152
153 def genParser(self):
154 states = set()
155 goto_table = {}
156 action_table = {}
157 self.first = self.grammar.calcFirstSets()
158 iis = self.initialItemSet()
159 states.add(iis)
160
161 # First generate all item sets by using the nextItemset function:
162 worklist = [iis]
163 while len(worklist) > 0:
164 itemset = worklist.pop(0)
165 for symbol in self.grammar.Symbols:
166 nis = self.nextItemSet(itemset, symbol)
167 if nis:
168 goto_table[(itemset, symbol)] = nis
169 if not nis in states:
170 worklist.append(nis)
171 states.add(nis)
172
173 # Number the states:
174 print('States:')
175 for s, i in zip(states, range(len(states))):
176 print(i, ', '.join(str(x) for x in s))
177 p = LRParser()
178 p.goto_table = goto_table
179 p.action_table = action_table
180 p.s0 = iis
181 return p
182 226
183 227
184 class LRParser: 228 class LRParser:
229 """ LR parser """
230 def __init__(self, action_table):
231 self.action_table = action_table
232
185 def parse(self, toks): 233 def parse(self, toks):
186 stack = ['EOF', self.s0] 234 stack = [EOF, self.s0]
187 look_ahead = toks.pop(0) 235 look_ahead = toks.pop(0)
188 while True: 236 while True:
189 state = stack[-1] # top of stack 237 state = stack[-1] # top of stack
190 key = (state, look_ahead) 238 key = (state, look_ahead)
191 if not key in self.action_table: 239 if not key in self.action_table:
192 raise Exception('Error parsing') 240 raise Exception('Error parsing')
193 action, param = self.action_table[(state, look_ahead)] 241 action, param = self.action_table[(state, look_ahead)]
194 if action == 'reduce': 242 if action == REDUCE:
195 for s in param.symbols: 243 for s in param.symbols:
196 stack.pop() 244 stack.pop()
197 stack.pop() 245 stack.pop()
198 state = stack[-1] 246 state = stack[-1]
199 stack.append(param.name) 247 stack.append(param.name)
200 stack.append(self.goto_table[(state, param.name)]) 248 stack.append(self.goto_table[(state, param.name)])
201 elif action == 'shift': 249 elif action == SHIFT:
202 stack.append(look_ahead) 250 stack.append(look_ahead)
203 s, i = stack[-2:] 251 s, i = stack[-2:]
204 stack.append(self.goto_table[(s, i)]) 252 stack.append(self.goto_table[(s, i)])
205 look_ahead = toks.pop(0) 253 look_ahead = toks.pop(0)
206 elif action == 'accept': 254 elif action == ACCEPT:
207 break 255 break
208 256
209 # Test it: 257 def testSimpleGrammar():
210 def test():
211 # 1. define a simple grammar: 258 # 1. define a simple grammar:
212 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) 259 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
213 g.add_production('input', ['expression']) 260 g.add_production('input', ['expression'])
214 g.add_production('expression', ['term']) 261 g.add_production('expression', ['term'])
215 g.add_production('expression', ['expression', '+', 'term']) 262 g.add_production('expression', ['expression', '+', 'term'])
219 g.add_production('factor', ['identifier']) 266 g.add_production('factor', ['identifier'])
220 g.start_symbol = 'input' 267 g.start_symbol = 'input'
221 # 2. define input: 268 # 2. define input:
222 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] 269 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
223 # 3. build parser: 270 # 3. build parser:
224 p = LRgenerator(g).genParser() 271 p = g.genParser()
225 # 4. feed input: 272 # 4. feed input:
226 p.parse(tokens) 273 p.parse(tokens)
227 274
228 print('Hallo') 275
229 if __name__ == '__main__': 276 if __name__ == '__main__':
230 print('Hallo') 277 testSimpleGrammar()
231 test()
232