comparison python/pyyacc.py @ 181:216da5e46efc

Changed indent to 4 spaces to comply to convention
author Windel Bouwman
date Sat, 18 May 2013 17:45:26 +0200
parents 25a0753da4cf
children fe2b72381a83
comparison
equal deleted inserted replaced
180:25a0753da4cf 181:216da5e46efc
1 """
2 Parser generator script
3 """
4
5 EPS = 'EPS'
6
7
1 class Grammar: 8 class Grammar:
2 def __init__(self, terminals): 9 """ Defines a grammar of a language """
3 self.terminals = terminals 10 def __init__(self, terminals):
4 self.nonterminals = [] 11 self.terminals = terminals
5 self.productions = [] 12 self.nonterminals = []
6 def add_production(self, name, symbols): 13 self.productions = []
7 p = Production(name, symbols) 14
8 self.productions.append(p) 15 def add_production(self, name, symbols):
9 if not name in self.nonterminals: 16 """ Add a production rule to the grammar """
10 self.nonterminals.append(name) 17 production = Production(name, symbols)
11 def productionsForName(self, name): 18 self.productions.append(production)
12 return [p for p in self.productions if p.name == name] 19 assert not name in self.terminals, "Cannot redefine terminal"
13 @property 20 if not name in self.nonterminals:
14 def Symbols(self): 21 self.nonterminals.append(name)
15 return self.nonterminals + self.terminals 22
16 def calcFollow(self): 23 def productionsForName(self, name):
17 follow = {} 24 """ Retrieve all productions for a non terminal """
18 for nt in self.nonterminals: 25 return [p for p in self.productions if p.name == name]
19 follow[nt] = set() 26
20 while True: 27 @property
21 change = False 28 def Symbols(self):
22 # 1. 29 """ Get all the symbols defined by this grammar """
23 for p in self.productions: 30 return self.nonterminals + self.terminals
24 pass 31
25 if not change: 32 def calcFirstSets(self):
26 break 33 """ Calculate first sets for each grammar symbol """
27 return follow 34 first = {}
28 def calcFirst(self): 35 for t in self.terminals + ['EOF', EPS]:
29 first = {} 36 first[t] = set([t])
30 return first 37 for nt in self.nonterminals:
38 first[nt] = set()
39 epsset = set([EPS])
40 while True:
41 some_change = False
42 for p in self.productions:
43 rhs = set()
44 for beta in p.symbols:
45 rhs = rhs | (first[beta] - epsset)
46 if not EPS in first[beta]:
47 break
48 else:
49 if EPS in first[beta]:
50 rhs.add(EPS)
51 if rhs - first[p.name]:
52 first[p.name] |= rhs
53 some_change = True
54 if not some_change:
55 break
56 return first
57
31 58
32 class Production: 59 class Production:
33 def __init__(self, name, symbols): 60 """ Production rule for a grammar """
34 self.name = name 61 def __init__(self, name, symbols):
35 self.symbols = symbols 62 self.name = name
36 def __repr__(self): 63 self.symbols = symbols
37 return '{0} -> {1}'.format(self.name, self.symbols) 64
65 def __repr__(self):
66 return '{0} -> {1}'.format(self.name, self.symbols)
67
38 68
39 class Item: 69 class Item:
40 def __init__(self, production, dotpos): 70 def __init__(self, production, dotpos, look_ahead):
41 self.production = production 71 self.production = production
42 self.dotpos = dotpos 72 self.dotpos = dotpos
43 def __eq__(self, other): 73 self.look_ahead = look_ahead
44 if type(other) is type(self): 74
45 return (self.production, self.dotpos) == (other.production, other.dotpos) 75 def getdata(self):
46 return False 76 """ Gets the members as a tuple """
47 def __hash__(self): 77 return (self.production, self.dotpos, self.look_ahead)
48 return (self.production, self.dotpos).__hash__() 78
49 @property 79 def __eq__(self, other):
50 def IsReduce(self): 80 if type(other) is type(self):
51 return self.dotpos == len(self.production.symbols) 81 return self.getdata() == other.getdata()
52 @property 82 return False
53 def IsShift(self): 83
54 return not self.IsReduce 84 def __hash__(self):
55 @property 85 return self.getdata().__hash__()
56 def Next(self): 86
57 return self.production.symbols[self.dotpos] 87 @property
58 def matches(self, symbol): 88 def IsReduce(self):
59 return self.Next == symbol 89 return self.dotpos == len(self.production.symbols)
60 def __repr__(self): 90
61 return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) 91 @property
92 def IsShift(self):
93 return not self.IsReduce
94
95 @property
96 def Next(self):
97 return self.production.symbols[self.dotpos]
98
99 @property
100 def NextNext(self):
101 if self.dotpos + 1 >= len(self.production.symbols):
102 return 'EPS'
103 else:
104 return self.production.symbols[self.dotpos + 1]
105
106 def __repr__(self):
107 prod = self.production
108 predot = ' '.join(prod.symbols[0:self.dotpos])
109 postdot = ' '.join(prod.symbols[self.dotpos:])
110 nt = prod.name
111 args = (nt, predot, postdot, self.look_ahead)
112 return '[{0} -> {1} _dot_ {2},{3}]'.format(*args)
113
62 114
63 class LRgenerator: 115 class LRgenerator:
64 def __init__(self, g): 116 """ Parser generator """
65 self.grammar = g 117 def __init__(self, g):
66 def e_closure(self, S): 118 self.grammar = g
67 """ Expand itemset by using epsilon moves """ 119
68 something_changed = True 120 def closure(self, itemset):
69 while something_changed: 121 """ Expand itemset by using epsilon moves """
70 something_changed = False 122 worklist = list(itemset)
71 worklist = list(S) 123 while worklist:
72 for item in worklist: 124 item = worklist.pop(0)
73 if item.IsShift and item.Next in self.grammar.nonterminals: 125 if item.IsShift and item.Next in self.grammar.nonterminals:
74 n = item.Next 126 for add_p in self.grammar.productionsForName(item.Next):
75 for add_p in self.grammar.productionsForName(n): 127 f = self.first[item.NextNext]
76 i = Item(add_p, 0) 128 if 'EPS' in f:
77 if not i in S: 129 f.discard('EPS')
78 S.add(i) 130 f.add(item.look_ahead)
79 something_changed = True 131 for b in f:
80 return frozenset(S) 132 i = Item(add_p, 0, b)
81 def initialItemSet(self): 133 if not i in itemset:
82 nis = set() 134 itemset.add(i)
83 for p in self.grammar.productionsForName(self.grammar.start_symbol): 135 worklist.append(i)
84 nis.add(Item(p, 0)) 136 return frozenset(itemset)
85 return self.e_closure(nis) 137
86 def nextItemSet(self, itemset, symbol): 138 def initialItemSet(self):
87 nis = set() 139 """ Calculates the initial item set """
88 for item in itemset: 140 iis = set()
89 if item.IsShift and item.Next == symbol: 141 for p in self.grammar.productionsForName(self.grammar.start_symbol):
90 nis.add(Item(item.production, item.dotpos + 1)) 142 iis.add(Item(p, 0, 'EOF'))
91 return self.e_closure(nis) 143 return self.closure(iis)
92 def genParser(self): 144
93 states = set() 145 def nextItemSet(self, itemset, symbol):
94 goto_table = {} 146 """ Determines the next itemset for the current set and a symbol """
95 action_table = {} 147 nis = set()
96 iis = self.initialItemSet() 148 for item in itemset:
97 worklist = [iis] 149 if item.IsShift and item.Next == symbol:
98 states.add(iis) 150 nis.add(Item(item.production, item.dotpos + 1, item.look_ahead))
99 while len(worklist) > 0: 151 return self.closure(nis)
100 itemset = worklist.pop(0) 152
101 has_goto = False 153 def genParser(self):
102 for symbol in self.grammar.Symbols: 154 states = set()
103 nis = self.nextItemSet(itemset, symbol) 155 goto_table = {}
104 if nis: 156 action_table = {}
105 goto_table[(itemset, symbol)] = nis 157 self.first = self.grammar.calcFirstSets()
106 has_goto = True 158 iis = self.initialItemSet()
107 if not nis in states: 159 states.add(iis)
108 worklist.append(nis) 160
109 states.add(nis) 161 # First generate all item sets by using the nextItemset function:
110 if has_goto: 162 worklist = [iis]
111 action_table[itemset] = 'shift', 's' 163 while len(worklist) > 0:
112 for item in itemset: 164 itemset = worklist.pop(0)
113 assert item.IsShift, item 165 for symbol in self.grammar.Symbols:
114 else: 166 nis = self.nextItemSet(itemset, symbol)
115 # Check for reduce-reduce conflicts: 167 if nis:
116 assert len(itemset) == 1 168 goto_table[(itemset, symbol)] = nis
117 item = list(itemset)[0] 169 if not nis in states:
118 assert item.IsReduce 170 worklist.append(nis)
119 action_table[itemset] = 'reduce', item.production 171 states.add(nis)
120 for s, i in zip(states, range(len(states))): 172
121 print(i, ' || '.join(str(x) for x in s)) 173 # Number the states:
122 p = LRParser() 174 print('States:')
123 p.goto_table = goto_table 175 for s, i in zip(states, range(len(states))):
124 p.action_table = action_table 176 print(i, ', '.join(str(x) for x in s))
125 p.s0 = iis 177 p = LRParser()
126 return p 178 p.goto_table = goto_table
179 p.action_table = action_table
180 p.s0 = iis
181 return p
182
127 183
128 class LRParser: 184 class LRParser:
129 def parse(self, toks): 185 def parse(self, toks):
130 stack = [self.s0] 186 stack = ['EOF', self.s0]
131 while stack[-1] != 'input': 187 look_ahead = toks.pop(0)
132 action, p = self.action_table[stack[-1]] 188 while True:
133 print(action, p) 189 state = stack[-1] # top of stack
134 if action == 'shift': 190 key = (state, look_ahead)
135 stack.append(toks.pop(0)) 191 if not key in self.action_table:
136 s, i = stack[-2:] 192 raise Exception('Error parsing')
137 stack.append(self.goto_table[(s, i)]) 193 action, param = self.action_table[(state, look_ahead)]
138 elif action == 'reduce': 194 if action == 'reduce':
139 for s in p.symbols: 195 for s in param.symbols:
140 stack.pop() 196 stack.pop()
141 stack.pop() 197 stack.pop()
142 stack.append(p.name) 198 state = stack[-1]
143 s, i = stack[-2:] 199 stack.append(param.name)
144 if stack[-1] == 'input': 200 stack.append(self.goto_table[(state, param.name)])
145 break 201 elif action == 'shift':
146 stack.append(self.goto_table[(s, i)]) 202 stack.append(look_ahead)
203 s, i = stack[-2:]
204 stack.append(self.goto_table[(s, i)])
205 look_ahead = toks.pop(0)
206 elif action == 'accept':
207 break
147 208
148 # Test it: 209 # Test it:
149 # 1. define a simple grammar: 210 def test():
150 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) 211 # 1. define a simple grammar:
151 g.add_production('input', ['expression', 'EOF']) 212 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
152 g.add_production('expression', ['term']) 213 g.add_production('input', ['expression'])
153 g.add_production('expression', ['expression', '+', 'term']) 214 g.add_production('expression', ['term'])
154 g.add_production('term', ['factor']) 215 g.add_production('expression', ['expression', '+', 'term'])
155 #g.add_production('term', ['term', '*', 'factor']) 216 g.add_production('term', ['factor'])
156 g.add_production('term', ['(', 'expression', ')']) 217 g.add_production('term', ['term', '*', 'factor'])
157 g.add_production('factor', ['identifier']) 218 g.add_production('factor', ['(', 'expression', ')'])
158 g.start_symbol = 'input' 219 g.add_production('factor', ['identifier'])
159 # 2. define input: 220 g.start_symbol = 'input'
160 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] 221 # 2. define input:
161 # 3. build parser: 222 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
162 p = LRgenerator(g).genParser() 223 # 3. build parser:
163 # 4. feed input: 224 p = LRgenerator(g).genParser()
164 p.parse(tokens) 225 # 4. feed input:
165 226 p.parse(tokens)
227
228 print('Hallo')
229 if __name__ == '__main__':
230 print('Hallo')
231 test()
232