Mercurial > lcfOS
comparison python/pyyacc.py @ 181:216da5e46efc
Changed indent to 4 spaces to comply to convention
author | Windel Bouwman |
---|---|
date | Sat, 18 May 2013 17:45:26 +0200 |
parents | 25a0753da4cf |
children | fe2b72381a83 |
comparison
equal
deleted
inserted
replaced
180:25a0753da4cf | 181:216da5e46efc |
---|---|
1 """ | |
2 Parser generator script | |
3 """ | |
4 | |
5 EPS = 'EPS' | |
6 | |
7 | |
1 class Grammar: | 8 class Grammar: |
2 def __init__(self, terminals): | 9 """ Defines a grammar of a language """ |
3 self.terminals = terminals | 10 def __init__(self, terminals): |
4 self.nonterminals = [] | 11 self.terminals = terminals |
5 self.productions = [] | 12 self.nonterminals = [] |
6 def add_production(self, name, symbols): | 13 self.productions = [] |
7 p = Production(name, symbols) | 14 |
8 self.productions.append(p) | 15 def add_production(self, name, symbols): |
9 if not name in self.nonterminals: | 16 """ Add a production rule to the grammar """ |
10 self.nonterminals.append(name) | 17 production = Production(name, symbols) |
11 def productionsForName(self, name): | 18 self.productions.append(production) |
12 return [p for p in self.productions if p.name == name] | 19 assert not name in self.terminals, "Cannot redefine terminal" |
13 @property | 20 if not name in self.nonterminals: |
14 def Symbols(self): | 21 self.nonterminals.append(name) |
15 return self.nonterminals + self.terminals | 22 |
16 def calcFollow(self): | 23 def productionsForName(self, name): |
17 follow = {} | 24 """ Retrieve all productions for a non terminal """ |
18 for nt in self.nonterminals: | 25 return [p for p in self.productions if p.name == name] |
19 follow[nt] = set() | 26 |
20 while True: | 27 @property |
21 change = False | 28 def Symbols(self): |
22 # 1. | 29 """ Get all the symbols defined by this grammar """ |
23 for p in self.productions: | 30 return self.nonterminals + self.terminals |
24 pass | 31 |
25 if not change: | 32 def calcFirstSets(self): |
26 break | 33 """ Calculate first sets for each grammar symbol """ |
27 return follow | 34 first = {} |
28 def calcFirst(self): | 35 for t in self.terminals + ['EOF', EPS]: |
29 first = {} | 36 first[t] = set([t]) |
30 return first | 37 for nt in self.nonterminals: |
38 first[nt] = set() | |
39 epsset = set([EPS]) | |
40 while True: | |
41 some_change = False | |
42 for p in self.productions: | |
43 rhs = set() | |
44 for beta in p.symbols: | |
45 rhs = rhs | (first[beta] - epsset) | |
46 if not EPS in first[beta]: | |
47 break | |
48 else: | |
49 if EPS in first[beta]: | |
50 rhs.add(EPS) | |
51 if rhs - first[p.name]: | |
52 first[p.name] |= rhs | |
53 some_change = True | |
54 if not some_change: | |
55 break | |
56 return first | |
57 | |
31 | 58 |
32 class Production: | 59 class Production: |
33 def __init__(self, name, symbols): | 60 """ Production rule for a grammar """ |
34 self.name = name | 61 def __init__(self, name, symbols): |
35 self.symbols = symbols | 62 self.name = name |
36 def __repr__(self): | 63 self.symbols = symbols |
37 return '{0} -> {1}'.format(self.name, self.symbols) | 64 |
65 def __repr__(self): | |
66 return '{0} -> {1}'.format(self.name, self.symbols) | |
67 | |
38 | 68 |
39 class Item: | 69 class Item: |
40 def __init__(self, production, dotpos): | 70 def __init__(self, production, dotpos, look_ahead): |
41 self.production = production | 71 self.production = production |
42 self.dotpos = dotpos | 72 self.dotpos = dotpos |
43 def __eq__(self, other): | 73 self.look_ahead = look_ahead |
44 if type(other) is type(self): | 74 |
45 return (self.production, self.dotpos) == (other.production, other.dotpos) | 75 def getdata(self): |
46 return False | 76 """ Gets the members as a tuple """ |
47 def __hash__(self): | 77 return (self.production, self.dotpos, self.look_ahead) |
48 return (self.production, self.dotpos).__hash__() | 78 |
49 @property | 79 def __eq__(self, other): |
50 def IsReduce(self): | 80 if type(other) is type(self): |
51 return self.dotpos == len(self.production.symbols) | 81 return self.getdata() == other.getdata() |
52 @property | 82 return False |
53 def IsShift(self): | 83 |
54 return not self.IsReduce | 84 def __hash__(self): |
55 @property | 85 return self.getdata().__hash__() |
56 def Next(self): | 86 |
57 return self.production.symbols[self.dotpos] | 87 @property |
58 def matches(self, symbol): | 88 def IsReduce(self): |
59 return self.Next == symbol | 89 return self.dotpos == len(self.production.symbols) |
60 def __repr__(self): | 90 |
61 return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) | 91 @property |
92 def IsShift(self): | |
93 return not self.IsReduce | |
94 | |
95 @property | |
96 def Next(self): | |
97 return self.production.symbols[self.dotpos] | |
98 | |
99 @property | |
100 def NextNext(self): | |
101 if self.dotpos + 1 >= len(self.production.symbols): | |
102 return 'EPS' | |
103 else: | |
104 return self.production.symbols[self.dotpos + 1] | |
105 | |
106 def __repr__(self): | |
107 prod = self.production | |
108 predot = ' '.join(prod.symbols[0:self.dotpos]) | |
109 postdot = ' '.join(prod.symbols[self.dotpos:]) | |
110 nt = prod.name | |
111 args = (nt, predot, postdot, self.look_ahead) | |
112 return '[{0} -> {1} _dot_ {2},{3}]'.format(*args) | |
113 | |
62 | 114 |
63 class LRgenerator: | 115 class LRgenerator: |
64 def __init__(self, g): | 116 """ Parser generator """ |
65 self.grammar = g | 117 def __init__(self, g): |
66 def e_closure(self, S): | 118 self.grammar = g |
67 """ Expand itemset by using epsilon moves """ | 119 |
68 something_changed = True | 120 def closure(self, itemset): |
69 while something_changed: | 121 """ Expand itemset by using epsilon moves """ |
70 something_changed = False | 122 worklist = list(itemset) |
71 worklist = list(S) | 123 while worklist: |
72 for item in worklist: | 124 item = worklist.pop(0) |
73 if item.IsShift and item.Next in self.grammar.nonterminals: | 125 if item.IsShift and item.Next in self.grammar.nonterminals: |
74 n = item.Next | 126 for add_p in self.grammar.productionsForName(item.Next): |
75 for add_p in self.grammar.productionsForName(n): | 127 f = self.first[item.NextNext] |
76 i = Item(add_p, 0) | 128 if 'EPS' in f: |
77 if not i in S: | 129 f.discard('EPS') |
78 S.add(i) | 130 f.add(item.look_ahead) |
79 something_changed = True | 131 for b in f: |
80 return frozenset(S) | 132 i = Item(add_p, 0, b) |
81 def initialItemSet(self): | 133 if not i in itemset: |
82 nis = set() | 134 itemset.add(i) |
83 for p in self.grammar.productionsForName(self.grammar.start_symbol): | 135 worklist.append(i) |
84 nis.add(Item(p, 0)) | 136 return frozenset(itemset) |
85 return self.e_closure(nis) | 137 |
86 def nextItemSet(self, itemset, symbol): | 138 def initialItemSet(self): |
87 nis = set() | 139 """ Calculates the initial item set """ |
88 for item in itemset: | 140 iis = set() |
89 if item.IsShift and item.Next == symbol: | 141 for p in self.grammar.productionsForName(self.grammar.start_symbol): |
90 nis.add(Item(item.production, item.dotpos + 1)) | 142 iis.add(Item(p, 0, 'EOF')) |
91 return self.e_closure(nis) | 143 return self.closure(iis) |
92 def genParser(self): | 144 |
93 states = set() | 145 def nextItemSet(self, itemset, symbol): |
94 goto_table = {} | 146 """ Determines the next itemset for the current set and a symbol """ |
95 action_table = {} | 147 nis = set() |
96 iis = self.initialItemSet() | 148 for item in itemset: |
97 worklist = [iis] | 149 if item.IsShift and item.Next == symbol: |
98 states.add(iis) | 150 nis.add(Item(item.production, item.dotpos + 1, item.look_ahead)) |
99 while len(worklist) > 0: | 151 return self.closure(nis) |
100 itemset = worklist.pop(0) | 152 |
101 has_goto = False | 153 def genParser(self): |
102 for symbol in self.grammar.Symbols: | 154 states = set() |
103 nis = self.nextItemSet(itemset, symbol) | 155 goto_table = {} |
104 if nis: | 156 action_table = {} |
105 goto_table[(itemset, symbol)] = nis | 157 self.first = self.grammar.calcFirstSets() |
106 has_goto = True | 158 iis = self.initialItemSet() |
107 if not nis in states: | 159 states.add(iis) |
108 worklist.append(nis) | 160 |
109 states.add(nis) | 161 # First generate all item sets by using the nextItemset function: |
110 if has_goto: | 162 worklist = [iis] |
111 action_table[itemset] = 'shift', 's' | 163 while len(worklist) > 0: |
112 for item in itemset: | 164 itemset = worklist.pop(0) |
113 assert item.IsShift, item | 165 for symbol in self.grammar.Symbols: |
114 else: | 166 nis = self.nextItemSet(itemset, symbol) |
115 # Check for reduce-reduce conflicts: | 167 if nis: |
116 assert len(itemset) == 1 | 168 goto_table[(itemset, symbol)] = nis |
117 item = list(itemset)[0] | 169 if not nis in states: |
118 assert item.IsReduce | 170 worklist.append(nis) |
119 action_table[itemset] = 'reduce', item.production | 171 states.add(nis) |
120 for s, i in zip(states, range(len(states))): | 172 |
121 print(i, ' || '.join(str(x) for x in s)) | 173 # Number the states: |
122 p = LRParser() | 174 print('States:') |
123 p.goto_table = goto_table | 175 for s, i in zip(states, range(len(states))): |
124 p.action_table = action_table | 176 print(i, ', '.join(str(x) for x in s)) |
125 p.s0 = iis | 177 p = LRParser() |
126 return p | 178 p.goto_table = goto_table |
179 p.action_table = action_table | |
180 p.s0 = iis | |
181 return p | |
182 | |
127 | 183 |
128 class LRParser: | 184 class LRParser: |
129 def parse(self, toks): | 185 def parse(self, toks): |
130 stack = [self.s0] | 186 stack = ['EOF', self.s0] |
131 while stack[-1] != 'input': | 187 look_ahead = toks.pop(0) |
132 action, p = self.action_table[stack[-1]] | 188 while True: |
133 print(action, p) | 189 state = stack[-1] # top of stack |
134 if action == 'shift': | 190 key = (state, look_ahead) |
135 stack.append(toks.pop(0)) | 191 if not key in self.action_table: |
136 s, i = stack[-2:] | 192 raise Exception('Error parsing') |
137 stack.append(self.goto_table[(s, i)]) | 193 action, param = self.action_table[(state, look_ahead)] |
138 elif action == 'reduce': | 194 if action == 'reduce': |
139 for s in p.symbols: | 195 for s in param.symbols: |
140 stack.pop() | 196 stack.pop() |
141 stack.pop() | 197 stack.pop() |
142 stack.append(p.name) | 198 state = stack[-1] |
143 s, i = stack[-2:] | 199 stack.append(param.name) |
144 if stack[-1] == 'input': | 200 stack.append(self.goto_table[(state, param.name)]) |
145 break | 201 elif action == 'shift': |
146 stack.append(self.goto_table[(s, i)]) | 202 stack.append(look_ahead) |
203 s, i = stack[-2:] | |
204 stack.append(self.goto_table[(s, i)]) | |
205 look_ahead = toks.pop(0) | |
206 elif action == 'accept': | |
207 break | |
147 | 208 |
148 # Test it: | 209 # Test it: |
149 # 1. define a simple grammar: | 210 def test(): |
150 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) | 211 # 1. define a simple grammar: |
151 g.add_production('input', ['expression', 'EOF']) | 212 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) |
152 g.add_production('expression', ['term']) | 213 g.add_production('input', ['expression']) |
153 g.add_production('expression', ['expression', '+', 'term']) | 214 g.add_production('expression', ['term']) |
154 g.add_production('term', ['factor']) | 215 g.add_production('expression', ['expression', '+', 'term']) |
155 #g.add_production('term', ['term', '*', 'factor']) | 216 g.add_production('term', ['factor']) |
156 g.add_production('term', ['(', 'expression', ')']) | 217 g.add_production('term', ['term', '*', 'factor']) |
157 g.add_production('factor', ['identifier']) | 218 g.add_production('factor', ['(', 'expression', ')']) |
158 g.start_symbol = 'input' | 219 g.add_production('factor', ['identifier']) |
159 # 2. define input: | 220 g.start_symbol = 'input' |
160 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] | 221 # 2. define input: |
161 # 3. build parser: | 222 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] |
162 p = LRgenerator(g).genParser() | 223 # 3. build parser: |
163 # 4. feed input: | 224 p = LRgenerator(g).genParser() |
164 p.parse(tokens) | 225 # 4. feed input: |
165 | 226 p.parse(tokens) |
227 | |
228 print('Hallo') | |
229 if __name__ == '__main__': | |
230 print('Hallo') | |
231 test() | |
232 |