annotate python/pyyacc.py @ 209:07bfea4c1ed7

Added codegen test
author Windel Bouwman
date Sat, 29 Jun 2013 10:08:46 +0200
parents 37ac6c016e0f
children 494828a7adf1
rev   line source
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
1 """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
2 Parser generator script
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
3 """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
4
191
6b2bec5653f1 Added assembler testset
Windel Bouwman
parents: 186
diff changeset
5 from ppci import Token
6b2bec5653f1 Added assembler testset
Windel Bouwman
parents: 186
diff changeset
6
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
# Pseudo symbols used next to the user supplied terminals:
EPS = 'EPS'  # Marks the empty string in first sets
EOF = 'EOF'  # Look ahead symbol that marks the end of the input

# Kinds of action that are stored in the action table:
SHIFT, REDUCE, ACCEPT = 1, 2, 3
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
12
185
51a6440d6398 Fixed LR(1) parser
Windel Bouwman
parents: 184
diff changeset
class ParserGenerationException(Exception):
    """ Signals a problem found while building a parser from a grammar """
51a6440d6398 Fixed LR(1) parser
Windel Bouwman
parents: 184
diff changeset
16
51a6440d6398 Fixed LR(1) parser
Windel Bouwman
parents: 184
diff changeset
17
51a6440d6398 Fixed LR(1) parser
Windel Bouwman
parents: 184
diff changeset
class ParserException(Exception):
    """ Signals a failure that occurred while parsing input """
51a6440d6398 Fixed LR(1) parser
Windel Bouwman
parents: 184
diff changeset
21
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
22
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
class Grammar:
    """
        Defines a grammar of a language.

        A grammar consists of a list of terminals, a list of nonterminals
        (collected from the names of the added productions) and a list of
        productions. The attribute start_symbol must be assigned before a
        parser is generated with genParser.
    """
    def __init__(self, terminals):
        self.terminals = terminals
        self.nonterminals = []
        self.productions = []
        self.start_symbol = None  # To be assigned by the user of the grammar
        self._first = None  # Cached first set

    def add_production(self, name, symbols, f=None):
        """
            Add a production rule to the grammar.

            name is the nonterminal that is defined, symbols is the sequence
            of symbols it derives and f is stored on the production as is.
            Raises ParserGenerationException when name is a terminal.
        """
        # Validate before touching any state, so a rejected rule does not
        # stay behind in the list of productions:
        if name in self.terminals:
            raise ParserGenerationException("Cannot redefine terminal {0}".format(name))
        self.productions.append(Production(name, symbols, f))
        if name not in self.nonterminals:
            self.nonterminals.append(name)
        self._first = None  # Invalidate cached version

    def productionsForName(self, name):
        """ Retrieve all productions for a non terminal """
        return [p for p in self.productions if p.name == name]

    @property
    def Symbols(self):
        """ Get all the symbols defined by this grammar """
        return self.nonterminals + list(self.terminals)

    @property
    def first(self):
        """
            The first set is a mapping from a grammar symbol to the set of
            all terminal symbols that can be the first terminal when
            looking for the grammar symbol. The mapping is calculated on
            first use and cached until a production is added.
        """
        if self._first is None:
            self._first = self.calcFirstSets()
        return self._first

    def calcFirstSets(self):
        """
            Calculate first sets for each grammar symbol
            This is a dictionary which maps each grammar symbol
            to a set of terminals that can be encountered first
            when looking for the symbol. EPS is a member of the set
            when the symbol can derive the empty string.
        """
        first = {}
        for t in list(self.terminals) + [EOF, EPS]:
            first[t] = set([t])
        for nt in self.nonterminals:
            first[nt] = set()
        epsset = {EPS}
        # Iterate until no first set grows anymore (fixed point):
        some_change = True
        while some_change:
            some_change = False
            for p in self.productions:
                rhs = set()
                for beta in p.symbols:
                    rhs |= first[beta] - epsset
                    if EPS not in first[beta]:
                        break
                else:
                    # No break: every symbol can vanish, or there are no
                    # symbols at all, so the production derives empty.
                    rhs.add(EPS)
                if rhs - first[p.name]:
                    first[p.name] |= rhs
                    some_change = True
        return first

    def closure(self, itemset):
        """
            Expand itemset by using epsilon moves.

            The given collection is not modified, the expanded set is
            returned as a frozenset.
        """
        closed = set(itemset)
        worklist = list(closed)
        first = self.first

        def look_aheads(itm):
            """ First set of the symbols after itm.Next, then look ahead """
            result = set()
            for symbol in itm.production.symbols[itm.dotpos + 1:]:
                f = first[symbol]
                result |= f - {EPS}
                if EPS not in f:
                    return result
            # Everything behind the next symbol can be empty, so the look
            # ahead of the item itself can follow as well:
            result.add(itm.look_ahead)
            return result

        # Start of algorithm:
        while worklist:
            # The processing order does not influence the resulting set
            item = worklist.pop()
            if not item.IsShift:
                continue
            C = item.Next
            if C not in self.nonterminals:
                continue
            new_productions = self.productionsForName(C)
            for b in look_aheads(item):
                for add_p in new_productions:
                    new_item = Item(add_p, 0, b)
                    if new_item not in closed:
                        closed.add(new_item)
                        worklist.append(new_item)
        return frozenset(closed)

    def initialItemSet(self):
        """
            Calculates the initial item set.

            Raises ParserGenerationException when no start symbol was set.
        """
        if self.start_symbol is None:
            raise ParserGenerationException('No start symbol defined')
        iis = set()
        for p in self.productionsForName(self.start_symbol):
            iis.add(Item(p, 0, EOF))
        return self.closure(iis)

    def nextItemSet(self, itemset, symbol):
        """
            Determines the next itemset for the current set and a symbol
            This is the goto procedure
        """
        next_set = set()
        for item in itemset:
            if item.can_shift_over(symbol):
                next_set.add(item.shifted())
        return self.closure(next_set)

    def genCanonicalSet(self, iis):
        """
            Generate all item sets reachable from the initial item set.

            Returns a tuple (states, transitions): states is the list of
            item sets and transitions maps (state index, symbol) to the
            index of the state that is reached over that symbol.
        """
        states = [iis]
        # Item set -> position in states, avoids scanning the list:
        numbers = {frozenset(iis): 0}
        transitions = {}
        symbols = self.Symbols
        current = 0
        # New states are appended while iterating, so states doubles as
        # the worklist:
        while current < len(states):
            itemset = states[current]
            for symbol in symbols:
                nis = self.nextItemSet(itemset, symbol)
                if not nis:
                    continue
                if nis not in numbers:
                    numbers[nis] = len(states)
                    states.append(nis)
                transitions[(current, symbol)] = numbers[nis]
            current += 1
        return states, transitions

    def checkSymbols(self):
        """
            Checks no symbols are undefined.

            Raises ParserGenerationException for the first symbol in a
            production that is not a symbol of this grammar and not EPS.
        """
        known = set(self.Symbols)
        known.add(EPS)
        for production in self.productions:
            for symbol in production.symbols:
                if symbol not in known:
                    raise ParserGenerationException('Symbol {0} undefined'.format(symbol))

    def genParser(self):
        """
            Generates a parser from the grammar.

            Returns an LRParser constructed from the action table, the goto
            table and the start symbol. Raises ParserGenerationException
            when a symbol is undefined, when no start symbol is set or when
            a conflict other than shift / reduce is found.
        """
        self.checkSymbols()
        action_table = {}
        goto_table = {}
        iis = self.initialItemSet()

        # First generate all item sets by using the nextItemset function:
        states, transitions = self.genCanonicalSet(iis)

        def action_str(act):
            """ Textual form of an action, used in the conflict message """
            a, p = act
            if a == SHIFT:
                return 'Shift {0}'.format(p)
            elif a == REDUCE:
                return 'Reduce {0}'.format(p)
            return 'Other'

        def setAction(state, t, action):
            """ Store action for (state, t), resolve or report conflicts """
            key = (state, t)
            if key not in action_table:
                action_table[key] = action
                return
            action2 = action_table[key]
            if action == action2:
                return
            if (action2[0] == REDUCE) and (action[0] == SHIFT):
                # Automatically resolve and do the shift action!
                # Simple, but almost always what you want!!
                action_table[key] = action
            elif (action2[0] == SHIFT) and (action[0] == REDUCE):
                # Same conflict seen in the other order: keep the shift
                pass
            else:
                a1 = action_str(action)
                a2 = action_str(action2)
                raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2))

        # Fill action table:
        for state_nr, state in enumerate(states):
            # Detect conflicts:
            for item in state:
                if item.IsShift and item.Next in self.terminals:
                    # Rule 1, a shift item:
                    nextstate = transitions[(state_nr, item.Next)]
                    setAction(state_nr, item.Next, (SHIFT, nextstate))
                if item.IsReduce:
                    if item.production.name == self.start_symbol and item.look_ahead == EOF:
                        # Rule 3: accept:
                        setAction(state_nr, item.look_ahead, (ACCEPT, item.production))
                    else:
                        # Rule 2, reduce item:
                        setAction(state_nr, item.look_ahead, (REDUCE, item.production))
            # Transitions over nonterminals form the goto table:
            for nt in self.nonterminals:
                key = (state_nr, nt)
                if key in transitions:
                    goto_table[key] = transitions[key]

        return LRParser(action_table, goto_table, self.start_symbol)
185
51a6440d6398 Fixed LR(1) parser
Windel Bouwman
parents: 184
diff changeset
222
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
223
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
class Production:
    """ A single grammar rule: name derives the sequence of symbols """
    def __init__(self, name, symbols, f=None):
        # f is only stored here, this class does not use it itself
        self.name, self.symbols, self.f = name, symbols, f

    def __repr__(self):
        template = '{0} -> {1}'
        return template.format(self.name, self.symbols)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
233
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
234
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
235 class Item:
186
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
236 """
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
237 Represents a partially parsed item
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
238 It has a production it is looking for, a position
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
239 in this production called the 'dot' and a look ahead
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
240 symbol that must follow this item.
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
241 """
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
242 def __init__(self, production, dotpos, look_ahead):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
243 self.production = production
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
244 self.dotpos = dotpos
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
245 assert self.dotpos <= len(self.production.symbols)
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
246 self.look_ahead = look_ahead
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
247
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
248 def getdata(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
249 """ Gets the members as a tuple """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
250 return (self.production, self.dotpos, self.look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
251
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
252 def __eq__(self, other):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
253 if type(other) is type(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
254 return self.getdata() == other.getdata()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
255 return False
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
256
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
257 def __hash__(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
258 return self.getdata().__hash__()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
259
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
260 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
261 def IsReduce(self):
186
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
262 """ Check if this item has the dot at the end """
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
263 return self.dotpos == len(self.production.symbols)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
264
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
265 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
266 def IsShift(self):
186
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
267 """ Check if this item is a shift item, i.e. the dot can proceed """
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
268 return not self.IsReduce
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
269
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
270 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
271 def Next(self):
186
46d62dadd61b Improved testsuite
Windel Bouwman
parents: 185
diff changeset
272 """ Returns the symbol after the dot """
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
273 return self.production.symbols[self.dotpos]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
274
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
def can_shift_over(self, symbol):
    """ Tell whether the dot of this item may move over the given symbol """
    # A non-shift item has nothing after the dot to compare against
    if not self.IsShift:
        return False
    return self.Next == symbol
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
278
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
def shifted(self):
    """ Build a new item for the same production and look-ahead,
    with the dot advanced by one position """
    advanced_dot = self.dotpos + 1
    return Item(self.production, advanced_dot, self.look_ahead)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
282
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
@property
def NextNext(self):
    """ Give the symbol one position past the dot, or EPS when the
    production has no symbol at that position """
    rhs = self.production.symbols
    following = self.dotpos + 1
    return rhs[following] if following < len(rhs) else EPS
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
290
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
def __repr__(self):
    """ Render the item as '[name -> before . after -> look_ahead]' """
    rhs = self.production.symbols
    before_dot = ' '.join(rhs[:self.dotpos])
    after_dot = ' '.join(rhs[self.dotpos:])
    return '[{0} -> {1} . {2} -> {3}]'.format(
        self.production.name, before_dot, after_dot, self.look_ahead)
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
298
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
299
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
class LRParser:
    """ Table driven LR parser.

    Drives a shift/reduce automaton described by an action table and a
    goto table over a stream of tokens. """
    def __init__(self, action_table, goto_table, start_symbol):
        """ Store the tables used while parsing.

        action_table: mapping from (state, token type) to (action, param)
        goto_table: mapping from (state, production name) to the next state
        start_symbol: start symbol of the grammar """
        self.action_table = action_table
        self.goto_table = goto_table
        self.start_symbol = start_symbol

    @staticmethod
    def _next_token(toks):
        """ Fetch the next token from iterator toks, or an EOF token when
        the iterator is exhausted """
        try:
            tok = next(toks)
        except StopIteration:
            tok = Token(EOF, EOF)
        assert isinstance(tok, Token)
        return tok

    @staticmethod
    def _pop_rule_args(production, stack, r_data_stack):
        """ Pop one (symbol, state) pair and one data value per symbol of
        production, and return the data values in left-to-right order """
        args = []
        for _ in production.symbols:
            stack.pop()  # state
            stack.pop()  # symbol
            args.append(r_data_stack.pop())
        args.reverse()
        return args

    def parse(self, toks):
        """ Parse an iterable with tokens.

        toks may be any iterable of Token objects (list, generator, ...).
        Returns the value produced by the semantic function of the
        accepting production, or None when that production has no function.
        Raises ParserException when no action exists for the current
        state and look-ahead token, or when the action code is unknown. """
        assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks))
        # Accept plain iterables too, not only iterators:
        toks = iter(toks)
        # stack holds state 0 followed by alternating symbol, state entries
        stack = [0]
        # r_data_stack holds one semantic value per symbol on the stack
        r_data_stack = []
        result = None
        look_ahead = self._next_token(toks)
        # TODO: exit on this condition (the loop is normally left via ACCEPT):
        while stack != [0, self.start_symbol, 2222]:
            state = stack[-1]  # top of stack
            key = (state, look_ahead.typ)
            if key not in self.action_table:
                raise ParserException('Error parsing at character {0}'.format(look_ahead))
            action, param = self.action_table[key]
            if action == REDUCE:
                # param is the production to reduce by
                f_args = self._pop_rule_args(param, stack, r_data_stack)
                r_data = param.f(*f_args) if param.f else None
                state = stack[-1]
                stack.append(param.name)
                stack.append(self.goto_table[(state, param.name)])
                r_data_stack.append(r_data)
            elif action == SHIFT:
                # param is the state to enter after consuming the token
                stack.append(look_ahead.typ)
                stack.append(param)
                r_data_stack.append(look_ahead.val)
                look_ahead = self._next_token(toks)
            elif action == ACCEPT:
                # Pop last rule data off the stack:
                f_args = self._pop_rule_args(param, stack, r_data_stack)
                if param.f:
                    result = param.f(*f_args)
                # Break out!
                break
            else:
                # An unknown action would never change the stack and so
                # would loop forever; report it instead.
                raise ParserException('Unknown parser action {0}'.format(action))
        # At exit, the stack must be 1 long
        # TODO: fix that this holds:
        #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack)
        return result
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
363