annotate python/pyyacc.py @ 184:fe2b72381a83

Added testset for pyy
author Windel Bouwman
date Fri, 24 May 2013 16:13:23 +0200
parents 216da5e46efc
children 51a6440d6398
rev   line source
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
1 """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
2 Parser generator script
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
3 """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
4
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
5 EPS = 'EPS'
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
6 EOF = 'EOF'
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
7 SHIFT = 1
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
8 REDUCE = 2
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
9 ACCEPT = 3
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
10
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
11
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
12 class Grammar:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
13 """ Defines a grammar of a language """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
14 def __init__(self, terminals):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
15 self.terminals = terminals
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
16 self.nonterminals = []
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
17 self.productions = []
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
18
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
19 def add_production(self, name, symbols):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
20 """ Add a production rule to the grammar """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
21 production = Production(name, symbols)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
22 self.productions.append(production)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
23 assert not name in self.terminals, "Cannot redefine terminal"
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
24 if not name in self.nonterminals:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
25 self.nonterminals.append(name)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
26
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
27 def productionsForName(self, name):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
28 """ Retrieve all productions for a non terminal """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
29 return [p for p in self.productions if p.name == name]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
30
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
31 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
32 def Symbols(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
33 """ Get all the symbols defined by this grammar """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
34 return self.nonterminals + self.terminals
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
35
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
36 def calcFirstSets(self):
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
37 """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
38 Calculate first sets for each grammar symbol
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
39 This is a dictionary which maps each grammar symbol
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
40 to a set of terminals that can be encountered first
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
41 when looking for the symbol.
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
42 """
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
43 first = {}
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
44 for t in self.terminals + [EOF, EPS]:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
45 first[t] = set([t])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
46 for nt in self.nonterminals:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
47 first[nt] = set()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
48 epsset = set([EPS])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
49 while True:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
50 some_change = False
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
51 for p in self.productions:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
52 rhs = set()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
53 for beta in p.symbols:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
54 rhs = rhs | (first[beta] - epsset)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
55 if not EPS in first[beta]:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
56 break
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
57 else:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
58 if EPS in first[beta]:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
59 rhs.add(EPS)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
60 if rhs - first[p.name]:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
61 first[p.name] |= rhs
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
62 some_change = True
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
63 if not some_change:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
64 break
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
65 return first
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
66
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
67 def closure(self, itemset):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
68 """ Expand itemset by using epsilon moves """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
69 worklist = list(itemset)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
70 def addIt(itm):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
71 if not itm in itemset:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
72 itemset.add(itm)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
73 worklist.append(itm)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
74 def first2(itm, la):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
75 # When using the first sets, create a copy:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
76 f = set(self.first[item.NextNext])
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
77 if EPS in f:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
78 f.discard(EPS)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
79 f.add(item.look_ahead)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
80 return f
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
81
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
82 while worklist:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
83 item = worklist.pop(0)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
84 if not item.IsShift:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
85 continue
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
86 if not (item.Next in self.nonterminals):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
87 continue
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
88 C = item.Next
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
89 for add_p in self.productionsForName(C):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
90 for b in first2(item, item.look_ahead):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
91 addIt(Item(add_p, 0, b))
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
92 return frozenset(itemset)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
93
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
94 def initialItemSet(self):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
95 """ Calculates the initial item set """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
96 iis = set()
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
97 for p in self.productionsForName(self.start_symbol):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
98 iis.add(Item(p, 0, EOF))
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
99 return self.closure(iis)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
100
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
101 def nextItemSet(self, itemset, symbol):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
102 """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
103 Determines the next itemset for the current set and a symbol
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
104 This is the goto procedure
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
105 """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
106 next_set = set()
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
107 for item in itemset:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
108 if item.can_shift_over(symbol):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
109 next_set.add(item.shifted())
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
110 return self.closure(next_set)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
111
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
112 def genCanonicalSet(self, iis):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
113 states = set()
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
114 worklist = []
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
115 goto_table = {}
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
116 def addSt(s):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
117 if not (s in states):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
118 worklist.append(s)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
119 states.add(s)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
120 addSt(iis)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
121 while len(worklist) > 0:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
122 itemset = worklist.pop(0)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
123 for symbol in self.Symbols:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
124 nis = self.nextItemSet(itemset, symbol)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
125 if not nis:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
126 continue
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
127 goto_table[(itemset, symbol)] = nis
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
128 addSt(nis)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
129 return states, goto_table
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
130
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
131 def genParser(self):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
132 """ Generates a parser from the grammar """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
133 action_table = {}
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
134 self.first = self.calcFirstSets()
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
135 iis = self.initialItemSet()
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
136
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
137 # First generate all item sets by using the nextItemset function:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
138 states, goto_table = self.genCanonicalSet(iis)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
139
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
140 # Number the states:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
141 number_states = {}
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
142 for s, i in zip(states, range(len(states))):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
143 number_states[s] = i
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
144
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
145 # Fill action table:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
146 for state in states:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
147 for item in state:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
148 if item.IsShift and item.Next in self.terminals:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
149 action_table[(state, item.Next)] = (SHIFT, 0)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
150 elif item.IsReduce:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
151 if item.look_ahead == EOF:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
152 action_table[(state, item.look_ahead)] = (ACCEPT, 0)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
153 else:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
154 action_table[(state, item.look_ahead)] = (REDUCE, item.production)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
155 else:
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
156 pass
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
157
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
158 p = LRParser(action_table)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
159 p.goto_table = goto_table
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
160 p.s0 = iis
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
161 return p
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
162
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
163 class Production:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
164 """ Production rule for a grammar """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
165 def __init__(self, name, symbols):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
166 self.name = name
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
167 self.symbols = symbols
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
168
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
169 def __repr__(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
170 return '{0} -> {1}'.format(self.name, self.symbols)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
171
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
172
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
173 class Item:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
174 def __init__(self, production, dotpos, look_ahead):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
175 self.production = production
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
176 self.dotpos = dotpos
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
177 assert self.dotpos <= len(self.production.symbols)
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
178 self.look_ahead = look_ahead
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
179
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
180 def getdata(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
181 """ Gets the members as a tuple """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
182 return (self.production, self.dotpos, self.look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
183
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
184 def __eq__(self, other):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
185 if type(other) is type(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
186 return self.getdata() == other.getdata()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
187 return False
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
188
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
189 def __hash__(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
190 return self.getdata().__hash__()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
191
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
192 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
193 def IsReduce(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
194 return self.dotpos == len(self.production.symbols)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
195
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
196 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
197 def IsShift(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
198 return not self.IsReduce
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
199
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
200 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
201 def Next(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
202 return self.production.symbols[self.dotpos]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
203
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
204 def can_shift_over(self, symbol):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
205 """ Determines if this item can shift over the given symbol """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
206 return self.IsShift and self.Next == symbol
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
207
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
208 def shifted(self):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
209 """ Creates a new item that is shifted one position """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
210 return Item(self.production, self.dotpos + 1, self.look_ahead)
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
211
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
212 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
213 def NextNext(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
214 if self.dotpos + 1 >= len(self.production.symbols):
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
215 return EPS
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
216 else:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
217 return self.production.symbols[self.dotpos + 1]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
218
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
219 def __repr__(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
220 prod = self.production
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
221 predot = ' '.join(prod.symbols[0:self.dotpos])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
222 postdot = ' '.join(prod.symbols[self.dotpos:])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
223 nt = prod.name
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
224 args = (nt, predot, postdot, self.look_ahead)
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
225 return '[{0} -> {1} . {2}, {3}]'.format(*args)
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
226
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
227
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
228 class LRParser:
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
229 """ LR parser """
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
230 def __init__(self, action_table):
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
231 self.action_table = action_table
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
232
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
233 def parse(self, toks):
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
234 stack = [EOF, self.s0]
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
235 look_ahead = toks.pop(0)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
236 while True:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
237 state = stack[-1] # top of stack
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
238 key = (state, look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
239 if not key in self.action_table:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
240 raise Exception('Error parsing')
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
241 action, param = self.action_table[(state, look_ahead)]
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
242 if action == REDUCE:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
243 for s in param.symbols:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
244 stack.pop()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
245 stack.pop()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
246 state = stack[-1]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
247 stack.append(param.name)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
248 stack.append(self.goto_table[(state, param.name)])
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
249 elif action == SHIFT:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
250 stack.append(look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
251 s, i = stack[-2:]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
252 stack.append(self.goto_table[(s, i)])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
253 look_ahead = toks.pop(0)
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
254 elif action == ACCEPT:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
255 break
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
256
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
257 def testSimpleGrammar():
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
258 # 1. define a simple grammar:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
259 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
260 g.add_production('input', ['expression'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
261 g.add_production('expression', ['term'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
262 g.add_production('expression', ['expression', '+', 'term'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
263 g.add_production('term', ['factor'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
264 g.add_production('term', ['term', '*', 'factor'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
265 g.add_production('factor', ['(', 'expression', ')'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
266 g.add_production('factor', ['identifier'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
267 g.start_symbol = 'input'
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
268 # 2. define input:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
269 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
270 # 3. build parser:
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
271 p = g.genParser()
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
272 # 4. feed input:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
273 p.parse(tokens)
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
274
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
275
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
276 if __name__ == '__main__':
184
fe2b72381a83 Added testset for pyy
Windel Bouwman
parents: 181
diff changeset
277 testSimpleGrammar()