annotate python/pyyacc.py @ 181:216da5e46efc

Changed indent to 4 spaces to comply to convention
author Windel Bouwman
date Sat, 18 May 2013 17:45:26 +0200
parents 25a0753da4cf
children fe2b72381a83
rev   line source
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
1 """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
2 Parser generator script
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
3 """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
4
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
5 EPS = 'EPS'
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
6
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
7
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
8 class Grammar:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
9 """ Defines a grammar of a language """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
10 def __init__(self, terminals):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
11 self.terminals = terminals
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
12 self.nonterminals = []
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
13 self.productions = []
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
14
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
15 def add_production(self, name, symbols):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
16 """ Add a production rule to the grammar """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
17 production = Production(name, symbols)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
18 self.productions.append(production)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
19 assert not name in self.terminals, "Cannot redefine terminal"
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
20 if not name in self.nonterminals:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
21 self.nonterminals.append(name)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
22
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
23 def productionsForName(self, name):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
24 """ Retrieve all productions for a non terminal """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
25 return [p for p in self.productions if p.name == name]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
26
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
27 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
28 def Symbols(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
29 """ Get all the symbols defined by this grammar """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
30 return self.nonterminals + self.terminals
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
31
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
32 def calcFirstSets(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
33 """ Calculate first sets for each grammar symbol """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
34 first = {}
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
35 for t in self.terminals + ['EOF', EPS]:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
36 first[t] = set([t])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
37 for nt in self.nonterminals:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
38 first[nt] = set()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
39 epsset = set([EPS])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
40 while True:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
41 some_change = False
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
42 for p in self.productions:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
43 rhs = set()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
44 for beta in p.symbols:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
45 rhs = rhs | (first[beta] - epsset)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
46 if not EPS in first[beta]:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
47 break
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
48 else:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
49 if EPS in first[beta]:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
50 rhs.add(EPS)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
51 if rhs - first[p.name]:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
52 first[p.name] |= rhs
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
53 some_change = True
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
54 if not some_change:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
55 break
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
56 return first
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
57
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
58
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
59 class Production:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
60 """ Production rule for a grammar """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
61 def __init__(self, name, symbols):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
62 self.name = name
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
63 self.symbols = symbols
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
64
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
65 def __repr__(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
66 return '{0} -> {1}'.format(self.name, self.symbols)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
67
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
68
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
69 class Item:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
70 def __init__(self, production, dotpos, look_ahead):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
71 self.production = production
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
72 self.dotpos = dotpos
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
73 self.look_ahead = look_ahead
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
74
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
75 def getdata(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
76 """ Gets the members as a tuple """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
77 return (self.production, self.dotpos, self.look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
78
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
79 def __eq__(self, other):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
80 if type(other) is type(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
81 return self.getdata() == other.getdata()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
82 return False
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
83
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
84 def __hash__(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
85 return self.getdata().__hash__()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
86
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
87 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
88 def IsReduce(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
89 return self.dotpos == len(self.production.symbols)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
90
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
91 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
92 def IsShift(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
93 return not self.IsReduce
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
94
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
95 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
96 def Next(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
97 return self.production.symbols[self.dotpos]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
98
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
99 @property
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
100 def NextNext(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
101 if self.dotpos + 1 >= len(self.production.symbols):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
102 return 'EPS'
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
103 else:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
104 return self.production.symbols[self.dotpos + 1]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
105
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
106 def __repr__(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
107 prod = self.production
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
108 predot = ' '.join(prod.symbols[0:self.dotpos])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
109 postdot = ' '.join(prod.symbols[self.dotpos:])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
110 nt = prod.name
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
111 args = (nt, predot, postdot, self.look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
112 return '[{0} -> {1} _dot_ {2},{3}]'.format(*args)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
113
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
114
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
115 class LRgenerator:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
116 """ Parser generator """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
117 def __init__(self, g):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
118 self.grammar = g
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
119
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
120 def closure(self, itemset):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
121 """ Expand itemset by using epsilon moves """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
122 worklist = list(itemset)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
123 while worklist:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
124 item = worklist.pop(0)
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
125 if item.IsShift and item.Next in self.grammar.nonterminals:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
126 for add_p in self.grammar.productionsForName(item.Next):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
127 f = self.first[item.NextNext]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
128 if 'EPS' in f:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
129 f.discard('EPS')
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
130 f.add(item.look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
131 for b in f:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
132 i = Item(add_p, 0, b)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
133 if not i in itemset:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
134 itemset.add(i)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
135 worklist.append(i)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
136 return frozenset(itemset)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
137
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
138 def initialItemSet(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
139 """ Calculates the initial item set """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
140 iis = set()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
141 for p in self.grammar.productionsForName(self.grammar.start_symbol):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
142 iis.add(Item(p, 0, 'EOF'))
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
143 return self.closure(iis)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
144
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
145 def nextItemSet(self, itemset, symbol):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
146 """ Determines the next itemset for the current set and a symbol """
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
147 nis = set()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
148 for item in itemset:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
149 if item.IsShift and item.Next == symbol:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
150 nis.add(Item(item.production, item.dotpos + 1, item.look_ahead))
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
151 return self.closure(nis)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
152
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
153 def genParser(self):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
154 states = set()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
155 goto_table = {}
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
156 action_table = {}
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
157 self.first = self.grammar.calcFirstSets()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
158 iis = self.initialItemSet()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
159 states.add(iis)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
160
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
161 # First generate all item sets by using the nextItemset function:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
162 worklist = [iis]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
163 while len(worklist) > 0:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
164 itemset = worklist.pop(0)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
165 for symbol in self.grammar.Symbols:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
166 nis = self.nextItemSet(itemset, symbol)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
167 if nis:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
168 goto_table[(itemset, symbol)] = nis
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
169 if not nis in states:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
170 worklist.append(nis)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
171 states.add(nis)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
172
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
173 # Number the states:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
174 print('States:')
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
175 for s, i in zip(states, range(len(states))):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
176 print(i, ', '.join(str(x) for x in s))
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
177 p = LRParser()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
178 p.goto_table = goto_table
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
179 p.action_table = action_table
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
180 p.s0 = iis
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
181 return p
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
182
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
183
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
184 class LRParser:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
185 def parse(self, toks):
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
186 stack = ['EOF', self.s0]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
187 look_ahead = toks.pop(0)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
188 while True:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
189 state = stack[-1] # top of stack
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
190 key = (state, look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
191 if not key in self.action_table:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
192 raise Exception('Error parsing')
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
193 action, param = self.action_table[(state, look_ahead)]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
194 if action == 'reduce':
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
195 for s in param.symbols:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
196 stack.pop()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
197 stack.pop()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
198 state = stack[-1]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
199 stack.append(param.name)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
200 stack.append(self.goto_table[(state, param.name)])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
201 elif action == 'shift':
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
202 stack.append(look_ahead)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
203 s, i = stack[-2:]
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
204 stack.append(self.goto_table[(s, i)])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
205 look_ahead = toks.pop(0)
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
206 elif action == 'accept':
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
207 break
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
208
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
209 # Test it:
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
210 def test():
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
211 # 1. define a simple grammar:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
212 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
213 g.add_production('input', ['expression'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
214 g.add_production('expression', ['term'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
215 g.add_production('expression', ['expression', '+', 'term'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
216 g.add_production('term', ['factor'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
217 g.add_production('term', ['term', '*', 'factor'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
218 g.add_production('factor', ['(', 'expression', ')'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
219 g.add_production('factor', ['identifier'])
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
220 g.start_symbol = 'input'
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
221 # 2. define input:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
222 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
223 # 3. build parser:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
224 p = LRgenerator(g).genParser()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
225 # 4. feed input:
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
226 p.parse(tokens)
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
227
181
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
228 print('Hallo')
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
229 if __name__ == '__main__':
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
230 print('Hallo')
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
231 test()
216da5e46efc Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents: 180
diff changeset
232