Mercurial > lcfOS
annotate python/pyyacc.py @ 181:216da5e46efc
Changed indent to 4 spaces to comply to convention
author | Windel Bouwman |
---|---|
date | Sat, 18 May 2013 17:45:26 +0200 |
parents | 25a0753da4cf |
children | fe2b72381a83 |
rev | line source |
---|---|
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
1 """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
2 Parser generator script |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
3 """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
4 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
5 EPS = 'EPS' |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
6 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
7 |
178 | 8 class Grammar: |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
9 """ Defines a grammar of a language """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
10 def __init__(self, terminals): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
11 self.terminals = terminals |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
12 self.nonterminals = [] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
13 self.productions = [] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
14 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
15 def add_production(self, name, symbols): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
16 """ Add a production rule to the grammar """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
17 production = Production(name, symbols) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
18 self.productions.append(production) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
19 assert not name in self.terminals, "Cannot redefine terminal" |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
20 if not name in self.nonterminals: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
21 self.nonterminals.append(name) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
22 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
23 def productionsForName(self, name): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
24 """ Retrieve all productions for a non terminal """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
25 return [p for p in self.productions if p.name == name] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
26 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
27 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
28 def Symbols(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
29 """ Get all the symbols defined by this grammar """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
30 return self.nonterminals + self.terminals |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
31 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
32 def calcFirstSets(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
33 """ Calculate first sets for each grammar symbol """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
34 first = {} |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
35 for t in self.terminals + ['EOF', EPS]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
36 first[t] = set([t]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
37 for nt in self.nonterminals: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
38 first[nt] = set() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
39 epsset = set([EPS]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
40 while True: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
41 some_change = False |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
42 for p in self.productions: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
43 rhs = set() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
44 for beta in p.symbols: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
45 rhs = rhs | (first[beta] - epsset) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
46 if not EPS in first[beta]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
47 break |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
48 else: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
49 if EPS in first[beta]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
50 rhs.add(EPS) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
51 if rhs - first[p.name]: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
52 first[p.name] |= rhs |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
53 some_change = True |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
54 if not some_change: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
55 break |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
56 return first |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
57 |
178 | 58 |
59 class Production: | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
60 """ Production rule for a grammar """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
61 def __init__(self, name, symbols): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
62 self.name = name |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
63 self.symbols = symbols |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
64 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
65 def __repr__(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
66 return '{0} -> {1}'.format(self.name, self.symbols) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
67 |
179 | 68 |
178 | 69 class Item: |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
70 def __init__(self, production, dotpos, look_ahead): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
71 self.production = production |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
72 self.dotpos = dotpos |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
73 self.look_ahead = look_ahead |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
74 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
75 def getdata(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
76 """ Gets the members as a tuple """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
77 return (self.production, self.dotpos, self.look_ahead) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
78 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
79 def __eq__(self, other): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
80 if type(other) is type(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
81 return self.getdata() == other.getdata() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
82 return False |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
83 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
84 def __hash__(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
85 return self.getdata().__hash__() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
86 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
87 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
88 def IsReduce(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
89 return self.dotpos == len(self.production.symbols) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
90 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
91 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
92 def IsShift(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
93 return not self.IsReduce |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
94 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
95 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
96 def Next(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
97 return self.production.symbols[self.dotpos] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
98 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
99 @property |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
100 def NextNext(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
101 if self.dotpos + 1 >= len(self.production.symbols): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
102 return 'EPS' |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
103 else: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
104 return self.production.symbols[self.dotpos + 1] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
105 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
106 def __repr__(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
107 prod = self.production |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
108 predot = ' '.join(prod.symbols[0:self.dotpos]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
109 postdot = ' '.join(prod.symbols[self.dotpos:]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
110 nt = prod.name |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
111 args = (nt, predot, postdot, self.look_ahead) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
112 return '[{0} -> {1} _dot_ {2},{3}]'.format(*args) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
113 |
179 | 114 |
115 class LRgenerator: | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
116 """ Parser generator """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
117 def __init__(self, g): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
118 self.grammar = g |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
119 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
120 def closure(self, itemset): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
121 """ Expand itemset by using epsilon moves """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
122 worklist = list(itemset) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
123 while worklist: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
124 item = worklist.pop(0) |
179 | 125 if item.IsShift and item.Next in self.grammar.nonterminals: |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
126 for add_p in self.grammar.productionsForName(item.Next): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
127 f = self.first[item.NextNext] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
128 if 'EPS' in f: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
129 f.discard('EPS') |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
130 f.add(item.look_ahead) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
131 for b in f: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
132 i = Item(add_p, 0, b) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
133 if not i in itemset: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
134 itemset.add(i) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
135 worklist.append(i) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
136 return frozenset(itemset) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
137 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
138 def initialItemSet(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
139 """ Calculates the initial item set """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
140 iis = set() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
141 for p in self.grammar.productionsForName(self.grammar.start_symbol): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
142 iis.add(Item(p, 0, 'EOF')) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
143 return self.closure(iis) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
144 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
145 def nextItemSet(self, itemset, symbol): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
146 """ Determines the next itemset for the current set and a symbol """ |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
147 nis = set() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
148 for item in itemset: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
149 if item.IsShift and item.Next == symbol: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
150 nis.add(Item(item.production, item.dotpos + 1, item.look_ahead)) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
151 return self.closure(nis) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
152 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
153 def genParser(self): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
154 states = set() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
155 goto_table = {} |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
156 action_table = {} |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
157 self.first = self.grammar.calcFirstSets() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
158 iis = self.initialItemSet() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
159 states.add(iis) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
160 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
161 # First generate all item sets by using the nextItemset function: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
162 worklist = [iis] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
163 while len(worklist) > 0: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
164 itemset = worklist.pop(0) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
165 for symbol in self.grammar.Symbols: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
166 nis = self.nextItemSet(itemset, symbol) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
167 if nis: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
168 goto_table[(itemset, symbol)] = nis |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
169 if not nis in states: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
170 worklist.append(nis) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
171 states.add(nis) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
172 |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
173 # Number the states: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
174 print('States:') |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
175 for s, i in zip(states, range(len(states))): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
176 print(i, ', '.join(str(x) for x in s)) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
177 p = LRParser() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
178 p.goto_table = goto_table |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
179 p.action_table = action_table |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
180 p.s0 = iis |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
181 return p |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
182 |
179 | 183 |
184 class LRParser: | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
185 def parse(self, toks): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
186 stack = ['EOF', self.s0] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
187 look_ahead = toks.pop(0) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
188 while True: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
189 state = stack[-1] # top of stack |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
190 key = (state, look_ahead) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
191 if not key in self.action_table: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
192 raise Exception('Error parsing') |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
193 action, param = self.action_table[(state, look_ahead)] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
194 if action == 'reduce': |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
195 for s in param.symbols: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
196 stack.pop() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
197 stack.pop() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
198 state = stack[-1] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
199 stack.append(param.name) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
200 stack.append(self.goto_table[(state, param.name)]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
201 elif action == 'shift': |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
202 stack.append(look_ahead) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
203 s, i = stack[-2:] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
204 stack.append(self.goto_table[(s, i)]) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
205 look_ahead = toks.pop(0) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
206 elif action == 'accept': |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
207 break |
178 | 208 |
209 # Test it: | |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
210 def test(): |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
211 # 1. define a simple grammar: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
212 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
213 g.add_production('input', ['expression']) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
214 g.add_production('expression', ['term']) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
215 g.add_production('expression', ['expression', '+', 'term']) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
216 g.add_production('term', ['factor']) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
217 g.add_production('term', ['term', '*', 'factor']) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
218 g.add_production('factor', ['(', 'expression', ')']) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
219 g.add_production('factor', ['identifier']) |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
220 g.start_symbol = 'input' |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
221 # 2. define input: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
222 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
223 # 3. build parser: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
224 p = LRgenerator(g).genParser() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
225 # 4. feed input: |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
226 p.parse(tokens) |
178 | 227 |
181
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
228 print('Hallo') |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
229 if __name__ == '__main__': |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
230 print('Hallo') |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
231 test() |
216da5e46efc
Changed indent to 4 spaces to comply to convention
Windel Bouwman
parents:
180
diff
changeset
|
232 |