178
|
1 class Grammar:
|
|
2 def __init__(self, terminals):
|
|
3 self.terminals = terminals
|
|
4 self.nonterminals = []
|
|
5 self.productions = []
|
|
6 def add_production(self, name, symbols):
|
|
7 p = Production(name, symbols)
|
|
8 self.productions.append(p)
|
|
9 if not name in self.nonterminals:
|
|
10 self.nonterminals.append(name)
|
|
11 def productionsForName(self, name):
|
|
12 return [p for p in self.productions if p.name == name]
|
|
13 @property
|
|
14 def Symbols(self):
|
|
15 return self.nonterminals + self.terminals
|
180
|
16 def calcFollow(self):
|
|
17 follow = {}
|
|
18 for nt in self.nonterminals:
|
|
19 follow[nt] = set()
|
|
20 while True:
|
|
21 change = False
|
|
22 # 1.
|
|
23 for p in self.productions:
|
|
24 pass
|
|
25 if not change:
|
|
26 break
|
|
27 return follow
|
|
28 def calcFirst(self):
|
|
29 first = {}
|
|
30 return first
|
178
|
31
|
|
32 class Production:
|
|
33 def __init__(self, name, symbols):
|
|
34 self.name = name
|
|
35 self.symbols = symbols
|
|
36 def __repr__(self):
|
|
37 return '{0} -> {1}'.format(self.name, self.symbols)
|
179
|
38
|
178
|
39 class Item:
|
|
40 def __init__(self, production, dotpos):
|
|
41 self.production = production
|
|
42 self.dotpos = dotpos
|
|
43 def __eq__(self, other):
|
|
44 if type(other) is type(self):
|
|
45 return (self.production, self.dotpos) == (other.production, other.dotpos)
|
|
46 return False
|
|
47 def __hash__(self):
|
|
48 return (self.production, self.dotpos).__hash__()
|
|
49 @property
|
|
50 def IsReduce(self):
|
|
51 return self.dotpos == len(self.production.symbols)
|
|
52 @property
|
|
53 def IsShift(self):
|
|
54 return not self.IsReduce
|
|
55 @property
|
|
56 def Next(self):
|
|
57 return self.production.symbols[self.dotpos]
|
|
58 def matches(self, symbol):
|
|
59 return self.Next == symbol
|
|
60 def __repr__(self):
|
179
|
61 return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))
|
|
62
|
|
63 class LRgenerator:
|
|
64 def __init__(self, g):
|
|
65 self.grammar = g
|
|
66 def e_closure(self, S):
|
|
67 """ Expand itemset by using epsilon moves """
|
|
68 something_changed = True
|
|
69 while something_changed:
|
|
70 something_changed = False
|
|
71 worklist = list(S)
|
|
72 for item in worklist:
|
|
73 if item.IsShift and item.Next in self.grammar.nonterminals:
|
|
74 n = item.Next
|
|
75 for add_p in self.grammar.productionsForName(n):
|
|
76 i = Item(add_p, 0)
|
|
77 if not i in S:
|
|
78 S.add(i)
|
|
79 something_changed = True
|
|
80 return frozenset(S)
|
|
81 def initialItemSet(self):
|
|
82 nis = set()
|
|
83 for p in self.grammar.productionsForName(self.grammar.start_symbol):
|
|
84 nis.add(Item(p, 0))
|
|
85 return self.e_closure(nis)
|
|
86 def nextItemSet(self, itemset, symbol):
|
|
87 nis = set()
|
|
88 for item in itemset:
|
|
89 if item.IsShift and item.Next == symbol:
|
|
90 nis.add(Item(item.production, item.dotpos + 1))
|
|
91 return self.e_closure(nis)
|
|
92 def genParser(self):
|
|
93 states = set()
|
|
94 goto_table = {}
|
|
95 action_table = {}
|
|
96 iis = self.initialItemSet()
|
|
97 worklist = [iis]
|
|
98 states.add(iis)
|
|
99 while len(worklist) > 0:
|
|
100 itemset = worklist.pop(0)
|
|
101 has_goto = False
|
|
102 for symbol in self.grammar.Symbols:
|
|
103 nis = self.nextItemSet(itemset, symbol)
|
|
104 if nis:
|
|
105 goto_table[(itemset, symbol)] = nis
|
|
106 has_goto = True
|
|
107 if not nis in states:
|
|
108 worklist.append(nis)
|
|
109 states.add(nis)
|
|
110 if has_goto:
|
|
111 action_table[itemset] = 'shift', 's'
|
|
112 for item in itemset:
|
|
113 assert item.IsShift, item
|
|
114 else:
|
|
115 # Check for reduce-reduce conflicts:
|
|
116 assert len(itemset) == 1
|
|
117 item = list(itemset)[0]
|
|
118 assert item.IsReduce
|
|
119 action_table[itemset] = 'reduce', item.production
|
|
120 for s, i in zip(states, range(len(states))):
|
|
121 print(i, ' || '.join(str(x) for x in s))
|
|
122 p = LRParser()
|
|
123 p.goto_table = goto_table
|
|
124 p.action_table = action_table
|
|
125 p.s0 = iis
|
|
126 return p
|
|
127
|
|
128 class LRParser:
|
|
129 def parse(self, toks):
|
|
130 stack = [self.s0]
|
|
131 while stack[-1] != 'input':
|
|
132 action, p = self.action_table[stack[-1]]
|
|
133 print(action, p)
|
|
134 if action == 'shift':
|
|
135 stack.append(toks.pop(0))
|
|
136 s, i = stack[-2:]
|
|
137 stack.append(self.goto_table[(s, i)])
|
|
138 elif action == 'reduce':
|
|
139 for s in p.symbols:
|
|
140 stack.pop()
|
|
141 stack.pop()
|
|
142 stack.append(p.name)
|
|
143 s, i = stack[-2:]
|
|
144 if stack[-1] == 'input':
|
|
145 break
|
|
146 stack.append(self.goto_table[(s, i)])
|
178
|
147
|
|
148 # Test it:
|
|
149 # 1. define a simple grammar:
|
179
|
150 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
|
178
|
151 g.add_production('input', ['expression', 'EOF'])
|
|
152 g.add_production('expression', ['term'])
|
|
153 g.add_production('expression', ['expression', '+', 'term'])
|
179
|
154 g.add_production('term', ['factor'])
|
|
155 #g.add_production('term', ['term', '*', 'factor'])
|
178
|
156 g.add_production('term', ['(', 'expression', ')'])
|
179
|
157 g.add_production('factor', ['identifier'])
|
|
158 g.start_symbol = 'input'
|
178
|
159 # 2. define input:
|
179
|
160 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
|
|
161 # 3. build parser:
|
|
162 p = LRgenerator(g).genParser()
|
|
163 # 4. feed input:
|
|
164 p.parse(tokens)
|
178
|
165
|