comparison python/pyyacc.py @ 179:0f3b1adfd416

LR(0) parser
author Windel Bouwman
date Sat, 04 May 2013 18:50:36 +0200
parents c694ec551f34
children 25a0753da4cf
comparison
equal deleted inserted replaced
178:c694ec551f34 179:0f3b1adfd416
1
2 class GrammarError(Exception):
3 pass
4
5 class Grammar: 1 class Grammar:
6 def __init__(self, terminals): 2 def __init__(self, terminals):
7 self.terminals = terminals 3 self.terminals = terminals
8 self.nonterminals = [] 4 self.nonterminals = []
9 self.productions = [] 5 self.productions = []
10 self.states = None
11 self.start_symbol = None
12 def add_production(self, name, symbols): 6 def add_production(self, name, symbols):
13 p = Production(name, symbols) 7 p = Production(name, symbols)
14 self.productions.append(p) 8 self.productions.append(p)
15 if not name in self.nonterminals: 9 if not name in self.nonterminals:
16 self.nonterminals.append(name) 10 self.nonterminals.append(name)
17 def set_start(self, start):
18 self.start_symbol = start
19 def get_start(self):
20 return self.start_symbol
21 StartSymbol = property(get_start, set_start)
22 def productionsForName(self, name): 11 def productionsForName(self, name):
23 return [p for p in self.productions if p.name == name] 12 return [p for p in self.productions if p.name == name]
24 @property 13 @property
25 def Symbols(self): 14 def Symbols(self):
26 return self.nonterminals + self.terminals 15 return self.nonterminals + self.terminals
27 def calcFollow(self, name):
28 """ Determines the follow set for a given name """
29 f = set()
30 # TODO
31 return f
32
33 def e_closure(self, S):
34 something_changed = True
35 while something_changed:
36 something_changed = False
37 worklist = list(S)
38 for item in worklist:
39 if item.IsShift and item.Next in self.nonterminals:
40 n = item.Next
41 for add_p in self.productionsForName(n):
42 i = Item(add_p, 0)
43 if not i in S:
44 S.add(i)
45 something_changed = True
46 return frozenset(S)
47 def calcInitialItemSet(self):
48 nis = set()
49 for p in self.productionsForName(self.StartSymbol):
50 nis.add(Item(p, 0))
51 return self.e_closure(nis)
52 def calcNextItemSet(self, itemset, symbol):
53 nis = set()
54 for item in itemset:
55 if item.IsShift and item.Next == symbol:
56 nis.add(Item(item.production, item.dotpos + 1))
57 return self.e_closure(nis)
58 def calcTables(self):
59 print(self.nonterminals + self.terminals)
60 self.states = set()
61 self.goto_table = {}
62 self.action_table = {}
63 iis = self.calcInitialItemSet()
64 self.s0 = iis
65 print(len(iis), iis)
66 worklist = [iis]
67 self.states.add(iis)
68 while len(worklist) > 0:
69 itemset = worklist.pop(0)
70 has_goto = False
71 for symbol in self.Symbols:
72 nis = self.calcNextItemSet(itemset, symbol)
73 if nis:
74 self.goto_table[(itemset, symbol)] = nis
75 has_goto = True
76 if not nis in self.states:
77 worklist.append(nis)
78 self.states.add(nis)
79 if has_goto:
80 self.action_table[itemset] = 'shift', 's'
81 for item in itemset:
82 assert item.IsShift, item
83 else:
84 # Check for reduce-reduce conflicts:
85 assert len(itemset) == 1
86 item = list(itemset)[0]
87 assert item.IsReduce
88 self.action_table[itemset] = 'reduce', item.production
89 print(self.Symbols)
90 print(len(nis), nis, self.states)
91 for s, i in zip(self.states, range(len(self.states))):
92 print(i, s)
93
94 def parse(self, toks):
95 stack = [self.s0]
96 while stack[-1] != 'input':
97 action, p = self.action_table[stack[-1]]
98 print(action, p)
99 if action == 'shift':
100 stack.append(toks.pop(0))
101 s, i = stack[-2:]
102 stack.append(self.goto_table[(s, i)])
103 else:
104 for s in p.symbols:
105 stack.pop()
106 stack.pop()
107 stack.append(p.name)
108 s, i = stack[-2:]
109 stack.append(self.goto_table[(s, i)])
110 16
111 class Production: 17 class Production:
112 def __init__(self, name, symbols): 18 def __init__(self, name, symbols):
113 self.name = name 19 self.name = name
114 self.symbols = symbols 20 self.symbols = symbols
115 def __repr__(self): 21 def __repr__(self):
116 return '{0} -> {1}'.format(self.name, self.symbols) 22 return '{0} -> {1}'.format(self.name, self.symbols)
117 23
118 class Item: 24 class Item:
119 def __init__(self, production, dotpos): 25 def __init__(self, production, dotpos):
120 self.production = production 26 self.production = production
121 self.dotpos = dotpos 27 self.dotpos = dotpos
122 def __eq__(self, other): 28 def __eq__(self, other):
135 def Next(self): 41 def Next(self):
136 return self.production.symbols[self.dotpos] 42 return self.production.symbols[self.dotpos]
137 def matches(self, symbol): 43 def matches(self, symbol):
138 return self.Next == symbol 44 return self.Next == symbol
139 def __repr__(self): 45 def __repr__(self):
140 return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) 46 return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))
47
48 class LRgenerator:
49 def __init__(self, g):
50 self.grammar = g
51 def e_closure(self, S):
52 """ Expand itemset by using epsilon moves """
53 something_changed = True
54 while something_changed:
55 something_changed = False
56 worklist = list(S)
57 for item in worklist:
58 if item.IsShift and item.Next in self.grammar.nonterminals:
59 n = item.Next
60 for add_p in self.grammar.productionsForName(n):
61 i = Item(add_p, 0)
62 if not i in S:
63 S.add(i)
64 something_changed = True
65 return frozenset(S)
66 def initialItemSet(self):
67 nis = set()
68 for p in self.grammar.productionsForName(self.grammar.start_symbol):
69 nis.add(Item(p, 0))
70 return self.e_closure(nis)
71 def nextItemSet(self, itemset, symbol):
72 nis = set()
73 for item in itemset:
74 if item.IsShift and item.Next == symbol:
75 nis.add(Item(item.production, item.dotpos + 1))
76 return self.e_closure(nis)
77 def genParser(self):
78 states = set()
79 goto_table = {}
80 action_table = {}
81 iis = self.initialItemSet()
82 worklist = [iis]
83 states.add(iis)
84 while len(worklist) > 0:
85 itemset = worklist.pop(0)
86 has_goto = False
87 for symbol in self.grammar.Symbols:
88 nis = self.nextItemSet(itemset, symbol)
89 if nis:
90 goto_table[(itemset, symbol)] = nis
91 has_goto = True
92 if not nis in states:
93 worklist.append(nis)
94 states.add(nis)
95 if has_goto:
96 action_table[itemset] = 'shift', 's'
97 for item in itemset:
98 assert item.IsShift, item
99 else:
100 # Check for reduce-reduce conflicts:
101 assert len(itemset) == 1
102 item = list(itemset)[0]
103 assert item.IsReduce
104 action_table[itemset] = 'reduce', item.production
105 for s, i in zip(states, range(len(states))):
106 print(i, ' || '.join(str(x) for x in s))
107 p = LRParser()
108 p.goto_table = goto_table
109 p.action_table = action_table
110 p.s0 = iis
111 return p
112
113 class LRParser:
114 def parse(self, toks):
115 stack = [self.s0]
116 while stack[-1] != 'input':
117 action, p = self.action_table[stack[-1]]
118 print(action, p)
119 if action == 'shift':
120 stack.append(toks.pop(0))
121 s, i = stack[-2:]
122 stack.append(self.goto_table[(s, i)])
123 elif action == 'reduce':
124 for s in p.symbols:
125 stack.pop()
126 stack.pop()
127 stack.append(p.name)
128 s, i = stack[-2:]
129 if stack[-1] == 'input':
130 break
131 stack.append(self.goto_table[(s, i)])
141 132
142 # Test it: 133 # Test it:
143 # 1. define a simple grammar: 134 # 1. define a simple grammar:
144 g = Grammar(['EOF', 'identifier', '(', ')', '+']) 135 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
145 g.add_production('input', ['expression', 'EOF']) 136 g.add_production('input', ['expression', 'EOF'])
146 g.add_production('expression', ['term']) 137 g.add_production('expression', ['term'])
147 g.add_production('expression', ['expression', '+', 'term']) 138 g.add_production('expression', ['expression', '+', 'term'])
148 g.add_production('term', ['identifier']) 139 g.add_production('term', ['factor'])
140 #g.add_production('term', ['term', '*', 'factor'])
149 g.add_production('term', ['(', 'expression', ')']) 141 g.add_production('term', ['(', 'expression', ')'])
150 g.StartSymbol = 'input' 142 g.add_production('factor', ['identifier'])
143 g.start_symbol = 'input'
144 # 2. define input:
145 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
146 # 3. build parser:
147 p = LRgenerator(g).genParser()
148 # 4. feed input:
149 p.parse(tokens)
151 150
152 # 2. define input:
153 tokens = ['identifier', '+', 'identifier', 'EOF']
154
155 # 3. build tables
156 g.calcTables()
157
158 # 4. feed input
159 g.parse(tokens)
160