Mercurial > lcfOS
comparison python/pyyacc.py @ 179:0f3b1adfd416
LR(0) parser
author | Windel Bouwman |
---|---|
date | Sat, 04 May 2013 18:50:36 +0200 |
parents | c694ec551f34 |
children | 25a0753da4cf |
comparison
equal
deleted
inserted
replaced
178:c694ec551f34 | 179:0f3b1adfd416 |
---|---|
1 | |
2 class GrammarError(Exception): | |
3 pass | |
4 | |
5 class Grammar: | 1 class Grammar: |
6 def __init__(self, terminals): | 2 def __init__(self, terminals): |
7 self.terminals = terminals | 3 self.terminals = terminals |
8 self.nonterminals = [] | 4 self.nonterminals = [] |
9 self.productions = [] | 5 self.productions = [] |
10 self.states = None | |
11 self.start_symbol = None | |
12 def add_production(self, name, symbols): | 6 def add_production(self, name, symbols): |
13 p = Production(name, symbols) | 7 p = Production(name, symbols) |
14 self.productions.append(p) | 8 self.productions.append(p) |
15 if not name in self.nonterminals: | 9 if not name in self.nonterminals: |
16 self.nonterminals.append(name) | 10 self.nonterminals.append(name) |
17 def set_start(self, start): | |
18 self.start_symbol = start | |
19 def get_start(self): | |
20 return self.start_symbol | |
21 StartSymbol = property(get_start, set_start) | |
22 def productionsForName(self, name): | 11 def productionsForName(self, name): |
23 return [p for p in self.productions if p.name == name] | 12 return [p for p in self.productions if p.name == name] |
24 @property | 13 @property |
25 def Symbols(self): | 14 def Symbols(self): |
26 return self.nonterminals + self.terminals | 15 return self.nonterminals + self.terminals |
27 def calcFollow(self, name): | |
28 """ Determines the follow set for a given name """ | |
29 f = set() | |
30 # TODO | |
31 return f | |
32 | |
33 def e_closure(self, S): | |
34 something_changed = True | |
35 while something_changed: | |
36 something_changed = False | |
37 worklist = list(S) | |
38 for item in worklist: | |
39 if item.IsShift and item.Next in self.nonterminals: | |
40 n = item.Next | |
41 for add_p in self.productionsForName(n): | |
42 i = Item(add_p, 0) | |
43 if not i in S: | |
44 S.add(i) | |
45 something_changed = True | |
46 return frozenset(S) | |
47 def calcInitialItemSet(self): | |
48 nis = set() | |
49 for p in self.productionsForName(self.StartSymbol): | |
50 nis.add(Item(p, 0)) | |
51 return self.e_closure(nis) | |
52 def calcNextItemSet(self, itemset, symbol): | |
53 nis = set() | |
54 for item in itemset: | |
55 if item.IsShift and item.Next == symbol: | |
56 nis.add(Item(item.production, item.dotpos + 1)) | |
57 return self.e_closure(nis) | |
58 def calcTables(self): | |
59 print(self.nonterminals + self.terminals) | |
60 self.states = set() | |
61 self.goto_table = {} | |
62 self.action_table = {} | |
63 iis = self.calcInitialItemSet() | |
64 self.s0 = iis | |
65 print(len(iis), iis) | |
66 worklist = [iis] | |
67 self.states.add(iis) | |
68 while len(worklist) > 0: | |
69 itemset = worklist.pop(0) | |
70 has_goto = False | |
71 for symbol in self.Symbols: | |
72 nis = self.calcNextItemSet(itemset, symbol) | |
73 if nis: | |
74 self.goto_table[(itemset, symbol)] = nis | |
75 has_goto = True | |
76 if not nis in self.states: | |
77 worklist.append(nis) | |
78 self.states.add(nis) | |
79 if has_goto: | |
80 self.action_table[itemset] = 'shift', 's' | |
81 for item in itemset: | |
82 assert item.IsShift, item | |
83 else: | |
84 # Check for reduce-reduce conflicts: | |
85 assert len(itemset) == 1 | |
86 item = list(itemset)[0] | |
87 assert item.IsReduce | |
88 self.action_table[itemset] = 'reduce', item.production | |
89 print(self.Symbols) | |
90 print(len(nis), nis, self.states) | |
91 for s, i in zip(self.states, range(len(self.states))): | |
92 print(i, s) | |
93 | |
94 def parse(self, toks): | |
95 stack = [self.s0] | |
96 while stack[-1] != 'input': | |
97 action, p = self.action_table[stack[-1]] | |
98 print(action, p) | |
99 if action == 'shift': | |
100 stack.append(toks.pop(0)) | |
101 s, i = stack[-2:] | |
102 stack.append(self.goto_table[(s, i)]) | |
103 else: | |
104 for s in p.symbols: | |
105 stack.pop() | |
106 stack.pop() | |
107 stack.append(p.name) | |
108 s, i = stack[-2:] | |
109 stack.append(self.goto_table[(s, i)]) | |
110 | 16 |
111 class Production: | 17 class Production: |
112 def __init__(self, name, symbols): | 18 def __init__(self, name, symbols): |
113 self.name = name | 19 self.name = name |
114 self.symbols = symbols | 20 self.symbols = symbols |
115 def __repr__(self): | 21 def __repr__(self): |
116 return '{0} -> {1}'.format(self.name, self.symbols) | 22 return '{0} -> {1}'.format(self.name, self.symbols) |
117 | 23 |
118 class Item: | 24 class Item: |
119 def __init__(self, production, dotpos): | 25 def __init__(self, production, dotpos): |
120 self.production = production | 26 self.production = production |
121 self.dotpos = dotpos | 27 self.dotpos = dotpos |
122 def __eq__(self, other): | 28 def __eq__(self, other): |
135 def Next(self): | 41 def Next(self): |
136 return self.production.symbols[self.dotpos] | 42 return self.production.symbols[self.dotpos] |
137 def matches(self, symbol): | 43 def matches(self, symbol): |
138 return self.Next == symbol | 44 return self.Next == symbol |
139 def __repr__(self): | 45 def __repr__(self): |
140 return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) | 46 return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) |
47 | |
48 class LRgenerator: | |
49 def __init__(self, g): | |
50 self.grammar = g | |
51 def e_closure(self, S): | |
52 """ Expand itemset by using epsilon moves """ | |
53 something_changed = True | |
54 while something_changed: | |
55 something_changed = False | |
56 worklist = list(S) | |
57 for item in worklist: | |
58 if item.IsShift and item.Next in self.grammar.nonterminals: | |
59 n = item.Next | |
60 for add_p in self.grammar.productionsForName(n): | |
61 i = Item(add_p, 0) | |
62 if not i in S: | |
63 S.add(i) | |
64 something_changed = True | |
65 return frozenset(S) | |
66 def initialItemSet(self): | |
67 nis = set() | |
68 for p in self.grammar.productionsForName(self.grammar.start_symbol): | |
69 nis.add(Item(p, 0)) | |
70 return self.e_closure(nis) | |
71 def nextItemSet(self, itemset, symbol): | |
72 nis = set() | |
73 for item in itemset: | |
74 if item.IsShift and item.Next == symbol: | |
75 nis.add(Item(item.production, item.dotpos + 1)) | |
76 return self.e_closure(nis) | |
77 def genParser(self): | |
78 states = set() | |
79 goto_table = {} | |
80 action_table = {} | |
81 iis = self.initialItemSet() | |
82 worklist = [iis] | |
83 states.add(iis) | |
84 while len(worklist) > 0: | |
85 itemset = worklist.pop(0) | |
86 has_goto = False | |
87 for symbol in self.grammar.Symbols: | |
88 nis = self.nextItemSet(itemset, symbol) | |
89 if nis: | |
90 goto_table[(itemset, symbol)] = nis | |
91 has_goto = True | |
92 if not nis in states: | |
93 worklist.append(nis) | |
94 states.add(nis) | |
95 if has_goto: | |
96 action_table[itemset] = 'shift', 's' | |
97 for item in itemset: | |
98 assert item.IsShift, item | |
99 else: | |
100 # Check for reduce-reduce conflicts: | |
101 assert len(itemset) == 1 | |
102 item = list(itemset)[0] | |
103 assert item.IsReduce | |
104 action_table[itemset] = 'reduce', item.production | |
105 for s, i in zip(states, range(len(states))): | |
106 print(i, ' || '.join(str(x) for x in s)) | |
107 p = LRParser() | |
108 p.goto_table = goto_table | |
109 p.action_table = action_table | |
110 p.s0 = iis | |
111 return p | |
112 | |
113 class LRParser: | |
114 def parse(self, toks): | |
115 stack = [self.s0] | |
116 while stack[-1] != 'input': | |
117 action, p = self.action_table[stack[-1]] | |
118 print(action, p) | |
119 if action == 'shift': | |
120 stack.append(toks.pop(0)) | |
121 s, i = stack[-2:] | |
122 stack.append(self.goto_table[(s, i)]) | |
123 elif action == 'reduce': | |
124 for s in p.symbols: | |
125 stack.pop() | |
126 stack.pop() | |
127 stack.append(p.name) | |
128 s, i = stack[-2:] | |
129 if stack[-1] == 'input': | |
130 break | |
131 stack.append(self.goto_table[(s, i)]) | |
141 | 132 |
142 # Test it: | 133 # Test it: |
143 # 1. define a simple grammar: | 134 # 1. define a simple grammar: |
144 g = Grammar(['EOF', 'identifier', '(', ')', '+']) | 135 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*']) |
145 g.add_production('input', ['expression', 'EOF']) | 136 g.add_production('input', ['expression', 'EOF']) |
146 g.add_production('expression', ['term']) | 137 g.add_production('expression', ['term']) |
147 g.add_production('expression', ['expression', '+', 'term']) | 138 g.add_production('expression', ['expression', '+', 'term']) |
148 g.add_production('term', ['identifier']) | 139 g.add_production('term', ['factor']) |
140 #g.add_production('term', ['term', '*', 'factor']) | |
149 g.add_production('term', ['(', 'expression', ')']) | 141 g.add_production('term', ['(', 'expression', ')']) |
150 g.StartSymbol = 'input' | 142 g.add_production('factor', ['identifier']) |
143 g.start_symbol = 'input' | |
144 # 2. define input: | |
145 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF'] | |
146 # 3. build parser: | |
147 p = LRgenerator(g).genParser() | |
148 # 4. feed input: | |
149 p.parse(tokens) | |
151 | 150 |
152 # 2. define input: | |
153 tokens = ['identifier', '+', 'identifier', 'EOF'] | |
154 | |
155 # 3. build tables | |
156 g.calcTables() | |
157 | |
158 # 4. feed input | |
159 g.parse(tokens) | |
160 |