Mercurial > lcfOS
comparison python/pyyacc.py @ 178:c694ec551f34
Added lex yacc test scripts
author | Windel Bouwman |
---|---|
date | Sat, 04 May 2013 12:07:17 +0200 |
parents | |
children | 0f3b1adfd416 |
comparison
equal
deleted
inserted
replaced
177:460db5669efa | 178:c694ec551f34 |
---|---|
1 | |
2 class GrammarError(Exception): | |
3 pass | |
4 | |
5 class Grammar: | |
6 def __init__(self, terminals): | |
7 self.terminals = terminals | |
8 self.nonterminals = [] | |
9 self.productions = [] | |
10 self.states = None | |
11 self.start_symbol = None | |
12 def add_production(self, name, symbols): | |
13 p = Production(name, symbols) | |
14 self.productions.append(p) | |
15 if not name in self.nonterminals: | |
16 self.nonterminals.append(name) | |
17 def set_start(self, start): | |
18 self.start_symbol = start | |
19 def get_start(self): | |
20 return self.start_symbol | |
21 StartSymbol = property(get_start, set_start) | |
22 def productionsForName(self, name): | |
23 return [p for p in self.productions if p.name == name] | |
24 @property | |
25 def Symbols(self): | |
26 return self.nonterminals + self.terminals | |
27 def calcFollow(self, name): | |
28 """ Determines the follow set for a given name """ | |
29 f = set() | |
30 # TODO | |
31 return f | |
32 | |
33 def e_closure(self, S): | |
34 something_changed = True | |
35 while something_changed: | |
36 something_changed = False | |
37 worklist = list(S) | |
38 for item in worklist: | |
39 if item.IsShift and item.Next in self.nonterminals: | |
40 n = item.Next | |
41 for add_p in self.productionsForName(n): | |
42 i = Item(add_p, 0) | |
43 if not i in S: | |
44 S.add(i) | |
45 something_changed = True | |
46 return frozenset(S) | |
47 def calcInitialItemSet(self): | |
48 nis = set() | |
49 for p in self.productionsForName(self.StartSymbol): | |
50 nis.add(Item(p, 0)) | |
51 return self.e_closure(nis) | |
52 def calcNextItemSet(self, itemset, symbol): | |
53 nis = set() | |
54 for item in itemset: | |
55 if item.IsShift and item.Next == symbol: | |
56 nis.add(Item(item.production, item.dotpos + 1)) | |
57 return self.e_closure(nis) | |
58 def calcTables(self): | |
59 print(self.nonterminals + self.terminals) | |
60 self.states = set() | |
61 self.goto_table = {} | |
62 self.action_table = {} | |
63 iis = self.calcInitialItemSet() | |
64 self.s0 = iis | |
65 print(len(iis), iis) | |
66 worklist = [iis] | |
67 self.states.add(iis) | |
68 while len(worklist) > 0: | |
69 itemset = worklist.pop(0) | |
70 has_goto = False | |
71 for symbol in self.Symbols: | |
72 nis = self.calcNextItemSet(itemset, symbol) | |
73 if nis: | |
74 self.goto_table[(itemset, symbol)] = nis | |
75 has_goto = True | |
76 if not nis in self.states: | |
77 worklist.append(nis) | |
78 self.states.add(nis) | |
79 if has_goto: | |
80 self.action_table[itemset] = 'shift', 's' | |
81 for item in itemset: | |
82 assert item.IsShift, item | |
83 else: | |
84 # Check for reduce-reduce conflicts: | |
85 assert len(itemset) == 1 | |
86 item = list(itemset)[0] | |
87 assert item.IsReduce | |
88 self.action_table[itemset] = 'reduce', item.production | |
89 print(self.Symbols) | |
90 print(len(nis), nis, self.states) | |
91 for s, i in zip(self.states, range(len(self.states))): | |
92 print(i, s) | |
93 | |
94 def parse(self, toks): | |
95 stack = [self.s0] | |
96 while stack[-1] != 'input': | |
97 action, p = self.action_table[stack[-1]] | |
98 print(action, p) | |
99 if action == 'shift': | |
100 stack.append(toks.pop(0)) | |
101 s, i = stack[-2:] | |
102 stack.append(self.goto_table[(s, i)]) | |
103 else: | |
104 for s in p.symbols: | |
105 stack.pop() | |
106 stack.pop() | |
107 stack.append(p.name) | |
108 s, i = stack[-2:] | |
109 stack.append(self.goto_table[(s, i)]) | |
110 | |
111 class Production: | |
112 def __init__(self, name, symbols): | |
113 self.name = name | |
114 self.symbols = symbols | |
115 def __repr__(self): | |
116 return '{0} -> {1}'.format(self.name, self.symbols) | |
117 | |
118 class Item: | |
119 def __init__(self, production, dotpos): | |
120 self.production = production | |
121 self.dotpos = dotpos | |
122 def __eq__(self, other): | |
123 if type(other) is type(self): | |
124 return (self.production, self.dotpos) == (other.production, other.dotpos) | |
125 return False | |
126 def __hash__(self): | |
127 return (self.production, self.dotpos).__hash__() | |
128 @property | |
129 def IsReduce(self): | |
130 return self.dotpos == len(self.production.symbols) | |
131 @property | |
132 def IsShift(self): | |
133 return not self.IsReduce | |
134 @property | |
135 def Next(self): | |
136 return self.production.symbols[self.dotpos] | |
137 def matches(self, symbol): | |
138 return self.Next == symbol | |
139 def __repr__(self): | |
140 return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:])) | |
141 | |
142 # Test it: | |
143 # 1. define a simple grammar: | |
144 g = Grammar(['EOF', 'identifier', '(', ')', '+']) | |
145 g.add_production('input', ['expression', 'EOF']) | |
146 g.add_production('expression', ['term']) | |
147 g.add_production('expression', ['expression', '+', 'term']) | |
148 g.add_production('term', ['identifier']) | |
149 g.add_production('term', ['(', 'expression', ')']) | |
150 g.StartSymbol = 'input' | |
151 | |
152 # 2. define input: | |
153 tokens = ['identifier', '+', 'identifier', 'EOF'] | |
154 | |
155 # 3. build tables | |
156 g.calcTables() | |
157 | |
158 # 4. feed input | |
159 g.parse(tokens) | |
160 |