annotate python/pyyacc.py @ 178:c694ec551f34

Added lex yacc test scripts
author Windel Bouwman
date Sat, 04 May 2013 12:07:17 +0200
parents
children 0f3b1adfd416
rev   line source
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
1
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
2 class GrammarError(Exception):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
3 pass
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
4
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
5 class Grammar:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
6 def __init__(self, terminals):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
7 self.terminals = terminals
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
8 self.nonterminals = []
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
9 self.productions = []
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
10 self.states = None
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
11 self.start_symbol = None
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
12 def add_production(self, name, symbols):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
13 p = Production(name, symbols)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
14 self.productions.append(p)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
15 if not name in self.nonterminals:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
16 self.nonterminals.append(name)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
17 def set_start(self, start):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
18 self.start_symbol = start
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
19 def get_start(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
20 return self.start_symbol
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
21 StartSymbol = property(get_start, set_start)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
22 def productionsForName(self, name):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
23 return [p for p in self.productions if p.name == name]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
24 @property
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
25 def Symbols(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
26 return self.nonterminals + self.terminals
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
27 def calcFollow(self, name):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
28 """ Determines the follow set for a given name """
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
29 f = set()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
30 # TODO
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
31 return f
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
32
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
33 def e_closure(self, S):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
34 something_changed = True
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
35 while something_changed:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
36 something_changed = False
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
37 worklist = list(S)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
38 for item in worklist:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
39 if item.IsShift and item.Next in self.nonterminals:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
40 n = item.Next
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
41 for add_p in self.productionsForName(n):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
42 i = Item(add_p, 0)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
43 if not i in S:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
44 S.add(i)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
45 something_changed = True
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
46 return frozenset(S)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
47 def calcInitialItemSet(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
48 nis = set()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
49 for p in self.productionsForName(self.StartSymbol):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
50 nis.add(Item(p, 0))
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
51 return self.e_closure(nis)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
52 def calcNextItemSet(self, itemset, symbol):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
53 nis = set()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
54 for item in itemset:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
55 if item.IsShift and item.Next == symbol:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
56 nis.add(Item(item.production, item.dotpos + 1))
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
57 return self.e_closure(nis)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
58 def calcTables(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
59 print(self.nonterminals + self.terminals)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
60 self.states = set()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
61 self.goto_table = {}
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
62 self.action_table = {}
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
63 iis = self.calcInitialItemSet()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
64 self.s0 = iis
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
65 print(len(iis), iis)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
66 worklist = [iis]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
67 self.states.add(iis)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
68 while len(worklist) > 0:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
69 itemset = worklist.pop(0)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
70 has_goto = False
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
71 for symbol in self.Symbols:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
72 nis = self.calcNextItemSet(itemset, symbol)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
73 if nis:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
74 self.goto_table[(itemset, symbol)] = nis
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
75 has_goto = True
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
76 if not nis in self.states:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
77 worklist.append(nis)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
78 self.states.add(nis)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
79 if has_goto:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
80 self.action_table[itemset] = 'shift', 's'
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
81 for item in itemset:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
82 assert item.IsShift, item
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
83 else:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
84 # Check for reduce-reduce conflicts:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
85 assert len(itemset) == 1
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
86 item = list(itemset)[0]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
87 assert item.IsReduce
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
88 self.action_table[itemset] = 'reduce', item.production
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
89 print(self.Symbols)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
90 print(len(nis), nis, self.states)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
91 for s, i in zip(self.states, range(len(self.states))):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
92 print(i, s)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
93
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
94 def parse(self, toks):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
95 stack = [self.s0]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
96 while stack[-1] != 'input':
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
97 action, p = self.action_table[stack[-1]]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
98 print(action, p)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
99 if action == 'shift':
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
100 stack.append(toks.pop(0))
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
101 s, i = stack[-2:]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
102 stack.append(self.goto_table[(s, i)])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
103 else:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
104 for s in p.symbols:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
105 stack.pop()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
106 stack.pop()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
107 stack.append(p.name)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
108 s, i = stack[-2:]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
109 stack.append(self.goto_table[(s, i)])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
110
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
111 class Production:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
112 def __init__(self, name, symbols):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
113 self.name = name
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
114 self.symbols = symbols
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
115 def __repr__(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
116 return '{0} -> {1}'.format(self.name, self.symbols)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
117
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
118 class Item:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
119 def __init__(self, production, dotpos):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
120 self.production = production
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
121 self.dotpos = dotpos
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
122 def __eq__(self, other):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
123 if type(other) is type(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
124 return (self.production, self.dotpos) == (other.production, other.dotpos)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
125 return False
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
126 def __hash__(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
127 return (self.production, self.dotpos).__hash__()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
128 @property
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
129 def IsReduce(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
130 return self.dotpos == len(self.production.symbols)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
131 @property
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
132 def IsShift(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
133 return not self.IsReduce
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
134 @property
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
135 def Next(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
136 return self.production.symbols[self.dotpos]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
137 def matches(self, symbol):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
138 return self.Next == symbol
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
139 def __repr__(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
140 return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
141
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
142 # Test it:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
143 # 1. define a simple grammar:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
144 g = Grammar(['EOF', 'identifier', '(', ')', '+'])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
145 g.add_production('input', ['expression', 'EOF'])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
146 g.add_production('expression', ['term'])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
147 g.add_production('expression', ['expression', '+', 'term'])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
148 g.add_production('term', ['identifier'])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
149 g.add_production('term', ['(', 'expression', ')'])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
150 g.StartSymbol = 'input'
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
151
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
152 # 2. define input:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
153 tokens = ['identifier', '+', 'identifier', 'EOF']
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
154
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
155 # 3. build tables
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
156 g.calcTables()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
157
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
158 # 4. feed input
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
159 g.parse(tokens)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
160