annotate python/pyyacc.py @ 180:25a0753da4cf

Re-organized files
author Windel Bouwman
date Thu, 09 May 2013 17:35:19 +0200
parents 0f3b1adfd416
children 216da5e46efc
rev   line source
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
1 class Grammar:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
2 def __init__(self, terminals):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
3 self.terminals = terminals
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
4 self.nonterminals = []
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
5 self.productions = []
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
6 def add_production(self, name, symbols):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
7 p = Production(name, symbols)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
8 self.productions.append(p)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
9 if not name in self.nonterminals:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
10 self.nonterminals.append(name)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
11 def productionsForName(self, name):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
12 return [p for p in self.productions if p.name == name]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
13 @property
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
14 def Symbols(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
15 return self.nonterminals + self.terminals
180
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
16 def calcFollow(self):
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
17 follow = {}
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
18 for nt in self.nonterminals:
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
19 follow[nt] = set()
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
20 while True:
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
21 change = False
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
22 # 1.
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
23 for p in self.productions:
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
24 pass
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
25 if not change:
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
26 break
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
27 return follow
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
28 def calcFirst(self):
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
29 first = {}
25a0753da4cf Re-organized files
Windel Bouwman
parents: 179
diff changeset
30 return first
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
31
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
32 class Production:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
33 def __init__(self, name, symbols):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
34 self.name = name
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
35 self.symbols = symbols
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
36 def __repr__(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
37 return '{0} -> {1}'.format(self.name, self.symbols)
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
38
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
39 class Item:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
40 def __init__(self, production, dotpos):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
41 self.production = production
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
42 self.dotpos = dotpos
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
43 def __eq__(self, other):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
44 if type(other) is type(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
45 return (self.production, self.dotpos) == (other.production, other.dotpos)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
46 return False
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
47 def __hash__(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
48 return (self.production, self.dotpos).__hash__()
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
49 @property
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
50 def IsReduce(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
51 return self.dotpos == len(self.production.symbols)
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
52 @property
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
53 def IsShift(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
54 return not self.IsReduce
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
55 @property
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
56 def Next(self):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
57 return self.production.symbols[self.dotpos]
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
58 def matches(self, symbol):
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
59 return self.Next == symbol
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
60 def __repr__(self):
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
61 return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
62
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
63 class LRgenerator:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
64 def __init__(self, g):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
65 self.grammar = g
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
66 def e_closure(self, S):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
67 """ Expand itemset by using epsilon moves """
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
68 something_changed = True
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
69 while something_changed:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
70 something_changed = False
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
71 worklist = list(S)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
72 for item in worklist:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
73 if item.IsShift and item.Next in self.grammar.nonterminals:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
74 n = item.Next
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
75 for add_p in self.grammar.productionsForName(n):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
76 i = Item(add_p, 0)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
77 if not i in S:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
78 S.add(i)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
79 something_changed = True
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
80 return frozenset(S)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
81 def initialItemSet(self):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
82 nis = set()
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
83 for p in self.grammar.productionsForName(self.grammar.start_symbol):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
84 nis.add(Item(p, 0))
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
85 return self.e_closure(nis)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
86 def nextItemSet(self, itemset, symbol):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
87 nis = set()
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
88 for item in itemset:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
89 if item.IsShift and item.Next == symbol:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
90 nis.add(Item(item.production, item.dotpos + 1))
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
91 return self.e_closure(nis)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
92 def genParser(self):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
93 states = set()
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
94 goto_table = {}
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
95 action_table = {}
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
96 iis = self.initialItemSet()
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
97 worklist = [iis]
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
98 states.add(iis)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
99 while len(worklist) > 0:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
100 itemset = worklist.pop(0)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
101 has_goto = False
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
102 for symbol in self.grammar.Symbols:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
103 nis = self.nextItemSet(itemset, symbol)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
104 if nis:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
105 goto_table[(itemset, symbol)] = nis
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
106 has_goto = True
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
107 if not nis in states:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
108 worklist.append(nis)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
109 states.add(nis)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
110 if has_goto:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
111 action_table[itemset] = 'shift', 's'
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
112 for item in itemset:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
113 assert item.IsShift, item
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
114 else:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
115 # Check for reduce-reduce conflicts:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
116 assert len(itemset) == 1
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
117 item = list(itemset)[0]
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
118 assert item.IsReduce
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
119 action_table[itemset] = 'reduce', item.production
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
120 for s, i in zip(states, range(len(states))):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
121 print(i, ' || '.join(str(x) for x in s))
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
122 p = LRParser()
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
123 p.goto_table = goto_table
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
124 p.action_table = action_table
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
125 p.s0 = iis
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
126 return p
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
127
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
128 class LRParser:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
129 def parse(self, toks):
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
130 stack = [self.s0]
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
131 while stack[-1] != 'input':
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
132 action, p = self.action_table[stack[-1]]
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
133 print(action, p)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
134 if action == 'shift':
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
135 stack.append(toks.pop(0))
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
136 s, i = stack[-2:]
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
137 stack.append(self.goto_table[(s, i)])
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
138 elif action == 'reduce':
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
139 for s in p.symbols:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
140 stack.pop()
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
141 stack.pop()
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
142 stack.append(p.name)
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
143 s, i = stack[-2:]
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
144 if stack[-1] == 'input':
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
145 break
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
146 stack.append(self.goto_table[(s, i)])
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
147
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
148 # Test it:
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
149 # 1. define a simple grammar:
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
150 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
151 g.add_production('input', ['expression', 'EOF'])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
152 g.add_production('expression', ['term'])
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
153 g.add_production('expression', ['expression', '+', 'term'])
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
154 g.add_production('term', ['factor'])
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
155 #g.add_production('term', ['term', '*', 'factor'])
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
156 g.add_production('term', ['(', 'expression', ')'])
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
157 g.add_production('factor', ['identifier'])
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
158 g.start_symbol = 'input'
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
159 # 2. define input:
179
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
160 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
161 # 3. build parser:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
162 p = LRgenerator(g).genParser()
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
163 # 4. feed input:
0f3b1adfd416 LR(0) parser
Windel Bouwman
parents: 178
diff changeset
164 p.parse(tokens)
178
c694ec551f34 Added lex yacc test scripts
Windel Bouwman
parents:
diff changeset
165