184
|
1 import unittest, pprint
|
194
|
2 from pyyacc import Grammar, Item, ParserGenerationException, ParserException, EPS, EOF
|
191
|
3 from ppci import Token
|
184
|
4
|
191
|
def genTokens(lst):
    """ Yield a Token for every entry of *lst*, using the entry as both
        token type and token value. """
    for symbol in lst:
        yield Token(symbol, symbol)
|
184
|
8
|
|
class testLR(unittest.TestCase):
    """ Test basic LR(1) parser generator constructs """

    def testSimpleGrammar(self):
        """ Generate a parser for a small expression grammar and parse input """
        # 1. define a simple grammar:
        g = Grammar(['identifier', '(', ')', '+', '*'])
        g.add_production('input', ['expression'])
        g.add_production('expression', ['term'])
        g.add_production('expression', ['expression', '+', 'term'])
        g.add_production('term', ['factor'])
        g.add_production('term', ['term', '*', 'factor'])
        g.add_production('factor', ['(', 'expression', ')'])
        g.add_production('factor', ['identifier'])
        g.start_symbol = 'input'
        # 2. define input:
        tokens = genTokens(['identifier', '+', 'identifier', '+', 'identifier'])
        # 3. build parser:
        p = g.genParser()
        # 4. feed input:
        p.parse(tokens)

    def testReduceReduceConflict(self):
        """ Check if a reduce-reduce conflict is detected """
        # Define a grammar with an obvious reduce-reduce conflict:
        g = Grammar(['id'])
        g.add_production('goal', ['a'])
        g.add_production('a', ['b'])
        g.add_production('a', ['c'])
        g.add_production('b', ['id'])
        g.add_production('c', ['id'])
        g.start_symbol = 'goal'
        with self.assertRaises(ParserGenerationException):
            # Fixed: dropped the unused 'p =' binding; only the raise matters.
            g.genParser()

    def testShiftReduceConflict(self):
        """ Must be handled automatically by doing shift """
        g = Grammar([EOF, 'if', 'then', 'else', 'ass'])
        # Ambiguous grammar (dangling else):
        g.add_production('if_stmt', ['if', 'then', 'stmt'])
        g.add_production('if_stmt', ['if', 'then', 'stmt', 'else', 'stmt'])
        g.add_production('stmt', ['if_stmt'])
        g.add_production('stmt', ['ass'])
        g.start_symbol = 'stmt'
        p = g.genParser()
        # Ambiguous program:
        tokens = genTokens(['if', 'then', 'if', 'then', 'ass', 'else', 'ass'])
        p.parse(tokens)

    def testUndefinedTerminal(self):
        """ Test correct behavior when a terminal is undefined """
        g = Grammar(['b'])
        g.add_production('goal', ['a'])
        g.add_production('a', ['b'])
        g.add_production('a', ['c'])  # 'c' is neither terminal nor defined
        g.start_symbol = 'goal'
        with self.assertRaises(ParserGenerationException):
            g.genParser()

    def testRedefineTerminal(self):
        """ Test correct behavior when a terminal is redefined """
        g = Grammar([EOF, 'b', 'c'])
        g.add_production('goal', ['a'])
        with self.assertRaises(ParserGenerationException):
            g.add_production('b', ['c'])  # Not allowed: 'b' is a terminal
        g.add_production('a', ['c'])
        g.start_symbol = 'goal'
        g.genParser()

    def testEmpty(self):
        """ Test empty token stream """
        g = Grammar([','])
        g.add_production('input', [','])
        g.start_symbol = 'input'
        p = g.genParser()
        tokens = genTokens([])
        with self.assertRaises(ParserException):
            p.parse(tokens)

    def testEps(self):
        """ Test epsilon terminal """
        g = Grammar(['a', 'b'])
        g.add_production('input', ['optional_a', 'b'])
        g.add_production('optional_a', ['a'])
        g.add_production('optional_a', [])  # epsilon production
        g.start_symbol = 'input'
        p = g.genParser()
        tokens = genTokens(['b'])
        p.parse(tokens)

    def testEps2(self):
        """ Test a grammar with an optional label prefix """
        g = Grammar(['id', ':'])
        g.add_production('input', ['opt_lab', 'ins', 'op1'])
        g.add_production('input', ['ins', 'op1'])
        g.add_production('opt_lab', ['id', ':'])
        g.add_production('ins', ['id'])
        g.add_production('op1', ['id'])
        g.start_symbol = 'input'
        p = g.genParser()
        tokens = genTokens(['id', ':', 'id', 'id'])  # i.e. "lab_0: inc rax"
        p.parse(tokens)
        tokens = genTokens(['id', 'id'])  # i.e. "inc rax"
        p.parse(tokens)
|
|
106
|
184
|
107
|
185
|
class testExpressionGrammar(unittest.TestCase):
    """ Tests first-set and canonical-set computation on a larger
        expression grammar. """
    def setUp(self):
        # NOTE(review): 'EOF' here is the literal string, not the imported
        # EOF constant — presumably deliberate, but confirm against Grammar.
        g = Grammar(['EOF', 'identifier', '(', ')', '+', '*', 'num'])
        g.add_production('input', ['expression'])
        g.add_production('expression', ['term'])
        g.add_production('expression', ['expression', '+', 'term'])
        g.add_production('term', ['factor'])
        g.add_production('term', ['term', '*', 'factor'])
        g.add_production('factor', ['(', 'expression', ')'])
        g.add_production('factor', ['identifier'])
        g.add_production('factor', ['num'])
        g.start_symbol = 'input'
        self.g = g

    def testFirstSimpleGrammar(self):
        # Check the computed first sets of two nonterminals:
        first = self.g.calcFirstSets()
        self.assertEqual(first['input'], {'identifier', '(', 'num'})
        self.assertEqual(first['term'], {'identifier', '(', 'num'})

    def testCanonical(self):
        s0 = self.g.initialItemSet()
        s, gt = self.g.genCanonicalSet(s0)
        # Must result in 24 sets for this grammar:
        self.assertEqual(len(s), 24)
|
|
133
|
184
|
class testPG(unittest.TestCase):
    """ Tests several parts of the parser generator """

    def setUp(self):
        # Balanced-parentheses grammar used by every test below.
        grammar = Grammar(['(', ')'])
        grammar.add_production('goal', ['list'])
        grammar.add_production('list', ['list', 'pair'])
        grammar.add_production('list', ['pair'])
        grammar.add_production('pair', ['(', 'pair', ')'])
        grammar.add_production('pair', ['(', ')'])
        grammar.start_symbol = 'goal'
        self.g = grammar

    def testFirstSet(self):
        """ A terminal's first set is just itself; every nonterminal here
            can only start with '('. """
        # NOTE(review): 'EPS' is the literal string, not the imported EPS
        # constant — verify they are interchangeable in pyyacc.
        for terminal in ['(', ')', EOF, 'EPS']:
            self.assertEqual(self.g.first[terminal], {terminal})
        for nonterminal in ['list', 'pair', 'goal']:
            self.assertEqual(self.g.first[nonterminal], {'('})

    def testInitItemSet(self):
        """ The initial item set must contain all productions with the
            proper lookaheads. """
        p0, p1, p2, p3, p4 = self.g.productions
        s0 = self.g.initialItemSet()
        self.assertEqual(len(s0), 9)  # 9 with the goal rule included!
        expected = [(p0, EOF)]
        for production in (p1, p2, p3, p4):
            expected.append((production, EOF))
            expected.append((production, '('))
        for production, lookahead in expected:
            self.assertIn(Item(production, 0, lookahead), s0)

    def testCanonical(self):
        itemsets, transitions = self.g.genCanonicalSet(self.g.initialItemSet())
        # Must result in 12 sets:
        self.assertEqual(len(itemsets), 12)

    def testClosure(self):
        """ Closure of the single goal item must pull in every production
            with both possible lookaheads. """
        p0, p1, p2, p3, p4 = self.g.productions
        kernel = set()
        kernel.add(Item(p0, 0, EOF))
        self.assertEqual(len(kernel), 1)  # 1 rule
        self.assertIn(Item(p0, 0, EOF), kernel)

        # Invoke closure on set:
        closed = self.g.closure(kernel)
        expected = [(p0, EOF)]
        for production in (p1, p2, p3, p4):
            expected.append((production, EOF))
            expected.append((production, '('))
        for production, lookahead in expected:
            self.assertIn(Item(production, 0, lookahead), closed)

    def testParser(self):
        """ Generate the parser, check its table sizes, then parse input. """
        token_types = ['(', '(', ')', ')', '(', ')']
        # Build parser:
        parser = self.g.genParser()
        self.assertEqual(len(parser.goto_table), 5)
        self.assertEqual(len(parser.action_table), 19)

        # Feed input:
        parser.parse(genTokens(token_types))
|
184
|
200
|
|
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
|
203
|
|
204
|