318
|
1 import unittest
|
|
2 from pyyacc import Grammar, Item, ParserGenerationException, ParserException
|
319
|
3 from pyyacc import EPS, EOF, calculate_first_sets
|
396
|
4 from ppci import Token, SourceLocation
|
184
|
5
|
318
|
6
|
|
class genTokens:
    """Wrap a list of token type names as a stream of Token objects.

    Yields one Token per entry of *lst*, then repeats an EOF token
    forever so the parser can always read ahead safely.
    """
    def __init__(self, lst):
        def generate():
            # All synthetic tokens share one dummy source location.
            location = SourceLocation('', 0, 0, 0)
            for typ in lst:
                yield Token(typ, typ, location)
            while True:
                yield Token(EOF, EOF, location)
        self.tokens = generate()
        self.token = next(self.tokens)

    def next_token(self):
        """Return the current token and advance; sticks at EOF."""
        current = self.token
        if current.typ != EOF:
            self.token = next(self.tokens)
        return current
|
|
23
|
184
|
24
|
|
class testLR(unittest.TestCase):
    """ Test basic LR(1) parser generator constructs """

    def testSimpleGrammar(self):
        """ Build and run a parser for a small expression grammar """
        # 1. define a simple grammar:
        g = Grammar(['identifier', '(', ')', '+', '*'])
        g.add_production('input', ['expression'])
        g.add_production('expression', ['term'])
        g.add_production('expression', ['expression', '+', 'term'])
        g.add_production('term', ['factor'])
        g.add_production('term', ['term', '*', 'factor'])
        g.add_production('factor', ['(', 'expression', ')'])
        g.add_production('factor', ['identifier'])
        g.start_symbol = 'input'
        # 2. define input:
        tokens = genTokens(['identifier', '+', 'identifier', '+', 'identifier'])
        # 3. build parser:
        p = g.generate_parser()
        # 4. feed input:
        p.parse(tokens)

    def testReduceReduceConflict(self):
        """ Check if a reduce-reduce conflict is detected """
        # Define a grammar with an obvious reduce-reduce conflict:
        g = Grammar(['id'])
        g.add_production('goal', ['a'])
        g.add_production('a', ['b'])
        g.add_production('a', ['c'])
        g.add_production('b', ['id'])
        g.add_production('c', ['id'])
        g.start_symbol = 'goal'
        with self.assertRaises(ParserGenerationException):
            # Result is irrelevant; generation itself must raise.
            g.generate_parser()

    def testShiftReduceConflict(self):
        """ Must be handled automatically by doing shift """
        g = Grammar([EOF, 'if', 'then', 'else', 'ass'])
        # Ambiguous (dangling-else) grammar:
        g.add_production('if_stmt', ['if', 'then', 'stmt'])
        g.add_production('if_stmt', ['if', 'then', 'stmt', 'else', 'stmt'])
        g.add_production('stmt', ['if_stmt'])
        g.add_production('stmt', ['ass'])
        g.start_symbol = 'stmt'
        p = g.generate_parser()
        # Ambiguous program:
        tokens = genTokens(['if', 'then', 'if', 'then', 'ass', 'else', 'ass'])
        p.parse(tokens)

    def testUndefinedTerminal(self):
        """ Test correct behavior when a terminal is undefined """
        g = Grammar(['b'])
        g.add_production('goal', ['a'])
        g.add_production('a', ['b'])
        g.add_production('a', ['c'])  # 'c' is never defined anywhere
        g.start_symbol = 'goal'
        with self.assertRaises(ParserGenerationException):
            g.generate_parser()

    def testRedefineTerminal(self):
        """ Test correct behavior when a terminal is redefined """
        g = Grammar([EOF, 'b', 'c'])
        g.add_production('goal', ['a'])
        with self.assertRaises(ParserGenerationException):
            g.add_production('b', ['c'])  # Not allowed
        g.add_production('a', ['c'])
        g.start_symbol = 'goal'
        g.generate_parser()

    def testEmpty(self):
        """ Test empty token stream """
        g = Grammar([','])
        g.add_production('input', [','])
        g.start_symbol = 'input'
        p = g.generate_parser()
        tokens = genTokens([])
        with self.assertRaises(ParserException):
            p.parse(tokens)

    def testEps(self):
        """ Test epsilon terminal """
        g = Grammar(['a', 'b'])
        g.add_production('input', ['optional_a', 'b'])
        g.add_production('optional_a', ['a'])
        g.add_production('optional_a', [])  # epsilon production
        g.start_symbol = 'input'
        p = g.generate_parser()
        tokens = genTokens(['b'])
        p.parse(tokens)

    def testEps2(self):
        """ Test an optional label in front of an instruction """
        g = Grammar(['id', ':'])
        g.add_production('input', ['opt_lab', 'ins', 'op1'])
        g.add_production('input', ['ins', 'op1'])
        g.add_production('opt_lab', ['id', ':'])
        g.add_production('ins', ['id'])
        g.add_production('op1', ['id'])
        g.start_symbol = 'input'
        p = g.generate_parser()
        tokens = genTokens(['id', ':', 'id', 'id'])  # i.e. "lab_0: inc rax"
        p.parse(tokens)
        tokens = genTokens(['id', 'id'])  # i.e. "inc rax"
        p.parse(tokens)

    def testEpsSequence(self):
        """ Test epsilon terminal for use in sequences """
        g = Grammar(['a'])
        g.add_production('aas', [])
        g.add_production('aas', ['aas', 'a'])
        g.start_symbol = 'aas'
        p = g.generate_parser()
        tokens = genTokens(['a', 'a', 'a'])
        p.parse(tokens)
        tokens = genTokens([])
        p.parse(tokens)

    def test_cb(self):
        """ Test callback of one rule and order of parameters """
        self.cb_called = False

        def cb(a, c, b):
            # Callback parameters arrive in production order: 'a', 'c', 'b'.
            self.cb_called = True
            self.assertEqual(a.val, 'a')
            self.assertEqual(b.val, 'b')
            self.assertEqual(c.val, 'c')
        g = Grammar(['a', 'b', 'c'])
        g.add_production('goal', ['a', 'c', 'b'], cb)
        g.start_symbol = 'goal'
        p = g.generate_parser()
        tokens = genTokens(['a', 'c', 'b'])
        p.parse(tokens)
        self.assertTrue(self.cb_called)
|
|
154
|
184
|
155
|
185
|
class testExpressionGrammar(unittest.TestCase):
    """ Tests on a standard arithmetic expression grammar. """
    def setUp(self):
        # Expression grammar with '+', '*' and parentheses:
        g = Grammar(['EOF', 'identifier', '(', ')', '+', '*', 'num'])
        g.add_production('input', ['expression'])
        g.add_production('expression', ['term'])
        g.add_production('expression', ['expression', '+', 'term'])
        g.add_production('term', ['factor'])
        g.add_production('term', ['term', '*', 'factor'])
        g.add_production('factor', ['(', 'expression', ')'])
        g.add_production('factor', ['identifier'])
        g.add_production('factor', ['num'])
        g.start_symbol = 'input'
        self.g = g

    def testFirstSimpleGrammar(self):
        # Compute FIRST sets; an expression can begin with an
        # identifier, an opening parenthesis or a number:
        first = calculate_first_sets(self.g)
        self.assertEqual(first['input'], {'identifier', '(', 'num'})
        self.assertEqual(first['term'], {'identifier', '(', 'num'})

    def testCanonical(self):
        s0 = self.g.initialItemSet()
        s, gt = self.g.genCanonicalSet(s0)
        # The assertion expects 24 canonical item sets.
        # NOTE(review): an earlier comment claimed 12 — confirm which
        # count is actually correct for this grammar.
        self.assertEqual(len(s), 24)
|
|
181
|
318
|
182
|
|
class testParserGenerator(unittest.TestCase):
    """ Tests several parts of the parser generator """

    def setUp(self):
        """ Construct the balanced-parentheses grammar shared by all tests """
        grammar = Grammar(['(', ')'])
        grammar.add_production('goal', ['list'])
        grammar.add_production('list', ['list', 'pair'])
        grammar.add_production('list', ['pair'])
        grammar.add_production('pair', ['(', 'pair', ')'])
        grammar.add_production('pair', ['(', ')'])
        grammar.start_symbol = 'goal'
        self.g = grammar

    def testFirstSet(self):
        """ Each terminal's FIRST set is itself; nonterminals begin with '(' """
        # NOTE(review): 'EPS' below is the literal string, not the imported
        # EPS constant — confirm that this is intentional.
        for symbol in ['(', ')', EOF, 'EPS']:
            self.assertEqual({symbol}, self.g.first[symbol])
        for symbol in ['list', 'pair', 'goal']:
            self.assertEqual({'('}, self.g.first[symbol])

    def testInitItemSet(self):
        """ Initial item set holds every production under both lookaheads """
        p0, p1, p2, p3, p4 = self.g.productions
        s0 = self.g.initialItemSet()
        self.assertEqual(len(s0), 9)  # 9 with the goal rule included!
        self.assertIn(Item(p0, 0, EOF), s0)
        for production in (p1, p2, p3, p4):
            for lookahead in (EOF, '('):
                self.assertIn(Item(production, 0, lookahead), s0)

    def testCanonical(self):
        """ Canonical LR(1) collection for this grammar has 12 item sets """
        initial = self.g.initialItemSet()
        item_sets, gt = self.g.genCanonicalSet(initial)
        # Must result in 12 sets:
        self.assertEqual(12, len(item_sets))

    def testClosure(self):
        """ Closing over the initial goal item pulls in all other items """
        p0, p1, p2, p3, p4 = self.g.productions
        kernel = {Item(p0, 0, EOF)}
        self.assertEqual(1, len(kernel))  # 1 rule
        self.assertIn(Item(p0, 0, EOF), kernel)

        # Invoke closure on set:
        closed = self.g.closure(kernel)
        self.assertIn(Item(p0, 0, EOF), closed)
        for production in (p1, p2, p3, p4):
            for lookahead in (EOF, '('):
                self.assertIn(Item(production, 0, lookahead), closed)

    def testParser(self):
        """ Generate the tables and parse a small balanced-paren input """
        # 3. build parser:
        parser = self.g.generate_parser()
        self.assertEqual(5, len(parser.goto_table))
        self.assertEqual(19, len(parser.action_table))

        # 4. feed input:
        parser.parse(genTokens(['(', '(', ')', ')', '(', ')']))
|
184
|
249
|
|
# Allow running this test module directly.
if __name__ == '__main__':
    unittest.main()
|