318
|
1 import unittest
|
|
2 from pyyacc import Grammar, Item, ParserGenerationException, ParserException
|
319
|
3 from pyyacc import EPS, EOF, calculate_first_sets
|
191
|
4 from ppci import Token
|
184
|
5
|
318
|
6
|
|
class genTokens:
    """Token source for the tests: yields one Token per terminal name.

    After the supplied terminals are exhausted, EOF tokens are produced
    indefinitely so the parser can always read ahead.
    """
    def __init__(self, lst):
        def produce():
            # One token per requested terminal name ...
            for typ in lst:
                yield Token(typ, typ)
            # ... then EOF forever.
            while True:
                yield Token(EOF, EOF)
        self.tokens = produce()
        self.token = next(self.tokens)

    def next_token(self):
        """Return the current token and advance; sticks at EOF."""
        current = self.token
        if current.typ != EOF:
            self.token = next(self.tokens)
        return current
|
|
22
|
184
|
23
|
|
class testLR(unittest.TestCase):
    """ Test basic LR(1) parser generator constructs """
    def testSimpleGrammar(self):
        """ Parse a simple expression grammar end to end """
        # 1. define a simple grammar:
        g = Grammar(['identifier', '(', ')', '+', '*'])
        g.add_production('input', ['expression'])
        g.add_production('expression', ['term'])
        g.add_production('expression', ['expression', '+', 'term'])
        g.add_production('term', ['factor'])
        g.add_production('term', ['term', '*', 'factor'])
        g.add_production('factor', ['(', 'expression', ')'])
        g.add_production('factor', ['identifier'])
        g.start_symbol = 'input'
        # 2. define input:
        tokens = genTokens(['identifier', '+', 'identifier', '+', 'identifier'])
        # 3. build parser:
        p = g.generate_parser()
        # 4. feed input:
        p.parse(tokens)

    def testReduceReduceConflict(self):
        """ Check if a reduce-reduce conflict is detected """
        # Define a grammar with an obvious reduce-reduce conflict:
        g = Grammar(['id'])
        g.add_production('goal', ['a'])
        g.add_production('a', ['b'])
        g.add_production('a', ['c'])
        g.add_production('b', ['id'])
        g.add_production('c', ['id'])
        g.start_symbol = 'goal'
        with self.assertRaises(ParserGenerationException):
            # Only the raised exception matters; no binding needed.
            g.generate_parser()

    def testShiftReduceConflict(self):
        """ Must be handled automatically by doing shift """
        g = Grammar([EOF, 'if', 'then', 'else', 'ass'])
        # Ambiguous (dangling else) grammar:
        g.add_production('if_stmt', ['if', 'then', 'stmt'])
        g.add_production('if_stmt', ['if', 'then', 'stmt', 'else', 'stmt'])
        g.add_production('stmt', ['if_stmt'])
        g.add_production('stmt', ['ass'])
        g.start_symbol = 'stmt'
        p = g.generate_parser()
        # Ambiguous program:
        tokens = genTokens(['if', 'then', 'if', 'then', 'ass', 'else', 'ass'])
        p.parse(tokens)

    def testUndefinedTerminal(self):
        """ Test correct behavior when a terminal is undefined """
        g = Grammar(['b'])
        g.add_production('goal', ['a'])
        g.add_production('a', ['b'])
        g.add_production('a', ['c'])  # 'c' is never defined
        g.start_symbol = 'goal'
        with self.assertRaises(ParserGenerationException):
            g.generate_parser()

    def testRedefineTerminal(self):
        """ Test correct behavior when a terminal is redefined """
        g = Grammar([EOF, 'b', 'c'])
        g.add_production('goal', ['a'])
        with self.assertRaises(ParserGenerationException):
            g.add_production('b', ['c'])  # Not allowed
        g.add_production('a', ['c'])
        g.start_symbol = 'goal'
        g.generate_parser()

    def testEmpty(self):
        """ Test empty token stream """
        g = Grammar([','])
        g.add_production('input', [','])
        g.start_symbol = 'input'
        p = g.generate_parser()
        tokens = genTokens([])
        # An empty stream cannot satisfy 'input', so parsing must fail:
        with self.assertRaises(ParserException):
            p.parse(tokens)

    def testEps(self):
        """ Test epsilon terminal """
        g = Grammar(['a', 'b'])
        g.add_production('input', ['optional_a', 'b'])
        g.add_production('optional_a', ['a'])
        g.add_production('optional_a', [])
        g.start_symbol = 'input'
        p = g.generate_parser()
        tokens = genTokens(['b'])
        p.parse(tokens)

    def testEps2(self):
        """ Test an optional label before an instruction (assembler-like) """
        g = Grammar(['id', ':'])
        g.add_production('input', ['opt_lab', 'ins', 'op1'])
        g.add_production('input', ['ins', 'op1'])
        g.add_production('opt_lab', ['id', ':'])
        g.add_production('ins', ['id'])
        g.add_production('op1', ['id'])
        g.start_symbol = 'input'
        p = g.generate_parser()
        tokens = genTokens(['id', ':', 'id', 'id'])  # i.e. "lab_0: inc rax"
        p.parse(tokens)
        tokens = genTokens(['id', 'id'])  # i.e. "inc rax"
        p.parse(tokens)

    def testEpsSequence(self):
        """ Test epsilon terminal for use in sequences """
        g = Grammar(['a'])
        g.add_production('aas', [])
        g.add_production('aas', ['aas', 'a'])
        g.start_symbol = 'aas'
        p = g.generate_parser()
        tokens = genTokens(['a', 'a', 'a'])
        p.parse(tokens)
        tokens = genTokens([])
        p.parse(tokens)

    def test_cb(self):
        """ Test callback of one rule and order of parameters """
        self.cb_called = False

        def cb(a, c, b):
            # Callback parameters arrive in production order: 'a', 'c', 'b'.
            self.cb_called = True
            self.assertEqual(a.val, 'a')
            self.assertEqual(b.val, 'b')
            self.assertEqual(c.val, 'c')
        g = Grammar(['a', 'b', 'c'])
        g.add_production('goal', ['a', 'c', 'b'], cb)
        g.start_symbol = 'goal'
        p = g.generate_parser()
        tokens = genTokens(['a', 'c', 'b'])
        p.parse(tokens)
        self.assertTrue(self.cb_called)
|
|
153
|
184
|
154
|
185
|
class testExpressionGrammar(unittest.TestCase):
    """ First-set and canonical-collection tests on an arithmetic grammar. """
    def setUp(self):
        # Standard expression grammar with +, * and parentheses:
        g = Grammar(['EOF', 'identifier', '(', ')', '+', '*', 'num'])
        g.add_production('input', ['expression'])
        g.add_production('expression', ['term'])
        g.add_production('expression', ['expression', '+', 'term'])
        g.add_production('term', ['factor'])
        g.add_production('term', ['term', '*', 'factor'])
        g.add_production('factor', ['(', 'expression', ')'])
        g.add_production('factor', ['identifier'])
        g.add_production('factor', ['num'])
        g.start_symbol = 'input'
        self.g = g

    def testFirstSimpleGrammar(self):
        """ FIRST sets of the nonterminals are the tokens a factor can start with """
        first = calculate_first_sets(self.g)
        self.assertEqual(first['input'], {'identifier', '(', 'num'})
        self.assertEqual(first['term'], {'identifier', '(', 'num'})

    def testCanonical(self):
        """ Check the size of the canonical LR(1) item-set collection """
        s0 = self.g.initialItemSet()
        s, gt = self.g.genCanonicalSet(s0)
        # Must result in 24 sets for this grammar:
        self.assertEqual(len(s), 24)
|
|
180
|
318
|
181
|
|
class testParserGenerator(unittest.TestCase):
    """ Tests several parts of the parser generator """
    def setUp(self):
        """Build the balanced-parentheses grammar shared by every test."""
        grammar = Grammar(['(', ')'])
        grammar.add_production('goal', ['list'])
        grammar.add_production('list', ['list', 'pair'])
        grammar.add_production('list', ['pair'])
        grammar.add_production('pair', ['(', 'pair', ')'])
        grammar.add_production('pair', ['(', ')'])
        grammar.start_symbol = 'goal'
        self.g = grammar

    def testFirstSet(self):
        """Each terminal's FIRST set is itself; nonterminals start with '('."""
        for terminal in ['(', ')', EOF, 'EPS']:
            self.assertEqual(self.g.first[terminal], {terminal})
        for nonterminal in ['list', 'pair', 'goal']:
            self.assertEqual(self.g.first[nonterminal], {'('})

    def testInitItemSet(self):
        """The initial item set contains every production with both lookaheads."""
        r0, r1, r2, r3, r4 = self.g.productions
        initial = self.g.initialItemSet()
        self.assertEqual(len(initial), 9)  # 9 with the goal rule included!
        self.assertIn(Item(r0, 0, EOF), initial)
        for rule in (r1, r2, r3, r4):
            self.assertIn(Item(rule, 0, EOF), initial)
            self.assertIn(Item(rule, 0, '('), initial)

    def testCanonical(self):
        """The canonical LR(1) collection for this grammar has 12 item sets."""
        initial = self.g.initialItemSet()
        item_sets, transitions = self.g.genCanonicalSet(initial)
        # Must result in 12 sets:
        self.assertEqual(len(item_sets), 12)

    def testClosure(self):
        """Closure of the lone start item pulls in all reachable items."""
        r0, r1, r2, r3, r4 = self.g.productions
        items = {Item(r0, 0, EOF)}
        self.assertEqual(len(items), 1)  # 1 rule
        self.assertIn(Item(r0, 0, EOF), items)

        # Invoke closure on set:
        items = self.g.closure(items)
        self.assertIn(Item(r0, 0, EOF), items)
        for rule in (r1, r2, r3, r4):
            self.assertIn(Item(rule, 0, EOF), items)
            self.assertIn(Item(rule, 0, '('), items)

    def testParser(self):
        """Generate a parser, check table sizes, and parse a balanced input."""
        token_types = ['(', '(', ')', ')', '(', ')']
        # Build the parser and sanity-check its tables:
        parser = self.g.generate_parser()
        self.assertEqual(len(parser.goto_table), 5)
        self.assertEqual(len(parser.action_table), 19)

        # Feed the input:
        parser.parse(genTokens(token_types))
|
184
|
248
|
|
# Run the whole suite when executed as a script.
if __name__ == '__main__':
    unittest.main()
|