comparison python/yacc.py @ 319:8d07a4254f04

Work on burg
author Windel Bouwman
date Sat, 18 Jan 2014 18:58:43 +0100
parents e84047f29c78
children 8c569fbe60e4
comparison
equal deleted inserted replaced
318:e84047f29c78 319:8d07a4254f04
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 """ Parser generator utility """ 3 """
4 Parser generator utility. This script can generate a python script from a
5 grammar description.
6
7 Invoke the script on a grammar specification file:
8
9 .. code::
10
11 $ ./yacc.py test.x -o test_parser.py
12
13 And use the generated parser by deriving a user class:
14
15
16 .. code::
17
18 import test_parser
19 class MyParser(test_parser.Parser):
20 pass
21 p = MyParser()
22 p.parse()
23
24
25 """
4 26
5 import argparse 27 import argparse
6 import re 28 import re
7 import sys 29 import sys
8 import datetime 30 import datetime
129 while self.Peak == 'ID': 151 while self.Peak == 'ID':
130 terminals.append(self.consume('ID')[1]) 152 terminals.append(self.consume('ID')[1])
131 else: 153 else:
132 headers.append(self.consume('HEADER')[1]) 154 headers.append(self.consume('HEADER')[1])
133 self.consume('%%') 155 self.consume('%%')
156 self.headers = headers
134 self.grammar = Grammar(terminals) 157 self.grammar = Grammar(terminals)
135 while self.Peak != 'eof': 158 while self.Peak != 'eof':
136 self.parse_rule() 159 self.parse_rule()
137 return self.grammar 160 return self.grammar
138 161
139 def parse_symbol(self): 162 def parse_symbol(self):
140 return self.consume('ID')[1] 163 return self.consume('ID')[1]
141 164
142 def parse_rhs(self): 165 def parse_rhs(self):
166 """ Parse the right hand side of a rule definition """
143 symbols = [] 167 symbols = []
144 while self.Peak not in [';', 'BRACEDCODE', '|']: 168 while self.Peak not in [';', 'BRACEDCODE', '|']:
145 symbols.append(self.parse_symbol()) 169 symbols.append(self.parse_symbol())
146 if self.Peak == 'BRACEDCODE': 170 if self.Peak == 'BRACEDCODE':
147 action = self.consume('BRACEDCODE')[1] 171 action = self.consume('BRACEDCODE')[1]
149 else: 173 else:
150 action = None 174 action = None
151 return symbols, action 175 return symbols, action
152 176
153 def parse_rule(self): 177 def parse_rule(self):
178 """ Parse a rule definition """
154 p = self.parse_symbol() 179 p = self.parse_symbol()
155 self.consume(':') 180 self.consume(':')
156 symbols, action = self.parse_rhs() 181 symbols, action = self.parse_rhs()
157 self.grammar.add_production(p, symbols, action) 182 self.grammar.add_production(p, symbols, action)
158 while self.has_consumed('|'): 183 while self.has_consumed('|'):
164 class XaccGenerator: 189 class XaccGenerator:
165 """ Generator that writes generated parser to file """ 190 """ Generator that writes generated parser to file """
166 def __init__(self): 191 def __init__(self):
167 pass 192 pass
168 193
169 def generate(self, grammar, output_file): 194 def generate(self, grammar, headers, output_file):
170 print_grammar(grammar) 195 print_grammar(grammar)
171 self.grammar = grammar 196 self.grammar = grammar
197 self.headers = headers
172 self.action_table, self.goto_table = grammar.doGenerate() 198 self.action_table, self.goto_table = grammar.doGenerate()
173 self.generate_python_script(output_file) 199 self.generate_python_script(output_file)
174 200
175 def generate_python_script(self, f): 201 def generate_python_script(self, output_file):
176 """ Generate python script with the parser table """ 202 """ Generate python script with the parser table """
177 print('#!/usr/bin/python', file=f) 203 print('#!/usr/bin/python', file=output_file)
178 stamp = datetime.datetime.now().ctime() 204 stamp = datetime.datetime.now().ctime()
179 print('""" Automatically generated by xacc on {} """'.format(stamp), file=f) 205 print('""" Automatically generated by xacc on {} """'.format(stamp), file=output_file)
180 print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar', file=f) 206 print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar', file=output_file)
181 print('from ppci import Token', file=f) 207 print('from ppci import Token', file=output_file)
182 print(file=f) 208 print(file=output_file)
183 print('class Parser(LRParser):', file=f) 209 for h in self.headers:
184 print(' def __init__(self):', file=f) 210 print(h, file=output_file)
211 print(file=output_file)
212 print('class Parser(LRParser):', file=output_file)
213 print(' def __init__(self):', file=output_file)
185 # Generate rules: 214 # Generate rules:
186 print(' self.start_symbol = "{}"'.format(self.grammar.start_symbol), file=f) 215 print(' self.start_symbol = "{}"'.format(self.grammar.start_symbol), file=output_file)
187 print(' self.grammar = Grammar({})'.format(self.grammar.terminals), file=f) 216 print(' self.grammar = Grammar({})'.format(self.grammar.terminals), file=output_file)
188 for rule_number, rule in enumerate(self.grammar.productions): 217 for rule_number, rule in enumerate(self.grammar.productions):
189 rule.f_name = 'action_{}_{}'.format(rule.name, rule_number) 218 rule.f_name = 'action_{}_{}'.format(rule.name, rule_number)
190 print(' self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name), file=f) 219 print(' self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name), file=output_file)
191 # Fill action table: 220 # Fill action table:
192 print(' self.action_table = {}', file=f) 221 print(' self.action_table = {}', file=output_file)
193 for state in self.action_table: 222 for state in self.action_table:
194 action = self.action_table[state] 223 action = self.action_table[state]
195 print(' self.action_table[{}] = {}'.format(state, action), file=f) 224 print(' self.action_table[{}] = {}'.format(state, action), file=output_file)
196 print('', file=f) 225 print('', file=output_file)
197 226
198 # Fill goto table: 227 # Fill goto table:
199 print(' self.goto_table = {}', file=f) 228 print(' self.goto_table = {}', file=output_file)
200 for gt in self.goto_table: 229 for gt in self.goto_table:
201 to = self.goto_table[gt] 230 to = self.goto_table[gt]
202 print(' self.goto_table[{}] = {}'.format(gt, to), file=f) 231 print(' self.goto_table[{}] = {}'.format(gt, to), file=output_file)
203 print('', file=f) 232 print('', file=output_file)
204 233
205 # Generate a function for each action: 234 # Generate a function for each action:
206 for rule in self.grammar.productions: 235 for rule in self.grammar.productions:
207 M = len(rule.symbols) 236 M = len(rule.symbols)
208 args = ', '.join('arg{}'.format(n + 1) for n in range(M)) 237 args = ', '.join('arg{}'.format(n + 1) for n in range(M))
209 print(' def {}(self, {}):'.format(rule.f_name, args), file=f) 238 print(' def {}(self, {}):'.format(rule.f_name, args), file=output_file)
210 if rule.f == None: 239 if rule.f == None:
211 semantics = 'pass' 240 semantics = 'pass'
212 else: 241 else:
213 semantics = str(rule.f) 242 semantics = str(rule.f)
214 if semantics.strip() == '': 243 if semantics.strip() == '':
215 semantics = 'pass' 244 semantics = 'pass'
216 for n in range(M): 245 for n in range(M):
217 semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1)) 246 semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1))
218 print(' {}'.format(semantics), file=f) 247 print(' {}'.format(semantics), file=output_file)
219 print('', file=f)
220 248
221 249
222 def main(): 250 def main():
223 # Parse arguments: 251 # Parse arguments:
224 parser = argparse.ArgumentParser(description='xacc compiler compiler') 252 parser = argparse.ArgumentParser(description='xacc compiler compiler')
236 generator = XaccGenerator() 264 generator = XaccGenerator()
237 265
238 # Sequence source through the generator parts: 266 # Sequence source through the generator parts:
239 lexer.feed(src) 267 lexer.feed(src)
240 grammar = parser.parse_grammar() 268 grammar = parser.parse_grammar()
241 generator.generate(grammar, args.output) 269 generator.generate(grammar, parser.headers, args.output)
242 args.output.close() 270 args.output.close()
243 271
244 272
245 if __name__ == '__main__': 273 if __name__ == '__main__':
246 main() 274 main()