#!/usr/bin/python

"""
Parser generator utility. This script can generate a Python script from a
grammar description.

Invoke the script on a grammar specification file:

.. code::

    $ ./yacc.py test.x -o test_parser.py
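
The grammar file uses a yacc-like notation: optional header lines (copied
verbatim into the generated parser module) and %tokens declarations, then
a %% separator, then the production rules. As a sketch, a hypothetical
test.x might look like this (the token names and actions are illustrative
only):

.. code::

    %tokens number '+'
    %%
    expr: expr '+' number { return $1 + $3 }
        | number { return $1 }
        ;

Within an action, $1, $2, ... refer to the values of the matched symbols.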

And use the generated parser by deriving a user class:

.. code::

    import test_parser
    class MyParser(test_parser.Parser):
        pass
    p = MyParser()
    p.parse()

Alternatively you can load the parser on the fly:

.. code::

    import yacc
    parser_mod = yacc.load_as_module('mygrammar.x')
    class MyParser(parser_mod.Parser):
        pass
    p = MyParser()
    p.parse()
"""

import argparse
import re
import sys
import datetime
import types
import io
import logging
from pyyacc import Grammar
from baselex import BaseLexer
from ppci import Token, SourceLocation


class XaccLexer(BaseLexer):
    """ Lexer for xacc grammar specification files """
    def __init__(self):
        tok_spec = [
            ('ID', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)),
            ('STRING', r"'[^']*'", lambda typ, val: ('ID', val[1:-1])),
            ('BRACEDCODE', r"\{[^\}]*\}", lambda typ, val: (typ, val)),
            ('OTHER', r'[:;\|]', lambda typ, val: (val, val)),
            ('SKIP', r'[ ]', None)
        ]
        super().__init__(tok_spec)

    def tokenize(self, txt):
        """ Tracks the %% section breaks and delegates per-line tokenization
            to the base lexer """
        lines = txt.split('\n')
        section = 0
        for line in lines:
            line = line.strip()
            loc = SourceLocation(self.filename, 0, 0, 0)
            if not line:
                continue  # Skip empty lines
            if line == '%%':
                section += 1
                yield Token('%%', '%%', loc)
                continue
            if section == 0:
                # Before the first %%: header lines and %tokens declarations
                if line.startswith('%tokens'):
                    yield Token('%tokens', '%tokens', loc)
                    for tk in super().tokenize(line[7:]):
                        yield tk
                else:
                    yield Token('HEADER', line, loc)
            elif section == 1:
                # After the first %%: the production rules
                for tk in super().tokenize(line):
                    yield tk
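
# Example (illustrative, token names made up): given the spec line
#   %tokens number '+'
# tokenize() yields Token('%tokens', ...), then Token('ID', 'number', ...)
# and Token('ID', '+', ...). Quoted strings are folded into plain ID tokens
# with the quotes stripped, so terminals may be named or quoted.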


class ParseError(Exception):
    pass


class XaccParser:
    """ Implements a recursive descent parser to parse grammar rules.
        We could have used a generated parser here, but that would create a
        chicken-and-egg problem.
    """
    def __init__(self):
        pass

    def prepare_peak(self, lexer):
        self.lexer = lexer
        self.look_ahead = self.lexer.next_token()

    @property
    def Peak(self):
        """ Sneak peek at the next token in line """
        return self.look_ahead.typ

    def next_token(self):
        """ Take the next token """
        token = self.look_ahead
        self.look_ahead = self.lexer.next_token()
        return token

    def consume(self, typ):
        """ Eat next token of type typ or raise an exception """
        if self.Peak == typ:
            return self.next_token()
        else:
            raise ParseError('Expected {}, but got {}'.format(typ, self.Peak))

    def has_consumed(self, typ):
        """ Consume typ if possible and return True if so """
        if self.Peak == typ:
            self.consume(typ)
            return True
        return False

    def parse_grammar(self, lexer):
        """ Entry parse function into recursive descent parser """
        self.prepare_peak(lexer)
        # Parse the header section:
        self.headers = []
        terminals = []
        while self.Peak in ['HEADER', '%tokens']:
            if self.Peak == '%tokens':
                self.consume('%tokens')
                while self.Peak == 'ID':
                    terminals.append(self.consume('ID').val)
            else:
                self.headers.append(self.consume('HEADER').val)
        self.consume('%%')
        self.grammar = Grammar(terminals)
        while self.Peak != 'EOF':
            self.parse_rule()
        return self.grammar

    def parse_symbol(self):
        return self.consume('ID').val

    def parse_rhs(self):
        """ Parse the right hand side of a rule definition """
        symbols = []
        while self.Peak not in [';', 'BRACEDCODE', '|']:
            symbols.append(self.parse_symbol())
        if self.Peak == 'BRACEDCODE':
            action = self.consume('BRACEDCODE').val
            action = action[1:-1].strip()
        else:
            action = None
        return symbols, action

    def parse_rule(self):
        """ Parse a rule definition """
        p = self.parse_symbol()
        self.consume(':')
        symbols, action = self.parse_rhs()
        self.grammar.add_production(p, symbols, action)
        while self.has_consumed('|'):
            symbols, action = self.parse_rhs()
            self.grammar.add_production(p, symbols, action)
        self.consume(';')
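
    # Note (illustrative rule, not from the source): an input such as
    #   expr: expr '+' number { return $1 + $3 }
    #       | number
    #       ;
    # results in one add_production() call per alternative; the action is
    # the brace-enclosed text, or None for alternatives without braces.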


class XaccGenerator:
    """ Generator that writes the generated parser to file """
    def __init__(self):
        self.logger = logging.getLogger('yacc')

    def generate(self, grammar, headers, output_file):
        self.output_file = output_file
        self.grammar = grammar
        self.headers = headers
        self.logger.info('Generating parser for grammar {}'.format(grammar))
        self.action_table, self.goto_table = grammar.generate_tables()
        self.generate_python_script()

    def print(self, *args):
        """ Print helper function that prints to the output file """
        print(*args, file=self.output_file)

    def generate_python_script(self):
        """ Generate python script with the parser tables """
        self.print('#!/usr/bin/python')
        stamp = datetime.datetime.now().ctime()
        self.print('""" Automatically generated by xacc on {} """'.format(stamp))
        self.print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar')
        self.print('from ppci import Token')
        self.print('')
        # Emit the user headers verbatim:
        for h in self.headers:
            self.print(h)
        self.print('')
        self.print('class Parser(LRParser):')
        self.print('    def __init__(self):')
        # Generate rules:
        self.print('        self.start_symbol = "{}"'.format(self.grammar.start_symbol))
        self.print('        self.grammar = Grammar({})'.format(self.grammar.terminals))
        for rule_number, rule in enumerate(self.grammar.productions):
            rule.f_name = 'action_{}_{}'.format(rule.name, rule_number)
            self.print('        self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name))
        # Fill action table:
        self.print('        self.action_table = {}')
        for state in self.action_table:
            action = self.action_table[state]
            self.print('        self.action_table[{}] = {}'.format(state, action))
        self.print('')

        # Fill goto table:
        self.print('        self.goto_table = {}')
        for state_number in self.goto_table:
            to = self.goto_table[state_number]
            self.print('        self.goto_table[{}] = {}'.format(state_number, to))
        self.print('')

        # Generate a function for each action:
        for rule in self.grammar.productions:
            num_symbols = len(rule.symbols)
            args = ', '.join('arg{}'.format(n + 1) for n in range(num_symbols))
            self.print('    def {}(self, {}):'.format(rule.f_name, args))
            if rule.f is None:
                semantics = 'pass'
            else:
                semantics = str(rule.f)
                if semantics.strip() == '':
                    semantics = 'pass'
            # Replace the $n placeholders with the generated argument names:
            for n in range(num_symbols):
                semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1))
            self.print('        {}'.format(semantics))
            self.print('')
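
    # The emitted module has roughly this shape (illustrative sketch; the
    # actual symbols, states and actions depend on the input grammar):
    #
    #   class Parser(LRParser):
    #       def __init__(self):
    #           self.start_symbol = "expr"
    #           self.grammar = Grammar(['number', '+'])
    #           self.grammar.add_production("expr", ['expr', '+', 'number'], self.action_expr_0)
    #           self.action_table = {}
    #           self.action_table[0] = ...
    #           self.goto_table = {}
    #           self.goto_table[0] = ...
    #
    #       def action_expr_0(self, arg1, arg2, arg3):
    #           return arg1 + arg3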


def make_argument_parser():
    """ Construct the command line argument parser """
    parser = argparse.ArgumentParser(description='xacc compiler compiler')
    parser.add_argument('source', type=argparse.FileType('r'),
                        help='the parser specification')
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        default=sys.stdout)
    return parser


def load_as_module(filename):
    """ Load a parser spec file, generate LR tables and create a module """
    ob = io.StringIO()
    args = argparse.Namespace(source=open(filename), output=ob)
    main(args)
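
    # main() has now written the generated parser source into the StringIO
    # buffer; exec-ing that source in a fresh module namespace gives an
    # importable module object without touching the file system.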
    parser_mod = types.ModuleType('generated_parser')
    exec(ob.getvalue(), parser_mod.__dict__)
    return parser_mod


def main(args):
    src = args.source.read()
    args.source.close()

    # Construct the generator parts:
    lexer = XaccLexer()
    parser = XaccParser()
    generator = XaccGenerator()

    # Run the source through the generator parts:
    lexer.feed(src)
    grammar = parser.parse_grammar(lexer)
    generator.generate(grammar, parser.headers, args.output)


if __name__ == '__main__':
    args = make_argument_parser().parse_args()
    main(args)
|