Mercurial > lcfOS
view python/yacc.py @ 395:3b0c495e3008
Speed improvements
author | Windel Bouwman |
---|---|
date | Fri, 23 May 2014 14:28:03 +0200 |
parents | 173e20a47fda |
children | fb3c1f029b30 |
line wrap: on
line source
#!/usr/bin/python """ Parser generator utility. This script can generate a python script from a grammar description. Invoke the script on a grammar specification file: .. code:: $ ./yacc.py test.x -o test_parser.py And use the generated parser by deriving a user class: .. code:: import test_parser class MyParser(test_parser.Parser): pass p = MyParser() p.parse() Alternatively you can load the parser on the fly: .. code:: import yacc parser_mod = yacc.load_as_module('mygrammar.x') class MyParser(parser_mod.Parser): pass p = MyParser() p.parse() """ import argparse import re import sys import datetime import types import io import logging from pyyacc import Grammar from baselex import BaseLexer from ppci import Token class XaccLexer(BaseLexer): def __init__(self): tok_spec = [ ('ID', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)), ('STRING', r"'[^']*'", lambda typ, val: ('ID', val[1:-1])), ('BRACEDCODE', r"\{[^\}]*\}", lambda typ, val: (typ, val)), ('OTHER', r'[:;\|]', lambda typ, val: (val, val)), ('SKIP', r'[ ]', None) ] super().__init__(tok_spec) def tokenize(self, txt): lines = txt.split('\n') section = 0 for line in lines: line = line.strip() if not line: continue # Skip empty lines if line == '%%': section += 1 yield Token('%%', '%%') continue if section == 0: if line.startswith('%tokens'): yield Token('%tokens', '%tokens') for tk in super().tokenize(line[7:]): yield tk else: yield Token('HEADER', line) elif section == 1: for tk in super().tokenize(line): yield tk class ParseError(Exception): pass class XaccParser: """ Implements a recursive descent parser to parse grammar rules. We could have made an generated parser, but that would yield a chicken egg issue. """ def __init__(self): pass def prepare_peak(self, lexer): self.lexer = lexer self.look_ahead = self.lexer.next_token() @property def Peak(self): """ Sneak peak to the next token in line """ return self.look_ahead.typ def next_token(self): """ Take the next token """ token = self.look_ahead self.look_ahead = self.lexer.next_token() return token def consume(self, typ): """ Eat next token of type typ or raise an exception """ if self.Peak == typ: return self.next_token() else: raise ParseError('Expected {}, but got {}'.format(typ, self.Peak)) def has_consumed(self, typ): """ Consume typ if possible and return true if so """ if self.Peak == typ: self.consume(typ) return True return False def parse_grammar(self, lexer): """ Entry parse function into recursive descent parser """ self.prepare_peak(lexer) # parse header self.headers = [] terminals = [] while self.Peak in ['HEADER', '%tokens']: if self.Peak == '%tokens': self.consume('%tokens') while self.Peak == 'ID': terminals.append(self.consume('ID').val) else: self.headers.append(self.consume('HEADER').val) self.consume('%%') self.grammar = Grammar(terminals) while self.Peak != 'EOF': self.parse_rule() return self.grammar def parse_symbol(self): return self.consume('ID').val def parse_rhs(self): """ Parse the right hand side of a rule definition """ symbols = [] while self.Peak not in [';', 'BRACEDCODE', '|']: symbols.append(self.parse_symbol()) if self.Peak == 'BRACEDCODE': action = self.consume('BRACEDCODE').val action = action[1:-1].strip() else: action = None return symbols, action def parse_rule(self): """ Parse a rule definition """ p = self.parse_symbol() self.consume(':') symbols, action = self.parse_rhs() self.grammar.add_production(p, symbols, action) while self.has_consumed('|'): symbols, action = self.parse_rhs() self.grammar.add_production(p, symbols, action) self.consume(';') class XaccGenerator: """ Generator that writes generated parser to file """ def __init__(self): self.logger = logging.getLogger('yacc') def generate(self, grammar, headers, output_file): self.output_file = output_file self.grammar = grammar self.headers = headers self.logger.info('Generating parser for grammar {}'.format(grammar)) self.action_table, self.goto_table = grammar.generate_tables() self.generate_python_script() def print(self, *args): """ Print helper function that prints to output file """ print(*args, file=self.output_file) def generate_python_script(self): """ Generate python script with the parser table """ self.print('#!/usr/bin/python') stamp = datetime.datetime.now().ctime() self.print('""" Automatically generated by xacc on {} """'.format(stamp)) self.print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar') self.print('from ppci import Token') self.print('') for h in self.headers: print(h, file=output_file) self.print('') self.print('class Parser(LRParser):') self.print(' def __init__(self):') # Generate rules: self.print(' self.start_symbol = "{}"'.format(self.grammar.start_symbol)) self.print(' self.grammar = Grammar({})'.format(self.grammar.terminals)) for rule_number, rule in enumerate(self.grammar.productions): rule.f_name = 'action_{}_{}'.format(rule.name, rule_number) self.print(' self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name)) # Fill action table: self.print(' self.action_table = {}') for state in self.action_table: action = self.action_table[state] self.print(' self.action_table[{}] = {}'.format(state, action)) self.print('') # Fill goto table: self.print(' self.goto_table = {}') for state_number in self.goto_table: to = self.goto_table[state_number] self.print(' self.goto_table[{}] = {}'.format(state_number, to)) self.print('') # Generate a function for each action: for rule in self.grammar.productions: num_symbols = len(rule.symbols) args = ', '.join('arg{}'.format(n + 1) for n in range(num_symbols)) self.print(' def {}(self, {}):'.format(rule.f_name, args)) if rule.f == None: semantics = 'pass' else: semantics = str(rule.f) if semantics.strip() == '': semantics = 'pass' for n in range(num_symbols): semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1)) self.print(' {}'.format(semantics)) self.print('') def make_argument_parser(): # Parse arguments: parser = argparse.ArgumentParser(description='xacc compiler compiler') parser.add_argument('source', type=argparse.FileType('r'), \ help='the parser specification') parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ default=sys.stdout) return parser def load_as_module(filename): """ Load a parser spec file, generate LR tables and create module """ ob = io.StringIO() args = argparse.Namespace(source=open(filename), output=ob) main(args) parser_mod = types.ModuleType('generated_parser') exec(ob.getvalue(), parser_mod.__dict__) return parser_mod def main(args): src = args.source.read() args.source.close() # Construction of generator parts: lexer = XaccLexer() parser = XaccParser() generator = XaccGenerator() # Sequence source through the generator parts: lexer.feed(src) grammar = parser.parse_grammar(lexer) generator.generate(grammar, parser.headers, args.output) if __name__ == '__main__': args = make_argument_parser().parse_args() main(args)