Mercurial > lcfOS
comparison python/yacc.py @ 318:e84047f29c78
Add initial attempts at burg and yacc
author | Windel Bouwman |
---|---|
date | Tue, 31 Dec 2013 12:38:15 +0100 |
parents | |
children | 8d07a4254f04 |
comparison
equal
deleted
inserted
replaced
317:e30a77ae359b | 318:e84047f29c78 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 """ Parser generator utility """ | |
4 | |
5 import argparse | |
6 import re | |
7 import sys | |
8 import datetime | |
9 from pyyacc import Grammar, print_grammar | |
10 | |
11 | |
class XaccLexer:
    """ Lexer for xacc grammar description files.

    The input has two sections separated by a '%%' line: a header
    section (free-form header lines plus '%tokens' declarations) and a
    rules section. feed() turns the whole text into a stream of
    (type, value) tuples, terminated by an ('eof', 'eof') token.
    """
    def __init__(self):
        pass

    def feed(self, txt):
        """ Tokenize txt and prime self.token with the first token """
        # Create a regular expression for the lexing part:
        tok_spec = [
            ('ID', r'[A-Za-z][A-Za-z\d_]*'),
            ('STRING', r"'[^']*'"),
            ('BRACEDCODE', r"\{[^\}]*\}"),  # NOTE: nested braces are not supported
            ('OTHER', r'[:;\|]'),
            ('SKIP', r'[ \t]')  # was r'[ ]'; also skip tabs so they do not lex-fault
            ]
        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
        gettok = re.compile(tok_re).match

        lines = txt.split('\n')

        def tokenize_line(line):
            """ Generator that splits up a line into tokens """
            mo = gettok(line)
            pos = 0
            while mo:
                typ = mo.lastgroup
                val = mo.group(typ)
                if typ == 'ID':
                    yield (typ, val)
                elif typ == 'STRING':
                    # Quoted literals act as plain IDs; strip the quotes.
                    typ = 'ID'
                    yield (typ, val[1:-1])
                elif typ == 'OTHER':
                    # Punctuation uses the character itself as token type.
                    typ = val
                    yield (typ, val)
                elif typ == 'BRACEDCODE':
                    yield (typ, val)
                elif typ == 'SKIP':
                    pass
                else:
                    raise NotImplementedError(str(typ))
                pos = mo.end()
                mo = gettok(line, pos)
            # Anything the regex could not consume is a lexing error:
            if len(line) != pos:
                raise ParseError('Lex fault at {}'.format(line))

        def tokenize():
            """ Generator over all tokens of all lines, ending with 'eof' """
            section = 0
            for line in lines:
                line = line.strip()
                if not line:
                    continue  # Skip empty lines
                if line == '%%':
                    # Section separator between header and rules.
                    section += 1
                    yield ('%%', '%%')
                    continue
                if section == 0:
                    if line.startswith('%tokens'):
                        yield ('%tokens', '%tokens')
                        yield from tokenize_line(line[len('%tokens'):])
                    else:
                        yield ('HEADER', line)
                elif section == 1:
                    yield from tokenize_line(line)
            yield ('eof', 'eof')
        self.tokens = tokenize()
        self.token = self.tokens.__next__()

    def next_token(self):
        """ Return the current token and advance; sticks at 'eof' """
        t = self.token
        if t[0] != 'eof':
            self.token = self.tokens.__next__()
        return t
84 | |
85 | |
class ParseError(Exception):
    """ Raised on a lexing fault or an unexpected token while parsing """
    pass
88 | |
89 | |
class XaccParser:
    """ Implements a recursive descent parser to parse grammar rules.
    We could have made an generated parser, but that would yield a chicken
    egg issue.
    """
    def __init__(self, lexer):
        self.lexer = lexer

    @property
    def Peak(self):
        """ Sneak peak to the next token in line """
        return self.lexer.token[0]

    def next_token(self):
        """ Take the next token """
        return self.lexer.next_token()

    def consume(self, typ):
        """ Eat next token of type typ or raise an exception """
        if self.Peak != typ:
            raise ParseError('Expected {}, but got {}'.format(typ, self.Peak))
        return self.next_token()

    def has_consumed(self, typ):
        """ Consume typ if possible and return true if so """
        matched = self.Peak == typ
        if matched:
            self.consume(typ)
        return matched

    def parse_grammar(self):
        """ Entry parse function into recursive descent parser """
        headers = []
        terminals = []
        # Header section: free-form header lines and %tokens declarations.
        while self.Peak in ('HEADER', '%tokens'):
            if self.Peak == '%tokens':
                self.consume('%tokens')
                while self.Peak == 'ID':
                    terminals.append(self.consume('ID')[1])
            else:
                headers.append(self.consume('HEADER')[1])
        self.consume('%%')
        self.grammar = Grammar(terminals)
        # Rules section: one production rule per statement until eof.
        while self.Peak != 'eof':
            self.parse_rule()
        return self.grammar

    def parse_symbol(self):
        """ Parse one grammar symbol (an ID token) """
        return self.consume('ID')[1]

    def parse_rhs(self):
        """ Parse a rule's right hand side: symbols plus optional action """
        symbols = []
        while self.Peak not in (';', 'BRACEDCODE', '|'):
            symbols.append(self.parse_symbol())
        action = None
        if self.Peak == 'BRACEDCODE':
            # Strip the surrounding braces and whitespace from the action.
            action = self.consume('BRACEDCODE')[1]
            action = action[1:-1].strip()
        return symbols, action

    def parse_rule(self):
        """ Parse one rule: name ':' rhs ('|' rhs)* ';' """
        lhs = self.parse_symbol()
        self.consume(':')
        symbols, action = self.parse_rhs()
        self.grammar.add_production(lhs, symbols, action)
        while self.has_consumed('|'):
            symbols, action = self.parse_rhs()
            self.grammar.add_production(lhs, symbols, action)
        self.consume(';')
162 | |
163 | |
class XaccGenerator:
    """ Generator that writes generated parser to file """
    def __init__(self):
        pass

    def generate(self, grammar, output_file):
        """ Compute the parse tables for grammar and write a python
            parser module to output_file.
        """
        print_grammar(grammar)
        self.grammar = grammar
        self.action_table, self.goto_table = grammar.doGenerate()
        self.generate_python_script(output_file)

    def generate_python_script(self, f):
        """ Generate python script with the parser table """
        print('#!/usr/bin/python', file=f)
        stamp = datetime.datetime.now().ctime()
        print('""" Automatically generated by xacc on {} """'.format(stamp), file=f)
        print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar', file=f)
        print('from ppci import Token', file=f)
        print(file=f)
        print('class Parser(LRParser):', file=f)
        print('    def __init__(self):', file=f)
        # Generate rules:
        print('        self.start_symbol = "{}"'.format(self.grammar.start_symbol), file=f)
        print('        self.grammar = Grammar({})'.format(self.grammar.terminals), file=f)
        for rule_number, rule in enumerate(self.grammar.productions):
            # Each production gets a uniquely named action method:
            rule.f_name = 'action_{}_{}'.format(rule.name, rule_number)
            print('        self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name), file=f)
        # Fill action table:
        print('        self.action_table = {}', file=f)
        for state in self.action_table:
            action = self.action_table[state]
            print('        self.action_table[{}] = {}'.format(state, action), file=f)
        print('', file=f)

        # Fill goto table:
        print('        self.goto_table = {}', file=f)
        for gt in self.goto_table:
            to = self.goto_table[gt]
            print('        self.goto_table[{}] = {}'.format(gt, to), file=f)
        print('', file=f)

        # Generate a function for each action:
        for rule in self.grammar.productions:
            M = len(rule.symbols)
            args = ', '.join('arg{}'.format(n + 1) for n in range(M))
            print('    def {}(self, {}):'.format(rule.f_name, args), file=f)
            # Use 'is None' (identity), not '== None', per PEP 8.
            if rule.f is None:
                semantics = 'pass'
            else:
                semantics = str(rule.f)
                if semantics.strip() == '':
                    semantics = 'pass'
            # Substitute $1..$M placeholders with the argument names:
            for n in range(M):
                semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1))
            print('        {}'.format(semantics), file=f)
            print('', file=f)
220 | |
221 | |
def main():
    """ Command line entry: read a spec, generate a parser module """
    # Parse arguments:
    arg_parser = argparse.ArgumentParser(description='xacc compiler compiler')
    arg_parser.add_argument('source', type=argparse.FileType('r'),
                            help='the parser specification')
    arg_parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                            default=sys.stdout)
    args = arg_parser.parse_args()
    src = args.source.read()
    args.source.close()

    # Construction of generator parts:
    lexer = XaccLexer()
    grammar_parser = XaccParser(lexer)
    generator = XaccGenerator()

    # Sequence source through the generator parts:
    lexer.feed(src)
    grammar = grammar_parser.parse_grammar()
    generator.generate(grammar, args.output)
    args.output.close()
243 | |
244 | |
# Script entry point.
if __name__ == '__main__':
    main()