Mercurial > lcfOS
view python/pyburg.py @ 318:e84047f29c78
Add burg and yacc initial attempts
author | Windel Bouwman |
---|---|
date | Tue, 31 Dec 2013 12:38:15 +0100 |
parents | |
children | 8d07a4254f04 |
line wrap: on
line source
#!/usr/bin/python

"""
    Bottom up rewrite generator in python
"""

import sys
import re
import argparse
from ppci import Token
import burg_parser


class ParseError(Exception):
    """ Raised when a line of the specification cannot be split into tokens """
    # NOTE(review): the lexer referenced ParseError without any definition or
    # import being visible in this file, so a lex fault ended in a NameError.
    # If the project already has a shared ParseError, import that one instead.


class BurgLexer:
    """ Lexer that turns the text of a burg specification into tokens """

    def feed(self, txt):
        """ Prepare the token stream for txt and load the first token.

        The text is handled line by line, blank lines are skipped. A line
        that is exactly '%%' gives a '%%' token and starts the next section.
        Before the first '%%', a line starting with '%tokens' gives a
        '%tokens' token followed by the tokens of the rest of that line, any
        other line gives one 'HEADER' token holding the stripped line.
        Between the first and second '%%' every line is split into tokens.
        Lines after the second '%%' are ignored. The stream always ends with
        an 'eof' token.

        Tokens are produced lazily, so a ParseError for a faulty line is
        raised when the stream reaches that line, which may be during a later
        call to next_token.
        """
        tok_spec = [
            ('ID', r'[A-Za-z][A-Za-z\d_]*'),
            ('STRING', r"'[^']*'"),
            ('BRACEDCODE', r"\{[^\}]*\}"),
            ('OTHER', r'[:;\|]'),
            ('SKIP', r'[ ]')
            ]
        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
        gettok = re.compile(tok_re).match

        lines = txt.split('\n')

        def tokenize_line(line):
            """ Generator that splits up a line into tokens """
            pos = 0
            mo = gettok(line, pos)
            while mo:
                typ = mo.lastgroup
                val = mo.group(typ)
                if typ in ('ID', 'BRACEDCODE'):
                    yield Token(typ, val)
                elif typ == 'STRING':
                    # A quoted name becomes an ID without its quotes
                    yield Token('ID', val[1:-1])
                elif typ == 'OTHER':
                    # Punctuation uses its own text as token type
                    yield Token(val, val)
                elif typ == 'SKIP':
                    pass
                else:
                    raise NotImplementedError(str(typ))
                pos = mo.end()
                mo = gettok(line, pos)
            # Matching stopped before the end: the rest cannot be lexed
            if len(line) != pos:
                raise ParseError('Lex fault at {}'.format(line))

        def tokenize():
            """ Generator that yields the tokens of the whole text """
            section = 0
            for line in lines:
                line = line.strip()
                if not line:
                    continue  # Skip empty lines
                if line == '%%':
                    section += 1
                    yield Token('%%', '%%')
                    continue
                if section == 0:
                    if line.startswith('%tokens'):
                        yield Token('%tokens', '%tokens')
                        # Tokenize what follows the '%tokens' keyword
                        yield from tokenize_line(line[len('%tokens'):])
                    else:
                        yield Token('HEADER', line)
                elif section == 1:
                    yield from tokenize_line(line)
            yield Token('eof', 'eof')

        self.tokens = tokenize()
        self.token = next(self.tokens)

    def next_token(self):
        """ Return the current token and advance to the next one.

        Once the current token is 'eof' the stream is not advanced anymore,
        so every further call returns that same 'eof' token.
        """
        t = self.token
        if t.typ != 'eof':
            self.token = next(self.tokens)
        return t


class BurgParser(burg_parser.Parser):
    """ Derive from automatically generated parser """

    def add_rule(self, *args):
        """ Print the arguments that were passed for a rule """
        print(args)


def main():
    """ Read the specification named on the command line and parse it """
    # Parse arguments:
    parser = argparse.ArgumentParser(
        description='pyburg bottom up rewrite system generator compiler compiler')
    parser.add_argument(
        'source', type=argparse.FileType('r'),
        help='the parser specification')
    # The output file is accepted here but not used further in this function
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'),
        default=sys.stdout)
    args = parser.parse_args()

    # Make sure the source file is closed, also when reading fails
    with args.source as source_file:
        src = source_file.read()

    lexer = BurgLexer()
    burg = BurgParser()
    lexer.feed(src)
    burg.parse(lexer)


if __name__ == '__main__':
    main()