view python/pyburg.py @ 318:e84047f29c78

Add burg and yacc initial attempts
author Windel Bouwman
date Tue, 31 Dec 2013 12:38:15 +0100
parents
children 8d07a4254f04
line wrap: on
line source

#!/usr/bin/python

""" Bottom up rewrite generator in python """
import sys
import re
import argparse
from ppci import Token
import burg_parser


class ParseError(Exception):
    """ Raised when a line of the specification cannot be split into tokens """


class BurgLexer:
    """ Lexer that turns the text of a burg specification into tokens.

    Usage: call feed() with the complete text, then call next_token()
    repeatedly. The token stream is produced lazily and ends with an
    'eof' token, which keeps being returned once it has been reached.
    """

    # Alternatives are tried in this order at the current position in a line.
    _TOK_SPEC = [
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('STRING', r"'[^']*'"),
        ('BRACEDCODE', r"\{[^\}]*\}"),
        ('OTHER', r'[:;\|]'),
        # Tabs separate tokens just like spaces do.
        ('SKIP', r'[ \t]+'),
    ]
    _TOKEN_RE = re.compile(
        '|'.join('(?P<%s>%s)' % pair for pair in _TOK_SPEC))

    def _tokenize_line(self, line):
        """ Generator that splits up a line into tokens.

        Quoted strings are yielded as 'ID' tokens without their quotes and
        the punctuation characters use the character itself as token type.
        Raises ParseError when part of the line matches no token pattern.
        """
        match = self._TOKEN_RE.match
        pos = 0
        mo = match(line, pos)
        while mo:
            typ = mo.lastgroup
            val = mo.group(typ)
            if typ == 'STRING':
                # Strip the surrounding quotes
                yield Token('ID', val[1:-1])
            elif typ == 'OTHER':
                yield Token(val, val)
            elif typ != 'SKIP':
                # 'ID' and 'BRACEDCODE' are passed on unchanged
                yield Token(typ, val)
            pos = mo.end()
            mo = match(line, pos)
        # Matching stopped before the end of the line: unknown character
        if pos != len(line):
            raise ParseError('Lex fault at {}'.format(line))

    def _tokenize(self, lines):
        """ Generator that yields the tokens of all lines, followed by 'eof'.

        Each '%%' line starts the next section. Before the first '%%' a
        line is either a '%tokens' declaration or a 'HEADER' token holding
        the whole line; between the first and the second '%%' lines are
        tokenized; lines after the second '%%' produce no tokens.
        """
        section = 0
        for line in lines:
            line = line.strip()
            if not line:
                continue  # Skip empty lines
            if line == '%%':
                section += 1
                yield Token('%%', '%%')
            elif section == 0:
                if line.startswith('%tokens'):
                    yield Token('%tokens', '%tokens')
                    yield from self._tokenize_line(line[len('%tokens'):])
                else:
                    yield Token('HEADER', line)
            elif section == 1:
                yield from self._tokenize_line(line)
        yield Token('eof', 'eof')

    def feed(self, txt):
        """ Start lexing txt and make its first token the current token """
        self.tokens = self._tokenize(txt.split('\n'))
        self.token = next(self.tokens)

    def next_token(self):
        """ Return the current token and advance to the next one.

        The stream is not advanced past the 'eof' token.
        """
        t = self.token
        if t.typ != 'eof':
            self.token = next(self.tokens)
        return t


class BurgParser(burg_parser.Parser):
    """ Parser for burg specifications, built on the generated parser """

    def add_rule(self, *rule_parts):
        """ Print the tuple of arguments this method was called with.

        NOTE(review): presumably called by the generated parser for each
        rule it recognizes -- confirm against burg_parser.
        """
        print(rule_parts)


def main():
    """ Command line entry point.

    Reads the specification file named on the command line, feeds its
    text to a BurgLexer and lets a BurgParser parse the resulting tokens.
    """
    # Parse arguments:
    parser = argparse.ArgumentParser(
        description='pyburg bottom up rewrite system generator compiler compiler')
    parser.add_argument(
        'source', type=argparse.FileType('r'),
        help='the parser specification')
    # NOTE(review): the output file is opened by argparse, but nothing in
    # this function writes to it yet.
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'),
        default=sys.stdout)
    args = parser.parse_args()

    # The with-block closes the source file even when reading fails
    with args.source as source:
        src = source.read()

    lexer = BurgLexer()
    burg = BurgParser()
    lexer.feed(src)
    burg.parse(lexer)

# Run the generator only when this file is executed as a script
if __name__ == '__main__':
    main()