Mercurial > lcfOS

diff python/pyburg.py @ 318:e84047f29c78
Add burg and yacc initial attempts
author: Windel Bouwman
date: Tue, 31 Dec 2013 12:38:15 +0100
children: 8d07a4254f04
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/pyburg.py	Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,102 @@
+#!/usr/bin/python
+
+""" Bottom up rewrite generator in python """
+import sys
+import re
+import argparse
+from ppci import Token
+import burg_parser
+
+
+class ParseError(Exception):
+    """ Raised when a line of the burg specification cannot be tokenized """
+
+
+class BurgLexer:
+    """ Lexer for burg specification text.
+
+    Call feed() with the complete specification, then pull tokens one at
+    a time with next_token().
+    """
+    def feed(self, txt):
+        """ Tokenize txt lazily and load the first token.
+
+        The text is processed line by line. A line consisting of '%%'
+        starts the next section and is emitted as a '%%' token:
+
+        - section 0: a line starting with '%tokens' gives a '%tokens'
+          token followed by the tokens of the rest of that line; any
+          other line becomes a single 'HEADER' token.
+        - section 1: every line is split into tokens.
+        - later sections: lines are ignored.
+
+        An 'eof' token is produced at the end. Tokenizing is lazy, so a
+        ParseError for a bad line surfaces when that line is reached,
+        either here or in next_token().
+        """
+        tok_spec = [
+           ('ID', r'[A-Za-z][A-Za-z\d_]*'),
+           ('STRING', r"'[^']*'"),
+           ('BRACEDCODE', r"\{[^\}]*\}"),
+           ('OTHER', r'[:;\|]'),
+           # Tabs are skipped as well: strip() below already accepts them
+           # at the line ends, so they should not be a lex fault inside.
+           ('SKIP', r'[ \t]+')
+            ]
+        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+        gettok = re.compile(tok_re).match
+
+        lines = txt.split('\n')
+
+        def tokenize_line(line):
+            """ Generator that splits up a line into tokens """
+            pos = 0
+            mo = gettok(line, pos)
+            while mo:
+                typ = mo.lastgroup
+                val = mo.group(typ)
+                if typ == 'ID':
+                    yield Token(typ, val)
+                elif typ == 'STRING':
+                    # A quoted string is an identifier without its quotes
+                    yield Token('ID', val[1:-1])
+                elif typ == 'OTHER':
+                    # Punctuation uses the character itself as token type
+                    yield Token(val, val)
+                elif typ == 'BRACEDCODE':
+                    yield Token(typ, val)
+                elif typ == 'SKIP':
+                    pass
+                else:
+                    raise NotImplementedError(str(typ))
+                pos = mo.end()
+                mo = gettok(line, pos)
+            # Matching stopped before the end of the line: unknown input
+            if len(line) != pos:
+                raise ParseError('Lex fault at {}'.format(line))
+
+        def tokenize():
+            """ Generator that yields the tokens of the whole text """
+            section = 0
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    continue  # Skip empty lines
+                if line == '%%':
+                    section += 1
+                    yield Token('%%', '%%')
+                    continue
+                if section == 0:
+                    if line.startswith('%tokens'):
+                        yield Token('%tokens', '%tokens')
+                        yield from tokenize_line(line[len('%tokens'):])
+                    else:
+                        yield Token('HEADER', line)
+                elif section == 1:
+                    yield from tokenize_line(line)
+            yield Token('eof', 'eof')
+
+        self.tokens = tokenize()
+        self.token = next(self.tokens)
+
+    def next_token(self):
+        """ Return the current token and advance to the next one.
+
+        Once the 'eof' token is reached it is returned on every call.
+        """
+        t = self.token
+        if t.typ != 'eof':
+            self.token = next(self.tokens)
+        return t
+
+
+class BurgParser(burg_parser.Parser):
+    """ Burg parser, derived from the automatically generated parser """
+    def add_rule(self, *args):
+        """ Print the received arguments as a tuple.
+
+        NOTE(review): presumably invoked by the generated parser for each
+        rule it recognizes -- confirm against burg_parser.
+        """
+        rule = args
+        print(rule)
+
+
+def main():
+    """ Command line entry point: lex and parse a burg specification.
+
+    Reads the file given as the 'source' argument, feeds its text to a
+    BurgLexer and lets a BurgParser parse from that lexer.
+    """
+    # Parse arguments:
+    parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler')
+    parser.add_argument(
+        'source', type=argparse.FileType('r'),
+        help='the parser specification')
+    # NOTE: the output file is accepted here but nothing is written to it.
+    parser.add_argument(
+        '-o', '--output', type=argparse.FileType('w'),
+        default=sys.stdout)
+    args = parser.parse_args()
+
+    # Close the source file even when reading it fails
+    with args.source:
+        src = args.source.read()
+
+    lexer = BurgLexer()
+    burg = BurgParser()
+    lexer.feed(src)
+    burg.parse(lexer)
+
+if __name__ == '__main__':
+    main()
author	Windel Bouwman
date	Tue, 31 Dec 2013 12:38:15 +0100
parents
children	8d07a4254f04