Mercurial > lcfOS
comparison python/pyburg.py @ 382:0c44e494ef58
Made lexer more generic
author | Windel Bouwman |
---|---|
date | Sun, 27 Apr 2014 12:24:21 +0200 |
parents | d2ddfe134c48 |
children | fb3c1f029b30 |
comparison
equal
deleted
inserted
replaced
381:6df89163e114 | 382:0c44e494ef58 |
---|---|
58 import os | 58 import os |
59 import io | 59 import io |
60 import types | 60 import types |
61 import argparse | 61 import argparse |
62 from ppci import Token | 62 from ppci import Token |
63 from pyyacc import ParserException, EOF | 63 from pyyacc import ParserException |
64 import yacc | 64 import yacc |
65 import baselex | 65 import baselex |
66 from tree import Tree | 66 from tree import Tree |
67 | 67 |
68 # Generate parser on the fly: | 68 # Generate parser on the fly: |
69 spec_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'burg.x') | 69 spec_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'burg.x') |
70 burg_parser = yacc.load_as_module(spec_file) | 70 burg_parser = yacc.load_as_module(spec_file) |
71 | 71 |
72 | 72 |
73 class BurgLexer: | 73 class BurgLexer(baselex.BaseLexer): |
74 def feed(self, txt): | 74 def __init__(self): |
75 tok_spec = [ | 75 tok_spec = [ |
76 ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)), | 76 ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)), |
77 ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)), | 77 ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)), |
78 ('number', r'\d+', lambda typ, val: (typ, int(val))), | 78 ('number', r'\d+', lambda typ, val: (typ, int(val))), |
79 ('STRING', r"'[^']*'", lambda typ, val: ('string', val[1:-1])), | 79 ('STRING', r"'[^']*'", lambda typ, val: ('string', val[1:-1])), |
80 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)), | 80 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)), |
81 ('SKIP', r'[ ]', None) | 81 ('SKIP', r'[ ]', None) |
82 ] | 82 ] |
83 | 83 super().__init__(tok_spec) |
84 | |
85 def tokenize(self, txt): | |
84 lines = txt.split('\n') | 86 lines = txt.split('\n') |
85 header_lines = [] | 87 header_lines = [] |
86 | 88 section = 0 |
87 def tokenize(): | 89 for line in lines: |
88 section = 0 | 90 line = line.strip() |
89 for line in lines: | 91 if not line: |
90 line = line.strip() | 92 continue # Skip empty lines |
91 if not line: | 93 elif line == '%%': |
92 continue # Skip empty lines | 94 section += 1 |
93 elif line == '%%': | 95 if section == 1: |
94 section += 1 | 96 yield Token('header', header_lines) |
95 if section == 1: | 97 yield Token('%%', '%%') |
96 yield Token('header', header_lines) | 98 else: |
97 yield Token('%%', '%%') | 99 if section == 0: |
100 header_lines.append(line) | |
98 else: | 101 else: |
99 if section == 0: | 102 # we could use yield from below, but python 3.2 does not work then: |
100 header_lines.append(line) | 103 for tk in super().tokenize(line): |
101 else: | 104 yield tk |
102 for tk in baselex.tokenize(tok_spec, line): | |
103 yield tk | |
104 yield Token(EOF, EOF) | |
105 self.tokens = tokenize() | |
106 self.token = self.tokens.__next__() | |
107 | |
108 def next_token(self): | |
109 t = self.token | |
110 if t.typ != EOF: | |
111 self.token = self.tokens.__next__() | |
112 return t | |
113 | 105 |
114 | 106 |
115 class Rule: | 107 class Rule: |
116 """ A rewrite rule. Specifies a tree that can be rewritten into a result | 108 """ A rewrite rule. Specifies a tree that can be rewritten into a result |
117 at a specific cost """ | 109 at a specific cost """ |
315 help='the parser specification') | 307 help='the parser specification') |
316 parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ | 308 parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ |
317 default=sys.stdout) | 309 default=sys.stdout) |
318 return parser | 310 return parser |
319 | 311 |
312 | |
320 def load_as_module(filename): | 313 def load_as_module(filename): |
321 """ Load a parser spec file, generate LR tables and create module """ | 314 """ Load a parser spec file, generate LR tables and create module """ |
322 ob = io.StringIO() | 315 ob = io.StringIO() |
323 args = argparse.Namespace(source=open(filename), output=ob) | 316 args = argparse.Namespace(source=open(filename), output=ob) |
324 main(args) | 317 main(args) |