comparison python/pyburg.py @ 382:0c44e494ef58

Made lexer more generic
author Windel Bouwman
date Sun, 27 Apr 2014 12:24:21 +0200
parents d2ddfe134c48
children fb3c1f029b30
comparison
equal deleted inserted replaced
381:6df89163e114 382:0c44e494ef58
58 import os 58 import os
59 import io 59 import io
60 import types 60 import types
61 import argparse 61 import argparse
62 from ppci import Token 62 from ppci import Token
63 from pyyacc import ParserException, EOF 63 from pyyacc import ParserException
64 import yacc 64 import yacc
65 import baselex 65 import baselex
66 from tree import Tree 66 from tree import Tree
67 67
68 # Generate parser on the fly: 68 # Generate parser on the fly:
69 spec_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'burg.x') 69 spec_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'burg.x')
70 burg_parser = yacc.load_as_module(spec_file) 70 burg_parser = yacc.load_as_module(spec_file)
71 71
72 72
73 class BurgLexer: 73 class BurgLexer(baselex.BaseLexer):
74 def feed(self, txt): 74 def __init__(self):
75 tok_spec = [ 75 tok_spec = [
76 ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)), 76 ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)),
77 ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)), 77 ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)),
78 ('number', r'\d+', lambda typ, val: (typ, int(val))), 78 ('number', r'\d+', lambda typ, val: (typ, int(val))),
79 ('STRING', r"'[^']*'", lambda typ, val: ('string', val[1:-1])), 79 ('STRING', r"'[^']*'", lambda typ, val: ('string', val[1:-1])),
80 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)), 80 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)),
81 ('SKIP', r'[ ]', None) 81 ('SKIP', r'[ ]', None)
82 ] 82 ]
83 83 super().__init__(tok_spec)
84
85 def tokenize(self, txt):
84 lines = txt.split('\n') 86 lines = txt.split('\n')
85 header_lines = [] 87 header_lines = []
86 88 section = 0
87 def tokenize(): 89 for line in lines:
88 section = 0 90 line = line.strip()
89 for line in lines: 91 if not line:
90 line = line.strip() 92 continue # Skip empty lines
91 if not line: 93 elif line == '%%':
92 continue # Skip empty lines 94 section += 1
93 elif line == '%%': 95 if section == 1:
94 section += 1 96 yield Token('header', header_lines)
95 if section == 1: 97 yield Token('%%', '%%')
96 yield Token('header', header_lines) 98 else:
97 yield Token('%%', '%%') 99 if section == 0:
100 header_lines.append(line)
98 else: 101 else:
99 if section == 0: 102 # we could use yield from below, but python 3.2 does not work then:
100 header_lines.append(line) 103 for tk in super().tokenize(line):
101 else: 104 yield tk
102 for tk in baselex.tokenize(tok_spec, line):
103 yield tk
104 yield Token(EOF, EOF)
105 self.tokens = tokenize()
106 self.token = self.tokens.__next__()
107
108 def next_token(self):
109 t = self.token
110 if t.typ != EOF:
111 self.token = self.tokens.__next__()
112 return t
113 105
114 106
115 class Rule: 107 class Rule:
116 """ A rewrite rule. Specifies a tree that can be rewritten into a result 108 """ A rewrite rule. Specifies a tree that can be rewritten into a result
117 at a specific cost """ 109 at a specific cost """
315 help='the parser specification') 307 help='the parser specification')
316 parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ 308 parser.add_argument('-o', '--output', type=argparse.FileType('w'), \
317 default=sys.stdout) 309 default=sys.stdout)
318 return parser 310 return parser
319 311
312
320 def load_as_module(filename): 313 def load_as_module(filename):
321 """ Load a parser spec file, generate LR tables and create module """ 314 """ Load a parser spec file, generate LR tables and create module """
322 ob = io.StringIO() 315 ob = io.StringIO()
323 args = argparse.Namespace(source=open(filename), output=ob) 316 args = argparse.Namespace(source=open(filename), output=ob)
324 main(args) 317 main(args)