Mercurial > lcfOS
comparison python/asm.py @ 318:e84047f29c78
Add burg and yacc initial attempts
author: Windel Bouwman
date: Tue, 31 Dec 2013 12:38:15 +0100
parents: 084cccaa5deb
children: 8d07a4254f04
comparison
equal
deleted
inserted
replaced
317:e30a77ae359b | 318:e84047f29c78 |
---|---|
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 | 2 |
3 import re, argparse | 3 import re |
4 import argparse | |
4 import pyyacc | 5 import pyyacc |
5 from ppci import Token, CompilerError, SourceLocation | 6 from ppci import Token, CompilerError, SourceLocation |
6 from target import Target, Label | 7 from target import Target, Label |
7 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber | 8 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber |
8 | 9 |
9 def tokenize(s): | 10 def tokenize(s): |
10 """ | 11 """ |
11 Tokenizer, generates an iterator that | 12 Tokenizer, generates an iterator that |
12 returns tokens! | 13 returns tokens! |
13 | 14 |
14 This GREAT example was taken from python re doc page! | 15 This GREAT example was taken from python re doc page! |
15 """ | 16 """ |
16 tok_spec = [ | 17 tok_spec = [ |
17 ('REAL', r'\d+\.\d+'), | 18 ('REAL', r'\d+\.\d+'), |
18 ('HEXNUMBER', r'0x[\da-fA-F]+'), | 19 ('HEXNUMBER', r'0x[\da-fA-F]+'), |
19 ('NUMBER', r'\d+'), | 20 ('NUMBER', r'\d+'), |
20 ('ID', r'[A-Za-z][A-Za-z\d_]*'), | 21 ('ID', r'[A-Za-z][A-Za-z\d_]*'), |
21 ('SKIP', r'[ \t]'), | 22 ('SKIP', r'[ \t]'), |
22 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'), | 23 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'), |
23 ('STRING', r"'.*?'"), | 24 ('STRING', r"'.*?'"), |
24 ('COMMENT', r";.*") | 25 ('COMMENT', r";.*") |
25 ] | 26 ] |
26 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) | 27 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) |
27 gettok = re.compile(tok_re).match | 28 gettok = re.compile(tok_re).match |
28 line = 1 | 29 line = 1 |
29 pos = line_start = 0 | 30 pos = line_start = 0 |
30 mo = gettok(s) | 31 mo = gettok(s) |
31 while mo is not None: | 32 while mo is not None: |
32 typ = mo.lastgroup | 33 typ = mo.lastgroup |
33 val = mo.group(typ) | 34 val = mo.group(typ) |
34 if typ == 'NEWLINE': | 35 if typ == 'NEWLINE': |
35 line_start = pos | 36 line_start = pos |
36 line += 1 | 37 line += 1 |
49 col = mo.start() - line_start | 50 col = mo.start() - line_start |
50 loc = SourceLocation('', line, col, 0) # TODO retrieve length? | 51 loc = SourceLocation('', line, col, 0) # TODO retrieve length? |
51 yield Token(typ, val, loc) | 52 yield Token(typ, val, loc) |
52 pos = mo.end() | 53 pos = mo.end() |
53 mo = gettok(s, pos) | 54 mo = gettok(s, pos) |
54 if pos != len(s): | 55 if pos != len(s): |
55 col = pos - line_start | 56 col = pos - line_start |
56 loc = SourceLocation('', line, col, 0) | 57 loc = SourceLocation('', line, col, 0) |
57 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) | 58 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) |
59 yield Token('EOF', pyyacc.EOF) | |
58 | 60 |
59 | 61 |
60 class Lexer: | 62 class Lexer: |
61 def __init__(self, src): | 63 def __init__(self, src): |
62 self.tokens = tokenize(src) | 64 self.tokens = tokenize(src) |
63 self.curTok = self.tokens.__next__() | 65 self.curTok = self.tokens.__next__() |
64 | 66 |
65 def eat(self): | 67 def next_token(self): |
66 t = self.curTok | 68 t = self.curTok |
67 self.curTok = self.tokens.__next__() | 69 if t.typ != 'EOF': |
70 self.curTok = self.tokens.__next__() | |
68 return t | 71 return t |
69 | |
70 @property | |
71 def Peak(self): | |
72 return self.curTok | |
73 | 72 |
74 | 73 |
75 class Parser: | 74 class Parser: |
76 def __init__(self): | 75 def __init__(self): |
77 # Construct a parser given a grammar: | 76 # Construct a parser given a grammar: |
78 ident = lambda x: x # Identity helper function | 77 ident = lambda x: x # Identity helper function |
79 g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}']) | 78 g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}', |
79 pyyacc.EOF]) | |
80 g.add_production('asmline', ['asmline2']) | 80 g.add_production('asmline', ['asmline2']) |
81 g.add_production('asmline', ['asmline2', 'COMMENT']) | 81 g.add_production('asmline', ['asmline2', 'COMMENT']) |
82 g.add_production('asmline2', ['label', 'instruction']) | 82 g.add_production('asmline2', ['label', 'instruction']) |
83 g.add_production('asmline2', ['instruction']) | 83 g.add_production('asmline2', ['instruction']) |
84 g.add_production('asmline2', ['label']) | 84 g.add_production('asmline2', ['label']) |
85 g.add_production('asmline2', []) | 85 g.add_production('asmline2', []) |
86 g.add_production('optcomment', []) | |
87 g.add_production('optcomment', ['COMMENT']) | |
88 g.add_production('label', ['ID', ':'], self.p_label) | 86 g.add_production('label', ['ID', ':'], self.p_label) |
89 g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) | 87 g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) |
90 g.add_production('instruction', ['opcode'], self.p_ins_2) | 88 g.add_production('instruction', ['opcode'], self.p_ins_2) |
91 g.add_production('opcode', ['ID'], ident) | 89 g.add_production('opcode', ['ID'], lambda x: x.val) |
92 g.add_production('operands', ['operand'], self.p_operands_1) | 90 g.add_production('operands', ['operand'], self.p_operands_1) |
93 g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) | 91 g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) |
94 g.add_production('operand', ['expression'], ident) | 92 g.add_production('operand', ['expression'], ident) |
95 g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) | 93 g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) |
96 g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op) | 94 g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op) |
97 g.add_production('listitems', ['expression'], self.p_listitems_1) | 95 g.add_production('listitems', ['expression'], self.p_listitems_1) |
98 g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2) | 96 g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2) |
99 g.add_production('expression', ['term'], ident) | 97 g.add_production('expression', ['term'], ident) |
100 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) | 98 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) |
101 g.add_production('addop', ['-'], ident) | 99 g.add_production('addop', ['-'], lambda x: x.val) |
102 g.add_production('addop', ['+'], ident) | 100 g.add_production('addop', ['+'], lambda x: x.val) |
103 g.add_production('mulop', ['*'], ident) | 101 g.add_production('mulop', ['*'], lambda x: x.val) |
104 g.add_production('term', ['factor'], ident) | 102 g.add_production('term', ['factor'], ident) |
105 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) | 103 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) |
106 g.add_production('factor', ['ID'], lambda name: ASymbol(name)) | 104 g.add_production('factor', ['ID'], lambda name: ASymbol(name.val)) |
107 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num))) | 105 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val))) |
108 g.start_symbol = 'asmline' | 106 g.start_symbol = 'asmline' |
109 self.p = g.genParser() | 107 self.p = g.genParser() |
110 | 108 |
111 # Parser handlers: | 109 # Parser handlers: |
112 def p_ins_1(self, opc, ops): | 110 def p_ins_1(self, opc, ops): |
113 ins = AInstruction(opc, ops) | 111 ins = AInstruction(opc, ops) |
114 self.emit(ins) | 112 self.emit(ins) |
113 | |
115 def p_ins_2(self, opc): | 114 def p_ins_2(self, opc): |
116 self.p_ins_1(opc, []) | 115 self.p_ins_1(opc, []) |
116 | |
117 def p_operands_1(self, op1): | 117 def p_operands_1(self, op1): |
118 return [op1] | 118 return [op1] |
119 | |
119 def p_operands_2(self, ops, comma, op2): | 120 def p_operands_2(self, ops, comma, op2): |
120 assert type(ops) is list | 121 assert type(ops) is list |
121 ops.append(op2) | 122 ops.append(op2) |
122 return ops | 123 return ops |
123 | 124 |
129 lis.append(li2) | 130 lis.append(li2) |
130 return lis | 131 return lis |
131 | 132 |
132 def p_list_op(self, brace_open, lst, brace_close): | 133 def p_list_op(self, brace_open, lst, brace_close): |
133 return AUnop('{}', lst) | 134 return AUnop('{}', lst) |
135 | |
134 def p_mem_op(self, brace_open, exp, brace_close): | 136 def p_mem_op(self, brace_open, exp, brace_close): |
135 return AUnop('[]', exp) | 137 return AUnop('[]', exp) |
138 | |
136 def p_label(self, lname, cn): | 139 def p_label(self, lname, cn): |
137 lab = ALabel(lname) | 140 lab = ALabel(lname.val) |
138 self.emit(lab) | 141 self.emit(lab) |
142 | |
139 def p_binop(self, exp1, op, exp2): | 143 def p_binop(self, exp1, op, exp2): |
140 return ABinop(op, exp1, exp2) | 144 return ABinop(op, exp1, exp2) |
141 | 145 |
142 def parse(self, tokens, emitter): | 146 def parse(self, lexer, emitter): |
143 self.emit = emitter | 147 self.emit = emitter |
144 self.p.parse(tokens) | 148 self.p.parse(lexer) |
145 | 149 |
146 # Pre construct parser to save time: | 150 # Pre construct parser to save time: |
147 asmParser = Parser() | 151 asmParser = Parser() |
148 | 152 |
149 class Assembler: | 153 class Assembler: |
161 """ Emit a parsed instruction """ | 165 """ Emit a parsed instruction """ |
162 self.stack.append(a) | 166 self.stack.append(a) |
163 | 167 |
164 def parse_line(self, line): | 168 def parse_line(self, line): |
165 """ Parse line into asm AST """ | 169 """ Parse line into asm AST """ |
166 tokens = tokenize(line) | 170 tokens = Lexer(line) |
167 self.p.parse(tokens, self.emit) | 171 self.p.parse(tokens, self.emit) |
168 | 172 |
169 def assemble(self, asmsrc): | 173 def assemble(self, asmsrc): |
170 """ Assemble this source snippet """ | 174 """ Assemble this source snippet """ |
171 for line in asmsrc.split('\n'): | 175 for line in asmsrc.split('\n'): |
172 self.assemble_line(line) | 176 self.assemble_line(line) |
173 | 177 |
174 def assemble_line(self, line): | 178 def assemble_line(self, line): |
175 """ | 179 """ |
176 Assemble a single source line. | 180 Assemble a single source line. |
177 Do not take newlines into account | 181 Do not take newlines into account |
178 """ | 182 """ |
179 self.parse_line(line) | 183 self.parse_line(line) |
180 self.assemble_aast() | 184 self.assemble_aast() |
181 | 185 |
182 def assemble_aast(self): | 186 def assemble_aast(self): |
183 """ Assemble a parsed asm line """ | 187 """ Assemble a parsed asm line """ |
184 # TODO | |
185 if not self.target: | 188 if not self.target: |
186 raise CompilerError('Cannot assemble without target') | 189 raise CompilerError('Cannot assemble without target') |
187 while self.stack: | 190 while self.stack: |
188 vi = self.stack.pop(0) | 191 vi = self.stack.pop(0) |
189 if type(vi) is AInstruction: | 192 if type(vi) is AInstruction: |
197 | 200 |
198 | 201 |
199 if __name__ == '__main__': | 202 if __name__ == '__main__': |
200 # When run as main file, try to grab command line arguments: | 203 # When run as main file, try to grab command line arguments: |
201 parser = argparse.ArgumentParser(description="Assembler") | 204 parser = argparse.ArgumentParser(description="Assembler") |
202 parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble') | 205 parser.add_argument('sourcefile', type=argparse.FileType('r'), |
206 help='the source file to assemble') | |
203 args = parser.parse_args() | 207 args = parser.parse_args() |
204 a = Assembler() | 208 a = Assembler() |
205 obj = a.assemble(args.sourcefile.read()) | 209 obj = a.assemble(args.sourcefile.read()) |
206 |