Mercurial > lcfOS
comparison python/ppci/assembler.py @ 341:4d204f6f7d4e devel
Rewrite of assembler parts
author | Windel Bouwman |
---|---|
date | Fri, 28 Feb 2014 18:07:14 +0100 |
parents | c7cc54c0dfdf |
children | 86b02c98a717 |
comparison
legend: equal | deleted | inserted | replaced
340:c7cc54c0dfdf | 341:4d204f6f7d4e |
---|---|
4 from . import Token, CompilerError, SourceLocation | 4 from . import Token, CompilerError, SourceLocation |
5 from target import Target, Label | 5 from target import Target, Label |
6 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber | 6 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber |
7 | 7 |
8 | 8 |
9 def tokenize(s): | 9 def bit_type(value): |
10 assert value < (2**31) | |
11 assert value >= 0 | |
12 t = 'val32' | |
13 for n in [8, 5, 3]: | |
14 if value < (2**n): | |
15 t = 'val{}'.format(n) | |
16 return t | |
17 | |
18 def tokenize(s, kws): | |
10 """ | 19 """ |
11 Tokenizer, generates an iterator that | 20 Tokenizer, generates an iterator that |
12 returns tokens! | 21 returns tokens! |
13 | 22 |
14 This GREAT example was taken from python re doc page! | 23 This GREAT example was taken from python re doc page! |
44 typ = 'NUMBER' | 53 typ = 'NUMBER' |
45 elif typ == 'REAL': | 54 elif typ == 'REAL': |
46 val = float(val) | 55 val = float(val) |
47 elif typ == 'STRING': | 56 elif typ == 'STRING': |
48 val = val[1:-1] | 57 val = val[1:-1] |
58 elif typ == 'ID': | |
59 if val.lower() in kws: # ['r3', 'sp', 'add', 'yield', 'r4', 'r0', 'r1', 'sub', 'r5', 'r6', 'r2']: | |
60 typ = val.lower() | |
49 col = mo.start() - line_start | 61 col = mo.start() - line_start |
50 loc = SourceLocation('', line, col, 0) # TODO retrieve length? | 62 loc = SourceLocation('', line, col, 0) # TODO retrieve length? |
63 if typ == 'NUMBER': | |
64 typ = bit_type(val) | |
51 yield Token(typ, val, loc) | 65 yield Token(typ, val, loc) |
52 pos = mo.end() | 66 pos = mo.end() |
53 mo = gettok(s, pos) | 67 mo = gettok(s, pos) |
54 if pos != len(s): | 68 if pos != len(s): |
55 col = pos - line_start | 69 col = pos - line_start |
57 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) | 71 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) |
58 yield Token('EOF', pyyacc.EOF) | 72 yield Token('EOF', pyyacc.EOF) |
59 | 73 |
60 | 74 |
61 class Lexer: | 75 class Lexer: |
62 def __init__(self, src): | 76 def __init__(self, src, kws): |
63 self.tokens = tokenize(src) | 77 self.tokens = tokenize(src, kws) |
64 self.curTok = self.tokens.__next__() | 78 self.curTok = self.tokens.__next__() |
65 | 79 |
66 def next_token(self): | 80 def next_token(self): |
67 t = self.curTok | 81 t = self.curTok |
68 if t.typ != 'EOF': | 82 if t.typ != 'EOF': |
69 self.curTok = self.tokens.__next__() | 83 self.curTok = self.tokens.__next__() |
70 return t | 84 return t |
71 | 85 |
72 | 86 |
73 class Parser: | 87 class Parser: |
74 def __init__(self, tokens, instruction_rules): | 88 def add_rule(self, prod, rhs, f): |
89 """ Helper function to add a rule, why this is required? """ | |
90 if prod == 'instruction': | |
91 def f_wrap(*args): | |
92 i = f(args) | |
93 self.emit(i) | |
94 else: | |
95 def f_wrap(*rhs): | |
96 return f(rhs) | |
97 self.g.add_production(prod, rhs, f_wrap) | |
98 | |
99 def __init__(self, kws, instruction_rules, emit): | |
75 # Construct a parser given a grammar: | 100 # Construct a parser given a grammar: |
76 ident = lambda x: x # Identity helper function | 101 tokens2 = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', |
77 g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}', | 102 pyyacc.EPS, 'COMMENT', '{', '}', |
78 pyyacc.EOF]) | 103 pyyacc.EOF, 'val32', 'val8', 'val5', 'val3'] |
104 tokens2.extend(kws) | |
105 self.kws = kws | |
106 g = pyyacc.Grammar(tokens2) | |
107 self.g = g | |
79 # Global structure of assembly line: | 108 # Global structure of assembly line: |
80 g.add_production('asmline', ['asmline2']) | 109 g.add_production('asmline', ['asmline2']) |
81 g.add_production('asmline', ['asmline2', 'COMMENT']) | 110 g.add_production('asmline', ['asmline2', 'COMMENT']) |
82 g.add_production('asmline2', ['label', 'instruction']) | 111 g.add_production('asmline2', ['label', 'instruction']) |
83 g.add_production('asmline2', ['instruction']) | 112 g.add_production('asmline2', ['instruction']) |
86 g.add_production('label', ['ID', ':'], self.p_label) | 115 g.add_production('label', ['ID', ':'], self.p_label) |
87 #g.add_production('label', []) | 116 #g.add_production('label', []) |
88 | 117 |
89 # Add instruction rules for the target in question: | 118 # Add instruction rules for the target in question: |
90 for prod, rhs, f in instruction_rules: | 119 for prod, rhs, f in instruction_rules: |
91 if prod is 'instruction': | 120 self.add_rule(prod, rhs, f) |
92 def f_wrap(*rhs): | |
93 i = f(rhs) | |
94 self.emit(i) | |
95 else: | |
96 def f_wrap(*rhs): | |
97 return f(rhs) | |
98 g.add_production(prod, rhs, f_wrap) | |
99 | 121 |
100 #g.add_production('instruction', []) | 122 #g.add_production('instruction', []) |
101 g.add_production('expression', ['term'], ident) | 123 g.add_production('expression', ['term'], lambda x: x) |
102 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) | 124 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) |
103 g.add_production('addop', ['-'], lambda x: x.val) | 125 g.add_production('addop', ['-'], lambda x: x.val) |
104 g.add_production('addop', ['+'], lambda x: x.val) | 126 g.add_production('addop', ['+'], lambda x: x.val) |
105 g.add_production('mulop', ['*'], lambda x: x.val) | 127 g.add_production('mulop', ['*'], lambda x: x.val) |
106 g.add_production('term', ['factor'], ident) | 128 g.add_production('term', ['factor'], lambda x: x) |
107 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) | 129 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) |
108 g.add_production('factor', ['ID'], lambda name: ASymbol(name.val)) | 130 g.add_production('factor', ['ID'], lambda name: ASymbol(name.val)) |
109 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val))) | 131 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val))) |
110 g.start_symbol = 'asmline' | 132 g.start_symbol = 'asmline' |
111 self.p = g.genParser() | 133 self.emit = emit |
134 self.p = g.generate_parser() | |
135 print('length of table:', len(self.p.action_table)) | |
112 | 136 |
113 # Parser handlers: | 137 # Parser handlers: |
114 def p_ins_1(self, opc, ops): | 138 def p_ins_1(self, opc, ops): |
115 ins = AInstruction(opc, ops) | 139 ins = AInstruction(opc, ops) |
116 self.emit(ins) | 140 self.emit(ins) |
139 | 163 |
140 def p_mem_op(self, brace_open, exp, brace_close): | 164 def p_mem_op(self, brace_open, exp, brace_close): |
141 return AUnop('[]', exp) | 165 return AUnop('[]', exp) |
142 | 166 |
143 def p_label(self, lname, cn): | 167 def p_label(self, lname, cn): |
144 lab = ALabel(lname.val) | 168 lab = Label(lname.val) |
145 self.emit(lab) | 169 self.emit(lab) |
146 | 170 |
147 def p_binop(self, exp1, op, exp2): | 171 def p_binop(self, exp1, op, exp2): |
148 return ABinop(op, exp1, exp2) | 172 return ABinop(op, exp1, exp2) |
149 | 173 |
150 def parse(self, lexer, emitter): | 174 def parse(self, lexer): |
151 self.emit = emitter | |
152 self.p.parse(lexer) | 175 self.p.parse(lexer) |
153 | 176 |
154 | 177 |
155 class Assembler: | 178 class Assembler: |
156 def __init__(self, target, stream): | 179 def __init__(self, target, stream): |
157 self.target = target | 180 self.target = target |
158 assert isinstance(target, Target) | 181 assert isinstance(target, Target) |
159 self.stream = stream | 182 self.stream = stream |
160 self.parser = Parser(None, target.assembler_rules, self.stream.emit) | 183 self.parser = Parser(target.asm_keywords, target.assembler_rules, self.stream.emit) |
161 | 184 |
162 # Top level interface: | 185 # Top level interface: |
163 def parse_line(self, line): | 186 def parse_line(self, line): |
164 """ Parse line into assembly instructions """ | 187 """ Parse line into assembly instructions """ |
165 tokens = Lexer(line) | 188 tokens = Lexer(line, self.target.asm_keywords) |
166 self.parser.parse(tokens) | 189 self.parser.parse(tokens) |
167 | 190 |
168 def assemble(self, asmsrc): | 191 def assemble(self, asmsrc): |
169 """ Assemble this source snippet """ | 192 """ Assemble this source snippet """ |
170 if hasattr(asmsrc, 'read'): | 193 if hasattr(asmsrc, 'read'): |