Mercurial > lcfOS
annotate python/ppci/assembler.py @ 340:c7cc54c0dfdf devel
Test featurebranch
author | Windel Bouwman |
---|---|
date | Sun, 23 Feb 2014 16:24:01 +0100 |
parents | b00219172a42 |
children | 4d204f6f7d4e |
rev | line source |
---|---|
334 | 1 |
2 import re | |
3 import pyyacc | |
4 from . import Token, CompilerError, SourceLocation | |
5 from target import Target, Label | |
6 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber | |
7 | |
340 | 8 |
# Token specification. Alternatives are tried in order and the first match
# wins, so REAL and HEXNUMBER have to come before NUMBER.
# LEESTEKEN is Dutch for "punctuation mark".
_TOKEN_SPEC = [
    ('REAL', r'\d+\.\d+'),
    ('HEXNUMBER', r'0x[\da-fA-F]+'),
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z][A-Za-z\d_]*'),
    ('SKIP', r'[ \t]'),
    ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
    ('STRING', r"'.*?'"),
    ('COMMENT', r";.*"),
]

# Compiled once at import time instead of on every tokenize() call.
_TOKEN_RE = re.compile('|'.join('(?P<%s>%s)' % pair for pair in _TOKEN_SPEC))


def tokenize(s):
    """
    Tokenizer, generates an iterator that returns tokens.

    The scanning loop follows the tokenizer example from the Python ``re``
    documentation.

    The specification contains no newline token, so a newline character in
    ``s`` is reported as an unexpected character and every token location
    carries line number 1.

    :param s: text to split into tokens.
    :returns: generator of ``Token`` objects, terminated by one ``EOF``
        token.  Punctuation tokens use their own text as token type, hex
        numbers are delivered as ``NUMBER`` tokens with an ``int`` value,
        ``REAL`` values are ``float`` and ``STRING`` values have their
        surrounding quotes stripped.
    :raises CompilerError: when no token pattern matches at some position.
    """
    line = 1
    pos = 0
    mo = _TOKEN_RE.match(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ != 'SKIP':
            if typ == 'LEESTEKEN':
                # Punctuation is identified by its own text.
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Drop the '0x' prefix and fold into the NUMBER token type.
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            loc = SourceLocation('', line, mo.start(), 0)  # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = _TOKEN_RE.match(s, pos)
    if pos != len(s):
        # Matching stopped before the end of the input.
        loc = SourceLocation('', line, pos, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)
59 | |
60 | |
class Lexer:
    """
    Token source with one token of lookahead.

    ``curTok`` always holds the token that the next call to
    :meth:`next_token` will return.
    """

    def __init__(self, src):
        """Start tokenizing ``src`` and load the first token."""
        self.tokens = tokenize(src)
        self.curTok = next(self.tokens)

    def next_token(self):
        """
        Return the current token and advance to the following one.

        Once the token of type ``'EOF'`` is reached the lexer stops
        advancing, so that same token is returned on every further call.
        """
        t = self.curTok
        if t.typ != 'EOF':
            self.curTok = next(self.tokens)
        return t
71 | |
72 | |
class Parser:
    """
    Parser for a single assembly line, built on a pyyacc grammar.

    The fixed part of the grammar covers labels, comments and arithmetic
    expressions; the instruction syntax is supplied by the caller as a list
    of rules.  Parsed labels and instructions are not returned but handed
    to the ``emit`` callable.
    """

    def __init__(self, tokens, instruction_rules, emit=None):
        """
        Build the grammar and generate the parser.

        :param tokens: not used by this constructor; kept for interface
            compatibility.
        :param instruction_rules: iterable of ``(prod, rhs, f)`` triples.
            Each becomes the production ``prod -> rhs`` whose action calls
            ``f`` with the tuple of matched right-hand-side values.
        :param emit: optional callable receiving every parsed label and
            instruction.  It may also be given (or replaced) per call via
            :meth:`parse`.
        """
        self.emit = emit

        # Construct a parser given a grammar:
        ident = lambda x: x   # Identity helper function
        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}',
            pyyacc.EOF])

        # Global structure of assembly line:
        g.add_production('asmline', ['asmline2'])
        g.add_production('asmline', ['asmline2', 'COMMENT'])
        g.add_production('asmline2', ['label', 'instruction'])
        g.add_production('asmline2', ['instruction'])
        g.add_production('asmline2', ['label'])
        g.add_production('asmline2', [])
        g.add_production('label', ['ID', ':'], self.p_label)

        # Add instruction rules for the target in question:
        for prod, rhs, f in instruction_rules:
            g.add_production(prod, rhs, self._wrap_rule(prod, f))

        # Expression grammar:
        g.add_production('expression', ['term'], ident)
        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
        g.add_production('addop', ['-'], lambda x: x.val)
        g.add_production('addop', ['+'], lambda x: x.val)
        g.add_production('mulop', ['*'], lambda x: x.val)
        g.add_production('term', ['factor'], ident)
        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
        g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
        g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
        g.start_symbol = 'asmline'
        self.p = g.genParser()

    def _wrap_rule(self, prod, f):
        """
        Turn the rule action ``f`` into a production callback.

        A separate function scope per rule is essential: wrappers defined
        directly inside the rule loop would all share the loop variable and
        end up calling the action of the last rule.

        The callback packs its positional arguments into one tuple for
        ``f``.  For the ``'instruction'`` production the result is emitted
        and nothing is returned; for every other production the result is
        returned.
        """
        if prod == 'instruction':
            def f_wrap(*rhs):
                # self.emit is looked up at call time, so an emitter set
                # later through parse() is honoured.
                self.emit(f(rhs))
        else:
            def f_wrap(*rhs):
                return f(rhs)
        return f_wrap

    # Parser handlers:
    # NOTE(review): only p_label and p_binop are wired into the grammar in
    # __init__; the other handlers are presumably referenced from target
    # specific rules - confirm before removing any of them.
    def p_ins_1(self, opc, ops):
        """Emit an instruction with opcode ``opc`` and operands ``ops``."""
        ins = AInstruction(opc, ops)
        self.emit(ins)

    def p_ins_2(self, opc):
        """Emit an instruction without operands."""
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """Start an operand list with its first operand."""
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """Append ``op2`` to the operand list ``ops`` (modified in place)."""
        assert isinstance(ops, list)
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        """Start an item list with its first item."""
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        """Append ``li2`` to the item list ``lis`` (modified in place)."""
        assert isinstance(lis, list)
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        """Wrap a bracketed item list in an ``AUnop`` tagged ``'{}'``."""
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        """Wrap a bracketed expression in an ``AUnop`` tagged ``'[]'``."""
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        """Emit a label named after the identifier token ``lname``."""
        lab = ALabel(lname.val)
        self.emit(lab)

    def p_binop(self, exp1, op, exp2):
        """Build the binary operation node ``exp1 op exp2``."""
        return ABinop(op, exp1, exp2)

    def parse(self, lexer, emitter=None):
        """
        Parse the tokens delivered by ``lexer``.

        :param lexer: token source handed to the generated parser.
        :param emitter: callable receiving parsed labels and instructions.
            When omitted, the emitter passed to the constructor (or to an
            earlier ``parse`` call) is used.
        :raises TypeError: when no emitter is available at all.
        """
        if emitter is not None:
            self.emit = emitter
        if self.emit is None:
            raise TypeError('parse() requires an emitter, either here or in the constructor')
        self.p.parse(lexer)
153 | |
154 | |
class Assembler:
    """
    Front end that feeds assembly source, line by line, through the lexer
    and parser and forwards the parsed items to ``stream.emit``.
    """

    def __init__(self, target, stream):
        """
        :param target: ``Target`` instance; its ``assembler_rules`` define
            the instruction grammar.
        :param stream: object whose ``emit`` method receives the parsed
            labels and instructions.
        """
        assert isinstance(target, Target)
        self.target = target
        self.stream = stream
        # The emitter is handed over per parse() call, see parse_line.
        self.parser = Parser(None, target.assembler_rules)

    # Top level interface:
    def parse_line(self, line):
        """ Parse line into assembly instructions """
        tokens = Lexer(line)
        self.parser.parse(tokens, self.stream.emit)

    def assemble(self, asmsrc):
        """
        Assemble this source snippet.

        :param asmsrc: either the source text itself or an object with a
            ``read`` method.  Such an object is read completely and then
            closed, also when reading fails.
        """
        if hasattr(asmsrc, 'read'):
            src_file = asmsrc
            try:
                asmsrc = src_file.read()
            finally:
                src_file.close()
        # splitlines() copes with '\n', '\r\n' and '\r' line endings, so no
        # stray '\r' reaches the tokenizer.
        for line in asmsrc.splitlines():
            self.parse_line(line)

    def assemble_line(self, line):
        """ Assemble a single assembly line. """
        self.parse_line(line)