303
|
1 #!/usr/bin/env python3
|
|
2
|
318
|
3 import re
|
|
4 import argparse
|
191
|
5 import pyyacc
|
|
6 from ppci import Token, CompilerError, SourceLocation
|
236
|
7 from target import Target, Label
|
200
|
8 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber
|
159
|
9
|
|
def tokenize(s):
    """
    Tokenize a single line of assembly source.

    Generator that yields a Token for every lexeme in *s*, followed by a
    final 'EOF' token.  Raises CompilerError when an unrecognized
    character is encountered.

    This GREAT example was taken from python re doc page!

    NOTE(review): the original also checked for a 'NEWLINE' token type,
    but no 'NEWLINE' group exists in tok_spec, so that branch was
    unreachable and has been removed (line_start/line bookkeeping is
    kept for the column computation below).
    """
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
        ('STRING', r"'.*?'"),
        ('COMMENT', r";.*")
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ != 'SKIP':
            if typ == 'LEESTEKEN':
                # Punctuation tokens use their own text as the type.
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Strip the '0x' prefix and normalize to a NUMBER token.
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                # Drop the surrounding single quotes.
                val = val[1:-1]
            col = mo.start() - line_start
            loc = SourceLocation('', line, col, 0)  # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        # Matching stopped before the end of input: bad character.
        col = pos - line_start
        loc = SourceLocation('', line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)
|
159
|
60
|
287
|
61
|
159
|
class Lexer:
    """ Wraps tokenize() with one-token lookahead for the parser. """

    def __init__(self, src):
        self.tokens = tokenize(src)
        # Prime the lookahead with the first token.
        self.curTok = next(self.tokens)

    def next_token(self):
        """ Return the current token and advance; sticks at 'EOF'. """
        token = self.curTok
        if token.typ != 'EOF':
            self.curTok = next(self.tokens)
        return token
|
|
72
|
287
|
73
|
218
|
class Parser:
    """ Parses one line of assembly into AST nodes via a pyyacc parser. """

    def __init__(self):
        # Construct a parser given a grammar.  Production order is kept
        # as-is; the handler methods below build the asm AST nodes.
        identity = lambda x: x  # Identity helper function
        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
                            pyyacc.EPS, 'COMMENT', '{', '}',
                            pyyacc.EOF])
        g.add_production('asmline', ['asmline2'])
        g.add_production('asmline', ['asmline2', 'COMMENT'])
        g.add_production('asmline2', ['label', 'instruction'])
        g.add_production('asmline2', ['instruction'])
        g.add_production('asmline2', ['label'])
        g.add_production('asmline2', [])
        g.add_production('label', ['ID', ':'], self.p_label)
        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
        g.add_production('instruction', ['opcode'], self.p_ins_2)
        g.add_production('opcode', ['ID'], lambda x: x.val)
        g.add_production('operands', ['operand'], self.p_operands_1)
        g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
        g.add_production('operand', ['expression'], identity)
        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
        g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op)
        g.add_production('listitems', ['expression'], self.p_listitems_1)
        g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2)
        g.add_production('expression', ['term'], identity)
        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
        g.add_production('addop', ['-'], lambda x: x.val)
        g.add_production('addop', ['+'], lambda x: x.val)
        g.add_production('mulop', ['*'], lambda x: x.val)
        g.add_production('term', ['factor'], identity)
        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
        g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
        g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
        g.start_symbol = 'asmline'
        self.p = g.genParser()

    # Parser handlers:
    def p_ins_1(self, opc, ops):
        """ Instruction with operands: emit it. """
        self.emit(AInstruction(opc, ops))

    def p_ins_2(self, opc):
        """ Instruction without operands: delegate with an empty list. """
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """ First operand starts the operand list. """
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """ Append a further operand to the running list. """
        assert type(ops) is list
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        """ First expression starts a {…} item list. """
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        """ Append a further expression to the {…} item list. """
        assert type(lis) is list
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        """ '{…}' register-list style operand. """
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        """ '[…]' memory-dereference operand. """
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        """ 'name:' label definition: emit it. """
        self.emit(ALabel(lname.val))

    def p_binop(self, exp1, op, exp2):
        """ Binary arithmetic in an operand expression. """
        return ABinop(op, exp1, exp2)

    def parse(self, lexer, emitter):
        """ Parse tokens from *lexer*, routing AST nodes via *emitter*. """
        self.emit = emitter
        self.p.parse(lexer)
|
218
|
149
|
219
|
# Pre construct parser to save time:
# (module-level singleton shared by every Assembler instance below)
asmParser = Parser()
|
219
|
152
|
218
|
class Assembler:
    """ Turns assembly source text into emitted machine instructions.

    Parsed AST nodes accumulate on self.stack; assemble_aast() drains
    that stack, mapping each node through *target* and emitting the
    result to *stream* (when one is set).
    """

    def __init__(self, target=None, stream=None):
        self.target = target
        self.stream = stream
        self.restart()
        self.p = asmParser  # shared, pre-built parser

    # Top level interface:
    def restart(self):
        """ Clear any pending parsed items. """
        self.stack = []

    def emit(self, a):
        """ Emit a parsed instruction """
        self.stack.append(a)

    def parse_line(self, line):
        """ Parse line into asm AST """
        self.p.parse(Lexer(line), self.emit)

    def assemble(self, asmsrc):
        """ Assemble this source snippet """
        for source_line in asmsrc.split('\n'):
            self.assemble_line(source_line)

    def assemble_line(self, line):
        """ Assemble a single source line (newlines not handled here). """
        self.parse_line(line)
        self.assemble_aast()

    def assemble_aast(self):
        """ Assemble a parsed asm line """
        if not self.target:
            raise CompilerError('Cannot assemble without target')
        while self.stack:
            node = self.stack.pop(0)  # drain in FIFO order
            if type(node) is AInstruction:
                mi = self.target.mapInstruction(node)
            elif type(node) is ALabel:
                mi = Label(node.name)
            else:
                raise NotImplementedError('{}'.format(node))
            if self.stream:
                self.stream.emit(mi)
|
191
|
200
|
196
|
201
|
|
if __name__ == '__main__':
    # Command-line entry point: parse arguments and assemble one file.
    arg_parser = argparse.ArgumentParser(description="Assembler")
    arg_parser.add_argument('sourcefile', type=argparse.FileType('r'),
                            help='the source file to assemble')
    args = arg_parser.parse_args()
    a = Assembler()
    obj = a.assemble(args.sourcefile.read())
|