191
|
1 import re
|
|
2 import pyyacc
|
|
3 from ppci import Token, CompilerError, SourceLocation
|
196
|
4 import sys, argparse
|
|
5
|
159
|
6
|
|
7 # Different instruction sets:
|
|
class InstructionSet:
    """Common base class for instruction set descriptions (currently empty)."""
|
|
10
|
|
class X86(InstructionSet):
    """Placeholder for the x86 instruction set; adds nothing to the base yet."""
|
|
13
|
|
14 # Generic assembler:
|
|
# Identifiers that tokenize() reports with their own text as token type
# instead of the generic 'ID' type.
keywords = [
    'global',
    'db',
]
|
|
16
|
|
def tokenize(s):
    """
    Lazily split the source text `s` into tokens.

    The text is scanned left to right with a single combined regular
    expression (the approach shown on the python re documentation page).
    Blanks and tabs are dropped; every other match is yielded as
    Token(typ, val, loc):

    - identifiers found in `keywords` and all punctuation use their own
      text as token type,
    - decimal and hexadecimal numbers become 'NUMBER' tokens holding an int,
    - reals hold a float,
    - strings hold their text without the surrounding single quotes.

    Raises CompilerError when no pattern matches before the end of `s`.
    """
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
        ('STRING', r"'.*?'")
    ]
    # One alternation of named groups; the name of the group that matched
    # identifies the kind of token.
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    match_at = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    while True:
        mo = match_at(s, pos)
        if mo is None:
            break
        kind = mo.lastgroup
        text = mo.group(kind)
        if kind == 'NEWLINE':
            # tok_spec defines no NEWLINE group, so this branch is not
            # reached with the current patterns.
            line_start = pos
            line += 1
        elif kind not in ('COMMENTS', 'SKIP'):
            # (There is no COMMENTS group in tok_spec either.)
            typ, val = kind, text
            if kind == 'ID':
                if text in keywords:
                    typ = text
            elif kind == 'LEESTEKEN':
                typ = text
            elif kind == 'NUMBER':
                val = int(text)
            elif kind == 'HEXNUMBER':
                # Drop the '0x' prefix and report it as an ordinary number.
                typ, val = 'NUMBER', int(text[2:], 16)
            elif kind == 'REAL':
                val = float(text)
            elif kind == 'STRING':
                val = text[1:-1]
            col = mo.start() - line_start
            loc = SourceLocation(line, col, 0)  # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()
    # Stopping before the end of the text means nothing matched at `pos`.
    if pos != len(s):
        col = pos - line_start
        loc = SourceLocation(line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
|
159
|
70
|
|
class Lexer:
    """Token stream over tokenize() that keeps one token of lookahead."""

    def __init__(self, src):
        # Prime the lookahead with the first token of the source.
        self.tokens = tokenize(src)
        self.curTok = next(self.tokens)

    def eat(self):
        """Return the current token and advance to the next one."""
        consumed, self.curTok = self.curTok, next(self.tokens)
        return consumed

    @property
    def Peak(self):
        """The current token, without consuming it."""
        return self.curTok
|
|
82
|
195
|
class ANode:
    """Base class of the assembly AST nodes; nodes compare by their repr."""

    def __eq__(self, other):
        # Two nodes are equal when their textual forms are identical.
        mine = self.__repr__()
        theirs = other.__repr__()
        return mine == theirs
|
|
86
|
|
class ALabel(ANode):
    """AST node for a label; its textual form is the name followed by ':'."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        label_name = self.name
        return '{0}:'.format(label_name)
|
|
92
|
|
class AInstruction(ANode):
    """AST node for one instruction: an opcode plus a sequence of operands."""

    def __init__(self, opcode, operands):
        self.opcode = opcode
        self.operands = operands

    def __repr__(self):
        # Opcode, one space, then the operands separated by ', '.
        operand_text = ', '.join(str(operand) for operand in self.operands)
        return '{0} {1}'.format(self.opcode, operand_text)
|
|
100
|
|
class AExpression(ANode):
    """Base class of expression nodes; `+` and `*` build ABinop nodes."""

    def _combine(self, op, other):
        # Shared by both operators: only expressions may be combined.
        assert isinstance(other, AExpression)
        return ABinop(op, self, other)

    def __add__(self, other):
        """Return the ABinop node for `self + other`."""
        return self._combine('+', other)

    def __mul__(self, other):
        """Return the ABinop node for `self * other`."""
        return self._combine('*', other)
|
194
|
108
|
195
|
class ABinop(AExpression):
    """Binary operation node holding an operator and its two arguments."""

    def __init__(self, op, arg1, arg2):
        self.op, self.arg1, self.arg2 = op, arg1, arg2

    def __repr__(self):
        # Prefix form: the operator first, then both arguments.
        parts = (self.op, self.arg1, self.arg2)
        return '{0} {1} {2}'.format(*parts)
|
|
116
|
|
class AUnop(AExpression):
    """Unary operation node holding an operator and its single argument."""

    def __init__(self, op, arg):
        self.op, self.arg = op, arg

    def __repr__(self):
        # The operator first, then the argument.
        parts = (self.op, self.arg)
        return '{0} {1}'.format(*parts)
|
|
123
|
|
class ASymbol(AExpression):
    """Expression node that refers to a symbol by name."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # The textual form of a symbol is its name, returned unchanged.
        symbol_name = self.name
        return symbol_name
|
|
129
|
|
class ANumber(AExpression):
    """Expression node holding a numeric constant in `n`."""

    def __init__(self, n):
        self.n = n

    def __repr__(self):
        value = self.n
        return '{0}'.format(value)
|
|
135
|
|
class Assembler:
    """
    Line oriented assembler front end.

    Every source line is tokenized and handed to a parser generated by
    pyyacc from the grammar built in __init__.  The parser handlers turn
    labels and instructions into AST nodes that are collected, in order,
    in `self.output`.  Encoding (assemble_aast) and label fix-up
    (back_patch) are still stubs.
    """

    def __init__(self):
        self.output = []

        # Construct a parser given a grammar:
        def ident(x):
            """ Identity helper function """
            return x

        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS])
        g.add_production('asmline', ['label', 'instruction'])
        g.add_production('asmline', ['instruction'])
        g.add_production('asmline', ['label'])
        g.add_production('asmline', [])
        g.add_production('label', ['ID', ':'], self.p_label)
        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
        g.add_production('instruction', ['opcode'], self.p_ins_2)
        g.add_production('opcode', ['ID'], ident)
        g.add_production('operands', ['operand'], self.p_operands_1)
        g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
        g.add_production('operand', ['expression'], ident)
        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
        g.add_production('expression', ['term'], ident)
        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
        g.add_production('addop', ['-'], ident)
        g.add_production('addop', ['+'], ident)
        g.add_production('mulop', ['*'], ident)
        g.add_production('term', ['factor'], ident)
        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
        g.add_production('factor', ['ID'], self.p_symbol)
        g.add_production('factor', ['NUMBER'], self.p_number)
        g.start_symbol = 'asmline'
        self.p = g.genParser()

    # Parser handlers:
    # Each handler is registered with one production in __init__ and takes
    # one argument per symbol on that production's right-hand side
    # (presumably the values of those symbols -- confirm against pyyacc).
    def p_ins_1(self, opc, ops):
        """ Emit an instruction with operands """
        ins = AInstruction(opc, ops)
        self.emit(ins)

    def p_ins_2(self, opc):
        """ Emit an instruction without operands """
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """ Start an operand list with its first operand """
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """ Append another operand to the list; the same list is returned """
        assert isinstance(ops, list)
        ops.append(op2)
        return ops

    def p_mem_op(self, brace_open, exp, brace_close):
        """ Wrap a bracketed expression into a '[]' unary node """
        return AUnop('[]', exp)

    def handle_ins(self, id0, operands):
        """ Emit an instruction (not referenced by the grammar above) """
        # AInstruction requires the operands as well as the opcode.
        ins = AInstruction(id0, operands)
        self.emit(ins)

    def p_label(self, lname, cn):
        """ Emit a label; `cn` is the ':' symbol and is ignored """
        lab = ALabel(lname)
        self.emit(lab)

    def p_binop(self, exp1, op, exp2):
        """ Build a binary operation node """
        return ABinop(op, exp1, exp2)

    def p_symbol(self, name):
        """ Build a symbol node """
        return ASymbol(name)

    def p_number(self, n):
        """ Build a number node; the value is converted with int() """
        n = int(n)
        return ANumber(n)

    # Top level interface:
    def emit(self, a):
        """ Emit a parsed instruction """
        self.output.append(a)
        # Determine the bit pattern from a lookup table:
        # TODO

    def parse_line(self, line):
        """ Parse line into asm AST """
        tokens = tokenize(line)
        self.p.parse(tokens)

    def assemble(self, asmsrc):
        """ Assemble this source snippet """
        for line in asmsrc.split('\n'):
            self.assemble_line(line)
        self.back_patch()

    def assemble_line(self, line):
        """
            Assemble a single source line.
            Do not take newlines into account
        """
        # Parsing reports its result through emit(), so the nodes that
        # belong to this line are the ones appended to the output since
        # `first_new`.  Each of them is handed to assemble_aast, which
        # requires the node as its argument.
        first_new = len(self.output)
        self.parse_line(line)
        for node in self.output[first_new:]:
            self.assemble_aast(node)

    def assemble_aast(self, at):
        """ Assemble a parsed asm line """
        pass

    def back_patch(self):
        """ Fix references to earlier labels """
        pass
|
191
|
228
|
196
|
229
|
|
if __name__ == '__main__':
    # When run as main file, try to grab command line arguments:
    parser = argparse.ArgumentParser(description="Assembler")
    parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble')
    args = parser.parse_args()
    # argparse has already opened the file; read it inside a with-block so
    # it is closed again before assembling starts.
    with args.sourcefile as sourcefile:
        asmsrc = sourcefile.read()
    a = Assembler()
    obj = a.assemble(asmsrc)
|
|
237
|