1
|
1 """
|
|
2 Code generation for 64 bits intel processors
|
|
3 """
|
|
4
|
|
5 from .nodes import *
|
|
6 from .errors import Error
|
|
7 from .builtin import real, integer, boolean, char
|
|
8 from .assembler import *
|
|
9
|
|
10 class CodeGenerator:
|
|
11 def __init__(self):
|
|
12 self.strings = []
|
|
13 self.initialize()
|
|
14 def initialize(self):
|
|
15 # Register descriptors:
|
|
16 self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',')
|
|
17 self.usedregs = []
|
|
18 # Members to accumulate the result into:
|
|
19 # The result is an image of bytecode and global variable space.
|
|
20 # Global variables a referenced by RIP relative addressing.
|
|
21 self.image = []
|
|
22 self.rip = 0 # The current instruction pointer location.
|
|
23 # TODO: backpatch list here?
|
|
24
|
|
25 # Functions to modify the code image
|
|
26 def addCode(self, code):
|
|
27 assert(type(code) is list)
|
|
28 self.image += code
|
|
29 self.rip += len(code)
|
|
30 def fixCode(self, position, code):
|
|
31 self.image[position:position+len(code)] = code
|
|
32 def align(self, b):
|
|
33 while (self.rip % b) != 0:
|
|
34 self.addCode([0])
|
|
35
|
|
36 def saveAllRegisters(self):
|
|
37 regs = list(self.usedregs.keys())
|
|
38 for reg in regs:
|
|
39 code += self.saveRegister(reg)
|
|
40
|
|
41 def saveRegister(self, reg):
|
|
42 code = []
|
|
43 if reg in self.usedregs.keys():
|
|
44 code.append('mov {0}, {1}'.format(self.usedregs[reg], reg))
|
|
45 del self.usedregs[reg]
|
|
46 self.freeregs.append(reg)
|
|
47
|
|
48 def getreg(self, node):
|
|
49 """ acquire a working register for a certain node."""
|
|
50 # Temporary register bypass action:
|
|
51 if len(self.freeregs) > 0:
|
|
52 reg = self.freeregs.pop(0)
|
|
53 self.usedregs.append(reg)
|
|
54 else:
|
|
55 Error('No more free regs')
|
|
56 node.reg = reg
|
|
57
|
|
58 def freereg(self, node):
|
|
59 reg = node.reg
|
|
60 node.reg = None
|
|
61 self.freeregs.append(reg)
|
|
62 self.usedregs.remove(reg)
|
|
63
|
|
64 # Helpers to load and retrieve designated objects:
|
|
65 def storeRegInDesignator(self, reg, designator):
|
|
66 assert(type(reg) is str)
|
|
67 assert(type(designator) is Designator)
|
|
68 if len(designator.selectors) > 0:
|
|
69 self.gencode( designator ) # Load the pointer into some register
|
|
70 self.addCode( mov([designator.reg, 0x0], reg) )
|
|
71 self.freereg( designator )
|
|
72 else:
|
|
73 if designator.obj.isLocal:
|
|
74 # Relative from rbp register
|
|
75 mem = ['rbp', designator.obj.offset]
|
|
76 self.addCode( mov(mem, reg) )
|
|
77 else:
|
|
78 # Relative from RIP after move
|
|
79 self.addCode( mov(['RIP', 0x0], reg) )
|
|
80 self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) )
|
|
81
|
|
82 # Code generation functions:
|
|
83 def genexprcode(self, node):
|
|
84 """
|
|
85 Generate code for expressions!
|
|
86 Recursively evaluates, and ensures a register contains the answer.
|
|
87 register is an integer register or a floating point reg
|
|
88 """
|
|
89 if isinstance(node, Binop):
|
|
90 """ Handle a binary operation (two arguments) of some kind """
|
|
91 self.genexprcode(node.a)
|
|
92 self.genexprcode(node.b)
|
|
93
|
|
94 if node.op == 'mod':
|
|
95 assert(node.typ.isType(integer))
|
|
96 self.addCode(mov('rax', node.a.reg))
|
|
97 self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
|
|
98 self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
|
|
99 node.reg = node.a.reg
|
|
100 self.freereg(node.b) # give up register that contains b
|
|
101 self.addCode(mov(node.reg, 'rdx')) # move remainder into result
|
|
102 elif node.op == 'div':
|
|
103 assert(node.typ.isType(integer))
|
|
104 self.addCode(mov('rax', node.a.reg))
|
|
105 self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
|
|
106 self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
|
|
107 node.reg = node.a.reg
|
|
108 self.freereg(node.b) # give up register that contains b
|
|
109 self.addCode(mov(node.reg, 'rax')) # move result into reg
|
|
110 elif node.op == '*':
|
|
111 if node.typ.isType(integer):
|
|
112 self.addCode(imulreg64(node.a.reg, node.b.reg))
|
|
113 node.reg = node.a.reg
|
|
114 self.freereg(node.b)
|
|
115 else:
|
|
116 Error('{0} for * not implemented'.format(node.typ))
|
|
117 elif node.op == '+':
|
|
118 if node.typ.isType(integer):
|
|
119 self.addCode(addreg64(node.a.reg, node.b.reg))
|
|
120 node.reg = node.a.reg
|
|
121 self.freereg(node.b)
|
|
122 else:
|
|
123 Error('{0} for + not implemented'.format(node.typ))
|
|
124 elif node.op == '-':
|
|
125 if node.typ.isType(integer):
|
|
126 self.addCode(subreg64(node.a.reg, node.b.reg))
|
|
127 node.reg = node.a.reg
|
|
128 self.freereg(node.b)
|
|
129 else:
|
|
130 Error('{0} for - not implemented'.format(node.typ))
|
|
131 else:
|
|
132 Error('Unknown Binop {0}'.format(node.op))
|
|
133
|
|
134 elif type(node) is Unop:
|
|
135 if node.op == 'INTTOREAL':
|
|
136 self.genexprcode(node.a)
|
|
137 node.reg = node.a.reg
|
|
138 # TODO use 'FILD' instruction
|
|
139 freg = 12
|
|
140 code.append('Unop inttoreal TODO')
|
|
141 elif node.op == 'ABS':
|
|
142 if isType(node.typ, real):
|
|
143 code = [0xD9, 0xE1] # st(0) = fabs st(0)
|
|
144 Error('ABS error integer')
|
|
145 elif isType(node.typ, integer):
|
|
146 code = []
|
|
147 Error('ABS error integer')
|
|
148 else:
|
|
149 Error('ABS error')
|
|
150 else:
|
|
151 Error('Unknown Unop {0}'.format(node.op))
|
|
152
|
|
153 elif isinstance(node, Designator):
|
|
154 # dereference, array index. Make sure that the result comes into a register
|
|
155 if len(node.selectors) > 0:
|
|
156 self.gencode(node) # Load the pointer into some register
|
|
157
|
|
158 # Now we can access the object at location '[node.reg]':
|
|
159 if node.typ.isType(integer):
|
|
160 self.addCode( mov(node.reg, [node.reg, 0x0]) )
|
|
161 else:
|
|
162 Error('Only integer types implemented')
|
|
163 else:
|
|
164 # No selectors, load variable directly
|
|
165 if node.obj.typ.isType(integer):
|
|
166 if type(node.obj) is Constant:
|
|
167 self.genexprcode(node.obj)
|
|
168 node.reg = node.obj.reg
|
|
169 else:
|
|
170 self.getreg(node)
|
|
171 # Get a register to store the integer value
|
|
172 if node.obj.isLocal:
|
|
173 # relative to rbp:
|
|
174 self.addCode( mov(node.reg, ['rbp', node.obj.offset]) )
|
|
175 else:
|
|
176 self.addCode(mov(node.reg, ['RIP', 0x0]))
|
|
177 self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip))
|
|
178 else:
|
|
179 Error('Cannot load variable type {0}'.format(node.typ))
|
|
180
|
|
181 elif isinstance(node, Relop):
|
|
182 # Create a boolean from operands
|
|
183 # TODO create an alternative for expressions used as conditions.
|
|
184 self.genexprcode(node.a)
|
|
185 self.genexprcode(node.b)
|
|
186
|
|
187 if node.a.typ.isType(integer):
|
|
188 instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'}
|
|
189 if not node.relop in instructions.keys():
|
|
190 Error('Unimplemented relop: '+str(node.relop))
|
|
191 instr = instructions[node.relop]
|
|
192
|
|
193 node.reg = node.a.reg
|
|
194 self.addCode( cmpreg64(node.a.reg, node.b.reg) )
|
|
195 self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp
|
|
196 fixloc1 = self.rip - 1
|
|
197 rip1 = self.rip
|
|
198 self.addCode( xorreg64(node.reg, node.reg) )
|
|
199 self.addCode( shortjump(0x0) ) # Jump over 1 code
|
|
200 fixloc2 = self.rip - 1
|
|
201 self.fixCode(fixloc1, imm8(self.rip - rip1))
|
|
202 rip2 = self.rip
|
|
203 self.addCode( xorreg64(node.reg, node.reg) )
|
|
204 self.addCode( increg64(node.reg) )
|
|
205 self.fixCode(fixloc2, imm8(self.rip - rip2))
|
|
206
|
|
207 self.freereg(node.b)
|
|
208 else:
|
|
209 Error('Relop not implemented for {0}'.format(node.a.typ))
|
|
210
|
|
211 elif type(node) is Constant:
|
|
212 if node.typ.isType(integer):
|
|
213 self.getreg(node)
|
|
214 self.addCode(mov(node.reg, node.value))
|
|
215 elif node.typ.isType(real):
|
|
216 code += self.getreg(node)
|
|
217 Error('TODO: get real reg')
|
|
218 # TODO: get a fixed point reg, and load the variable in there
|
|
219 else:
|
|
220 Error('Howto generate code for {0}?'.format(node))
|
|
221
|
|
222 elif type(node) is ProcedureCall:
|
|
223 if type(node.proc.obj) is BuiltinProcedure:
|
|
224 # Handle builtin procedures different, these not always call
|
|
225 # a function, but generate code.
|
|
226 bi = node.proc.obj
|
|
227 if bi.name == 'chr':
|
|
228 arg = node.args[0]
|
|
229 self.genexprcode(arg)
|
|
230 # Store character in full width register:
|
|
231 # TODO: store in char only register
|
|
232 node.reg = arg.reg
|
|
233 else:
|
|
234 Error('Unknown builtin function {0}'.format(bi.name))
|
|
235 else:
|
|
236 # Use generic procedure call first
|
|
237 self.gencode(node)
|
|
238 # Retrieve result:
|
|
239 if node.typ.isType(integer):
|
|
240 # Store result!
|
|
241 self.getreg(node)
|
|
242 self.addCode( mov(node.reg, 'rax') )
|
|
243 else:
|
|
244 Error('Return type not supported {0}'.format(node.typ))
|
|
245 else:
|
|
246 Error('Cannot generate expression code for: {0}'.format(node))
|
|
247
|
|
248 def gencode(self, node):
|
|
249 """ Code generation function for AST nodes """
|
|
250 if isinstance(node, Module):
|
|
251 # for all imports make a list of pointer to the actual procedures:
|
|
252 for imp in node.imports:
|
|
253 imp.offset = self.rip
|
|
254 self.addCode( [0x0]*8 )
|
|
255 # global variable storage allocation
|
|
256 variables = node.symtable.getAllLocal(Variable)
|
|
257 for var in variables:
|
|
258 var.isLocal = False
|
|
259 var.offset = self.rip
|
|
260 self.addCode( [0x00] * var.typ.size ) # TODO initial values here?
|
|
261 self.align(8)
|
|
262 # TODO: mark end of data and start of code inside image
|
|
263 # TODO: round data to page size to enable protection by loader.
|
|
264 # Procedure code generation:
|
|
265 procedures = node.symtable.getAllLocal(Procedure)
|
|
266 node.procs = procedures
|
|
267 for proc in procedures:
|
|
268 self.gencode(proc)
|
|
269 # Module init code:
|
|
270 node.initcodeentry = self.rip
|
|
271 self.gencode(node.initcode)
|
|
272 self.addCode( ret() )
|
|
273 # TODO: how to return from module init code? far return??
|
|
274
|
|
275 elif type(node) is Procedure:
|
|
276 # calculate offsets for local variables and parameters
|
|
277 # Variable location relative to 'rbp' register
|
|
278 variables = node.symtable.getAllLocal(Variable)
|
|
279 offset = 0
|
|
280 paramoffset = 16
|
|
281 for var in variables:
|
|
282 var.isLocal = True
|
|
283 if not var.isParameter:
|
|
284 offset += var.typ.size
|
|
285 # Offset is negative of rbp in stack frame
|
|
286 var.offset = -offset
|
|
287 node.framesize = offset
|
|
288 # Calculate offsets of parameters relative to rbp register
|
|
289 for par in reversed(node.typ.parameters):
|
|
290 pvar = node.symtable.getLocal(Variable, par.name)
|
|
291 pvar.offset = paramoffset
|
|
292 paramoffset += pvar.typ.size
|
|
293
|
|
294 # code generation
|
|
295 node.entrypoint = self.rip
|
|
296 self.addCode(push('rbp'))
|
|
297 self.addCode(mov('rbp', 'rsp')) # Setup the base pointer
|
|
298 self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals
|
|
299 self.gencode(node.block)
|
|
300 if node.retexpr:
|
|
301 if node.retexpr.typ.isType(integer):
|
|
302 self.genexprcode(node.retexpr)
|
|
303 self.addCode( mov('rax', node.retexpr.reg) )
|
|
304 self.freereg(node.retexpr)
|
|
305 else:
|
|
306 Error('Cannot return this kind yet {0}'.format(node.retexpr.typ))
|
|
307 self.addCode( addreg64('rsp', node.framesize) )
|
|
308 self.addCode( pop('rbp') )
|
|
309 self.addCode( ret() )
|
|
310 assert(len(self.usedregs) == 0)
|
|
311
|
|
312 elif isinstance(node, StatementSequence):
|
|
313 for s in node.statements:
|
|
314 self.gencode(s)
|
|
315 assert(len(self.usedregs) == 0)
|
|
316
|
|
317 elif type(node) is ProcedureCall:
|
|
318 # Prepare parameters on the stack:
|
|
319 stacksize = 0
|
|
320 assert(len(node.args) == len(node.proc.typ.parameters))
|
|
321 for arg, param in zip(node.args, node.proc.typ.parameters):
|
|
322
|
|
323 if param.kind == 'value':
|
|
324 self.genexprcode(arg)
|
|
325 self.addCode( push(arg.reg) )
|
|
326 self.freereg( arg )
|
|
327 stacksize += 8
|
|
328 else:
|
|
329 Error('Parameter kind other than value')
|
|
330
|
|
331 # Calculate address using designator
|
|
332 if type(node.proc.obj) is Procedure:
|
|
333 self.addCode( call(0x0) )
|
|
334 self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip))
|
|
335 elif type(node.proc.obj) is ImportedSymbol:
|
|
336 # Load the entry point of the import table
|
|
337 self.getreg(node.proc.obj)
|
|
338 # Load the address of the procedure:
|
|
339 self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) )
|
|
340 self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) )
|
|
341 # Call to the address in register:
|
|
342 self.addCode( call(node.proc.obj.reg) )
|
|
343 # Free register that holds the address of the object
|
|
344 self.freereg( node.proc.obj )
|
|
345 elif type(node.proc.obj) is BuiltinProcedure:
|
|
346 if node.proc.obj.name == 'chr':
|
|
347 print('int to char')
|
|
348 else:
|
|
349 Error('Unknown builtin function {0}'.format(node.proc.obj.name))
|
|
350 else:
|
|
351 Error('Cannot call designator of type {0}'.format(node.proc.obj))
|
|
352
|
|
353 # Restore stack (pop all arguments of):
|
|
354 self.addCode(addreg64('rsp', stacksize))
|
|
355
|
|
356 elif type(node) is Assignment:
|
|
357 if node.lval.typ.isType(integer):
|
|
358 # TODO if node.rval is Constant of some datatype, move it to mem directly
|
|
359 self.genexprcode(node.rval) # Calculate the value that has to be stored.
|
|
360 self.storeRegInDesignator(node.rval.reg, node.lval)
|
|
361 self.freereg(node.rval)
|
|
362 else:
|
|
363 Error('Assignments of other types not implemented')
|
|
364 # TODO if left and right are designators, do some sort of memcpy.
|
|
365
|
|
366 elif type(node) is IfStatement:
|
|
367 self.genexprcode(node.condition)
|
|
368 self.addCode( cmpreg64(node.condition.reg, 1) )
|
|
369 self.freereg(node.condition)
|
|
370 if node.falsestatement:
|
|
371 # If with else clause
|
|
372 self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
|
|
373 rip1 = self.rip
|
|
374 fixloc1 = self.rip - 4
|
|
375 self.gencode(node.truestatement)
|
|
376 self.addCode( nearjump( 0x0 ) ) # jump over false code
|
|
377 fixloc2 = self.rip - 4
|
|
378 self.fixCode(fixloc1, imm32(self.rip - rip1))
|
|
379 rip2 = self.rip
|
|
380 self.gencode(node.falsestatement)
|
|
381 self.fixCode(fixloc2, imm32(self.rip - rip2))
|
|
382 else:
|
|
383 # If without else clause
|
|
384 self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
|
|
385 rip1 = self.rip
|
|
386 fixloc1 = self.rip - 4
|
|
387 self.gencode(node.truestatement)
|
|
388 self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code.
|
|
389
|
|
390 elif isinstance(node, WhileStatement):
|
|
391 rip1 = self.rip # Store the start of the while loop
|
|
392 self.genexprcode(node.condition)
|
|
393 self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness
|
|
394 self.freereg(node.condition)
|
|
395 self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later)
|
|
396 fixloc1 = self.rip - 4
|
|
397 rip2 = self.rip
|
|
398 self.gencode(node.dostatements)
|
|
399 self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below
|
|
400 fixloc2 = self.rip - 4
|
|
401 rip3 = self.rip # end of while loop
|
|
402 self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop
|
|
403 self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop
|
|
404
|
|
405 elif type(node) is ForStatement:
|
|
406 # Initial load of iterator variable:
|
|
407 self.genexprcode(node.begin)
|
4
|
408 self.genexprcode(node.end)
|
|
409 # TODO: link reg with variable so that a register is used instead of a variable
|
|
410 iterreg = node.begin.reg # Get the register used for the loop
|
|
411 #self.addCode(cmpreg64(iterreg, node.endvalue))
|
1
|
412 rip1 = self.rip
|
|
413 self.gencode(node.statements)
|
|
414 #self.loadDesignatorInReg(node.
|
|
415 #self.addCode( addreg64(node.variable, node.increment) )
|
4
|
416 self.addCode(nearjump(0x0))
|
|
417 fixloc1 = self.rip - 4
|
|
418 rip2 = self.rip
|
|
419 self.fixCode(fixloc1, imm32(rip1 - rip2))
|
|
420
|
|
421 self.freereg(node.begin) # Release register used in loop
|
|
422 self.freereg(node.end)
|
1
|
423 Error('No implementation of FOR statement')
|
|
424
|
|
425 elif type(node) is AsmCode:
|
|
426 def processOperand(op):
|
|
427 if type(op) is list:
|
|
428 if type(op[0]) is Variable:
|
|
429 var = op[0]
|
|
430 if var.isLocal:
|
|
431 return ['rbp', var.offset]
|
|
432 else:
|
|
433 Error('Can only use local variables in inline assembler')
|
|
434 return op
|
|
435 for asmline in node.asmcode:
|
|
436 opcode, operands = asmline
|
|
437 operands = [processOperand(opx) for opx in operands]
|
|
438 print('assembling', opcode, *operands)
|
|
439 func,nargs = opcodes[opcode]
|
|
440 code = func(*operands)
|
|
441 self.addCode(code)
|
|
442
|
|
443 elif isinstance(node, EmptyStatement):
|
|
444 pass
|
|
445
|
|
446
|
|
447 elif type(node) is StringConstant:
|
|
448 self.strings.append(node)
|
|
449 self.data.append(node.value) # Add string to the data section
|
|
450
|
|
451 elif type(node) is Designator:
|
|
452 if len(node.selectors) > 0:
|
|
453 self.getreg(node)
|
|
454 # Load starting address
|
|
455 if node.obj.isLocal:
|
|
456 self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) )
|
|
457 else:
|
|
458 # Global variables need to be relocated...
|
|
459 self.addCode(leareg64(node.reg, ['RIP', 0]))
|
|
460 self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip))
|
|
461 # Loop over all designators..
|
|
462 for selector in node.selectors:
|
|
463 if type(selector) is Index:
|
|
464 # Deref an array index
|
|
465 self.genexprcode(selector.index)
|
|
466 self.getreg(selector)
|
|
467 self.addCode( mov(selector.reg, selector.typ.elementType.size) )
|
|
468 self.addCode( imulreg64(selector.reg, selector.index.reg ) )
|
|
469 self.freereg(selector.index)
|
|
470 self.addCode(addreg64(node.reg, selector.reg))
|
|
471 self.freereg(selector)
|
|
472 elif type(selector) is Field:
|
|
473 print('Field')
|
|
474 Error('Field not implemented')
|
|
475 else:
|
|
476 Error('Unknown selector')
|
|
477 else:
|
|
478 Error('Can only gencode for designator with selectors')
|
|
479
|
|
480 else:
|
|
481 print('not generating code for {0}'.format(node))
|
|
482
|
|
483 def generatecode(self, ast):
|
|
484 """ code generation front end """
|
|
485 self.initialize()
|
|
486 self.gencode(ast)
|
|
487 ast.image = self.image
|
|
488
|