comparison python/ppci/backends/codegenerator.py @ 96:a350055d6119

movage
author windel
date Mon, 24 Dec 2012 13:30:12 +0100
parents python/libs/compiler/backends/codegenerator.py@60cc36ef5a50
children
comparison
equal deleted inserted replaced
95:4a37d6992bd3 96:a350055d6119
1 """
2 Code generation for 64 bits intel processors
3 """
4
5 from .nodes import *
6 from .errors import Error
7 from .builtin import real, integer, boolean, char
8 from .assembler import *
9
10 class CodeGenerator:
11 def __init__(self):
12 self.strings = []
13 self.initialize()
14 def initialize(self):
15 # Register descriptors:
16 self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',')
17 self.usedregs = []
18 # Members to accumulate the result into:
19 # The result is an image of bytecode and global variable space.
20 # Global variables a referenced by RIP relative addressing.
21 self.image = []
22 self.rip = 0 # The current instruction pointer location.
23 # TODO: backpatch list here?
24
25 # Functions to modify the code image
26 def addCode(self, code):
27 assert(type(code) is list)
28 self.image += code
29 self.rip += len(code)
30 def fixCode(self, position, code):
31 self.image[position:position+len(code)] = code
32 def align(self, b):
33 while (self.rip % b) != 0:
34 self.addCode([0])
35
36 def saveAllRegisters(self):
37 regs = list(self.usedregs.keys())
38 for reg in regs:
39 code += self.saveRegister(reg)
40
41 def saveRegister(self, reg):
42 code = []
43 if reg in self.usedregs.keys():
44 code.append('mov {0}, {1}'.format(self.usedregs[reg], reg))
45 del self.usedregs[reg]
46 self.freeregs.append(reg)
47
48 def getreg(self, node):
49 """ acquire a working register for a certain node."""
50 # Temporary register bypass action:
51 if len(self.freeregs) > 0:
52 reg = self.freeregs.pop(0)
53 self.usedregs.append(reg)
54 else:
55 Error('No more free regs')
56 node.reg = reg
57
58 def freereg(self, node):
59 reg = node.reg
60 node.reg = None
61 self.freeregs.append(reg)
62 self.usedregs.remove(reg)
63
64 # Helpers to load and retrieve designated objects:
65 def storeRegInDesignator(self, reg, designator):
66 assert(type(reg) is str)
67 assert(type(designator) is Designator)
68 if len(designator.selectors) > 0:
69 self.gencode( designator ) # Load the pointer into some register
70 self.addCode( mov([designator.reg, 0x0], reg) )
71 self.freereg( designator )
72 else:
73 if designator.obj.isLocal:
74 # Relative from rbp register
75 mem = ['rbp', designator.obj.offset]
76 self.addCode( mov(mem, reg) )
77 else:
78 # Relative from RIP after move
79 self.addCode( mov(['RIP', 0x0], reg) )
80 self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) )
81
82 # Code generation functions:
83 def genexprcode(self, node):
84 """
85 Generate code for expressions!
86 Recursively evaluates, and ensures a register contains the answer.
87 register is an integer register or a floating point reg
88 """
89 if isinstance(node, Binop):
90 """ Handle a binary operation (two arguments) of some kind """
91 self.genexprcode(node.a)
92 self.genexprcode(node.b)
93
94 if node.op == 'mod':
95 assert(node.typ.isType(integer))
96 self.addCode(mov('rax', node.a.reg))
97 self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
98 self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
99 node.reg = node.a.reg
100 self.freereg(node.b) # give up register that contains b
101 self.addCode(mov(node.reg, 'rdx')) # move remainder into result
102 elif node.op == 'div':
103 assert(node.typ.isType(integer))
104 self.addCode(mov('rax', node.a.reg))
105 self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
106 self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
107 node.reg = node.a.reg
108 self.freereg(node.b) # give up register that contains b
109 self.addCode(mov(node.reg, 'rax')) # move result into reg
110 elif node.op == '*':
111 if node.typ.isType(integer):
112 self.addCode(imulreg64(node.a.reg, node.b.reg))
113 node.reg = node.a.reg
114 self.freereg(node.b)
115 else:
116 Error('{0} for * not implemented'.format(node.typ))
117 elif node.op == '+':
118 if node.typ.isType(integer):
119 self.addCode(addreg64(node.a.reg, node.b.reg))
120 node.reg = node.a.reg
121 self.freereg(node.b)
122 else:
123 Error('{0} for + not implemented'.format(node.typ))
124 elif node.op == '-':
125 if node.typ.isType(integer):
126 self.addCode(subreg64(node.a.reg, node.b.reg))
127 node.reg = node.a.reg
128 self.freereg(node.b)
129 else:
130 Error('{0} for - not implemented'.format(node.typ))
131 else:
132 Error('Unknown Binop {0}'.format(node.op))
133
134 elif type(node) is Unop:
135 if node.op == 'INTTOREAL':
136 self.genexprcode(node.a)
137 node.reg = node.a.reg
138 # TODO use 'FILD' instruction
139 freg = 12
140 code.append('Unop inttoreal TODO')
141 elif node.op == 'ABS':
142 if isType(node.typ, real):
143 code = [0xD9, 0xE1] # st(0) = fabs st(0)
144 Error('ABS error integer')
145 elif isType(node.typ, integer):
146 code = []
147 Error('ABS error integer')
148 else:
149 Error('ABS error')
150 else:
151 Error('Unknown Unop {0}'.format(node.op))
152
153 elif isinstance(node, Designator):
154 # dereference, array index. Make sure that the result comes into a register
155 if len(node.selectors) > 0:
156 self.gencode(node) # Load the pointer into some register
157
158 # Now we can access the object at location '[node.reg]':
159 if node.typ.isType(integer):
160 self.addCode( mov(node.reg, [node.reg, 0x0]) )
161 else:
162 Error('Only integer types implemented')
163 else:
164 # No selectors, load variable directly
165 if node.obj.typ.isType(integer):
166 if type(node.obj) is Constant:
167 self.genexprcode(node.obj)
168 node.reg = node.obj.reg
169 else:
170 self.getreg(node)
171 # Get a register to store the integer value
172 if node.obj.isLocal:
173 # relative to rbp:
174 self.addCode( mov(node.reg, ['rbp', node.obj.offset]) )
175 else:
176 self.addCode(mov(node.reg, ['RIP', 0x0]))
177 self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip))
178 else:
179 Error('Cannot load variable type {0}'.format(node.typ))
180
181 elif isinstance(node, Relop):
182 # Create a boolean from operands
183 # TODO create an alternative for expressions used as conditions.
184 self.genexprcode(node.a)
185 self.genexprcode(node.b)
186
187 if node.a.typ.isType(integer):
188 instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'}
189 if not node.relop in instructions.keys():
190 Error('Unimplemented relop: '+str(node.relop))
191 instr = instructions[node.relop]
192
193 node.reg = node.a.reg
194 self.addCode( cmpreg64(node.a.reg, node.b.reg) )
195 self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp
196 fixloc1 = self.rip - 1
197 rip1 = self.rip
198 self.addCode( xorreg64(node.reg, node.reg) )
199 self.addCode( shortjump(0x0) ) # Jump over 1 code
200 fixloc2 = self.rip - 1
201 self.fixCode(fixloc1, imm8(self.rip - rip1))
202 rip2 = self.rip
203 self.addCode( xorreg64(node.reg, node.reg) )
204 self.addCode( increg64(node.reg) )
205 self.fixCode(fixloc2, imm8(self.rip - rip2))
206
207 self.freereg(node.b)
208 else:
209 Error('Relop not implemented for {0}'.format(node.a.typ))
210
211 elif type(node) is Constant:
212 if node.typ.isType(integer):
213 self.getreg(node)
214 self.addCode(mov(node.reg, node.value))
215 elif node.typ.isType(real):
216 code += self.getreg(node)
217 Error('TODO: get real reg')
218 # TODO: get a fixed point reg, and load the variable in there
219 else:
220 Error('Howto generate code for {0}?'.format(node))
221
222 elif type(node) is ProcedureCall:
223 if type(node.proc.obj) is BuiltinProcedure:
224 # Handle builtin procedures different, these not always call
225 # a function, but generate code.
226 bi = node.proc.obj
227 if bi.name == 'chr':
228 arg = node.args[0]
229 self.genexprcode(arg)
230 # Store character in full width register:
231 # TODO: store in char only register
232 node.reg = arg.reg
233 else:
234 Error('Unknown builtin function {0}'.format(bi.name))
235 else:
236 # Use generic procedure call first
237 self.gencode(node)
238 # Retrieve result:
239 if node.typ.isType(integer):
240 # Store result!
241 self.getreg(node)
242 self.addCode( mov(node.reg, 'rax') )
243 else:
244 Error('Return type not supported {0}'.format(node.typ))
245 else:
246 Error('Cannot generate expression code for: {0}'.format(node))
247
248 def gencode(self, node):
249 """ Code generation function for AST nodes """
250 if isinstance(node, Module):
251 # for all imports make a list of pointer to the actual procedures:
252 for imp in node.imports:
253 imp.offset = self.rip
254 self.addCode( [0x0]*8 )
255 # global variable storage allocation
256 variables = node.symtable.getAllLocal(Variable)
257 for var in variables:
258 var.isLocal = False
259 var.offset = self.rip
260 self.addCode( [0x00] * var.typ.size ) # TODO initial values here?
261 self.align(8)
262 # TODO: mark end of data and start of code inside image
263 # TODO: round data to page size to enable protection by loader.
264 # Procedure code generation:
265 procedures = node.symtable.getAllLocal(Procedure)
266 node.procs = procedures
267 for proc in procedures:
268 self.gencode(proc)
269 # Module init code:
270 node.initcodeentry = self.rip
271 self.gencode(node.initcode)
272 self.addCode( ret() )
273 # TODO: how to return from module init code? far return??
274
275 elif type(node) is Procedure:
276 # calculate offsets for local variables and parameters
277 # Variable location relative to 'rbp' register
278 variables = node.symtable.getAllLocal(Variable)
279 offset = 0
280 paramoffset = 16
281 for var in variables:
282 var.isLocal = True
283 if not var.isParameter:
284 offset += var.typ.size
285 # Offset is negative of rbp in stack frame
286 var.offset = -offset
287 node.framesize = offset
288 # Calculate offsets of parameters relative to rbp register
289 for par in reversed(node.typ.parameters):
290 pvar = node.symtable.getLocal(Variable, par.name)
291 pvar.offset = paramoffset
292 paramoffset += pvar.typ.size
293
294 # code generation
295 node.entrypoint = self.rip
296 self.addCode(push('rbp'))
297 self.addCode(mov('rbp', 'rsp')) # Setup the base pointer
298 self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals
299 self.gencode(node.block)
300 if node.retexpr:
301 if node.retexpr.typ.isType(integer):
302 self.genexprcode(node.retexpr)
303 self.addCode( mov('rax', node.retexpr.reg) )
304 self.freereg(node.retexpr)
305 else:
306 Error('Cannot return this kind yet {0}'.format(node.retexpr.typ))
307 self.addCode( addreg64('rsp', node.framesize) )
308 self.addCode( pop('rbp') )
309 self.addCode( ret() )
310 assert(len(self.usedregs) == 0)
311
312 elif isinstance(node, StatementSequence):
313 for s in node.statements:
314 self.gencode(s)
315
316 elif type(node) is ProcedureCall:
317 # Prepare parameters on the stack:
318 stacksize = 0
319 assert(len(node.args) == len(node.proc.typ.parameters))
320 for arg, param in zip(node.args, node.proc.typ.parameters):
321
322 if param.kind == 'value':
323 self.genexprcode(arg)
324 self.addCode( push(arg.reg) )
325 self.freereg( arg )
326 stacksize += 8
327 else:
328 Error('Parameter kind other than value')
329
330 # Calculate address using designator
331 if type(node.proc.obj) is Procedure:
332 self.addCode( call(0x0) )
333 self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip))
334 elif type(node.proc.obj) is ImportedSymbol:
335 # Load the entry point of the import table
336 self.getreg(node.proc.obj)
337 # Load the address of the procedure:
338 self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) )
339 self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) )
340 # Call to the address in register:
341 self.addCode( call(node.proc.obj.reg) )
342 # Free register that holds the address of the object
343 self.freereg( node.proc.obj )
344 elif type(node.proc.obj) is BuiltinProcedure:
345 if node.proc.obj.name == 'chr':
346 print('int to char')
347 else:
348 Error('Unknown builtin function {0}'.format(node.proc.obj.name))
349 else:
350 Error('Cannot call designator of type {0}'.format(node.proc.obj))
351
352 # Restore stack (pop all arguments of):
353 self.addCode(addreg64('rsp', stacksize))
354
355 elif type(node) is Assignment:
356 if node.lval.typ.isType(integer):
357 # TODO if node.rval is Constant of some datatype, move it to mem directly
358 self.genexprcode(node.rval) # Calculate the value that has to be stored.
359 self.storeRegInDesignator(node.rval.reg, node.lval)
360 self.freereg(node.rval)
361 else:
362 Error('Assignments of other types not implemented')
363 # TODO if left and right are designators, do some sort of memcpy.
364
365 elif type(node) is IfStatement:
366 self.genexprcode(node.condition)
367 self.addCode( cmpreg64(node.condition.reg, 1) )
368 self.freereg(node.condition)
369 if node.falsestatement:
370 # If with else clause
371 self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
372 rip1 = self.rip
373 fixloc1 = self.rip - 4
374 self.gencode(node.truestatement)
375 self.addCode( nearjump( 0x0 ) ) # jump over false code
376 fixloc2 = self.rip - 4
377 self.fixCode(fixloc1, imm32(self.rip - rip1))
378 rip2 = self.rip
379 self.gencode(node.falsestatement)
380 self.fixCode(fixloc2, imm32(self.rip - rip2))
381 else:
382 # If without else clause
383 self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
384 rip1 = self.rip
385 fixloc1 = self.rip - 4
386 self.gencode(node.truestatement)
387 self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code.
388
389 elif isinstance(node, WhileStatement):
390 rip1 = self.rip # Store the start of the while loop
391 self.genexprcode(node.condition)
392 self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness
393 self.freereg(node.condition)
394 self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later)
395 fixloc1 = self.rip - 4
396 rip2 = self.rip
397 self.gencode(node.dostatements)
398 self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below
399 fixloc2 = self.rip - 4
400 rip3 = self.rip # end of while loop
401 self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop
402 self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop
403
404 elif type(node) is ForStatement:
405 # Initial load of iterator variable:
406 self.genexprcode(node.begin)
407 self.genexprcode(node.end)
408 # TODO: link reg with variable so that a register is used instead of a variable
409 iterreg = node.begin.reg # Get the register used for the loop
410 #self.addCode(cmpreg64(iterreg, node.endvalue))
411 rip1 = self.rip
412 self.gencode(node.statements)
413 #self.loadDesignatorInReg(node.
414 #self.addCode( addreg64(node.variable, node.increment) )
415 self.addCode(nearjump(0x0))
416 fixloc1 = self.rip - 4
417 rip2 = self.rip
418 self.fixCode(fixloc1, imm32(rip1 - rip2))
419
420 self.freereg(node.begin) # Release register used in loop
421 self.freereg(node.end)
422 Error('No implementation of FOR statement')
423
424 elif type(node) is AsmCode:
425 def processOperand(op):
426 if type(op) is list:
427 if type(op[0]) is Variable:
428 var = op[0]
429 if var.isLocal:
430 return ['rbp', var.offset]
431 else:
432 Error('Can only use local variables in inline assembler')
433 return op
434 for asmline in node.asmcode:
435 opcode, operands = asmline
436 operands = [processOperand(opx) for opx in operands]
437 print('assembling', opcode, *operands)
438 func,nargs = opcodes[opcode]
439 code = func(*operands)
440 self.addCode(code)
441
442 elif isinstance(node, EmptyStatement):
443 pass
444
445
446 elif type(node) is StringConstant:
447 self.strings.append(node)
448 self.data.append(node.value) # Add string to the data section
449
450 elif type(node) is Designator:
451 if len(node.selectors) > 0:
452 self.getreg(node)
453 # Load starting address
454 if node.obj.isLocal:
455 self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) )
456 else:
457 # Global variables need to be relocated...
458 self.addCode(leareg64(node.reg, ['RIP', 0]))
459 self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip))
460 # Loop over all designators..
461 for selector in node.selectors:
462 if type(selector) is Index:
463 # Deref an array index
464 self.genexprcode(selector.index)
465 self.getreg(selector)
466 self.addCode( mov(selector.reg, selector.typ.elementType.size) )
467 self.addCode( imulreg64(selector.reg, selector.index.reg ) )
468 self.freereg(selector.index)
469 self.addCode(addreg64(node.reg, selector.reg))
470 self.freereg(selector)
471 elif type(selector) is Field:
472 print('Field')
473 Error('Field not implemented')
474 else:
475 Error('Unknown selector')
476 else:
477 Error('Can only gencode for designator with selectors')
478
479 else:
480 print('not generating code for {0}'.format(node))
481
482 def generatecode(self, ast):
483 """ code generation front end """
484 self.initialize()
485 self.gencode(ast)
486 ast.image = self.image
487