comparison ide/compiler/codegenerator.py @ 1:92df07bc2081

Initial import of compiler
author windel
date Sun, 18 Sep 2011 19:00:29 +0200
parents
children 0d5ef85b8698
comparison
equal deleted inserted replaced
0:1a4faf9ef1ea 1:92df07bc2081
1 """
2 Code generation for 64 bits intel processors
3 """
4
5 from .nodes import *
6 from .errors import Error
7 from .builtin import real, integer, boolean, char
8 from .assembler import *
9
10 class CodeGenerator:
11 def __init__(self):
12 self.strings = []
13 self.initialize()
14 def initialize(self):
15 # Register descriptors:
16 self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',')
17 self.usedregs = []
18 # Members to accumulate the result into:
19 # The result is an image of bytecode and global variable space.
20 # Global variables a referenced by RIP relative addressing.
21 self.image = []
22 self.rip = 0 # The current instruction pointer location.
23 # TODO: backpatch list here?
24
25 # Functions to modify the code image
26 def addCode(self, code):
27 assert(type(code) is list)
28 self.image += code
29 self.rip += len(code)
30 def fixCode(self, position, code):
31 self.image[position:position+len(code)] = code
32 def align(self, b):
33 while (self.rip % b) != 0:
34 self.addCode([0])
35
36 def saveAllRegisters(self):
37 regs = list(self.usedregs.keys())
38 for reg in regs:
39 code += self.saveRegister(reg)
40
41 def saveRegister(self, reg):
42 code = []
43 if reg in self.usedregs.keys():
44 code.append('mov {0}, {1}'.format(self.usedregs[reg], reg))
45 del self.usedregs[reg]
46 self.freeregs.append(reg)
47
48 def getreg(self, node):
49 """ acquire a working register for a certain node."""
50 # Temporary register bypass action:
51 if len(self.freeregs) > 0:
52 reg = self.freeregs.pop(0)
53 self.usedregs.append(reg)
54 else:
55 Error('No more free regs')
56 node.reg = reg
57
58 def freereg(self, node):
59 reg = node.reg
60 node.reg = None
61 self.freeregs.append(reg)
62 self.usedregs.remove(reg)
63
64 # Helpers to load and retrieve designated objects:
65 def storeRegInDesignator(self, reg, designator):
66 assert(type(reg) is str)
67 assert(type(designator) is Designator)
68 if len(designator.selectors) > 0:
69 self.gencode( designator ) # Load the pointer into some register
70 self.addCode( mov([designator.reg, 0x0], reg) )
71 self.freereg( designator )
72 else:
73 if designator.obj.isLocal:
74 # Relative from rbp register
75 mem = ['rbp', designator.obj.offset]
76 self.addCode( mov(mem, reg) )
77 else:
78 # Relative from RIP after move
79 self.addCode( mov(['RIP', 0x0], reg) )
80 self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) )
81
82 # Code generation functions:
83 def genexprcode(self, node):
84 """
85 Generate code for expressions!
86 Recursively evaluates, and ensures a register contains the answer.
87 register is an integer register or a floating point reg
88 """
89 if isinstance(node, Binop):
90 """ Handle a binary operation (two arguments) of some kind """
91 self.genexprcode(node.a)
92 self.genexprcode(node.b)
93
94 if node.op == 'mod':
95 assert(node.typ.isType(integer))
96 self.addCode(mov('rax', node.a.reg))
97 self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
98 self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
99 node.reg = node.a.reg
100 self.freereg(node.b) # give up register that contains b
101 self.addCode(mov(node.reg, 'rdx')) # move remainder into result
102 elif node.op == 'div':
103 assert(node.typ.isType(integer))
104 self.addCode(mov('rax', node.a.reg))
105 self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
106 self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
107 node.reg = node.a.reg
108 self.freereg(node.b) # give up register that contains b
109 self.addCode(mov(node.reg, 'rax')) # move result into reg
110 elif node.op == '*':
111 if node.typ.isType(integer):
112 self.addCode(imulreg64(node.a.reg, node.b.reg))
113 node.reg = node.a.reg
114 self.freereg(node.b)
115 else:
116 Error('{0} for * not implemented'.format(node.typ))
117 elif node.op == '+':
118 if node.typ.isType(integer):
119 self.addCode(addreg64(node.a.reg, node.b.reg))
120 node.reg = node.a.reg
121 self.freereg(node.b)
122 else:
123 Error('{0} for + not implemented'.format(node.typ))
124 elif node.op == '-':
125 if node.typ.isType(integer):
126 self.addCode(subreg64(node.a.reg, node.b.reg))
127 node.reg = node.a.reg
128 self.freereg(node.b)
129 else:
130 Error('{0} for - not implemented'.format(node.typ))
131 else:
132 Error('Unknown Binop {0}'.format(node.op))
133
134 elif type(node) is Unop:
135 if node.op == 'INTTOREAL':
136 self.genexprcode(node.a)
137 node.reg = node.a.reg
138 # TODO use 'FILD' instruction
139 freg = 12
140 code.append('Unop inttoreal TODO')
141 elif node.op == 'ABS':
142 if isType(node.typ, real):
143 code = [0xD9, 0xE1] # st(0) = fabs st(0)
144 Error('ABS error integer')
145 elif isType(node.typ, integer):
146 code = []
147 Error('ABS error integer')
148 else:
149 Error('ABS error')
150 else:
151 Error('Unknown Unop {0}'.format(node.op))
152
153 elif isinstance(node, Designator):
154 # dereference, array index. Make sure that the result comes into a register
155 if len(node.selectors) > 0:
156 self.gencode(node) # Load the pointer into some register
157
158 # Now we can access the object at location '[node.reg]':
159 if node.typ.isType(integer):
160 self.addCode( mov(node.reg, [node.reg, 0x0]) )
161 else:
162 Error('Only integer types implemented')
163 else:
164 # No selectors, load variable directly
165 if node.obj.typ.isType(integer):
166 if type(node.obj) is Constant:
167 self.genexprcode(node.obj)
168 node.reg = node.obj.reg
169 else:
170 self.getreg(node)
171 # Get a register to store the integer value
172 if node.obj.isLocal:
173 # relative to rbp:
174 self.addCode( mov(node.reg, ['rbp', node.obj.offset]) )
175 else:
176 self.addCode(mov(node.reg, ['RIP', 0x0]))
177 self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip))
178 else:
179 Error('Cannot load variable type {0}'.format(node.typ))
180
181 elif isinstance(node, Relop):
182 # Create a boolean from operands
183 # TODO create an alternative for expressions used as conditions.
184 self.genexprcode(node.a)
185 self.genexprcode(node.b)
186
187 if node.a.typ.isType(integer):
188 instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'}
189 if not node.relop in instructions.keys():
190 Error('Unimplemented relop: '+str(node.relop))
191 instr = instructions[node.relop]
192
193 node.reg = node.a.reg
194 self.addCode( cmpreg64(node.a.reg, node.b.reg) )
195 self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp
196 fixloc1 = self.rip - 1
197 rip1 = self.rip
198 self.addCode( xorreg64(node.reg, node.reg) )
199 self.addCode( shortjump(0x0) ) # Jump over 1 code
200 fixloc2 = self.rip - 1
201 self.fixCode(fixloc1, imm8(self.rip - rip1))
202 rip2 = self.rip
203 self.addCode( xorreg64(node.reg, node.reg) )
204 self.addCode( increg64(node.reg) )
205 self.fixCode(fixloc2, imm8(self.rip - rip2))
206
207 self.freereg(node.b)
208 else:
209 Error('Relop not implemented for {0}'.format(node.a.typ))
210
211 elif type(node) is Constant:
212 if node.typ.isType(integer):
213 self.getreg(node)
214 self.addCode(mov(node.reg, node.value))
215 elif node.typ.isType(real):
216 code += self.getreg(node)
217 Error('TODO: get real reg')
218 # TODO: get a fixed point reg, and load the variable in there
219 else:
220 Error('Howto generate code for {0}?'.format(node))
221
222 elif type(node) is ProcedureCall:
223 if type(node.proc.obj) is BuiltinProcedure:
224 # Handle builtin procedures different, these not always call
225 # a function, but generate code.
226 bi = node.proc.obj
227 if bi.name == 'chr':
228 arg = node.args[0]
229 self.genexprcode(arg)
230 # Store character in full width register:
231 # TODO: store in char only register
232 node.reg = arg.reg
233 else:
234 Error('Unknown builtin function {0}'.format(bi.name))
235 else:
236 # Use generic procedure call first
237 self.gencode(node)
238 # Retrieve result:
239 if node.typ.isType(integer):
240 # Store result!
241 self.getreg(node)
242 self.addCode( mov(node.reg, 'rax') )
243 else:
244 Error('Return type not supported {0}'.format(node.typ))
245 else:
246 Error('Cannot generate expression code for: {0}'.format(node))
247
248 def gencode(self, node):
249 """ Code generation function for AST nodes """
250 if isinstance(node, Module):
251 # for all imports make a list of pointer to the actual procedures:
252 for imp in node.imports:
253 imp.offset = self.rip
254 self.addCode( [0x0]*8 )
255 # global variable storage allocation
256 variables = node.symtable.getAllLocal(Variable)
257 for var in variables:
258 var.isLocal = False
259 var.offset = self.rip
260 self.addCode( [0x00] * var.typ.size ) # TODO initial values here?
261 self.align(8)
262 # TODO: mark end of data and start of code inside image
263 # TODO: round data to page size to enable protection by loader.
264 # Procedure code generation:
265 procedures = node.symtable.getAllLocal(Procedure)
266 node.procs = procedures
267 for proc in procedures:
268 self.gencode(proc)
269 # Module init code:
270 node.initcodeentry = self.rip
271 self.gencode(node.initcode)
272 self.addCode( ret() )
273 # TODO: how to return from module init code? far return??
274
275 elif type(node) is Procedure:
276 # calculate offsets for local variables and parameters
277 # Variable location relative to 'rbp' register
278 variables = node.symtable.getAllLocal(Variable)
279 offset = 0
280 paramoffset = 16
281 for var in variables:
282 var.isLocal = True
283 if not var.isParameter:
284 offset += var.typ.size
285 # Offset is negative of rbp in stack frame
286 var.offset = -offset
287 node.framesize = offset
288 # Calculate offsets of parameters relative to rbp register
289 for par in reversed(node.typ.parameters):
290 pvar = node.symtable.getLocal(Variable, par.name)
291 pvar.offset = paramoffset
292 paramoffset += pvar.typ.size
293
294 # code generation
295 node.entrypoint = self.rip
296 self.addCode(push('rbp'))
297 self.addCode(mov('rbp', 'rsp')) # Setup the base pointer
298 self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals
299 self.gencode(node.block)
300 if node.retexpr:
301 if node.retexpr.typ.isType(integer):
302 self.genexprcode(node.retexpr)
303 self.addCode( mov('rax', node.retexpr.reg) )
304 self.freereg(node.retexpr)
305 else:
306 Error('Cannot return this kind yet {0}'.format(node.retexpr.typ))
307 self.addCode( addreg64('rsp', node.framesize) )
308 self.addCode( pop('rbp') )
309 self.addCode( ret() )
310 assert(len(self.usedregs) == 0)
311
312 elif isinstance(node, StatementSequence):
313 for s in node.statements:
314 self.gencode(s)
315 assert(len(self.usedregs) == 0)
316
317 elif type(node) is ProcedureCall:
318 # Prepare parameters on the stack:
319 stacksize = 0
320 assert(len(node.args) == len(node.proc.typ.parameters))
321 for arg, param in zip(node.args, node.proc.typ.parameters):
322
323 if param.kind == 'value':
324 self.genexprcode(arg)
325 self.addCode( push(arg.reg) )
326 self.freereg( arg )
327 stacksize += 8
328 else:
329 Error('Parameter kind other than value')
330
331 # Calculate address using designator
332 if type(node.proc.obj) is Procedure:
333 self.addCode( call(0x0) )
334 self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip))
335 elif type(node.proc.obj) is ImportedSymbol:
336 # Load the entry point of the import table
337 self.getreg(node.proc.obj)
338 # Load the address of the procedure:
339 self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) )
340 self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) )
341 # Call to the address in register:
342 self.addCode( call(node.proc.obj.reg) )
343 # Free register that holds the address of the object
344 self.freereg( node.proc.obj )
345 elif type(node.proc.obj) is BuiltinProcedure:
346 if node.proc.obj.name == 'chr':
347 print('int to char')
348 else:
349 Error('Unknown builtin function {0}'.format(node.proc.obj.name))
350 else:
351 Error('Cannot call designator of type {0}'.format(node.proc.obj))
352
353 # Restore stack (pop all arguments of):
354 self.addCode(addreg64('rsp', stacksize))
355
356 elif type(node) is Assignment:
357 if node.lval.typ.isType(integer):
358 # TODO if node.rval is Constant of some datatype, move it to mem directly
359 self.genexprcode(node.rval) # Calculate the value that has to be stored.
360 self.storeRegInDesignator(node.rval.reg, node.lval)
361 self.freereg(node.rval)
362 else:
363 Error('Assignments of other types not implemented')
364 # TODO if left and right are designators, do some sort of memcpy.
365
366 elif type(node) is IfStatement:
367 self.genexprcode(node.condition)
368 self.addCode( cmpreg64(node.condition.reg, 1) )
369 self.freereg(node.condition)
370 if node.falsestatement:
371 # If with else clause
372 self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
373 rip1 = self.rip
374 fixloc1 = self.rip - 4
375 self.gencode(node.truestatement)
376 self.addCode( nearjump( 0x0 ) ) # jump over false code
377 fixloc2 = self.rip - 4
378 self.fixCode(fixloc1, imm32(self.rip - rip1))
379 rip2 = self.rip
380 self.gencode(node.falsestatement)
381 self.fixCode(fixloc2, imm32(self.rip - rip2))
382 else:
383 # If without else clause
384 self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
385 rip1 = self.rip
386 fixloc1 = self.rip - 4
387 self.gencode(node.truestatement)
388 self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code.
389
390 elif isinstance(node, WhileStatement):
391 rip1 = self.rip # Store the start of the while loop
392 self.genexprcode(node.condition)
393 self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness
394 self.freereg(node.condition)
395 self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later)
396 fixloc1 = self.rip - 4
397 rip2 = self.rip
398 self.gencode(node.dostatements)
399 self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below
400 fixloc2 = self.rip - 4
401 rip3 = self.rip # end of while loop
402 self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop
403 self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop
404
405 elif type(node) is ForStatement:
406 # Initial load of iterator variable:
407 self.genexprcode(node.begin)
408 self.storeRegInDesignator(node.begin.reg, node.variable)
409 self.freereg(node.begin)
410 rip1 = self.rip
411 self.gencode(node.statements)
412 #self.loadDesignatorInReg(node.
413 #self.addCode( addreg64(node.variable, node.increment) )
414 Error('No implementation of FOR statement')
415
416 elif type(node) is AsmCode:
417 def processOperand(op):
418 if type(op) is list:
419 if type(op[0]) is Variable:
420 var = op[0]
421 if var.isLocal:
422 return ['rbp', var.offset]
423 else:
424 Error('Can only use local variables in inline assembler')
425 return op
426 for asmline in node.asmcode:
427 opcode, operands = asmline
428 operands = [processOperand(opx) for opx in operands]
429 print('assembling', opcode, *operands)
430 func,nargs = opcodes[opcode]
431 code = func(*operands)
432 self.addCode(code)
433
434 elif isinstance(node, EmptyStatement):
435 pass
436
437
438 elif type(node) is StringConstant:
439 self.strings.append(node)
440 self.data.append(node.value) # Add string to the data section
441
442 elif type(node) is Designator:
443 if len(node.selectors) > 0:
444 self.getreg(node)
445 # Load starting address
446 if node.obj.isLocal:
447 self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) )
448 else:
449 # Global variables need to be relocated...
450 self.addCode(leareg64(node.reg, ['RIP', 0]))
451 self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip))
452 # Loop over all designators..
453 for selector in node.selectors:
454 if type(selector) is Index:
455 # Deref an array index
456 self.genexprcode(selector.index)
457 self.getreg(selector)
458 self.addCode( mov(selector.reg, selector.typ.elementType.size) )
459 self.addCode( imulreg64(selector.reg, selector.index.reg ) )
460 self.freereg(selector.index)
461 self.addCode(addreg64(node.reg, selector.reg))
462 self.freereg(selector)
463 elif type(selector) is Field:
464 print('Field')
465 Error('Field not implemented')
466 else:
467 Error('Unknown selector')
468 else:
469 Error('Can only gencode for designator with selectors')
470
471 else:
472 print('not generating code for {0}'.format(node))
473
474 def generatecode(self, ast):
475 """ code generation front end """
476 self.initialize()
477 self.gencode(ast)
478 ast.image = self.image
479