Mercurial > lcfOS
diff python/libs/compiler/backends/codegenerator.py @ 69:60cc36ef5a50
Rework in compiler
author | windel |
---|---|
date | Sat, 27 Oct 2012 14:31:58 +0200 |
parents | python/libs/compiler/codegenerator.py@32078200cdd6 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/libs/compiler/backends/codegenerator.py Sat Oct 27 14:31:58 2012 +0200 @@ -0,0 +1,487 @@ +""" + Code generation for 64 bits intel processors +""" + +from .nodes import * +from .errors import Error +from .builtin import real, integer, boolean, char +from .assembler import * + +class CodeGenerator: + def __init__(self): + self.strings = [] + self.initialize() + def initialize(self): + # Register descriptors: + self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',') + self.usedregs = [] + # Members to accumulate the result into: + # The result is an image of bytecode and global variable space. + # Global variables a referenced by RIP relative addressing. + self.image = [] + self.rip = 0 # The current instruction pointer location. + # TODO: backpatch list here? + + # Functions to modify the code image + def addCode(self, code): + assert(type(code) is list) + self.image += code + self.rip += len(code) + def fixCode(self, position, code): + self.image[position:position+len(code)] = code + def align(self, b): + while (self.rip % b) != 0: + self.addCode([0]) + + def saveAllRegisters(self): + regs = list(self.usedregs.keys()) + for reg in regs: + code += self.saveRegister(reg) + + def saveRegister(self, reg): + code = [] + if reg in self.usedregs.keys(): + code.append('mov {0}, {1}'.format(self.usedregs[reg], reg)) + del self.usedregs[reg] + self.freeregs.append(reg) + + def getreg(self, node): + """ acquire a working register for a certain node.""" + # Temporary register bypass action: + if len(self.freeregs) > 0: + reg = self.freeregs.pop(0) + self.usedregs.append(reg) + else: + Error('No more free regs') + node.reg = reg + + def freereg(self, node): + reg = node.reg + node.reg = None + self.freeregs.append(reg) + self.usedregs.remove(reg) + + # Helpers to load and retrieve designated objects: + def storeRegInDesignator(self, reg, designator): + assert(type(reg) is str) + assert(type(designator) is Designator) + if len(designator.selectors) > 0: + self.gencode( designator ) # Load the pointer into some register + self.addCode( mov([designator.reg, 0x0], reg) ) + self.freereg( designator ) + else: + if designator.obj.isLocal: + # Relative from rbp register + mem = ['rbp', designator.obj.offset] + self.addCode( mov(mem, reg) ) + else: + # Relative from RIP after move + self.addCode( mov(['RIP', 0x0], reg) ) + self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) ) + + # Code generation functions: + def genexprcode(self, node): + """ + Generate code for expressions! + Recursively evaluates, and ensures a register contains the answer. + register is an integer register or a floating point reg + """ + if isinstance(node, Binop): + """ Handle a binary operation (two arguments) of some kind """ + self.genexprcode(node.a) + self.genexprcode(node.b) + + if node.op == 'mod': + assert(node.typ.isType(integer)) + self.addCode(mov('rax', node.a.reg)) + self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros + self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg + node.reg = node.a.reg + self.freereg(node.b) # give up register that contains b + self.addCode(mov(node.reg, 'rdx')) # move remainder into result + elif node.op == 'div': + assert(node.typ.isType(integer)) + self.addCode(mov('rax', node.a.reg)) + self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros + self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg + node.reg = node.a.reg + self.freereg(node.b) # give up register that contains b + self.addCode(mov(node.reg, 'rax')) # move result into reg + elif node.op == '*': + if node.typ.isType(integer): + self.addCode(imulreg64(node.a.reg, node.b.reg)) + node.reg = node.a.reg + self.freereg(node.b) + else: + Error('{0} for * not implemented'.format(node.typ)) + elif node.op == '+': + if node.typ.isType(integer): + self.addCode(addreg64(node.a.reg, node.b.reg)) + node.reg = node.a.reg + self.freereg(node.b) + else: + Error('{0} for + not implemented'.format(node.typ)) + elif node.op == '-': + if node.typ.isType(integer): + self.addCode(subreg64(node.a.reg, node.b.reg)) + node.reg = node.a.reg + self.freereg(node.b) + else: + Error('{0} for - not implemented'.format(node.typ)) + else: + Error('Unknown Binop {0}'.format(node.op)) + + elif type(node) is Unop: + if node.op == 'INTTOREAL': + self.genexprcode(node.a) + node.reg = node.a.reg + # TODO use 'FILD' instruction + freg = 12 + code.append('Unop inttoreal TODO') + elif node.op == 'ABS': + if isType(node.typ, real): + code = [0xD9, 0xE1] # st(0) = fabs st(0) + Error('ABS error integer') + elif isType(node.typ, integer): + code = [] + Error('ABS error integer') + else: + Error('ABS error') + else: + Error('Unknown Unop {0}'.format(node.op)) + + elif isinstance(node, Designator): + # dereference, array index. Make sure that the result comes into a register + if len(node.selectors) > 0: + self.gencode(node) # Load the pointer into some register + + # Now we can access the object at location '[node.reg]': + if node.typ.isType(integer): + self.addCode( mov(node.reg, [node.reg, 0x0]) ) + else: + Error('Only integer types implemented') + else: + # No selectors, load variable directly + if node.obj.typ.isType(integer): + if type(node.obj) is Constant: + self.genexprcode(node.obj) + node.reg = node.obj.reg + else: + self.getreg(node) + # Get a register to store the integer value + if node.obj.isLocal: + # relative to rbp: + self.addCode( mov(node.reg, ['rbp', node.obj.offset]) ) + else: + self.addCode(mov(node.reg, ['RIP', 0x0])) + self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip)) + else: + Error('Cannot load variable type {0}'.format(node.typ)) + + elif isinstance(node, Relop): + # Create a boolean from operands + # TODO create an alternative for expressions used as conditions. + self.genexprcode(node.a) + self.genexprcode(node.b) + + if node.a.typ.isType(integer): + instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'} + if not node.relop in instructions.keys(): + Error('Unimplemented relop: '+str(node.relop)) + instr = instructions[node.relop] + + node.reg = node.a.reg + self.addCode( cmpreg64(node.a.reg, node.b.reg) ) + self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp + fixloc1 = self.rip - 1 + rip1 = self.rip + self.addCode( xorreg64(node.reg, node.reg) ) + self.addCode( shortjump(0x0) ) # Jump over 1 code + fixloc2 = self.rip - 1 + self.fixCode(fixloc1, imm8(self.rip - rip1)) + rip2 = self.rip + self.addCode( xorreg64(node.reg, node.reg) ) + self.addCode( increg64(node.reg) ) + self.fixCode(fixloc2, imm8(self.rip - rip2)) + + self.freereg(node.b) + else: + Error('Relop not implemented for {0}'.format(node.a.typ)) + + elif type(node) is Constant: + if node.typ.isType(integer): + self.getreg(node) + self.addCode(mov(node.reg, node.value)) + elif node.typ.isType(real): + code += self.getreg(node) + Error('TODO: get real reg') + # TODO: get a fixed point reg, and load the variable in there + else: + Error('Howto generate code for {0}?'.format(node)) + + elif type(node) is ProcedureCall: + if type(node.proc.obj) is BuiltinProcedure: + # Handle builtin procedures different, these not always call + # a function, but generate code. + bi = node.proc.obj + if bi.name == 'chr': + arg = node.args[0] + self.genexprcode(arg) + # Store character in full width register: + # TODO: store in char only register + node.reg = arg.reg + else: + Error('Unknown builtin function {0}'.format(bi.name)) + else: + # Use generic procedure call first + self.gencode(node) + # Retrieve result: + if node.typ.isType(integer): + # Store result! + self.getreg(node) + self.addCode( mov(node.reg, 'rax') ) + else: + Error('Return type not supported {0}'.format(node.typ)) + else: + Error('Cannot generate expression code for: {0}'.format(node)) + + def gencode(self, node): + """ Code generation function for AST nodes """ + if isinstance(node, Module): + # for all imports make a list of pointer to the actual procedures: + for imp in node.imports: + imp.offset = self.rip + self.addCode( [0x0]*8 ) + # global variable storage allocation + variables = node.symtable.getAllLocal(Variable) + for var in variables: + var.isLocal = False + var.offset = self.rip + self.addCode( [0x00] * var.typ.size ) # TODO initial values here? + self.align(8) + # TODO: mark end of data and start of code inside image + # TODO: round data to page size to enable protection by loader. + # Procedure code generation: + procedures = node.symtable.getAllLocal(Procedure) + node.procs = procedures + for proc in procedures: + self.gencode(proc) + # Module init code: + node.initcodeentry = self.rip + self.gencode(node.initcode) + self.addCode( ret() ) + # TODO: how to return from module init code? far return?? + + elif type(node) is Procedure: + # calculate offsets for local variables and parameters + # Variable location relative to 'rbp' register + variables = node.symtable.getAllLocal(Variable) + offset = 0 + paramoffset = 16 + for var in variables: + var.isLocal = True + if not var.isParameter: + offset += var.typ.size + # Offset is negative of rbp in stack frame + var.offset = -offset + node.framesize = offset + # Calculate offsets of parameters relative to rbp register + for par in reversed(node.typ.parameters): + pvar = node.symtable.getLocal(Variable, par.name) + pvar.offset = paramoffset + paramoffset += pvar.typ.size + + # code generation + node.entrypoint = self.rip + self.addCode(push('rbp')) + self.addCode(mov('rbp', 'rsp')) # Setup the base pointer + self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals + self.gencode(node.block) + if node.retexpr: + if node.retexpr.typ.isType(integer): + self.genexprcode(node.retexpr) + self.addCode( mov('rax', node.retexpr.reg) ) + self.freereg(node.retexpr) + else: + Error('Cannot return this kind yet {0}'.format(node.retexpr.typ)) + self.addCode( addreg64('rsp', node.framesize) ) + self.addCode( pop('rbp') ) + self.addCode( ret() ) + assert(len(self.usedregs) == 0) + + elif isinstance(node, StatementSequence): + for s in node.statements: + self.gencode(s) + + elif type(node) is ProcedureCall: + # Prepare parameters on the stack: + stacksize = 0 + assert(len(node.args) == len(node.proc.typ.parameters)) + for arg, param in zip(node.args, node.proc.typ.parameters): + + if param.kind == 'value': + self.genexprcode(arg) + self.addCode( push(arg.reg) ) + self.freereg( arg ) + stacksize += 8 + else: + Error('Parameter kind other than value') + + # Calculate address using designator + if type(node.proc.obj) is Procedure: + self.addCode( call(0x0) ) + self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip)) + elif type(node.proc.obj) is ImportedSymbol: + # Load the entry point of the import table + self.getreg(node.proc.obj) + # Load the address of the procedure: + self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) ) + self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) ) + # Call to the address in register: + self.addCode( call(node.proc.obj.reg) ) + # Free register that holds the address of the object + self.freereg( node.proc.obj ) + elif type(node.proc.obj) is BuiltinProcedure: + if node.proc.obj.name == 'chr': + print('int to char') + else: + Error('Unknown builtin function {0}'.format(node.proc.obj.name)) + else: + Error('Cannot call designator of type {0}'.format(node.proc.obj)) + + # Restore stack (pop all arguments of): + self.addCode(addreg64('rsp', stacksize)) + + elif type(node) is Assignment: + if node.lval.typ.isType(integer): + # TODO if node.rval is Constant of some datatype, move it to mem directly + self.genexprcode(node.rval) # Calculate the value that has to be stored. + self.storeRegInDesignator(node.rval.reg, node.lval) + self.freereg(node.rval) + else: + Error('Assignments of other types not implemented') + # TODO if left and right are designators, do some sort of memcpy. + + elif type(node) is IfStatement: + self.genexprcode(node.condition) + self.addCode( cmpreg64(node.condition.reg, 1) ) + self.freereg(node.condition) + if node.falsestatement: + # If with else clause + self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false + rip1 = self.rip + fixloc1 = self.rip - 4 + self.gencode(node.truestatement) + self.addCode( nearjump( 0x0 ) ) # jump over false code + fixloc2 = self.rip - 4 + self.fixCode(fixloc1, imm32(self.rip - rip1)) + rip2 = self.rip + self.gencode(node.falsestatement) + self.fixCode(fixloc2, imm32(self.rip - rip2)) + else: + # If without else clause + self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false + rip1 = self.rip + fixloc1 = self.rip - 4 + self.gencode(node.truestatement) + self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code. + + elif isinstance(node, WhileStatement): + rip1 = self.rip # Store the start of the while loop + self.genexprcode(node.condition) + self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness + self.freereg(node.condition) + self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later) + fixloc1 = self.rip - 4 + rip2 = self.rip + self.gencode(node.dostatements) + self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below + fixloc2 = self.rip - 4 + rip3 = self.rip # end of while loop + self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop + self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop + + elif type(node) is ForStatement: + # Initial load of iterator variable: + self.genexprcode(node.begin) + self.genexprcode(node.end) + # TODO: link reg with variable so that a register is used instead of a variable + iterreg = node.begin.reg # Get the register used for the loop + #self.addCode(cmpreg64(iterreg, node.endvalue)) + rip1 = self.rip + self.gencode(node.statements) + #self.loadDesignatorInReg(node. + #self.addCode( addreg64(node.variable, node.increment) ) + self.addCode(nearjump(0x0)) + fixloc1 = self.rip - 4 + rip2 = self.rip + self.fixCode(fixloc1, imm32(rip1 - rip2)) + + self.freereg(node.begin) # Release register used in loop + self.freereg(node.end) + Error('No implementation of FOR statement') + + elif type(node) is AsmCode: + def processOperand(op): + if type(op) is list: + if type(op[0]) is Variable: + var = op[0] + if var.isLocal: + return ['rbp', var.offset] + else: + Error('Can only use local variables in inline assembler') + return op + for asmline in node.asmcode: + opcode, operands = asmline + operands = [processOperand(opx) for opx in operands] + print('assembling', opcode, *operands) + func,nargs = opcodes[opcode] + code = func(*operands) + self.addCode(code) + + elif isinstance(node, EmptyStatement): + pass + + + elif type(node) is StringConstant: + self.strings.append(node) + self.data.append(node.value) # Add string to the data section + + elif type(node) is Designator: + if len(node.selectors) > 0: + self.getreg(node) + # Load starting address + if node.obj.isLocal: + self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) ) + else: + # Global variables need to be relocated... + self.addCode(leareg64(node.reg, ['RIP', 0])) + self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip)) + # Loop over all designators.. + for selector in node.selectors: + if type(selector) is Index: + # Deref an array index + self.genexprcode(selector.index) + self.getreg(selector) + self.addCode( mov(selector.reg, selector.typ.elementType.size) ) + self.addCode( imulreg64(selector.reg, selector.index.reg ) ) + self.freereg(selector.index) + self.addCode(addreg64(node.reg, selector.reg)) + self.freereg(selector) + elif type(selector) is Field: + print('Field') + Error('Field not implemented') + else: + Error('Unknown selector') + else: + Error('Can only gencode for designator with selectors') + + else: + print('not generating code for {0}'.format(node)) + + def generatecode(self, ast): + """ code generation front end """ + self.initialize() + self.gencode(ast) + ast.image = self.image +