Mercurial > lcfOS
view python/old/codegenerator.py @ 155:b28a11c01dbe
Simplified IR classes
author | Windel Bouwman |
---|---|
date | Sun, 03 Mar 2013 13:20:03 +0100 |
parents | 91af0e40f868 |
children |
line wrap: on
line source
""" Code generation for 64 bits intel processors """ from .nodes import * from .errors import Error from .builtin import real, integer, boolean, char from .assembler import * class CodeGenerator: def __init__(self): self.strings = [] self.initialize() def initialize(self): # Register descriptors: self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',') self.usedregs = [] # Members to accumulate the result into: # The result is an image of bytecode and global variable space. # Global variables a referenced by RIP relative addressing. self.image = [] self.rip = 0 # The current instruction pointer location. # TODO: backpatch list here? # Functions to modify the code image def addCode(self, code): assert(type(code) is list) self.image += code self.rip += len(code) def fixCode(self, position, code): self.image[position:position+len(code)] = code def align(self, b): while (self.rip % b) != 0: self.addCode([0]) def saveAllRegisters(self): regs = list(self.usedregs.keys()) for reg in regs: code += self.saveRegister(reg) def saveRegister(self, reg): code = [] if reg in self.usedregs.keys(): code.append('mov {0}, {1}'.format(self.usedregs[reg], reg)) del self.usedregs[reg] self.freeregs.append(reg) def getreg(self, node): """ acquire a working register for a certain node.""" # Temporary register bypass action: if len(self.freeregs) > 0: reg = self.freeregs.pop(0) self.usedregs.append(reg) else: Error('No more free regs') node.reg = reg def freereg(self, node): reg = node.reg node.reg = None self.freeregs.append(reg) self.usedregs.remove(reg) # Helpers to load and retrieve designated objects: def storeRegInDesignator(self, reg, designator): assert(type(reg) is str) assert(type(designator) is Designator) if len(designator.selectors) > 0: self.gencode( designator ) # Load the pointer into some register self.addCode( mov([designator.reg, 0x0], reg) ) self.freereg( designator ) else: if designator.obj.isLocal: # Relative from rbp register mem = ['rbp', designator.obj.offset] self.addCode( mov(mem, reg) ) else: # Relative from RIP after move self.addCode( mov(['RIP', 0x0], reg) ) self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) ) # Code generation functions: def genexprcode(self, node): """ Generate code for expressions! Recursively evaluates, and ensures a register contains the answer. register is an integer register or a floating point reg """ if isinstance(node, Binop): """ Handle a binary operation (two arguments) of some kind """ self.genexprcode(node.a) self.genexprcode(node.b) if node.op == 'mod': assert(node.typ.isType(integer)) self.addCode(mov('rax', node.a.reg)) self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg node.reg = node.a.reg self.freereg(node.b) # give up register that contains b self.addCode(mov(node.reg, 'rdx')) # move remainder into result elif node.op == 'div': assert(node.typ.isType(integer)) self.addCode(mov('rax', node.a.reg)) self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg node.reg = node.a.reg self.freereg(node.b) # give up register that contains b self.addCode(mov(node.reg, 'rax')) # move result into reg elif node.op == '*': if node.typ.isType(integer): self.addCode(imulreg64(node.a.reg, node.b.reg)) node.reg = node.a.reg self.freereg(node.b) else: Error('{0} for * not implemented'.format(node.typ)) elif node.op == '+': if node.typ.isType(integer): self.addCode(addreg64(node.a.reg, node.b.reg)) node.reg = node.a.reg self.freereg(node.b) else: Error('{0} for + not implemented'.format(node.typ)) elif node.op == '-': if node.typ.isType(integer): self.addCode(subreg64(node.a.reg, node.b.reg)) node.reg = node.a.reg self.freereg(node.b) else: Error('{0} for - not implemented'.format(node.typ)) else: Error('Unknown Binop {0}'.format(node.op)) elif type(node) is Unop: if node.op == 'INTTOREAL': self.genexprcode(node.a) node.reg = node.a.reg # TODO use 'FILD' instruction freg = 12 code.append('Unop inttoreal TODO') elif node.op == 'ABS': if isType(node.typ, real): code = [0xD9, 0xE1] # st(0) = fabs st(0) Error('ABS error integer') elif isType(node.typ, integer): code = [] Error('ABS error integer') else: Error('ABS error') else: Error('Unknown Unop {0}'.format(node.op)) elif isinstance(node, Designator): # dereference, array index. Make sure that the result comes into a register if len(node.selectors) > 0: self.gencode(node) # Load the pointer into some register # Now we can access the object at location '[node.reg]': if node.typ.isType(integer): self.addCode( mov(node.reg, [node.reg, 0x0]) ) else: Error('Only integer types implemented') else: # No selectors, load variable directly if node.obj.typ.isType(integer): if type(node.obj) is Constant: self.genexprcode(node.obj) node.reg = node.obj.reg else: self.getreg(node) # Get a register to store the integer value if node.obj.isLocal: # relative to rbp: self.addCode( mov(node.reg, ['rbp', node.obj.offset]) ) else: self.addCode(mov(node.reg, ['RIP', 0x0])) self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip)) else: Error('Cannot load variable type {0}'.format(node.typ)) elif isinstance(node, Relop): # Create a boolean from operands # TODO create an alternative for expressions used as conditions. self.genexprcode(node.a) self.genexprcode(node.b) if node.a.typ.isType(integer): instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'} if not node.relop in instructions.keys(): Error('Unimplemented relop: '+str(node.relop)) instr = instructions[node.relop] node.reg = node.a.reg self.addCode( cmpreg64(node.a.reg, node.b.reg) ) self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp fixloc1 = self.rip - 1 rip1 = self.rip self.addCode( xorreg64(node.reg, node.reg) ) self.addCode( shortjump(0x0) ) # Jump over 1 code fixloc2 = self.rip - 1 self.fixCode(fixloc1, imm8(self.rip - rip1)) rip2 = self.rip self.addCode( xorreg64(node.reg, node.reg) ) self.addCode( increg64(node.reg) ) self.fixCode(fixloc2, imm8(self.rip - rip2)) self.freereg(node.b) else: Error('Relop not implemented for {0}'.format(node.a.typ)) elif type(node) is Constant: if node.typ.isType(integer): self.getreg(node) self.addCode(mov(node.reg, node.value)) elif node.typ.isType(real): code += self.getreg(node) Error('TODO: get real reg') # TODO: get a fixed point reg, and load the variable in there else: Error('Howto generate code for {0}?'.format(node)) elif type(node) is ProcedureCall: if type(node.proc.obj) is BuiltinProcedure: # Handle builtin procedures different, these not always call # a function, but generate code. bi = node.proc.obj if bi.name == 'chr': arg = node.args[0] self.genexprcode(arg) # Store character in full width register: # TODO: store in char only register node.reg = arg.reg else: Error('Unknown builtin function {0}'.format(bi.name)) else: # Use generic procedure call first self.gencode(node) # Retrieve result: if node.typ.isType(integer): # Store result! self.getreg(node) self.addCode( mov(node.reg, 'rax') ) else: Error('Return type not supported {0}'.format(node.typ)) else: Error('Cannot generate expression code for: {0}'.format(node)) def gencode(self, node): """ Code generation function for AST nodes """ if isinstance(node, Module): # for all imports make a list of pointer to the actual procedures: for imp in node.imports: imp.offset = self.rip self.addCode( [0x0]*8 ) # global variable storage allocation variables = node.symtable.getAllLocal(Variable) for var in variables: var.isLocal = False var.offset = self.rip self.addCode( [0x00] * var.typ.size ) # TODO initial values here? self.align(8) # TODO: mark end of data and start of code inside image # TODO: round data to page size to enable protection by loader. # Procedure code generation: procedures = node.symtable.getAllLocal(Procedure) node.procs = procedures for proc in procedures: self.gencode(proc) # Module init code: node.initcodeentry = self.rip self.gencode(node.initcode) self.addCode( ret() ) # TODO: how to return from module init code? far return?? elif type(node) is Procedure: # calculate offsets for local variables and parameters # Variable location relative to 'rbp' register variables = node.symtable.getAllLocal(Variable) offset = 0 paramoffset = 16 for var in variables: var.isLocal = True if not var.isParameter: offset += var.typ.size # Offset is negative of rbp in stack frame var.offset = -offset node.framesize = offset # Calculate offsets of parameters relative to rbp register for par in reversed(node.typ.parameters): pvar = node.symtable.getLocal(Variable, par.name) pvar.offset = paramoffset paramoffset += pvar.typ.size # code generation node.entrypoint = self.rip self.addCode(push('rbp')) self.addCode(mov('rbp', 'rsp')) # Setup the base pointer self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals self.gencode(node.block) if node.retexpr: if node.retexpr.typ.isType(integer): self.genexprcode(node.retexpr) self.addCode( mov('rax', node.retexpr.reg) ) self.freereg(node.retexpr) else: Error('Cannot return this kind yet {0}'.format(node.retexpr.typ)) self.addCode( addreg64('rsp', node.framesize) ) self.addCode( pop('rbp') ) self.addCode( ret() ) assert(len(self.usedregs) == 0) elif isinstance(node, StatementSequence): for s in node.statements: self.gencode(s) elif type(node) is ProcedureCall: # Prepare parameters on the stack: stacksize = 0 assert(len(node.args) == len(node.proc.typ.parameters)) for arg, param in zip(node.args, node.proc.typ.parameters): if param.kind == 'value': self.genexprcode(arg) self.addCode( push(arg.reg) ) self.freereg( arg ) stacksize += 8 else: Error('Parameter kind other than value') # Calculate address using designator if type(node.proc.obj) is Procedure: self.addCode( call(0x0) ) self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip)) elif type(node.proc.obj) is ImportedSymbol: # Load the entry point of the import table self.getreg(node.proc.obj) # Load the address of the procedure: self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) ) self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) ) # Call to the address in register: self.addCode( call(node.proc.obj.reg) ) # Free register that holds the address of the object self.freereg( node.proc.obj ) elif type(node.proc.obj) is BuiltinProcedure: if node.proc.obj.name == 'chr': print('int to char') else: Error('Unknown builtin function {0}'.format(node.proc.obj.name)) else: Error('Cannot call designator of type {0}'.format(node.proc.obj)) # Restore stack (pop all arguments of): self.addCode(addreg64('rsp', stacksize)) elif type(node) is Assignment: if node.lval.typ.isType(integer): # TODO if node.rval is Constant of some datatype, move it to mem directly self.genexprcode(node.rval) # Calculate the value that has to be stored. self.storeRegInDesignator(node.rval.reg, node.lval) self.freereg(node.rval) else: Error('Assignments of other types not implemented') # TODO if left and right are designators, do some sort of memcpy. elif type(node) is IfStatement: self.genexprcode(node.condition) self.addCode( cmpreg64(node.condition.reg, 1) ) self.freereg(node.condition) if node.falsestatement: # If with else clause self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false rip1 = self.rip fixloc1 = self.rip - 4 self.gencode(node.truestatement) self.addCode( nearjump( 0x0 ) ) # jump over false code fixloc2 = self.rip - 4 self.fixCode(fixloc1, imm32(self.rip - rip1)) rip2 = self.rip self.gencode(node.falsestatement) self.fixCode(fixloc2, imm32(self.rip - rip2)) else: # If without else clause self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false rip1 = self.rip fixloc1 = self.rip - 4 self.gencode(node.truestatement) self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code. elif isinstance(node, WhileStatement): rip1 = self.rip # Store the start of the while loop self.genexprcode(node.condition) self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness self.freereg(node.condition) self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later) fixloc1 = self.rip - 4 rip2 = self.rip self.gencode(node.dostatements) self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below fixloc2 = self.rip - 4 rip3 = self.rip # end of while loop self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop elif type(node) is ForStatement: # Initial load of iterator variable: self.genexprcode(node.begin) self.genexprcode(node.end) # TODO: link reg with variable so that a register is used instead of a variable iterreg = node.begin.reg # Get the register used for the loop #self.addCode(cmpreg64(iterreg, node.endvalue)) rip1 = self.rip self.gencode(node.statements) #self.loadDesignatorInReg(node. #self.addCode( addreg64(node.variable, node.increment) ) self.addCode(nearjump(0x0)) fixloc1 = self.rip - 4 rip2 = self.rip self.fixCode(fixloc1, imm32(rip1 - rip2)) self.freereg(node.begin) # Release register used in loop self.freereg(node.end) Error('No implementation of FOR statement') elif type(node) is AsmCode: def processOperand(op): if type(op) is list: if type(op[0]) is Variable: var = op[0] if var.isLocal: return ['rbp', var.offset] else: Error('Can only use local variables in inline assembler') return op for asmline in node.asmcode: opcode, operands = asmline operands = [processOperand(opx) for opx in operands] print('assembling', opcode, *operands) func,nargs = opcodes[opcode] code = func(*operands) self.addCode(code) elif isinstance(node, EmptyStatement): pass elif type(node) is StringConstant: self.strings.append(node) self.data.append(node.value) # Add string to the data section elif type(node) is Designator: if len(node.selectors) > 0: self.getreg(node) # Load starting address if node.obj.isLocal: self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) ) else: # Global variables need to be relocated... self.addCode(leareg64(node.reg, ['RIP', 0])) self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip)) # Loop over all designators.. for selector in node.selectors: if type(selector) is Index: # Deref an array index self.genexprcode(selector.index) self.getreg(selector) self.addCode( mov(selector.reg, selector.typ.elementType.size) ) self.addCode( imulreg64(selector.reg, selector.index.reg ) ) self.freereg(selector.index) self.addCode(addreg64(node.reg, selector.reg)) self.freereg(selector) elif type(selector) is Field: print('Field') Error('Field not implemented') else: Error('Unknown selector') else: Error('Can only gencode for designator with selectors') else: print('not generating code for {0}'.format(node)) def generatecode(self, ast): """ code generation front end """ self.initialize() self.gencode(ast) ast.image = self.image