view ide/compiler/codegenerator.py @ 23:5dd47d6eebac

Added ubersimple malloc algorithm
author windel
date Thu, 01 Dec 2011 21:42:59 +0100
parents 818f80afa78b
children
line wrap: on
line source

"""
  Code generation for 64 bits intel processors
"""

from .nodes import *
from .errors import Error
from .builtin import real, integer, boolean, char
from .assembler import *

class CodeGenerator:
   def __init__(self):
      self.strings = []
      self.initialize()
   def initialize(self):
      # Register descriptors:
      self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',')
      self.usedregs = []
      # Members to accumulate the result into:
      # The result is an image of bytecode and global variable space.
      # Global variables a referenced by RIP relative addressing.
      self.image = []
      self.rip = 0 # The current instruction pointer location.
      # TODO: backpatch list here?

   # Functions to modify the code image
   def addCode(self, code):
      assert(type(code) is list)
      self.image += code
      self.rip += len(code)
   def fixCode(self, position, code):
      self.image[position:position+len(code)] = code
   def align(self, b):
      while (self.rip % b) != 0:
         self.addCode([0])

   def saveAllRegisters(self):
      regs = list(self.usedregs.keys())
      for reg in regs:
         code += self.saveRegister(reg)

   def saveRegister(self, reg):
      code = []
      if reg in self.usedregs.keys(): 
         code.append('mov {0}, {1}'.format(self.usedregs[reg], reg))
         del self.usedregs[reg]
         self.freeregs.append(reg)

   def getreg(self, node):
      """ acquire a working register for a certain node."""
      # Temporary register bypass action:
      if len(self.freeregs) > 0:
         reg = self.freeregs.pop(0)
         self.usedregs.append(reg)
      else:
         Error('No more free regs')
      node.reg = reg

   def freereg(self, node):
      reg = node.reg
      node.reg = None
      self.freeregs.append(reg)
      self.usedregs.remove(reg)

   # Helpers to load and retrieve designated objects:
   def storeRegInDesignator(self, reg, designator):
      assert(type(reg) is str)
      assert(type(designator) is Designator)
      if len(designator.selectors) > 0:
         self.gencode( designator ) # Load the pointer into some register
         self.addCode( mov([designator.reg, 0x0], reg) )
         self.freereg( designator )
      else:
         if designator.obj.isLocal:
            # Relative from rbp register
            mem = ['rbp', designator.obj.offset]
            self.addCode( mov(mem, reg) )
         else:
            # Relative from RIP after move
            self.addCode( mov(['RIP', 0x0], reg) )
            self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) )

   # Code generation functions:
   def genexprcode(self, node):
      """ 
         Generate code for expressions!
         Recursively evaluates, and ensures a register contains the answer.
         register is an integer register or a floating point reg
      """
      if isinstance(node, Binop):
         """ Handle a binary operation (two arguments) of some kind """
         self.genexprcode(node.a)
         self.genexprcode(node.b)

         if node.op == 'mod':
            assert(node.typ.isType(integer))
            self.addCode(mov('rax', node.a.reg))
            self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
            self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
            node.reg = node.a.reg
            self.freereg(node.b) # give up register that contains b
            self.addCode(mov(node.reg, 'rdx')) # move remainder into result
         elif node.op == 'div':
            assert(node.typ.isType(integer))
            self.addCode(mov('rax', node.a.reg))
            self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
            self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
            node.reg = node.a.reg
            self.freereg(node.b) # give up register that contains b
            self.addCode(mov(node.reg, 'rax')) # move result into reg
         elif node.op == '*':
            if node.typ.isType(integer):
               self.addCode(imulreg64(node.a.reg, node.b.reg))
               node.reg = node.a.reg
               self.freereg(node.b)
            else:
               Error('{0} for * not implemented'.format(node.typ))
         elif node.op == '+':
            if node.typ.isType(integer):
               self.addCode(addreg64(node.a.reg, node.b.reg))
               node.reg = node.a.reg
               self.freereg(node.b)
            else:
               Error('{0} for + not implemented'.format(node.typ))
         elif node.op == '-':
            if node.typ.isType(integer):
               self.addCode(subreg64(node.a.reg, node.b.reg))
               node.reg = node.a.reg
               self.freereg(node.b)
            else:
               Error('{0} for - not implemented'.format(node.typ))
         else:
            Error('Unknown Binop {0}'.format(node.op))

      elif type(node) is Unop:
         if node.op == 'INTTOREAL':
            self.genexprcode(node.a)
            node.reg = node.a.reg
            # TODO use 'FILD' instruction
            freg = 12
            code.append('Unop inttoreal TODO')
         elif node.op == 'ABS':
            if isType(node.typ, real):
               code = [0xD9, 0xE1] # st(0) = fabs st(0)
               Error('ABS error integer')
            elif isType(node.typ, integer):
               code = []
               Error('ABS error integer')
            else:
               Error('ABS error')
         else:
            Error('Unknown Unop {0}'.format(node.op))

      elif isinstance(node, Designator):
         # dereference, array index. Make sure that the result comes into a register
         if len(node.selectors) > 0:
            self.gencode(node) # Load the pointer into some register

            # Now we can access the object at location '[node.reg]':
            if node.typ.isType(integer):
               self.addCode( mov(node.reg, [node.reg, 0x0]) )
            else:
               Error('Only integer types implemented')
         else:
            # No selectors, load variable directly
            if node.obj.typ.isType(integer):
               if type(node.obj) is Constant:
                  self.genexprcode(node.obj)
                  node.reg = node.obj.reg
               else:
                  self.getreg(node)
                  # Get a register to store the integer value
                  if node.obj.isLocal:
                     # relative to rbp:
                     self.addCode( mov(node.reg, ['rbp', node.obj.offset]) )
                  else:
                     self.addCode(mov(node.reg, ['RIP', 0x0]))
                     self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip))
            else:
               Error('Cannot load variable type {0}'.format(node.typ))

      elif isinstance(node, Relop):
         # Create a boolean from operands
         # TODO create an alternative for expressions used as conditions.
         self.genexprcode(node.a)
         self.genexprcode(node.b)

         if node.a.typ.isType(integer):
            instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'}
            if not node.relop in instructions.keys():
               Error('Unimplemented relop: '+str(node.relop))
            instr = instructions[node.relop]

            node.reg = node.a.reg
            self.addCode( cmpreg64(node.a.reg, node.b.reg) )
            self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp
            fixloc1 = self.rip - 1
            rip1 = self.rip
            self.addCode( xorreg64(node.reg, node.reg) )
            self.addCode( shortjump(0x0) ) # Jump over 1 code
            fixloc2 = self.rip - 1
            self.fixCode(fixloc1, imm8(self.rip - rip1))
            rip2 = self.rip
            self.addCode( xorreg64(node.reg, node.reg) )
            self.addCode( increg64(node.reg) )
            self.fixCode(fixloc2, imm8(self.rip - rip2))

            self.freereg(node.b)
         else:
            Error('Relop not implemented for {0}'.format(node.a.typ))

      elif type(node) is Constant:
         if node.typ.isType(integer):
            self.getreg(node)
            self.addCode(mov(node.reg, node.value))
         elif node.typ.isType(real):
            code += self.getreg(node)
            Error('TODO: get real reg')
            # TODO: get a fixed point reg, and load the variable in there
         else:
            Error('Howto generate code for {0}?'.format(node))

      elif type(node) is ProcedureCall:
         if type(node.proc.obj) is BuiltinProcedure:
            # Handle builtin procedures different, these not always call
            # a function, but generate code.
            bi = node.proc.obj
            if bi.name == 'chr':
               arg = node.args[0]
               self.genexprcode(arg)
               # Store character in full width register:
               # TODO: store in char only register
               node.reg = arg.reg
            else:
               Error('Unknown builtin function {0}'.format(bi.name))
         else:
            # Use generic procedure call first
            self.gencode(node)
            # Retrieve result:
            if node.typ.isType(integer):
               # Store result!
               self.getreg(node)
               self.addCode( mov(node.reg, 'rax') )
            else:
               Error('Return type not supported {0}'.format(node.typ))
      else:
         Error('Cannot generate expression code for: {0}'.format(node))

   def gencode(self, node):
      """ Code generation function for AST nodes """
      if isinstance(node, Module):
         # for all imports make a list of pointer to the actual procedures:
         for imp in node.imports:
            imp.offset = self.rip
            self.addCode( [0x0]*8 )
         # global variable storage allocation
         variables = node.symtable.getAllLocal(Variable)
         for var in variables:
            var.isLocal = False
            var.offset = self.rip
            self.addCode( [0x00] * var.typ.size ) # TODO initial values here?
         self.align(8)
         # TODO: mark end of data and start of code inside image
         # TODO: round data to page size to enable protection by loader.
         # Procedure code generation:
         procedures = node.symtable.getAllLocal(Procedure)
         node.procs = procedures
         for proc in procedures:
            self.gencode(proc)
         # Module init code:
         node.initcodeentry = self.rip
         self.gencode(node.initcode)
         self.addCode( ret() )
         # TODO: how to return from module init code? far return??

      elif type(node) is Procedure:
        # calculate offsets for local variables and parameters
        # Variable location relative to 'rbp' register
        variables = node.symtable.getAllLocal(Variable)
        offset = 0
        paramoffset = 16
        for var in variables:
           var.isLocal = True
           if not var.isParameter:
              offset += var.typ.size
              # Offset is negative of rbp in stack frame
              var.offset = -offset
        node.framesize = offset
        # Calculate offsets of parameters relative to rbp register
        for par in reversed(node.typ.parameters):
           pvar = node.symtable.getLocal(Variable, par.name)
           pvar.offset = paramoffset
           paramoffset += pvar.typ.size

        # code generation
        node.entrypoint = self.rip
        self.addCode(push('rbp'))
        self.addCode(mov('rbp', 'rsp')) # Setup the base pointer
        self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals
        self.gencode(node.block)
        if node.retexpr:
           if node.retexpr.typ.isType(integer):
              self.genexprcode(node.retexpr)
              self.addCode( mov('rax', node.retexpr.reg) )
              self.freereg(node.retexpr)
           else:
              Error('Cannot return this kind yet {0}'.format(node.retexpr.typ))
        self.addCode( addreg64('rsp', node.framesize) )
        self.addCode( pop('rbp') )
        self.addCode( ret() )
        assert(len(self.usedregs) == 0)

      elif isinstance(node, StatementSequence):
         for s in node.statements:
            self.gencode(s)

      elif type(node) is ProcedureCall:
         # Prepare parameters on the stack:
         stacksize = 0
         assert(len(node.args) == len(node.proc.typ.parameters))
         for arg, param in zip(node.args, node.proc.typ.parameters):

            if param.kind == 'value': 
               self.genexprcode(arg)
               self.addCode( push(arg.reg) )
               self.freereg( arg )
               stacksize += 8
            else:
               Error('Parameter kind other than value')

         # Calculate address using designator
         if type(node.proc.obj) is Procedure:
            self.addCode( call(0x0) )
            self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip))
         elif type(node.proc.obj) is ImportedSymbol:
            # Load the entry point of the import table
            self.getreg(node.proc.obj)
            # Load the address of the procedure:
            self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) )
            self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) )
            # Call to the address in register:
            self.addCode( call(node.proc.obj.reg) )
            # Free register that holds the address of the object
            self.freereg( node.proc.obj )
         elif type(node.proc.obj) is BuiltinProcedure:
            if node.proc.obj.name == 'chr':
               print('int to char')
            else:
               Error('Unknown builtin function {0}'.format(node.proc.obj.name))
         else:
            Error('Cannot call designator of type {0}'.format(node.proc.obj))

         # Restore stack (pop all arguments of):
         self.addCode(addreg64('rsp', stacksize))

      elif type(node) is Assignment:
         if node.lval.typ.isType(integer):
           # TODO if node.rval is Constant of some datatype, move it to mem directly
           self.genexprcode(node.rval) # Calculate the value that has to be stored.
           self.storeRegInDesignator(node.rval.reg, node.lval)
           self.freereg(node.rval)
         else:
            Error('Assignments of other types not implemented')
            # TODO if left and right are designators, do some sort of memcpy.

      elif type(node) is IfStatement:
        self.genexprcode(node.condition)
        self.addCode( cmpreg64(node.condition.reg, 1) )
        self.freereg(node.condition)
        if node.falsestatement:
           # If with else clause
           self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
           rip1 = self.rip
           fixloc1 = self.rip - 4
           self.gencode(node.truestatement)
           self.addCode( nearjump( 0x0 ) ) # jump over false code
           fixloc2 = self.rip - 4
           self.fixCode(fixloc1, imm32(self.rip - rip1))
           rip2 = self.rip
           self.gencode(node.falsestatement)
           self.fixCode(fixloc2, imm32(self.rip - rip2))
        else:
           # If without else clause
           self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
           rip1 = self.rip
           fixloc1 = self.rip - 4
           self.gencode(node.truestatement)
           self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code.

      elif isinstance(node, WhileStatement):
        rip1 = self.rip # Store the start of the while loop
        self.genexprcode(node.condition)
        self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness
        self.freereg(node.condition)
        self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later)
        fixloc1 = self.rip - 4
        rip2 = self.rip
        self.gencode(node.dostatements)
        self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below
        fixloc2 = self.rip - 4
        rip3 = self.rip # end of while loop
        self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop
        self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop

      elif type(node) is ForStatement:
         # Initial load of iterator variable:
         self.genexprcode(node.begin)
         self.genexprcode(node.end)
         # TODO: link reg with variable so that a register is used instead of a variable
         iterreg = node.begin.reg # Get the register used for the loop
         #self.addCode(cmpreg64(iterreg, node.endvalue))
         rip1 = self.rip
         self.gencode(node.statements)
         #self.loadDesignatorInReg(node.
         #self.addCode( addreg64(node.variable, node.increment) )
         self.addCode(nearjump(0x0))
         fixloc1 = self.rip - 4
         rip2 = self.rip
         self.fixCode(fixloc1, imm32(rip1 - rip2))

         self.freereg(node.begin) # Release register used in loop
         self.freereg(node.end)
         Error('No implementation of FOR statement')

      elif type(node) is AsmCode:
         def processOperand(op):
            if type(op) is list:
               if type(op[0]) is Variable:
                  var = op[0]
                  if var.isLocal:
                     return ['rbp', var.offset]
                  else:
                     Error('Can only use local variables in inline assembler')
            return op
         for asmline in node.asmcode:
            opcode, operands = asmline
            operands = [processOperand(opx) for opx in operands]
            print('assembling', opcode, *operands)
            func,nargs = opcodes[opcode]
            code = func(*operands)
            self.addCode(code)

      elif isinstance(node, EmptyStatement):
         pass


      elif type(node) is StringConstant:
        self.strings.append(node)
        self.data.append(node.value) # Add string to the data section

      elif type(node) is Designator:
         if len(node.selectors) > 0:
            self.getreg(node)
            # Load starting address
            if node.obj.isLocal:
               self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) )
            else:
               # Global variables need to be relocated...
               self.addCode(leareg64(node.reg, ['RIP', 0]))
               self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip))
            # Loop over all designators..
            for selector in node.selectors:
               if type(selector) is Index:
                  # Deref an array index
                  self.genexprcode(selector.index)
                  self.getreg(selector)
                  self.addCode( mov(selector.reg, selector.typ.elementType.size) )
                  self.addCode( imulreg64(selector.reg, selector.index.reg ) )
                  self.freereg(selector.index)
                  self.addCode(addreg64(node.reg, selector.reg))
                  self.freereg(selector)
               elif type(selector) is Field:
                  print('Field')
                  Error('Field not implemented')
               else:
                  Error('Unknown selector')
         else:
            Error('Can only gencode for designator with selectors')

      else:
         print('not generating code for {0}'.format(node))

   def generatecode(self, ast):
     """ code generation front end """
     self.initialize()
     self.gencode(ast)
     ast.image = self.image