diff ide/compiler/codegenerator.py @ 1:92df07bc2081

Initial import of compiler
author windel
date Sun, 18 Sep 2011 19:00:29 +0200
parents
children 0d5ef85b8698
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/codegenerator.py	Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,479 @@
+"""
+  Code generation for 64 bits intel processors
+"""
+
+from .nodes import *
+from .errors import Error
+from .builtin import real, integer, boolean, char
+from .assembler import *
+
+class CodeGenerator:
+   def __init__(self):
+      self.strings = []
+      self.initialize()
+   def initialize(self):
+      # Register descriptors:
+      self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',')
+      self.usedregs = []
+      # Members to accumulate the result into:
+      # The result is an image of bytecode and global variable space.
+      # Global variables a referenced by RIP relative addressing.
+      self.image = []
+      self.rip = 0 # The current instruction pointer location.
+      # TODO: backpatch list here?
+
+   # Functions to modify the code image
+   def addCode(self, code):
+      assert(type(code) is list)
+      self.image += code
+      self.rip += len(code)
+   def fixCode(self, position, code):
+      self.image[position:position+len(code)] = code
+   def align(self, b):
+      while (self.rip % b) != 0:
+         self.addCode([0])
+
+   def saveAllRegisters(self):
+      regs = list(self.usedregs.keys())
+      for reg in regs:
+         code += self.saveRegister(reg)
+
+   def saveRegister(self, reg):
+      code = []
+      if reg in self.usedregs.keys(): 
+         code.append('mov {0}, {1}'.format(self.usedregs[reg], reg))
+         del self.usedregs[reg]
+         self.freeregs.append(reg)
+
+   def getreg(self, node):
+      """ acquire a working register for a certain node."""
+      # Temporary register bypass action:
+      if len(self.freeregs) > 0:
+         reg = self.freeregs.pop(0)
+         self.usedregs.append(reg)
+      else:
+         Error('No more free regs')
+      node.reg = reg
+
+   def freereg(self, node):
+      reg = node.reg
+      node.reg = None
+      self.freeregs.append(reg)
+      self.usedregs.remove(reg)
+
+   # Helpers to load and retrieve designated objects:
+   def storeRegInDesignator(self, reg, designator):
+      assert(type(reg) is str)
+      assert(type(designator) is Designator)
+      if len(designator.selectors) > 0:
+         self.gencode( designator ) # Load the pointer into some register
+         self.addCode( mov([designator.reg, 0x0], reg) )
+         self.freereg( designator )
+      else:
+         if designator.obj.isLocal:
+            # Relative from rbp register
+            mem = ['rbp', designator.obj.offset]
+            self.addCode( mov(mem, reg) )
+         else:
+            # Relative from RIP after move
+            self.addCode( mov(['RIP', 0x0], reg) )
+            self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) )
+
+   # Code generation functions:
+   def genexprcode(self, node):
+      """ 
+         Generate code for expressions!
+         Recursively evaluates, and ensures a register contains the answer.
+         register is an integer register or a floating point reg
+      """
+      if isinstance(node, Binop):
+         """ Handle a binary operation (two arguments) of some kind """
+         self.genexprcode(node.a)
+         self.genexprcode(node.b)
+
+         if node.op == 'mod':
+            assert(node.typ.isType(integer))
+            self.addCode(mov('rax', node.a.reg))
+            self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
+            self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
+            node.reg = node.a.reg
+            self.freereg(node.b) # give up register that contains b
+            self.addCode(mov(node.reg, 'rdx')) # move remainder into result
+         elif node.op == 'div':
+            assert(node.typ.isType(integer))
+            self.addCode(mov('rax', node.a.reg))
+            self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros
+            self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
+            node.reg = node.a.reg
+            self.freereg(node.b) # give up register that contains b
+            self.addCode(mov(node.reg, 'rax')) # move result into reg
+         elif node.op == '*':
+            if node.typ.isType(integer):
+               self.addCode(imulreg64(node.a.reg, node.b.reg))
+               node.reg = node.a.reg
+               self.freereg(node.b)
+            else:
+               Error('{0} for * not implemented'.format(node.typ))
+         elif node.op == '+':
+            if node.typ.isType(integer):
+               self.addCode(addreg64(node.a.reg, node.b.reg))
+               node.reg = node.a.reg
+               self.freereg(node.b)
+            else:
+               Error('{0} for + not implemented'.format(node.typ))
+         elif node.op == '-':
+            if node.typ.isType(integer):
+               self.addCode(subreg64(node.a.reg, node.b.reg))
+               node.reg = node.a.reg
+               self.freereg(node.b)
+            else:
+               Error('{0} for - not implemented'.format(node.typ))
+         else:
+            Error('Unknown Binop {0}'.format(node.op))
+
+      elif type(node) is Unop:
+         if node.op == 'INTTOREAL':
+            self.genexprcode(node.a)
+            node.reg = node.a.reg
+            # TODO use 'FILD' instruction
+            freg = 12
+            code.append('Unop inttoreal TODO')
+         elif node.op == 'ABS':
+            if isType(node.typ, real):
+               code = [0xD9, 0xE1] # st(0) = fabs st(0)
+               Error('ABS error integer')
+            elif isType(node.typ, integer):
+               code = []
+               Error('ABS error integer')
+            else:
+               Error('ABS error')
+         else:
+            Error('Unknown Unop {0}'.format(node.op))
+
+      elif isinstance(node, Designator):
+         # dereference, array index. Make sure that the result comes into a register
+         if len(node.selectors) > 0:
+            self.gencode(node) # Load the pointer into some register
+
+            # Now we can access the object at location '[node.reg]':
+            if node.typ.isType(integer):
+               self.addCode( mov(node.reg, [node.reg, 0x0]) )
+            else:
+               Error('Only integer types implemented')
+         else:
+            # No selectors, load variable directly
+            if node.obj.typ.isType(integer):
+               if type(node.obj) is Constant:
+                  self.genexprcode(node.obj)
+                  node.reg = node.obj.reg
+               else:
+                  self.getreg(node)
+                  # Get a register to store the integer value
+                  if node.obj.isLocal:
+                     # relative to rbp:
+                     self.addCode( mov(node.reg, ['rbp', node.obj.offset]) )
+                  else:
+                     self.addCode(mov(node.reg, ['RIP', 0x0]))
+                     self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip))
+            else:
+               Error('Cannot load variable type {0}'.format(node.typ))
+
+      elif isinstance(node, Relop):
+         # Create a boolean from operands
+         # TODO create an alternative for expressions used as conditions.
+         self.genexprcode(node.a)
+         self.genexprcode(node.b)
+
+         if node.a.typ.isType(integer):
+            instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'}
+            if not node.relop in instructions.keys():
+               Error('Unimplemented relop: '+str(node.relop))
+            instr = instructions[node.relop]
+
+            node.reg = node.a.reg
+            self.addCode( cmpreg64(node.a.reg, node.b.reg) )
+            self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp
+            fixloc1 = self.rip - 1
+            rip1 = self.rip
+            self.addCode( xorreg64(node.reg, node.reg) )
+            self.addCode( shortjump(0x0) ) # Jump over 1 code
+            fixloc2 = self.rip - 1
+            self.fixCode(fixloc1, imm8(self.rip - rip1))
+            rip2 = self.rip
+            self.addCode( xorreg64(node.reg, node.reg) )
+            self.addCode( increg64(node.reg) )
+            self.fixCode(fixloc2, imm8(self.rip - rip2))
+
+            self.freereg(node.b)
+         else:
+            Error('Relop not implemented for {0}'.format(node.a.typ))
+
+      elif type(node) is Constant:
+         if node.typ.isType(integer):
+            self.getreg(node)
+            self.addCode(mov(node.reg, node.value))
+         elif node.typ.isType(real):
+            code += self.getreg(node)
+            Error('TODO: get real reg')
+            # TODO: get a fixed point reg, and load the variable in there
+         else:
+            Error('Howto generate code for {0}?'.format(node))
+
+      elif type(node) is ProcedureCall:
+         if type(node.proc.obj) is BuiltinProcedure:
+            # Handle builtin procedures different, these not always call
+            # a function, but generate code.
+            bi = node.proc.obj
+            if bi.name == 'chr':
+               arg = node.args[0]
+               self.genexprcode(arg)
+               # Store character in full width register:
+               # TODO: store in char only register
+               node.reg = arg.reg
+            else:
+               Error('Unknown builtin function {0}'.format(bi.name))
+         else:
+            # Use generic procedure call first
+            self.gencode(node)
+            # Retrieve result:
+            if node.typ.isType(integer):
+               # Store result!
+               self.getreg(node)
+               self.addCode( mov(node.reg, 'rax') )
+            else:
+               Error('Return type not supported {0}'.format(node.typ))
+      else:
+         Error('Cannot generate expression code for: {0}'.format(node))
+
+   def gencode(self, node):
+      """ Code generation function for AST nodes """
+      if isinstance(node, Module):
+         # for all imports make a list of pointer to the actual procedures:
+         for imp in node.imports:
+            imp.offset = self.rip
+            self.addCode( [0x0]*8 )
+         # global variable storage allocation
+         variables = node.symtable.getAllLocal(Variable)
+         for var in variables:
+            var.isLocal = False
+            var.offset = self.rip
+            self.addCode( [0x00] * var.typ.size ) # TODO initial values here?
+         self.align(8)
+         # TODO: mark end of data and start of code inside image
+         # TODO: round data to page size to enable protection by loader.
+         # Procedure code generation:
+         procedures = node.symtable.getAllLocal(Procedure)
+         node.procs = procedures
+         for proc in procedures:
+            self.gencode(proc)
+         # Module init code:
+         node.initcodeentry = self.rip
+         self.gencode(node.initcode)
+         self.addCode( ret() )
+         # TODO: how to return from module init code? far return??
+
+      elif type(node) is Procedure:
+        # calculate offsets for local variables and parameters
+        # Variable location relative to 'rbp' register
+        variables = node.symtable.getAllLocal(Variable)
+        offset = 0
+        paramoffset = 16
+        for var in variables:
+           var.isLocal = True
+           if not var.isParameter:
+              offset += var.typ.size
+              # Offset is negative of rbp in stack frame
+              var.offset = -offset
+        node.framesize = offset
+        # Calculate offsets of parameters relative to rbp register
+        for par in reversed(node.typ.parameters):
+           pvar = node.symtable.getLocal(Variable, par.name)
+           pvar.offset = paramoffset
+           paramoffset += pvar.typ.size
+
+        # code generation
+        node.entrypoint = self.rip
+        self.addCode(push('rbp'))
+        self.addCode(mov('rbp', 'rsp')) # Setup the base pointer
+        self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals
+        self.gencode(node.block)
+        if node.retexpr:
+           if node.retexpr.typ.isType(integer):
+              self.genexprcode(node.retexpr)
+              self.addCode( mov('rax', node.retexpr.reg) )
+              self.freereg(node.retexpr)
+           else:
+              Error('Cannot return this kind yet {0}'.format(node.retexpr.typ))
+        self.addCode( addreg64('rsp', node.framesize) )
+        self.addCode( pop('rbp') )
+        self.addCode( ret() )
+        assert(len(self.usedregs) == 0)
+
+      elif isinstance(node, StatementSequence):
+         for s in node.statements:
+            self.gencode(s)
+            assert(len(self.usedregs) == 0)
+
+      elif type(node) is ProcedureCall:
+         # Prepare parameters on the stack:
+         stacksize = 0
+         assert(len(node.args) == len(node.proc.typ.parameters))
+         for arg, param in zip(node.args, node.proc.typ.parameters):
+
+            if param.kind == 'value': 
+               self.genexprcode(arg)
+               self.addCode( push(arg.reg) )
+               self.freereg( arg )
+               stacksize += 8
+            else:
+               Error('Parameter kind other than value')
+
+         # Calculate address using designator
+         if type(node.proc.obj) is Procedure:
+            self.addCode( call(0x0) )
+            self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip))
+         elif type(node.proc.obj) is ImportedSymbol:
+            # Load the entry point of the import table
+            self.getreg(node.proc.obj)
+            # Load the address of the procedure:
+            self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) )
+            self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) )
+            # Call to the address in register:
+            self.addCode( call(node.proc.obj.reg) )
+            # Free register that holds the address of the object
+            self.freereg( node.proc.obj )
+         elif type(node.proc.obj) is BuiltinProcedure:
+            if node.proc.obj.name == 'chr':
+               print('int to char')
+            else:
+               Error('Unknown builtin function {0}'.format(node.proc.obj.name))
+         else:
+            Error('Cannot call designator of type {0}'.format(node.proc.obj))
+
+         # Restore stack (pop all arguments of):
+         self.addCode(addreg64('rsp', stacksize))
+
+      elif type(node) is Assignment:
+         if node.lval.typ.isType(integer):
+           # TODO if node.rval is Constant of some datatype, move it to mem directly
+           self.genexprcode(node.rval) # Calculate the value that has to be stored.
+           self.storeRegInDesignator(node.rval.reg, node.lval)
+           self.freereg(node.rval)
+         else:
+            Error('Assignments of other types not implemented')
+            # TODO if left and right are designators, do some sort of memcpy.
+
+      elif type(node) is IfStatement:
+        self.genexprcode(node.condition)
+        self.addCode( cmpreg64(node.condition.reg, 1) )
+        self.freereg(node.condition)
+        if node.falsestatement:
+           # If with else clause
+           self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
+           rip1 = self.rip
+           fixloc1 = self.rip - 4
+           self.gencode(node.truestatement)
+           self.addCode( nearjump( 0x0 ) ) # jump over false code
+           fixloc2 = self.rip - 4
+           self.fixCode(fixloc1, imm32(self.rip - rip1))
+           rip2 = self.rip
+           self.gencode(node.falsestatement)
+           self.fixCode(fixloc2, imm32(self.rip - rip2))
+        else:
+           # If without else clause
+           self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
+           rip1 = self.rip
+           fixloc1 = self.rip - 4
+           self.gencode(node.truestatement)
+           self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code.
+
+      elif isinstance(node, WhileStatement):
+        rip1 = self.rip # Store the start of the while loop
+        self.genexprcode(node.condition)
+        self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness
+        self.freereg(node.condition)
+        self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later)
+        fixloc1 = self.rip - 4
+        rip2 = self.rip
+        self.gencode(node.dostatements)
+        self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below
+        fixloc2 = self.rip - 4
+        rip3 = self.rip # end of while loop
+        self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop
+        self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop
+
+      elif type(node) is ForStatement:
+         # Initial load of iterator variable:
+         self.genexprcode(node.begin)
+         self.storeRegInDesignator(node.begin.reg, node.variable)
+         self.freereg(node.begin)
+         rip1 = self.rip
+         self.gencode(node.statements)
+         #self.loadDesignatorInReg(node.
+         #self.addCode( addreg64(node.variable, node.increment) )
+         Error('No implementation of FOR statement')
+
+      elif type(node) is AsmCode:
+         def processOperand(op):
+            if type(op) is list:
+               if type(op[0]) is Variable:
+                  var = op[0]
+                  if var.isLocal:
+                     return ['rbp', var.offset]
+                  else:
+                     Error('Can only use local variables in inline assembler')
+            return op
+         for asmline in node.asmcode:
+            opcode, operands = asmline
+            operands = [processOperand(opx) for opx in operands]
+            print('assembling', opcode, *operands)
+            func,nargs = opcodes[opcode]
+            code = func(*operands)
+            self.addCode(code)
+
+      elif isinstance(node, EmptyStatement):
+         pass
+
+
+      elif type(node) is StringConstant:
+        self.strings.append(node)
+        self.data.append(node.value) # Add string to the data section
+
+      elif type(node) is Designator:
+         if len(node.selectors) > 0:
+            self.getreg(node)
+            # Load starting address
+            if node.obj.isLocal:
+               self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) )
+            else:
+               # Global variables need to be relocated...
+               self.addCode(leareg64(node.reg, ['RIP', 0]))
+               self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip))
+            # Loop over all designators..
+            for selector in node.selectors:
+               if type(selector) is Index:
+                  # Deref an array index
+                  self.genexprcode(selector.index)
+                  self.getreg(selector)
+                  self.addCode( mov(selector.reg, selector.typ.elementType.size) )
+                  self.addCode( imulreg64(selector.reg, selector.index.reg ) )
+                  self.freereg(selector.index)
+                  self.addCode(addreg64(node.reg, selector.reg))
+                  self.freereg(selector)
+               elif type(selector) is Field:
+                  print('Field')
+                  Error('Field not implemented')
+               else:
+                  Error('Unknown selector')
+         else:
+            Error('Can only gencode for designator with selectors')
+
+      else:
+         print('not generating code for {0}'.format(node))
+
+   def generatecode(self, ast):
+     """ code generation front end """
+     self.initialize()
+     self.gencode(ast)
+     ast.image = self.image
+