Mercurial > lcfOS
changeset 39:600f48b74799
Move ide
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/applications/ide/compiler/__init__.py Fri Feb 03 18:40:43 2012 +0100 @@ -0,0 +1,2 @@ +# File to make this directory a package. +
# applications/ide/compiler/assembler.py
"""
 Assembler code generation functions

 Every instruction helper returns the encoded x86-64 instruction as a list
 of integer byte values. Unsupported operand combinations are reported by
 calling Error(), which raises a compiler exception.
"""

from .errors import Error

# NOTE: a module level dict named 'modrm' used to be defined here. It was
# shadowed by the modrm() function below before anything could read it, so
# it has been removed.

# Table 3.1 of the intel manual:
# use REX.W on the table below:
regs64 = {'rax': 0, 'rcx': 1, 'rdx': 2, 'rbx': 3, 'rsp': 4, 'rbp': 5,
          'rsi': 6, 'rdi': 7, 'r8': 0, 'r9': 1, 'r10': 2, 'r11': 3,
          'r12': 4, 'r13': 5, 'r14': 6, 'r15': 7}
regs32 = {'eax': 0, 'ecx': 1, 'edx': 2, 'ebx': 3, 'esp': 4, 'ebp': 5,
          'esi': 6, 'edi': 7}
regs8 = {'al': 0, 'cl': 1, 'dl': 2, 'bl': 3, 'ah': 4, 'ch': 5, 'dh': 6,
         'bh': 7}

# Calculation of the rexb bit:
rexbit = {'rax': 0, 'rcx': 0, 'rdx': 0, 'rbx': 0, 'rsp': 0, 'rbp': 0,
          'rsi': 0, 'rdi': 0, 'r8': 1, 'r9': 1, 'r10': 1, 'r11': 1,
          'r12': 1, 'r13': 1, 'r14': 1, 'r15': 1}


# Helper functions:
def imm64(x):
    """ represent 64 bits integer in little endian 8 bytes"""
    if x < 0:
        x = x + (1 << 64)
    x = x & 0xFFFFFFFFFFFFFFFF
    return [(x >> (p*8)) & 0xFF for p in range(8)]


def imm32(x):
    """ represent 32 bits integer in little endian 4 bytes"""
    if x < 0:
        x = x + (1 << 32)
    x = x & 0xFFFFFFFF
    return [(x >> (p*8)) & 0xFF for p in range(4)]


def imm8(x):
    """ represent 8 bits integer as a single byte (two's complement) """
    if x < 0:
        x = x + (1 << 8)
    x = x & 0xFF
    return [x]


def _fits_int8(x):
    """ True when x can be stored in a sign extended 8 bits field """
    return -128 <= x <= 127


def _fits_int32(x):
    """ True when x can be stored in a sign extended 32 bits field """
    return -(1 << 31) <= x < (1 << 31)


def _displacement(offset):
    """
     Select the displacement encoding of a memory operand.
     Returns (mod, bytes): mod=1 with a disp8 when the offset fits in a
     signed byte, mod=2 with a disp32 otherwise.
    """
    if _fits_int8(offset):
        return 1, imm8(offset)
    if _fits_int32(offset):
        return 2, imm32(offset)
    Error('Displacement {0} does not fit in 32 bits'.format(offset))


def modrm(mod=0, rm=0, reg=0):
    """ Construct the modrm byte from its components """
    assert(mod <= 3)
    assert(rm <= 7)
    assert(reg <= 7)
    return (mod << 6) | (reg << 3) | rm


def rex(w=0, r=0, x=0, b=0):
    """ Create a REX prefix byte """
    assert(w <= 1)
    assert(r <= 1)
    assert(x <= 1)
    assert(b <= 1)
    return 0x40 | (w << 3) | (r << 2) | (x << 1) | b


def sib(ss=0, index=0, base=0):
    """ Construct the SIB (scale, index, base) byte from its components """
    assert(ss <= 3)
    assert(index <= 7)
    assert(base <= 7)
    return (ss << 6) | (index << 3) | base


# Condition codes as used in the low nibble of the Jcc opcodes:
tttn = {'L': 0xc, 'G': 0xf, 'NE': 0x5, 'GE': 0xd, 'LE': 0xe, 'E': 0x4}


# Actual instructions:
def nearjump(distance, condition=None):
    """ jmp imm32 (or Jcc imm32 when a condition is given) """
    lim = (1 << 30)
    if abs(distance) > lim:
        Error('near jump cannot jump over more than {0} bytes'.format(lim))
    if condition:
        if distance < 0:
            distance -= 6  # Skip own instruction
        opcode = 0x80 | tttn[condition]  # Jcc imm32
        return [0x0F, opcode] + imm32(distance)
    else:
        if distance < 0:
            distance -= 5  # Skip own instruction
        return [0xE9] + imm32(distance)


def shortjump(distance, condition=None):
    """ jmp imm8 (or Jcc rel8 when a condition is given) """
    lim = 118
    if abs(distance) > lim:
        Error('short jump cannot jump over more than {0} bytes'.format(lim))
    if distance < 0:
        distance -= 2  # Skip own instruction
    if condition:
        opcode = 0x70 | tttn[condition]  # Jcc rel8
    else:
        opcode = 0xeb  # jmp rel8
    return [opcode] + imm8(distance)


# Helper that determines jump type:
def reljump(distance):
    """ Unconditional jump, using the short form when the distance allows """
    if abs(distance) < 110:
        return shortjump(distance)
    else:
        return nearjump(distance)


def push(reg):
    """ push r64 """
    if reg in regs64:
        if rexbit[reg] == 1:
            return [0x41, 0x50 + regs64[reg]]
        else:
            return [0x50 + regs64[reg]]
    else:
        Error('push for {0} not implemented'.format(reg))


def pop(reg):
    """ pop r64 """
    if reg in regs64:
        opcode = 0x58 + regs64[reg]
        if rexbit[reg] == 1:
            return [rex(b=1), opcode]
        else:
            return [opcode]
    else:
        Error('pop for {0} not implemented'.format(reg))


def INT(number):
    """ int imm8 """
    opcode = 0xcd
    return [opcode] + imm8(number)


def syscall():
    """ syscall """
    return [0x0F, 0x05]


def call(distance):
    """ call rel32 when given an int, call r64 when given a register name """
    if type(distance) is int:
        return [0xe8] + imm32(distance)
    elif type(distance) is str and distance in regs64:
        reg = distance
        opcode = 0xFF  # 0xFF /2 == call r/m64
        mod_rm = modrm(mod=3, reg=2, rm=regs64[reg])
        if rexbit[reg] == 1:
            rexprefix = rex(b=rexbit[reg])
            return [rexprefix, opcode, mod_rm]
        else:
            return [opcode, mod_rm]
    else:
        Error('Cannot call to {0}'.format(distance))


def ret():
    """ ret """
    return [0xc3]


def increg64(reg):
    """ inc r64 """
    assert(reg in regs64)
    rexprefix = rex(w=1, b=rexbit[reg])
    opcode = 0xff
    mod_rm = modrm(mod=3, rm=regs64[reg])
    return [rexprefix, opcode, mod_rm]


def prepost8(r8, rm8):
    """
     Create the bytes before (prefix) and after (modrm, displacement) the
     opcode for an 8 bits register r8 combined with memory operand rm8.
     rm8 is a list: [base] or [base, offset].
    """
    assert(r8 in regs8)
    pre = []
    if type(rm8) is list:
        # TODO: merge mem access with prepost for 64 bits
        if len(rm8) == 1:
            base, = rm8
            if type(base) is str and base in regs64:
                # These bases need a SIB byte or a displacement with mod=0:
                assert(not base in ['rbp', 'rsp', 'r12', 'r13'])
                mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
                if rexbit[base] == 1:
                    pre.append(rex(b=1))
                post = [mod_rm]
            else:
                Error('One arg of type {0} not implemented'.format(base))
        elif len(rm8) == 2:
            base, offset = rm8
            assert(type(offset) is int)
            assert(base in regs64)

            if base == 'rsp' or base == 'r12':
                Error('Cannot use rsp or r12 as base yet')
            if rexbit[base] == 1:
                pre.append(rex(b=1))
            # Use a disp32 when the offset does not fit in a signed byte:
            mod, disp = _displacement(offset)
            mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs8[r8])
            post = [mod_rm] + disp
        else:
            Error('not supporting prepost8 with list len {0}'.format(len(rm8)))
    else:
        Error('Not supporting move with reg8 {0}'.format(r8))
    return pre, post


def prepost(r64, rm64):
    """
     Create the bytes before (REX prefix) and after (modrm, sib,
     displacement) the opcode for register r64 combined with operand rm64.
     rm64 is a 64 bits register name or a memory reference list:
     [base, index, disp], [base, offset], ['RIP', offset] or [address].
    """
    assert(r64 in regs64)
    if type(rm64) is list:
        if len(rm64) == 3:
            base, index, disp = rm64
            assert(base in regs64)
            assert(index in regs64)
            assert(type(disp) is int)
            # Assert that no special cases are used:
            # TODO: swap base and index to avoid special cases
            # TODO: exploit special cases and make better code
            assert(index != 'rsp')

            rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
            # rm=4 indicates a SIB byte: [--][--]+displacement
            mod, dispbytes = _displacement(disp)
            mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + dispbytes
        elif len(rm64) == 2:
            base, offset = rm64
            assert(type(offset) is int)
            if base == 'RIP':
                # RIP pointer relative addressing mode!
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
                return [rexprefix], [mod_rm] + imm32(offset)
            else:
                assert(base in regs64)
                rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
                mod, dispbytes = _displacement(offset)
                if base == 'rsp' or base == 'r12':
                    # extended function that uses SIB byte
                    # rm=4 indicates a SIB byte follows
                    mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
                    # index=4 indicates that index is not used
                    si_b = sib(ss=0, index=4, base=regs64[base])
                    return [rexprefix], [mod_rm, si_b] + dispbytes
                else:
                    mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs64[r64])
                    return [rexprefix], [mod_rm] + dispbytes
        elif len(rm64) == 1:
            offset = rm64[0]
            if type(offset) is int:
                # Absolute 32 bits address, encoded with a SIB byte:
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=4, reg=regs64[r64])
                si_b = sib(ss=0, index=4, base=5)  # 0x25
                return [rexprefix], [mod_rm, si_b] + imm32(offset)
            else:
                Error('Memory reference of type {0} not implemented'.format(offset))
        else:
            Error('Memory reference not implemented')
    elif type(rm64) is str and rm64 in regs64:
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
        mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
        return [rexprefix], [mod_rm]
    else:
        # Report the problem here instead of returning None, which made the
        # caller fail later while unpacking the result.
        Error('Operand {0} not implemented'.format(rm64))


def leareg64(rega, m):
    """ lea r64, m """
    opcode = 0x8d  # lea r64, m
    pre, post = prepost(rega, m)
    return pre + [opcode] + post


def mov(rega, regb):
    """
     mov rega, regb. Supported forms: register <- immediate,
     register/memory <- register (64 or 8 bits) and register <- memory.
    """
    if type(regb) is int:
        pre = [rex(w=1, b=rexbit[rega])]
        opcode = 0xb8 + regs64[rega]  # mov r64, imm64
        post = imm64(regb)
    elif type(regb) is str:
        if regb in regs64:
            opcode = 0x89  # mov r/m64, r64
            pre, post = prepost(regb, rega)
        elif regb in regs8:
            opcode = 0x88  # mov r/m8, r8
            pre, post = prepost8(regb, rega)
        else:
            Error('Unknown register {0}'.format(regb))
    elif type(rega) is str:
        if rega in regs64:
            opcode = 0x8b  # mov r64, r/m64
            pre, post = prepost(rega, regb)
        else:
            Error('Unknown register {0}'.format(rega))
    else:
        Error('Move of this kind {0}, {1} not implemented'.format(rega, regb))
    return pre + [opcode] + post


def xorreg64(rega, regb):
    """ xor rega, regb """
    rexprefix = rex(w=1, r=rexbit[regb], b=rexbit[rega])
    opcode = 0x31  # XOR r/m64, r64
    # Alternative is 0x33 XOR r64, r/m64
    mod_rm = modrm(3, rm=regs64[rega], reg=regs64[regb])
    return [rexprefix, opcode, mod_rm]


def _arith_imm(reg, value, ext):
    """
     Encode '<op> r64, immediate' for the 0x83 (imm8) / 0x81 (imm32)
     opcode group. ext is the opcode extension stored in the reg field of
     the modrm byte (0=ADD, 5=SUB, 7=CMP). Both immediates are sign
     extended by the processor, so the selection is done on signed range.
    """
    rexprefix = rex(w=1, b=rexbit[reg])
    mod_rm = modrm(3, rm=regs64[reg], reg=ext)
    if _fits_int8(value):
        return [rexprefix, 0x83, mod_rm] + imm8(value)
    elif _fits_int32(value):
        return [rexprefix, 0x81, mod_rm] + imm32(value)
    else:
        Error('Constant value too large!')


# integer arithmatic:
def addreg64(rega, regb):
    """ add rega, regb where regb is a register or an integer constant """
    if type(regb) is str and regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        return _arith_imm(rega, regb, 0)  # add r/m64, imm
    else:
        Error('unknown second operand {0}!'.format(regb))


def subreg64(rega, regb):
    """ sub rega, regb where regb is a register or an integer constant """
    if type(regb) is str and regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        return _arith_imm(rega, regb, 5)  # sub r/m64, imm
    else:
        Error('unknown second operand {0}!'.format(regb))


def idivreg64(reg):
    """ idiv r64: signed divide of rdx:rax by reg """
    rexprefix = rex(w=1, b=rexbit[reg])
    opcode = 0xf7  # IDIV r/m64
    mod_rm = modrm(3, rm=regs64[reg], reg=7)
    return [rexprefix, opcode, mod_rm]


def imulreg64_rax(reg):
    """ imul r64: one operand form (opcode 0xf7 /5) """
    rexprefix = rex(w=1, b=rexbit[reg])
    opcode = 0xf7  # IMUL r/m64
    mod_rm = modrm(3, rm=regs64[reg], reg=5)
    return [rexprefix, opcode, mod_rm]


def imulreg64(rega, regb):
    """ imul rega, regb """
    pre, post = prepost(rega, regb)
    opcode = 0x0f  # IMUL r64, r/m64
    opcode2 = 0xaf
    return pre + [opcode, opcode2] + post


def cmpreg64(rega, regb):
    """ cmp rega, regb where regb is a register or an integer constant """
    if type(regb) is str and regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        return _arith_imm(rega, regb, 7)  # CMP r/m64, imm
    else:
        Error('not implemented cmp64')


# Mapping that maps string names to the right functions:
opcodes = {'mov': (mov, 2), 'lea': (leareg64, 2), 'int': (INT, 1),
           'syscall': (syscall, 0)}
# applications/ide/compiler/builtin.py
# Builtin types and procedures of the language. The classes used here
# (BaseType, BuiltinProcedure, ProcedureType, Parameter) come from .nodes.
from .nodes import *

# Base types. The second argument appears to be the storage size in bytes
# (see the question on 'boolean' below) -- confirm against BaseType in nodes.
boolean = BaseType('boolean', 8) # Choose: 1 or 8 bytes?
integer = BaseType('integer', 8)
real = BaseType('real', 8)
char = BaseType('char', 1)
void = BaseType('void', 0)

# Builtin 'chr': takes one integer parameter named 'x' of kind 'value' and
# has 'char' as its second ProcedureType argument (presumably the return type).
chr_func = BuiltinProcedure('chr', ProcedureType([Parameter('value', 'x', integer)], char))
# applications/ide/compiler/codegenerator.py
"""
 Code generation for 64 bits intel processors
"""

from .nodes import *
from .errors import Error
from .builtin import real, integer, boolean, char
from .assembler import *


class CodeGenerator:
    """
     Walks the AST and accumulates machine code and global variable space
     into a flat image (a list of byte values). Working registers are taken
     from a pool (r8..r15) and handed back when a value is consumed.
    """
    def __init__(self):
        self.strings = []
        # Data section contents, filled by StringConstant nodes:
        self.data = []
        self.initialize()

    def initialize(self):
        """ Reset the register pool and the output image """
        # Register descriptors:
        self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',')
        self.usedregs = []
        # Members to accumulate the result into:
        # The result is an image of bytecode and global variable space.
        # Global variables a referenced by RIP relative addressing.
        self.image = []
        self.rip = 0  # The current instruction pointer location.
        # TODO: backpatch list here?

    # Functions to modify the code image
    def addCode(self, code):
        """ Append a list of bytes to the image and advance rip """
        assert(type(code) is list)
        self.image += code
        self.rip += len(code)

    def fixCode(self, position, code):
        """ Overwrite already emitted bytes, used to patch jump distances """
        self.image[position:position+len(code)] = code

    def align(self, b):
        """ Pad the image with zero bytes until rip is a multiple of b """
        while (self.rip % b) != 0:
            self.addCode([0])

    def saveAllRegisters(self):
        """ Spill all registers in use to memory (not implemented) """
        # The previous body treated self.usedregs as a dict and used an
        # undefined local, so every call failed with an AttributeError.
        for reg in list(self.usedregs):
            self.saveRegister(reg)

    def saveRegister(self, reg):
        """ Spill one register to memory (not implemented) """
        # There is no record of a memory location per register, so a spill
        # cannot be generated. Report that explicitly.
        if reg in self.usedregs:
            Error('Saving of register {0} not implemented'.format(reg))

    def getreg(self, node):
        """ acquire a working register for a certain node."""
        # Temporary register bypass action:
        if len(self.freeregs) == 0:
            Error('No more free regs')
        reg = self.freeregs.pop(0)
        self.usedregs.append(reg)
        node.reg = reg

    def freereg(self, node):
        """ Hand the register of a node back to the pool """
        reg = node.reg
        node.reg = None
        self.freeregs.append(reg)
        self.usedregs.remove(reg)

    # Helpers to load and retrieve designated objects:
    def storeRegInDesignator(self, reg, designator):
        """ Emit code that stores register reg into the designated object """
        assert(type(reg) is str)
        assert(type(designator) is Designator)
        if len(designator.selectors) > 0:
            self.gencode(designator)  # Load the pointer into some register
            self.addCode(mov([designator.reg, 0x0], reg))
            self.freereg(designator)
        else:
            if designator.obj.isLocal:
                # Relative from rbp register
                mem = ['rbp', designator.obj.offset]
                self.addCode(mov(mem, reg))
            else:
                # Relative from RIP after move
                self.addCode(mov(['RIP', 0x0], reg))
                # The 32 bits displacement is the last part of the instruction:
                self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip))

    # Code generation functions:
    def genexprcode(self, node):
        """
         Generate code for expressions!
         Recursively evaluates, and ensures a register contains the answer.
         register is an integer register or a floating point reg
        """
        if isinstance(node, Binop):
            # Handle a binary operation (two arguments) of some kind
            self.genexprcode(node.a)
            self.genexprcode(node.b)

            if node.op == 'mod':
                assert(node.typ.isType(integer))
                self.addCode(mov('rax', node.a.reg))
                # IDIV divides the signed value rdx:rax, so rax must be sign
                # extended into rdx. Zeroing rdx is wrong for negative values.
                self.addCode([0x48, 0x99])  # CQO
                self.addCode(idivreg64(node.b.reg))  # divide rdx:rax with reg
                node.reg = node.a.reg
                self.freereg(node.b)  # give up register that contains b
                self.addCode(mov(node.reg, 'rdx'))  # move remainder into result
            elif node.op == 'div':
                assert(node.typ.isType(integer))
                self.addCode(mov('rax', node.a.reg))
                self.addCode([0x48, 0x99])  # CQO: sign extend rax into rdx
                self.addCode(idivreg64(node.b.reg))  # divide rdx:rax with reg
                node.reg = node.a.reg
                self.freereg(node.b)  # give up register that contains b
                self.addCode(mov(node.reg, 'rax'))  # move result into reg
            elif node.op == '*':
                if node.typ.isType(integer):
                    self.addCode(imulreg64(node.a.reg, node.b.reg))
                    node.reg = node.a.reg
                    self.freereg(node.b)
                else:
                    Error('{0} for * not implemented'.format(node.typ))
            elif node.op == '+':
                if node.typ.isType(integer):
                    self.addCode(addreg64(node.a.reg, node.b.reg))
                    node.reg = node.a.reg
                    self.freereg(node.b)
                else:
                    Error('{0} for + not implemented'.format(node.typ))
            elif node.op == '-':
                if node.typ.isType(integer):
                    self.addCode(subreg64(node.a.reg, node.b.reg))
                    node.reg = node.a.reg
                    self.freereg(node.b)
                else:
                    Error('{0} for - not implemented'.format(node.typ))
            else:
                Error('Unknown Binop {0}'.format(node.op))

        elif type(node) is Unop:
            # None of the unary operations generate code yet. The previous
            # body referenced an undefined local 'code' and an undefined
            # function 'isType'; report the missing feature instead.
            if node.op == 'INTTOREAL':
                # TODO use 'FILD' instruction
                Error('Unop INTTOREAL not implemented')
            elif node.op == 'ABS':
                # TODO: for reals: [0xD9, 0xE1] is st(0) = fabs st(0)
                Error('ABS not implemented for {0}'.format(node.typ))
            else:
                Error('Unknown Unop {0}'.format(node.op))

        elif isinstance(node, Designator):
            # dereference, array index. Make sure that the result comes into a register
            if len(node.selectors) > 0:
                self.gencode(node)  # Load the pointer into some register

                # Now we can access the object at location '[node.reg]':
                if node.typ.isType(integer):
                    self.addCode(mov(node.reg, [node.reg, 0x0]))
                else:
                    Error('Only integer types implemented')
            else:
                # No selectors, load variable directly
                if node.obj.typ.isType(integer):
                    if type(node.obj) is Constant:
                        self.genexprcode(node.obj)
                        node.reg = node.obj.reg
                    else:
                        self.getreg(node)
                        # Get a register to store the integer value
                        if node.obj.isLocal:
                            # relative to rbp:
                            self.addCode(mov(node.reg, ['rbp', node.obj.offset]))
                        else:
                            self.addCode(mov(node.reg, ['RIP', 0x0]))
                            self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip))
                else:
                    Error('Cannot load variable type {0}'.format(node.typ))

        elif isinstance(node, Relop):
            # Create a boolean from operands
            # TODO create an alternative for expressions used as conditions.
            self.genexprcode(node.a)
            self.genexprcode(node.b)

            if node.a.typ.isType(integer):
                instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=': 'E'}
                if not node.relop in instructions.keys():
                    Error('Unimplemented relop: '+str(node.relop))
                instr = instructions[node.relop]

                node.reg = node.a.reg
                self.addCode(cmpreg64(node.a.reg, node.b.reg))
                self.addCode(shortjump(0x0, condition=instr))  # jump over 0 code and jmp
                fixloc1 = self.rip - 1
                rip1 = self.rip
                self.addCode(xorreg64(node.reg, node.reg))
                self.addCode(shortjump(0x0))  # Jump over 1 code
                fixloc2 = self.rip - 1
                self.fixCode(fixloc1, imm8(self.rip - rip1))
                rip2 = self.rip
                self.addCode(xorreg64(node.reg, node.reg))
                self.addCode(increg64(node.reg))
                self.fixCode(fixloc2, imm8(self.rip - rip2))

                self.freereg(node.b)
            else:
                Error('Relop not implemented for {0}'.format(node.a.typ))

        elif type(node) is Constant:
            if node.typ.isType(integer):
                self.getreg(node)
                self.addCode(mov(node.reg, node.value))
            elif node.typ.isType(real):
                # TODO: get a fixed point reg, and load the variable in there
                Error('TODO: get real reg')
            else:
                Error('Howto generate code for {0}?'.format(node))

        elif type(node) is ProcedureCall:
            if type(node.proc.obj) is BuiltinProcedure:
                # Handle builtin procedures different, these not always call
                # a function, but generate code.
                bi = node.proc.obj
                if bi.name == 'chr':
                    arg = node.args[0]
                    self.genexprcode(arg)
                    # Store character in full width register:
                    # TODO: store in char only register
                    node.reg = arg.reg
                else:
                    Error('Unknown builtin function {0}'.format(bi.name))
            else:
                # Use generic procedure call first
                self.gencode(node)
                # Retrieve result:
                if node.typ.isType(integer):
                    # Store result!
                    self.getreg(node)
                    self.addCode(mov(node.reg, 'rax'))
                else:
                    Error('Return type not supported {0}'.format(node.typ))
        else:
            Error('Cannot generate expression code for: {0}'.format(node))

    def gencode(self, node):
        """ Code generation function for AST nodes """
        if isinstance(node, Module):
            # for all imports make a list of pointer to the actual procedures:
            for imp in node.imports:
                imp.offset = self.rip
                self.addCode([0x0]*8)
            # global variable storage allocation
            variables = node.symtable.getAllLocal(Variable)
            for var in variables:
                var.isLocal = False
                var.offset = self.rip
                self.addCode([0x00] * var.typ.size)  # TODO initial values here?
            self.align(8)
            # TODO: mark end of data and start of code inside image
            # TODO: round data to page size to enable protection by loader.
            # Procedure code generation:
            procedures = node.symtable.getAllLocal(Procedure)
            node.procs = procedures
            for proc in procedures:
                self.gencode(proc)
            # Module init code:
            node.initcodeentry = self.rip
            self.gencode(node.initcode)
            self.addCode(ret())
            # TODO: how to return from module init code? far return??

        elif type(node) is Procedure:
            # calculate offsets for local variables and parameters
            # Variable location relative to 'rbp' register
            variables = node.symtable.getAllLocal(Variable)
            offset = 0
            paramoffset = 16
            for var in variables:
                var.isLocal = True
                if not var.isParameter:
                    offset += var.typ.size
                    # Offset is negative of rbp in stack frame
                    var.offset = -offset
            node.framesize = offset
            # Calculate offsets of parameters relative to rbp register
            # NOTE(review): arguments are pushed as 8 byte values by the
            # caller, while this advances by the type size -- confirm for
            # parameter types that are not 8 bytes wide.
            for par in reversed(node.typ.parameters):
                pvar = node.symtable.getLocal(Variable, par.name)
                pvar.offset = paramoffset
                paramoffset += pvar.typ.size

            # code generation
            node.entrypoint = self.rip
            self.addCode(push('rbp'))
            self.addCode(mov('rbp', 'rsp'))  # Setup the base pointer
            self.addCode(subreg64('rsp', node.framesize))  # reserve space for locals
            self.gencode(node.block)
            if node.retexpr:
                if node.retexpr.typ.isType(integer):
                    self.genexprcode(node.retexpr)
                    self.addCode(mov('rax', node.retexpr.reg))
                    self.freereg(node.retexpr)
                else:
                    Error('Cannot return this kind yet {0}'.format(node.retexpr.typ))
            self.addCode(addreg64('rsp', node.framesize))
            self.addCode(pop('rbp'))
            self.addCode(ret())
            assert(len(self.usedregs) == 0)

        elif isinstance(node, StatementSequence):
            for s in node.statements:
                self.gencode(s)

        elif type(node) is ProcedureCall:
            # Prepare parameters on the stack:
            stacksize = 0
            assert(len(node.args) == len(node.proc.typ.parameters))
            for arg, param in zip(node.args, node.proc.typ.parameters):

                if param.kind == 'value':
                    self.genexprcode(arg)
                    self.addCode(push(arg.reg))
                    self.freereg(arg)
                    stacksize += 8
                else:
                    Error('Parameter kind other than value')

            # Calculate address using designator
            if type(node.proc.obj) is Procedure:
                self.addCode(call(0x0))
                self.fixCode(self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip))
            elif type(node.proc.obj) is ImportedSymbol:
                # Load the entry point of the import table
                self.getreg(node.proc.obj)
                # Load the address of the procedure:
                self.addCode(mov(node.proc.obj.reg, ['RIP', 0x0]))
                self.fixCode(self.rip - 4, imm32(node.proc.obj.offset - self.rip))
                # Call to the address in register:
                self.addCode(call(node.proc.obj.reg))
                # Free register that holds the address of the object
                self.freereg(node.proc.obj)
            elif type(node.proc.obj) is BuiltinProcedure:
                if node.proc.obj.name == 'chr':
                    print('int to char')
                else:
                    Error('Unknown builtin function {0}'.format(node.proc.obj.name))
            else:
                Error('Cannot call designator of type {0}'.format(node.proc.obj))

            # Restore stack (pop all arguments of):
            self.addCode(addreg64('rsp', stacksize))

        elif type(node) is Assignment:
            if node.lval.typ.isType(integer):
                # TODO if node.rval is Constant of some datatype, move it to mem directly
                self.genexprcode(node.rval)  # Calculate the value that has to be stored.
                self.storeRegInDesignator(node.rval.reg, node.lval)
                self.freereg(node.rval)
            else:
                Error('Assignments of other types not implemented')
                # TODO if left and right are designators, do some sort of memcpy.

        elif type(node) is IfStatement:
            self.genexprcode(node.condition)
            self.addCode(cmpreg64(node.condition.reg, 1))
            self.freereg(node.condition)
            if node.falsestatement:
                # If with else clause
                self.addCode(nearjump(0x0, condition='NE'))  # if Not Equal jump to false
                rip1 = self.rip
                fixloc1 = self.rip - 4
                self.gencode(node.truestatement)
                self.addCode(nearjump(0x0))  # jump over false code
                fixloc2 = self.rip - 4
                self.fixCode(fixloc1, imm32(self.rip - rip1))
                rip2 = self.rip
                self.gencode(node.falsestatement)
                self.fixCode(fixloc2, imm32(self.rip - rip2))
            else:
                # If without else clause
                self.addCode(nearjump(0x0, condition='NE'))  # if Not Equal jump to false
                rip1 = self.rip
                fixloc1 = self.rip - 4
                self.gencode(node.truestatement)
                self.fixCode(fixloc1, imm32(self.rip - rip1))  # Fixup near jump over true code.

        elif isinstance(node, WhileStatement):
            rip1 = self.rip  # Store the start of the while loop
            self.genexprcode(node.condition)
            self.addCode(cmpreg64(node.condition.reg, 1))  # Test condition for true-ness
            self.freereg(node.condition)
            self.addCode(nearjump(0x0, condition='NE'))  # If Not Equal jump over while code AND jump back (fix later)
            fixloc1 = self.rip - 4
            rip2 = self.rip
            self.gencode(node.dostatements)
            self.addCode(nearjump(0x0))  # JMP to condition, fix exact jump position below
            fixloc2 = self.rip - 4
            rip3 = self.rip  # end of while loop
            self.fixCode(fixloc2, imm32(rip1 - rip3))  # Fixup jump to start of while loop
            self.fixCode(fixloc1, imm32(rip3 - rip2))  # Fixup jump out of while loop

        elif type(node) is ForStatement:
            # Initial load of iterator variable:
            self.genexprcode(node.begin)
            self.genexprcode(node.end)
            # TODO: link reg with variable so that a register is used instead of a variable
            iterreg = node.begin.reg  # Get the register used for the loop
            #self.addCode(cmpreg64(iterreg, node.endvalue))
            rip1 = self.rip
            self.gencode(node.statements)
            #self.loadDesignatorInReg(node.
            #self.addCode( addreg64(node.variable, node.increment) )
            self.addCode(nearjump(0x0))
            fixloc1 = self.rip - 4
            rip2 = self.rip
            self.fixCode(fixloc1, imm32(rip1 - rip2))

            self.freereg(node.begin)  # Release register used in loop
            self.freereg(node.end)
            Error('No implementation of FOR statement')

        elif type(node) is AsmCode:
            def processOperand(op):
                # Replace a [Variable] operand by its rbp relative location:
                if type(op) is list:
                    if type(op[0]) is Variable:
                        var = op[0]
                        if var.isLocal:
                            return ['rbp', var.offset]
                        else:
                            Error('Can only use local variables in inline assembler')
                return op
            for asmline in node.asmcode:
                opcode, operands = asmline
                operands = [processOperand(opx) for opx in operands]
                print('assembling', opcode, *operands)
                func, nargs = opcodes[opcode]
                code = func(*operands)
                self.addCode(code)

        elif isinstance(node, EmptyStatement):
            pass

        elif type(node) is StringConstant:
            self.strings.append(node)
            self.data.append(node.value)  # Add string to the data section

        elif type(node) is Designator:
            if len(node.selectors) > 0:
                self.getreg(node)
                # Load starting address
                if node.obj.isLocal:
                    self.addCode(leareg64(node.reg, ['rbp', node.obj.offset]))
                else:
                    # Global variables need to be relocated...
                    self.addCode(leareg64(node.reg, ['RIP', 0]))
                    self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip))
                # Loop over all designators..
                for selector in node.selectors:
                    if type(selector) is Index:
                        # Deref an array index
                        self.genexprcode(selector.index)
                        self.getreg(selector)
                        self.addCode(mov(selector.reg, selector.typ.elementType.size))
                        self.addCode(imulreg64(selector.reg, selector.index.reg))
                        self.freereg(selector.index)
                        self.addCode(addreg64(node.reg, selector.reg))
                        self.freereg(selector)
                    elif type(selector) is Field:
                        print('Field')
                        Error('Field not implemented')
                    else:
                        Error('Unknown selector')
            else:
                Error('Can only gencode for designator with selectors')

        else:
            print('not generating code for {0}'.format(node))

    def generatecode(self, ast):
        """ code generation front end """
        self.initialize()
        self.gencode(ast)
        ast.image = self.image
# applications/ide/compiler/compiler.py
import hashlib
# Import compiler components:
from . import lexer
from .parser import Parser
from .codegenerator import CodeGenerator
from .nodes import ExportedSymbol
from .errors import CompilerException


class Compiler:
    """ Front end that drives lexing, parsing and code generation """
    versie = '0.9.3'

    def __repr__(self):
        return 'LCFOS compiler {0}'.format(self.versie)

    def generateSignature(self, src):
        """ Return the md5 hex digest of the source text """
        # utf-8 gives the same bytes as ascii for pure ascii sources, and does
        # not raise UnicodeEncodeError when the source holds other characters.
        return hashlib.md5(bytes(src, encoding='utf-8')).hexdigest()

    def compilesource(self, src):
        """
         Front end that handles the stages:
         Returns the module AST with image, signature and exports attached.
         When parsing reports errors they are stored in self.errorlist as
         (row, col, msg) tuples and None is returned.
        """
        self.errorlist = []
        # Pass 1: parsing and type checking
        tokens = lexer.tokenize(src)  # Lexical stage
        p = Parser(tokens)
        try:
            ast = p.parseModule()  # Parse a module
        except CompilerException as e:
            p.errorlist.append((e.row, e.col, e.msg))
        if len(p.errorlist) > 0:
            self.errorlist = p.errorlist
            return
        # Pass 2: code generation
        CodeGenerator().generatecode(ast)
        # Attach a signature:
        ast.signature = self.generateSignature(src)
        # Generate exported symbols:
        ast.exports = []
        for proc in ast.procs:
            if proc.public:
                sym = ExportedSymbol(proc.name, proc.typ)
                sym.imageoffset = proc.entrypoint
                ast.exports.append(sym)
        return ast

    def compileProject(self, project):
        """
         Compile every file of the project and return the list of results.
         An entry is None for a file that did not compile.
        """
        mods = []
        for fname in project.files:
            print('Compiling {0}...'.format(fname))
            source = project.loadProjectFile(fname)
            mod = self.compilesource(source)
            mods.append(mod)
        return mods
# applications/ide/compiler/display.py
from .nodes import *


def printNode(node, indent=0):
    """
     Print visitor: writes the node and, recursively, everything below it
     to stdout. indent is the number of spaces put in front of the node.
    """
    margin = ' '*indent
    print(margin+str(node))
    kind = type(node)

    if kind is Procedure:
        # A procedure shows its parameters, and its code when it has any:
        print(margin+' PARAMETERS:')
        for param in node.parameters:
            printNode(param, indent+4)
        if node.block:
            print(margin+' CODE:')
            printNode(node.block, indent+4)
        return

    if kind is Module:
        # A module shows its symbol table followed by the init code:
        print(node.symtable)
        printNode(node.initcode, indent+2)
        return

    # Any other node: descend into the children.
    for child in node.getChildren():
        printNode(child, indent+2)
# applications/ide/compiler/errors.py
""" Error handling routines """


class CompilerException(Exception):
    """
     Error found while compiling. row and col give the 1-based source
     location; row 0 means that the location is unknown.
    """
    def __init__(self, msg, row=0, col=0, filename=None):
        # Hand the message to Exception as well, so that e.args is filled
        # (used by tracebacks, logging and pickling).
        super().__init__(msg)
        self.msg = msg
        self.row = row
        self.col = col
        self.filename = filename

    def __repr__(self):
        return self.msg

    def __str__(self):
        return self.msg


class ErrorNode:
    """ Record of an error: location and message """
    def __init__(self, row, col, msg):
        self.row, self.col = row, col
        self.msg = msg


def Error(msg, node=None):
    """
     Raise a CompilerException with the given message. When a node is
     given, its row and col are used as the location of the error.
    """
    if node is None:
        raise CompilerException(msg)
    else:
        raise CompilerException(msg, node.row, node.col)


def printError(source, e):
    """
     Print error e, with up to three source lines of context before and
     after the offending line and a caret below the error column.
     Only the message is printed when the row of the error is unknown (0)
     or lies outside of the source text.
    """
    def printLine(row, txt):
        print(str(row)+':'+txt)

    lines = source.split('\n')
    if e.row < 1 or e.row > len(lines):
        # No usable location; indexing the lines would fail or show the
        # wrong line.
        print('Error: {0}'.format(e.msg))
        return

    prerow = max(e.row - 3, 1)
    afterrow = min(e.row + 3, len(lines))

    # print preceding source lines:
    for r in range(prerow, e.row):
        printLine(r, lines[r-1])
    # print source line containing error:
    printLine(e.row, lines[e.row-1])
    # The caret is shifted by the width of the 'row:' prefix:
    print(' '*(len(str(e.row)+':')+e.col-1) + '^ Error: {0}'.format(e.msg))
    # print trailing source line:
    for r in range(e.row+1, afterrow+1):
        printLine(r, lines[r-1])
# applications/ide/compiler/lexer.py
import collections
import re
from .errors import CompilerException

"""
 Lexical analyzer part. Splits the input character stream into tokens.
"""

# Token is used in the lexical analyzer:
Token = collections.namedtuple('Token', 'typ val row col')

keywords = ['and', 'array', 'begin', 'by', 'case', 'const', 'div', 'do', \
   'else', 'elsif', 'end', 'false', 'for', 'if', 'import', 'in', 'is', \
   'mod', 'module', 'nil', 'not', 'of', 'or', 'pointer', 'procedure', \
   'record', 'repeat', 'return', 'then', 'to', 'true', 'type', 'until', 'var', \
   'while', 'asm' ]


def tokenize(s):
    """
     Tokenizer, generates an iterator that
     returns tokens!

     Each token is a Token(typ, val, row, col) with 1-based row and col.
     Keywords and punctuation use their own text as typ, numbers are
     converted to int (typ 'NUMBER') or float (typ 'REAL') and strings
     lose their quotes. The last token has typ 'END'.
     Raises CompilerException on a character that starts no token.

     This GREAT example was taken from python re doc page!
    """
    tok_spec = [
       ('REAL', r'\d+\.\d+'),
       ('HEXNUMBER', r'0x[\da-fA-F]+'),
       ('NUMBER', r'\d+'),
       ('ID', r'[A-Za-z][A-Za-z\d_]*'),
       ('NEWLINE', r'\n'),
       ('SKIP', r'[ \t]'),
       # Non greedy, otherwise two comments on one line swallow the code
       # between them:
       ('COMMENTS', r'{.*?}'),
       ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
       ('STRING', r"'.*?'")
       ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0  # line_start is the index of the first char of the line
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            # The next line starts after the newline character. Together
            # with the '+ 1' below this makes columns 1-based on every
            # line; before, only the first line was 0-based.
            line_start = mo.end()
            line += 1
        elif typ == 'COMMENTS':
            pass
        elif typ != 'SKIP':
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            yield Token(typ, val, line, mo.start() - line_start + 1)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        col = pos - line_start + 1
        raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col)
    yield Token('END', '', line, 0)
# applications/ide/compiler/modules.py
"""
 File format for compiled modules, as read and written by the code below.
 * [9] magic identifier (MAGIC)
 * [STR] mod name
 * [STR] signature of the module
 * [I32] size of code
 * code image
 * [I32] entrypoint for initcode
 * imported symbols
 ** [I32] num of imported symbols
 *** [STR] name of the module the symbol comes from
 *** [STR] signature of that module
 *** [STR] name of the symbol
 *** [I32] offset of the symbol
 * public interface
 ** [I32] num of interface elements
 *** [STR] symbol name
 *** [I32] offset in code image
 *** [type] type of the symbol

 [I32] is a little endian unsigned 32 bits integer.
 [STR] is an [I32] length followed by that many ascii bytes.
 [type] is one code byte:
   0x11 .. 0x15: integer, real, char, boolean, void
   0x20: array, followed by [I32] dimension and the element [type]
   0x21: procedure, followed by the return [type], [I32] number of
         parameters and per parameter: kind byte (1 value, 2 var, 3 const),
         [STR] name and [type]
"""
import struct
from .errors import Error
from .nodes import *
from .builtin import integer, real, char, boolean, void
import os.path

MAGIC = b'LCFOSMODC'

loadedModules = []


def loadModule(modname):
    """ returns a Module object specified by a name

    Modules loaded earlier are returned from the cache, otherwise the
    compiled file '<modname>.bin' is loaded. Raises a CompilerException
    (via Error) when no compiled file exists.
    """
    # Check if the module was already loaded:
    for mod in loadedModules:
        if mod.name == modname:
            return mod

    # Try to load the module from file:
    binfilename = modname + '.bin'
    if os.path.exists(binfilename):
        # TODO: recompile when the source '<modname>.mod' is present. This
        # used to call a compileModule function that is not defined in
        # this module, so the compiled file is used as it is.
        return loadModuleFromFile(binfilename)
    else:
        Error("Cannot load module '{0}'!".format(modname))


def loadModuleFromFile(filename):
    """ Read a compiled module from filename, register it in the list of
    loaded modules and return it.

    Raises a CompilerException (via Error) when the file does not start
    with MAGIC or contains an unknown type code.
    """
    basetypes = {0x11: integer, 0x12: real, 0x13: char, 0x14: boolean, 0x15: void}
    kinds = {0x1: 'value', 0x2: 'var', 0x3: 'const'}

    # The with statement closes the file, also when loading fails halfway:
    with open(filename, 'rb') as f:
        # Helper functions:
        def readI32():
            int32, = struct.unpack('<I', f.read(4))
            return int32

        def readSTR():
            length = readI32()
            b = f.read(length)
            return b.decode(encoding='ascii')

        def readType():
            code, = f.read(1)
            if code in basetypes:
                return basetypes[code]
            elif code == 0x20:
                dimension, elementType = readI32(), readType()
                return ArrayType(dimension, elementType)
            elif code == 0x21:
                returntype = readType()
                numparams = readI32()
                parameters = []
                for i in range(numparams):
                    byt, = f.read(1)
                    kind = kinds[byt]
                    name, typ = readSTR(), readType()
                    parameters.append(Parameter(kind, name, typ))
                return ProcedureType(parameters, returntype)
            else:
                Error('Reading of this typ not supported')

        magic = f.read(len(MAGIC))
        # No assert here: asserts disappear when python runs with -O.
        if magic != MAGIC:
            Error("File '{0}' is not a compiled module".format(filename))

        # Begin of actual loading
        modname = readSTR()
        modsignature = readSTR()
        codesize = readI32()
        image = f.read(codesize)
        initcodeentry = readI32()
        # Check which symbols this module imports:
        numimports = readI32()
        imports = []
        for i in range(numimports):
            # Use a name of its own, modname is the module being loaded:
            impmodname = readSTR()
            signature = readSTR()
            symname = readSTR()
            offset = readI32()
            impsym = ImportedSymbol(impmodname, symname)
            impsym.signature = signature
            impsym.offset = offset
            imports.append(impsym)
        # Modules exported interface:
        numexports = readI32()
        exports = []
        for i in range(numexports):
            name = readSTR()
            imageoffset = readI32() # Offset in image where symbol is located
            typ = readType()
            export = ExportedSymbol(name, typ)
            export.imageoffset = imageoffset
            exports.append(export)

    # Construct imported module object:
    module = Module(modname)
    module.signature = modsignature
    module.exports = exports # Symbols provided to other modules
    module.imports = imports # Symbols of others used by this module.
    module.initcodeentry = initcodeentry
    module.image = image # The binary blob
    loadedModules.append(module)
    return module


def storeModule(mod, filename):
    """ Store module mod in the file filename and add it to the list of
    loaded modules.

    Raises a CompilerException (via Error) for types that cannot be stored.
    """
    basetypecode = {'integer': 0x11, 'real': 0x12, 'char': 0x13, 'boolean': 0x14, 'void': 0x15}
    kinds = {'value': 0x1, 'var': 0x2, 'const': 0x3}

    with open(filename, 'wb') as f:
        def writeI32(int32):
            f.write(struct.pack('<I', int32))

        def writeSTR(s):
            writeI32(len(s))
            f.write(bytes(s, encoding='ascii'))

        def writeType(typ):
            if type(typ) is BaseType:
                code = basetypecode[typ.name]
                f.write(bytes([code]))
            elif type(typ) is ArrayType:
                f.write(bytes([0x20]))
                writeI32(typ.dimension)
                writeType(typ.elementType)
            elif type(typ) is ProcedureType:
                f.write(bytes([0x21]))
                writeType(typ.returntype)
                writeI32(len(typ.parameters))
                for parameter in typ.parameters:
                    kind = kinds[parameter.kind]
                    f.write(bytes([kind]))
                    writeSTR(parameter.name)
                    writeType(parameter.typ)
            else:
                Error('Type storage not implemented {0}'.format(typ))

        # Begin of actual storage function
        f.write(MAGIC)
        writeSTR(mod.name)
        writeSTR(mod.signature)
        writeI32(len(mod.image))
        f.write(bytes(mod.image))
        writeI32(mod.initcodeentry)
        # modules imported symbols:
        writeI32(len(mod.imports))
        for imp in mod.imports:
            writeSTR(imp.modname)
            writeSTR(imp.signature)
            writeSTR(imp.name)
            writeI32(imp.offset)
        # modules provided interface
        writeI32(len(mod.exports))
        # Store exported symbols:
        for sym in mod.exports:
            writeSTR(sym.name)  # proc name
            writeI32(sym.imageoffset)  # proc entry point
            writeType(sym.typ)  # Procedure type

    storeModuleInCache(mod)


def storeModuleInCache(newmod):
    """ Add newmod to the loaded modules, unless a module with the same
    name is already present. """
    if any(mod.name == newmod.name for mod in loadedModules):
        return
    loadedModules.append(newmod)
# applications/ide/compiler/nodes.py
"""
Parse tree elements
"""


class Node:
    """ Base class of all parse tree elements """
    location = None

    def getChildren(self):
        """ Return all Node objects that are stored in an attribute of this
        node, either directly or as element of a list attribute. """
        children = []
        members = dir(self)
        for member in members:
            member = getattr(self, member)
            if isinstance(member, Node):
                children.append(member)
            elif type(member) is list:
                for mi in member:
                    if isinstance(mi, Node):
                        children.append(mi)
        return children


class Symbol(Node):
    pass


class Id(Node):
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'ID {0}'.format(self.name)


# Selectors:
class Field(Node):
    def __init__(self, fieldname):
        self.fieldname = fieldname

    def __repr__(self):
        return 'FLD {0}'.format(self.fieldname)


class Index(Node):
    def __init__(self, index, typ):
        self.index = index
        self.typ = typ

    def __repr__(self):
        return 'IDX {0}'.format(self.index)


class Deref(Node):
    pass


class Designator(Node):
    def __init__(self, obj, selectors, typ):
        self.obj = obj
        self.selectors = selectors
        self.typ = typ

    def __repr__(self):
        return 'DESIGNATOR {0}, selectors {1}, type {2}'.format(self.obj, self.selectors, self.typ)


# Type classes
def isType(a, b):
    """ Compare types a and b and check if they are equal

    Base types are equal when name and size match, array types when
    dimension and element type match and procedure types when all
    parameter types and the return type match. Objects of different
    classes are never equal. A mismatch in a procedure type is reported
    with print.

    Raises a CompilerException (via Error) when both objects are of the
    same class, but comparison of that class is not implemented.
    """
    if type(a) is not type(b):
        return False

    if type(a) is BaseType:
        return (a.name == b.name) and (a.size == b.size)
    elif type(a) is ArrayType:
        return (a.dimension == b.dimension) and isType(a.elementType, b.elementType)
    elif type(a) is ProcedureType:
        if len(a.parameters) != len(b.parameters):
            print('Number of parameters does not match')
            return False
        for aparam, bparam in zip(a.parameters, b.parameters):
            if not isType(aparam.typ, bparam.typ):
                print('Parameter {0} does not match parameter {1}'.format(aparam, bparam))
                return False
        # TODO: how to handle a None return type??
        # Note: the attribute of ProcedureType is named returntype.
        if not isType(a.returntype, b.returntype):
            print('Procedure return value mismatch {0} != {1}'.format(a.returntype, b.returntype))
            return False
        return True
    else:
        # Imported here, only this branch requires the errors module:
        from .errors import Error
        print(a)
        print(b)
        Error('Not implemented {0}'.format(a))


class Type:
    def isType(self, b):
        """ Check if this type is equal to type b, see the isType function """
        return isType(self, b)


class BaseType(Type):
    def __init__(self, name, size):
        self.name = name
        self.size = size

    def __repr__(self):
        return '[TYPE {0}]'.format(self.name)


class NilType(Node):
    # TODO: how to handle nil values??
    def __repr__(self):
        return 'NILTYPE'


class ArrayType(Type):
    def __init__(self, dimension, elementType):
        self.dimension = dimension
        self.elementType = elementType
        self.size = elementType.size * dimension

    def __repr__(self):
        return '[ARRAY {0} of {1}]'.format(self.dimension, self.elementType)


class RecordType(Type):
    def __init__(self, fields):
        # fields maps the name of a field onto its type
        self.fields = fields
        self.size = 0
        for fieldname in self.fields:
            self.size += self.fields[fieldname].size

    def __repr__(self):
        return '[RECORD {0}]'.format(self.fields)


class PointerType(Type):
    def __init__(self, pointedType):
        self.pointedType = pointedType
        self.size = 8

    def __repr__(self):
        return '[POINTER {0}]'.format(self.pointedType)


class ProcedureType(Type):
    def __init__(self, parameters, returntype):
        self.parameters = parameters
        self.returntype = returntype

    def __repr__(self):
        return '[PROCTYPE {0} RET {1}]'.format(self.parameters, self.returntype)


class DefinedType(Type):
    def __init__(self, name, typ):
        self.name = name
        self.typ = typ

    def __repr__(self):
        return 'Named type {0} of type {1}'.format(self.name, self.typ)


# Classes for constants like numbers and strings:
class StringConstant(Symbol):
    def __init__(self, txt):
        self.txt = txt
        self.typ = 'string'

    def __repr__(self):
        return "STRING '{0}'".format(self.txt)


# Variables, parameters, local variables, constants:
class Constant(Symbol):
    def __init__(self, value, typ, name=None, public=False):
        self.name = name
        self.value = value
        self.typ = typ
        self.public = public

    def __repr__(self):
        return 'CONSTANT {0} = {1}'.format(self.name, self.value)


class Variable(Symbol):
    def __init__(self, name, typ, public):
        self.name = name
        self.typ = typ
        self.public = public
        self.isLocal = False
        self.isReadOnly = False
        self.isParameter = False

    def __repr__(self):
        txt = '[public] ' if self.public else ''
        return '{2}VAR {0} : {1}'.format(self.name, self.typ, txt)


class Parameter(Node):
    """ A parameter has a passing method, name and typ """
    def __init__(self, kind, name, typ):
        self.kind = kind
        self.name = name
        self.typ = typ

    def __repr__(self):
        return 'PARAM {0} {1} {2}'.format(self.kind, self.name, self.typ)


# Operations:
class Unop(Node):
    def __init__(self, a, op, typ):
        self.a = a
        self.op = op # Operation: '+', '-', '*', '/', 'mod'
        self.typ = typ
        self.place = None

    def __repr__(self):
        return 'UNOP {0}'.format(self.op)


class Binop(Node):
    def __init__(self, a, op, b, typ):
        self.a = a
        self.b = b
        self.op = op   # Operation: '+', '-', '*', '/', 'mod'
        self.typ = typ # Resulting type :)
        self.place = None

    def __repr__(self):
        return 'BINOP {0} {1}'.format(self.op, self.typ)


class Relop(Node):
    def __init__(self, a, relop, b, typ):
        self.a = a
        self.relop = relop
        self.b = b
        self.typ = typ

    def __repr__(self):
        return 'RELOP {0}'.format(self.relop)


# Modules
class Module(Node):
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'MODULE {0}'.format(self.name)


# Imports and Exports:
class ImportedSymbol(Node):
    def __init__(self, modname, name):
        self.modname = modname
        self.name = name

    def __repr__(self):
        return 'IMPORTED SYMBOL {0}'.format(self.name)


class ExportedSymbol(Node):
    def __init__(self, name, typ):
        self.name = name
        self.typ = typ

    def __repr__(self):
        return 'EXPORTED PROCEDURE {0} : {1}'.format(self.name, self.typ)


# Procedure types
class BuiltinProcedure(Node):
    def __init__(self, name, typ):
        self.name = name
        self.typ = typ

    def __repr__(self):
        return 'BUILTIN PROCEDURE {0} : {1}'.format(self.name, self.typ)


class Procedure(Symbol):
    """ Actual implementation of a function """
    def __init__(self, name, typ, block, symtable, retexpr):
        self.name = name
        self.block = block
        self.symtable = symtable
        self.typ = typ
        self.retexpr = retexpr

    def __repr__(self):
        return 'PROCEDURE {0} {1}'.format(self.name, self.typ)


# Statements
class StatementSequence(Node):
    def __init__(self, statements):
        self.statements = statements

    def __repr__(self):
        return 'STATEMENTSEQUENCE'


class EmptyStatement(Node):
    def __repr__(self):
        return 'EMPTY STATEMENT'


class Assignment(Node):
    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __repr__(self):
        return 'ASSIGNMENT'


class ProcedureCall(Node):
    def __init__(self, proc, args):
        self.proc = proc
        self.args = args
        # The type of a call is the return type of the called procedure:
        self.typ = proc.typ.returntype

    def __repr__(self):
        return 'CALL {0} '.format(self.proc)


class IfStatement(Node):
    def __init__(self, condition, truestatement, falsestatement=None):
        self.condition = condition
        self.truestatement = truestatement
        self.falsestatement = falsestatement

    def __repr__(self):
        return 'IF-statement'


class CaseStatement(Node):
    def __init__(self, condition):
        self.condition = condition

    def __repr__(self):
        return 'CASE-statement'


class WhileStatement(Node):
    def __init__(self, condition, statements):
        self.condition = condition
        self.dostatements = statements

    def __repr__(self):
        return 'WHILE-statement'


class ForStatement(Node):
    def __init__(self, variable, begin, end, increment, statements):
        self.variable = variable
        self.begin = begin
        self.end = end
        self.increment = increment
        self.statements = statements

    def __repr__(self):
        return 'FOR-statement'


class AsmCode(Node):
    def __init__(self, asmcode):
        self.asmcode = asmcode

    def __repr__(self):
        return 'ASM CODE'
# applications/ide/compiler/parser.py
"""
 This module parses source code into an abstract syntax tree (AST)
"""

from .symboltable import SymbolTable
from .nodes import *
from .errors import CompilerException, Error
from .modules import loadModule
from .display import printNode
from .builtin import *
from . import assembler


class Parser:
    """ Recursive descent parser.

    The parser consumes the token iterator of the lexer and builds the
    parse tree, doing the type checking on the fly. Errors inside a
    statement are collected in errorlist, other errors are raised as
    CompilerException.
    """
    def __init__(self, tokens):
        """ provide the parser with the tokens iterator from the lexer. """
        self.tokens = tokens
        self.errorlist = []
        self.NextToken()

    def Error(self, msg):
        """ Raise a CompilerException located at the current token """
        raise CompilerException(msg, self.token.row, self.token.col)

    # Lexer helpers:
    def Consume(self, typ=''):
        """ Return the value of the current token and advance to the next
        token. When typ is given, the current token must be of that type. """
        if self.token.typ == typ or typ == '':
            v = self.token.val
            self.NextToken()
            return v
        else:
            self.Error('Expected: "{0}", got "{1}"'.format(typ, self.token.val))

    def hasConsumed(self, typ):
        """ Consume the current token if it is of type typ and tell whether
        this was the case. """
        if self.token.typ == typ:
            self.Consume(typ)
            return True
        return False

    def NextToken(self):
        self.token = next(self.tokens)
        # TODO: store filename in location?
        self.location = (self.token.row, self.token.col)

    # Helpers to find location of the error in the code:
    def setLocation(self, obj, location):
        obj.location = location
        return obj

    def getLocation(self):
        return self.location

    # Recursive descent parser functions:
    # A set of mutual recursive functions.
    # Starting symbol is the Module.
    def parseModule(self):
        """ Parse a complete module and return the Module node """
        self.imports = []
        loc = self.getLocation()
        self.Consume('module')
        modname = self.Consume('ID')
        self.Consume(';')
        mod = Module(modname)

        # Construct a symbol table for this program
        mod.symtable = SymbolTable()
        # Add built in types and functions:
        for x in [real, integer, boolean, char, chr_func]:
            mod.symtable.addSymbol(x)

        self.cst = mod.symtable
        self.parseImportList()

        self.parseDeclarationSequence()
        # Procedures only allowed in this scope
        self.parseProcedureDeclarations()

        if self.hasConsumed('begin'):
            mod.initcode = self.parseStatementSequence()
        else:
            mod.initcode = EmptyStatement()

        self.Consume('end')
        endname = self.Consume('ID')
        if endname != modname:
            self.Error('end denoter must be module name')
        self.Consume('.')

        mod.imports = self.imports
        return self.setLocation(mod, loc)

    # Import part
    def parseImportList(self):
        if self.hasConsumed('import'):
            self.parseImport()
            while self.hasConsumed(','):
                self.parseImport()
            self.Consume(';')

    def parseImport(self):
        loc = self.getLocation()
        modname = self.Consume('ID')
        mod = loadModule(modname)
        self.setLocation(mod, loc)
        self.cst.addSymbol(mod)

    # Helper to parse an identifier definition
    def parseIdentDef(self):
        """ Parse an identifier with optional export mark '*' and return
        an Id node with the attribute ispublic. """
        loc = self.getLocation()
        name = self.Consume('ID')
        ispublic = self.hasConsumed('*')
        # Make a node of this thing:
        i = Id(name)
        i.ispublic = ispublic
        return self.setLocation(i, loc)

    def parseIdentList(self):
        ids = [ self.parseIdentDef() ]
        while self.hasConsumed(','):
            ids.append( self.parseIdentDef() )
        return ids

    def parseQualIdent(self):
        """ Parse a qualified identifier """
        name = self.Consume('ID')
        if self.cst.has(Module, name):
            modname = name
            mod = self.cst.get(Module, modname)
            self.Consume('.')
            name = self.Consume('ID')
            # Try to find existing imported symbol:
            for imp in self.imports:
                if imp.modname == modname and imp.name == name:
                    return imp
            # Try to find the symbol in the modules exports:
            for sym in mod.exports:
                if sym.name == name:
                    impsym = ImportedSymbol(modname, name)
                    impsym.typ = sym.typ
                    impsym.signature = mod.signature
                    self.imports.append(impsym)
                    return impsym
            self.Error("Cannot find symbol {0}".format(name))
        else:
            return self.cst.getSymbol(name)

    # Helper to parse a designator
    def parseDesignator(self):
        """ A designator designates an object.
             The base location in memory is denoted by the qualified identifier
             The actual address depends on the selector.
        """
        loc = self.getLocation()
        obj = self.parseQualIdent()
        typ = obj.typ
        selectors = []
        while self.token.typ in ['.', '[', '^']:
            if self.hasConsumed('.'):
                field = self.Consume('ID')
                # Implicit dereference of a pointer. The class of typ must
                # be tested here, typ itself is never the PointerType class.
                if type(typ) is PointerType:
                    selectors.append(Deref())
                    typ = typ.pointedType
                if not type(typ) is RecordType:
                    self.Error("field reference, type not record but {0}".format(typ))
                if field not in typ.fields:
                    self.Error('record has no field "{0}"'.format(field))
                typ = typ.fields[field]
                selectors.append(Field(field))
            elif self.hasConsumed('['):
                indexes = self.parseExpressionList()
                self.Consume(']')
                for idx in indexes:
                    if not type(typ) is ArrayType:
                        self.Error('Cannot index non array type')
                    if not isType(idx.typ, integer):
                        self.Error('Only integer expressions can be used as an index')
                    selectors.append(Index(idx, typ))
                    typ = typ.elementType
            elif self.hasConsumed('^'):
                if not type(typ) is PointerType:
                    self.Error('Cannot dereference non pointer type {0}'.format(typ))
                selectors.append(Deref())
                typ = typ.pointedType
        return self.setLocation(Designator(obj, selectors, typ), loc)

    # Declaration sequence
    def parseDeclarationSequence(self):
        """ 1. constants, 2. types, 3. variables """
        self.parseConstantDeclarations()
        self.parseTypeDeclarations()
        self.parseVariableDeclarations()

    # Constants
    def evalExpression(self, expr):
        """ Evaluate the constant expression expr at compile time and
        return its value. Only constants, designators of constants and
        unary and binary operations on those can be evaluated. """
        if type(expr) is Binop:
            a = self.evalExpression(expr.a)
            b = self.evalExpression(expr.b)
            try:
                if expr.op == '+':
                    return a + b
                elif expr.op == '-':
                    return a - b
                elif expr.op == '*':
                    return a * b
                elif expr.op == '/':
                    return float(a) / float(b)
                elif expr.op == 'mod':
                    return int(a % b)
                elif expr.op == 'div':
                    # Integer division: exact for big numbers and it
                    # matches mod: (a div b) * b + (a mod b) = a
                    return a // b
                elif expr.op == 'or':
                    return a or b
                elif expr.op == 'and':
                    return a and b
                else:
                    self.Error('Cannot evaluate expression with {0}'.format(expr.op))
            except ZeroDivisionError:
                self.Error('Division by zero in constant expression')
        elif type(expr) is Constant:
            return expr.value
        elif type(expr) is Designator:
            if type(expr.obj) is Constant:
                return self.evalExpression(expr.obj)
            else:
                self.Error('Cannot evaluate designated object {0}'.format(expr.obj))
        elif type(expr) is Unop:
            a = self.evalExpression(expr.a)
            if expr.op == 'not':
                return not a
            elif expr.op == '-':
                return -a
            else:
                self.Error('Unimplemented unary operation {0}'.format(expr.op))
        else:
            self.Error('Cannot evaluate expression {0}'.format(expr))

    def parseConstExpression(self):
        """ Parse an expression and return the tuple (value, type) """
        e = self.parseExpression()
        return self.evalExpression(e), e.typ

    def parseConstantDeclarations(self):
        """ Parse const part of a module """
        if self.hasConsumed('const'):
            while self.token.typ == 'ID':
                i = self.parseIdentDef()
                self.Consume('=')
                constvalue, typ = self.parseConstExpression()
                self.Consume(';')
                c = Constant(constvalue, typ, name=i.name, public=i.ispublic)
                self.setLocation(c, i.location)
                self.cst.addSymbol(c)

    # Type system
    def parseTypeDeclarations(self):
        if self.hasConsumed('type'):
            while self.token.typ == 'ID':
                # parseIdentDef returns one Id node, not a tuple:
                i = self.parseIdentDef()
                self.Consume('=')
                typ = self.parseStructuredType()
                self.Consume(';')
                t = DefinedType(i.name, typ)
                self.setLocation(t, i.location)
                self.cst.addSymbol(t)

    def parseType(self):
        """ Parse a named type or a structured type. Named types are
        resolved to the type they stand for. """
        if self.token.typ == 'ID':
            typename = self.Consume('ID')
            if self.cst.has(Type, typename):
                typ = self.cst.get(Type, typename)
                while type(typ) is DefinedType:
                    typ = typ.typ
                return typ
            else:
                self.Error('Cannot find type {0}'.format(typename))
        else:
            return self.parseStructuredType()

    def parseStructuredType(self):
        if self.hasConsumed('array'):
            dimensions = []
            dimensions.append( self.parseConstExpression() )
            while self.hasConsumed(','):
                dimensions.append( self.parseConstExpression() )
            self.Consume('of')
            arr = self.parseType()
            # The last dimension is the innermost array:
            for dimension, consttyp in reversed(dimensions):
                if not isType(consttyp, integer):
                    self.Error('array dimension must be an integer type (not {0})'.format(consttyp))
                if dimension < 2:
                    self.Error('array dimension must be bigger than 1 (not {0})'.format(dimension))
                arr = ArrayType(dimension, arr)
            return arr
        elif self.hasConsumed('record'):
            fields = {}
            while self.token.typ == 'ID':
                # parse a fieldlist:
                identifiers = self.parseIdentList()
                self.Consume(':')
                typ = self.parseType()
                self.Consume(';')
                for i in identifiers:
                    if i.name in fields:
                        self.Error('record field "{0}" multiple defined.'.format(i.name))
                    fields[i.name] = typ
                # TODO store this in another way, symbol table?
            self.Consume('end')
            return RecordType(fields)
        elif self.hasConsumed('pointer'):
            self.Consume('to')
            typ = self.parseType()
            return PointerType(typ)
        elif self.hasConsumed('procedure'):
            # parseFormalParameters returns the complete ProcedureType:
            return self.parseFormalParameters()
        else:
            self.Error('Unknown structured type "{0}"'.format(self.token.val))

    # Variable declarations:
    def parseVariableDeclarations(self):
        if self.hasConsumed('var'):
            if self.token.typ == 'ID':
                while self.token.typ == 'ID':
                    ids = self.parseIdentList()
                    self.Consume(':')
                    typename = self.parseType()
                    self.Consume(';')
                    for i in ids:
                        v = Variable(i.name, typename, public=i.ispublic)
                        self.setLocation(v, i.location)
                        self.cst.addSymbol(v)
            else:
                self.Error('Expected ID, got'+str(self.token))

    # Procedures
    def parseFPsection(self):
        """ Parse one section of formal parameters that share kind and
        type and return the list of Parameter nodes. """
        if self.hasConsumed('const'):
            kind = 'const'
        elif self.hasConsumed('var'):
            kind = 'var'
        else:
            kind = 'value'
        names = [ self.Consume('ID') ]
        while self.hasConsumed(','):
            names.append( self.Consume('ID') )
        self.Consume(':')
        typ = self.parseType()
        parameters = [Parameter(kind, name, typ)
                      for name in names]
        return parameters

    def parseFormalParameters(self):
        """ Parse the parameter list and the optional return type and
        return them as ProcedureType. Without return type, the return type
        is void. """
        parameters = []
        self.Consume('(')
        if not self.hasConsumed(')'):
            parameters += self.parseFPsection()
            while self.hasConsumed(';'):
                parameters += self.parseFPsection()
            self.Consume(')')
        if self.hasConsumed(':'):
            returntype = self.parseQualIdent()
        else:
            returntype = void
        return ProcedureType(parameters, returntype)

    def parseProcedureDeclarations(self):
        procedures = []
        while self.token.typ == 'procedure':
            p = self.parseProcedureDeclaration()
            procedures.append(p)
            self.Consume(';')
        return procedures

    def parseProcedureDeclaration(self):
        loc = self.getLocation()
        self.Consume('procedure')
        i = self.parseIdentDef()
        procname = i.name
        proctyp = self.parseFormalParameters()
        procsymtable = SymbolTable(parent = self.cst)
        self.cst = procsymtable    # Switch symbol table:
        # Add parameters as variables to symbol table:
        for parameter in proctyp.parameters:
            vname = parameter.name
            vtyp = parameter.typ
            if parameter.kind == 'var':
                vtyp = PointerType(vtyp)
            variable = Variable(vname, vtyp, False)
            if parameter.kind == 'const':
                variable.isReadOnly = True
            variable.isParameter = True
            self.cst.addSymbol(variable)
        self.Consume(';')
        self.parseDeclarationSequence()
        # Mark all variables as local:
        for variable in self.cst.getAllLocal(Variable):
            variable.isLocal = True

        if self.hasConsumed('begin'):
            block = self.parseStatementSequence()
        else:
            # A procedure without statements has an empty body:
            block = EmptyStatement()
        if self.hasConsumed('return'):
            returnexpression = self.parseExpression()
        else:
            returnexpression = None

        if isType(proctyp.returntype, void):
            if not returnexpression is None:
                self.Error('Void procedure cannot return a value')
        else:
            if returnexpression is None:
                self.Error('Procedure must return a value')
            if not isType(returnexpression.typ, proctyp.returntype):
                self.Error('Returned type {0} does not match function return type {1}'.format(returnexpression.typ, proctyp.returntype))

        self.Consume('end')
        endname = self.Consume('ID')
        if endname != procname:
            self.Error('endname should match {0}'.format(procname))
        self.cst = procsymtable.parent    # Switch back to parent symbol table
        proc = Procedure(procname, proctyp, block, procsymtable, returnexpression)
        self.setLocation(proc, loc)
        self.cst.addSymbol(proc)
        proc.public = i.ispublic
        return proc

    # Statements:
    def parseAssignment(self, lval):
        loc = self.getLocation()
        self.Consume(':=')
        rval = self.parseExpression()
        # Automatic type cast when an integer is assigned to a real:
        if isType(lval.typ, real) and isType(rval.typ, integer):
            rval = Unop(rval, 'INTTOREAL', real)
        if type(rval.typ) is NilType:
            if not type(lval.typ) is ProcedureType and not type(lval.typ) is PointerType:
                self.Error('Can assign nil only to pointers or procedure types, not {0}'.format(lval))
        elif not isType(lval.typ, rval.typ):
            self.Error('Type mismatch {0} != {1}'.format(lval.typ, rval.typ))
        return self.setLocation(Assignment(lval, rval), loc)

    def parseExpressionList(self):
        expressions = [ self.parseExpression() ]
        while self.hasConsumed(','):
            expressions.append( self.parseExpression() )
        return expressions

    def parseProcedureCall(self, procedure):
        self.Consume('(')
        if self.token.typ != ')':
            args = self.parseExpressionList()
        else:
            args = []
        self.Consume(')')
        parameters = procedure.typ.parameters
        if len(args) != len(parameters):
            self.Error("Procedure requires {0} arguments, {1} given".format(len(parameters), len(args)))
        for arg, param in zip(args, parameters):
            # The isType function is used here, because not every typ has
            # an isType method (the typ of a string constant is a str).
            if not isType(arg.typ, param.typ):
                print(arg.typ, param.typ)
                self.Error('Mismatch in parameter')
        return ProcedureCall(procedure, args)

    def parseIfStatement(self):
        loc = self.getLocation()
        self.Consume('if')
        ifs = []
        condition = self.parseExpression()
        if not isType(condition.typ, boolean):
            self.Error('condition of if statement must be boolean')
        self.Consume('then')
        truestatement = self.parseStatementSequence()
        ifs.append( (condition, truestatement) )
        while self.hasConsumed('elsif'):
            condition = self.parseExpression()
            if not isType(condition.typ, boolean):
                self.Error('condition of if statement must be boolean')
            self.Consume('then')
            truestatement = self.parseStatementSequence()
            ifs.append( (condition, truestatement) )
        if self.hasConsumed('else'):
            statement = self.parseStatementSequence()
        else:
            statement = None
        self.Consume('end')
        # Build the nested if statements, starting with the last elsif:
        for condition, truestatement in reversed(ifs):
            statement = IfStatement(condition, truestatement, statement)
        return self.setLocation(statement, loc)

    def parseCase(self):
        # TODO
        pass

    def parseCaseStatement(self):
        # TODO: no node is constructed for the case statement yet.
        self.Consume('case')
        expr = self.parseExpression()
        self.Consume('of')
        self.parseCase()
        while self.hasConsumed('|'):
            self.parseCase()
        self.Consume('end')

    def parseWhileStatement(self):
        loc = self.getLocation()
        self.Consume('while')
        condition = self.parseExpression()
        self.Consume('do')
        statements = self.parseStatementSequence()
        if self.hasConsumed('elsif'):
            self.Error('elsif in while not yet implemented')
        self.Consume('end')
        return self.setLocation(WhileStatement(condition, statements), loc)

    def parseRepeatStatement(self):
        self.Consume('repeat')
        stmt = self.parseStatementSequence()
        self.Consume('until')
        cond = self.parseExpression()
        if not isType(cond.typ, boolean):
            self.Error('condition of repeat statement must be boolean')
        # There is no parse tree node for this statement yet, so report
        # this as a normal compiler error:
        self.Error('repeat statement not yet implemented')

    def parseForStatement(self):
        loc = self.getLocation()
        self.Consume('for')
        variable = self.parseDesignator()
        if not isType(variable.typ, integer):
            self.Error('loop variable of for statement must have integer type')
        self.Consume(':=')
        begin = self.parseExpression()
        if not isType(begin.typ, integer):
            self.Error('begin expression of a for statement must have integer type')
        self.Consume('to')
        end = self.parseExpression()
        if not isType(end.typ, integer):
            self.Error('end expression of a for statement must have integer type')
        if self.hasConsumed('by'):
            increment, typ = self.parseConstExpression()
            if not isType(typ, integer):
                self.Error('Increment must be integer')
        else:
            increment = 1
        # No assert here: asserts disappear when python runs with -O.
        if type(increment) is not int:
            self.Error('Increment must be integer')
        self.Consume('do')
        statements = self.parseStatementSequence()
        self.Consume('end')
        return self.setLocation(ForStatement(variable, begin, end, increment, statements), loc)

    def parseAsmcode(self):
        """ Parse an asm .. end block into an AsmCode node, containing a
        list of (opcode, operands) tuples. """
        # TODO: move this to separate file
        def parseOpcode():
            return self.Consume('ID')

        def parseOperand():
            if self.hasConsumed('['):
                # Memory reference, represented as a list:
                memref = []
                memref.append(parseOperand())
                self.Consume(']')
                return memref
            else:
                if self.token.typ == 'NUMBER':
                    return self.Consume('NUMBER')
                else:
                    ID = self.Consume('ID')
                    if self.cst.has(Variable, ID):
                        return self.cst.get(Variable, ID)
                    else:
                        return ID

        def parseOperands(n):
            operands = []
            if n > 0:
                operands.append( parseOperand() )
                n = n - 1
                while n > 0:
                    self.Consume(',')
                    operands.append(parseOperand())
                    n = n - 1
            return operands

        self.Consume('asm')
        asmcode = []
        while self.token.typ != 'end':
            opcode = parseOpcode()
            if opcode not in assembler.opcodes:
                self.Error('Unknown opcode {0}'.format(opcode))
            # The opcode table tells the number of operands:
            func, numargs = assembler.opcodes[opcode]
            operands = parseOperands(numargs)
            asmcode.append( (opcode, operands) )
        self.Consume('end')
        return AsmCode(asmcode)

    def parseStatement(self):
        """ Parse one statement.

        A compiler error inside the statement is printed and stored in
        errorlist. After that, tokens are skipped until the next ';' or
        'end' and an empty statement is returned. """
        try:
            # Determine statement type based on the pending token:
            if self.token.typ == 'if':
                return self.parseIfStatement()
            elif self.token.typ == 'case':
                return self.parseCaseStatement()
            elif self.token.typ == 'while':
                return self.parseWhileStatement()
            elif self.token.typ == 'repeat':
                return self.parseRepeatStatement()
            elif self.token.typ == 'for':
                return self.parseForStatement()
            elif self.token.typ == 'asm':
                return self.parseAsmcode()
            elif self.token.typ == 'ID':
                # Assignment or procedure call
                designator = self.parseDesignator()
                if self.token.typ == '(' and type(designator.typ) is ProcedureType:
                    return self.parseProcedureCall(designator)
                elif self.token.typ == ':=':
                    return self.parseAssignment(designator)
                else:
                    self.Error('Unknown statement following designator: {0}'.format(self.token))
            else:
                # TODO: return empty statement??:
                return EmptyStatement()
        except CompilerException as e:
            print(e)
            self.errorlist.append( (e.row, e.col, e.msg))
            # Do error recovery by skipping all tokens until next ; or end.
            # Stop at the END token too, there are no tokens after it.
            while self.token.typ not in (';', 'end', 'END'):
                self.NextToken()
            return EmptyStatement()

    def parseStatementSequence(self):
        """ Sequence of statements separated by ';' """
        statements = [ self.parseStatement() ]
        while self.hasConsumed(';'):
            statements.append( self.parseStatement() )
        return StatementSequence( statements )

    # Parsing expressions:
    #
    # grammar of expressions:
    # expression       = SimpleExpression [ reloperator SimpleExpression ]
    # reloperator      = '=' | '<=' | '>=' | '<>'
    # Simpleexpression = [ '+' | '-' ] term { addoperator term }
    # addoperator      = '+' | '-' | 'or'
    # term             = factor { muloperator factor }
    # muloperator      = '*' | '/' | 'div' | 'mod' | 'and'
    # factor           = number | nil | true | false | "(" expression ")" |
    #                    designator [ actualparameters ] | 'not' factor
    def parseExpression(self):
        """ The connector between the boolean and expression domain """
        expr = self.parseSimpleExpression()
        if self.token.typ in ['>=','<=','<','>','<>','=']:
            relop = self.Consume()
            expr2 = self.parseSimpleExpression()
            # Automatic type convert to reals:
            if isType(expr.typ, real) and isType(expr2.typ, integer):
                expr2 = Unop(expr2, 'INTTOREAL', real)
            if isType(expr2.typ, real) and isType(expr.typ, integer):
                expr = Unop(expr, 'INTTOREAL', real)
            # Type check:
            if not isType(expr.typ, expr2.typ):
                self.Error('Type mismatch in relop')
            if isType(expr.typ, real) and relop in ['<>', '=']:
                self.Error('Cannot check real values for equality')

            expr = Relop(expr, relop, expr2, boolean)
        return expr

    # Parsing arithmetic expressions:
    def parseTerm(self):
        """ term = factor { muloperator factor } """
        a = self.parseFactor()
        while self.token.typ in ['*', '/', 'mod', 'div', 'and']:
            loc = self.getLocation()
            op = self.Consume()
            # The right operand is one factor, so the operators are left
            # associative: a / b * c is (a / b) * c. Parsing a complete
            # term here would result in a / (b * c).
            b = self.parseFactor()
            # Type determination and checking:
            if op in ['mod', 'div']:
                if not isType(a.typ, integer):
                    self.Error('First operand should be integer, not {0}'.format(a.typ))
                if not isType(b.typ, integer):
                    self.Error('Second operand should be integer, not {0}'.format(b.typ))
                typ = integer
            elif op == '*':
                if isType(a.typ, integer) and isType(b.typ, integer):
                    typ = integer
                elif isType(a.typ, real) or isType(b.typ, real):
                    if isType(a.typ, integer):
                        # Automatic type cast
                        a = Unop(a, 'INTTOREAL', real)
                    if isType(b.typ, integer):
                        b = Unop(b, 'INTTOREAL', real)
                    if not isType(a.typ, real):
                        self.Error('first operand must be a real!')
                    if not isType(b.typ, real):
                        self.Error('second operand must be a real!')
                    typ = real
                else:
                    self.Error('Unknown operands for multiply: {0}, {1}'.format(a, b))
            elif op == '/':
                # Division always yields a real result, for integer division use div
                if isType(a.typ, integer):
                    # Automatic type cast
                    a = Unop(a, 'INTTOREAL', real)
                if isType(b.typ, integer):
                    b = Unop(b, 'INTTOREAL', real)
                if not isType(a.typ, real):
                    self.Error('first operand must be a real!')
                if not isType(b.typ, real):
                    self.Error('second operand must be a real!')
                typ = real
            elif op == 'and':
                if not isType(a.typ, boolean):
                    self.Error('First operand of and must be boolean')
                if not isType(b.typ, boolean):
                    self.Error('Second operand of and must be boolean')
                typ = boolean
            else:
                self.Error('Unknown operand {0}'.format(op))

            a = self.setLocation(Binop(a, op, b, typ), loc)
        return a

    def parseFactor(self):
        if self.hasConsumed('('):
            e = self.parseExpression()
            self.Consume(')')
            return e
        elif self.token.typ == 'NUMBER':
            loc = self.getLocation()
            val = self.Consume('NUMBER')
            return self.setLocation(Constant(val, integer), loc)
        elif self.token.typ == 'REAL':
            loc = self.getLocation()
            val = self.Consume('REAL')
            return self.setLocation(Constant(val, real), loc)
        elif self.token.typ == 'CHAR':
            val = self.Consume('CHAR')
            return Constant(val, char)
        elif self.token.typ == 'STRING':
            txt = self.Consume('STRING')
            return StringConstant(txt)
        elif self.token.typ in ['true', 'false']:
            val = self.Consume()
            val = True if val == 'true' else False
            return Constant(val, boolean)
        elif self.hasConsumed('nil'):
            return Constant(0, NilType())
        elif self.hasConsumed('not'):
            f = self.parseFactor()
            if not isType(f.typ, boolean):
                self.Error('argument of boolean negation must be boolean type')
            return Unop(f, 'not', boolean)
        elif self.token.typ == 'ID':
            designator = self.parseDesignator()
            # TODO: handle functions different here?
            if self.token.typ == '(' and type(designator.typ) is ProcedureType:
                return self.parseProcedureCall(designator)
            else:
                return designator
        else:
            self.Error('Expected NUMBER, ID or ( expr ), got'+str(self.token))

    def parseSimpleExpression(self):
        """ Arithmetic expression """
        if self.token.typ in ['+', '-']:
            # Handle the unary minus
            op = self.Consume()
            a = self.parseTerm()
            typ = a.typ
            if not isType(typ,real) and not isType(typ, integer):
                self.Error('Unary minus or plus can be only applied to real or integers')
            # The unary plus does not need an operation:
            if op == '-':
                a = Unop(a, op, typ)
        else:
            a = self.parseTerm()
        while self.token.typ in ['+', '-', 'or']:
            loc = self.getLocation()
            op = self.Consume()
            b = self.parseTerm()
            if op in ['+', '-']:
                if isType(a.typ, real) or isType(b.typ, real):
                    typ = real
                    if isType(a.typ, integer):
                        # Automatic type cast
                        a = Unop(a, 'INTTOREAL', real)
                    if not isType(a.typ, real):
                        self.Error('first operand must be a real!')
                    if isType(b.typ, integer):
                        b = Unop(b, 'INTTOREAL', real)
                    if not isType(b.typ, real):
                        self.Error('second operand must be a real!')
                elif isType(a.typ, integer) and isType(b.typ, integer):
                    typ = integer
                else:
                    self.Error('Invalid types {0} and {1}'.format(a.typ, b.typ))
            elif op == 'or':
                if not isType(a.typ, boolean):
                    self.Error('first operand must be boolean for or operation')
                if not isType(b.typ, boolean):
                    self.Error('second operand must be boolean for or operation')
                typ = boolean
            else:
                self.Error('Unknown operand {0}'.format(op))
            a = self.setLocation(Binop(a, op, b, typ), loc)
        return a
# File: applications/ide/compiler/symboltable.py (new file in this changeset)
from .nodes import *
from .errors import Error

class SymbolTable:
    """
      Symbol table for a current scope.
      It has functions:
       - hasSymbol for checking for a name in current scope or above
       - addSymbol to add an object
       - get / has for lookups restricted to a given class of symbol
    """
    def __init__(self, parent=None):
        # parent is the enclosing scope's table; None for the outermost one.
        self.parent = parent
        # Maps symbol name -> symbol object for this scope only.
        self.syms = {}

    def __repr__(self):
        return 'Symboltable with {0} symbols\n'.format(len(self.syms))

    def printTable(self, indent=0):
        """ Print every symbol of this scope (indent is currently unused) """
        for sym in self.syms.values():
            print(sym)

    def getAllLocal(self, cls):
        """ Get all local objects of a specific type """
        return [sym for sym in self.syms.values() if isinstance(sym, cls)]

    def getLocal(self, cls, name):
        """ Return the symbol `name` from this scope only, if it is a `cls`.

        Problems are reported through Error (imported from .errors).
        """
        if name in self.syms:
            sym = self.syms[name]
            if isinstance(sym, cls):
                return sym
            else:
                Error('Wrong type found')
        else:
            Error('Symbol not found')

    # Retrieving of specific classes of items:
    def get(self, cls, name):
        """ Return symbol `name` (this scope or a parent) if it is a `cls`.

        Raises SymbolException otherwise.
        """
        if self.hasSymbol(name):
            sym = self.getSymbol(name)
            if isinstance(sym, cls):
                return sym
        # The message used to reference an undefined variable `typename`,
        # which turned every failed lookup into a NameError.
        # NOTE(review): SymbolException is not defined in this file;
        # presumably it comes from the star import of .nodes - confirm.
        raise SymbolException('type {0} undefined'.format(name))

    def has(self, cls, name):
        """ True if `name` resolves (here or in a parent) to a `cls` """
        if self.hasSymbol(name):
            sym = self.getSymbol(name)
            if isinstance(sym, cls):
                return True
        return False

    # Adding and retrieving of symbols in general:
    def addSymbol(self, sym):
        """ Add `sym` under sym.name; raises Exception on redefinition """
        if sym.name in self.syms:
            raise Exception('Symbol "{0}" redefined'.format(sym.name))
        else:
            self.syms[sym.name] = sym

    def getSymbol(self, name):
        """ Look `name` up in this scope, then in the parent scopes """
        if name in self.syms:
            return self.syms[name]
        else:
            if self.parent:
                return self.parent.getSymbol(name)
            else:
                Error('Symbol "{0}" undeclared!'.format(name))

    def hasSymbol(self, name):
        """ True if `name` is defined in this scope or any parent scope """
        if name in self.syms:
            return True
        else:
            if self.parent:
                return self.parent.hasSymbol(name)
            else:
                return False
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/applications/ide/ide/__init__.py Fri Feb 03 18:40:43 2012 +0100 @@ -0,0 +1,2 @@ +# Package +
# File: applications/ide/ide/astviewer.py (new file in this changeset)
from PyQt4.QtCore import *
from PyQt4.QtGui import *

def astToNamedElement(astNode, parentNode):
    """ Mirror an AST (sub)tree as QStandardItems below parentNode.

    One item is appended to parentNode for astNode, carrying the node as
    item data, and the node's children are added recursively below it.
    """
    node_item = QStandardItem(str(astNode))
    node_item.setData(astNode)
    parentNode.appendRow(node_item)
    for child in astNode.getChildren():
        astToNamedElement(child, node_item)

# The actual widget:
class AstViewer(QTreeView):
    """ Tree view of an AST; emits sigNodeSelected for a clicked node """
    sigNodeSelected = pyqtSignal(object)

    def __init__(self, parent=None):
        super(AstViewer, self).__init__(parent)
        self.setHeaderHidden(True)
        self.clicked.connect(self.selectHandler)

    def setAst(self, ast):
        """ Replace the model with a fresh one filled from ast (if any) """
        tree_model = QStandardItemModel()
        if ast:
            astToNamedElement(ast, tree_model.invisibleRootItem())
        self.setModel(tree_model)
        self.expandAll()

    def selectHandler(self, index):
        """ Emit the AST node stored on the clicked item """
        if index.isValid():
            clicked_item = self.model().itemFromIndex(index)
            self.sigNodeSelected.emit(clicked_item.data())
# File: applications/ide/ide/codeeditor.py (new file in this changeset)
from PyQt4.QtCore import *
from PyQt4.QtGui import *
import compiler.lexer
import os.path

class MySyntaxHighlighter(QSyntaxHighlighter):
    """ Highlights the lexer keywords and { ... } comments """
    def __init__(self, parent=None):
        super(MySyntaxHighlighter, self).__init__(parent)
        # Syntax highlighting: list of (regex pattern, QTextCharFormat)
        self.rules = []
        fmt = QTextCharFormat()
        fmt.setForeground(Qt.darkBlue)
        fmt.setFontWeight(QFont.Bold)
        for kw in compiler.lexer.keywords:
            pattern = '\\b'+kw+'\\b'
            self.rules.append( (pattern, fmt) )

        # Comments:
        fmt = QTextCharFormat()
        fmt.setForeground(Qt.gray)
        fmt.setFontItalic(True)
        # Raw string: same characters as before, without relying on
        # Python passing unknown escape sequences through unchanged.
        pattern = r'\{.*\}'
        self.rules.append( (pattern, fmt) )

        # Procedure:
        fmt = QTextCharFormat()
        fmt.setForeground(Qt.blue)
        fmt.setFontItalic(True)
        #pattern = '(?<=procedure )[A-Za-z]'
        # TODO lookbehind does not work, think something else
        #self.rules.append( (pattern, fmt) )

    def highlightBlock(self, text):
        """ Apply the format of every rule to each of its matches in text """
        for pattern, fmt in self.rules:
            expression = QRegExp(pattern)
            index = expression.indexIn(text)
            while index >= 0:
                length = expression.matchedLength()
                self.setFormat(index, length, fmt)
                index = expression.indexIn(text, index + length)

class LineNumberArea(QWidget):
    """ Margin widget; sizing and painting are delegated to the CodeEdit """
    def __init__(self, codeedit):
        super(LineNumberArea, self).__init__(codeedit)
        self.codeedit = codeedit
        # TODO: display error in this: self.setToolTip('hello world')

    def sizeHint(self):
        return QSize(self.codeedit.lineNumberAreaWidth(), 0)

    def paintEvent(self, ev):
        self.codeedit.lineNumberAreaPaintEvent(ev)

class CodeEdit(QPlainTextEdit):
    """ Plain text editor with a line number margin and syntax colouring """
    def __init__(self, parent=None):
        super(CodeEdit, self).__init__(parent)
        # members:
        self.isUntitled = True
        self.filename = None
        self.setFont(QFont('Courier'))
        self.lineNumberArea = LineNumberArea(self)

        self.blockCountChanged.connect(self.updateLineNumberAreaWidth)
        self.updateRequest.connect(self.updateLineNumberArea)
        # Reserve room for the margin right away; otherwise the margin is
        # only applied after the first block count change and the number
        # area is painted over the text until then.
        self.updateLineNumberAreaWidth(0)

        # Syntax highlighter:
        self.highlighter = MySyntaxHighlighter(self.document())

    def setFileName(self, filename):
        """ Remember the file name, mark the editor as titled, set the title """
        self.filename = filename
        self.isUntitled = False
        self.setWindowTitle(filename)

    def setSource(self, source):
        self.setPlainText(source)

    def save(self):
        pass

    def saveAs(self):
        pass

    def saveFile(self):
        """ Write the text to self.filename, or defer to saveAs if untitled """
        if self.isUntitled:
            self.saveAs()
        else:
            source = str(self.toPlainText())
            # 'with' closes the file even when the write fails
            with open(self.filename, 'w') as f:
                f.write(source)

    def highlightErrorLocation(self, row, col):
        """ Mark the single character at 1-based (row, col) with a red background """
        tc = QTextCursor(self.document())
        tc.clearSelection()
        tc.movePosition(tc.Down, tc.MoveAnchor, row - 1)
        tc.movePosition(tc.Right, tc.MoveAnchor, col - 1)
        tc.movePosition(tc.NextCharacter, tc.KeepAnchor) # Select 1 character
        selection = QTextEdit.ExtraSelection()
        lineColor = QColor(Qt.red).lighter(160)
        selection.format.setBackground(lineColor)
        #selection.format.setProperty(QTextFormat.FullWidthSelection, True)
        selection.cursor = tc
        self.setExtraSelections( [ selection ] )

    def clearErrors(self):
        self.setExtraSelections( [ ] )

    def lineNumberAreaWidth(self):
        """ Width in pixels needed to show the highest line number """
        # Number of decimal digits of the block count (at least one)
        digits = len(str(max(1, self.blockCount())))
        space = 3 + self.fontMetrics().width('8') * digits
        return space

    def lineNumberAreaPaintEvent(self, ev):
        """ Paint the numbers of the blocks intersecting the event rect """
        painter = QPainter(self.lineNumberArea)
        painter.fillRect(ev.rect(), Qt.lightGray)
        block = self.firstVisibleBlock()
        blockNumber = block.blockNumber()
        top = self.blockBoundingGeometry(block).translated(self.contentOffset()).top()
        bottom = top + self.blockBoundingRect(block).height()
        while block.isValid() and top <= ev.rect().bottom():
            if block.isVisible() and bottom >= ev.rect().top():
                num = str(blockNumber + 1)
                painter.setPen(Qt.black)
                painter.drawText(0, top, self.lineNumberArea.width(), self.fontMetrics().height(), Qt.AlignRight, num)
            block = block.next()
            top = bottom
            bottom = top + self.blockBoundingRect(block).height()
            blockNumber += 1

    def resizeEvent(self, ev):
        super(CodeEdit, self).resizeEvent(ev)
        cr = self.contentsRect()
        self.lineNumberArea.setGeometry(QRect(cr.left(), cr.top(), self.lineNumberAreaWidth(), cr.height() ))

    def updateLineNumberAreaWidth(self, newBlockCount):
        """ Set the left viewport margin to the margin width (argument unused) """
        self.setViewportMargins(self.lineNumberAreaWidth(), 0, 0, 0)

    def updateLineNumberArea(self, rect, dy):
        """ Keep the margin in step with the editor viewport """
        # Any vertical scroll, up or down, moves the margin along with the
        # text.  Testing 'dy > 0' sent one scroll direction through the
        # repaint branch instead.
        if dy:
            self.lineNumberArea.scroll(0, dy)
        else:
            self.lineNumberArea.update(0, rect.y(), self.lineNumberArea.width(), rect.height())
        if rect.contains(self.viewport().rect()):
            self.updateLineNumberAreaWidth(0)
# File: applications/ide/ide/ide.py (new file in this changeset)
from PyQt4.QtCore import *
from PyQt4.QtGui import *
# ide components:
from .codeeditor import CodeEdit
from .astviewer import AstViewer
import base64
from project import Project
import os.path

# base64.decodestring no longer exists in current Python 3 releases;
# decodebytes is its replacement.  Fall back to the old name only when the
# new one is missing.
_b64decode = getattr(base64, 'decodebytes', None) or base64.decodestring

# Application icon (PNG data, base64 encoded)
lcfospng = _b64decode(b'iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAAZiS0dEAP8A\n/wD/oL2nkwAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB9sJEhMKBk7B678AAAA/SURBVFjD\n7dbBCQAgDATBi9h/y7EFA4Kf2QLCwH1S6XQu6sqoujublc8BAAAAAAAAAAB8B+zXT6YJAAAAAKYd\nWSgFQNUyijIAAAAASUVORK5CYII=\n')

class BuildOutput(QTextEdit):
    """ Build output component """
    def __init__(self, parent=None):
        super(BuildOutput, self).__init__(parent)
        self.setCurrentFont(QFont('Courier'))
        self.setReadOnly(True)
        self.append('Build output will appear here!')

class BuildErrors(QListView):
    """ List of build errors; emits sigErrorSelected for a clicked entry """
    sigErrorSelected = pyqtSignal(object)

    def __init__(self, parent=None):
        super(BuildErrors, self).__init__(parent)
        model = QStandardItemModel()
        self.setModel(model)
        self.clicked.connect(self.itemSelected)

    def setErrorList(self, errorlist):
        """ Show errorlist; each entry unpacks as (row, col, msg) """
        model = QStandardItemModel()
        for e in errorlist:
            row, col, msg = e
            item = QStandardItem(str(msg))
            item.setData(e)
            model.appendRow(item)
        self.setModel(model)

    def itemSelected(self, index):
        if not index.isValid():
            return
        model = self.model()
        item = model.itemFromIndex(index)
        err = item.data()
        self.sigErrorSelected.emit(err)

class ProjectView(QWidget):
    """ Tree with the project and its files; emits sigLoadFile(filename) """
    sigLoadFile = pyqtSignal(str)

    def __init__(self, parent=None):
        super(ProjectView, self).__init__(parent)
        self.treeview = QTreeView(self)
        self.treeview.setContextMenuPolicy(Qt.CustomContextMenu)
        l = QVBoxLayout(self)
        l.addWidget(self.treeview)
        pm = QPixmap()
        pm.loadFromData(lcfospng)
        self.projectIcon = QIcon(pm)
        # Connect signals:
        self.treeview.activated.connect(self.activate)
        self.treeview.customContextMenuRequested.connect(self.contextMenu)

    def setProject(self, project):
        """ Rebuild the tree: one project item with one child per file """
        self.project = project
        model = QStandardItemModel()
        root = model.invisibleRootItem()
        pitem = QStandardItem(self.projectIcon, project.name)
        pitem.setEditable(False)
        pitem.setData(project)
        root.appendRow(pitem)
        for f in self.project.files:
            fitem = QStandardItem(f)
            pitem.appendRow(fitem)
            fitem.setEditable(False)
            fitem.setData(f)
        self.treeview.setModel(model)
        self.treeview.expandAll()

    def contextMenu(self, pos):
        # Placeholder: the item under the cursor is resolved, but no menu
        # is shown yet.
        idx = self.treeview.indexAt(pos)
        if not idx.isValid():
            return
        item = self.treeview.model().itemFromIndex(idx)

    def activate(self, index):
        """ Emit sigLoadFile when the activated item carries a file name """
        if not index.isValid():
            return
        model = self.treeview.model()
        item = model.itemFromIndex(index)
        fn = item.data()
        # File items carry their name as data; the project item carries
        # the project object and is ignored here.
        if type(fn) is str:
            self.sigLoadFile.emit(fn)

class AboutDialog(QDialog):
    def __init__(self, parent=None):
        super(AboutDialog, self).__init__(parent)
        self.setWindowTitle('About')
        l = QVBoxLayout(self)
        txt = QTextEdit(self)
        txt.setReadOnly(True)
        aboutText = """<h1>lcfOS IDE</h1>
        <p>An all-in-one IDE for OS development.</p>
        <p>https://www.assembla.com/spaces/lcfOS/wiki</p>
        <p>Author: Windel Bouwman</p>
        """
        txt.append(aboutText)
        l.addWidget(txt)
        but = QPushButton('OK')
        but.setDefault(True)
        but.clicked.connect(self.close)
        l.addWidget(but)

class ProjectOptions(QDialog):
    pass
    # TODO: project options in here

class Ide(QMainWindow):
    """ Main window: MDI editors plus dockable build/AST/project views.

    The `compiler` attribute is expected to be assigned by the caller
    before a build is started.
    """
    def __init__(self, parent=None):
        super(Ide, self).__init__(parent)
        self.setWindowTitle('LCFOS IDE')
        icon = QPixmap()
        icon.loadFromData(lcfospng)
        self.setWindowIcon(QIcon(icon))

        # No project / compiler until one is loaded or assigned.  Without
        # these defaults closeEvent, loadFile and buildFile raise
        # AttributeError when no project was ever opened.
        self.project = None
        self.compiler = None

        # Create menus:
        self.fileMenu = self.menuBar().addMenu('File')
        self.viewMenu = self.menuBar().addMenu('View')
        self.projectMenu = self.menuBar().addMenu('Project')
        self.helpMenu = self.menuBar().addMenu('Help')

        # Create mdi area:
        self.mdiArea = QMdiArea()
        self.setCentralWidget(self.mdiArea)

        # Create components:
        self.buildOutput = BuildOutput()
        self.addComponent('Build output', self.buildOutput)

        self.astViewer = AstViewer()
        self.addComponent('AST viewer', self.astViewer)
        self.astViewer.sigNodeSelected.connect(self.nodeSelected)

        self.builderrors = BuildErrors()
        self.addComponent('Build errors', self.builderrors)
        self.builderrors.sigErrorSelected.connect(self.errorSelected)

        self.projectview = ProjectView()
        self.addComponent('Project', self.projectview)
        self.projectview.sigLoadFile.connect(self.loadFile)

        # About dialog:
        self.aboutDialog = AboutDialog()
        self.aboutDialog.setWindowIcon(QIcon(icon))
        # Create actions:
        self.buildAction = QAction('Build!', self)
        self.buildAction.setShortcut(QKeySequence('F7'))
        self.projectMenu.addAction(self.buildAction)
        self.buildAction.triggered.connect(self.buildFile)
        self.openProjectAction = QAction("Open project", self)
        self.openProjectAction.triggered.connect(self.openProject)
        self.projectMenu.addAction(self.openProjectAction)
        self.helpAction = QAction('Help', self)
        self.helpAction.setShortcut(QKeySequence('F1'))
        self.helpMenu.addAction(self.helpAction)
        self.aboutAction = QAction('About', self)
        self.helpMenu.addAction(self.aboutAction)
        self.aboutAction.triggered.connect(self.aboutDialog.open)

        self.newFileAction = QAction("New File", self)
        self.fileMenu.addAction(self.newFileAction)
        self.newFileAction.triggered.connect(self.newFile)
        self.saveFileAction = QAction("Save File", self)
        self.fileMenu.addAction(self.saveFileAction)
        self.saveFileAction.triggered.connect(self.saveFile)
        self.closeFileAction = QAction("Close File", self)
        self.fileMenu.addAction(self.closeFileAction)
        self.closeFileAction.triggered.connect(self.closeFile)

        cascadeAction = QAction("Cascade windows", self)
        cascadeAction.triggered.connect(self.mdiArea.cascadeSubWindows)
        self.viewMenu.addAction(cascadeAction)
        tileAction = QAction('Tile windows', self)
        tileAction.triggered.connect(self.mdiArea.tileSubWindows)
        self.viewMenu.addAction(tileAction)

        # Load settings:
        self.settings = QSettings('windelsoft', 'lcfoside')
        self.loadSettings()

    def addComponent(self, name, widget):
        """ Dock widget on the right and add its toggle to the View menu """
        dw = QDockWidget(name)
        dw.setWidget(widget)
        dw.setObjectName(name)
        self.addDockWidget(Qt.RightDockWidgetArea, dw)
        self.viewMenu.addAction(dw.toggleViewAction())

    # File handling:
    def newFile(self):
        ce = CodeEdit()
        self.mdiArea.addSubWindow(ce)
        ce.show()

    def saveFile(self):
        ac = self.activeMdiChild()
        if ac:
            ac.saveFile()

    def saveAll(self):
        pass

    def openFile(self):
        # TODO
        pass

    def closeFile(self):
        ac = self.activeMdiChild()
        if ac:
            self.mdiArea.removeSubWindow(ac)

    def loadFile(self, filename):
        """ Open project file `filename` in an editor, or focus its editor.

        Returns the CodeEdit showing the file, or None when no project is
        loaded.
        """
        if self.project is None:
            return None
        # Same location that Project.loadProjectFile reads from: the file
        # name relative to the directory of the project file.
        path = os.path.join(os.path.dirname(self.project.filename), filename)

        # Find existing mdi widget:
        wid = self.findMdiChild(path)
        if wid:
            self.mdiArea.setActiveSubWindow(wid.parent())
            return wid

        # Create a new one:
        ce = CodeEdit()
        source = self.project.loadProjectFile(filename)
        ce.setSource(source)
        # Record the path on the editor.  Without this findMdiChild can
        # never match and every activation opens a duplicate editor.
        ce.setFileName(path)
        self.mdiArea.addSubWindow(ce)
        ce.show()
        return ce

    # MDI:
    def activeMdiChild(self):
        aw = self.mdiArea.activeSubWindow()
        if aw:
            return aw.widget()
        else:
            return None

    def findMdiChild(self, filename):
        """ Return the editor whose filename equals `filename`, or None """
        for window in self.mdiArea.subWindowList():
            wid = window.widget()
            if wid.filename == filename:
                return wid
        return None

    def allChildren(self):
        return [window.widget() for window in self.mdiArea.subWindowList()]

    # Settings:
    def loadSettings(self):
        if self.settings.contains('mainwindowstate'):
            self.restoreState(self.settings.value('mainwindowstate'))
        if self.settings.contains('mainwindowgeometry'):
            self.restoreGeometry(self.settings.value('mainwindowgeometry'))
        if self.settings.contains('openedproject'):
            projectfile = self.settings.value('openedproject')
            self.loadProject(projectfile)

    def closeEvent(self, ev):
        self.settings.setValue('mainwindowstate', self.saveState())
        self.settings.setValue('mainwindowgeometry', self.saveGeometry())
        if self.project:
            self.settings.setValue('openedproject', self.project.filename)
        # TODO: ask for save of opened files
        ev.accept()

    # Error handling:
    def nodeSelected(self, node):
        """ Highlight the source location of the selected AST node """
        ce = self.activeMdiChild()
        if not ce:
            return
        if node.location:
            row, col = node.location
            ce.highlightErrorLocation( row, col )
        else:
            ce.clearErrors()

    def errorSelected(self, err):
        """ Highlight the source location of the selected build error """
        row, col, msg = err
        ce = self.activeMdiChild()
        if not ce:
            return
        ce.highlightErrorLocation(row, col)

    # Project loading:
    def loadProject(self, filename):
        self.project = Project(filename)
        self.projectview.setProject(self.project)

    def openProject(self):
        filename = QFileDialog.getOpenFileName(self,
            "Choose project file", "", "lcfos Project files (*.lcp)")
        if filename:
            self.loadProject(filename)

    # Build recipe:
    def buildFile(self):
        """ Build project """
        self.saveAll()
        self.buildOutput.clear()
        if self.compiler is None or self.project is None:
            # Nothing to build with / nothing to build
            self.buildOutput.append('No compiler or no project loaded')
            return
        self.buildOutput.append(str(self.compiler))
        mods = self.compiler.compileProject(self.project)

        self.builderrors.setErrorList(self.compiler.errorlist)
        # setAst accepts a false value, so an empty result clears the view
        self.astViewer.setAst(mods[0] if mods else None)
        for err in self.compiler.errorlist:
            self.buildOutput.append(str(err))
        self.buildOutput.append("Done!")
# File: applications/ide/project.py (new file in this changeset)
"""
 Project that can be stored to and loaded from XML.
"""

from xml.sax import ContentHandler, make_parser
import xml.dom.minidom as md
import os.path

class ProjectContentHandler(ContentHandler):
    """ SAX handler that fills a Project from its XML representation.

    Reads the 'name' attribute of <Project> and the 'Filename' attribute
    of every <File> element found inside a <Files> element.
    """
    def __init__(self, project):
        ContentHandler.__init__(self)
        self.project = project
        # True while the parser is between <Files> and </Files>
        self.inFiles = False

    def startElement(self, name, attrs):
        if name == 'Project':
            self.project.name = attrs['name']
        if name == 'Files':
            self.inFiles = True
        if name == 'File' and self.inFiles:
            self.project.files.append(attrs['Filename'])

    def endElement(self, name):
        if name == 'Files':
            self.inFiles = False

class ProjectFile:
    def __init__(self, filename):
        self.filename = filename

class Project:
    """ A named list of source files, stored as an XML file.

    filename: path of the project XML file.
    isnew:    when False (the default) the project is loaded from filename
              right away; when True an empty project is created.
    """
    def __init__(self, filename, isnew=False):
        self.name = ""
        self.files = []
        self.settings = {}
        self.filename = filename

        if not isnew:
            # Load the project from the XML file
            parser = make_parser()
            handler = ProjectContentHandler(self)
            parser.setContentHandler(handler)
            parser.parse(filename)

    def save(self, filename=None):
        """ Save the project in XML format.

        filename defaults to the file the project was created with.
        """
        if filename is None:
            filename = self.filename
        # Create document:
        doc = md.Document()
        # Add project:
        project = doc.createElement("Project")
        project.setAttribute("name", self.name)
        doc.appendChild(project)

        # Add project files:
        filesNode = doc.createElement("Files")
        project.appendChild(filesNode)
        for f in self.files:
            fe = doc.createElement("File")
            fe.setAttribute("Filename", f)
            filesNode.appendChild(fe)

        # Write the XML file.  The generated XML declaration names no
        # encoding, so a reader assumes UTF-8; write exactly that rather
        # than whatever the locale default happens to be.
        xml = doc.toprettyxml()
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(xml)

    def loadProjectFile(self, filename):
        """ Return the text of `filename`, relative to the project file's directory """
        basedir = os.path.dirname(self.filename)
        filename = os.path.join(basedir, filename)
        with open(filename, 'r') as f:
            src = f.read()
        return src
# File: applications/ide/runbuild.py (new file in this changeset)
"""
 Command line build: compile the project file named on the command line.
"""
import sys

# Compiler imports:
from compiler.compiler import Compiler
from project import Project

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Use {0} projectfile'.format(sys.argv[0]))
        sys.exit(-1)
    filename = sys.argv[1]
    # Project takes the file name in its constructor and loads the file
    # there; it has no separate load() method.
    project = Project(filename)
    pc = Compiler()
    pc.compileProject(project)
# File: applications/ide/runide.py (new file in this changeset)
"""
 Start the IDE main window with a compiler attached.
"""
import sys
from PyQt4.QtGui import QApplication

# Compiler imports:
from compiler.compiler import Compiler
from ide.ide import Ide

if __name__ == '__main__':
    # Create the application object, then the main window; the compiler
    # is handed to the window as an attribute before it is shown.
    application = QApplication(sys.argv)
    main_window = Ide()
    main_window.compiler = Compiler()
    main_window.show()
    application.exec_()
# File: applications/ide/runtests.py (new file in this changeset)
import unittest
import os

from compiler.compiler import Compiler
from compiler.errors import CompilerException, printError
from compiler import lexer
from compiler.parser import Parser
from compiler import assembler
from compiler.codegenerator import CodeGenerator
from project import Project

class CompilerTestCase(unittest.TestCase):
    """ test methods start with 'test*'

    Comparisons use assertEqual instead of a bare assert, so they still
    run under 'python -O' and report both values on failure.
    """
    def testSource1(self):
        source = """
        module lcfos;
        var
          a : integer;

        procedure putchar(num : integer);
        begin
        end putchar;

        procedure WriteNum( num: integer);
          var
            d, base : integer;
            dgt : integer;
          begin
            d := 1;
            base := 10;
            while num div d >= base do
              d := d * base
            end;
            while d <> 0 do
               dgt := num div d;
               num := num mod d;
               d := d div base;
               putchar(48 + dgt)
            end
          end WriteNum;

        begin
          a := 1;
          while a < 26
          do
             putchar(65+a);
             a := a * 2
          end;
        end lcfos.
        """
        pc = Compiler()
        pc.compilesource(source)

    def testSource2(self):
        source = """
        module lcfos;
        var
          a, b : integer;
          arr: array 30 of integer;
          arr2: array 10, 12 of integer;
        procedure t2*() : integer;
        begin
           a := 2;
           while a < 5 do
              b := arr[a-1] + arr[a-2];
              arr2[a,2] := b;
              arr2[a,3] := arr2[a,2] + arr2[a,2]*3 + b;
              arr[a] := b;
              a := a + 1;
           end;
           return b
        end t2;
        begin
           b := 12;
           arr[0] := 1;
           arr[1] := 1;
        end lcfos.
        """
        pc = Compiler()
        mod = pc.compilesource(source)

    def testSource5(self):
        source = """
        module lcfos;
        procedure WriteLn() : integer;
          const zzz = 13;
          var
            a, b, c: integer;
          begin
             a := 2;
             b := 7;
             c := 10 * a + b*10*a;
             return c
          end WriteLn;
        begin end lcfos.
        """
        pc = Compiler()
        pc.compilesource(source)

    # The 'tst' prefix keeps unittest from collecting this method.
    def tstForStatement(self):
        source = """
        module fortest;
        var
          a,b,c : integer;
        begin
           c := 0;
           for a := 1 to 10 by 1 do
              b := a + 15;
              c := c + b * a;
           end;
        end fortest.
        """
        pc = Compiler()
        pc.compilesource(source)

    def testSourceIfAndWhilePattern(self):
        source = """
        module lcfos;
        procedure WriteLn() : integer;
          const zzz = 13;
          var
            a, b, c: integer;
          begin
             a := 1;
             b := 2;
             if a * 3 > b then
                c := 10*a + b*10*a*a*a*b;
             else
                c := 13;
             end;
             while a < 101 do
                a := a + 1;
                c := c + 2;
             end;
             return c
          end WriteLn;
        begin end lcfos.
        """
        pc = Compiler()
        pc.compilesource(source)

    def testPattern1(self):
        """ Test if expression can be compiled into byte code """
        src = "12*13+33-12*2*3"
        tokens = lexer.tokenize(src)
        ast = Parser(tokens).parseExpression()
        code = CodeGenerator().genexprcode(ast)

    def testAssembler(self):
        """ Check all kind of assembler cases """
        self.assertEqual(assembler.shortjump(5), [0xeb, 0x5])
        self.assertEqual(assembler.shortjump(-2), [0xeb, 0xfc])
        self.assertEqual(assembler.shortjump(10,'GE'), [0x7d, 0xa])
        self.assertEqual(assembler.nearjump(5), [0xe9, 0x5,0x0,0x0,0x0])
        self.assertEqual(assembler.nearjump(-2), [0xe9, 0xf9, 0xff,0xff,0xff])
        self.assertEqual(assembler.nearjump(10,'LE'), [0x0f, 0x8e, 0xa,0x0,0x0,0x0])

    def testCall(self):
        self.assertEqual(assembler.call('r10'), [0x41, 0xff, 0xd2])
        self.assertEqual(assembler.call('rcx'), [0xff, 0xd1])

    def testXOR(self):
        self.assertEqual(assembler.xorreg64('rax', 'rax'), [0x48, 0x31, 0xc0])
        self.assertEqual(assembler.xorreg64('r9', 'r8'), [0x4d, 0x31, 0xc1])
        self.assertEqual(assembler.xorreg64('rbx', 'r11'), [0x4c, 0x31, 0xdb])

    def testINC(self):
        self.assertEqual(assembler.increg64('r11'), [0x49, 0xff, 0xc3])
        self.assertEqual(assembler.increg64('rcx'), [0x48, 0xff, 0xc1])

    def testPush(self):
        self.assertEqual(assembler.push('rbp'), [0x55])
        self.assertEqual(assembler.push('rbx'), [0x53])
        self.assertEqual(assembler.push('r12'), [0x41, 0x54])

    def testPop(self):
        self.assertEqual(assembler.pop('rbx'), [0x5b])
        self.assertEqual(assembler.pop('rbp'), [0x5d])
        self.assertEqual(assembler.pop('r12'), [0x41, 0x5c])

    def testAsmLoads(self):
        # TODO constant add testcases
        self.assertEqual(assembler.mov('rbx', 'r14'), [0x4c, 0x89, 0xf3])
        self.assertEqual(assembler.mov('r12', 'r8'), [0x4d, 0x89, 0xc4])
        self.assertEqual(assembler.mov('rdi', 'rsp'), [0x48, 0x89, 0xe7])

    def testAsmMemLoads(self):
        self.assertEqual(assembler.mov('rax', ['r8','r15',0x11]), [0x4b,0x8b,0x44,0x38,0x11])
        self.assertEqual(assembler.mov('r13', ['rbp','rcx',0x23]), [0x4c,0x8b,0x6c,0xd,0x23])

        self.assertEqual(assembler.mov('r9', ['rbp',-0x33]), [0x4c,0x8b,0x4d,0xcd])
        #assert(assembler.movreg64('rbx', ['rax']) == [0x48, 0x8b,0x18])

        self.assertEqual(assembler.mov('rax', [0xb000]), [0x48,0x8b,0x4,0x25,0x0,0xb0,0x0,0x0])
        self.assertEqual(assembler.mov('r11', [0xa0]), [0x4c,0x8b,0x1c,0x25,0xa0,0x0,0x0,0x0])

        self.assertEqual(assembler.mov('r11', ['RIP', 0xf]), [0x4c,0x8b,0x1d,0x0f,0x0,0x0,0x0])

    def testAsmMemStores(self):
        self.assertEqual(assembler.mov(['rbp', 0x13],'rbx'), [0x48,0x89,0x5d,0x13])
        self.assertEqual(assembler.mov(['r12', 0x12],'r9'), [0x4d,0x89,0x4c,0x24,0x12])
        self.assertEqual(assembler.mov(['rcx', 0x11],'r14'), [0x4c,0x89,0x71,0x11])

        self.assertEqual(assembler.mov([0xab], 'rbx'), [0x48,0x89,0x1c,0x25,0xab,0x0,0x0,0x0])
        self.assertEqual(assembler.mov([0xcd], 'r13'), [0x4c,0x89,0x2c,0x25,0xcd,0x0,0x0,0x0])

        self.assertEqual(assembler.mov(['RIP', 0xf], 'r9'), [0x4c,0x89,0x0d,0x0f,0x0,0x0,0x0])

    def testAsmMOV8(self):
        self.assertEqual(assembler.mov(['rbp', -8], 'al'), [0x88, 0x45, 0xf8])
        self.assertEqual(assembler.mov(['r11', 9], 'cl'), [0x41, 0x88, 0x4b, 0x09])

        self.assertEqual(assembler.mov(['rbx'], 'al'), [0x88, 0x03])
        self.assertEqual(assembler.mov(['r11'], 'dl'), [0x41, 0x88, 0x13])

    def testAsmLea(self):
        self.assertEqual(assembler.leareg64('r11', ['RIP', 0xf]), [0x4c,0x8d,0x1d,0x0f,0x0,0x0,0x0])
        self.assertEqual(assembler.leareg64('rsi', ['RIP', 0x7]), [0x48,0x8d,0x35,0x07,0x0,0x0,0x0])

        self.assertEqual(assembler.leareg64('rcx', ['rbp', -8]), [0x48,0x8d,0x4d,0xf8])

    def testAssemblerCMP(self):
        self.assertEqual(assembler.cmpreg64('rdi', 'r13'), [0x4c, 0x39, 0xef])
        self.assertEqual(assembler.cmpreg64('rbx', 'r14'), [0x4c, 0x39, 0xf3])
        self.assertEqual(assembler.cmpreg64('r12', 'r9'), [0x4d, 0x39, 0xcc])

        self.assertEqual(assembler.cmpreg64('rdi', 1), [0x48, 0x83, 0xff, 0x01])
        self.assertEqual(assembler.cmpreg64('r11', 2), [0x49, 0x83, 0xfb, 0x02])

    def testAssemblerADD(self):
        self.assertEqual(assembler.addreg64('rbx', 'r13'), [0x4c, 0x01, 0xeb])
        self.assertEqual(assembler.addreg64('rax', 'rbx'), [0x48, 0x01, 0xd8])
        self.assertEqual(assembler.addreg64('r12', 'r13'), [0x4d, 0x01, 0xec])

        self.assertEqual(assembler.addreg64('rbx', 0x13), [0x48, 0x83, 0xc3, 0x13])
        self.assertEqual(assembler.addreg64('r11', 0x1234567), [0x49, 0x81, 0xc3, 0x67, 0x45,0x23,0x1])
        self.assertEqual(assembler.addreg64('rsp', 0x33), [0x48, 0x83, 0xc4, 0x33])

    def testAssemblerSUB(self):
        self.assertEqual(assembler.subreg64('rdx', 'r14'), [0x4c, 0x29, 0xf2])
        self.assertEqual(assembler.subreg64('r15', 'rbx'), [0x49, 0x29, 0xdf])
        self.assertEqual(assembler.subreg64('r8', 'r9'), [0x4d, 0x29, 0xc8])

        self.assertEqual(assembler.subreg64('rsp', 0x123456), [0x48, 0x81, 0xec, 0x56,0x34,0x12,0x0])
        self.assertEqual(assembler.subreg64('rsp', 0x12), [0x48, 0x83, 0xec, 0x12])

    def testAssemblerIDIV(self):
        self.assertEqual(assembler.idivreg64('r11'), [0x49, 0xf7, 0xfb])
        self.assertEqual(assembler.idivreg64('rcx'), [0x48, 0xf7, 0xf9])
        self.assertEqual(assembler.idivreg64('rsp'), [0x48, 0xf7, 0xfc])

    def testAssemblerIMUL(self):
        self.assertEqual(assembler.imulreg64_rax('rdi'), [0x48, 0xf7, 0xef])
        self.assertEqual(assembler.imulreg64_rax('r10'), [0x49, 0xf7, 0xea])
        self.assertEqual(assembler.imulreg64_rax('rdx'), [0x48, 0xf7, 0xea])

        self.assertEqual(assembler.imulreg64('r11', 'rdi'), [0x4c, 0xf, 0xaf, 0xdf])
        self.assertEqual(assembler.imulreg64('r12', 'rbx'), [0x4c, 0xf, 0xaf, 0xe3])
        # nasm generates this machine code: 0x4d, 0x6b, 0xff, 0xee
        # This also works: 4D0FAFFE (another variant?? )
        self.assertEqual(assembler.imulreg64('r15', 'r14'), [0x4d, 0x0f, 0xaf, 0xfe])

    def testProject(self):
        """ A saved project must load back with the same name and files """
        p = Project('test.xml', isnew=True)
        p.name = "Test project"
        p.files.append('main.mod')
        p.files.append('test.mod')
        try:
            p.save('test.xml')

            q = Project('test.xml')

            self.assertEqual(p.name, q.name)
            self.assertEqual(p.files, q.files)
        finally:
            # Remove the scratch file even when a check above fails
            if os.path.exists('test.xml'):
                os.remove('test.xml')

if __name__ == '__main__':
    unittest.main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/applications/ide/testproject/main.mod Fri Feb 03 18:40:43 2012 +0100 @@ -0,0 +1,25 @@ +module main; +var + a,b,c : integer; + +procedure test(x:integer); + var y,z:integer; +begin + y := x * 3 + 2; + z := x + y + a; +end test; + +procedure add(a:integer; b:integer):integer; + var + tmp : integer; + begin + tmp := a + b; + return tmp + end add; + +begin + a := 12; + b := a * 12 + 33; + c := a div b + a * b * 99; +end main. +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/applications/ide/testproject/test.lcp Fri Feb 03 18:40:43 2012 +0100 @@ -0,0 +1,7 @@ +<?xml version="1.0" ?> +<Project name="Test project"> + <Files> + <File Filename="main.mod"/> + <File Filename="test.mod"/> + </Files> +</Project>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/applications/ide/testproject/test.mod Fri Feb 03 18:40:43 2012 +0100 @@ -0,0 +1,25 @@ +module test; +var + a,b,c : integer; + +procedure test(x:integer); + var y,z:integer; +begin + y := x * 3 + 2; + z := x + y + a; +end test; + +procedure add(a:integer; b:integer):integer; + var + tmp : integer; + begin + tmp := a + b; + return tmp + end add; + +begin + a := 12; + b := a * 12 + 33; + c := a div b + a * b * 99; +end test. +
--- a/ide/compiler/__init__.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -# File to make this directory a package. -
# File: ide/compiler/assembler.py (path taken from the diff header; the
# changeset lists this file as removed).
"""
 Assembler code generation functions

 Every instruction helper returns the machine code as a list of byte values.
"""

from .errors import Error

# Table 3.1 of the intel manual:
# use REX.W on the table below:
regs64 = {'rax': 0, 'rcx': 1, 'rdx': 2, 'rbx': 3,
          'rsp': 4, 'rbp': 5, 'rsi': 6, 'rdi': 7,
          'r8': 0, 'r9': 1, 'r10': 2, 'r11': 3,
          'r12': 4, 'r13': 5, 'r14': 6, 'r15': 7}
regs32 = {'eax': 0, 'ecx': 1, 'edx': 2, 'ebx': 3,
          'esp': 4, 'ebp': 5, 'esi': 6, 'edi': 7}
regs8 = {'al': 0, 'cl': 1, 'dl': 2, 'bl': 3,
         'ah': 4, 'ch': 5, 'dh': 6, 'bh': 7}

# Calculation of the rexb bit:
rexbit = {'rax': 0, 'rcx': 0, 'rdx': 0, 'rbx': 0,
          'rsp': 0, 'rbp': 0, 'rsi': 0, 'rdi': 0,
          'r8': 1, 'r9': 1, 'r10': 1, 'r11': 1,
          'r12': 1, 'r13': 1, 'r14': 1, 'r15': 1}


# Helper functions:
def imm64(x):
    """ represent 64 bits integer in little endian 8 bytes"""
    if x < 0:
        x = x + (1 << 64)
    x = x & 0xFFFFFFFFFFFFFFFF
    return [(x >> (p*8)) & 0xFF for p in range(8)]


def imm32(x):
    """ represent 32 bits integer in little endian 4 bytes"""
    if x < 0:
        x = x + (1 << 32)
    x = x & 0xFFFFFFFF
    return [(x >> (p*8)) & 0xFF for p in range(4)]


def imm8(x):
    """ represent 8 bits integer as a single byte (values are wrapped) """
    if x < 0:
        x = x + (1 << 8)
    x = x & 0xFF
    return [x]


def modrm(mod=0, rm=0, reg=0):
    """ Construct the modrm byte from its components """
    assert(mod <= 3)
    assert(rm <= 7)
    assert(reg <= 7)
    return (mod << 6) | (reg << 3) | rm


def rex(w=0, r=0, x=0, b=0):
    """ Create a REX prefix byte """
    assert(w <= 1)
    assert(r <= 1)
    assert(x <= 1)
    assert(b <= 1)
    return 0x40 | (w << 3) | (r << 2) | (x << 1) | b


def sib(ss=0, index=0, base=0):
    """ Construct the SIB byte from scale, index and base """
    assert(ss <= 3)
    assert(index <= 7)
    assert(base <= 7)
    return (ss << 6) | (index << 3) | base


def _displacement(disp):
    """ Return (mod, bytes) for a displacement: disp8 if it fits, else disp32 """
    if -128 <= disp <= 127:
        return 1, imm8(disp)
    if -(1 << 31) <= disp < (1 << 31):
        return 2, imm32(disp)
    Error('Displacement {0} does not fit in 32 bits'.format(disp))


def _regimm(reg, value, ext):
    """ Encode '<op> reg64, imm' for the 0x83 (imm8) / 0x81 (imm32) opcodes.

    ext is the opcode extension placed in the reg field of the modrm byte.
    The sign extended imm8 form is used whenever the value fits.
    """
    rexprefix = rex(w=1, b=rexbit[reg])
    mod_rm = modrm(3, rm=regs64[reg], reg=ext)
    if -128 <= value <= 127:
        return [rexprefix, 0x83, mod_rm] + imm8(value)
    elif -(1 << 31) <= value < (1 << 31):
        return [rexprefix, 0x81, mod_rm] + imm32(value)
    else:
        Error('Constant value too large!')


tttn = {'L': 0xc, 'G': 0xf, 'NE': 0x5, 'GE': 0xd, 'LE': 0xe, 'E': 0x4}


# Actual instructions:
def nearjump(distance, condition=None):
    """ jmp imm32

    condition is an optional key of tttn and selects a conditional jump.
    A negative distance is extended with the length of the jump itself.
    """
    lim = (1 << 30)
    if abs(distance) > lim:
        Error('near jump cannot jump over more than {0} bytes'.format(lim))
    if condition:
        if distance < 0:
            distance -= 6  # Skip own instruction
        opcode = 0x80 | tttn[condition]  # Jcc imm32
        return [0x0F, opcode] + imm32(distance)
    else:
        if distance < 0:
            distance -= 5  # Skip own instruction
        return [0xE9] + imm32(distance)


def shortjump(distance, condition=None):
    """ jmp imm8 """
    lim = 118
    if abs(distance) > lim:
        Error('short jump cannot jump over more than {0} bytes'.format(lim))
    if distance < 0:
        distance -= 2  # Skip own instruction
    if condition:
        opcode = 0x70 | tttn[condition]  # Jcc rel8
    else:
        opcode = 0xeb  # jmp rel8
    return [opcode] + imm8(distance)


# Helper that determines jump type:
def reljump(distance):
    """ Unconditional jump, short when the distance allows it """
    if abs(distance) < 110:
        return shortjump(distance)
    else:
        return nearjump(distance)


def push(reg):
    """ push r64 """
    if reg in regs64:
        if rexbit[reg] == 1:
            return [0x41, 0x50 + regs64[reg]]
        else:
            return [0x50 + regs64[reg]]
    else:
        Error('push for {0} not implemented'.format(reg))


def pop(reg):
    """ pop r64 """
    if reg in regs64:
        if rexbit[reg] == 1:
            rexprefix = rex(b=1)
            opcode = 0x58 + regs64[reg]
            return [rexprefix, opcode]
        else:
            opcode = 0x58 + regs64[reg]
            return [opcode]
    else:
        Error('pop for {0} not implemented'.format(reg))


def INT(number):
    """ int imm8 """
    opcode = 0xcd
    return [opcode] + imm8(number)


def syscall():
    return [0x0F, 0x05]


def call(distance):
    """ call rel32 when given an int, call r64 when given a register name """
    if type(distance) is int:
        return [0xe8] + imm32(distance)
    elif type(distance) is str and distance in regs64:
        reg = distance
        opcode = 0xFF  # 0xFF /2 == call r/m64
        mod_rm = modrm(mod=3, reg=2, rm=regs64[reg])
        if rexbit[reg] == 1:
            rexprefix = rex(b=rexbit[reg])
            return [rexprefix, opcode, mod_rm]
        else:
            return [opcode, mod_rm]
    else:
        Error('Cannot call to {0}'.format(distance))


def ret():
    return [0xc3]


def increg64(reg):
    """ inc r64 """
    assert(reg in regs64)
    rexprefix = rex(w=1, b=rexbit[reg])
    opcode = 0xff
    mod_rm = modrm(mod=3, rm=regs64[reg])
    return [rexprefix, opcode, mod_rm]


def prepost8(r8, rm8):
    """ Prefix and modrm/displacement bytes for an 8 bit register and a
    memory operand given as [base] or [base, offset]. Returns (pre, post).
    """
    assert(r8 in regs8)
    pre = []
    if type(rm8) is list:
        # TODO: merge mem access with prepost for 64 bits
        if len(rm8) == 1:
            base, = rm8
            if type(base) is str and base in regs64:
                # These bases need a SIB byte or a displacement with mod=0:
                assert(base not in ['rbp', 'rsp', 'r12', 'r13'])
                mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
                if rexbit[base] == 1:
                    pre.append(rex(b=1))
                post = [mod_rm]
            else:
                Error('One arg of type {0} not implemented'.format(base))
        elif len(rm8) == 2:
            base, offset = rm8
            assert(type(offset) is int)
            assert(base in regs64)

            if base == 'rsp' or base == 'r12':
                Error('Cannot use rsp or r12 as base yet')
            if rexbit[base] == 1:
                pre.append(rex(b=1))
            # Offsets outside -128..127 need the disp32 form:
            mod, disp = _displacement(offset)
            mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs8[r8])
            post = [mod_rm] + disp
        else:
            Error('not supporting prepost8 with list len {0}'.format(len(rm8)))
    else:
        Error('Not supporting move with reg8 {0}'.format(r8))
    if pre and regs8[r8] >= 4:
        # With a REX prefix the encodings 4..7 no longer select ah/ch/dh/bh.
        Error('Cannot combine {0} with registers r8..r15'.format(r8))
    return pre, post


def prepost(r64, rm64):
    """ REX prefix and modrm/SIB/displacement bytes for a 64 bit register
    r64 combined with rm64, which is a register name or a memory operand:
    [base, index, disp], [base, offset], ['RIP', offset] or [address].
    Returns (pre, post).
    """
    assert(r64 in regs64)
    if type(rm64) is list:
        if len(rm64) == 3:
            base, index, disp = rm64
            assert(base in regs64)
            assert(index in regs64)
            assert(type(disp) is int)
            # Assert that no special cases are used:
            # TODO: swap base and index to avoid special cases
            # TODO: exploit special cases and make better code
            assert(index != 'rsp')

            rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
            # rm=4 indicates a SIB byte: [--][--]+displacement
            mod, dispbytes = _displacement(disp)
            mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + dispbytes
        elif len(rm64) == 2:
            base, offset = rm64
            assert(type(offset) is int)
            if base == 'RIP':
                # RIP pointer relative addressing mode!
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
                return [rexprefix], [mod_rm] + imm32(offset)
            else:
                assert(base in regs64)
                rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
                mod, dispbytes = _displacement(offset)
                if base == 'rsp' or base == 'r12':
                    # extended function that uses SIB byte
                    # rm=4 indicates a SIB byte follows
                    mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
                    # index=4 indicates that index is not used
                    si_b = sib(ss=0, index=4, base=regs64[base])
                    return [rexprefix], [mod_rm, si_b] + dispbytes
                else:
                    mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs64[r64])
                    return [rexprefix], [mod_rm] + dispbytes
        elif len(rm64) == 1:
            offset = rm64[0]
            if type(offset) is int:
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=4, reg=regs64[r64])
                si_b = sib(ss=0, index=4, base=5)  # 0x25
                return [rexprefix], [mod_rm, si_b] + imm32(offset)
            else:
                Error('Memory reference of type {0} not implemented'.format(offset))
        else:
            Error('Memory reference not implemented')
    elif rm64 in regs64:
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
        mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
        return [rexprefix], [mod_rm]
    else:
        # Fail loudly instead of returning None to the caller:
        Error('Operand {0} not supported'.format(rm64))


def leareg64(rega, m):
    opcode = 0x8d  # lea r64, m
    pre, post = prepost(rega, m)
    return pre + [opcode] + post


def mov(rega, regb):
    """ mov rega, regb

    Supported forms: r64 <- imm64, r/m64 <- r64, m8 <- r8 and r64 <- r/m64.
    Memory operands are lists, see prepost and prepost8.
    """
    if type(regb) is int:
        if not (type(rega) is str and rega in regs64):
            Error('Move of this kind {0}, {1} not implemented'.format(rega, regb))
        pre = [rex(w=1, b=rexbit[rega])]
        opcode = 0xb8 + regs64[rega]
        post = imm64(regb)
    elif type(regb) is str:
        if regb in regs64:
            opcode = 0x89  # mov r/m64, r64
            pre, post = prepost(regb, rega)
        elif regb in regs8:
            opcode = 0x88  # mov r/m8, r8
            pre, post = prepost8(regb, rega)
        else:
            Error('Unknown register {0}'.format(regb))
    elif type(rega) is str:
        if rega in regs64:
            opcode = 0x8b  # mov r64, r/m64
            pre, post = prepost(rega, regb)
        else:
            Error('Unknown register {0}'.format(rega))
    else:
        Error('Move of this kind {0}, {1} not implemented'.format(rega, regb))
    return pre + [opcode] + post


def xorreg64(rega, regb):
    rexprefix = rex(w=1, r=rexbit[regb], b=rexbit[rega])
    opcode = 0x31  # XOR r/m64, r64
    # Alternative is 0x33 XOR r64, r/m64
    mod_rm = modrm(3, rm=regs64[rega], reg=regs64[regb])
    return [rexprefix, opcode, mod_rm]


# integer arithmatic:
def addreg64(rega, regb):
    """ add rega, regb where regb is a 64 bit register or an integer """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        return _regimm(rega, regb, 0)  # add r/m64, imm: /0
    else:
        Error('unknown second operand {0}!'.format(regb))


def subreg64(rega, regb):
    """ sub rega, regb where regb is a 64 bit register or an integer """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        return _regimm(rega, regb, 5)  # sub r/m64, imm: /5
    else:
        Error('unknown second operand {0}!'.format(regb))


def idivreg64(reg):
    rexprefix = rex(w=1, b=rexbit[reg])
    opcode = 0xf7  # IDIV r/m64
    mod_rm = modrm(3, rm=regs64[reg], reg=7)
    return [rexprefix, opcode, mod_rm]


def imulreg64_rax(reg):
    rexprefix = rex(w=1, b=rexbit[reg])
    opcode = 0xf7  # IMUL r/m64
    mod_rm = modrm(3, rm=regs64[reg], reg=5)
    return [rexprefix, opcode, mod_rm]


def imulreg64(rega, regb):
    pre, post = prepost(rega, regb)
    opcode = 0x0f  # IMUL r64, r/m64
    opcode2 = 0xaf
    return pre + [opcode, opcode2] + post


def cmpreg64(rega, regb):
    """ cmp rega, regb where regb is a 64 bit register or an integer """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        return _regimm(rega, regb, 7)  # cmp r/m64, imm: /7
    else:
        Error('not implemented cmp64')


# Mapping that maps string names to the right functions:
opcodes = {'mov': (mov, 2), 'lea': (leareg64, 2), 'int': (INT, 1), 'syscall': (syscall, 0)}
# File: ide/compiler/builtin.py (path taken from the diff header; the
# changeset lists this file as removed).
from .nodes import *

# Base types of the language.
# NOTE(review): the second BaseType argument looks like the size in bytes
# (see the remark on boolean) -- confirm against BaseType in nodes.
boolean = BaseType('boolean', 8) # Choose: 1 or 8 bytes?
integer = BaseType('integer', 8)
real = BaseType('real', 8)
char = BaseType('char', 1)
void = BaseType('void', 0)

# Builtin procedure 'chr': one integer parameter named 'x' of kind 'value',
# returning a char.
chr_func = BuiltinProcedure('chr', ProcedureType([Parameter('value', 'x', integer)], char))
# File: ide/compiler/codegenerator.py (path taken from the diff header; the
# changeset lists this file as removed).
"""
 Code generation for 64 bits intel processors
"""

from .nodes import *
from .errors import Error
from .builtin import real, integer, boolean, char
from .assembler import *


class CodeGenerator:
    """ Walks the AST and accumulates machine code and global variable
    storage into one flat list of bytes (self.image). """

    def __init__(self):
        self.strings = []
        # Values of the string constants that were seen (see gencode):
        self.data = []
        self.initialize()

    def initialize(self):
        """ Reset the register administration and the code image """
        # Register descriptors:
        self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',')
        self.usedregs = []
        # Members to accumulate the result into:
        # The result is an image of bytecode and global variable space.
        # Global variables a referenced by RIP relative addressing.
        self.image = []
        self.rip = 0  # The current instruction pointer location.
        # TODO: backpatch list here?

    # Functions to modify the code image
    def addCode(self, code):
        """ Append a list of bytes to the image and advance rip """
        assert(type(code) is list)
        self.image += code
        self.rip += len(code)

    def fixCode(self, position, code):
        """ Overwrite bytes that were emitted earlier, starting at position """
        self.image[position:position+len(code)] = code

    def align(self, b):
        """ Pad the image with zero bytes until rip is a multiple of b """
        while (self.rip % b) != 0:
            self.addCode([0])

    def saveAllRegisters(self):
        """ Save all registers that are in use (see saveRegister) """
        for reg in list(self.usedregs):
            self.saveRegister(reg)

    def saveRegister(self, reg):
        """ Save a register that is in use; not implemented yet. """
        if reg in self.usedregs:
            # usedregs only records which registers are taken, not where
            # their value could be stored, so no code can be emitted here.
            Error('Cannot save register {0}: spilling is not implemented'.format(reg))

    def getreg(self, node):
        """ acquire a working register for a certain node."""
        # Temporary register bypass action:
        if len(self.freeregs) > 0:
            reg = self.freeregs.pop(0)
            self.usedregs.append(reg)
        else:
            Error('No more free regs')
        node.reg = reg

    def freereg(self, node):
        """ Give the register of a node back to the pool of free registers """
        reg = node.reg
        node.reg = None
        self.freeregs.append(reg)
        self.usedregs.remove(reg)

    # Helpers to load and retrieve designated objects:
    def storeRegInDesignator(self, reg, designator):
        """ Emit code that stores register reg into the designated object """
        assert(type(reg) is str)
        assert(type(designator) is Designator)
        if len(designator.selectors) > 0:
            self.gencode(designator)  # Load the pointer into some register
            self.addCode(mov([designator.reg, 0x0], reg))
            self.freereg(designator)
        else:
            if designator.obj.isLocal:
                # Relative from rbp register
                mem = ['rbp', designator.obj.offset]
                self.addCode(mov(mem, reg))
            else:
                # Relative from RIP after move
                self.addCode(mov(['RIP', 0x0], reg))
                self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip))

    # Code generation functions:
    def genexprcode(self, node):
        """
           Generate code for expressions!
           Recursively evaluates, and ensures a register contains the answer.
           register is an integer register or a floating point reg
        """
        if isinstance(node, Binop):
            # Handle a binary operation (two arguments) of some kind
            self.genexprcode(node.a)
            self.genexprcode(node.b)

            if node.op == 'mod':
                assert(node.typ.isType(integer))
                self.addCode(mov('rax', node.a.reg))
                # idiv is a signed division of rdx:rax, so rax has to be
                # sign extended into rdx (zeroing rdx breaks negative values):
                self.addCode([0x48, 0x99])  # cqo
                self.addCode(idivreg64(node.b.reg))  # divide rdx:rax with reg
                node.reg = node.a.reg
                self.freereg(node.b)  # give up register that contains b
                self.addCode(mov(node.reg, 'rdx'))  # move remainder into result
            elif node.op == 'div':
                assert(node.typ.isType(integer))
                self.addCode(mov('rax', node.a.reg))
                # Sign extend rax into rdx, see 'mod' above:
                self.addCode([0x48, 0x99])  # cqo
                self.addCode(idivreg64(node.b.reg))  # divide rdx:rax with reg
                node.reg = node.a.reg
                self.freereg(node.b)  # give up register that contains b
                self.addCode(mov(node.reg, 'rax'))  # move result into reg
            elif node.op == '*':
                if node.typ.isType(integer):
                    self.addCode(imulreg64(node.a.reg, node.b.reg))
                    node.reg = node.a.reg
                    self.freereg(node.b)
                else:
                    Error('{0} for * not implemented'.format(node.typ))
            elif node.op == '+':
                if node.typ.isType(integer):
                    self.addCode(addreg64(node.a.reg, node.b.reg))
                    node.reg = node.a.reg
                    self.freereg(node.b)
                else:
                    Error('{0} for + not implemented'.format(node.typ))
            elif node.op == '-':
                if node.typ.isType(integer):
                    self.addCode(subreg64(node.a.reg, node.b.reg))
                    node.reg = node.a.reg
                    self.freereg(node.b)
                else:
                    Error('{0} for - not implemented'.format(node.typ))
            else:
                Error('Unknown Binop {0}'.format(node.op))

        elif type(node) is Unop:
            # None of the unary operations is implemented yet; every path
            # reports a compiler error.
            if node.op == 'INTTOREAL':
                self.genexprcode(node.a)
                node.reg = node.a.reg
                # TODO use 'FILD' instruction
                Error('Unop inttoreal TODO')
            elif node.op == 'ABS':
                if node.typ.isType(real):
                    # TODO: [0xD9, 0xE1] is st(0) = fabs st(0)
                    Error('ABS error integer')
                elif node.typ.isType(integer):
                    Error('ABS error integer')
                else:
                    Error('ABS error')
            else:
                Error('Unknown Unop {0}'.format(node.op))

        elif isinstance(node, Designator):
            # dereference, array index. Make sure that the result comes into a register
            if len(node.selectors) > 0:
                self.gencode(node)  # Load the pointer into some register

                # Now we can access the object at location '[node.reg]':
                if node.typ.isType(integer):
                    self.addCode(mov(node.reg, [node.reg, 0x0]))
                else:
                    Error('Only integer types implemented')
            else:
                # No selectors, load variable directly
                if node.obj.typ.isType(integer):
                    if type(node.obj) is Constant:
                        self.genexprcode(node.obj)
                        node.reg = node.obj.reg
                    else:
                        self.getreg(node)
                        # Get a register to store the integer value
                        if node.obj.isLocal:
                            # relative to rbp:
                            self.addCode(mov(node.reg, ['rbp', node.obj.offset]))
                        else:
                            self.addCode(mov(node.reg, ['RIP', 0x0]))
                            self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip))
                else:
                    Error('Cannot load variable type {0}'.format(node.typ))

        elif isinstance(node, Relop):
            # Create a boolean from operands
            # TODO create an alternative for expressions used as conditions.
            self.genexprcode(node.a)
            self.genexprcode(node.b)

            if node.a.typ.isType(integer):
                instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=': 'E'}
                if node.relop not in instructions:
                    Error('Unimplemented relop: '+str(node.relop))
                instr = instructions[node.relop]

                node.reg = node.a.reg
                self.addCode(cmpreg64(node.a.reg, node.b.reg))
                self.addCode(shortjump(0x0, condition=instr))  # jump over 0 code and jmp
                fixloc1 = self.rip - 1
                rip1 = self.rip
                self.addCode(xorreg64(node.reg, node.reg))
                self.addCode(shortjump(0x0))  # Jump over 1 code
                fixloc2 = self.rip - 1
                self.fixCode(fixloc1, imm8(self.rip - rip1))
                rip2 = self.rip
                self.addCode(xorreg64(node.reg, node.reg))
                self.addCode(increg64(node.reg))
                self.fixCode(fixloc2, imm8(self.rip - rip2))

                self.freereg(node.b)
            else:
                Error('Relop not implemented for {0}'.format(node.a.typ))

        elif type(node) is Constant:
            if node.typ.isType(integer):
                self.getreg(node)
                self.addCode(mov(node.reg, node.value))
            elif node.typ.isType(real):
                # TODO: get a fixed point reg, and load the variable in there
                Error('TODO: get real reg')
            else:
                Error('Howto generate code for {0}?'.format(node))

        elif type(node) is ProcedureCall:
            if type(node.proc.obj) is BuiltinProcedure:
                # Handle builtin procedures different, these not always call
                # a function, but generate code.
                bi = node.proc.obj
                if bi.name == 'chr':
                    arg = node.args[0]
                    self.genexprcode(arg)
                    # Store character in full width register:
                    # TODO: store in char only register
                    node.reg = arg.reg
                else:
                    Error('Unknown builtin function {0}'.format(bi.name))
            else:
                # Use generic procedure call first
                self.gencode(node)
                # Retrieve result:
                if node.typ.isType(integer):
                    # Store result!
                    self.getreg(node)
                    self.addCode(mov(node.reg, 'rax'))
                else:
                    Error('Return type not supported {0}'.format(node.typ))
        else:
            Error('Cannot generate expression code for: {0}'.format(node))

    def gencode(self, node):
        """ Code generation function for AST nodes """
        if isinstance(node, Module):
            # for all imports make a list of pointer to the actual procedures:
            for imp in node.imports:
                imp.offset = self.rip
                self.addCode([0x0]*8)
            # global variable storage allocation
            variables = node.symtable.getAllLocal(Variable)
            for var in variables:
                var.isLocal = False
                var.offset = self.rip
                self.addCode([0x00] * var.typ.size)  # TODO initial values here?
            # NOTE(review): the indentation of this listing was lost; the
            # alignment is assumed to happen once, after all variables --
            # confirm it was not meant to run per variable.
            self.align(8)
            # TODO: mark end of data and start of code inside image
            # TODO: round data to page size to enable protection by loader.
            # Procedure code generation:
            procedures = node.symtable.getAllLocal(Procedure)
            node.procs = procedures
            for proc in procedures:
                self.gencode(proc)
            # Module init code:
            node.initcodeentry = self.rip
            self.gencode(node.initcode)
            self.addCode(ret())
            # TODO: how to return from module init code? far return??

        elif type(node) is Procedure:
            # calculate offsets for local variables and parameters
            # Variable location relative to 'rbp' register
            variables = node.symtable.getAllLocal(Variable)
            offset = 0
            paramoffset = 16
            for var in variables:
                var.isLocal = True
                if not var.isParameter:
                    offset += var.typ.size
                    # Offset is negative of rbp in stack frame
                    var.offset = -offset
            node.framesize = offset
            # Calculate offsets of parameters relative to rbp register
            for par in reversed(node.typ.parameters):
                pvar = node.symtable.getLocal(Variable, par.name)
                pvar.offset = paramoffset
                paramoffset += pvar.typ.size

            # code generation
            node.entrypoint = self.rip
            self.addCode(push('rbp'))
            self.addCode(mov('rbp', 'rsp'))  # Setup the base pointer
            self.addCode(subreg64('rsp', node.framesize))  # reserve space for locals
            self.gencode(node.block)
            if node.retexpr:
                if node.retexpr.typ.isType(integer):
                    self.genexprcode(node.retexpr)
                    self.addCode(mov('rax', node.retexpr.reg))
                    self.freereg(node.retexpr)
                else:
                    Error('Cannot return this kind yet {0}'.format(node.retexpr.typ))
            self.addCode(addreg64('rsp', node.framesize))
            self.addCode(pop('rbp'))
            self.addCode(ret())
            assert(len(self.usedregs) == 0)

        elif isinstance(node, StatementSequence):
            for s in node.statements:
                self.gencode(s)

        elif type(node) is ProcedureCall:
            # Prepare parameters on the stack:
            stacksize = 0
            assert(len(node.args) == len(node.proc.typ.parameters))
            for arg, param in zip(node.args, node.proc.typ.parameters):

                if param.kind == 'value':
                    self.genexprcode(arg)
                    self.addCode(push(arg.reg))
                    self.freereg(arg)
                    stacksize += 8
                else:
                    Error('Parameter kind other than value')

            # Calculate address using designator
            if type(node.proc.obj) is Procedure:
                self.addCode(call(0x0))
                self.fixCode(self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip))
            elif type(node.proc.obj) is ImportedSymbol:
                # Load the entry point of the import table
                self.getreg(node.proc.obj)
                # Load the address of the procedure:
                self.addCode(mov(node.proc.obj.reg, ['RIP', 0x0]))
                self.fixCode(self.rip - 4, imm32(node.proc.obj.offset - self.rip))
                # Call to the address in register:
                self.addCode(call(node.proc.obj.reg))
                # Free register that holds the address of the object
                self.freereg(node.proc.obj)
            elif type(node.proc.obj) is BuiltinProcedure:
                if node.proc.obj.name == 'chr':
                    print('int to char')
                else:
                    Error('Unknown builtin function {0}'.format(node.proc.obj.name))
            else:
                Error('Cannot call designator of type {0}'.format(node.proc.obj))

            # Restore stack (pop all arguments of):
            self.addCode(addreg64('rsp', stacksize))

        elif type(node) is Assignment:
            if node.lval.typ.isType(integer):
                # TODO if node.rval is Constant of some datatype, move it to mem directly
                self.genexprcode(node.rval)  # Calculate the value that has to be stored.
                self.storeRegInDesignator(node.rval.reg, node.lval)
                self.freereg(node.rval)
            else:
                Error('Assignments of other types not implemented')
                # TODO if left and right are designators, do some sort of memcpy.

        elif type(node) is IfStatement:
            self.genexprcode(node.condition)
            self.addCode(cmpreg64(node.condition.reg, 1))
            self.freereg(node.condition)
            if node.falsestatement:
                # If with else clause
                self.addCode(nearjump(0x0, condition='NE'))  # if Not Equal jump to false
                rip1 = self.rip
                fixloc1 = self.rip - 4
                self.gencode(node.truestatement)
                self.addCode(nearjump(0x0))  # jump over false code
                fixloc2 = self.rip - 4
                self.fixCode(fixloc1, imm32(self.rip - rip1))
                rip2 = self.rip
                self.gencode(node.falsestatement)
                self.fixCode(fixloc2, imm32(self.rip - rip2))
            else:
                # If without else clause
                self.addCode(nearjump(0x0, condition='NE'))  # if Not Equal jump to false
                rip1 = self.rip
                fixloc1 = self.rip - 4
                self.gencode(node.truestatement)
                self.fixCode(fixloc1, imm32(self.rip - rip1))  # Fixup near jump over true code.

        elif isinstance(node, WhileStatement):
            rip1 = self.rip  # Store the start of the while loop
            self.genexprcode(node.condition)
            self.addCode(cmpreg64(node.condition.reg, 1))  # Test condition for true-ness
            self.freereg(node.condition)
            self.addCode(nearjump(0x0, condition='NE'))  # If Not Equal jump over while code AND jump back (fix later)
            fixloc1 = self.rip - 4
            rip2 = self.rip
            self.gencode(node.dostatements)
            self.addCode(nearjump(0x0))  # JMP to condition, fix exact jump position below
            fixloc2 = self.rip - 4
            rip3 = self.rip  # end of while loop
            self.fixCode(fixloc2, imm32(rip1 - rip3))  # Fixup jump to start of while loop
            self.fixCode(fixloc1, imm32(rip3 - rip2))  # Fixup jump out of while loop

        elif type(node) is ForStatement:
            # Initial load of iterator variable:
            self.genexprcode(node.begin)
            self.genexprcode(node.end)
            # TODO: link reg with variable so that a register is used instead of a variable
            iterreg = node.begin.reg  # Get the register used for the loop
            # TODO: compare iterreg with the end value here
            rip1 = self.rip
            self.gencode(node.statements)
            # TODO: add the increment to the loop variable here
            self.addCode(nearjump(0x0))
            fixloc1 = self.rip - 4
            rip2 = self.rip
            self.fixCode(fixloc1, imm32(rip1 - rip2))

            self.freereg(node.begin)  # Release register used in loop
            self.freereg(node.end)
            Error('No implementation of FOR statement')

        elif type(node) is AsmCode:
            def processOperand(op):
                # A memory operand that names a local variable becomes an
                # rbp relative reference; anything else is passed on as is.
                if type(op) is list:
                    if type(op[0]) is Variable:
                        var = op[0]
                        if var.isLocal:
                            return ['rbp', var.offset]
                        else:
                            Error('Can only use local variables in inline assembler')
                return op
            for asmline in node.asmcode:
                opcode, operands = asmline
                operands = [processOperand(opx) for opx in operands]
                print('assembling', opcode, *operands)
                func, nargs = opcodes[opcode]
                code = func(*operands)
                self.addCode(code)

        elif isinstance(node, EmptyStatement):
            pass

        elif type(node) is StringConstant:
            self.strings.append(node)
            self.data.append(node.value)  # Add string to the data section

        elif type(node) is Designator:
            if len(node.selectors) > 0:
                self.getreg(node)
                # Load starting address
                if node.obj.isLocal:
                    self.addCode(leareg64(node.reg, ['rbp', node.obj.offset]))
                else:
                    # Global variables need to be relocated...
                    self.addCode(leareg64(node.reg, ['RIP', 0]))
                    self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip))
                # Loop over all designators..
                for selector in node.selectors:
                    if type(selector) is Index:
                        # Deref an array index
                        self.genexprcode(selector.index)
                        self.getreg(selector)
                        self.addCode(mov(selector.reg, selector.typ.elementType.size))
                        self.addCode(imulreg64(selector.reg, selector.index.reg))
                        self.freereg(selector.index)
                        self.addCode(addreg64(node.reg, selector.reg))
                        self.freereg(selector)
                    elif type(selector) is Field:
                        print('Field')
                        Error('Field not implemented')
                    else:
                        Error('Unknown selector')
            else:
                Error('Can only gencode for designator with selectors')

        else:
            print('not generating code for {0}'.format(node))

    def generatecode(self, ast):
        """ code generation front end """
        self.initialize()
        self.gencode(ast)
        ast.image = self.image
# File: ide/compiler/compiler.py (path taken from the diff header; the
# changeset lists this file as removed).
import hashlib
# Import compiler components:
from . import lexer
from .parser import Parser
from .codegenerator import CodeGenerator
from .nodes import ExportedSymbol
from .errors import CompilerException


class Compiler:
    """ Front end that turns module source text into a compiled module. """
    versie = '0.9.3'

    def __repr__(self):
        return 'LCFOS compiler {0}'.format(self.versie)

    def generateSignature(self, src):
        """ Return the md5 hex digest of the source text.

        The text is encoded as utf-8, so non-ascii characters no longer
        raise UnicodeEncodeError. Pure ascii sources keep the digest they
        had before.
        """
        return hashlib.md5(src.encode('utf-8')).hexdigest()

    def compilesource(self, src):
        """ Front end that handles the stages:

        Returns the module with image, signature and exports attached.
        Returns None when parsing produced errors; these are stored in
        self.errorlist as (row, col, msg) tuples.
        """
        self.errorlist = []
        # Pass 1: parsing and type checking
        tokens = lexer.tokenize(src)  # Lexical stage
        p = Parser(tokens)
        try:
            ast = p.parseModule()  # Parse a module
        except CompilerException as e:
            p.errorlist.append((e.row, e.col, e.msg))
        if len(p.errorlist) > 0:
            self.errorlist = p.errorlist
            return
        # Pass 2: code generation
        CodeGenerator().generatecode(ast)
        # Attach a signature:
        ast.signature = self.generateSignature(src)
        # Generate exported symbols:
        ast.exports = []
        for proc in ast.procs:
            if proc.public:
                sym = ExportedSymbol(proc.name, proc.typ)
                sym.imageoffset = proc.entrypoint
                ast.exports.append(sym)
        return ast

    def compileProject(self, project):
        """ Compile every file of the project; returns the list of results
        of compilesource, one entry per file. """
        mods = []
        for fname in project.files:
            print('Compiling {0}...'.format(fname))
            source = project.loadProjectFile(fname)
            mod = self.compilesource(source)
            mods.append(mod)
        return mods
# --- a/ide/compiler/display.py  Fri Feb 03 18:38:44 2012 +0100
# (file removed by this changeset: +++ /dev/null, @@ -1,21 +0,0 @@)
from .nodes import *


def printNode(node, indent=0):
    """
    Print visitor
    all printing goes in here

    Prints `node` indented by `indent` spaces and then recurses into its
    children: the parameters and code block of a Procedure, the symbol table
    and init code of a Module, and getChildren() of any other node.
    """
    pad = ' ' * indent
    print(pad + str(node))
    if type(node) is Procedure:
        print(pad + ' PARAMETERS:')
        # Procedure objects do not carry a `parameters` attribute of their
        # own; the formal parameters live on the procedure type. An explicit
        # attribute is still honoured when some caller has set one.
        parameters = getattr(node, 'parameters', None)
        if parameters is None:
            parameters = node.typ.parameters
        for p in parameters:
            printNode(p, indent + 4)
        if node.block:
            print(pad + ' CODE:')
            printNode(node.block, indent + 4)
    elif type(node) is Module:
        print(node.symtable)
        printNode(node.initcode, indent + 2)
    else:
        for c in node.getChildren():
            printNode(c, indent + 2)
# --- a/ide/compiler/errors.py  Fri Feb 03 18:38:44 2012 +0100
# (file removed by this changeset: +++ /dev/null, @@ -1,47 +0,0 @@)
""" Error handling routines """


class CompilerException(Exception):
    """Compile error carrying a message and an optional source position.

    A `row` of 0 means that no position is known.
    """

    def __init__(self, msg, row=0, col=0, filename=None):
        # Forward the message so that e.args is populated like for any other
        # exception.
        super().__init__(msg)
        self.msg = msg
        self.row = row
        self.col = col
        self.filename = filename

    def __repr__(self):
        return self.msg

    def __str__(self):
        return self.msg


class ErrorNode:
    """Plain record of an error position and its message."""

    def __init__(self, row, col, msg):
        self.row, self.col = row, col
        self.msg = msg


def Error(msg, node=None):
    """Raise a CompilerException for `msg`, positioned at `node` if possible.

    `node` may expose its position either as `row`/`col` attributes or as a
    `location` attribute holding a (row, col) pair. Without a usable position
    the exception is raised with the default position (row 0).
    """
    if node is None:
        raise CompilerException(msg)
    if hasattr(node, 'row') and hasattr(node, 'col'):
        raise CompilerException(msg, node.row, node.col)
    location = getattr(node, 'location', None)
    if location is not None:
        row, col = location
        raise CompilerException(msg, row, col)
    raise CompilerException(msg)


def printError(source, e):
    """Print error `e` together with up to three source lines of context
    before and after the offending line.

    Only the message is printed when `e.row` does not name a line of
    `source` (row 0 or a row past the end of the text).
    """
    def printLine(row, txt):
        print(str(row) + ':' + txt)

    lines = source.split('\n')
    if e.row < 1 or e.row > len(lines):
        print('Error: {0}'.format(e.msg))
        return

    prerow = max(1, e.row - 3)
    afterrow = min(len(lines), e.row + 3)

    # print preceding source lines:
    for r in range(prerow, e.row):
        printLine(r, lines[r - 1])
    # print source line containing error:
    printLine(e.row, lines[e.row - 1])
    # The caret is shifted by the width of the "<row>:" prefix; e.col is
    # treated as a 1-based column.
    print(' ' * (len(str(e.row) + ':') + e.col - 1) + '^ Error: {0}'.format(e.msg))
    # print trailing source line:
    for r in range(e.row + 1, afterrow + 1):
        printLine(r, lines[r - 1])
# --- a/ide/compiler/lexer.py  Fri Feb 03 18:38:44 2012 +0100
# (file removed by this changeset: +++ /dev/null, @@ -1,71 +0,0 @@)
import collections
import re
from .errors import CompilerException

# Lexical analyzer part. Splits the input character stream into tokens.

# Token is used in the lexical analyzer:
Token = collections.namedtuple('Token', 'typ val row col')

keywords = ['and', 'array', 'begin', 'by', 'case', 'const', 'div', 'do',
    'else', 'elsif', 'end', 'false', 'for', 'if', 'import', 'in', 'is',
    'mod', 'module', 'nil', 'not', 'of', 'or', 'pointer', 'procedure',
    'record', 'repeat', 'return', 'then', 'to', 'true', 'type', 'until', 'var',
    'while', 'asm']


def tokenize(s):
    """
    Tokenizer, generates an iterator that
    returns tokens!

    Yields Token(typ, val, row, col) tuples and finishes with a Token of
    type 'END'. Keywords and punctuation use their own text as `typ`;
    numbers are converted to int (decimal and 0x... hex, both typed
    'NUMBER') or float ('REAL'); strings lose their surrounding quotes.
    Rows and columns are 1-based. Raises CompilerException on a character
    that starts no token.

    This GREAT example was taken from python re doc page!
    """
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('NEWLINE', r'\n'),
        ('SKIP', r'[ \t]'),
        # Non-greedy, so that two comments on one line do not swallow the
        # code between them.
        ('COMMENTS', r'\{.*?\}'),
        # '^' and '|' are included because the parser consumes them as the
        # dereference selector and the case separator.
        ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)^|]|>=|<=|<>|>|<'),
        ('STRING', r"'.*?'")
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = 0
    # line_start is the index of the newline that precedes the current line,
    # which makes `index - line_start` a 1-based column. Starting at -1 gives
    # the first line the same 1-based columns as all following lines.
    line_start = -1
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            line_start = pos
            line += 1
        elif typ == 'COMMENTS':
            pass
        elif typ != 'SKIP':
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            yield Token(typ, val, line, mo.start() - line_start)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        # No token pattern matched before the end of the input was reached.
        col = pos - line_start
        raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col)
    yield Token('END', '', line, 0)
# --- a/ide/compiler/modules.py  Fri Feb 03 18:38:44 2012 +0100
# (file removed by this changeset: +++ /dev/null, @@ -1,193 +0,0 @@)
import struct
from .errors import Error
from .nodes import *
from .builtin import integer, real, char, boolean, void
import os.path

# File format for compiled modules.
#  [I32] is a little endian unsigned 32 bit integer, [STR] is an [I32]
#  length followed by that many ASCII bytes.
#
#  * magic identifier (the bytes of MAGIC)
#  * [STR] mod name
#  * [STR] signature, a md5 signature of the module.
#  * [I32] size of code
#  * code image
#  * [I32] entrypoint for initcode
#  * imported symbols
#  ** [I32] num of imported symbols
#  *** [STR] name of module
#  *** [STR] signature of the module
#  *** [STR] name of the symbol
#  *** [I32] offset in the process image where the interface symbols must be placed
#  * public interface
#  ** [I32] num of interface elements
#  *** [STR] proc name
#  *** [I32] offset in code image
#  *** [type] type of the symbol, one code byte followed by:
#  **** 0x11..0x15: nothing (integer, real, char, boolean, void)
#  **** 0x20 (array): [I32] dimension, [type] element type
#  **** 0x21 (procedure): [type] return type, [I32] number of parameters
#  ***** per parameter: one kind byte, [STR] parameter name, [type] parameter type

MAGIC = b'LCFOSMODC'

loadedModules = []


def loadModule(modname):
    """ returns a Module object specified by a name

    Modules that were loaded or stored before are served from the
    loadedModules cache; otherwise '<modname>.bin' is read. Raises a
    CompilerException (through Error) when no such file exists.
    """
    # Check if the module was already loaded:
    for mod in loadedModules:
        if mod.name == modname:
            return mod

    # Try to load the module from file:
    binfilename = modname + '.bin'
    if os.path.exists(binfilename):
        # Recompiling from '<modname>.mod' is not implemented: the former
        # branch for it called an undefined compileModule() and returned
        # nothing. The compiled binary is used whenever it exists.
        return loadModuleFromFile(binfilename)
    Error("Cannot load module '{0}'!".format(modname))


def loadModuleFromFile(filename):
    """Read a compiled module from `filename`, register it in the
    loadedModules cache and return it as a Module object.

    Raises a CompilerException (through Error) when the file does not start
    with MAGIC or contains an unknown type code.
    """
    with open(filename, 'rb') as f:
        magic = f.read(len(MAGIC))
        if magic != MAGIC:
            Error("'{0}' is not a compiled module".format(filename))

        # Helper functions:
        def readI32():
            int32, = struct.unpack('<I', f.read(4))
            return int32

        def readSTR():
            length = readI32()
            b = f.read(length)
            return b.decode(encoding='ascii')

        def readType():
            code, = f.read(1)
            basetypes = {0x11: integer, 0x12: real, 0x13: char, 0x14: boolean, 0x15: void}
            if code in basetypes:
                return basetypes[code]
            elif code == 0x20:
                dimension, elementType = readI32(), readType()
                return ArrayType(dimension, elementType)
            elif code == 0x21:
                returntype = readType()
                numparams = readI32()
                parameters = []
                kinds = {0x1: 'value', 0x2: 'var', 0x3: 'const'}
                for i in range(numparams):
                    byt, = f.read(1)
                    kind = kinds[byt]
                    name, typ = readSTR(), readType()
                    parameters.append(Parameter(kind, name, typ))
                return ProcedureType(parameters, returntype)
            else:
                Error('Reading of this typ not supported')

        # Begin of actual loading
        modname = readSTR()
        modsignature = readSTR()
        codesize = readI32()
        image = f.read(codesize)
        initcodeentry = readI32()
        # Check which modules this module loads:
        numimports = readI32()
        imports = []
        for i in range(numimports):
            # A separate name is used here so that the name of the module
            # being loaded is not overwritten by that of an imported one.
            impmodname = readSTR()
            signature = readSTR()
            symname = readSTR()
            offset = readI32()
            impsym = ImportedSymbol(impmodname, symname)
            impsym.signature = signature
            impsym.offset = offset
            imports.append(impsym)
        # Modules exported interface:
        numexports = readI32()
        exports = []
        for i in range(numexports):
            name = readSTR()
            imageoffset = readI32()  # Offset in image where symbol is located
            typ = readType()
            export = ExportedSymbol(name, typ)
            export.imageoffset = imageoffset
            exports.append(export)

    # Construct imported module object:
    module = Module(modname)
    module.signature = modsignature
    module.exports = exports  # Symbols provided to other modules
    module.imports = imports  # Symbols of others used by this module.
    module.initcodeentry = initcodeentry
    module.image = image  # The binary blob
    loadedModules.append(module)
    return module


def storeModule(mod, filename):
    """Write module `mod` to `filename` in the compiled module format and
    register it in the loadedModules cache.

    Raises a CompilerException (through Error) for a symbol type that has no
    storage encoding.
    """
    with open(filename, 'wb') as f:

        def writeI32(int32):
            f.write(struct.pack('<I', int32))

        def writeSTR(s):
            writeI32(len(s))
            f.write(bytes(s, encoding='ascii'))

        def writeType(typ):
            if type(typ) is BaseType:
                basetypecode = {'integer': 0x11, 'real': 0x12, 'char': 0x13, 'boolean': 0x14, 'void': 0x15}
                code = basetypecode[typ.name]
                f.write(bytes([code]))
            elif type(typ) is ArrayType:
                f.write(bytes([0x20]))
                writeI32(typ.dimension)
                writeType(typ.elementType)
            elif type(typ) is ProcedureType:
                f.write(bytes([0x21]))
                writeType(typ.returntype)
                writeI32(len(typ.parameters))
                kinds = {'value': 0x1, 'var': 0x2, 'const': 0x3}
                for parameter in typ.parameters:
                    kind = kinds[parameter.kind]
                    f.write(bytes([kind]))
                    writeSTR(parameter.name)
                    writeType(parameter.typ)
            else:
                Error('Type storage not implemented {0}'.format(typ))

        # Begin of actual storage function
        f.write(MAGIC)
        writeSTR(mod.name)
        writeSTR(mod.signature)
        writeI32(len(mod.image))
        f.write(bytes(mod.image))
        writeI32(mod.initcodeentry)
        # modules imported symbols:
        writeI32(len(mod.imports))
        for imp in mod.imports:
            writeSTR(imp.modname)
            writeSTR(imp.signature)
            writeSTR(imp.name)
            writeI32(imp.offset)
        # modules provided interface
        writeI32(len(mod.exports))
        # Store exported symbols:
        for sym in mod.exports:
            writeSTR(sym.name)  # proc name
            writeI32(sym.imageoffset)  # proc entry point
            writeType(sym.typ)  # Procedure type

    storeModuleInCache(mod)


def storeModuleInCache(newmod):
    """Add `newmod` to loadedModules unless a module of that name is cached."""
    for mod in loadedModules:
        if newmod.name == mod.name:
            return
    loadedModules.append(newmod)
# --- a/ide/compiler/nodes.py  Fri Feb 03 18:38:44 2012 +0100
# (file removed by this changeset: +++ /dev/null, @@ -1,310 +0,0 @@)
"""
Parse tree elements
"""


class Node:
    """Base class of all parse tree elements."""

    location = None  # source position; overwritten per instance when known

    def getChildren(self):
        """Return the Node objects reachable through this node's attributes.

        Attributes are visited in dir() order (alphabetical). An attribute
        that is a Node is returned itself; for a list attribute the Node
        elements it contains are returned.
        """
        children = []
        for member in dir(self):
            member = getattr(self, member)
            if isinstance(member, Node):
                children.append(member)
            elif type(member) is list:
                children.extend(mi for mi in member if isinstance(mi, Node))
        return children


class Symbol(Node):
    pass


class Id(Node):
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'ID {0}'.format(self.name)


# Selectors:
class Field(Node):
    def __init__(self, fieldname):
        self.fieldname = fieldname

    def __repr__(self):
        return 'FLD {0}'.format(self.fieldname)


class Index(Node):
    def __init__(self, index, typ):
        self.index = index
        self.typ = typ

    def __repr__(self):
        return 'IDX {0}'.format(self.index)


class Deref(Node):
    pass


class Designator(Node):
    def __init__(self, obj, selectors, typ):
        self.obj = obj
        self.selectors = selectors
        self.typ = typ

    def __repr__(self):
        return 'DESIGNATOR {0}, selectors {1}, type {2}'.format(self.obj, self.selectors, self.typ)


# Type classes

def isType(a, b):
    """ Compare types a and b and check if they are equal

    Base types are equal when name and size match, array types when
    dimension and element type match, procedure types when the parameter
    types match pairwise and the return types match (two missing return
    types count as equal). Objects of different classes are never equal.
    Prints a diagnostic line for procedure type mismatches. Raises a
    CompilerException (through Error) for two objects of the same class
    that is none of the above.
    """
    if type(a) is not type(b):
        return False

    if type(a) is BaseType:
        return (a.name == b.name) and (a.size == b.size)
    elif type(a) is ArrayType:
        return (a.dimension == b.dimension) and isType(a.elementType, b.elementType)
    elif type(a) is ProcedureType:
        if len(a.parameters) != len(b.parameters):
            print('Number of parameters does not match')
            return False
        for aparam, bparam in zip(a.parameters, b.parameters):
            if not isType(aparam.typ, bparam.typ):
                print('Parameter {0} does not match parameter {1}'.format(aparam, bparam))
                return False
        # The return type is stored as `returntype` by ProcedureType.
        if a.returntype is None or b.returntype is None:
            # A missing return type only matches another missing one.
            if a.returntype is not b.returntype:
                print('Procedure return value mismatch {0} != {1}'.format(a.returntype, b.returntype))
                return False
            return True
        if not isType(a.returntype, b.returntype):
            print('Procedure return value mismatch {0} != {1}'.format(a.returntype, b.returntype))
            return False
        return True
    else:
        print(a)
        print(b)
        # Imported here because this module has no top level imports.
        from .errors import Error
        Error('Not implemented {0}'.format(a))


class Type:
    def isType(self, b):
        return isType(self, b)


class BaseType(Type):
    def __init__(self, name, size):
        self.name = name
        self.size = size

    def __repr__(self):
        return '[TYPE {0}]'.format(self.name)


class NilType(Node):
    # TODO: how to handle nil values??
    def __repr__(self):
        return 'NILTYPE'


class ArrayType(Type):
    def __init__(self, dimension, elementType):
        self.dimension = dimension
        self.elementType = elementType
        self.size = elementType.size * dimension

    def __repr__(self):
        return '[ARRAY {0} of {1}]'.format(self.dimension, self.elementType)


class RecordType(Type):
    def __init__(self, fields):
        # fields maps a field name to the type of that field
        self.fields = fields
        self.size = 0
        for fieldname in self.fields:
            self.size += self.fields[fieldname].size

    def __repr__(self):
        return '[RECORD {0}]'.format(self.fields)


class PointerType(Type):
    def __init__(self, pointedType):
        self.pointedType = pointedType
        self.size = 8

    def __repr__(self):
        return '[POINTER {0}]'.format(self.pointedType)


class ProcedureType(Type):
    def __init__(self, parameters, returntype):
        self.parameters = parameters
        self.returntype = returntype

    def __repr__(self):
        return '[PROCTYPE {0} RET {1}]'.format(self.parameters, self.returntype)


class DefinedType(Type):
    def __init__(self, name, typ):
        self.name = name
        self.typ = typ

    def __repr__(self):
        return 'Named type {0} of type {1}'.format(self.name, self.typ)


# Classes for constants like numbers and strings:
class StringConstant(Symbol):
    def __init__(self, txt):
        self.txt = txt
        self.typ = 'string'

    def __repr__(self):
        return "STRING '{0}'".format(self.txt)


# Variables, parameters, local variables, constants:
class Constant(Symbol):
    def __init__(self, value, typ, name=None, public=False):
        self.name = name
        self.value = value
        self.typ = typ
        self.public = public

    def __repr__(self):
        return 'CONSTANT {0} = {1}'.format(self.name, self.value)


class Variable(Symbol):
    def __init__(self, name, typ, public):
        self.name = name
        self.typ = typ
        self.public = public
        self.isLocal = False
        self.isReadOnly = False
        self.isParameter = False

    def __repr__(self):
        txt = '[public] ' if self.public else ''
        return '{2}VAR {0} : {1}'.format(self.name, self.typ, txt)


class Parameter(Node):
    """ A parameter has a passing method, name and typ """
    def __init__(self, kind, name, typ):
        self.kind = kind
        self.name = name
        self.typ = typ

    def __repr__(self):
        return 'PARAM {0} {1} {2}'.format(self.kind, self.name, self.typ)


# Operations:
class Unop(Node):
    def __init__(self, a, op, typ):
        self.a = a
        self.op = op  # Operation: '+', '-', '*', '/', 'mod'
        self.typ = typ
        self.place = None

    def __repr__(self):
        return 'UNOP {0}'.format(self.op)


class Binop(Node):
    def __init__(self, a, op, b, typ):
        self.a = a
        self.b = b
        self.op = op  # Operation: '+', '-', '*', '/', 'mod'
        self.typ = typ  # Resulting type :)
        self.place = None

    def __repr__(self):
        return 'BINOP {0} {1}'.format(self.op, self.typ)


class Relop(Node):
    def __init__(self, a, relop, b, typ):
        self.a = a
        self.relop = relop
        self.b = b
        self.typ = typ

    def __repr__(self):
        return 'RELOP {0}'.format(self.relop)


# Modules
class Module(Node):
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'MODULE {0}'.format(self.name)


# Imports and Exports:
class ImportedSymbol(Node):
    def __init__(self, modname, name):
        self.modname = modname
        self.name = name

    def __repr__(self):
        return 'IMPORTED SYMBOL {0}'.format(self.name)


class ExportedSymbol(Node):
    def __init__(self, name, typ):
        self.name = name
        self.typ = typ

    def __repr__(self):
        return 'EXPORTED PROCEDURE {0} : {1}'.format(self.name, self.typ)


# Procedure types
class BuiltinProcedure(Node):
    def __init__(self, name, typ):
        self.name = name
        self.typ = typ

    def __repr__(self):
        return 'BUILTIN PROCEDURE {0} : {1}'.format(self.name, self.typ)


class Procedure(Symbol):
    """ Actual implementation of a function """
    def __init__(self, name, typ, block, symtable, retexpr):
        self.name = name
        self.block = block
        self.symtable = symtable
        self.typ = typ
        self.retexpr = retexpr

    def __repr__(self):
        return 'PROCEDURE {0} {1}'.format(self.name, self.typ)


# Statements
class StatementSequence(Node):
    def __init__(self, statements):
        self.statements = statements

    def __repr__(self):
        return 'STATEMENTSEQUENCE'


class EmptyStatement(Node):
    def __repr__(self):
        return 'EMPTY STATEMENT'


class Assignment(Node):
    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __repr__(self):
        return 'ASSIGNMENT'


class ProcedureCall(Node):
    def __init__(self, proc, args):
        self.proc = proc
        self.args = args
        # The value of a call has the return type of the called procedure.
        self.typ = proc.typ.returntype

    def __repr__(self):
        return 'CALL {0} '.format(self.proc)


class IfStatement(Node):
    def __init__(self, condition, truestatement, falsestatement=None):
        self.condition = condition
        self.truestatement = truestatement
        self.falsestatement = falsestatement

    def __repr__(self):
        return 'IF-statement'


class CaseStatement(Node):
    def __init__(self, condition):
        self.condition = condition

    def __repr__(self):
        return 'CASE-statement'


class WhileStatement(Node):
    def __init__(self, condition, statements):
        self.condition = condition
        self.dostatements = statements

    def __repr__(self):
        return 'WHILE-statement'


class ForStatement(Node):
    def __init__(self, variable, begin, end, increment, statements):
        self.variable = variable
        self.begin = begin
        self.end = end
        self.increment = increment
        self.statements = statements

    def __repr__(self):
        return 'FOR-statement'


class AsmCode(Node):
    def __init__(self, asmcode):
        self.asmcode = asmcode

    def __repr__(self):
        return 'ASM CODE'
--- a/ide/compiler/parser.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,787 +0,0 @@ -""" - This module parses source code into an abstract syntax tree (AST) -""" - -from .symboltable import SymbolTable -from .nodes import * -from .errors import CompilerException, Error -from .modules import loadModule -from .display import printNode -from .builtin import * -from . import assembler - -class Parser: - def __init__(self, tokens): - """ provide the parser with the tokens iterator from the lexer. """ - self.tokens = tokens - self.NextToken() - self.errorlist = [] - - def Error(self, msg): - raise CompilerException(msg, self.token.row, self.token.col) - - # Lexer helpers: - def Consume(self, typ=''): - if self.token.typ == typ or typ == '': - v = self.token.val - self.NextToken() - return v - else: - self.Error('Excected: "{0}", got "{1}"'.format(typ, self.token.val)) - - def hasConsumed(self, typ): - if self.token.typ == typ: - self.Consume(typ) - return True - return False - - def NextToken(self): - self.token = self.tokens.__next__() - # TODO: store filename in location? - self.location = (self.token.row, self.token.col) - - # Helpers to find location of the error in the code: - def setLocation(self, obj, location): - obj.location = location - return obj - def getLocation(self): - return self.location - - """ - Recursive descent parser functions: - A set of mutual recursive functions. - Starting symbol is the Module. 
- """ - def parseModule(self): - self.imports = [] - loc = self.getLocation() - self.Consume('module') - modname = self.Consume('ID') - self.Consume(';') - mod = Module(modname) - - # Construct a symbol table for this program - mod.symtable = SymbolTable() - # Add built in types and functions: - for x in [real, integer, boolean, char, chr_func]: - mod.symtable.addSymbol(x) - - self.cst = mod.symtable - self.parseImportList() - - self.parseDeclarationSequence() - # Procedures only allowed in this scope - self.parseProcedureDeclarations() - - if self.hasConsumed('begin'): - mod.initcode = self.parseStatementSequence() - else: - mod.initcode = EmptyStatement() - - self.Consume('end') - endname = self.Consume('ID') - if endname != modname: - self.Error('end denoter must be module name') - self.Consume('.') - - mod.imports = self.imports - return self.setLocation(mod, loc) - - # Import part - def parseImportList(self): - if self.hasConsumed('import'): - self.parseImport() - while self.hasConsumed(','): - self.parseImport() - self.Consume(';') - - def parseImport(self): - loc = self.getLocation() - modname = self.Consume('ID') - mod = loadModule(modname) - self.setLocation(mod, loc) - self.cst.addSymbol(mod) - - # Helper to parse an identifier defenitions - def parseIdentDef(self): - loc = self.getLocation() - name = self.Consume('ID') - ispublic = self.hasConsumed('*') - # Make a node of this thing: - i = Id(name) - i.ispublic = ispublic - return self.setLocation(i, loc) - - def parseIdentList(self): - ids = [ self.parseIdentDef() ] - while self.hasConsumed(','): - ids.append( self.parseIdentDef() ) - return ids - - def parseQualIdent(self): - """ Parse a qualified identifier """ - name = self.Consume('ID') - if self.cst.has(Module, name): - modname = name - mod = self.cst.get(Module, modname) - self.Consume('.') - name = self.Consume('ID') - # Try to find existing imported symbol: - for imp in self.imports: - if imp.modname == modname and imp.name == name: - return imp 
- # Try to find the symbol in the modules exports: - for sym in mod.exports: - if sym.name == name: - impsym = ImportedSymbol(modname, name) - impsym.typ = sym.typ - impsym.signature = mod.signature - self.imports.append(impsym) - return impsym - self.Error("Cannot find symbol {0}".format(name)) - else: - return self.cst.getSymbol(name) - - # Helper to parse a designator - def parseDesignator(self): - """ A designator designates an object. - The base location in memory is denoted by the qualified identifier - The actual address depends on the selector. - """ - loc = self.getLocation() - obj = self.parseQualIdent() - typ = obj.typ - selectors = [] - while self.token.typ in ['.', '[', '^']: - if self.hasConsumed('.'): - field = self.Consume('ID') - if typ is PointerType: - selectors.append(Deref()) - typ = typ.pointedType - if not type(typ) is RecordType: - self.Error("field reference, type not record but {0}".format(typ)) - typ = typ.fields[field] - selectors.append(Field(field)) - elif self.hasConsumed('['): - indexes = self.parseExpressionList() - self.Consume(']') - for idx in indexes: - if not type(typ) is ArrayType: - self.Error('Cannot index non array type') - if not isType(idx.typ, integer): - self.Error('Only integer expressions can be used as an index') - selectors.append(Index(idx, typ)) - typ = typ.elementType - elif self.hasConsumed('^'): - selectors.append(Deref()) - typ = typ.pointedType - return self.setLocation(Designator(obj, selectors, typ), loc) - - # Declaration sequence - def parseDeclarationSequence(self): - """ 1. constants, 2. types, 3. 
variables """ - self.parseConstantDeclarations() - self.parseTypeDeclarations() - self.parseVariableDeclarations() - - # Constants - def evalExpression(self, expr): - if type(expr) is Binop: - a = self.evalExpression(expr.a) - b = self.evalExpression(expr.b) - if expr.op == '+': - return a + b - elif expr.op == '-': - return a - b - elif expr.op == '*': - return a * b - elif expr.op == '/': - return float(a) / float(b) - elif expr.op == 'mod': - return int(a % b) - elif expr.op == 'div': - return int(a / b) - elif expr.op == 'or': - return a or b - elif expr.op == 'and': - return a and b - else: - self.Error('Cannot evaluate expression with {0}'.format(expr.op)) - elif type(expr) is Constant: - return expr.value - elif type(expr) is Designator: - if type(expr.obj) is Constant: - return self.evalExpression(expr.obj) - else: - self.Error('Cannot evaluate designated object {0}'.format(expr.obj)) - elif type(expr) is Unop: - a = self.evalExpression(expr.a) - if expr.op == 'not': - return not a - elif expr.op == '-': - return -a - else: - self.Error('Unimplemented unary operation {0}'.format(expr.op)) - else: - self.Error('Cannot evaluate expression {0}'.format(expr)) - - def parseConstExpression(self): - e = self.parseExpression() - return self.evalExpression(e), e.typ - - def parseConstantDeclarations(self): - """ Parse const part of a module """ - if self.hasConsumed('const'): - while self.token.typ == 'ID': - i = self.parseIdentDef() - self.Consume('=') - constvalue, typ = self.parseConstExpression() - self.Consume(';') - c = Constant(constvalue, typ, name=i.name, public=i.ispublic) - self.setLocation(c, i.location) - self.cst.addSymbol(c) - - # Type system - def parseTypeDeclarations(self): - if self.hasConsumed('type'): - while self.token.typ == 'ID': - typename, export = self.parseIdentDef() - self.Consume('=') - typ = self.parseStructuredType() - self.Consume(';') - t = DefinedType(typename, typ) - self.cst.addSymbol(t) - - def parseType(self): - if 
self.token.typ == 'ID': - typename = self.Consume('ID') - if self.cst.has(Type, typename): - typ = self.cst.get(Type, typename) - while type(typ) is DefinedType: - typ = typ.typ - return typ - else: - self.Error('Cannot find type {0}'.format(typename)) - else: - return self.parseStructuredType() - - def parseStructuredType(self): - if self.hasConsumed('array'): - dimensions = [] - dimensions.append( self.parseConstExpression() ) - while self.hasConsumed(','): - dimensions.append( self.parseConstExpression() ) - self.Consume('of') - arr = self.parseType() - for dimension, consttyp in reversed(dimensions): - if not isType(consttyp, integer): - self.Error('array dimension must be an integer type (not {0})'.format(consttyp)) - if dimension < 2: - self.Error('array dimension must be bigger than 1 (not {0})'.format(dimension)) - arr = ArrayType(dimension, arr) - return arr - elif self.hasConsumed('record'): - fields = {} - while self.token.typ == 'ID': - # parse a fieldlist: - identifiers = self.parseIdentList() - self.Consume(':') - typ = self.parseType() - self.Consume(';') - for i in identifiers: - if i.name in fields.keys(): - self.Error('record field "{0}" multiple defined.'.format(i.name)) - fields[i.name] = typ - # TODO store this in another way, symbol table? 
- self.Consume('end') - return RecordType(fields) - elif self.hasConsumed('pointer'): - self.Consume('to') - typ = self.parseType() - return PointerType(typ) - elif self.hasConsumed('procedure'): - parameters, returntype = self.parseFormalParameters() - return ProcedureType(parameters, returntype) - else: - self.Error('Unknown structured type "{0}"'.format(self.token.val)) - - # Variable declarations: - def parseVariableDeclarations(self): - if self.hasConsumed('var'): - if self.token.typ == 'ID': - while self.token.typ == 'ID': - ids = self.parseIdentList() - self.Consume(':') - typename = self.parseType() - self.Consume(';') - for i in ids: - v = Variable(i.name, typename, public=i.ispublic) - self.setLocation(v, i.location) - self.cst.addSymbol(v) - else: - self.Error('Expected ID, got'+str(self.token)) - - # Procedures - def parseFPsection(self): - if self.hasConsumed('const'): - kind = 'const' - elif self.hasConsumed('var'): - kind = 'var' - else: - kind = 'value' - names = [ self.Consume('ID') ] - while self.hasConsumed(','): - names.append( self.Consume('ID') ) - self.Consume(':') - typ = self.parseType() - parameters = [Parameter(kind, name, typ) - for name in names] - return parameters - - def parseFormalParameters(self): - parameters = [] - self.Consume('(') - if not self.hasConsumed(')'): - parameters += self.parseFPsection() - while self.hasConsumed(';'): - parameters += self.parseFPsection() - self.Consume(')') - if self.hasConsumed(':'): - returntype = self.parseQualIdent() - else: - returntype = void - return ProcedureType(parameters, returntype) - - def parseProcedureDeclarations(self): - procedures = [] - while self.token.typ == 'procedure': - p = self.parseProcedureDeclaration() - procedures.append(p) - self.Consume(';') - return procedures - - def parseProcedureDeclaration(self): - loc = self.getLocation() - self.Consume('procedure') - i = self.parseIdentDef() - procname = i.name - proctyp = self.parseFormalParameters() - procsymtable = 
SymbolTable(parent = self.cst) - self.cst = procsymtable # Switch symbol table: - # Add parameters as variables to symbol table: - for parameter in proctyp.parameters: - vname = parameter.name - vtyp = parameter.typ - if parameter.kind == 'var': - vtyp = PointerType(vtyp) - variable = Variable(vname, vtyp, False) - if parameter.kind == 'const': - variable.isReadOnly = True - variable.isParameter = True - self.cst.addSymbol(variable) - self.Consume(';') - self.parseDeclarationSequence() - # Mark all variables as local: - for variable in self.cst.getAllLocal(Variable): - variable.isLocal = True - - if self.hasConsumed('begin'): - block = self.parseStatementSequence() - if self.hasConsumed('return'): - returnexpression = self.parseExpression() - else: - returnexpression = None - - if proctyp.returntype.isType(void): - if not returnexpression is None: - self.Error('Void procedure cannot return a value') - else: - if returnexpression is None: - self.Error('Procedure must return a value') - if not isType(returnexpression.typ, proctyp.returntype): - self.Error('Returned type {0} does not match function return type {1}'.format(returnexpression.typ, proctyp.returntype)) - - self.Consume('end') - endname = self.Consume('ID') - if endname != procname: - self.Error('endname should match {0}'.format(name)) - self.cst = procsymtable.parent # Switch back to parent symbol table - proc = Procedure(procname, proctyp, block, procsymtable, returnexpression) - self.setLocation(proc, loc) - self.cst.addSymbol(proc) - proc.public = i.ispublic - return proc - - # Statements: - def parseAssignment(self, lval): - loc = self.getLocation() - self.Consume(':=') - rval = self.parseExpression() - if isType(lval.typ, real) and isType(rval.typ, integer): - rval = Unop(rval, 'INTTOREAL', real) - if type(rval.typ) is NilType: - if not type(lval.typ) is ProcedureType and not type(lval.typ) is PointerType: - self.Error('Can assign nil only to pointers or procedure types, not {0}'.format(lval)) - elif 
not isType(lval.typ, rval.typ): - self.Error('Type mismatch {0} != {1}'.format(lval.typ, rval.typ)) - return self.setLocation(Assignment(lval, rval), loc) - - def parseExpressionList(self): - expressions = [ self.parseExpression() ] - while self.hasConsumed(','): - expressions.append( self.parseExpression() ) - return expressions - - def parseProcedureCall(self, procedure): - self.Consume('(') - if self.token.typ != ')': - args = self.parseExpressionList() - else: - args = [] - self.Consume(')') - parameters = procedure.typ.parameters - if len(args) != len(parameters): - self.Error("Procedure requires {0} arguments, {1} given".format(len(parameters), len(args))) - for arg, param in zip(args, parameters): - if not arg.typ.isType(param.typ): - print(arg.typ, param.typ) - self.Error('Mismatch in parameter') - return ProcedureCall(procedure, args) - - def parseIfStatement(self): - loc = self.getLocation() - self.Consume('if') - ifs = [] - condition = self.parseExpression() - if not isType(condition.typ, boolean): - self.Error('condition of if statement must be boolean') - self.Consume('then') - truestatement = self.parseStatementSequence() - ifs.append( (condition, truestatement) ) - while self.hasConsumed('elsif'): - condition = self.parseExpression() - if not isType(condition.typ, boolean): - self.Error('condition of if statement must be boolean') - self.Consume('then') - truestatement = self.parseStatementSequence() - ifs.append( (condition, truestatement) ) - if self.hasConsumed('else'): - statement = self.parseStatementSequence() - else: - statement = None - self.Consume('end') - for condition, truestatement in reversed(ifs): - statement = IfStatement(condition, truestatement, statement) - return self.setLocation(statement, loc) - - def parseCase(self): - # TODO - pass - - def parseCaseStatement(self): - self.Consume('case') - expr = self.parseExpression() - self.Consume('of') - self.parseCase() - while self.hasConsumed('|'): - self.parseCase() - 
self.Consume('end') - - def parseWhileStatement(self): - loc = self.getLocation() - self.Consume('while') - condition = self.parseExpression() - self.Consume('do') - statements = self.parseStatementSequence() - if self.hasConsumed('elsif'): - self.Error('elsif in while not yet implemented') - self.Consume('end') - return self.setLocation(WhileStatement(condition, statements), loc) - - def parseRepeatStatement(self): - self.Consume('repeat') - stmt = self.parseStatementSequence() - self.Consume('until') - cond = self.parseBoolExpression() - - def parseForStatement(self): - loc = self.getLocation() - self.Consume('for') - variable = self.parseDesignator() - if not variable.typ.isType(integer): - self.Error('loop variable of for statement must have integer type') - assert(variable.typ.isType(integer)) - self.Consume(':=') - begin = self.parseExpression() - if not begin.typ.isType(integer): - self.Error('begin expression of a for statement must have integer type') - self.Consume('to') - end = self.parseExpression() - if not end.typ.isType(integer): - self.Error('end expression of a for statement must have integer type') - if self.hasConsumed('by'): - increment, typ = self.parseConstExpression() - if not typ.isType(integer): - self.Error('Increment must be integer') - else: - increment = 1 - assert(type(increment) is int) - self.Consume('do') - statements = self.parseStatementSequence() - self.Consume('end') - return self.setLocation(ForStatement(variable, begin, end, increment, statements), loc) - - def parseAsmcode(self): - # TODO: move this to seperate file - def parseOpcode(): - return self.Consume('ID') - def parseOperand(): - if self.hasConsumed('['): - memref = [] - memref.append(parseOperand()) - self.Consume(']') - return memref - else: - if self.token.typ == 'NUMBER': - return self.Consume('NUMBER') - else: - ID = self.Consume('ID') - if self.cst.has(Variable, ID): - return self.cst.get(Variable, ID) - else: - return ID - - def parseOperands(n): - operands = 
[] - if n > 0: - operands.append( parseOperand() ) - n = n - 1 - while n > 0: - self.Consume(',') - operands.append(parseOperand()) - n = n - 1 - return operands - self.Consume('asm') - asmcode = [] - while self.token.typ != 'end': - opcode = parseOpcode() - func, numargs = assembler.opcodes[opcode] - operands = parseOperands(numargs) - asmcode.append( (opcode, operands) ) - #print('opcode', opcode, operands) - self.Consume('end') - return AsmCode(asmcode) - - def parseStatement(self): - try: - # Determine statement type based on the pending token: - if self.token.typ == 'if': - return self.parseIfStatement() - elif self.token.typ == 'case': - return self.parseCaseStatement() - elif self.token.typ == 'while': - return self.parseWhileStatement() - elif self.token.typ == 'repeat': - return self.parseRepeatStatement() - elif self.token.typ == 'for': - return self.parseForStatement() - elif self.token.typ == 'asm': - return self.parseAsmcode() - elif self.token.typ == 'ID': - # Assignment or procedure call - designator = self.parseDesignator() - if self.token.typ == '(' and type(designator.typ) is ProcedureType: - return self.parseProcedureCall(designator) - elif self.token.typ == ':=': - return self.parseAssignment(designator) - else: - self.Error('Unknown statement following designator: {0}'.format(self.token)) - else: - # TODO: return empty statement??: - return EmptyStatement() - self.Error('Unknown statement {0}'.format(self.token)) - except CompilerException as e: - print(e) - self.errorlist.append( (e.row, e.col, e.msg)) - # Do error recovery by skipping all tokens until next ; or end - while not (self.token.typ == ';' or self.token.typ == 'end'): - self.Consume(self.token.typ) - return EmptyStatement() - - def parseStatementSequence(self): - """ Sequence of statements seperated by ';' """ - statements = [ self.parseStatement() ] - while self.hasConsumed(';'): - statements.append( self.parseStatement() ) - return StatementSequence( statements ) - - # Parsing 
expressions: - """ - grammar of expressions: - expression = SimpleExpression [ reloperator SimpleExpression ] - reloperator = '=' | '<=' | '>=' | '<>' - Simpleexpression = [ '+' | '-' ] term { addoperator term } - addoperator = '+' | '-' | 'or' - term = factor { muloperator factor } - muloperator = '*' | '/' | 'div' | 'mod' | 'and' - factor = number | nil | true | false | "(" expression ")" | - designator [ actualparameters ] | 'not' factor - """ - def parseExpression(self): - """ The connector between the boolean and expression domain """ - expr = self.parseSimpleExpression() - if self.token.typ in ['>=','<=','<','>','<>','=']: - relop = self.Consume() - expr2 = self.parseSimpleExpression() - # Automatic type convert to reals: - if isType(expr.typ, real) and isType(expr2.typ, integer): - expr2 = Unop(expr2, 'INTTOREAL', real) - if isType(expr2.typ, real) and isType(expr.typ, integer): - expr = Unop(expr, 'INTTOREAL', real) - # Type check: - if not isType(expr.typ, expr2.typ): - self.Error('Type mismatch in relop') - if isType(expr.typ, real) and relop in ['<>', '=']: - self.Error('Cannot check real values for equality') - - expr = Relop(expr, relop, expr2, boolean) - return expr - - # Parsing arithmatic expressions: - def parseTerm(self): - a = self.parseFactor() - while self.token.typ in ['*', '/', 'mod', 'div', 'and']: - loc = self.getLocation() - op = self.Consume() - b = self.parseTerm() - # Type determination and checking: - if op in ['mod', 'div']: - if not isType(a.typ, integer): - self.Error('First operand should be integer, not {0}'.format(a.typ)) - if not isType(b.typ, integer): - self.Error('Second operand should be integer, not {0}'.format(b.typ)) - typ = integer - elif op == '*': - if isType(a.typ, integer) and isType(b.typ, integer): - typ = integer - elif isType(a.typ, real) or isType(b.typ, real): - if isType(a.typ, integer): - # Automatic type cast - a = Unop(a, 'INTTOREAL', real) - if isType(b.typ, integer): - b = Unop(b, 'INTTOREAL', real) - if 
not isType(a.typ, real): - self.Error('first operand must be a real!') - if not isType(b.typ, real): - self.Error('second operand must be a real!') - typ = real - else: - self.Error('Unknown operands for multiply: {0}, {1}'.format(a, b)) - elif op == '/': - # Division always yields a real result, for integer division use div - if isType(a.typ, integer): - # Automatic type cast - a = Unop(a, 'INTTOREAL', real) - if isType(b.typ, integer): - b = Unop(b, 'INTTOREAL', real) - if not isType(a.typ, real): - self.Error('first operand must be a real!') - if not isType(b.typ, real): - self.Error('second operand must be a real!') - typ = real - elif op == 'and': - if not isType(a.typ, boolean): - self.Error('First operand of and must be boolean') - if not isType(b.typ, boolean): - self.Error('Second operand of and must be boolean') - typ = boolean - else: - self.Error('Unknown operand {0}'.format(op)) - - a = self.setLocation(Binop(a, op, b, typ), loc) - return a - - def parseFactor(self): - if self.hasConsumed('('): - e = self.parseExpression() - self.Consume(')') - return e - elif self.token.typ == 'NUMBER': - loc = self.getLocation() - val = self.Consume('NUMBER') - return self.setLocation(Constant(val, integer), loc) - elif self.token.typ == 'REAL': - loc = self.getLocation() - val = self.Consume('REAL') - return self.setLocation(Constant(val, real), loc) - elif self.token.typ == 'CHAR': - val = self.Consume('CHAR') - return Constant(val, char) - elif self.token.typ == 'STRING': - txt = self.Consume('STRING') - return StringConstant(txt) - elif self.token.typ in ['true', 'false']: - val = self.Consume() - val = True if val == 'true' else False - return Constant(val, boolean) - elif self.hasConsumed('nil'): - return Constant(0, NilType()) - elif self.hasConsumed('not'): - f = self.parseFactor() - if not isType(f.typ, boolean): - self.Error('argument of boolean negation must be boolean type') - return Unop(f, 'not', boolean) - elif self.token.typ == 'ID': - designator = 
self.parseDesignator() - # TODO: handle functions different here? - if self.token.typ == '(' and type(designator.typ) is ProcedureType: - return self.parseProcedureCall(designator) - else: - return designator - else: - self.Error('Expected NUMBER, ID or ( expr ), got'+str(self.token)) - - def parseSimpleExpression(self): - """ Arithmatic expression """ - if self.token.typ in ['+', '-']: - # Handle the unary minus - op = self.Consume() - a = self.parseTerm() - typ = a.typ - if not isType(typ,real) and not isType(typ, integer): - self.Error('Unary minus or plus can be only applied to real or integers') - if op == '-': - a = Unop(a, op, typ) - else: - a = self.parseTerm() - while self.token.typ in ['+', '-', 'or']: - loc = self.getLocation() - op = self.Consume() - b = self.parseTerm() - if op in ['+', '-']: - if isType(a.typ, real) or isType(b.typ, real): - typ = real - if isType(a.typ, integer): - # Automatic type cast - a = Unop(a, 'INTTOREAL', real) - if not isType(a.typ, real): - self.Error('first operand must be a real!') - if isType(b.typ, integer): - b = Unop(b, 'INTTOREAL', real) - if not isType(b.typ, real): - self.Error('second operand must be a real!') - elif isType(a.typ, integer) and isType(b.typ, integer): - typ = integer - else: - self.Error('Invalid types {0} and {1}'.format(a.typ, b.typ)) - elif op == 'or': - if not isType(a.typ, boolean): - self.Error('first operand must be boolean for or operation') - if not isType(b.typ, boolean): - self.Error('second operand must be boolean for or operation') - typ = boolean - else: - self.Error('Unknown operand {0}'.format(op)) - a = self.setLocation(Binop(a, op, b, typ), loc) - return a -
--- a/ide/compiler/symboltable.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -from .nodes import * -from .errors import Error - -class SymbolTable: - """ - Symbol table for a current scope. - It has functions: - - hasname for checking for a name in current scope or above - - addSymbol to add an object - """ - def __init__(self, parent=None): - self.parent = parent - self.syms = {} - - def __repr__(self): - return 'Symboltable with {0} symbols\n'.format(len(self.syms)) - - def printTable(self, indent=0): - for name in self.syms: - print(self.syms[name]) - - def getAllLocal(self, cls): - """ Get all local objects of a specific type """ - r = [] - for key in self.syms.keys(): - sym = self.syms[key] - if issubclass(type(sym), cls): - r.append(sym) - return r - - def getLocal(self, cls, name): - if name in self.syms.keys(): - sym = self.syms[name] - if isinstance(sym, cls): - return sym - else: - Error('Wrong type found') - else: - Error('Symbol not found') - - # Retrieving of specific classes of items: - def get(self, cls, name): - if self.hasSymbol(name): - sym = self.getSymbol(name) - if issubclass(type(sym), cls): - return sym - raise SymbolException('type {0} undefined'.format(typename)) - - def has(self, cls, name): - if self.hasSymbol(name): - sym = self.getSymbol(name) - if issubclass(type(sym), cls): - return True - return False - - # Adding and retrieving of symbols in general: - def addSymbol(self, sym): - if sym.name in self.syms.keys(): - raise Exception('Symbol "{0}" redefined'.format(sym.name)) - else: - self.syms[sym.name] = sym - - def getSymbol(self, name): - if name in self.syms.keys(): - return self.syms[name] - else: - if self.parent: - return self.parent.getSymbol(name) - else: - Error('Symbol "{0}" undeclared!'.format(name)) - - def hasSymbol(self, name): - if name in self.syms.keys(): - return True - else: - if self.parent: - return self.parent.hasSymbol(name) - else: - return False -
--- a/ide/ide/__init__.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -# Package -
--- a/ide/ide/astviewer.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -from PyQt4.QtCore import * -from PyQt4.QtGui import * - -def astToNamedElement(astNode, parentNode): - """ Helper to convert and AST tree to NamedElement tree: """ - item = QStandardItem(str(astNode)) - item.setData(astNode) - parentNode.appendRow(item) - for c in astNode.getChildren(): - astToNamedElement(c, item) - -# The actual widget: -class AstViewer(QTreeView): - sigNodeSelected = pyqtSignal(object) - def __init__(self, parent=None): - super(AstViewer, self).__init__(parent) - self.setHeaderHidden(True) - self.clicked.connect(self.selectHandler) - - def setAst(self, ast): - """ Create a new model and add all ast elements to it """ - model = QStandardItemModel() - if ast: - astToNamedElement(ast, model.invisibleRootItem()) - self.setModel( model ) - self.expandAll() - - def selectHandler(self, index): - if not index.isValid(): - return - model = self.model() - item = model.itemFromIndex(index) - node = item.data() - self.sigNodeSelected.emit(node) - -
--- a/ide/ide/codeeditor.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,140 +0,0 @@ -from PyQt4.QtCore import * -from PyQt4.QtGui import * -import compiler.lexer -import os.path - -class MySyntaxHighlighter(QSyntaxHighlighter): - def __init__(self, parent=None): - super(MySyntaxHighlighter, self).__init__(parent) - # Syntax highlighting: - self.rules = [] - fmt = QTextCharFormat() - fmt.setForeground(Qt.darkBlue) - fmt.setFontWeight(QFont.Bold) - for kw in compiler.lexer.keywords: - pattern = '\\b'+kw+'\\b' - self.rules.append( (pattern, fmt) ) - - # Comments: - fmt = QTextCharFormat() - fmt.setForeground(Qt.gray) - fmt.setFontItalic(True) - pattern = '\{.*\}' - self.rules.append( (pattern, fmt) ) - - # Procedure: - fmt = QTextCharFormat() - fmt.setForeground(Qt.blue) - fmt.setFontItalic(True) - #pattern = '(?<=procedure )[A-Za-z]' - # TODO lookbehind does not work, think something else - #self.rules.append( (pattern, fmt) ) - - def highlightBlock(self, text): - for pattern, fmt in self.rules: - expression = QRegExp(pattern) - index = expression.indexIn(text) - while index >= 0: - length = expression.matchedLength() - self.setFormat(index, length, fmt) - index = expression.indexIn(text, index + length) - -class LineNumberArea(QWidget): - def __init__(self, codeedit): - super(LineNumberArea, self).__init__(codeedit) - self.codeedit = codeedit - # TODO: display error in this: self.setToolTip('hello world') - def sizeHint(self): - return QSize(self.codeedit.lineNumberAreaWidth(), 0) - def paintEvent(self, ev): - self.codeedit.lineNumberAreaPaintEvent(ev) - -class CodeEdit(QPlainTextEdit): - def __init__(self, parent=None): - super(CodeEdit, self).__init__(parent) - # members: - self.isUntitled = True - self.filename = None - self.setFont(QFont('Courier')) - self.lineNumberArea = LineNumberArea(self) - - self.blockCountChanged.connect(self.updateLineNumberAreaWidth) - self.updateRequest.connect(self.updateLineNumberArea) - - # 
Syntax highlighter: - self.highlighter = MySyntaxHighlighter(self.document()) - - def setFileName(self, filename): - self.filename = filename - self.isUntitled = False - self.setWindowTitle(filename) - def setSource(self, source): - self.setPlainText(source) - - def save(self): - pass - def saveAs(self): - pass - - def saveFile(self): - if self.isUntitled: - self.saveAs() - else: - source = str(self.toPlainText()) - f = open(self.filename, 'w') - f.write(source) - f.close() - - def highlightErrorLocation(self, row, col): - tc = QTextCursor(self.document()) - tc.clearSelection() - tc.movePosition(tc.Down, tc.MoveAnchor, row - 1) - tc.movePosition(tc.Right, tc.MoveAnchor, col - 1) - tc.movePosition(tc.NextCharacter, tc.KeepAnchor) # Select 1 character - selection = QTextEdit.ExtraSelection() - lineColor = QColor(Qt.red).lighter(160) - selection.format.setBackground(lineColor) - #selection.format.setProperty(QTextFormat.FullWidthSelection, True) - selection.cursor = tc - self.setExtraSelections( [ selection ] ) - def clearErrors(self): - self.setExtraSelections( [ ] ) - - def lineNumberAreaWidth(self): - digits = 1 - mx = max(1, self.blockCount()) - while mx >= 10: - mx = mx / 10 - digits += 1 - space = 3 + self.fontMetrics().width('8') * digits - return space - def lineNumberAreaPaintEvent(self, ev): - painter = QPainter(self.lineNumberArea) - painter.fillRect(ev.rect(), Qt.lightGray) - block = self.firstVisibleBlock() - blockNumber = block.blockNumber() - top = self.blockBoundingGeometry(block).translated(self.contentOffset()).top() - bottom = top + self.blockBoundingRect(block).height() - while block.isValid() and top <= ev.rect().bottom(): - if block.isVisible() and bottom >= ev.rect().top(): - num = str(blockNumber + 1) - painter.setPen(Qt.black) - painter.drawText(0, top, self.lineNumberArea.width(), self.fontMetrics().height(), Qt.AlignRight, num) - block = block.next() - top = bottom - bottom = top + self.blockBoundingRect(block).height() - blockNumber += 1 - 
def resizeEvent(self, ev): - super(CodeEdit, self).resizeEvent(ev) - cr = self.contentsRect() - self.lineNumberArea.setGeometry(QRect(cr.left(), cr.top(), self.lineNumberAreaWidth(), cr.height() )) - def updateLineNumberAreaWidth(self, newBlockCount): - self.setViewportMargins(self.lineNumberAreaWidth(), 0, 0, 0) - def updateLineNumberArea(self, rect, dy): - if dy > 0: - self.lineNumberArea.scroll(0, dy) - else: - self.lineNumberArea.update(0, rect.y(), self.lineNumberArea.width(), rect.height()) - if rect.contains(self.viewport().rect()): - self.updateLineNumberAreaWidth(0) -
--- a/ide/ide/ide.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,309 +0,0 @@ -from PyQt4.QtCore import * -from PyQt4.QtGui import * -# ide components: -from .codeeditor import CodeEdit -from .astviewer import AstViewer -import base64 -from project import Project -import os.path - -lcfospng = base64.decodestring(b'iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAAXNSR0IArs4c6QAAAAZiS0dEAP8A\n/wD/oL2nkwAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB9sJEhMKBk7B678AAAA/SURBVFjD\n7dbBCQAgDATBi9h/y7EFA4Kf2QLCwH1S6XQu6sqoujublc8BAAAAAAAAAAB8B+zXT6YJAAAAAKYd\nWSgFQNUyijIAAAAASUVORK5CYII=\n') - -class BuildOutput(QTextEdit): - """ Build output component """ - def __init__(self, parent=None): - super(BuildOutput, self).__init__(parent) - self.setCurrentFont(QFont('Courier')) - self.setReadOnly(True) - self.append('Build output will appear here!') - -class BuildErrors(QListView): - sigErrorSelected = pyqtSignal(object) - def __init__(self, parent=None): - super(BuildErrors, self).__init__(parent) - model = QStandardItemModel() - self.setModel(model) - self.clicked.connect(self.itemSelected) - def setErrorList(self, errorlist): - model = QStandardItemModel() - for e in errorlist: - row, col, msg = e - item = QStandardItem(str(msg)) - item.setData(e) - model.appendRow(item) - self.setModel(model) - def itemSelected(self, index): - if not index.isValid(): - return - model = self.model() - item = model.itemFromIndex(index) - err = item.data() - self.sigErrorSelected.emit(err) - -class ProjectView(QWidget): - sigLoadFile = pyqtSignal(str) - def __init__(self, parent=None): - super(ProjectView, self).__init__(parent) - self.treeview = QTreeView(self) - self.treeview.setContextMenuPolicy(Qt.CustomContextMenu) - l = QVBoxLayout(self) - l.addWidget(self.treeview) - pm = QPixmap() - pm.loadFromData(lcfospng) - self.projectIcon = QIcon(pm) - # Connect signals: - self.treeview.activated.connect(self.activate) - 
self.treeview.customContextMenuRequested.connect(self.contextMenu) - def setProject(self, project): - self.project = project - model = QStandardItemModel() - root = model.invisibleRootItem() - pitem = QStandardItem(self.projectIcon, project.name) - pitem.setEditable(False) - pitem.setData(project) - root.appendRow(pitem) - for f in self.project.files: - fitem = QStandardItem(f) - pitem.appendRow(fitem) - fitem.setEditable(False) - fitem.setData(f) - self.treeview.setModel(model) - self.treeview.expandAll() - def contextMenu(self, pos): - idx = self.treeview.indexAt(pos) - if not idx.isValid(): - return - item = self.treeview.model().itemFromIndex(idx) - def activate(self, index): - if not index.isValid(): - return - model = self.treeview.model() - item = model.itemFromIndex(index) - fn = item.data() - if type(fn) is str: - self.sigLoadFile.emit(fn) - -class AboutDialog(QDialog): - def __init__(self, parent=None): - super(AboutDialog, self).__init__(parent) - self.setWindowTitle('About') - l = QVBoxLayout(self) - txt = QTextEdit(self) - txt.setReadOnly(True) - aboutText = """<h1>lcfOS IDE</h1> - <p>An all-in-one IDE for OS development.</p> - <p>https://www.assembla.com/spaces/lcfOS/wiki</p> - <p>Author: Windel Bouwman</p> - """ - txt.append(aboutText) - l.addWidget(txt) - but = QPushButton('OK') - but.setDefault(True) - but.clicked.connect(self.close) - l.addWidget(but) - -class ProjectOptions(QDialog): - pass - # TODO: project options in here - -class Ide(QMainWindow): - def __init__(self, parent=None): - super(Ide, self).__init__(parent) - self.setWindowTitle('LCFOS IDE') - icon = QPixmap() - icon.loadFromData(lcfospng) - self.setWindowIcon(QIcon(icon)) - - # Create menus: - self.fileMenu = self.menuBar().addMenu('File') - self.viewMenu = self.menuBar().addMenu('View') - self.projectMenu = self.menuBar().addMenu('Project') - self.helpMenu = self.menuBar().addMenu('Help') - - # Create mdi area: - self.mdiArea = QMdiArea() - self.setCentralWidget(self.mdiArea) - - # 
Create components: - self.buildOutput = BuildOutput() - self.addComponent('Build output', self.buildOutput) - - self.astViewer = AstViewer() - self.addComponent('AST viewer', self.astViewer) - self.astViewer.sigNodeSelected.connect(self.nodeSelected) - - self.builderrors = BuildErrors() - self.addComponent('Build errors', self.builderrors) - self.builderrors.sigErrorSelected.connect(self.errorSelected) - - self.projectview = ProjectView() - self.addComponent('Project', self.projectview) - self.projectview.sigLoadFile.connect(self.loadFile) - - # About dialog: - self.aboutDialog = AboutDialog() - self.aboutDialog.setWindowIcon(QIcon(icon)) - # Create actions: - self.buildAction = QAction('Build!', self) - self.buildAction.setShortcut(QKeySequence('F7')) - self.projectMenu.addAction(self.buildAction) - self.buildAction.triggered.connect(self.buildFile) - self.openProjectAction = QAction("Open project", self) - self.openProjectAction.triggered.connect(self.openProject) - self.projectMenu.addAction(self.openProjectAction) - self.helpAction = QAction('Help', self) - self.helpAction.setShortcut(QKeySequence('F1')) - self.helpMenu.addAction(self.helpAction) - self.aboutAction = QAction('About', self) - self.helpMenu.addAction(self.aboutAction) - self.aboutAction.triggered.connect(self.aboutDialog.open) - - self.newFileAction = QAction("New File", self) - self.fileMenu.addAction(self.newFileAction) - self.newFileAction.triggered.connect(self.newFile) - self.saveFileAction = QAction("Save File", self) - self.fileMenu.addAction(self.saveFileAction) - self.saveFileAction.triggered.connect(self.saveFile) - self.closeFileAction = QAction("Close File", self) - self.fileMenu.addAction(self.closeFileAction) - self.closeFileAction.triggered.connect(self.closeFile) - - cascadeAction = QAction("Cascade windows", self) - cascadeAction.triggered.connect(self.mdiArea.cascadeSubWindows) - self.viewMenu.addAction(cascadeAction) - tileAction = QAction('Tile windows', self) - 
tileAction.triggered.connect(self.mdiArea.tileSubWindows) - self.viewMenu.addAction(tileAction) - - # Load settings: - self.settings = QSettings('windelsoft', 'lcfoside') - self.loadSettings() - - def addComponent(self, name, widget): - dw = QDockWidget(name) - dw.setWidget(widget) - dw.setObjectName(name) - self.addDockWidget(Qt.RightDockWidgetArea, dw) - self.viewMenu.addAction(dw.toggleViewAction()) - - # File handling: - def newFile(self): - ce = CodeEdit() - w = self.mdiArea.addSubWindow(ce) - ce.show() - - def saveFile(self): - ac = self.activeMdiChild() - if ac: - ac.saveFile() - - def saveAll(self): - pass - - def openFile(self): - # TODO - pass - - def closeFile(self): - ac = self.activeMdiChild() - if ac: - self.mdiArea.removeSubWindow(ac) - - def loadFile(self, filename): - # Find existing mdi widget: - wid = self.findMdiChild(filename) - if wid: - self.mdiArea.setActiveSubWindow(wid.parent()) - return wid - - # Create a new one: - ce = CodeEdit() - source = self.project.loadProjectFile(filename) - ce.setSource(source) - self.mdiArea.addSubWindow(ce) - ce.show() - return ce - - # MDI: - def activeMdiChild(self): - aw = self.mdiArea.activeSubWindow() - if aw: - return aw.widget() - else: - return None - - def findMdiChild(self, filename): - for window in self.mdiArea.subWindowList(): - wid = window.widget() - if wid.filename == filename: - return wid - return None - - def allChildren(self): - c = [] - for window in self.mdiArea.subWindowList(): - wid = window.widget() - c.append(wid) - return c - - # Settings: - def loadSettings(self): - if self.settings.contains('mainwindowstate'): - self.restoreState(self.settings.value('mainwindowstate')) - if self.settings.contains('mainwindowgeometry'): - self.restoreGeometry(self.settings.value('mainwindowgeometry')) - if self.settings.contains('openedproject'): - projectfile = self.settings.value('openedproject') - self.loadProject(projectfile) - - def closeEvent(self, ev): - 
self.settings.setValue('mainwindowstate', self.saveState()) - self.settings.setValue('mainwindowgeometry', self.saveGeometry()) - if self.project: - self.settings.setValue('openedproject', self.project.filename) - # TODO: ask for save of opened files - ev.accept() - - # Error handling: - def nodeSelected(self, node): - ce = self.activeMdiChild() - if not ce: - return - if node.location: - row, col = node.location - ce.highlightErrorLocation( row, col ) - else: - ce.clearErrors() - - def errorSelected(self, err): - row, col, msg = err - ce = self.activeMdiChild() - if not ce: - return - ce.highlightErrorLocation(row, col) - - # Project loading: - def loadProject(self, filename): - self.project = Project(filename) - self.projectview.setProject(self.project) - - def openProject(self): - filename = QFileDialog.getOpenFileName(self, \ - "Choose project file", "", "lcfos Project files (*.lcp)") - if filename: - self.loadProject(filename) - - # Build recepy: - def buildFile(self): - """ Build project """ - self.saveAll() - self.buildOutput.clear() - self.buildOutput.append(str(self.compiler)) - mods = self.compiler.compileProject(self.project) - - self.builderrors.setErrorList(self.compiler.errorlist) - self.astViewer.setAst(mods[0]) - for err in self.compiler.errorlist: - self.buildOutput.append(str(err)) - self.buildOutput.append("Done!") -
--- a/ide/project.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -""" - Project that can be stored to and loaded from XML. -""" - -from xml.sax import ContentHandler, make_parser -import xml.dom.minidom as md -import os.path - -class ProjectContentHandler(ContentHandler): - def __init__(self, project): - self.project = project - self.inFiles = False - def startElement(self, name, attrs): - if name == 'Project': - self.project.name = attrs['name'] - if name == 'Files': - self.inFiles = True - if name == 'File' and self.inFiles: - self.project.files.append(attrs['Filename']) - def endElement(self, name): - if name == 'Files': - self.inFiles = False - -class ProjectFile: - def __init__(self, filename): - self.filename = filename - -class Project: - def __init__(self, filename, isnew=False): - self.name = "" - self.files = [] - self.settings = {} - self.filename = filename - - if not isnew: - """ Load the project from the XML file """ - parser = make_parser() - handler = ProjectContentHandler(self) - parser.setContentHandler(handler) - parser.parse(filename) - - def save(self, filename): - """ Save the project in XML format """ - # Create document: - doc = md.Document() - # Add project: - project = doc.createElement("Project") - project.setAttribute("name", self.name) - doc.appendChild(project) - - # Add project files: - filesNode = doc.createElement("Files") - project.appendChild(filesNode) - for f in self.files: - fe = doc.createElement("File") - fe.setAttribute("Filename", f) - filesNode.appendChild(fe) - - # Write the XML file: - xml = doc.toprettyxml() - with open(filename, 'w') as f: - f.write(xml) - - def loadProjectFile(self, filename): - basedir = os.path.dirname(self.filename) - filename = os.path.join(basedir, filename) - with open(filename, 'r') as f: - src = f.read() - return src -
--- a/ide/runbuild.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -import sys - -# Compiler imports: -from compiler.compiler import Compiler -from project import Project - -if __name__ == '__main__': - if len(sys.argv) < 2: - print('Use {0} projectfile'.format(sys.argv[0])) - sys.exit(-1) - filename = sys.argv[1] - project = Project() - project.load(filename) - pc = Compiler() - pc.compileProject(project) -
--- a/ide/runide.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -import sys -from PyQt4.QtGui import QApplication - -# Compiler imports: -from compiler.compiler import Compiler -from ide.ide import Ide - -if __name__ == '__main__': - app = QApplication(sys.argv) - ide = Ide() - ide.compiler = Compiler() - ide.show() - app.exec_() -
--- a/ide/runtests.py Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,272 +0,0 @@ -import unittest -import os - -from compiler.compiler import Compiler -from compiler.errors import CompilerException, printError -from compiler import lexer -from compiler.parser import Parser -from compiler import assembler -from compiler.codegenerator import CodeGenerator -from project import Project - -class CompilerTestCase(unittest.TestCase): - """ test methods start with 'test*' """ - def testSource1(self): - source = """ - module lcfos; - var - a : integer; - - procedure putchar(num : integer); - begin - end putchar; - - procedure WriteNum( num: integer); - var - d, base : integer; - dgt : integer; - begin - d := 1; - base := 10; - while num div d >= base do - d := d * base - end; - while d <> 0 do - dgt := num div d; - num := num mod d; - d := d div base; - putchar(48 + dgt) - end - end WriteNum; - - begin - a := 1; - while a < 26 - do - putchar(65+a); - a := a * 2 - end; - end lcfos. - """ - pc = Compiler() - pc.compilesource(source) - def testSource2(self): - source = """ - module lcfos; - var - a, b : integer; - arr: array 30 of integer; - arr2: array 10, 12 of integer; - procedure t2*() : integer; - begin - a := 2; - while a < 5 do - b := arr[a-1] + arr[a-2]; - arr2[a,2] := b; - arr2[a,3] := arr2[a,2] + arr2[a,2]*3 + b; - arr[a] := b; - a := a + 1; - end; - return b - end t2; - begin - b := 12; - arr[0] := 1; - arr[1] := 1; - end lcfos. - """ - pc = Compiler() - mod = pc.compilesource(source) - def testSource5(self): - source = """ - module lcfos; - procedure WriteLn() : integer; - const zzz = 13; - var - a, b, c: integer; - begin - a := 2; - b := 7; - c := 10 * a + b*10*a; - return c - end WriteLn; - begin end lcfos. 
- """ - pc = Compiler() - pc.compilesource(source) - def tstForStatement(self): - source = """ - module fortest; - var - a,b,c : integer; - begin - c := 0; - for a := 1 to 10 by 1 do - b := a + 15; - c := c + b * a; - end; - end fortest. - """ - pc = Compiler() - pc.compilesource(source) - def testSourceIfAndWhilePattern(self): - source = """ - module lcfos; - procedure WriteLn() : integer; - const zzz = 13; - var - a, b, c: integer; - begin - a := 1; - b := 2; - if a * 3 > b then - c := 10*a + b*10*a*a*a*b; - else - c := 13; - end; - while a < 101 do - a := a + 1; - c := c + 2; - end; - return c - end WriteLn; - begin end lcfos. - """ - pc = Compiler() - pc.compilesource(source) - - def testPattern1(self): - """ Test if expression can be compiled into byte code """ - src = "12*13+33-12*2*3" - tokens = lexer.tokenize(src) - ast = Parser(tokens).parseExpression() - code = CodeGenerator().genexprcode(ast) - - def testAssembler(self): - """ Check all kind of assembler cases """ - assert(assembler.shortjump(5) == [0xeb, 0x5]) - assert(assembler.shortjump(-2) == [0xeb, 0xfc]) - assert(assembler.shortjump(10,'GE') == [0x7d, 0xa]) - assert(assembler.nearjump(5) == [0xe9, 0x5,0x0,0x0,0x0]) - assert(assembler.nearjump(-2) == [0xe9, 0xf9, 0xff,0xff,0xff]) - assert(assembler.nearjump(10,'LE') == [0x0f, 0x8e, 0xa,0x0,0x0,0x0]) - - def testCall(self): - assert(assembler.call('r10') == [0x41, 0xff, 0xd2]) - assert(assembler.call('rcx') == [0xff, 0xd1]) - def testXOR(self): - assert(assembler.xorreg64('rax', 'rax') == [0x48, 0x31, 0xc0]) - assert(assembler.xorreg64('r9', 'r8') == [0x4d, 0x31, 0xc1]) - assert(assembler.xorreg64('rbx', 'r11') == [0x4c, 0x31, 0xdb]) - - def testINC(self): - assert(assembler.increg64('r11') == [0x49, 0xff, 0xc3]) - assert(assembler.increg64('rcx') == [0x48, 0xff, 0xc1]) - - def testPush(self): - assert(assembler.push('rbp') == [0x55]) - assert(assembler.push('rbx') == [0x53]) - assert(assembler.push('r12') == [0x41, 0x54]) - def testPop(self): - 
assert(assembler.pop('rbx') == [0x5b]) - assert(assembler.pop('rbp') == [0x5d]) - assert(assembler.pop('r12') == [0x41, 0x5c]) - - def testAsmLoads(self): - # TODO constant add testcases - assert(assembler.mov('rbx', 'r14') == [0x4c, 0x89, 0xf3]) - assert(assembler.mov('r12', 'r8') == [0x4d, 0x89, 0xc4]) - assert(assembler.mov('rdi', 'rsp') == [0x48, 0x89, 0xe7]) - - def testAsmMemLoads(self): - assert(assembler.mov('rax', ['r8','r15',0x11]) == [0x4b,0x8b,0x44,0x38,0x11]) - assert(assembler.mov('r13', ['rbp','rcx',0x23]) == [0x4c,0x8b,0x6c,0xd,0x23]) - - assert(assembler.mov('r9', ['rbp',-0x33]) == [0x4c,0x8b,0x4d,0xcd]) - #assert(assembler.movreg64('rbx', ['rax']) == [0x48, 0x8b,0x18]) - - assert(assembler.mov('rax', [0xb000]) == [0x48,0x8b,0x4,0x25,0x0,0xb0,0x0,0x0]) - assert(assembler.mov('r11', [0xa0]) == [0x4c,0x8b,0x1c,0x25,0xa0,0x0,0x0,0x0]) - - assert(assembler.mov('r11', ['RIP', 0xf]) == [0x4c,0x8b,0x1d,0x0f,0x0,0x0,0x0]) - - def testAsmMemStores(self): - assert(assembler.mov(['rbp', 0x13],'rbx') == [0x48,0x89,0x5d,0x13]) - assert(assembler.mov(['r12', 0x12],'r9') == [0x4d,0x89,0x4c,0x24,0x12]) - assert(assembler.mov(['rcx', 0x11],'r14') == [0x4c,0x89,0x71,0x11]) - - - assert(assembler.mov([0xab], 'rbx') == [0x48,0x89,0x1c,0x25,0xab,0x0,0x0,0x0]) - assert(assembler.mov([0xcd], 'r13') == [0x4c,0x89,0x2c,0x25,0xcd,0x0,0x0,0x0]) - - assert(assembler.mov(['RIP', 0xf], 'r9') == [0x4c,0x89,0x0d,0x0f,0x0,0x0,0x0]) - - def testAsmMOV8(self): - assert(assembler.mov(['rbp', -8], 'al') == [0x88, 0x45, 0xf8]) - assert(assembler.mov(['r11', 9], 'cl') == [0x41, 0x88, 0x4b, 0x09]) - - assert(assembler.mov(['rbx'], 'al') == [0x88, 0x03]) - assert(assembler.mov(['r11'], 'dl') == [0x41, 0x88, 0x13]) - - def testAsmLea(self): - assert(assembler.leareg64('r11', ['RIP', 0xf]) == [0x4c,0x8d,0x1d,0x0f,0x0,0x0,0x0]) - assert(assembler.leareg64('rsi', ['RIP', 0x7]) == [0x48,0x8d,0x35,0x07,0x0,0x0,0x0]) - - assert(assembler.leareg64('rcx', ['rbp', -8]) == [0x48,0x8d,0x4d,0xf8]) - - 
def testAssemblerCMP(self): - assert(assembler.cmpreg64('rdi', 'r13') == [0x4c, 0x39, 0xef]) - assert(assembler.cmpreg64('rbx', 'r14') == [0x4c, 0x39, 0xf3]) - assert(assembler.cmpreg64('r12', 'r9') == [0x4d, 0x39, 0xcc]) - - assert(assembler.cmpreg64('rdi', 1) == [0x48, 0x83, 0xff, 0x01]) - assert(assembler.cmpreg64('r11', 2) == [0x49, 0x83, 0xfb, 0x02]) - def testAssemblerADD(self): - assert(assembler.addreg64('rbx', 'r13') == [0x4c, 0x01, 0xeb]) - assert(assembler.addreg64('rax', 'rbx') == [0x48, 0x01, 0xd8]) - assert(assembler.addreg64('r12', 'r13') == [0x4d, 0x01, 0xec]) - - assert(assembler.addreg64('rbx', 0x13) == [0x48, 0x83, 0xc3, 0x13]) - assert(assembler.addreg64('r11', 0x1234567) == [0x49, 0x81, 0xc3, 0x67, 0x45,0x23,0x1]) - assert(assembler.addreg64('rsp', 0x33) == [0x48, 0x83, 0xc4, 0x33]) - - def testAssemblerSUB(self): - assert(assembler.subreg64('rdx', 'r14') == [0x4c, 0x29, 0xf2]) - assert(assembler.subreg64('r15', 'rbx') == [0x49, 0x29, 0xdf]) - assert(assembler.subreg64('r8', 'r9') == [0x4d, 0x29, 0xc8]) - - assert(assembler.subreg64('rsp', 0x123456) == [0x48, 0x81, 0xec, 0x56,0x34,0x12,0x0]) - assert(assembler.subreg64('rsp', 0x12) == [0x48, 0x83, 0xec, 0x12]) - - def testAssemblerIDIV(self): - assert(assembler.idivreg64('r11') == [0x49, 0xf7, 0xfb]) - assert(assembler.idivreg64('rcx') == [0x48, 0xf7, 0xf9]) - assert(assembler.idivreg64('rsp') == [0x48, 0xf7, 0xfc]) - - def testAssemblerIMUL(self): - assert(assembler.imulreg64_rax('rdi') == [0x48, 0xf7, 0xef]) - assert(assembler.imulreg64_rax('r10') == [0x49, 0xf7, 0xea]) - assert(assembler.imulreg64_rax('rdx') == [0x48, 0xf7, 0xea]) - - assert(assembler.imulreg64('r11', 'rdi') == [0x4c, 0xf, 0xaf, 0xdf]) - assert(assembler.imulreg64('r12', 'rbx') == [0x4c, 0xf, 0xaf, 0xe3]) - # nasm generates this machine code: 0x4d, 0x6b, 0xff, 0xee - # This also works: 4D0FAFFE (another variant?? 
) - assert(assembler.imulreg64('r15', 'r14') == [0x4d, 0x0f, 0xaf, 0xfe]) - def testProject(self): - p = Project('test.xml', isnew=True) - p.name = "Test project" - p.files.append('main.mod') - p.files.append('test.mod') - p.save('test.xml') - - q = Project('test.xml') - - assert(p.name == q.name) - assert(p.files == q.files) - # TODO: remove test.xml test file - os.remove('test.xml') - -if __name__ == '__main__': - unittest.main() -
--- a/ide/testproject/main.mod Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -module main; -var - a,b,c : integer; - -procedure test(x:integer); - var y,z:integer; -begin - y := x * 3 + 2; - z := x + y + a; -end test; - -procedure add(a:integer; b:integer):integer; - var - tmp : integer; - begin - tmp := a + b; - return tmp - end add; - -begin - a := 12; - b := a * 12 + 33; - c := a div b + a * b * 99; -end main. -
--- a/ide/testproject/test.lcp Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -<?xml version="1.0" ?> -<Project name="Test project"> - <Files> - <File Filename="main.mod"/> - <File Filename="test.mod"/> - </Files> -</Project>
--- a/ide/testproject/test.mod Fri Feb 03 18:38:44 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -module test; -var - a,b,c : integer; - -procedure test(x:integer); - var y,z:integer; -begin - y := x * 3 + 2; - z := x + y + a; -end test; - -procedure add(a:integer; b:integer):integer; - var - tmp : integer; - begin - tmp := a + b; - return tmp - end add; - -begin - a := 12; - b := a * 12 + 33; - c := a div b + a * b * 99; -end test. -