# HG changeset patch
# User windel
# Date 1316365229 -7200
# Node ID 92df07bc20817b57135dc11ccfc602818d0016db
# Parent 1a4faf9ef1ea9b30e8a07bb0ed134624ec900fe0
Initial import of compiler

diff -r 1a4faf9ef1ea -r 92df07bc2081 LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,25 @@
+Copyright 2011 The lcfOS Project. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are
+permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice, this list of
+      conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice, this list
+      of conditions and the following disclaimer in the documentation and/or other materials
+      provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE LCFOS PROJECT ''AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE LCFOS PROJECT OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The views and conclusions contained in the software and documentation are those of the
+authors and should not be interpreted as representing official policies, either expressed
+or implied, of the lcfOS Project.
\ No newline at end of file
diff -r 1a4faf9ef1ea -r 92df07bc2081 README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,12 @@
+= Software required =
+ * python3
+ * pyqt4
+Optional:
+ * bochs
+ * nasm
+
+= How to start the IDE =
+
+$ cd ide
+$ python runide.py
+
diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/__init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/__init__.py Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,2 @@
+# File to make this directory a package.
+
diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/assembler.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/assembler.py Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,354 @@
+"""
+  Assembler code generation functions
+"""
+
+from .errors import Error
+
+modrm = {'rax': 0, 'rbx': 1} # NOTE: dead value, the name is rebound by 'def modrm' below
+
+# Table 3.1 of the intel manual:
+# use REX.W on the table below:
+regs64 = {'rax': 0,'rcx':1,'rdx':2,'rbx':3,'rsp':4,'rbp':5,'rsi':6,'rdi':7,'r8':0,'r9':1,'r10':2,'r11':3,'r12':4,'r13':5,'r14':6,'r15':7}
+regs32 = {'eax': 0, 'ecx':1, 'edx':2, 'ebx': 3, 'esp': 4, 'ebp': 5, 'esi':6, 'edi':7}
+regs8 = {'al':0,'cl':1,'dl':2,'bl':3,'ah':4,'ch':5,'dh':6,'bh':7}
+
+# Calculation of the rexb bit:
+rexbit = {'rax': 0, 'rcx':0, 'rdx':0, 'rbx': 0, 'rsp': 0, 'rbp': 0, 'rsi':0, 'rdi':0,'r8':1,'r9':1,'r10':1,'r11':1,'r12':1,'r13':1,'r14':1,'r15':1}
+
+# Helper functions:
+def imm64(x):
+    """ represent 64 bits integer in little endian 8 bytes"""
+    if x < 0:
+        x = x + (1 << 64)
+    x = x & 0xFFFFFFFFFFFFFFFF
+    return [ (x >> (p*8)) & 0xFF for p in range(8) ]
+
+def imm32(x):
+    """ represent 32 bits integer in little endian 4 bytes"""
+    if x < 0:
+        x = x + (1 << 32)
+    x = x & 0xFFFFFFFF
+    return [ (x >> (p*8)) & 0xFF for p in range(4) ]
+
+def imm8(x):
+    """ represent 8 bits integer (signed -128..127 or unsigned 0..255) in 1 byte, raise Error otherwise"""
+    if not -128 <= x <= 255:
+        Error('value {0} does not fit in 8 bits'.format(x))
+    return [ x & 0xFF ]
+
+def modrm(mod=0, rm=0, reg=0):
+    """ Construct the modrm byte from its components """
+    assert(mod <= 3)
+    assert(rm <= 7)
+    assert(reg <= 7)
+    return (mod << 6) | (reg << 3) | rm
+
+def rex(w=0, r=0, x=0, b=0):
+    """ Create a REX prefix byte """
+    assert(w <= 1)
+    assert(r <= 1)
+    assert(x <= 1)
+    assert(b <= 1)
+    return 0x40 | (w<<3) | (r<<2) | (x<<1) | b
+
+def sib(ss=0, index=0, base=0):
+    assert(ss <= 3)
+    assert(index <= 7)
+    assert(base <= 7)
+    return (ss << 6) | (index << 3) | base
+
+tttn = {'L':0xc,'G':0xf,'NE':0x5,'GE':0xd,'LE':0xe, 'E':0x4}
+
+# Actual instructions:
+def nearjump(distance, condition=None):
+    """ jmp imm32 """
+    lim = (1<<30)
+    if abs(distance) > lim:
+        Error('near jump cannot jump over more than {0} bytes'.format(lim))
+    if condition:
+        if distance < 0:
+            distance -= 6 # Skip own instruction
+        opcode = 0x80 | tttn[condition] # Jcc imm32
+        return [0x0F, opcode] + imm32(distance)
+    else:
+        if distance < 0:
+            distance -= 5 # Skip own instruction
+        return [ 0xE9 ] + imm32(distance)
+
+def shortjump(distance, condition=None):
+    """ jmp imm8 """
+    lim = 118
+    if abs(distance) > lim:
+        Error('short jump cannot jump over more than {0} bytes'.format(lim))
+    if distance < 0:
+        distance -= 2 # Skip own instruction
+    if condition:
+        opcode = 0x70 | tttn[condition] # Jcc rel8
+    else:
+        opcode = 0xeb # jmp rel8
+    return [opcode] + imm8(distance)
+
+# Helper that determines jump type:
+def reljump(distance):
+    if abs(distance) < 110:
+        return shortjump(distance)
+    else:
+        return nearjump(distance)
+
+def push(reg):
+    if reg in regs64:
+        if rexbit[reg] == 1:
+            return [0x41, 0x50 + regs64[reg]]
+        else:
+            return [0x50 + regs64[reg]]
+    else:
+        Error('push for {0} not implemented'.format(reg))
+
+def pop(reg):
+    if reg in regs64:
+        if rexbit[reg] == 1:
+            rexprefix = rex(b=1)
+            opcode = 0x58 + regs64[reg]
+            return [rexprefix, opcode]
+        else:
+            opcode = 0x58 + regs64[reg]
+            return [ opcode ]
+    else:
+        Error('pop for {0} not implemented'.format(reg))
+
+def INT(number):
+    opcode = 0xcd
+    return [opcode] + imm8(number)
+
+def syscall():
+    return [0x0F, 0x05]
+
+def call(distance):
+    if type(distance) is int:
+        return [0xe8]+imm32(distance)
+    elif type(distance) is str and distance in regs64:
+        reg = distance
+        opcode = 0xFF # 0xFF /2 == call r/m64
+        mod_rm = modrm(mod=3, reg=2, rm=regs64[reg])
+        if rexbit[reg] == 1:
+            rexprefix = rex(b=rexbit[reg])
+            return [rexprefix, opcode, mod_rm]
+        else:
+            return [opcode, mod_rm]
+    else:
+        Error('Cannot call to {0}'.format(distance))
+
+def ret():
+    return [ 0xc3 ]
+
+def increg64(reg):
+    assert(reg in regs64)
+    rexprefix = rex(w=1, b=rexbit[reg])
+    opcode = 0xff
+    mod_rm = modrm(mod=3, rm=regs64[reg])
+    return [rexprefix, opcode, mod_rm]
+
+def prepost8(r8, rm8):
+    assert(r8 in regs8)
+    pre = []
+    if type(rm8) is list:
+        # TODO: merge mem access with prepost for 64 bits
+        if len(rm8) == 1:
+            base, = rm8
+            if type(base) is str and base in regs64:
+                assert(not base in ['rbp', 'rsp', 'r12', 'r13'])
+                mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
+                if rexbit[base] == 1:
+                    pre.append(rex(b=1))
+                post = [mod_rm]
+            else:
+                Error('One arg of type {0} not implemented'.format(base))
+        elif len(rm8) == 2:
+            base, offset = rm8
+            assert(type(offset) is int)
+            assert(base in regs64)
+
+            if base == 'rsp' or base == 'r12':
+                Error('Cannot use rsp or r12 as base yet')
+            if rexbit[base] == 1:
+                pre.append( rex(b=1) )
+            mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8])
+            post = [mod_rm] + imm8(offset)
+        else:
+            Error('not supporting prepost8 with list len {0}'.format(len(rm8)))
+    else:
+        Error('Not supporting move with reg8 {0}'.format(r8))
+    return pre, post
+
+def prepost(r64, rm64):
+    assert(r64 in regs64)
+    if type(rm64) is list:
+        if len(rm64) == 3:
+            base, index, disp = rm64
+            assert(base in regs64)
+            assert(index in regs64)
+            assert(type(disp) is int)
+            # Assert that no special cases are used:
+            # TODO: swap base and index to avoid special cases
+            # TODO: exploit special cases and make better code
+            assert(index != 'rsp')
+
+            rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
+            # mod=1 and rm=4 indicates a SIB byte: [--][--]+imm8
+            mod_rm = modrm(mod=1, rm=4, reg=regs64[r64])
+            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
+            return [rexprefix], [mod_rm, si_b] + imm8(disp)
+        elif len(rm64) == 2:
+            base, offset = rm64
+            assert(type(offset) is int)
+            if base == 'RIP':
+                # RIP pointer relative addressing mode!
+                rexprefix = rex(w=1, r=rexbit[r64])
+                mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
+                return [rexprefix], [mod_rm] + imm32(offset)
+            else:
+                assert(base in regs64)
+
+                if base == 'rsp' or base == 'r12':
+                    # extended function that uses SIB byte
+                    rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
+                    # rm=4 indicates a SIB byte follows
+                    mod_rm = modrm(mod=1, rm=4, reg=regs64[r64])
+                    # index=4 indicates that index is not used
+                    si_b = sib(ss=0, index=4, base=regs64[base])
+                    return [rexprefix], [mod_rm, si_b] + imm8(offset)
+                else:
+                    rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
+                    mod, disp = (1, imm8(offset)) if -128 <= offset < 128 else (2, imm32(offset)) # mod=1: disp8, mod=2: disp32
+                    return [rexprefix], [modrm(mod=mod, rm=regs64[base], reg=regs64[r64])] + disp
+        elif len(rm64) == 1:
+            offset = rm64[0]
+            if type(offset) is int:
+                rexprefix = rex(w=1, r=rexbit[r64])
+                mod_rm = modrm(mod=0, rm=4,reg=regs64[r64])
+                si_b = sib(ss=0, index=4,base=5) # 0x25
+                return [rexprefix], [mod_rm, si_b] + imm32(offset)
+            else:
+                Error('Memory reference of type {0} not implemented'.format(offset))
+        else:
+            Error('Memory reference not implemented')
+    elif rm64 in regs64:
+        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
+        mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
+        return [rexprefix], [mod_rm]
+
+def leareg64(rega, m):
+    opcode = 0x8d # lea r64, m
+    pre, post = prepost(rega, m)
+    return pre + [opcode] + post
+
+def mov(rega, regb):
+    if type(regb) is int:
+        pre = [rex(w=1, b=rexbit[rega])]
+        opcode = 0xb8 + regs64[rega]
+        post = imm64(regb)
+    elif type(regb) is str:
+        if regb in regs64:
+            opcode = 0x89 # mov r/m64, r64
+            pre, post = prepost(regb, rega)
+        elif regb in regs8:
+            opcode = 0x88 # mov r/m8, r8
+            pre, post = prepost8(regb, rega)
+        else:
+            Error('Unknown register {0}'.format(regb))
+    elif type(rega) is str:
+        if rega in regs64:
+            opcode = 0x8b # mov r64, r/m64
+            pre, post = prepost(rega, regb)
+        else:
+            Error('Unknown register {0}'.format(rega))
+    else:
+        Error('Move of this kind {0}, {1} not implemented'.format(rega, regb))
+    return pre + [opcode] + post
+
+def xorreg64(rega, regb):
+    rexprefix = rex(w=1, r=rexbit[regb], b=rexbit[rega])
+    opcode = 0x31 # XOR r/m64, r64
+    # Alternative is 0x33 XOR r64, r/m64
+    mod_rm = modrm(3, rm=regs64[rega], reg=regs64[regb])
+    return [rexprefix, opcode, mod_rm]
+
+# integer arithmetic:
+def addreg64(rega, regb):
+    if regb in regs64:
+        pre, post = prepost(regb, rega)
+        opcode = 0x01 # ADD r/m64, r64
+        return pre + [opcode] + post
+    elif type(regb) is int:
+        if -128 <= regb < 100: # imm8 is sign extended by the processor
+            rexprefix = rex(w=1, b=rexbit[rega])
+            opcode = 0x83 # add r/m, imm8
+            mod_rm = modrm(3, rm=regs64[rega], reg=0)
+            return [rexprefix, opcode, mod_rm]+imm8(regb)
+        elif -(1<<31) <= regb < (1<<31):
+            rexprefix = rex(w=1, b=rexbit[rega])
+            opcode = 0x81 # add r/m64, imm32
+            mod_rm = modrm(3, rm=regs64[rega], reg=0)
+            return [rexprefix, opcode, mod_rm]+imm32(regb)
+        else:
+            Error('Constant value out of 32 bit range!')
+    else:
+        Error('unknown second operand {0}!'.format(regb))
+
+def subreg64(rega, regb):
+    if regb in regs64:
+        pre, post = prepost(regb, rega)
+        opcode = 0x29 # SUB r/m64, r64
+        return pre + [opcode] + post
+    elif type(regb) is int:
+        if -128 <= regb < 100: # imm8 is sign extended by the processor
+            rexprefix = rex(w=1, b=rexbit[rega])
+            opcode = 0x83 # sub r/m, imm8
+            mod_rm = modrm(3, rm=regs64[rega], reg=5)
+            return [rexprefix, opcode, mod_rm]+imm8(regb)
+        elif -(1<<31) <= regb < (1<<31):
+            rexprefix = rex(w=1, b=rexbit[rega])
+            opcode = 0x81 # sub r/m64, imm32
+            mod_rm = modrm(3, rm=regs64[rega], reg=5)
+            return [rexprefix, opcode, mod_rm]+imm32(regb)
+        else:
+            Error('Constant value out of 32 bit range!')
+
+    else:
+        Error('unknown second operand {0}!'.format(regb))
+
+def idivreg64(reg):
+    rexprefix = rex(w=1, b=rexbit[reg])
+    opcode = 0xf7 # IDIV r/m64
+    mod_rm = modrm(3, rm=regs64[reg], reg=7)
+    return [rexprefix, opcode, mod_rm]
+
+def imulreg64_rax(reg):
+    rexprefix = rex(w=1, b=rexbit[reg])
+    opcode = 0xf7 # IMUL r/m64
+    mod_rm = modrm(3, rm=regs64[reg], reg=5)
+    return [rexprefix, opcode, mod_rm]
+
+def imulreg64(rega, regb):
+    pre, post = prepost(rega, regb)
+    opcode = 0x0f # IMUL r64, r/m64
+    opcode2 = 0xaf
+    return pre + [opcode, opcode2] + post
+
+def cmpreg64(rega, regb):
+    if regb in regs64:
+        pre, post = prepost(regb, rega)
+        opcode = 0x39 # CMP r/m64, r64
+        return pre + [opcode] + post
+    elif type(regb) is int and -128 <= regb < 128: # only the sign extended imm8 form is implemented
+        rexprefix = rex(w=1, b=rexbit[rega])
+        opcode = 0x83 # CMP r/m64, imm8
+        mod_rm = modrm(3, rm=regs64[rega], reg=7)
+        return [rexprefix, opcode, mod_rm] + imm8(regb)
+
+    else:
+        Error('not implemented cmp64')
+
+# Mapping that maps string names to the right functions:
+opcodes = {'mov':(mov,2), 'lea':(leareg64,2), 'int':(INT,1), 'syscall':(syscall,0)}
+
diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/builtin.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/builtin.py Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,10 @@
+from .nodes import *
+
+boolean = BaseType('boolean', 8) # Choose: 1 or 8 bytes?
+integer = BaseType('integer', 8)
+real = BaseType('real', 8)
+char = BaseType('char', 1)
+void = BaseType('void', 0)
+
+chr_func = BuiltinProcedure('chr', ProcedureType([Parameter('value', 'x', integer)], char))
+
diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/codegenerator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/codegenerator.py Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,479 @@
+"""
+  Code generation for 64 bits intel processors
+"""
+
+from .nodes import *
+from .errors import Error
+from .builtin import real, integer, boolean, char
+from .assembler import *
+
+class CodeGenerator:
+    def __init__(self):
+        self.strings = []
+        self.initialize()
+    def initialize(self):
+        # Register descriptors:
+        self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',')
+        self.usedregs = []
+        # Members to accumulate the result into. The image holds bytecode and
+        # global variable space (globals are referenced by RIP relative addressing):
+        self.image = []
+        self.data = [] # String constants are appended here by gencode
+        self.rip = 0 # The current instruction pointer location.
+        # TODO: backpatch list here?
+
+    # Functions to modify the code image
+    def addCode(self, code):
+        assert(type(code) is list)
+        self.image += code
+        self.rip += len(code)
+    def fixCode(self, position, code):
+        self.image[position:position+len(code)] = code
+    def align(self, b):
+        while (self.rip % b) != 0:
+            self.addCode([0])
+
+    def saveAllRegisters(self): # FIXME: broken: usedregs is a list (no keys()) and 'code' is never bound
+        regs = list(self.usedregs.keys())
+        for reg in regs:
+            code += self.saveRegister(reg)
+
+    def saveRegister(self, reg): # FIXME: broken: treats usedregs as a dict and returns nothing
+        code = []
+        if reg in self.usedregs.keys():
+            code.append('mov {0}, {1}'.format(self.usedregs[reg], reg))
+            del self.usedregs[reg]
+            self.freeregs.append(reg)
+
+    def getreg(self, node):
+        """ acquire a working register for a certain node."""
+        # Temporary register bypass action:
+        if len(self.freeregs) > 0:
+            reg = self.freeregs.pop(0)
+            self.usedregs.append(reg)
+        else:
+            Error('No more free regs')
+        node.reg = reg
+
+    def freereg(self, node):
+        reg = node.reg
+        node.reg = None
+        self.freeregs.append(reg)
+        self.usedregs.remove(reg)
+
+    # Helpers to load and retrieve designated objects:
+    def storeRegInDesignator(self, reg, designator):
+        assert(type(reg) is str)
+        assert(type(designator) is Designator)
+        if len(designator.selectors) > 0:
+            self.gencode( designator ) # Load the pointer into some register
+            self.addCode( mov([designator.reg, 0x0], reg) )
+            self.freereg( designator )
+        else:
+            if designator.obj.isLocal:
+                # Relative from rbp register
+                mem = ['rbp', designator.obj.offset]
+                self.addCode( mov(mem, reg) )
+            else:
+                # Relative from RIP after move
+                self.addCode( mov(['RIP', 0x0], reg) )
+                self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) )
+
+    # Code generation functions:
+    def genexprcode(self, node):
+        """
+        Generate code for expressions!
+        Recursively evaluates, and ensures a register contains the answer.
+        register is an integer register or a floating point reg
+        """
+        if isinstance(node, Binop):
+            """ Handle a binary operation (two arguments) of some kind """
+            self.genexprcode(node.a)
+            self.genexprcode(node.b)
+
+            if node.op == 'mod':
+                assert(node.typ.isType(integer))
+                self.addCode(mov('rax', node.a.reg))
+                self.addCode([0x48, 0x99]) # cqo: sign extend rax into rdx:rax (idiv is a signed divide)
+                self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
+                node.reg = node.a.reg
+                self.freereg(node.b) # give up register that contains b
+                self.addCode(mov(node.reg, 'rdx')) # move remainder into result
+            elif node.op == 'div':
+                assert(node.typ.isType(integer))
+                self.addCode(mov('rax', node.a.reg))
+                self.addCode([0x48, 0x99]) # cqo: sign extend rax into rdx:rax (idiv is a signed divide)
+                self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg
+                node.reg = node.a.reg
+                self.freereg(node.b) # give up register that contains b
+                self.addCode(mov(node.reg, 'rax')) # move result into reg
+            elif node.op == '*':
+                if node.typ.isType(integer):
+                    self.addCode(imulreg64(node.a.reg, node.b.reg))
+                    node.reg = node.a.reg
+                    self.freereg(node.b)
+                else:
+                    Error('{0} for * not implemented'.format(node.typ))
+            elif node.op == '+':
+                if node.typ.isType(integer):
+                    self.addCode(addreg64(node.a.reg, node.b.reg))
+                    node.reg = node.a.reg
+                    self.freereg(node.b)
+                else:
+                    Error('{0} for + not implemented'.format(node.typ))
+            elif node.op == '-':
+                if node.typ.isType(integer):
+                    self.addCode(subreg64(node.a.reg, node.b.reg))
+                    node.reg = node.a.reg
+                    self.freereg(node.b)
+                else:
+                    Error('{0} for - not implemented'.format(node.typ))
+            else:
+                Error('Unknown Binop {0}'.format(node.op))
+
+        elif type(node) is Unop:
+            if node.op == 'INTTOREAL':
+                self.genexprcode(node.a)
+                node.reg = node.a.reg
+                # TODO use 'FILD' instruction
+                # (there is no floating point register allocation yet)
+                Error('Unop INTTOREAL not implemented')
+            elif node.op == 'ABS':
+                if node.typ.isType(real):
+                    code = [0xD9, 0xE1] # st(0) = fabs st(0)
+                    Error('ABS error real')
+                elif node.typ.isType(integer):
+                    code = []
+                    Error('ABS error integer')
+                else:
+                    Error('ABS error')
+            else:
+                Error('Unknown Unop {0}'.format(node.op))
+
+        elif isinstance(node, Designator):
+            # dereference, array index. Make sure that the result comes into a register
+            if len(node.selectors) > 0:
+                self.gencode(node) # Load the pointer into some register
+
+                # Now we can access the object at location '[node.reg]':
+                if node.typ.isType(integer):
+                    self.addCode( mov(node.reg, [node.reg, 0x0]) )
+                else:
+                    Error('Only integer types implemented')
+            else:
+                # No selectors, load variable directly
+                if node.obj.typ.isType(integer):
+                    if type(node.obj) is Constant:
+                        self.genexprcode(node.obj)
+                        node.reg = node.obj.reg
+                    else:
+                        self.getreg(node)
+                        # Get a register to store the integer value
+                        if node.obj.isLocal:
+                            # relative to rbp:
+                            self.addCode( mov(node.reg, ['rbp', node.obj.offset]) )
+                        else:
+                            self.addCode(mov(node.reg, ['RIP', 0x0]))
+                            self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip))
+                else:
+                    Error('Cannot load variable type {0}'.format(node.typ))
+
+        elif isinstance(node, Relop):
+            # Create a boolean from operands
+            # TODO create an alternative for expressions used as conditions.
+            self.genexprcode(node.a)
+            self.genexprcode(node.b)
+
+            if node.a.typ.isType(integer):
+                instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'}
+                if not node.relop in instructions.keys():
+                    Error('Unimplemented relop: '+str(node.relop))
+                instr = instructions[node.relop]
+
+                node.reg = node.a.reg
+                self.addCode( cmpreg64(node.a.reg, node.b.reg) )
+                self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp
+                fixloc1 = self.rip - 1
+                rip1 = self.rip
+                self.addCode( xorreg64(node.reg, node.reg) )
+                self.addCode( shortjump(0x0) ) # Jump over 1 code
+                fixloc2 = self.rip - 1
+                self.fixCode(fixloc1, imm8(self.rip - rip1))
+                rip2 = self.rip
+                self.addCode( xorreg64(node.reg, node.reg) )
+                self.addCode( increg64(node.reg) )
+                self.fixCode(fixloc2, imm8(self.rip - rip2))
+
+                self.freereg(node.b)
+            else:
+                Error('Relop not implemented for {0}'.format(node.a.typ))
+
+        elif type(node) is Constant:
+            if node.typ.isType(integer):
+                self.getreg(node)
+                self.addCode(mov(node.reg, node.value))
+            elif node.typ.isType(real):
+                # No register is claimed here, real constants are not supported yet:
+                Error('TODO: get real reg')
+                # TODO: get a fixed point reg, and load the variable in there
+            else:
+                Error('Howto generate code for {0}?'.format(node))
+
+        elif type(node) is ProcedureCall:
+            if type(node.proc.obj) is BuiltinProcedure:
+                # Handle builtin procedures different, these not always call
+                # a function, but generate code.
+                bi = node.proc.obj
+                if bi.name == 'chr':
+                    arg = node.args[0]
+                    self.genexprcode(arg)
+                    # Store character in full width register:
+                    # TODO: store in char only register
+                    node.reg = arg.reg
+                else:
+                    Error('Unknown builtin function {0}'.format(bi.name))
+            else:
+                # Use generic procedure call first
+                self.gencode(node)
+                # Retrieve result:
+                if node.typ.isType(integer):
+                    # Store result!
+                    self.getreg(node)
+                    self.addCode( mov(node.reg, 'rax') )
+                else:
+                    Error('Return type not supported {0}'.format(node.typ))
+        else:
+            Error('Cannot generate expression code for: {0}'.format(node))
+
+    def gencode(self, node):
+        """ Code generation function for AST nodes """
+        if isinstance(node, Module):
+            # for all imports make a list of pointer to the actual procedures:
+            for imp in node.imports:
+                imp.offset = self.rip
+                self.addCode( [0x0]*8 )
+            # global variable storage allocation
+            variables = node.symtable.getAllLocal(Variable)
+            for var in variables:
+                var.isLocal = False
+                var.offset = self.rip
+                self.addCode( [0x00] * var.typ.size ) # TODO initial values here?
+            self.align(8)
+            # TODO: mark end of data and start of code inside image
+            # TODO: round data to page size to enable protection by loader.
+            # Procedure code generation:
+            procedures = node.symtable.getAllLocal(Procedure)
+            node.procs = procedures
+            for proc in procedures:
+                self.gencode(proc)
+            # Module init code:
+            node.initcodeentry = self.rip
+            self.gencode(node.initcode)
+            self.addCode( ret() )
+            # TODO: how to return from module init code? far return??
+
+        elif type(node) is Procedure:
+            # calculate offsets for local variables and parameters
+            # Variable location relative to 'rbp' register
+            variables = node.symtable.getAllLocal(Variable)
+            offset = 0
+            paramoffset = 16
+            for var in variables:
+                var.isLocal = True
+                if not var.isParameter:
+                    offset += var.typ.size
+                    # Offset is negative of rbp in stack frame
+                    var.offset = -offset
+            node.framesize = offset
+            # Calculate offsets of parameters relative to rbp register
+            for par in reversed(node.typ.parameters):
+                pvar = node.symtable.getLocal(Variable, par.name)
+                pvar.offset = paramoffset
+                paramoffset += pvar.typ.size
+
+            # code generation
+            node.entrypoint = self.rip
+            self.addCode(push('rbp'))
+            self.addCode(mov('rbp', 'rsp')) # Setup the base pointer
+            self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals
+            self.gencode(node.block)
+            if node.retexpr:
+                if node.retexpr.typ.isType(integer):
+                    self.genexprcode(node.retexpr)
+                    self.addCode( mov('rax', node.retexpr.reg) )
+                    self.freereg(node.retexpr)
+                else:
+                    Error('Cannot return this kind yet {0}'.format(node.retexpr.typ))
+            self.addCode( addreg64('rsp', node.framesize) )
+            self.addCode( pop('rbp') )
+            self.addCode( ret() )
+            assert(len(self.usedregs) == 0)
+
+        elif isinstance(node, StatementSequence):
+            for s in node.statements:
+                self.gencode(s)
+            assert(len(self.usedregs) == 0)
+
+        elif type(node) is ProcedureCall:
+            # Prepare parameters on the stack:
+            stacksize = 0
+            assert(len(node.args) == len(node.proc.typ.parameters))
+            for arg, param in zip(node.args, node.proc.typ.parameters):
+
+                if param.kind == 'value':
+                    self.genexprcode(arg)
+                    self.addCode( push(arg.reg) )
+                    self.freereg( arg )
+                    stacksize += 8
+                else:
+                    Error('Parameter kind other than value')
+
+            # Calculate address using designator
+            if type(node.proc.obj) is Procedure:
+                self.addCode( call(0x0) )
+                self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip))
+            elif type(node.proc.obj) is ImportedSymbol:
+                # Load the entry point of the import table
+                self.getreg(node.proc.obj)
+                # Load the address of the procedure:
+                self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) )
+                self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) )
+                # Call to the address in register:
+                self.addCode( call(node.proc.obj.reg) )
+                # Free register that holds the address of the object
+                self.freereg( node.proc.obj )
+            elif type(node.proc.obj) is BuiltinProcedure:
+                if node.proc.obj.name == 'chr':
+                    print('int to char')
+                else:
+                    Error('Unknown builtin function {0}'.format(node.proc.obj.name))
+            else:
+                Error('Cannot call designator of type {0}'.format(node.proc.obj))
+
+            # Restore stack (pop all arguments of):
+            self.addCode(addreg64('rsp', stacksize))
+
+        elif type(node) is Assignment:
+            if node.lval.typ.isType(integer):
+                # TODO if node.rval is Constant of some datatype, move it to mem directly
+                self.genexprcode(node.rval) # Calculate the value that has to be stored.
+                self.storeRegInDesignator(node.rval.reg, node.lval)
+                self.freereg(node.rval)
+            else:
+                Error('Assignments of other types not implemented')
+                # TODO if left and right are designators, do some sort of memcpy.
+
+        elif type(node) is IfStatement:
+            self.genexprcode(node.condition)
+            self.addCode( cmpreg64(node.condition.reg, 1) )
+            self.freereg(node.condition)
+            if node.falsestatement:
+                # If with else clause
+                self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
+                rip1 = self.rip
+                fixloc1 = self.rip - 4
+                self.gencode(node.truestatement)
+                self.addCode( nearjump( 0x0 ) ) # jump over false code
+                fixloc2 = self.rip - 4
+                self.fixCode(fixloc1, imm32(self.rip - rip1))
+                rip2 = self.rip
+                self.gencode(node.falsestatement)
+                self.fixCode(fixloc2, imm32(self.rip - rip2))
+            else:
+                # If without else clause
+                self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false
+                rip1 = self.rip
+                fixloc1 = self.rip - 4
+                self.gencode(node.truestatement)
+                self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code.
+
+        elif isinstance(node, WhileStatement):
+            rip1 = self.rip # Store the start of the while loop
+            self.genexprcode(node.condition)
+            self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness
+            self.freereg(node.condition)
+            self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later)
+            fixloc1 = self.rip - 4
+            rip2 = self.rip
+            self.gencode(node.dostatements)
+            self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below
+            fixloc2 = self.rip - 4
+            rip3 = self.rip # end of while loop
+            self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop
+            self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop
+
+        elif type(node) is ForStatement:
+            # Initial load of iterator variable:
+            self.genexprcode(node.begin)
+            self.storeRegInDesignator(node.begin.reg, node.variable)
+            self.freereg(node.begin)
+            rip1 = self.rip
+            self.gencode(node.statements)
+            #self.loadDesignatorInReg(node.
+            #self.addCode( addreg64(node.variable, node.increment) )
+            Error('No implementation of FOR statement')
+
+        elif type(node) is AsmCode:
+            def processOperand(op):
+                if type(op) is list:
+                    if type(op[0]) is Variable:
+                        var = op[0]
+                        if var.isLocal:
+                            return ['rbp', var.offset]
+                        else:
+                            Error('Can only use local variables in inline assembler')
+                return op
+            for asmline in node.asmcode:
+                opcode, operands = asmline
+                operands = [processOperand(opx) for opx in operands]
+                print('assembling', opcode, *operands)
+                func,nargs = opcodes[opcode]
+                code = func(*operands)
+                self.addCode(code)
+
+        elif isinstance(node, EmptyStatement):
+            pass
+
+
+        elif type(node) is StringConstant:
+            self.strings.append(node)
+            self.data.append(node.value) # Add string to the data section
+
+        elif type(node) is Designator:
+            if len(node.selectors) > 0:
+                self.getreg(node)
+                # Load starting address
+                if node.obj.isLocal:
+                    self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) )
+                else:
+                    # Global variables need to be relocated...
+                    self.addCode(leareg64(node.reg, ['RIP', 0]))
+                    self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip))
+                # Loop over all designators..
+                for selector in node.selectors:
+                    if type(selector) is Index:
+                        # Deref an array index
+                        self.genexprcode(selector.index)
+                        self.getreg(selector)
+                        self.addCode( mov(selector.reg, selector.typ.elementType.size) )
+                        self.addCode( imulreg64(selector.reg, selector.index.reg ) )
+                        self.freereg(selector.index)
+                        self.addCode(addreg64(node.reg, selector.reg))
+                        self.freereg(selector)
+                    elif type(selector) is Field:
+                        print('Field')
+                        Error('Field not implemented')
+                    else:
+                        Error('Unknown selector')
+            else:
+                Error('Can only gencode for designator with selectors')
+
+        else:
+            print('not generating code for {0}'.format(node))
+
+    def generatecode(self, ast):
+        """ code generation front end """
+        self.initialize()
+        self.gencode(ast)
+        ast.image = self.image
+
diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/compiler.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/compiler.py Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,33 @@
+import hashlib
+# Import compiler components:
+from . import lexer
+from . import parser
+from .codegenerator import CodeGenerator
+from .nodes import ExportedSymbol
+
+class Compiler:
+    versie = '0.9.3'
+
+    def __repr__(self):
+        return 'LCFOS compiler {0}'.format(self.versie)
+
+    def generateSignature(self, src):
+        return hashlib.md5(bytes(src,encoding='utf-8')).hexdigest() # utf-8 gives the same digest as ascii for ascii-only source
+
+    def compilesource(self, src):
+        """ Front end that handles the stages: """
+        tokens = lexer.tokenize(src) # Lexical stage
+        ast = parser.Parser(tokens).parseModule() # Parse a module
+        CodeGenerator().generatecode(ast)
+        # Attach a signature:
+        ast.signature = self.generateSignature(src)
+        # Generate exported symbols:
+        ast.exports = []
+        for proc in ast.procs:
+            if proc.public:
+                sym = ExportedSymbol(proc.name, proc.typ)
+                sym.imageoffset = proc.entrypoint
+                ast.exports.append(sym)
+
+        return ast
+
diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/display.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/display.py Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,21 @@
+from .nodes import *
+
+def printNode(node, indent=0):
+    """
+    Print visitor
+    all printing goes in here
+    """
+    print(' '*indent+str(node))
+    if type(node) is Procedure:
+        print(' '*indent+' PARAMETERS:')
+        for p in node.parameters: # NOTE(review): the code generator reads node.typ.parameters; confirm Procedure has .parameters
+            printNode(p, indent+4)
+        if node.block:
+            print(' '*indent+' CODE:')
+            printNode(node.block, indent+4)
+    elif type(node) is Module:
+        print(node.symtable)
+        printNode(node.initcode, indent+2)
+    else:
+        for c in node.getChildren():
+            printNode(c, indent+2)
diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/errors.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/errors.py Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,41 @@
+""" Error handling routines """
+
+class CompilerException(Exception):
+    def __init__(self, msg, row=0, col=0):
+        self.msg = msg
+        self.row = row
+        self.col = col
+    def __repr__(self):
+        return self.msg
+    def __str__(self):
+        return self.msg
+
+def Error(msg, node=None):
+    if node is None:
+        raise CompilerException(msg)
+    else:
+        raise CompilerException(msg, node.row, node.col)
+
+def printError(source, e):
+    def printLine(row, txt):
+        print(str(row)+':'+txt)
+    if e.row == 0:
+        print('Error: {0}'.format(e.msg))
+    else:
+        lines = source.split('\n')
+        prerow = e.row - 3
+        if prerow < 1:
+            prerow = 1
+        afterrow = e.row + 3
+        if afterrow > len(lines):
+            afterrow = len(lines)
+
+        # print preceding source lines:
+        for r in range(prerow, e.row):
+            printLine(r, lines[r-1])
+        # print source line containing error:
+        printLine(e.row, lines[e.row-1])
+        print(' '*(len(str(e.row)+':')+e.col-1) + '^ Error: {0}'.format(e.msg))
+        # print trailing source line:
+        for r in range(e.row+1, afterrow+1):
+            printLine(r, lines[r-1])
diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/lexer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ide/compiler/lexer.py Sun Sep 18 19:00:29 2011 +0200
@@ -0,0 +1,71 @@
+import collections
+import re
+from .errors import CompilerException
+
+"""
+ Lexical analyzer part. Splits the input character stream into tokens.
+"""
+
+# Token is used in the lexical analyzer:
+Token = collections.namedtuple('Token', 'typ val row col')
+
+keywords = ['and', 'array', 'begin', 'by', 'case', 'const', 'div', 'do', \
+   'else', 'elsif', 'end', 'false', 'for', 'if', 'import', 'in', 'is', \
+   'mod', 'module', 'nil', 'not', 'of', 'or', 'pointer', 'procedure', \
+   'record', 'repeat', 'return', 'then', 'to', 'true', 'type', 'until', 'var', \
+   'while', 'asm' ]
+
+def tokenize(s):
+    """
+    Tokenizer, generates an iterator that
+    returns tokens!
+
+    This GREAT example was taken from python re doc page!
+ """ + tok_spec = [ + ('REAL', r'\d+\.\d+'), + ('HEXNUMBER', r'0x[\da-fA-F]+'), + ('NUMBER', r'\d+'), + ('ID', r'[A-Za-z][A-Za-z\d_]*'), + ('NEWLINE', r'\n'), + ('SKIP', r'[ \t]'), + ('COMMENTS', r'{.*}'), + ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'), + ('STRING', r"'.*?'") + ] + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + line = 1 + pos = line_start = 0 + mo = gettok(s) + while mo is not None: + typ = mo.lastgroup + val = mo.group(typ) + if typ == 'NEWLINE': + line_start = pos + line += 1 + elif typ == 'COMMENTS': + pass + elif typ != 'SKIP': + if typ == 'ID': + if val in keywords: + typ = val + elif typ == 'LEESTEKEN': + typ = val + elif typ == 'NUMBER': + val = int(val) + elif typ == 'HEXNUMBER': + val = int(val[2:], 16) + typ = 'NUMBER' + elif typ == 'REAL': + val = float(val) + elif typ == 'STRING': + val = val[1:-1] + yield Token(typ, val, line, mo.start()-line_start) + pos = mo.end() + mo = gettok(s, pos) + if pos != len(s): + col = pos - line_start + raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col) + yield Token('END', '', line, 0) + diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/modules.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ide/compiler/modules.py Sun Sep 18 19:00:29 2011 +0200 @@ -0,0 +1,193 @@ +import struct +from .errors import Error +from .nodes import * +from .builtin import integer, real, char, boolean, void +import os.path + +""" + File format for compiled modules. + * [11] magic identifier + * [STR] mod name + * [STR] signature, a md5 signature of the module. 
+ * [I32] size of code + * code image + * [I32] entrypoint for initcode + * imported modules + ** [I32] num of imported modules + *** [STR] name of module + *** signature of the module + *** [I32] offset in the process image where the interface symbols must be placed + * public interface + ** [I32] num of interface elements + *** [STR] proc name + *** [I32] offset in code image + *** [type] return type + *** [I32] number of parameters + **** parameter + ***** parameter kind + ***** parameter name + ***** parameter type +""" + +MAGIC = b'LCFOSMODC' + +loadedModules = [] + +def loadModule(modname): + """ returns a Module object specified by a name """ + # Check if the module was already loaded: + for mod in loadedModules: + if mod.name == modname: + return mod + + # Try to load the module from file: + srcfilename = modname + '.mod' + binfilename = modname + '.bin' + sourceExists = os.path.exists(srcfilename) + if os.path.exists(binfilename): + if sourceExists: + compileModule() + else: + return loadModuleFromFile(binfilename) + else: + Error("Cannot load module '{0}'!".format(modname)) + +def loadModuleFromFile(filename): + f = open(filename, 'rb') + magic = f.read(len(MAGIC)) + assert(magic == MAGIC) + + # Helper functions: + def readI32(): + int32, = struct.unpack(' 0: + operands.append( parseOperand() ) + n = n - 1 + while n > 0: + self.Consume(',') + operands.append(parseOperand()) + n = n - 1 + return operands + self.Consume('asm') + asmcode = [] + while self.token.typ != 'end': + opcode = parseOpcode() + func, numargs = assembler.opcodes[opcode] + operands = parseOperands(numargs) + asmcode.append( (opcode, operands) ) + #print('opcode', opcode, operands) + self.Consume('end') + return AsmCode(asmcode) + + def parseStatement(self): + # Determine statement type based on the pending token: + if self.token.typ == 'if': + return self.parseIfStatement() + elif self.token.typ == 'case': + return self.parseCaseStatement() + elif self.token.typ == 'while': + return 
self.parseWhileStatement() + elif self.token.typ == 'repeat': + return self.parseRepeatStatement() + elif self.token.typ == 'for': + return self.parseForStatement() + elif self.token.typ == 'asm': + return self.parseAsmcode() + elif self.token.typ == 'ID': + # Assignment or procedure call + designator = self.parseDesignator() + if self.token.typ == '(' and type(designator.typ) is ProcedureType: + return self.parseProcedureCall(designator) + elif self.token.typ == ':=': + return self.parseAssignment(designator) + else: + self.Error('Unknown statement following designator: {0}'.format(self.token)) + else: + # TODO: return empty statement??: + return EmptyStatement() + self.Error('Unknown statement {0}'.format(self.token)) + + def parseStatementSequence(self): + """ Sequence of statements seperated by ';' """ + statements = [ self.parseStatement() ] + while self.hasConsumed(';'): + statements.append( self.parseStatement() ) + return StatementSequence( statements ) + + # Parsing expressions: + """ + grammar of expressions: + expression = SimpleExpression [ reloperator SimpleExpression ] + reloperator = '=' | '<=' | '>=' | '<>' + Simpleexpression = [ '+' | '-' ] term { addoperator term } + addoperator = '+' | '-' | 'or' + term = factor { muloperator factor } + muloperator = '*' | '/' | 'div' | 'mod' | 'and' + factor = number | nil | true | false | "(" expression ")" | + designator [ actualparameters ] | 'not' factor + """ + def parseExpression(self): + """ The connector between the boolean and expression domain """ + expr = self.parseSimpleExpression() + if self.token.typ in ['>=','<=','<','>','<>','=']: + relop = self.Consume() + expr2 = self.parseSimpleExpression() + # Automatic type convert to reals: + if isType(expr.typ, real) and isType(expr2.typ, integer): + expr2 = Unop(expr2, 'INTTOREAL', real) + if isType(expr2.typ, real) and isType(expr.typ, integer): + expr = Unop(expr, 'INTTOREAL', real) + # Type check: + if not isType(expr.typ, expr2.typ): + 
self.Error('Type mismatch in relop') + if isType(expr.typ, real) and relop in ['<>', '=']: + self.Error('Cannot check real values for equality') + + expr = Relop(expr, relop, expr2, boolean) + return expr + + # Parsing arithmatic expressions: + def parseTerm(self): + a = self.parseFactor() + while self.token.typ in ['*', '/', 'mod', 'div', 'and']: + op = self.Consume() + b = self.parseTerm() + # Type determination and checking: + if op in ['mod', 'div']: + if not isType(a.typ, integer): + self.Error('First operand should be integer, not {0}'.format(a.typ)) + if not isType(b.typ, integer): + self.Error('Second operand should be integer, not {0}'.format(b.typ)) + typ = integer + elif op == '*': + if isType(a.typ, integer) and isType(b.typ, integer): + typ = integer + elif isType(a.typ, real) or isType(b.typ, real): + if isType(a.typ, integer): + # Automatic type cast + a = Unop(a, 'INTTOREAL', real) + if isType(b.typ, integer): + b = Unop(b, 'INTTOREAL', real) + if not isType(a.typ, real): + self.Error('first operand must be a real!') + if not isType(b.typ, real): + self.Error('second operand must be a real!') + typ = real + else: + self.Error('Unknown operands for multiply: {0}, {1}'.format(a, b)) + elif op == '/': + # Division always yields a real result, for integer division use div + if isType(a.typ, integer): + # Automatic type cast + a = Unop(a, 'INTTOREAL', real) + if isType(b.typ, integer): + b = Unop(b, 'INTTOREAL', real) + if not isType(a.typ, real): + self.Error('first operand must be a real!') + if not isType(b.typ, real): + self.Error('second operand must be a real!') + typ = real + elif op == 'and': + if not isType(a.typ, boolean): + self.Error('First operand of and must be boolean') + if not isType(b.typ, boolean): + self.Error('Second operand of and must be boolean') + typ = boolean + else: + self.Error('Unknown operand {0}'.format(op)) + + a = Binop(a, op, b, typ) + return a + + def parseFactor(self): + if self.hasConsumed('('): + e = 
self.parseExpression() + self.Consume(')') + return e + elif self.token.typ == 'NUMBER': + val = self.Consume('NUMBER') + return Constant(val, integer) + elif self.token.typ == 'REAL': + val = self.Consume('REAL') + return Constant(val, real) + elif self.token.typ == 'CHAR': + val = self.Consume('CHAR') + return Constant(val, char) + elif self.token.typ == 'STRING': + txt = self.Consume('STRING') + return StringConstant(txt) + elif self.token.typ in ['true', 'false']: + val = self.Consume() + val = True if val == 'true' else False + return Constant(val, boolean) + elif self.hasConsumed('nil'): + return Constant(0, NilType()) + elif self.hasConsumed('not'): + f = self.parseFactor() + if not isType(f.typ, boolean): + self.Error('argument of boolean negation must be boolean type') + return Unop(f, 'not', boolean) + elif self.token.typ == 'ID': + designator = self.parseDesignator() + # TODO: handle functions different here? + if self.token.typ == '(' and type(designator.typ) is ProcedureType: + return self.parseProcedureCall(designator) + else: + return designator + else: + self.Error('Expected NUMBER, ID or ( expr ), got'+str(self.token)) + + def parseSimpleExpression(self): + """ Arithmatic expression """ + if self.token.typ in ['+', '-']: + # Handle the unary minus + op = self.Consume() + a = self.parseTerm() + typ = a.typ + if not isType(typ,real) and not isType(typ, integer): + self.Error('Unary minus or plus can be only applied to real or integers') + if op == '-': + a = Unop(a, op, typ) + else: + a = self.parseTerm() + while self.token.typ in ['+', '-', 'or']: + op = self.Consume() + b = self.parseTerm() + if op in ['+', '-']: + if isType(a.typ, real) or isType(b.typ, real): + typ = real + if isType(a.typ, integer): + # Automatic type cast + a = Unop(a, 'INTTOREAL', real) + if not isType(a.typ, real): + self.Error('first operand must be a real!') + if isType(b.typ, integer): + b = Unop(b, 'INTTOREAL', real) + if not isType(b.typ, real): + self.Error('second 
operand must be a real!') + elif isType(a.typ, integer) and isType(b.typ, integer): + typ = integer + else: + self.Error('Invalid types {0} and {1}'.format(a.typ, b.typ)) + elif op == 'or': + if not isType(a.typ, boolean): + self.Error('first operand must be boolean for or operation') + if not isType(b.typ, boolean): + self.Error('second operand must be boolean for or operation') + typ = boolean + else: + self.Error('Unknown operand {0}'.format(op)) + a = Binop(a, op, b, typ) + return a + diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/compiler/symboltable.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ide/compiler/symboltable.py Sun Sep 18 19:00:29 2011 +0200 @@ -0,0 +1,80 @@ +from .nodes import * +from .errors import Error + +class SymbolTable: + """ + Symbol table for a current scope. + It has functions: + - hasname for checking for a name in current scope or above + - addSymbol to add an object + """ + def __init__(self, parent=None): + self.parent = parent + self.syms = {} + + def __repr__(self): + return 'Symboltable with {0} symbols\n'.format(len(self.syms)) + + def printTable(self, indent=0): + for name in self.syms: + print(self.syms[name]) + + def getAllLocal(self, cls): + """ Get all local objects of a specific type """ + r = [] + for key in self.syms.keys(): + sym = self.syms[key] + if issubclass(type(sym), cls): + r.append(sym) + return r + + def getLocal(self, cls, name): + if name in self.syms.keys(): + sym = self.syms[name] + if isinstance(sym, cls): + return sym + else: + Error('Wrong type found') + else: + Error('Symbol not found') + + # Retrieving of specific classes of items: + def get(self, cls, name): + if self.hasSymbol(name): + sym = self.getSymbol(name) + if issubclass(type(sym), cls): + return sym + raise SymbolException('type {0} undefined'.format(typename)) + + def has(self, cls, name): + if self.hasSymbol(name): + sym = self.getSymbol(name) + if issubclass(type(sym), cls): + return True + return False + + # Adding and retrieving of 
symbols in general: + def addSymbol(self, sym): + if sym.name in self.syms.keys(): + raise Exception('Symbol "{0}" redefined'.format(sym.name)) + else: + self.syms[sym.name] = sym + + def getSymbol(self, name): + if name in self.syms.keys(): + return self.syms[name] + else: + if self.parent: + return self.parent.getSymbol(name) + else: + Error('Symbol "{0}" undeclared!'.format(name)) + + def hasSymbol(self, name): + if name in self.syms.keys(): + return True + else: + if self.parent: + return self.parent.hasSymbol(name) + else: + return False + diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/ide/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ide/ide/__init__.py Sun Sep 18 19:00:29 2011 +0200 @@ -0,0 +1,2 @@ +# Package + diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/ide/astviewer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ide/ide/astviewer.py Sun Sep 18 19:00:29 2011 +0200 @@ -0,0 +1,33 @@ +from PyQt4.QtCore import * +from PyQt4.QtGui import * + +def astToNamedElement(astNode, parentNode): + """ Helper to convert and AST tree to NamedElement tree: """ + item = QStandardItem(str(astNode)) + parentNode.appendRow(item) + for c in astNode.getChildren(): + astToNamedElement(c, item) + +# The actual widget: +class AstViewer(QTreeView): + def __init__(self, parent=None): + super(AstViewer, self).__init__(parent) + self.setHeaderHidden(True) + self.clicked.connect(self.woei) + + def setAst(self, ast): + """ Create a new model and add all ast elements to it """ + model = QStandardItemModel() + if ast: + astToNamedElement(ast, model.invisibleRootItem()) + self.setModel( model ) + self.expandAll() + + def woei(self, index): + if not index.isValid(): + print('Invalid index') + return + print(index.data) + + + diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/ide/codeeditor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ide/ide/codeeditor.py Sun Sep 18 19:00:29 2011 +0200 @@ -0,0 +1,128 @@ +from PyQt4.QtCore import * +from PyQt4.QtGui import * +import compiler.lexer 
+ +class MySyntaxHighlighter(QSyntaxHighlighter): + def __init__(self, parent=None): + super(MySyntaxHighlighter, self).__init__(parent) + self.rules = [] + fmt = QTextCharFormat() + fmt.setForeground(Qt.darkBlue) + fmt.setFontWeight(QFont.Bold) + for kw in compiler.lexer.keywords: + pattern = '\\b'+kw+'\\b' + self.rules.append( (pattern, fmt) ) + + # Comments: + fmt = QTextCharFormat() + fmt.setForeground(Qt.gray) + fmt.setFontItalic(True) + pattern = '\{.*\}' + self.rules.append( (pattern, fmt) ) + + # Procedure: + fmt = QTextCharFormat() + fmt.setForeground(Qt.blue) + fmt.setFontItalic(True) + #pattern = '(?<=procedure )[A-Za-z]' + # TODO lookbehind does not work, think something else + #self.rules.append( (pattern, fmt) ) + + def highlightBlock(self, text): + for pattern, fmt in self.rules: + expression = QRegExp(pattern) + index = expression.indexIn(text) + while index >= 0: + length = expression.matchedLength() + self.setFormat(index, length, fmt) + index = expression.indexIn(text, index + length) + +class LineNumberArea(QWidget): + def __init__(self, codeedit): + super(LineNumberArea, self).__init__(codeedit) + self.codeedit = codeedit + # TODO: display error in this: self.setToolTip('hello world') + def sizeHint(self): + return QSize(self.codeedit.lineNumberAreaWidth(), 0) + def paintEvent(self, ev): + self.codeedit.lineNumberAreaPaintEvent(ev) + +class CodeEdit(QPlainTextEdit): + def __init__(self, parent=None): + super(CodeEdit, self).__init__(parent) + self.filename = None + self.setFont(QFont('Courier')) + self.lineNumberArea = LineNumberArea(self) + + self.blockCountChanged.connect(self.updateLineNumberAreaWidth) + self.updateRequest.connect(self.updateLineNumberArea) + + # Syntax highlighter: + self.highlighter = MySyntaxHighlighter(self.document()) + + def loadFile(self, filename): + self.filename = filename + f = open(filename, 'r') + source = f.read() + f.close() + self.setPlainText(source) + def saveFile(self): + if self.filename: + source = 
str(self.toPlainText()) + f = open(self.filename, 'w') + f.write(source) + f.close() + + def highlightErrorLocation(self, row, col): + tc = QTextCursor(self.document()) + tc.clearSelection() + tc.movePosition(tc.Down, tc.MoveAnchor, row - 1) + tc.movePosition(tc.Right, tc.MoveAnchor, col - 1) + tc.movePosition(tc.NextCharacter, tc.KeepAnchor) # Select 1 character + selection = QTextEdit.ExtraSelection() + lineColor = QColor(Qt.red).lighter(160) + selection.format.setBackground(lineColor) + #selection.format.setProperty(QTextFormat.FullWidthSelection, True) + selection.cursor = tc + self.setExtraSelections( [ selection ] ) + def clearErrors(self): + self.setExtraSelections( [ ] ) + + def lineNumberAreaWidth(self): + digits = 1 + mx = max(1, self.blockCount()) + while mx >= 10: + mx = mx / 10 + digits += 1 + space = 3 + self.fontMetrics().width('8') * digits + return space + def lineNumberAreaPaintEvent(self, ev): + painter = QPainter(self.lineNumberArea) + painter.fillRect(ev.rect(), Qt.lightGray) + block = self.firstVisibleBlock() + blockNumber = block.blockNumber() + top = self.blockBoundingGeometry(block).translated(self.contentOffset()).top() + bottom = top + self.blockBoundingRect(block).height() + while block.isValid() and top <= ev.rect().bottom(): + if block.isVisible() and bottom >= ev.rect().top(): + num = str(blockNumber + 1) + painter.setPen(Qt.black) + painter.drawText(0, top, self.lineNumberArea.width(), self.fontMetrics().height(), Qt.AlignRight, num) + block = block.next() + top = bottom + bottom = top + self.blockBoundingRect(block).height() + blockNumber += 1 + def resizeEvent(self, ev): + super(CodeEdit, self).resizeEvent(ev) + cr = self.contentsRect() + self.lineNumberArea.setGeometry(QRect(cr.left(), cr.top(), self.lineNumberAreaWidth(), cr.height() )) + def updateLineNumberAreaWidth(self, newBlockCount): + self.setViewportMargins(self.lineNumberAreaWidth(), 0, 0, 0) + def updateLineNumberArea(self, rect, dy): + if dy > 0: + 
self.lineNumberArea.scroll(0, dy) + else: + self.lineNumberArea.update(0, rect.y(), self.lineNumberArea.width(), rect.height()) + if rect.contains(self.viewport().rect()): + self.updateLineNumberAreaWidth(0) + diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/ide/ide.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ide/ide/ide.py Sun Sep 18 19:00:29 2011 +0200 @@ -0,0 +1,89 @@ +from PyQt4.QtCore import * +from PyQt4.QtGui import * +# ide components: +from .codeeditor import CodeEdit +from .astviewer import AstViewer + +source = """ +module x; +var + a,b,c : integer; +begin + a := 12; + b := a * 12 + 33; + c := a div b + a * b * 99; +end x. +""" + +class BuildOutput(QTextEdit): + """ Build output component """ + def __init__(self, parent=None): + super(BuildOutput, self).__init__(parent) + self.setCurrentFont(QFont('Courier')) + self.setReadOnly(True) + self.append('Build output will appear here!') + +class Ide(QMainWindow): + def __init__(self, parent=None): + super(Ide, self).__init__(parent) + self.setWindowTitle('LCFOS IDE') + + # Create menus: + self.viewMenu = self.menuBar().addMenu('View') + self.projectMenu = self.menuBar().addMenu('Project') + + # Create components: + self.codeedit = CodeEdit() + self.setCentralWidget(self.codeedit) + + self.buildOutput = BuildOutput() + self.addComponent('Build output', self.buildOutput) + + self.astViewer = AstViewer() + self.addComponent('AST viewer', self.astViewer) + + # Create actions: + self.buildAction = QAction('Build!', self) + self.buildAction.setShortcut(QKeySequence('F7')) + self.projectMenu.addAction(self.buildAction) + self.buildAction.triggered.connect(self.buildFile) + + # END of UI construction + + # Load settings: + self.settings = QSettings('windelsoft', 'lcfoside') + self.loadSettings() + + # Load example source: + self.codeedit.setPlainText(source) + + def addComponent(self, name, widget): + dw = QDockWidget(name) + dw.setWidget(widget) + dw.setObjectName(name) + self.addDockWidget(Qt.RightDockWidgetArea, 
dw) + self.viewMenu.addAction(dw.toggleViewAction()) + def loadSettings(self): + if self.settings.contains('mainwindowstate'): + self.restoreState(self.settings.value('mainwindowstate')) + if self.settings.contains('mainwindowgeometry'): + self.restoreGeometry(self.settings.value('mainwindowgeometry')) + def closeEvent(self, ev): + self.settings.setValue('mainwindowstate', self.saveState()) + self.settings.setValue('mainwindowgeometry', self.saveGeometry()) + self.codeedit.saveFile() + ev.accept() + def buildFile(self): + self.buildOutput.clear() + self.codeedit.clearErrors() + source = str(self.codeedit.toPlainText()) + self.buildOutput.append(str(self.compiler)) + self.astViewer.setAst(None) + try: + output = self.compiler.compilesource(source) + self.astViewer.setAst(output) + self.buildOutput.append("Done!") + except compiler.errors.CompilerException as e: + self.buildOutput.append(e.msg) + self.codeedit.highlightErrorLocation( e.row, e.col ) + diff -r 1a4faf9ef1ea -r 92df07bc2081 ide/runide.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ide/runide.py Sun Sep 18 19:00:29 2011 +0200 @@ -0,0 +1,14 @@ +import sys +from PyQt4.QtGui import QApplication + +# Compiler imports: +from compiler.compiler import Compiler +from ide.ide import Ide + +if __name__ == '__main__': + app = QApplication(sys.argv) + ide = Ide() + ide.compiler = Compiler() + ide.show() + app.exec_() +