Mercurial > lcfOS
changeset 69:60cc36ef5a50
Rework in compiler
author | windel |
---|---|
date | Sat, 27 Oct 2012 14:31:58 +0200 |
parents | 654c5ac4f2c5 |
children | 35286e8abd03 |
files | python/apps/ide.py python/libs/compiler/backends/codegenerator.py python/libs/compiler/codegenerator.py python/libs/compiler/frontends/lexer.py python/libs/compiler/lexer.py python/libs/compiler/parser.py python/libs/compiler/parsergen.py |
diffstat | 7 files changed, 1346 insertions(+), 1347 deletions(-) [+] |
line wrap: on
line diff
--- a/python/apps/ide.py Sat Oct 13 16:13:05 2012 +0200 +++ b/python/apps/ide.py Sat Oct 27 14:31:58 2012 +0200 @@ -1,11 +1,10 @@ -import sys, os +import sys, os, base64 if sys.version_info.major != 3: print("Needs to be run in python version 3.x") sys.exit(1) from PyQt4.QtCore import * from PyQt4.QtGui import * -import base64 # Compiler imports: sys.path.insert(0, os.path.join('..','libs'))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/libs/compiler/backends/codegenerator.py Sat Oct 27 14:31:58 2012 +0200 @@ -0,0 +1,487 @@ +""" + Code generation for 64 bits intel processors +""" + +from .nodes import * +from .errors import Error +from .builtin import real, integer, boolean, char +from .assembler import * + +class CodeGenerator: + def __init__(self): + self.strings = [] + self.initialize() + def initialize(self): + # Register descriptors: + self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',') + self.usedregs = [] + # Members to accumulate the result into: + # The result is an image of bytecode and global variable space. + # Global variables a referenced by RIP relative addressing. + self.image = [] + self.rip = 0 # The current instruction pointer location. + # TODO: backpatch list here? + + # Functions to modify the code image + def addCode(self, code): + assert(type(code) is list) + self.image += code + self.rip += len(code) + def fixCode(self, position, code): + self.image[position:position+len(code)] = code + def align(self, b): + while (self.rip % b) != 0: + self.addCode([0]) + + def saveAllRegisters(self): + regs = list(self.usedregs.keys()) + for reg in regs: + code += self.saveRegister(reg) + + def saveRegister(self, reg): + code = [] + if reg in self.usedregs.keys(): + code.append('mov {0}, {1}'.format(self.usedregs[reg], reg)) + del self.usedregs[reg] + self.freeregs.append(reg) + + def getreg(self, node): + """ acquire a working register for a certain node.""" + # Temporary register bypass action: + if len(self.freeregs) > 0: + reg = self.freeregs.pop(0) + self.usedregs.append(reg) + else: + Error('No more free regs') + node.reg = reg + + def freereg(self, node): + reg = node.reg + node.reg = None + self.freeregs.append(reg) + self.usedregs.remove(reg) + + # Helpers to load and retrieve designated objects: + def storeRegInDesignator(self, reg, designator): + assert(type(reg) is str) + assert(type(designator) is Designator) + if len(designator.selectors) > 0: + self.gencode( designator ) # Load the pointer into some register + self.addCode( mov([designator.reg, 0x0], reg) ) + self.freereg( designator ) + else: + if designator.obj.isLocal: + # Relative from rbp register + mem = ['rbp', designator.obj.offset] + self.addCode( mov(mem, reg) ) + else: + # Relative from RIP after move + self.addCode( mov(['RIP', 0x0], reg) ) + self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) ) + + # Code generation functions: + def genexprcode(self, node): + """ + Generate code for expressions! + Recursively evaluates, and ensures a register contains the answer. + register is an integer register or a floating point reg + """ + if isinstance(node, Binop): + """ Handle a binary operation (two arguments) of some kind """ + self.genexprcode(node.a) + self.genexprcode(node.b) + + if node.op == 'mod': + assert(node.typ.isType(integer)) + self.addCode(mov('rax', node.a.reg)) + self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros + self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg + node.reg = node.a.reg + self.freereg(node.b) # give up register that contains b + self.addCode(mov(node.reg, 'rdx')) # move remainder into result + elif node.op == 'div': + assert(node.typ.isType(integer)) + self.addCode(mov('rax', node.a.reg)) + self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros + self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg + node.reg = node.a.reg + self.freereg(node.b) # give up register that contains b + self.addCode(mov(node.reg, 'rax')) # move result into reg + elif node.op == '*': + if node.typ.isType(integer): + self.addCode(imulreg64(node.a.reg, node.b.reg)) + node.reg = node.a.reg + self.freereg(node.b) + else: + Error('{0} for * not implemented'.format(node.typ)) + elif node.op == '+': + if node.typ.isType(integer): + self.addCode(addreg64(node.a.reg, node.b.reg)) + node.reg = node.a.reg + self.freereg(node.b) + else: + Error('{0} for + not implemented'.format(node.typ)) + elif node.op == '-': + if node.typ.isType(integer): + self.addCode(subreg64(node.a.reg, node.b.reg)) + node.reg = node.a.reg + self.freereg(node.b) + else: + Error('{0} for - not implemented'.format(node.typ)) + else: + Error('Unknown Binop {0}'.format(node.op)) + + elif type(node) is Unop: + if node.op == 'INTTOREAL': + self.genexprcode(node.a) + node.reg = node.a.reg + # TODO use 'FILD' instruction + freg = 12 + code.append('Unop inttoreal TODO') + elif node.op == 'ABS': + if isType(node.typ, real): + code = [0xD9, 0xE1] # st(0) = fabs st(0) + Error('ABS error integer') + elif isType(node.typ, integer): + code = [] + Error('ABS error integer') + else: + Error('ABS error') + else: + Error('Unknown Unop {0}'.format(node.op)) + + elif isinstance(node, Designator): + # dereference, array index. Make sure that the result comes into a register + if len(node.selectors) > 0: + self.gencode(node) # Load the pointer into some register + + # Now we can access the object at location '[node.reg]': + if node.typ.isType(integer): + self.addCode( mov(node.reg, [node.reg, 0x0]) ) + else: + Error('Only integer types implemented') + else: + # No selectors, load variable directly + if node.obj.typ.isType(integer): + if type(node.obj) is Constant: + self.genexprcode(node.obj) + node.reg = node.obj.reg + else: + self.getreg(node) + # Get a register to store the integer value + if node.obj.isLocal: + # relative to rbp: + self.addCode( mov(node.reg, ['rbp', node.obj.offset]) ) + else: + self.addCode(mov(node.reg, ['RIP', 0x0])) + self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip)) + else: + Error('Cannot load variable type {0}'.format(node.typ)) + + elif isinstance(node, Relop): + # Create a boolean from operands + # TODO create an alternative for expressions used as conditions. + self.genexprcode(node.a) + self.genexprcode(node.b) + + if node.a.typ.isType(integer): + instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'} + if not node.relop in instructions.keys(): + Error('Unimplemented relop: '+str(node.relop)) + instr = instructions[node.relop] + + node.reg = node.a.reg + self.addCode( cmpreg64(node.a.reg, node.b.reg) ) + self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp + fixloc1 = self.rip - 1 + rip1 = self.rip + self.addCode( xorreg64(node.reg, node.reg) ) + self.addCode( shortjump(0x0) ) # Jump over 1 code + fixloc2 = self.rip - 1 + self.fixCode(fixloc1, imm8(self.rip - rip1)) + rip2 = self.rip + self.addCode( xorreg64(node.reg, node.reg) ) + self.addCode( increg64(node.reg) ) + self.fixCode(fixloc2, imm8(self.rip - rip2)) + + self.freereg(node.b) + else: + Error('Relop not implemented for {0}'.format(node.a.typ)) + + elif type(node) is Constant: + if node.typ.isType(integer): + self.getreg(node) + self.addCode(mov(node.reg, node.value)) + elif node.typ.isType(real): + code += self.getreg(node) + Error('TODO: get real reg') + # TODO: get a fixed point reg, and load the variable in there + else: + Error('Howto generate code for {0}?'.format(node)) + + elif type(node) is ProcedureCall: + if type(node.proc.obj) is BuiltinProcedure: + # Handle builtin procedures different, these not always call + # a function, but generate code. + bi = node.proc.obj + if bi.name == 'chr': + arg = node.args[0] + self.genexprcode(arg) + # Store character in full width register: + # TODO: store in char only register + node.reg = arg.reg + else: + Error('Unknown builtin function {0}'.format(bi.name)) + else: + # Use generic procedure call first + self.gencode(node) + # Retrieve result: + if node.typ.isType(integer): + # Store result! + self.getreg(node) + self.addCode( mov(node.reg, 'rax') ) + else: + Error('Return type not supported {0}'.format(node.typ)) + else: + Error('Cannot generate expression code for: {0}'.format(node)) + + def gencode(self, node): + """ Code generation function for AST nodes """ + if isinstance(node, Module): + # for all imports make a list of pointer to the actual procedures: + for imp in node.imports: + imp.offset = self.rip + self.addCode( [0x0]*8 ) + # global variable storage allocation + variables = node.symtable.getAllLocal(Variable) + for var in variables: + var.isLocal = False + var.offset = self.rip + self.addCode( [0x00] * var.typ.size ) # TODO initial values here? + self.align(8) + # TODO: mark end of data and start of code inside image + # TODO: round data to page size to enable protection by loader. + # Procedure code generation: + procedures = node.symtable.getAllLocal(Procedure) + node.procs = procedures + for proc in procedures: + self.gencode(proc) + # Module init code: + node.initcodeentry = self.rip + self.gencode(node.initcode) + self.addCode( ret() ) + # TODO: how to return from module init code? far return?? + + elif type(node) is Procedure: + # calculate offsets for local variables and parameters + # Variable location relative to 'rbp' register + variables = node.symtable.getAllLocal(Variable) + offset = 0 + paramoffset = 16 + for var in variables: + var.isLocal = True + if not var.isParameter: + offset += var.typ.size + # Offset is negative of rbp in stack frame + var.offset = -offset + node.framesize = offset + # Calculate offsets of parameters relative to rbp register + for par in reversed(node.typ.parameters): + pvar = node.symtable.getLocal(Variable, par.name) + pvar.offset = paramoffset + paramoffset += pvar.typ.size + + # code generation + node.entrypoint = self.rip + self.addCode(push('rbp')) + self.addCode(mov('rbp', 'rsp')) # Setup the base pointer + self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals + self.gencode(node.block) + if node.retexpr: + if node.retexpr.typ.isType(integer): + self.genexprcode(node.retexpr) + self.addCode( mov('rax', node.retexpr.reg) ) + self.freereg(node.retexpr) + else: + Error('Cannot return this kind yet {0}'.format(node.retexpr.typ)) + self.addCode( addreg64('rsp', node.framesize) ) + self.addCode( pop('rbp') ) + self.addCode( ret() ) + assert(len(self.usedregs) == 0) + + elif isinstance(node, StatementSequence): + for s in node.statements: + self.gencode(s) + + elif type(node) is ProcedureCall: + # Prepare parameters on the stack: + stacksize = 0 + assert(len(node.args) == len(node.proc.typ.parameters)) + for arg, param in zip(node.args, node.proc.typ.parameters): + + if param.kind == 'value': + self.genexprcode(arg) + self.addCode( push(arg.reg) ) + self.freereg( arg ) + stacksize += 8 + else: + Error('Parameter kind other than value') + + # Calculate address using designator + if type(node.proc.obj) is Procedure: + self.addCode( call(0x0) ) + self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip)) + elif type(node.proc.obj) is ImportedSymbol: + # Load the entry point of the import table + self.getreg(node.proc.obj) + # Load the address of the procedure: + self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) ) + self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) ) + # Call to the address in register: + self.addCode( call(node.proc.obj.reg) ) + # Free register that holds the address of the object + self.freereg( node.proc.obj ) + elif type(node.proc.obj) is BuiltinProcedure: + if node.proc.obj.name == 'chr': + print('int to char') + else: + Error('Unknown builtin function {0}'.format(node.proc.obj.name)) + else: + Error('Cannot call designator of type {0}'.format(node.proc.obj)) + + # Restore stack (pop all arguments of): + self.addCode(addreg64('rsp', stacksize)) + + elif type(node) is Assignment: + if node.lval.typ.isType(integer): + # TODO if node.rval is Constant of some datatype, move it to mem directly + self.genexprcode(node.rval) # Calculate the value that has to be stored. + self.storeRegInDesignator(node.rval.reg, node.lval) + self.freereg(node.rval) + else: + Error('Assignments of other types not implemented') + # TODO if left and right are designators, do some sort of memcpy. + + elif type(node) is IfStatement: + self.genexprcode(node.condition) + self.addCode( cmpreg64(node.condition.reg, 1) ) + self.freereg(node.condition) + if node.falsestatement: + # If with else clause + self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false + rip1 = self.rip + fixloc1 = self.rip - 4 + self.gencode(node.truestatement) + self.addCode( nearjump( 0x0 ) ) # jump over false code + fixloc2 = self.rip - 4 + self.fixCode(fixloc1, imm32(self.rip - rip1)) + rip2 = self.rip + self.gencode(node.falsestatement) + self.fixCode(fixloc2, imm32(self.rip - rip2)) + else: + # If without else clause + self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false + rip1 = self.rip + fixloc1 = self.rip - 4 + self.gencode(node.truestatement) + self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code. + + elif isinstance(node, WhileStatement): + rip1 = self.rip # Store the start of the while loop + self.genexprcode(node.condition) + self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness + self.freereg(node.condition) + self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later) + fixloc1 = self.rip - 4 + rip2 = self.rip + self.gencode(node.dostatements) + self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below + fixloc2 = self.rip - 4 + rip3 = self.rip # end of while loop + self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop + self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop + + elif type(node) is ForStatement: + # Initial load of iterator variable: + self.genexprcode(node.begin) + self.genexprcode(node.end) + # TODO: link reg with variable so that a register is used instead of a variable + iterreg = node.begin.reg # Get the register used for the loop + #self.addCode(cmpreg64(iterreg, node.endvalue)) + rip1 = self.rip + self.gencode(node.statements) + #self.loadDesignatorInReg(node. + #self.addCode( addreg64(node.variable, node.increment) ) + self.addCode(nearjump(0x0)) + fixloc1 = self.rip - 4 + rip2 = self.rip + self.fixCode(fixloc1, imm32(rip1 - rip2)) + + self.freereg(node.begin) # Release register used in loop + self.freereg(node.end) + Error('No implementation of FOR statement') + + elif type(node) is AsmCode: + def processOperand(op): + if type(op) is list: + if type(op[0]) is Variable: + var = op[0] + if var.isLocal: + return ['rbp', var.offset] + else: + Error('Can only use local variables in inline assembler') + return op + for asmline in node.asmcode: + opcode, operands = asmline + operands = [processOperand(opx) for opx in operands] + print('assembling', opcode, *operands) + func,nargs = opcodes[opcode] + code = func(*operands) + self.addCode(code) + + elif isinstance(node, EmptyStatement): + pass + + + elif type(node) is StringConstant: + self.strings.append(node) + self.data.append(node.value) # Add string to the data section + + elif type(node) is Designator: + if len(node.selectors) > 0: + self.getreg(node) + # Load starting address + if node.obj.isLocal: + self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) ) + else: + # Global variables need to be relocated... + self.addCode(leareg64(node.reg, ['RIP', 0])) + self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip)) + # Loop over all designators.. + for selector in node.selectors: + if type(selector) is Index: + # Deref an array index + self.genexprcode(selector.index) + self.getreg(selector) + self.addCode( mov(selector.reg, selector.typ.elementType.size) ) + self.addCode( imulreg64(selector.reg, selector.index.reg ) ) + self.freereg(selector.index) + self.addCode(addreg64(node.reg, selector.reg)) + self.freereg(selector) + elif type(selector) is Field: + print('Field') + Error('Field not implemented') + else: + Error('Unknown selector') + else: + Error('Can only gencode for designator with selectors') + + else: + print('not generating code for {0}'.format(node)) + + def generatecode(self, ast): + """ code generation front end """ + self.initialize() + self.gencode(ast) + ast.image = self.image +
--- a/python/libs/compiler/codegenerator.py Sat Oct 13 16:13:05 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,487 +0,0 @@ -""" - Code generation for 64 bits intel processors -""" - -from .nodes import * -from .errors import Error -from .builtin import real, integer, boolean, char -from .assembler import * - -class CodeGenerator: - def __init__(self): - self.strings = [] - self.initialize() - def initialize(self): - # Register descriptors: - self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',') - self.usedregs = [] - # Members to accumulate the result into: - # The result is an image of bytecode and global variable space. - # Global variables a referenced by RIP relative addressing. - self.image = [] - self.rip = 0 # The current instruction pointer location. - # TODO: backpatch list here? - - # Functions to modify the code image - def addCode(self, code): - assert(type(code) is list) - self.image += code - self.rip += len(code) - def fixCode(self, position, code): - self.image[position:position+len(code)] = code - def align(self, b): - while (self.rip % b) != 0: - self.addCode([0]) - - def saveAllRegisters(self): - regs = list(self.usedregs.keys()) - for reg in regs: - code += self.saveRegister(reg) - - def saveRegister(self, reg): - code = [] - if reg in self.usedregs.keys(): - code.append('mov {0}, {1}'.format(self.usedregs[reg], reg)) - del self.usedregs[reg] - self.freeregs.append(reg) - - def getreg(self, node): - """ acquire a working register for a certain node.""" - # Temporary register bypass action: - if len(self.freeregs) > 0: - reg = self.freeregs.pop(0) - self.usedregs.append(reg) - else: - Error('No more free regs') - node.reg = reg - - def freereg(self, node): - reg = node.reg - node.reg = None - self.freeregs.append(reg) - self.usedregs.remove(reg) - - # Helpers to load and retrieve designated objects: - def storeRegInDesignator(self, reg, designator): - assert(type(reg) is str) - assert(type(designator) is Designator) - if len(designator.selectors) > 0: - self.gencode( designator ) # Load the pointer into some register - self.addCode( mov([designator.reg, 0x0], reg) ) - self.freereg( designator ) - else: - if designator.obj.isLocal: - # Relative from rbp register - mem = ['rbp', designator.obj.offset] - self.addCode( mov(mem, reg) ) - else: - # Relative from RIP after move - self.addCode( mov(['RIP', 0x0], reg) ) - self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) ) - - # Code generation functions: - def genexprcode(self, node): - """ - Generate code for expressions! - Recursively evaluates, and ensures a register contains the answer. - register is an integer register or a floating point reg - """ - if isinstance(node, Binop): - """ Handle a binary operation (two arguments) of some kind """ - self.genexprcode(node.a) - self.genexprcode(node.b) - - if node.op == 'mod': - assert(node.typ.isType(integer)) - self.addCode(mov('rax', node.a.reg)) - self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros - self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg - node.reg = node.a.reg - self.freereg(node.b) # give up register that contains b - self.addCode(mov(node.reg, 'rdx')) # move remainder into result - elif node.op == 'div': - assert(node.typ.isType(integer)) - self.addCode(mov('rax', node.a.reg)) - self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros - self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg - node.reg = node.a.reg - self.freereg(node.b) # give up register that contains b - self.addCode(mov(node.reg, 'rax')) # move result into reg - elif node.op == '*': - if node.typ.isType(integer): - self.addCode(imulreg64(node.a.reg, node.b.reg)) - node.reg = node.a.reg - self.freereg(node.b) - else: - Error('{0} for * not implemented'.format(node.typ)) - elif node.op == '+': - if node.typ.isType(integer): - self.addCode(addreg64(node.a.reg, node.b.reg)) - node.reg = node.a.reg - self.freereg(node.b) - else: - Error('{0} for + not implemented'.format(node.typ)) - elif node.op == '-': - if node.typ.isType(integer): - self.addCode(subreg64(node.a.reg, node.b.reg)) - node.reg = node.a.reg - self.freereg(node.b) - else: - Error('{0} for - not implemented'.format(node.typ)) - else: - Error('Unknown Binop {0}'.format(node.op)) - - elif type(node) is Unop: - if node.op == 'INTTOREAL': - self.genexprcode(node.a) - node.reg = node.a.reg - # TODO use 'FILD' instruction - freg = 12 - code.append('Unop inttoreal TODO') - elif node.op == 'ABS': - if isType(node.typ, real): - code = [0xD9, 0xE1] # st(0) = fabs st(0) - Error('ABS error integer') - elif isType(node.typ, integer): - code = [] - Error('ABS error integer') - else: - Error('ABS error') - else: - Error('Unknown Unop {0}'.format(node.op)) - - elif isinstance(node, Designator): - # dereference, array index. Make sure that the result comes into a register - if len(node.selectors) > 0: - self.gencode(node) # Load the pointer into some register - - # Now we can access the object at location '[node.reg]': - if node.typ.isType(integer): - self.addCode( mov(node.reg, [node.reg, 0x0]) ) - else: - Error('Only integer types implemented') - else: - # No selectors, load variable directly - if node.obj.typ.isType(integer): - if type(node.obj) is Constant: - self.genexprcode(node.obj) - node.reg = node.obj.reg - else: - self.getreg(node) - # Get a register to store the integer value - if node.obj.isLocal: - # relative to rbp: - self.addCode( mov(node.reg, ['rbp', node.obj.offset]) ) - else: - self.addCode(mov(node.reg, ['RIP', 0x0])) - self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip)) - else: - Error('Cannot load variable type {0}'.format(node.typ)) - - elif isinstance(node, Relop): - # Create a boolean from operands - # TODO create an alternative for expressions used as conditions. - self.genexprcode(node.a) - self.genexprcode(node.b) - - if node.a.typ.isType(integer): - instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'} - if not node.relop in instructions.keys(): - Error('Unimplemented relop: '+str(node.relop)) - instr = instructions[node.relop] - - node.reg = node.a.reg - self.addCode( cmpreg64(node.a.reg, node.b.reg) ) - self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp - fixloc1 = self.rip - 1 - rip1 = self.rip - self.addCode( xorreg64(node.reg, node.reg) ) - self.addCode( shortjump(0x0) ) # Jump over 1 code - fixloc2 = self.rip - 1 - self.fixCode(fixloc1, imm8(self.rip - rip1)) - rip2 = self.rip - self.addCode( xorreg64(node.reg, node.reg) ) - self.addCode( increg64(node.reg) ) - self.fixCode(fixloc2, imm8(self.rip - rip2)) - - self.freereg(node.b) - else: - Error('Relop not implemented for {0}'.format(node.a.typ)) - - elif type(node) is Constant: - if node.typ.isType(integer): - self.getreg(node) - self.addCode(mov(node.reg, node.value)) - elif node.typ.isType(real): - code += self.getreg(node) - Error('TODO: get real reg') - # TODO: get a fixed point reg, and load the variable in there - else: - Error('Howto generate code for {0}?'.format(node)) - - elif type(node) is ProcedureCall: - if type(node.proc.obj) is BuiltinProcedure: - # Handle builtin procedures different, these not always call - # a function, but generate code. - bi = node.proc.obj - if bi.name == 'chr': - arg = node.args[0] - self.genexprcode(arg) - # Store character in full width register: - # TODO: store in char only register - node.reg = arg.reg - else: - Error('Unknown builtin function {0}'.format(bi.name)) - else: - # Use generic procedure call first - self.gencode(node) - # Retrieve result: - if node.typ.isType(integer): - # Store result! - self.getreg(node) - self.addCode( mov(node.reg, 'rax') ) - else: - Error('Return type not supported {0}'.format(node.typ)) - else: - Error('Cannot generate expression code for: {0}'.format(node)) - - def gencode(self, node): - """ Code generation function for AST nodes """ - if isinstance(node, Module): - # for all imports make a list of pointer to the actual procedures: - for imp in node.imports: - imp.offset = self.rip - self.addCode( [0x0]*8 ) - # global variable storage allocation - variables = node.symtable.getAllLocal(Variable) - for var in variables: - var.isLocal = False - var.offset = self.rip - self.addCode( [0x00] * var.typ.size ) # TODO initial values here? - self.align(8) - # TODO: mark end of data and start of code inside image - # TODO: round data to page size to enable protection by loader. - # Procedure code generation: - procedures = node.symtable.getAllLocal(Procedure) - node.procs = procedures - for proc in procedures: - self.gencode(proc) - # Module init code: - node.initcodeentry = self.rip - self.gencode(node.initcode) - self.addCode( ret() ) - # TODO: how to return from module init code? far return?? - - elif type(node) is Procedure: - # calculate offsets for local variables and parameters - # Variable location relative to 'rbp' register - variables = node.symtable.getAllLocal(Variable) - offset = 0 - paramoffset = 16 - for var in variables: - var.isLocal = True - if not var.isParameter: - offset += var.typ.size - # Offset is negative of rbp in stack frame - var.offset = -offset - node.framesize = offset - # Calculate offsets of parameters relative to rbp register - for par in reversed(node.typ.parameters): - pvar = node.symtable.getLocal(Variable, par.name) - pvar.offset = paramoffset - paramoffset += pvar.typ.size - - # code generation - node.entrypoint = self.rip - self.addCode(push('rbp')) - self.addCode(mov('rbp', 'rsp')) # Setup the base pointer - self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals - self.gencode(node.block) - if node.retexpr: - if node.retexpr.typ.isType(integer): - self.genexprcode(node.retexpr) - self.addCode( mov('rax', node.retexpr.reg) ) - self.freereg(node.retexpr) - else: - Error('Cannot return this kind yet {0}'.format(node.retexpr.typ)) - self.addCode( addreg64('rsp', node.framesize) ) - self.addCode( pop('rbp') ) - self.addCode( ret() ) - assert(len(self.usedregs) == 0) - - elif isinstance(node, StatementSequence): - for s in node.statements: - self.gencode(s) - - elif type(node) is ProcedureCall: - # Prepare parameters on the stack: - stacksize = 0 - assert(len(node.args) == len(node.proc.typ.parameters)) - for arg, param in zip(node.args, node.proc.typ.parameters): - - if param.kind == 'value': - self.genexprcode(arg) - self.addCode( push(arg.reg) ) - self.freereg( arg ) - stacksize += 8 - else: - Error('Parameter kind other than value') - - # Calculate address using designator - if type(node.proc.obj) is Procedure: - self.addCode( call(0x0) ) - self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip)) - elif type(node.proc.obj) is ImportedSymbol: - # Load the entry point of the import table - self.getreg(node.proc.obj) - # Load the address of the procedure: - self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) ) - self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) ) - # Call to the address in register: - self.addCode( call(node.proc.obj.reg) ) - # Free register that holds the address of the object - self.freereg( node.proc.obj ) - elif type(node.proc.obj) is BuiltinProcedure: - if node.proc.obj.name == 'chr': - print('int to char') - else: - Error('Unknown builtin function {0}'.format(node.proc.obj.name)) - else: - Error('Cannot call designator of type {0}'.format(node.proc.obj)) - - # Restore stack (pop all arguments of): - self.addCode(addreg64('rsp', stacksize)) - - elif type(node) is Assignment: - if node.lval.typ.isType(integer): - # TODO if node.rval is Constant of some datatype, move it to mem directly - self.genexprcode(node.rval) # Calculate the value that has to be stored. - self.storeRegInDesignator(node.rval.reg, node.lval) - self.freereg(node.rval) - else: - Error('Assignments of other types not implemented') - # TODO if left and right are designators, do some sort of memcpy. - - elif type(node) is IfStatement: - self.genexprcode(node.condition) - self.addCode( cmpreg64(node.condition.reg, 1) ) - self.freereg(node.condition) - if node.falsestatement: - # If with else clause - self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false - rip1 = self.rip - fixloc1 = self.rip - 4 - self.gencode(node.truestatement) - self.addCode( nearjump( 0x0 ) ) # jump over false code - fixloc2 = self.rip - 4 - self.fixCode(fixloc1, imm32(self.rip - rip1)) - rip2 = self.rip - self.gencode(node.falsestatement) - self.fixCode(fixloc2, imm32(self.rip - rip2)) - else: - # If without else clause - self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false - rip1 = self.rip - fixloc1 = self.rip - 4 - self.gencode(node.truestatement) - self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code. - - elif isinstance(node, WhileStatement): - rip1 = self.rip # Store the start of the while loop - self.genexprcode(node.condition) - self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness - self.freereg(node.condition) - self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later) - fixloc1 = self.rip - 4 - rip2 = self.rip - self.gencode(node.dostatements) - self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below - fixloc2 = self.rip - 4 - rip3 = self.rip # end of while loop - self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop - self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop - - elif type(node) is ForStatement: - # Initial load of iterator variable: - self.genexprcode(node.begin) - self.genexprcode(node.end) - # TODO: link reg with variable so that a register is used instead of a variable - iterreg = node.begin.reg # Get the register used for the loop - #self.addCode(cmpreg64(iterreg, node.endvalue)) - rip1 = self.rip - self.gencode(node.statements) - #self.loadDesignatorInReg(node. - #self.addCode( addreg64(node.variable, node.increment) ) - self.addCode(nearjump(0x0)) - fixloc1 = self.rip - 4 - rip2 = self.rip - self.fixCode(fixloc1, imm32(rip1 - rip2)) - - self.freereg(node.begin) # Release register used in loop - self.freereg(node.end) - Error('No implementation of FOR statement') - - elif type(node) is AsmCode: - def processOperand(op): - if type(op) is list: - if type(op[0]) is Variable: - var = op[0] - if var.isLocal: - return ['rbp', var.offset] - else: - Error('Can only use local variables in inline assembler') - return op - for asmline in node.asmcode: - opcode, operands = asmline - operands = [processOperand(opx) for opx in operands] - print('assembling', opcode, *operands) - func,nargs = opcodes[opcode] - code = func(*operands) - self.addCode(code) - - elif isinstance(node, EmptyStatement): - pass - - - elif type(node) is StringConstant: - self.strings.append(node) - self.data.append(node.value) # Add string to the data section - - elif type(node) is Designator: - if len(node.selectors) > 0: - self.getreg(node) - # Load starting address - if node.obj.isLocal: - self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) ) - else: - # Global variables need to be relocated... - self.addCode(leareg64(node.reg, ['RIP', 0])) - self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip)) - # Loop over all designators.. - for selector in node.selectors: - if type(selector) is Index: - # Deref an array index - self.genexprcode(selector.index) - self.getreg(selector) - self.addCode( mov(selector.reg, selector.typ.elementType.size) ) - self.addCode( imulreg64(selector.reg, selector.index.reg ) ) - self.freereg(selector.index) - self.addCode(addreg64(node.reg, selector.reg)) - self.freereg(selector) - elif type(selector) is Field: - print('Field') - Error('Field not implemented') - else: - Error('Unknown selector') - else: - Error('Can only gencode for designator with selectors') - - else: - print('not generating code for {0}'.format(node)) - - def generatecode(self, ast): - """ code generation front end """ - self.initialize() - self.gencode(ast) - ast.image = self.image -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/libs/compiler/frontends/lexer.py Sat Oct 27 14:31:58 2012 +0200 @@ -0,0 +1,71 @@ +import collections +import re +from .errors import CompilerException + +""" + Lexical analyzer part. Splits the input character stream into tokens. +""" + +# Token is used in the lexical analyzer: +Token = collections.namedtuple('Token', 'typ val row col') + +keywords = ['and', 'array', 'begin', 'by', 'case', 'const', 'div', 'do', \ + 'else', 'elsif', 'end', 'false', 'for', 'if', 'import', 'in', 'is', \ + 'mod', 'module', 'nil', 'not', 'of', 'or', 'pointer', 'procedure', \ + 'record', 'repeat', 'return', 'then', 'to', 'true', 'type', 'until', 'var', \ + 'while', 'asm' ] + +def tokenize(s): + """ + Tokenizer, generates an iterator that + returns tokens! + + This GREAT example was taken from python re doc page! + """ + tok_spec = [ + ('REAL', r'\d+\.\d+'), + ('HEXNUMBER', r'0x[\da-fA-F]+'), + ('NUMBER', r'\d+'), + ('ID', r'[A-Za-z][A-Za-z\d_]*'), + ('NEWLINE', r'\n'), + ('SKIP', r'[ \t]'), + ('COMMENTS', r'{.*}'), + ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'), + ('STRING', r"'.*?'") + ] + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + line = 1 + pos = line_start = 0 + mo = gettok(s) + while mo is not None: + typ = mo.lastgroup + val = mo.group(typ) + if typ == 'NEWLINE': + line_start = pos + line += 1 + elif typ == 'COMMENTS': + pass + elif typ != 'SKIP': + if typ == 'ID': + if val in keywords: + typ = val + elif typ == 'LEESTEKEN': + typ = val + elif typ == 'NUMBER': + val = int(val) + elif typ == 'HEXNUMBER': + val = int(val[2:], 16) + typ = 'NUMBER' + elif typ == 'REAL': + val = float(val) + elif typ == 'STRING': + val = val[1:-1] + yield Token(typ, val, line, mo.start()-line_start) + pos = mo.end() + mo = gettok(s, pos) + if pos != len(s): + col = pos - line_start + raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col) + yield Token('END', '', line, 0) +
--- a/python/libs/compiler/lexer.py Sat Oct 13 16:13:05 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -import collections -import re -from .errors import CompilerException - -""" - Lexical analyzer part. Splits the input character stream into tokens. -""" - -# Token is used in the lexical analyzer: -Token = collections.namedtuple('Token', 'typ val row col') - -keywords = ['and', 'array', 'begin', 'by', 'case', 'const', 'div', 'do', \ - 'else', 'elsif', 'end', 'false', 'for', 'if', 'import', 'in', 'is', \ - 'mod', 'module', 'nil', 'not', 'of', 'or', 'pointer', 'procedure', \ - 'record', 'repeat', 'return', 'then', 'to', 'true', 'type', 'until', 'var', \ - 'while', 'asm' ] - -def tokenize(s): - """ - Tokenizer, generates an iterator that - returns tokens! - - This GREAT example was taken from python re doc page! - """ - tok_spec = [ - ('REAL', r'\d+\.\d+'), - ('HEXNUMBER', r'0x[\da-fA-F]+'), - ('NUMBER', r'\d+'), - ('ID', r'[A-Za-z][A-Za-z\d_]*'), - ('NEWLINE', r'\n'), - ('SKIP', r'[ \t]'), - ('COMMENTS', r'{.*}'), - ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'), - ('STRING', r"'.*?'") - ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match - line = 1 - pos = line_start = 0 - mo = gettok(s) - while mo is not None: - typ = mo.lastgroup - val = mo.group(typ) - if typ == 'NEWLINE': - line_start = pos - line += 1 - elif typ == 'COMMENTS': - pass - elif typ != 'SKIP': - if typ == 'ID': - if val in keywords: - typ = val - elif typ == 'LEESTEKEN': - typ = val - elif typ == 'NUMBER': - val = int(val) - elif typ == 'HEXNUMBER': - val = int(val[2:], 16) - typ = 'NUMBER' - elif typ == 'REAL': - val = float(val) - elif typ == 'STRING': - val = val[1:-1] - yield Token(typ, val, line, mo.start()-line_start) - pos = mo.end() - mo = gettok(s, pos) - if pos != len(s): - col = pos - line_start - raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col) - yield Token('END', '', line, 0) -
--- a/python/libs/compiler/parser.py Sat Oct 13 16:13:05 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,787 +0,0 @@ -""" - This module parses source code into an abstract syntax tree (AST) -""" - -from .symboltable import SymbolTable -from .nodes import * -from .errors import CompilerException, Error -from .modules import loadModule -from .display import printNode -from .builtin import * -from . import assembler - -class Parser: - def __init__(self, tokens): - """ provide the parser with the tokens iterator from the lexer. """ - self.tokens = tokens - self.NextToken() - self.errorlist = [] - - def Error(self, msg): - raise CompilerException(msg, self.token.row, self.token.col) - - # Lexer helpers: - def Consume(self, typ=''): - if self.token.typ == typ or typ == '': - v = self.token.val - self.NextToken() - return v - else: - self.Error('Excected: "{0}", got "{1}"'.format(typ, self.token.val)) - - def hasConsumed(self, typ): - if self.token.typ == typ: - self.Consume(typ) - return True - return False - - def NextToken(self): - self.token = self.tokens.__next__() - # TODO: store filename in location? - self.location = (self.token.row, self.token.col) - - # Helpers to find location of the error in the code: - def setLocation(self, obj, location): - obj.location = location - return obj - def getLocation(self): - return self.location - - """ - Recursive descent parser functions: - A set of mutual recursive functions. - Starting symbol is the Module. - """ - def parseModule(self): - self.imports = [] - loc = self.getLocation() - self.Consume('module') - modname = self.Consume('ID') - self.Consume(';') - mod = Module(modname) - - # Construct a symbol table for this program - mod.symtable = SymbolTable() - # Add built in types and functions: - for x in [real, integer, boolean, char, chr_func]: - mod.symtable.addSymbol(x) - - self.cst = mod.symtable - self.parseImportList() - - self.parseDeclarationSequence() - # Procedures only allowed in this scope - self.parseProcedureDeclarations() - - if self.hasConsumed('begin'): - mod.initcode = self.parseStatementSequence() - else: - mod.initcode = EmptyStatement() - - self.Consume('end') - endname = self.Consume('ID') - if endname != modname: - self.Error('end denoter must be module name') - self.Consume('.') - - mod.imports = self.imports - return self.setLocation(mod, loc) - - # Import part - def parseImportList(self): - if self.hasConsumed('import'): - self.parseImport() - while self.hasConsumed(','): - self.parseImport() - self.Consume(';') - - def parseImport(self): - loc = self.getLocation() - modname = self.Consume('ID') - mod = loadModule(modname) - self.setLocation(mod, loc) - self.cst.addSymbol(mod) - - # Helper to parse an identifier defenitions - def parseIdentDef(self): - loc = self.getLocation() - name = self.Consume('ID') - ispublic = self.hasConsumed('*') - # Make a node of this thing: - i = Id(name) - i.ispublic = ispublic - return self.setLocation(i, loc) - - def parseIdentList(self): - ids = [ self.parseIdentDef() ] - while self.hasConsumed(','): - ids.append( self.parseIdentDef() ) - return ids - - def parseQualIdent(self): - """ Parse a qualified identifier """ - name = self.Consume('ID') - if self.cst.has(Module, name): - modname = name - mod = self.cst.get(Module, modname) - self.Consume('.') - name = self.Consume('ID') - # Try to find existing imported symbol: - for imp in self.imports: - if imp.modname == modname and imp.name == name: - return imp - # Try to find the symbol in the modules exports: - for sym in mod.exports: - if sym.name == name: - impsym = ImportedSymbol(modname, name) - impsym.typ = sym.typ - impsym.signature = mod.signature - self.imports.append(impsym) - return impsym - self.Error("Cannot find symbol {0}".format(name)) - else: - return self.cst.getSymbol(name) - - # Helper to parse a designator - def parseDesignator(self): - """ A designator designates an object. - The base location in memory is denoted by the qualified identifier - The actual address depends on the selector. - """ - loc = self.getLocation() - obj = self.parseQualIdent() - typ = obj.typ - selectors = [] - while self.token.typ in ['.', '[', '^']: - if self.hasConsumed('.'): - field = self.Consume('ID') - if typ is PointerType: - selectors.append(Deref()) - typ = typ.pointedType - if not type(typ) is RecordType: - self.Error("field reference, type not record but {0}".format(typ)) - typ = typ.fields[field] - selectors.append(Field(field)) - elif self.hasConsumed('['): - indexes = self.parseExpressionList() - self.Consume(']') - for idx in indexes: - if not type(typ) is ArrayType: - self.Error('Cannot index non array type') - if not isType(idx.typ, integer): - self.Error('Only integer expressions can be used as an index') - selectors.append(Index(idx, typ)) - typ = typ.elementType - elif self.hasConsumed('^'): - selectors.append(Deref()) - typ = typ.pointedType - return self.setLocation(Designator(obj, selectors, typ), loc) - - # Declaration sequence - def parseDeclarationSequence(self): - """ 1. constants, 2. types, 3. variables """ - self.parseConstantDeclarations() - self.parseTypeDeclarations() - self.parseVariableDeclarations() - - # Constants - def evalExpression(self, expr): - if type(expr) is Binop: - a = self.evalExpression(expr.a) - b = self.evalExpression(expr.b) - if expr.op == '+': - return a + b - elif expr.op == '-': - return a - b - elif expr.op == '*': - return a * b - elif expr.op == '/': - return float(a) / float(b) - elif expr.op == 'mod': - return int(a % b) - elif expr.op == 'div': - return int(a / b) - elif expr.op == 'or': - return a or b - elif expr.op == 'and': - return a and b - else: - self.Error('Cannot evaluate expression with {0}'.format(expr.op)) - elif type(expr) is Constant: - return expr.value - elif type(expr) is Designator: - if type(expr.obj) is Constant: - return self.evalExpression(expr.obj) - else: - self.Error('Cannot evaluate designated object {0}'.format(expr.obj)) - elif type(expr) is Unop: - a = self.evalExpression(expr.a) - if expr.op == 'not': - return not a - elif expr.op == '-': - return -a - else: - self.Error('Unimplemented unary operation {0}'.format(expr.op)) - else: - self.Error('Cannot evaluate expression {0}'.format(expr)) - - def parseConstExpression(self): - e = self.parseExpression() - return self.evalExpression(e), e.typ - - def parseConstantDeclarations(self): - """ Parse const part of a module """ - if self.hasConsumed('const'): - while self.token.typ == 'ID': - i = self.parseIdentDef() - self.Consume('=') - constvalue, typ = self.parseConstExpression() - self.Consume(';') - c = Constant(constvalue, typ, name=i.name, public=i.ispublic) - self.setLocation(c, i.location) - self.cst.addSymbol(c) - - # Type system - def parseTypeDeclarations(self): - if self.hasConsumed('type'): - while self.token.typ == 'ID': - typename, export = self.parseIdentDef() - self.Consume('=') - typ = self.parseStructuredType() - self.Consume(';') - t = DefinedType(typename, typ) - self.cst.addSymbol(t) - - def parseType(self): - if self.token.typ == 'ID': - typename = self.Consume('ID') - if self.cst.has(Type, typename): - typ = self.cst.get(Type, typename) - while type(typ) is DefinedType: - typ = typ.typ - return typ - else: - self.Error('Cannot find type {0}'.format(typename)) - else: - return self.parseStructuredType() - - def parseStructuredType(self): - if self.hasConsumed('array'): - dimensions = [] - dimensions.append( self.parseConstExpression() ) - while self.hasConsumed(','): - dimensions.append( self.parseConstExpression() ) - self.Consume('of') - arr = self.parseType() - for dimension, consttyp in reversed(dimensions): - if not isType(consttyp, integer): - self.Error('array dimension must be an integer type (not {0})'.format(consttyp)) - if dimension < 2: - self.Error('array dimension must be bigger than 1 (not {0})'.format(dimension)) - arr = ArrayType(dimension, arr) - return arr - elif self.hasConsumed('record'): - fields = {} - while self.token.typ == 'ID': - # parse a fieldlist: - identifiers = self.parseIdentList() - self.Consume(':') - typ = self.parseType() - self.Consume(';') - for i in identifiers: - if i.name in fields.keys(): - self.Error('record field "{0}" multiple defined.'.format(i.name)) - fields[i.name] = typ - # TODO store this in another way, symbol table? - self.Consume('end') - return RecordType(fields) - elif self.hasConsumed('pointer'): - self.Consume('to') - typ = self.parseType() - return PointerType(typ) - elif self.hasConsumed('procedure'): - parameters, returntype = self.parseFormalParameters() - return ProcedureType(parameters, returntype) - else: - self.Error('Unknown structured type "{0}"'.format(self.token.val)) - - # Variable declarations: - def parseVariableDeclarations(self): - if self.hasConsumed('var'): - if self.token.typ == 'ID': - while self.token.typ == 'ID': - ids = self.parseIdentList() - self.Consume(':') - typename = self.parseType() - self.Consume(';') - for i in ids: - v = Variable(i.name, typename, public=i.ispublic) - self.setLocation(v, i.location) - self.cst.addSymbol(v) - else: - self.Error('Expected ID, got'+str(self.token)) - - # Procedures - def parseFPsection(self): - if self.hasConsumed('const'): - kind = 'const' - elif self.hasConsumed('var'): - kind = 'var' - else: - kind = 'value' - names = [ self.Consume('ID') ] - while self.hasConsumed(','): - names.append( self.Consume('ID') ) - self.Consume(':') - typ = self.parseType() - parameters = [Parameter(kind, name, typ) - for name in names] - return parameters - - def parseFormalParameters(self): - parameters = [] - self.Consume('(') - if not self.hasConsumed(')'): - parameters += self.parseFPsection() - while self.hasConsumed(';'): - parameters += self.parseFPsection() - self.Consume(')') - if self.hasConsumed(':'): - returntype = self.parseQualIdent() - else: - returntype = void - return ProcedureType(parameters, returntype) - - def parseProcedureDeclarations(self): - procedures = [] - while self.token.typ == 'procedure': - p = self.parseProcedureDeclaration() - procedures.append(p) - self.Consume(';') - return procedures - - def parseProcedureDeclaration(self): - loc = self.getLocation() - self.Consume('procedure') - i = self.parseIdentDef() - procname = i.name - proctyp = self.parseFormalParameters() - procsymtable = SymbolTable(parent = self.cst) - self.cst = procsymtable # Switch symbol table: - # Add parameters as variables to symbol table: - for parameter in proctyp.parameters: - vname = parameter.name - vtyp = parameter.typ - if parameter.kind == 'var': - vtyp = PointerType(vtyp) - variable = Variable(vname, vtyp, False) - if parameter.kind == 'const': - variable.isReadOnly = True - variable.isParameter = True - self.cst.addSymbol(variable) - self.Consume(';') - self.parseDeclarationSequence() - # Mark all variables as local: - for variable in self.cst.getAllLocal(Variable): - variable.isLocal = True - - if self.hasConsumed('begin'): - block = self.parseStatementSequence() - if self.hasConsumed('return'): - returnexpression = self.parseExpression() - else: - returnexpression = None - - if proctyp.returntype.isType(void): - if not returnexpression is None: - self.Error('Void procedure cannot return a value') - else: - if returnexpression is None: - self.Error('Procedure must return a value') - if not isType(returnexpression.typ, proctyp.returntype): - self.Error('Returned type {0} does not match function return type {1}'.format(returnexpression.typ, proctyp.returntype)) - - self.Consume('end') - endname = self.Consume('ID') - if endname != procname: - self.Error('endname should match {0}'.format(name)) - self.cst = procsymtable.parent # Switch back to parent symbol table - proc = Procedure(procname, proctyp, block, procsymtable, returnexpression) - self.setLocation(proc, loc) - self.cst.addSymbol(proc) - proc.public = i.ispublic - return proc - - # Statements: - def parseAssignment(self, lval): - loc = self.getLocation() - self.Consume(':=') - rval = self.parseExpression() - if isType(lval.typ, real) and isType(rval.typ, integer): - rval = Unop(rval, 'INTTOREAL', real) - if type(rval.typ) is NilType: - if not type(lval.typ) is ProcedureType and not type(lval.typ) is PointerType: - self.Error('Can assign nil only to pointers or procedure types, not {0}'.format(lval)) - elif not isType(lval.typ, rval.typ): - self.Error('Type mismatch {0} != {1}'.format(lval.typ, rval.typ)) - return self.setLocation(Assignment(lval, rval), loc) - - def parseExpressionList(self): - expressions = [ self.parseExpression() ] - while self.hasConsumed(','): - expressions.append( self.parseExpression() ) - return expressions - - def parseProcedureCall(self, procedure): - self.Consume('(') - if self.token.typ != ')': - args = self.parseExpressionList() - else: - args = [] - self.Consume(')') - parameters = procedure.typ.parameters - if len(args) != len(parameters): - self.Error("Procedure requires {0} arguments, {1} given".format(len(parameters), len(args))) - for arg, param in zip(args, parameters): - if not arg.typ.isType(param.typ): - print(arg.typ, param.typ) - self.Error('Mismatch in parameter') - return ProcedureCall(procedure, args) - - def parseIfStatement(self): - loc = self.getLocation() - self.Consume('if') - ifs = [] - condition = self.parseExpression() - if not isType(condition.typ, boolean): - self.Error('condition of if statement must be boolean') - self.Consume('then') - truestatement = self.parseStatementSequence() - ifs.append( (condition, truestatement) ) - while self.hasConsumed('elsif'): - condition = self.parseExpression() - if not isType(condition.typ, boolean): - self.Error('condition of if statement must be boolean') - self.Consume('then') - truestatement = self.parseStatementSequence() - ifs.append( (condition, truestatement) ) - if self.hasConsumed('else'): - statement = self.parseStatementSequence() - else: - statement = None - self.Consume('end') - for condition, truestatement in reversed(ifs): - statement = IfStatement(condition, truestatement, statement) - return self.setLocation(statement, loc) - - def parseCase(self): - # TODO - pass - - def parseCaseStatement(self): - self.Consume('case') - expr = self.parseExpression() - self.Consume('of') - self.parseCase() - while self.hasConsumed('|'): - self.parseCase() - self.Consume('end') - - def parseWhileStatement(self): - loc = self.getLocation() - self.Consume('while') - condition = self.parseExpression() - self.Consume('do') - statements = self.parseStatementSequence() - if self.hasConsumed('elsif'): - self.Error('elsif in while not yet implemented') - self.Consume('end') - return self.setLocation(WhileStatement(condition, statements), loc) - - def parseRepeatStatement(self): - self.Consume('repeat') - stmt = self.parseStatementSequence() - self.Consume('until') - cond = self.parseBoolExpression() - - def parseForStatement(self): - loc = self.getLocation() - self.Consume('for') - variable = self.parseDesignator() - if not variable.typ.isType(integer): - self.Error('loop variable of for statement must have integer type') - assert(variable.typ.isType(integer)) - self.Consume(':=') - begin = self.parseExpression() - if not begin.typ.isType(integer): - self.Error('begin expression of a for statement must have integer type') - self.Consume('to') - end = self.parseExpression() - if not end.typ.isType(integer): - self.Error('end expression of a for statement must have integer type') - if self.hasConsumed('by'): - increment, typ = self.parseConstExpression() - if not typ.isType(integer): - self.Error('Increment must be integer') - else: - increment = 1 - assert(type(increment) is int) - self.Consume('do') - statements = self.parseStatementSequence() - self.Consume('end') - return self.setLocation(ForStatement(variable, begin, end, increment, statements), loc) - - def parseAsmcode(self): - # TODO: move this to seperate file - def parseOpcode(): - return self.Consume('ID') - def parseOperand(): - if self.hasConsumed('['): - memref = [] - memref.append(parseOperand()) - self.Consume(']') - return memref - else: - if self.token.typ == 'NUMBER': - return self.Consume('NUMBER') - else: - ID = self.Consume('ID') - if self.cst.has(Variable, ID): - return self.cst.get(Variable, ID) - else: - return ID - - def parseOperands(n): - operands = [] - if n > 0: - operands.append( parseOperand() ) - n = n - 1 - while n > 0: - self.Consume(',') - operands.append(parseOperand()) - n = n - 1 - return operands - self.Consume('asm') - asmcode = [] - while self.token.typ != 'end': - opcode = parseOpcode() - func, numargs = assembler.opcodes[opcode] - operands = parseOperands(numargs) - asmcode.append( (opcode, operands) ) - #print('opcode', opcode, operands) - self.Consume('end') - return AsmCode(asmcode) - - def parseStatement(self): - try: - # Determine statement type based on the pending token: - if self.token.typ == 'if': - return self.parseIfStatement() - elif self.token.typ == 'case': - return self.parseCaseStatement() - elif self.token.typ == 'while': - return self.parseWhileStatement() - elif self.token.typ == 'repeat': - return self.parseRepeatStatement() - elif self.token.typ == 'for': - return self.parseForStatement() - elif self.token.typ == 'asm': - return self.parseAsmcode() - elif self.token.typ == 'ID': - # Assignment or procedure call - designator = self.parseDesignator() - if self.token.typ == '(' and type(designator.typ) is ProcedureType: - return self.parseProcedureCall(designator) - elif self.token.typ == ':=': - return self.parseAssignment(designator) - else: - self.Error('Unknown statement following designator: {0}'.format(self.token)) - else: - # TODO: return empty statement??: - return EmptyStatement() - self.Error('Unknown statement {0}'.format(self.token)) - except CompilerException as e: - print(e) - self.errorlist.append( (e.row, e.col, e.msg)) - # Do error recovery by skipping all tokens until next ; or end - while not (self.token.typ == ';' or self.token.typ == 'end'): - self.Consume(self.token.typ) - return EmptyStatement() - - def parseStatementSequence(self): - """ Sequence of statements seperated by ';' """ - statements = [ self.parseStatement() ] - while self.hasConsumed(';'): - statements.append( self.parseStatement() ) - return StatementSequence( statements ) - - # Parsing expressions: - """ - grammar of expressions: - expression = SimpleExpression [ reloperator SimpleExpression ] - reloperator = '=' | '<=' | '>=' | '<>' - Simpleexpression = [ '+' | '-' ] term { addoperator term } - addoperator = '+' | '-' | 'or' - term = factor { muloperator factor } - muloperator = '*' | '/' | 'div' | 'mod' | 'and' - factor = number | nil | true | false | "(" expression ")" | - designator [ actualparameters ] | 'not' factor - """ - def parseExpression(self): - """ The connector between the boolean and expression domain """ - expr = self.parseSimpleExpression() - if self.token.typ in ['>=','<=','<','>','<>','=']: - relop = self.Consume() - expr2 = self.parseSimpleExpression() - # Automatic type convert to reals: - if isType(expr.typ, real) and isType(expr2.typ, integer): - expr2 = Unop(expr2, 'INTTOREAL', real) - if isType(expr2.typ, real) and isType(expr.typ, integer): - expr = Unop(expr, 'INTTOREAL', real) - # Type check: - if not isType(expr.typ, expr2.typ): - self.Error('Type mismatch in relop') - if isType(expr.typ, real) and relop in ['<>', '=']: - self.Error('Cannot check real values for equality') - - expr = Relop(expr, relop, expr2, boolean) - return expr - - # Parsing arithmatic expressions: - def parseTerm(self): - a = self.parseFactor() - while self.token.typ in ['*', '/', 'mod', 'div', 'and']: - loc = self.getLocation() - op = self.Consume() - b = self.parseTerm() - # Type determination and checking: - if op in ['mod', 'div']: - if not isType(a.typ, integer): - self.Error('First operand should be integer, not {0}'.format(a.typ)) - if not isType(b.typ, integer): - self.Error('Second operand should be integer, not {0}'.format(b.typ)) - typ = integer - elif op == '*': - if isType(a.typ, integer) and isType(b.typ, integer): - typ = integer - elif isType(a.typ, real) or isType(b.typ, real): - if isType(a.typ, integer): - # Automatic type cast - a = Unop(a, 'INTTOREAL', real) - if isType(b.typ, integer): - b = Unop(b, 'INTTOREAL', real) - if not isType(a.typ, real): - self.Error('first operand must be a real!') - if not isType(b.typ, real): - self.Error('second operand must be a real!') - typ = real - else: - self.Error('Unknown operands for multiply: {0}, {1}'.format(a, b)) - elif op == '/': - # Division always yields a real result, for integer division use div - if isType(a.typ, integer): - # Automatic type cast - a = Unop(a, 'INTTOREAL', real) - if isType(b.typ, integer): - b = Unop(b, 'INTTOREAL', real) - if not isType(a.typ, real): - self.Error('first operand must be a real!') - if not isType(b.typ, real): - self.Error('second operand must be a real!') - typ = real - elif op == 'and': - if not isType(a.typ, boolean): - self.Error('First operand of and must be boolean') - if not isType(b.typ, boolean): - self.Error('Second operand of and must be boolean') - typ = boolean - else: - self.Error('Unknown operand {0}'.format(op)) - - a = self.setLocation(Binop(a, op, b, typ), loc) - return a - - def parseFactor(self): - if self.hasConsumed('('): - e = self.parseExpression() - self.Consume(')') - return e - elif self.token.typ == 'NUMBER': - loc = self.getLocation() - val = self.Consume('NUMBER') - return self.setLocation(Constant(val, integer), loc) - elif self.token.typ == 'REAL': - loc = self.getLocation() - val = self.Consume('REAL') - return self.setLocation(Constant(val, real), loc) - elif self.token.typ == 'CHAR': - val = self.Consume('CHAR') - return Constant(val, char) - elif self.token.typ == 'STRING': - txt = self.Consume('STRING') - return StringConstant(txt) - elif self.token.typ in ['true', 'false']: - val = self.Consume() - val = True if val == 'true' else False - return Constant(val, boolean) - elif self.hasConsumed('nil'): - return Constant(0, NilType()) - elif self.hasConsumed('not'): - f = self.parseFactor() - if not isType(f.typ, boolean): - self.Error('argument of boolean negation must be boolean type') - return Unop(f, 'not', boolean) - elif self.token.typ == 'ID': - designator = self.parseDesignator() - # TODO: handle functions different here? - if self.token.typ == '(' and type(designator.typ) is ProcedureType: - return self.parseProcedureCall(designator) - else: - return designator - else: - self.Error('Expected NUMBER, ID or ( expr ), got'+str(self.token)) - - def parseSimpleExpression(self): - """ Arithmatic expression """ - if self.token.typ in ['+', '-']: - # Handle the unary minus - op = self.Consume() - a = self.parseTerm() - typ = a.typ - if not isType(typ,real) and not isType(typ, integer): - self.Error('Unary minus or plus can be only applied to real or integers') - if op == '-': - a = Unop(a, op, typ) - else: - a = self.parseTerm() - while self.token.typ in ['+', '-', 'or']: - loc = self.getLocation() - op = self.Consume() - b = self.parseTerm() - if op in ['+', '-']: - if isType(a.typ, real) or isType(b.typ, real): - typ = real - if isType(a.typ, integer): - # Automatic type cast - a = Unop(a, 'INTTOREAL', real) - if not isType(a.typ, real): - self.Error('first operand must be a real!') - if isType(b.typ, integer): - b = Unop(b, 'INTTOREAL', real) - if not isType(b.typ, real): - self.Error('second operand must be a real!') - elif isType(a.typ, integer) and isType(b.typ, integer): - typ = integer - else: - self.Error('Invalid types {0} and {1}'.format(a.typ, b.typ)) - elif op == 'or': - if not isType(a.typ, boolean): - self.Error('first operand must be boolean for or operation') - if not isType(b.typ, boolean): - self.Error('second operand must be boolean for or operation') - typ = boolean - else: - self.Error('Unknown operand {0}'.format(op)) - a = self.setLocation(Binop(a, op, b, typ), loc) - return a -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/libs/compiler/parsergen.py Sat Oct 27 14:31:58 2012 +0200 @@ -0,0 +1,787 @@ +""" + This module parses source code into an abstract syntax tree (AST) +""" + +from .symboltable import SymbolTable +from .nodes import * +from .errors import CompilerException, Error +from .modules import loadModule +from .display import printNode +from .builtin import * +from . import assembler + +class Parser: + def __init__(self, tokens): + """ provide the parser with the tokens iterator from the lexer. """ + self.tokens = tokens + self.NextToken() + self.errorlist = [] + + def Error(self, msg): + raise CompilerException(msg, self.token.row, self.token.col) + + # Lexer helpers: + def Consume(self, typ=''): + if self.token.typ == typ or typ == '': + v = self.token.val + self.NextToken() + return v + else: + self.Error('Excected: "{0}", got "{1}"'.format(typ, self.token.val)) + + def hasConsumed(self, typ): + if self.token.typ == typ: + self.Consume(typ) + return True + return False + + def NextToken(self): + self.token = self.tokens.__next__() + # TODO: store filename in location? + self.location = (self.token.row, self.token.col) + + # Helpers to find location of the error in the code: + def setLocation(self, obj, location): + obj.location = location + return obj + def getLocation(self): + return self.location + + """ + Recursive descent parser functions: + A set of mutual recursive functions. + Starting symbol is the Module. + """ + def parseModule(self): + self.imports = [] + loc = self.getLocation() + self.Consume('module') + modname = self.Consume('ID') + self.Consume(';') + mod = Module(modname) + + # Construct a symbol table for this program + mod.symtable = SymbolTable() + # Add built in types and functions: + for x in [real, integer, boolean, char, chr_func]: + mod.symtable.addSymbol(x) + + self.cst = mod.symtable + self.parseImportList() + + self.parseDeclarationSequence() + # Procedures only allowed in this scope + self.parseProcedureDeclarations() + + if self.hasConsumed('begin'): + mod.initcode = self.parseStatementSequence() + else: + mod.initcode = EmptyStatement() + + self.Consume('end') + endname = self.Consume('ID') + if endname != modname: + self.Error('end denoter must be module name') + self.Consume('.') + + mod.imports = self.imports + return self.setLocation(mod, loc) + + # Import part + def parseImportList(self): + if self.hasConsumed('import'): + self.parseImport() + while self.hasConsumed(','): + self.parseImport() + self.Consume(';') + + def parseImport(self): + loc = self.getLocation() + modname = self.Consume('ID') + mod = loadModule(modname) + self.setLocation(mod, loc) + self.cst.addSymbol(mod) + + # Helper to parse an identifier defenitions + def parseIdentDef(self): + loc = self.getLocation() + name = self.Consume('ID') + ispublic = self.hasConsumed('*') + # Make a node of this thing: + i = Id(name) + i.ispublic = ispublic + return self.setLocation(i, loc) + + def parseIdentList(self): + ids = [ self.parseIdentDef() ] + while self.hasConsumed(','): + ids.append( self.parseIdentDef() ) + return ids + + def parseQualIdent(self): + """ Parse a qualified identifier """ + name = self.Consume('ID') + if self.cst.has(Module, name): + modname = name + mod = self.cst.get(Module, modname) + self.Consume('.') + name = self.Consume('ID') + # Try to find existing imported symbol: + for imp in self.imports: + if imp.modname == modname and imp.name == name: + return imp + # Try to find the symbol in the modules exports: + for sym in mod.exports: + if sym.name == name: + impsym = ImportedSymbol(modname, name) + impsym.typ = sym.typ + impsym.signature = mod.signature + self.imports.append(impsym) + return impsym + self.Error("Cannot find symbol {0}".format(name)) + else: + return self.cst.getSymbol(name) + + # Helper to parse a designator + def parseDesignator(self): + """ A designator designates an object. + The base location in memory is denoted by the qualified identifier + The actual address depends on the selector. + """ + loc = self.getLocation() + obj = self.parseQualIdent() + typ = obj.typ + selectors = [] + while self.token.typ in ['.', '[', '^']: + if self.hasConsumed('.'): + field = self.Consume('ID') + if typ is PointerType: + selectors.append(Deref()) + typ = typ.pointedType + if not type(typ) is RecordType: + self.Error("field reference, type not record but {0}".format(typ)) + typ = typ.fields[field] + selectors.append(Field(field)) + elif self.hasConsumed('['): + indexes = self.parseExpressionList() + self.Consume(']') + for idx in indexes: + if not type(typ) is ArrayType: + self.Error('Cannot index non array type') + if not isType(idx.typ, integer): + self.Error('Only integer expressions can be used as an index') + selectors.append(Index(idx, typ)) + typ = typ.elementType + elif self.hasConsumed('^'): + selectors.append(Deref()) + typ = typ.pointedType + return self.setLocation(Designator(obj, selectors, typ), loc) + + # Declaration sequence + def parseDeclarationSequence(self): + """ 1. constants, 2. types, 3. variables """ + self.parseConstantDeclarations() + self.parseTypeDeclarations() + self.parseVariableDeclarations() + + # Constants + def evalExpression(self, expr): + if type(expr) is Binop: + a = self.evalExpression(expr.a) + b = self.evalExpression(expr.b) + if expr.op == '+': + return a + b + elif expr.op == '-': + return a - b + elif expr.op == '*': + return a * b + elif expr.op == '/': + return float(a) / float(b) + elif expr.op == 'mod': + return int(a % b) + elif expr.op == 'div': + return int(a / b) + elif expr.op == 'or': + return a or b + elif expr.op == 'and': + return a and b + else: + self.Error('Cannot evaluate expression with {0}'.format(expr.op)) + elif type(expr) is Constant: + return expr.value + elif type(expr) is Designator: + if type(expr.obj) is Constant: + return self.evalExpression(expr.obj) + else: + self.Error('Cannot evaluate designated object {0}'.format(expr.obj)) + elif type(expr) is Unop: + a = self.evalExpression(expr.a) + if expr.op == 'not': + return not a + elif expr.op == '-': + return -a + else: + self.Error('Unimplemented unary operation {0}'.format(expr.op)) + else: + self.Error('Cannot evaluate expression {0}'.format(expr)) + + def parseConstExpression(self): + e = self.parseExpression() + return self.evalExpression(e), e.typ + + def parseConstantDeclarations(self): + """ Parse const part of a module """ + if self.hasConsumed('const'): + while self.token.typ == 'ID': + i = self.parseIdentDef() + self.Consume('=') + constvalue, typ = self.parseConstExpression() + self.Consume(';') + c = Constant(constvalue, typ, name=i.name, public=i.ispublic) + self.setLocation(c, i.location) + self.cst.addSymbol(c) + + # Type system + def parseTypeDeclarations(self): + if self.hasConsumed('type'): + while self.token.typ == 'ID': + typename, export = self.parseIdentDef() + self.Consume('=') + typ = self.parseStructuredType() + self.Consume(';') + t = DefinedType(typename, typ) + self.cst.addSymbol(t) + + def parseType(self): + if self.token.typ == 'ID': + typename = self.Consume('ID') + if self.cst.has(Type, typename): + typ = self.cst.get(Type, typename) + while type(typ) is DefinedType: + typ = typ.typ + return typ + else: + self.Error('Cannot find type {0}'.format(typename)) + else: + return self.parseStructuredType() + + def parseStructuredType(self): + if self.hasConsumed('array'): + dimensions = [] + dimensions.append( self.parseConstExpression() ) + while self.hasConsumed(','): + dimensions.append( self.parseConstExpression() ) + self.Consume('of') + arr = self.parseType() + for dimension, consttyp in reversed(dimensions): + if not isType(consttyp, integer): + self.Error('array dimension must be an integer type (not {0})'.format(consttyp)) + if dimension < 2: + self.Error('array dimension must be bigger than 1 (not {0})'.format(dimension)) + arr = ArrayType(dimension, arr) + return arr + elif self.hasConsumed('record'): + fields = {} + while self.token.typ == 'ID': + # parse a fieldlist: + identifiers = self.parseIdentList() + self.Consume(':') + typ = self.parseType() + self.Consume(';') + for i in identifiers: + if i.name in fields.keys(): + self.Error('record field "{0}" multiple defined.'.format(i.name)) + fields[i.name] = typ + # TODO store this in another way, symbol table? + self.Consume('end') + return RecordType(fields) + elif self.hasConsumed('pointer'): + self.Consume('to') + typ = self.parseType() + return PointerType(typ) + elif self.hasConsumed('procedure'): + parameters, returntype = self.parseFormalParameters() + return ProcedureType(parameters, returntype) + else: + self.Error('Unknown structured type "{0}"'.format(self.token.val)) + + # Variable declarations: + def parseVariableDeclarations(self): + if self.hasConsumed('var'): + if self.token.typ == 'ID': + while self.token.typ == 'ID': + ids = self.parseIdentList() + self.Consume(':') + typename = self.parseType() + self.Consume(';') + for i in ids: + v = Variable(i.name, typename, public=i.ispublic) + self.setLocation(v, i.location) + self.cst.addSymbol(v) + else: + self.Error('Expected ID, got'+str(self.token)) + + # Procedures + def parseFPsection(self): + if self.hasConsumed('const'): + kind = 'const' + elif self.hasConsumed('var'): + kind = 'var' + else: + kind = 'value' + names = [ self.Consume('ID') ] + while self.hasConsumed(','): + names.append( self.Consume('ID') ) + self.Consume(':') + typ = self.parseType() + parameters = [Parameter(kind, name, typ) + for name in names] + return parameters + + def parseFormalParameters(self): + parameters = [] + self.Consume('(') + if not self.hasConsumed(')'): + parameters += self.parseFPsection() + while self.hasConsumed(';'): + parameters += self.parseFPsection() + self.Consume(')') + if self.hasConsumed(':'): + returntype = self.parseQualIdent() + else: + returntype = void + return ProcedureType(parameters, returntype) + + def parseProcedureDeclarations(self): + procedures = [] + while self.token.typ == 'procedure': + p = self.parseProcedureDeclaration() + procedures.append(p) + self.Consume(';') + return procedures + + def parseProcedureDeclaration(self): + loc = self.getLocation() + self.Consume('procedure') + i = self.parseIdentDef() + procname = i.name + proctyp = self.parseFormalParameters() + procsymtable = SymbolTable(parent = self.cst) + self.cst = procsymtable # Switch symbol table: + # Add parameters as variables to symbol table: + for parameter in proctyp.parameters: + vname = parameter.name + vtyp = parameter.typ + if parameter.kind == 'var': + vtyp = PointerType(vtyp) + variable = Variable(vname, vtyp, False) + if parameter.kind == 'const': + variable.isReadOnly = True + variable.isParameter = True + self.cst.addSymbol(variable) + self.Consume(';') + self.parseDeclarationSequence() + # Mark all variables as local: + for variable in self.cst.getAllLocal(Variable): + variable.isLocal = True + + if self.hasConsumed('begin'): + block = self.parseStatementSequence() + if self.hasConsumed('return'): + returnexpression = self.parseExpression() + else: + returnexpression = None + + if proctyp.returntype.isType(void): + if not returnexpression is None: + self.Error('Void procedure cannot return a value') + else: + if returnexpression is None: + self.Error('Procedure must return a value') + if not isType(returnexpression.typ, proctyp.returntype): + self.Error('Returned type {0} does not match function return type {1}'.format(returnexpression.typ, proctyp.returntype)) + + self.Consume('end') + endname = self.Consume('ID') + if endname != procname: + self.Error('endname should match {0}'.format(name)) + self.cst = procsymtable.parent # Switch back to parent symbol table + proc = Procedure(procname, proctyp, block, procsymtable, returnexpression) + self.setLocation(proc, loc) + self.cst.addSymbol(proc) + proc.public = i.ispublic + return proc + + # Statements: + def parseAssignment(self, lval): + loc = self.getLocation() + self.Consume(':=') + rval = self.parseExpression() + if isType(lval.typ, real) and isType(rval.typ, integer): + rval = Unop(rval, 'INTTOREAL', real) + if type(rval.typ) is NilType: + if not type(lval.typ) is ProcedureType and not type(lval.typ) is PointerType: + self.Error('Can assign nil only to pointers or procedure types, not {0}'.format(lval)) + elif not isType(lval.typ, rval.typ): + self.Error('Type mismatch {0} != {1}'.format(lval.typ, rval.typ)) + return self.setLocation(Assignment(lval, rval), loc) + + def parseExpressionList(self): + expressions = [ self.parseExpression() ] + while self.hasConsumed(','): + expressions.append( self.parseExpression() ) + return expressions + + def parseProcedureCall(self, procedure): + self.Consume('(') + if self.token.typ != ')': + args = self.parseExpressionList() + else: + args = [] + self.Consume(')') + parameters = procedure.typ.parameters + if len(args) != len(parameters): + self.Error("Procedure requires {0} arguments, {1} given".format(len(parameters), len(args))) + for arg, param in zip(args, parameters): + if not arg.typ.isType(param.typ): + print(arg.typ, param.typ) + self.Error('Mismatch in parameter') + return ProcedureCall(procedure, args) + + def parseIfStatement(self): + loc = self.getLocation() + self.Consume('if') + ifs = [] + condition = self.parseExpression() + if not isType(condition.typ, boolean): + self.Error('condition of if statement must be boolean') + self.Consume('then') + truestatement = self.parseStatementSequence() + ifs.append( (condition, truestatement) ) + while self.hasConsumed('elsif'): + condition = self.parseExpression() + if not isType(condition.typ, boolean): + self.Error('condition of if statement must be boolean') + self.Consume('then') + truestatement = self.parseStatementSequence() + ifs.append( (condition, truestatement) ) + if self.hasConsumed('else'): + statement = self.parseStatementSequence() + else: + statement = None + self.Consume('end') + for condition, truestatement in reversed(ifs): + statement = IfStatement(condition, truestatement, statement) + return self.setLocation(statement, loc) + + def parseCase(self): + # TODO + pass + + def parseCaseStatement(self): + self.Consume('case') + expr = self.parseExpression() + self.Consume('of') + self.parseCase() + while self.hasConsumed('|'): + self.parseCase() + self.Consume('end') + + def parseWhileStatement(self): + loc = self.getLocation() + self.Consume('while') + condition = self.parseExpression() + self.Consume('do') + statements = self.parseStatementSequence() + if self.hasConsumed('elsif'): + self.Error('elsif in while not yet implemented') + self.Consume('end') + return self.setLocation(WhileStatement(condition, statements), loc) + + def parseRepeatStatement(self): + self.Consume('repeat') + stmt = self.parseStatementSequence() + self.Consume('until') + cond = self.parseBoolExpression() + + def parseForStatement(self): + loc = self.getLocation() + self.Consume('for') + variable = self.parseDesignator() + if not variable.typ.isType(integer): + self.Error('loop variable of for statement must have integer type') + assert(variable.typ.isType(integer)) + self.Consume(':=') + begin = self.parseExpression() + if not begin.typ.isType(integer): + self.Error('begin expression of a for statement must have integer type') + self.Consume('to') + end = self.parseExpression() + if not end.typ.isType(integer): + self.Error('end expression of a for statement must have integer type') + if self.hasConsumed('by'): + increment, typ = self.parseConstExpression() + if not typ.isType(integer): + self.Error('Increment must be integer') + else: + increment = 1 + assert(type(increment) is int) + self.Consume('do') + statements = self.parseStatementSequence() + self.Consume('end') + return self.setLocation(ForStatement(variable, begin, end, increment, statements), loc) + + def parseAsmcode(self): + # TODO: move this to seperate file + def parseOpcode(): + return self.Consume('ID') + def parseOperand(): + if self.hasConsumed('['): + memref = [] + memref.append(parseOperand()) + self.Consume(']') + return memref + else: + if self.token.typ == 'NUMBER': + return self.Consume('NUMBER') + else: + ID = self.Consume('ID') + if self.cst.has(Variable, ID): + return self.cst.get(Variable, ID) + else: + return ID + + def parseOperands(n): + operands = [] + if n > 0: + operands.append( parseOperand() ) + n = n - 1 + while n > 0: + self.Consume(',') + operands.append(parseOperand()) + n = n - 1 + return operands + self.Consume('asm') + asmcode = [] + while self.token.typ != 'end': + opcode = parseOpcode() + func, numargs = assembler.opcodes[opcode] + operands = parseOperands(numargs) + asmcode.append( (opcode, operands) ) + #print('opcode', opcode, operands) + self.Consume('end') + return AsmCode(asmcode) + + def parseStatement(self): + try: + # Determine statement type based on the pending token: + if self.token.typ == 'if': + return self.parseIfStatement() + elif self.token.typ == 'case': + return self.parseCaseStatement() + elif self.token.typ == 'while': + return self.parseWhileStatement() + elif self.token.typ == 'repeat': + return self.parseRepeatStatement() + elif self.token.typ == 'for': + return self.parseForStatement() + elif self.token.typ == 'asm': + return self.parseAsmcode() + elif self.token.typ == 'ID': + # Assignment or procedure call + designator = self.parseDesignator() + if self.token.typ == '(' and type(designator.typ) is ProcedureType: + return self.parseProcedureCall(designator) + elif self.token.typ == ':=': + return self.parseAssignment(designator) + else: + self.Error('Unknown statement following designator: {0}'.format(self.token)) + else: + # TODO: return empty statement??: + return EmptyStatement() + self.Error('Unknown statement {0}'.format(self.token)) + except CompilerException as e: + print(e) + self.errorlist.append( (e.row, e.col, e.msg)) + # Do error recovery by skipping all tokens until next ; or end + while not (self.token.typ == ';' or self.token.typ == 'end'): + self.Consume(self.token.typ) + return EmptyStatement() + + def parseStatementSequence(self): + """ Sequence of statements seperated by ';' """ + statements = [ self.parseStatement() ] + while self.hasConsumed(';'): + statements.append( self.parseStatement() ) + return StatementSequence( statements ) + + # Parsing expressions: + """ + grammar of expressions: + expression = SimpleExpression [ reloperator SimpleExpression ] + reloperator = '=' | '<=' | '>=' | '<>' + Simpleexpression = [ '+' | '-' ] term { addoperator term } + addoperator = '+' | '-' | 'or' + term = factor { muloperator factor } + muloperator = '*' | '/' | 'div' | 'mod' | 'and' + factor = number | nil | true | false | "(" expression ")" | + designator [ actualparameters ] | 'not' factor + """ + def parseExpression(self): + """ The connector between the boolean and expression domain """ + expr = self.parseSimpleExpression() + if self.token.typ in ['>=','<=','<','>','<>','=']: + relop = self.Consume() + expr2 = self.parseSimpleExpression() + # Automatic type convert to reals: + if isType(expr.typ, real) and isType(expr2.typ, integer): + expr2 = Unop(expr2, 'INTTOREAL', real) + if isType(expr2.typ, real) and isType(expr.typ, integer): + expr = Unop(expr, 'INTTOREAL', real) + # Type check: + if not isType(expr.typ, expr2.typ): + self.Error('Type mismatch in relop') + if isType(expr.typ, real) and relop in ['<>', '=']: + self.Error('Cannot check real values for equality') + + expr = Relop(expr, relop, expr2, boolean) + return expr + + # Parsing arithmatic expressions: + def parseTerm(self): + a = self.parseFactor() + while self.token.typ in ['*', '/', 'mod', 'div', 'and']: + loc = self.getLocation() + op = self.Consume() + b = self.parseTerm() + # Type determination and checking: + if op in ['mod', 'div']: + if not isType(a.typ, integer): + self.Error('First operand should be integer, not {0}'.format(a.typ)) + if not isType(b.typ, integer): + self.Error('Second operand should be integer, not {0}'.format(b.typ)) + typ = integer + elif op == '*': + if isType(a.typ, integer) and isType(b.typ, integer): + typ = integer + elif isType(a.typ, real) or isType(b.typ, real): + if isType(a.typ, integer): + # Automatic type cast + a = Unop(a, 'INTTOREAL', real) + if isType(b.typ, integer): + b = Unop(b, 'INTTOREAL', real) + if not isType(a.typ, real): + self.Error('first operand must be a real!') + if not isType(b.typ, real): + self.Error('second operand must be a real!') + typ = real + else: + self.Error('Unknown operands for multiply: {0}, {1}'.format(a, b)) + elif op == '/': + # Division always yields a real result, for integer division use div + if isType(a.typ, integer): + # Automatic type cast + a = Unop(a, 'INTTOREAL', real) + if isType(b.typ, integer): + b = Unop(b, 'INTTOREAL', real) + if not isType(a.typ, real): + self.Error('first operand must be a real!') + if not isType(b.typ, real): + self.Error('second operand must be a real!') + typ = real + elif op == 'and': + if not isType(a.typ, boolean): + self.Error('First operand of and must be boolean') + if not isType(b.typ, boolean): + self.Error('Second operand of and must be boolean') + typ = boolean + else: + self.Error('Unknown operand {0}'.format(op)) + + a = self.setLocation(Binop(a, op, b, typ), loc) + return a + + def parseFactor(self): + if self.hasConsumed('('): + e = self.parseExpression() + self.Consume(')') + return e + elif self.token.typ == 'NUMBER': + loc = self.getLocation() + val = self.Consume('NUMBER') + return self.setLocation(Constant(val, integer), loc) + elif self.token.typ == 'REAL': + loc = self.getLocation() + val = self.Consume('REAL') + return self.setLocation(Constant(val, real), loc) + elif self.token.typ == 'CHAR': + val = self.Consume('CHAR') + return Constant(val, char) + elif self.token.typ == 'STRING': + txt = self.Consume('STRING') + return StringConstant(txt) + elif self.token.typ in ['true', 'false']: + val = self.Consume() + val = True if val == 'true' else False + return Constant(val, boolean) + elif self.hasConsumed('nil'): + return Constant(0, NilType()) + elif self.hasConsumed('not'): + f = self.parseFactor() + if not isType(f.typ, boolean): + self.Error('argument of boolean negation must be boolean type') + return Unop(f, 'not', boolean) + elif self.token.typ == 'ID': + designator = self.parseDesignator() + # TODO: handle functions different here? + if self.token.typ == '(' and type(designator.typ) is ProcedureType: + return self.parseProcedureCall(designator) + else: + return designator + else: + self.Error('Expected NUMBER, ID or ( expr ), got'+str(self.token)) + + def parseSimpleExpression(self): + """ Arithmatic expression """ + if self.token.typ in ['+', '-']: + # Handle the unary minus + op = self.Consume() + a = self.parseTerm() + typ = a.typ + if not isType(typ,real) and not isType(typ, integer): + self.Error('Unary minus or plus can be only applied to real or integers') + if op == '-': + a = Unop(a, op, typ) + else: + a = self.parseTerm() + while self.token.typ in ['+', '-', 'or']: + loc = self.getLocation() + op = self.Consume() + b = self.parseTerm() + if op in ['+', '-']: + if isType(a.typ, real) or isType(b.typ, real): + typ = real + if isType(a.typ, integer): + # Automatic type cast + a = Unop(a, 'INTTOREAL', real) + if not isType(a.typ, real): + self.Error('first operand must be a real!') + if isType(b.typ, integer): + b = Unop(b, 'INTTOREAL', real) + if not isType(b.typ, real): + self.Error('second operand must be a real!') + elif isType(a.typ, integer) and isType(b.typ, integer): + typ = integer + else: + self.Error('Invalid types {0} and {1}'.format(a.typ, b.typ)) + elif op == 'or': + if not isType(a.typ, boolean): + self.Error('first operand must be boolean for or operation') + if not isType(b.typ, boolean): + self.Error('second operand must be boolean for or operation') + typ = boolean + else: + self.Error('Unknown operand {0}'.format(op)) + a = self.setLocation(Binop(a, op, b, typ), loc) + return a +