Mercurial > lcfOS
changeset 307:e609d5296ee9
Massive rewrite of codegenerator
author | Windel Bouwman |
---|---|
date | Thu, 12 Dec 2013 20:42:56 +0100 |
parents | b145f8e6050b |
children | 2e7f55319858 |
files | examples/pi/add.pi python/ppci/c3/__init__.py python/ppci/c3/analyse.py python/ppci/c3/astnodes.py python/ppci/c3/builder.py python/ppci/c3/codegenerator.py python/ppci/c3/parser.py python/ppci/c3/scope.py python/ppci/c3/visitor.py python/ppci/codegen/canon.py python/ppci/ir.py python/ppci/irutils.py test/grind.py test/testc3.py test/testhexfile.py test/testir.py |
diffstat | 16 files changed, 440 insertions(+), 601 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/pi/add.pi Thu Dec 12 20:42:56 2013 +0100 @@ -0,0 +1,20 @@ + +function i32 add(i32 a, i32 b) + init: + i32 c = a + b + return c + +function void test() + init: + a = 2 + cjmp a > 3 L1 L2 + L1: + i32 b1 = 3 + jmp L3 + L2: + i32 b2 = 6 + a + jmp L3 + L3: + b = phi i32 [b2, L2], [b1, L1] + return b +
--- a/python/ppci/c3/__init__.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/c3/__init__.py Thu Dec 12 20:42:56 2013 +0100 @@ -2,8 +2,6 @@ from .parser import Parser from .lexer import Lexer -from .analyse import Analyzer -from .analyse import TypeChecker from .codegenerator import CodeGenerator from .visitor import Visitor from .visitor import AstPrinter
--- a/python/ppci/c3/analyse.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/c3/analyse.py Thu Dec 12 20:42:56 2013 +0100 @@ -15,20 +15,20 @@ self.diag.error(msg, loc) def visit(self, pkg, pre, post): - self.pkg = pkg self.visitor.visit(pkg, pre, post) - self.pkg = None -class AddScope(C3Pass): +class ScopeFiller(C3Pass): scoped_types = [Package, Function] - def __init__(self, diag, topScope): + def __init__(self, diag, topScope, packages): super().__init__(diag) self.topScope = topScope + self.packages = packages """ Scope is attached to the correct modules. """ def addScope(self, pkg): self.logger.info('Adding scoping to package {}'.format(pkg.name)) + self.pkg = pkg # Prepare top level scope and set scope to all objects: self.scopeStack = [self.topScope] modScope = Scope(self.CurrentScope) @@ -36,6 +36,14 @@ self.visit(pkg, self.enterScope, self.quitScope) assert len(self.scopeStack) == 2 + self.logger.info('Resolving imports for package {}'.format(pkg.name)) + # Handle imports: + for i in pkg.imports: + if i not in self.packages: + self.error('Cannot import {}'.format(i)) + continue + pkg.scope.addSymbol(self.packages[i]) + @property def CurrentScope(self): return self.scopeStack[-1] @@ -47,14 +55,16 @@ self.CurrentScope.addSymbol(sym) def enterScope(self, sym): - # Distribute the scope: - sym.scope = self.CurrentScope + # Attach scope to references: + if type(sym) is Identifier: + sym.scope = self.CurrentScope # Add symbols to current scope: if isinstance(sym, Symbol): self.addSymbol(sym) + sym.scope = self.CurrentScope - # Create subscope: + # Create subscope for items creating a scope: if type(sym) in self.scoped_types: newScope = Scope(self.CurrentScope) self.scopeStack.append(newScope) @@ -64,340 +74,3 @@ # Pop out of scope: if type(sym) in self.scoped_types: self.scopeStack.pop(-1) - - -class Analyzer(C3Pass): - """ - Context handling is done here. - Scope is attached to the correct modules. - This class checks names and references. - """ - - def analyzePackage(self, pkg, packageDict): - # Prepare top level scope and set scope to all objects: - self.pkg = pkg - - self.logger.info('Resolving imports for package {}'.format(pkg.name)) - # Handle imports: - for i in pkg.imports: - if i not in packageDict: - self.error('Cannot import {}'.format(i)) - continue - ip = packageDict[i] - pkg.scope.addSymbol(ip) - fr = FixRefs(self.diag) - fr.fixRefs(pkg) - - -class FixRefs(C3Pass): - def fixRefs(self, pkg): - self.logger.info('Resolving references for {}'.format(pkg.name)) - self.pkg = pkg - - -# Type checking: - -def theType(t): - """ Recurse until a 'real' type is found """ - if type(t) is DefinedType: - return theType(t.typ) - elif type(t) is Identifier: - return theType(resolveSymbol(t)) - return t - - -def equalTypes(a, b): - """ Compare types a and b for structural equavalence. """ - # Recurse into named types: - a = theType(a) - b = theType(b) - assert isinstance(a, Type) - assert isinstance(b, Type) - - if type(a) is type(b): - if type(a) is BaseType: - return a.name == b.name - elif type(a) is PointerType: - return equalTypes(a.ptype, b.ptype) - elif type(a) is StructureType: - if len(a.mems) != len(b.mems): - return False - return all(equalTypes(am.typ, bm.typ) for am, bm in - zip(a.mems, b.mems)) - else: - raise NotImplementedError('{} not implemented'.format(type(a))) - return False - - -def canCast(fromT, toT): - fromT = theType(fromT) - toT = theType(toT) - if isinstance(fromT, PointerType) and isinstance(toT, PointerType): - return True - elif type(fromT) is BaseType and fromT.name == 'int' and \ - isinstance(toT, PointerType): - return True - return False - - -def expectRval(s): - # TODO: solve this better - s.expect_rvalue = True - -# Reference fixups: -def resolveDesignator(self, d, scope): - assert isinstance(d, Designator), type(d) - assert type(scope) is Scope - if d.tname in scope: - s = scope[d.tname] - if isinstance(d, ImportDesignator): - if s.innerScope.hasSymbol(d.vname): - return s.innerScope.getSymbol(d.vname) - else: - s.addRef(None) # TODO: fix this? - return s - self.error('Cannot resolve name {0}'.format(d.tname), d.loc) - - -def findRefs(self, sym): - if type(sym) is Constant or isinstance(sym, Variable): - sym.typ = self.resolveType(sym.typ, sym.scope) - elif type(sym) is TypeCast: - sym.to_type = self.resolveType(sym.to_type, sym.scope) - elif type(sym) is VariableUse: - sym.target = self.resolveDesignator(sym.target, sym.scope) - elif type(sym) is FunctionCall: - varuse = sym.proc - if type(varuse) is VariableUse: - sym.proc = self.resolveDesignator(varuse.target, sym.scope) - elif type(sym) is Function: - # Checkup function type: - ft = sym.typ - ft.returntype = self.resolveType(ft.returntype, sym.scope) - ft.parametertypes = [self.resolveType(pt, sym.scope) for pt in - ft.parametertypes] - # Mark local variables: - for d in sym.declarations: - if isinstance(d, Variable): - d.isLocal = True - elif type(sym) is DefinedType: - sym.typ = self.resolveType(sym.typ, sym.scope) - - -def resolveSymbol(sym): - name = sym.target - scope = sym.scope - return scope[name] - - -class TypeChecker(C3Pass): - """ Checks if all types are correctly used """ - def checkPackage(self, pkg): - self.logger.info('Type checking {}'.format(pkg.name)) - self.pkg = pkg - AstPrinter().printAst(pkg) - # Prepare some standard types: - self.intType = pkg.scope['int'] - self.boolType = pkg.scope['bool'] - for decl in pkg.declarations: - self.checkDecl(decl) - self.pkg.ok = False - - def checkDecl(self, decl): - if type(decl) is Function: - self.checkFunction(decl) - elif isinstance(decl, Variable): - pass - elif isinstance(decl, Type): - self.checkType(decl) - else: - raise Exception(str(decl)) - - def checkType(self, t): - if type(t) is PointerType: - self.checkType(t.ptype) - elif type(t) is StructureType: - offset = 0 - for mem in t.mems: - mem.offset = offset - self.checkType(mem.typ) - offset += theType(mem.typ).bytesize - t.bytesize = offset - elif type(t) is DefinedType: - self.checkType(t.typ) - elif isinstance(t, Identifier): - pass - elif isinstance(t, Member): - pass - else: - raise Exception('Error resolving type {} {}'.format(t, type(t))) - - def checkVariable(self, v): - self.checkType(v.typ) - - def checkFunction(self, f): - for decl in f.declarations: - self.checkDecl(decl) - self.checkStmt(f.body) - - - def checkStmt(self, sym): - assert isinstance(sym, Statement) - if type(sym) is IfStatement: - self.checkExpr(sym.condition) - if not equalTypes(sym.condition.typ, self.boolType): - msg = 'Condition must be boolean' - self.error(msg, sym.condition.loc) - self.checkStmt(sym.truestatement) - self.checkStmt(sym.falsestatement) - elif type(sym) is WhileStatement: - self.checkExpr(sym.condition) - if not equalTypes(sym.condition.typ, self.boolType): - msg = 'Condition must be boolean' - self.error(msg, sym.condition.loc) - self.checkStmt(sym.statement) - elif type(sym) is Assignment: - self.checkExpr(sym.lval) - self.checkExpr(sym.rval) - l, r = sym.lval, sym.rval - if not equalTypes(l.typ, r.typ): - msg = 'Cannot assign {} to {}'.format(r.typ, l.typ) - self.error(msg, sym.loc) - if not l.lvalue: - self.error('No valid lvalue {}'.format(l), l.loc) - #if sym.rval.lvalue: - # self.error('Right hand side must be an rvalue', sym.rval.loc) - expectRval(sym.rval) - elif type(sym) is ReturnStatement: - pass - elif type(sym) is EmptyStatement: - pass - elif type(sym) is ExpressionStatement: - self.checkExpr(sym.ex) - elif type(sym) is CompoundStatement: - for s in sym.statements: - self.checkStmt(s) - else: - raise Exception(str(sym)) - - def checkExpr(self, sym): - assert isinstance(sym, Expression) - if type(sym) is FunctionCall: - # Check arguments: - for arg in sym.args: - self.checkExpr(arg) - self.checkExpr(sym.proc) - ngiv = len(sym.args) - # tg = - ptypes = sym.proc.typ.parametertypes - nreq = len(ptypes) - if ngiv != nreq: - self.error('Function {2}: {0} arguments required, {1} given' - .format(nreq, ngiv, sym.proc.name), sym.loc) - else: - for a, at in zip(sym.args, ptypes): - expectRval(a) - if not equalTypes(a.typ, at): - self.error('Got {0}, expected {1}' - .format(a.typ, at), a.loc) - # determine return type: - sym.typ = sym.proc.typ.returntype - elif type(sym) is Identifier: - sym.lvalue = True - tg = resolveSymbol(sym) - sym.kind = type(tg) - sym.typ = tg.typ - elif type(sym) is Literal: - sym.lvalue = False - typemap = {int: 'int', float: 'double', bool: 'bool'} - if type(sym.val) in typemap: - sym.typ = self.pkg.scope[typemap[type(sym.val)]] - else: - raise Exception('Unknown literal type'.format(sym.val)) - elif type(sym) is Unop: - if sym.op == '&': - sym.typ = PointerType(sym.a.typ) - sym.lvalue = False - else: - raise Exception('Unknown unop {0}'.format(sym.op)) - elif type(sym) is Deref: - # pointer deref - sym.lvalue = True - # check if the to be dereferenced variable is a pointer type: - self.checkExpr(sym.ptr) - ptype = theType(sym.ptr.typ) - if type(ptype) is PointerType: - sym.typ = ptype.ptype - else: - self.error('Cannot dereference non-pointer type {}' - .format(ptype), sym.loc) - sym.typ = self.intType - elif type(sym) is Member: - self.checkExpr(sym.base) - basetype = sym.base.typ - sym.lvalue = sym.base.lvalue - basetype = theType(basetype) - if type(basetype) is StructureType: - if basetype.hasField(sym.field): - sym.typ = basetype.fieldType(sym.field) - else: - self.error('{} does not contain field {}' - .format(basetype, sym.field), sym.loc) - sym.typ = self.intType - else: - self.error('Cannot select field {} of non-structure type {}' - .format(sym.field, basetype), sym.loc) - sym.typ = self.intType - elif type(sym) is Binop: - self.checkExpr(sym.a) - self.checkExpr(sym.b) - sym.lvalue = False - if sym.op in ['+', '-', '*', '/', '<<', '>>', '|', '&']: - expectRval(sym.a) - expectRval(sym.b) - if equalTypes(sym.a.typ, sym.b.typ): - if equalTypes(sym.a.typ, self.intType): - sym.typ = sym.a.typ - else: - self.error('Can only add integers', sym.loc) - sym.typ = self.intType - else: - # assume void here? TODO: throw exception! - sym.typ = self.intType - self.error('Types unequal {} != {}' - .format(sym.a.typ, sym.b.typ), sym.loc) - elif sym.op in ['>', '<', '==', '<=', '>=', '!=']: - expectRval(sym.a) - expectRval(sym.b) - sym.typ = self.boolType - if not equalTypes(sym.a.typ, sym.b.typ): - self.error('Types unequal {} != {}' - .format(sym.a.typ, sym.b.typ), sym.loc) - elif sym.op in ['or', 'and']: - sym.typ = self.boolType - if not equalTypes(sym.a.typ, self.boolType): - self.error('Must be boolean', sym.a.loc) - if not equalTypes(sym.b.typ, self.boolType): - self.error('Must be boolean', sym.b.loc) - else: - raise Exception('Unknown binop {0}'.format(sym.op)) - elif isinstance(sym, Variable): - pass - elif type(sym) is TypeCast: - self.checkExpr(sym.a) - self.checkType(sym.to_type) - if canCast(sym.a.typ, sym.to_type): - sym.typ = sym.to_type - else: - self.error('Cannot cast {} to {}' - .format(sym.a.typ, sym.to_type), sym.loc) - sym.typ = self.intType - elif type(sym) is Constant: - if not equalTypes(sym.typ, sym.value.typ): - self.error('Cannot assign {0} to {1}' - .format(sym.value.typ, sym.typ), sym.loc) - elif type(sym) in [CompoundStatement, Package, Function, FunctionType, - ExpressionStatement, DefinedType, EmptyStatement]: - pass - else: - raise NotImplementedError('Unknown type check {0}'.format(sym))
--- a/python/ppci/c3/astnodes.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/c3/astnodes.py Thu Dec 12 20:42:56 2013 +0100 @@ -284,7 +284,7 @@ self.loc = loc -class EmptyStatement(Statement): +class Empty(Statement): """ Empty statement which does nothing! """ def __init__(self): super().__init__(None) @@ -293,7 +293,7 @@ return 'NOP' -class CompoundStatement(Statement): +class Compound(Statement): """ Statement consisting of a sequence of other statements """ def __init__(self, statements): super().__init__(None) @@ -305,7 +305,7 @@ return 'COMPOUND STATEMENT' -class ReturnStatement(Statement): +class Return(Statement): def __init__(self, expr, loc): super().__init__(loc) self.expr = expr @@ -335,7 +335,7 @@ return 'Epression' -class IfStatement(Statement): +class If(Statement): def __init__(self, condition, truestatement, falsestatement, loc): super().__init__(loc) self.condition = condition @@ -346,7 +346,7 @@ return 'IF-statement' -class WhileStatement(Statement): +class While(Statement): def __init__(self, condition, statement, loc): super().__init__(loc) self.condition = condition
--- a/python/ppci/c3/builder.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/c3/builder.py Thu Dec 12 20:42:56 2013 +0100 @@ -1,9 +1,8 @@ import logging from .lexer import Lexer from .parser import Parser -from .analyse import TypeChecker, Analyzer from .codegenerator import CodeGenerator -from .analyse import AddScope +from .analyse import ScopeFiller from .scope import createTopScope from .visitor import AstPrinter @@ -18,9 +17,7 @@ self.diag = diag self.lexer = Lexer(diag) self.parser = Parser(diag) - self.tc = TypeChecker(diag) - self.al = Analyzer(diag) - self.cg = CodeGenerator() + self.cg = CodeGenerator(diag) self.topScope = createTopScope(target) # Scope with built in types def build(self, srcs, imps=[]): @@ -31,6 +28,7 @@ self.ok = True self.pkgs = {} + # Parsing stage (phase 1) def doParse(src): tokens = self.lexer.lex(src) return self.parser.parseSource(tokens) @@ -41,30 +39,22 @@ self.ok = False return - #for pkg in all_pkgs: - # AstPrinter().printAst(pkg) - + # Fix scopes and package refs (phase 1.5) packages = {pkg.name: pkg for pkg in all_pkgs} self.pkgs = packages + + scopeFiller = ScopeFiller(self.diag, self.topScope, packages) # Fix scopes: for pkg in all_pkgs: - AddScope(self.diag, self.topScope).addScope(pkg) + scopeFiller.addScope(pkg) if not all(pkg.ok for pkg in all_pkgs): self.ok = False return - for pkg in all_pkgs: - self.al.analyzePackage(pkg, packages) + # Generate intermediate code (phase 2) + # Only return ircode when everything is OK + for pkg in all_pkgs & s_pkgs: + yield self.cg.gencode(pkg) if not all(pkg.ok for pkg in all_pkgs): self.ok = False return - - for pkg in all_pkgs: - self.tc.checkPackage(pkg) - if not all(pkg.ok for pkg in all_pkgs): - self.ok = False - return - - # Only return ircode when everything is OK - for pkg in all_pkgs & s_pkgs: - yield self.cg.gencode(pkg)
--- a/python/ppci/c3/codegenerator.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/c3/codegenerator.py Thu Dec 12 20:42:56 2013 +0100 @@ -1,11 +1,23 @@ import logging from .. import ir -from . import astnodes +from .. import irutils +from .astnodes import Symbol, Package, Variable, Function +from .astnodes import Statement, Empty, Compound, If, While, Assignment +from .astnodes import ExpressionStatement, Return +from .astnodes import Expression, Binop, Unop, Identifier, Deref, Member +from .astnodes import Expression, FunctionCall, Literal, TypeCast +from .astnodes import Type, DefinedType, BaseType, PointerType, StructureType + from ppci import CompilerError -from .analyse import theType -class CodeGenerator(ir.Builder): +class SemanticError(Exception): + def __init__(self, msg, loc): + self.msg = msg + self.loc = loc + + +class CodeGenerator(irutils.Builder): """ Generates intermediate (IR) code from a package. The entry function is 'genModule'. The main task of this part is to rewrite complex control @@ -13,13 +25,19 @@ jump statements. Also complex conditional statements are simplified. Such as 'and' and 'or' statements are rewritten in conditional jumps. And structured datatypes are rewritten. + + Type checking is done in one run with code generation. """ - def __init__(self): + def __init__(self, diag): self.logger = logging.getLogger('c3cgen') + self.diag = diag def gencode(self, pkg): self.prepare() - assert type(pkg) is astnodes.Package + assert type(pkg) is Package + self.pkg = pkg + self.intType = pkg.scope['int'] + self.boolType = pkg.scope['bool'] self.logger.info('Generating ir-code for {}'.format(pkg.name)) self.varMap = {} # Maps variables to storage locations. self.funcMap = {} @@ -27,16 +45,27 @@ self.genModule(pkg) return self.m + def error(self, msg, loc=None): + self.pkg.ok = False + self.diag.error(msg, loc) + # inner helpers: def genModule(self, pkg): # Take care of forward declarations: - for s in pkg.innerScope.Functions: - f = self.newFunction(s.name) - self.funcMap[s] = f - for v in pkg.innerScope.Variables: - self.varMap[v] = self.newTemp() - for s in pkg.innerScope.Functions: - self.genFunction(s) + try: + for s in pkg.innerScope.Functions: + f = self.newFunction(s.name) + self.funcMap[s] = f + for v in pkg.innerScope.Variables: + self.varMap[v] = self.newTemp() + for s in pkg.innerScope.Functions: + self.genFunction(s) + except SemanticError as e: + self.error(e.msg, e.loc) + + def checkType(self, t): + """ Verify the type is correct """ + t = self.theType(t) def genFunction(self, fn): # TODO: handle arguments @@ -51,15 +80,20 @@ for sym in fn.innerScope: # TODO: handle parameters different if sym.isParameter: + self.checkType(sym.typ) v = ir.Parameter(sym.name) f.addParameter(v) elif sym.isLocal: + self.checkType(sym.typ) + v = ir.LocalVariable(sym.name) + f.addLocal(v) + elif isinstance(sym, Variable): + self.checkType(sym.typ) v = ir.LocalVariable(sym.name) f.addLocal(v) else: #v = self.newTemp() raise NotImplementedError('{}'.format(sym)) - # TODO: make this ssa here?? self.varMap[sym] = v self.genCode(fn.body) @@ -70,20 +104,31 @@ self.setFunction(None) def genCode(self, code): - assert isinstance(code, astnodes.Statement) + try: + self.genStmt(code) + except SemanticError as e: + self.error(e.msg, e.loc) + + def genStmt(self, code): + assert isinstance(code, Statement) self.setLoc(code.loc) - if type(code) is astnodes.CompoundStatement: + if type(code) is Compound: for s in code.statements: self.genCode(s) - elif type(code) is astnodes.EmptyStatement: + elif type(code) is Empty: pass - elif type(code) is astnodes.Assignment: - rval = self.genExprCode(code.rval) + elif type(code) is Assignment: lval = self.genExprCode(code.lval) + rval = self.genExprCode(code.rval) + if not self.equalTypes(code.lval.typ, code.rval.typ): + msg = 'Cannot assign {} to {}'.format(code.lval.typ, code.rval.typ) + raise SemanticError(msg, code.loc) + if not code.lval.lvalue: + raise SemanticError('No valid lvalue {}'.format(code.lval), code.lval.loc) self.emit(ir.Move(lval, rval)) - elif type(code) is astnodes.ExpressionStatement: + elif type(code) is ExpressionStatement: self.emit(ir.Exp(self.genExprCode(code.ex))) - elif type(code) is astnodes.IfStatement: + elif type(code) is If: bbtrue = self.newBlock() bbfalse = self.newBlock() te = self.newBlock() @@ -95,13 +140,13 @@ self.genCode(code.falsestatement) self.emit(ir.Jump(te)) self.setBlock(te) - elif type(code) is astnodes.ReturnStatement: + elif type(code) is Return: re = self.genExprCode(code.expr) self.emit(ir.Move(self.fn.return_value, re)) self.emit(ir.Jump(self.fn.epiloog)) b = self.newBlock() self.setBlock(b) - elif type(code) is astnodes.WhileStatement: + elif type(code) is While: bbdo = self.newBlock() bbtest = self.newBlock() te = self.newBlock() @@ -117,78 +162,218 @@ def genCondCode(self, expr, bbtrue, bbfalse): # Implement sequential logical operators - if type(expr) is astnodes.Binop: + if type(expr) is Binop: if expr.op == 'or': l2 = self.newBlock() self.genCondCode(expr.a, bbtrue, l2) + if not equalTypes(expr.a.typ, self.boolType): + raise SemanticError('Must be boolean', expr.a.loc) self.setBlock(l2) self.genCondCode(expr.b, bbtrue, bbfalse) + if not equalTypes(expr.b.typ, self.boolType): + raise SemanticError('Must be boolean', expr.b.loc) elif expr.op == 'and': l2 = self.newBlock() self.genCondCode(expr.a, l2, bbfalse) + if not equalTypes(expr.a.typ, self.boolType): + self.error('Must be boolean', expr.a.loc) self.setBlock(l2) self.genCondCode(expr.b, bbtrue, bbfalse) + if not equalTypes(expr.b.typ, self.boolType): + raise SemanticError('Must be boolean', expr.b.loc) elif expr.op in ['==', '>', '<', '!=', '<=', '>=']: ta = self.genExprCode(expr.a) tb = self.genExprCode(expr.b) + if not self.equalTypes(expr.a.typ, expr.b.typ): + raise SemanticError('Types unequal {} != {}' + .format(expr.a.typ, expr.b.typ), expr.loc) self.emit(ir.CJump(ta, expr.op, tb, bbtrue, bbfalse)) else: raise NotImplementedError('Unknown condition {}'.format(expr)) - elif type(expr) is astnodes.Literal: + expr.typ = self.boolType + elif type(expr) is Literal: if expr.val: self.emit(ir.Jump(bbtrue)) else: self.emit(ir.Jump(bbfalse)) + expr.typ = self.boolType else: raise NotImplementedError('Unknown cond {}'.format(expr)) + if not self.equalTypes(expr.typ, self.boolType): + self.error('Condition must be boolean', expr.loc) def genExprCode(self, expr): - assert isinstance(expr, astnodes.Expression) - if type(expr) is astnodes.Binop and expr.op in ir.Binop.ops: - ra = self.genExprCode(expr.a) - rb = self.genExprCode(expr.b) + assert isinstance(expr, Expression) + if type(expr) is Binop: + expr.lvalue = False + if expr.op in ['+', '-', '*', '/', '<<', '>>', '|', '&']: + ra = self.genExprCode(expr.a) + rb = self.genExprCode(expr.b) + if self.equalTypes(expr.a.typ, self.intType) and \ + self.equalTypes(expr.b.typ, self.intType): + expr.typ = expr.a.typ + else: + # assume void here? TODO: throw exception! + raise SemanticError('Can only add integers', expr.loc) + else: + raise NotImplementedError("Cannot use equality as expressions") return ir.Binop(ra, expr.op, rb) - elif type(expr) is astnodes.Unop and expr.op == '&': - ra = self.genExprCode(expr.a) - assert type(ra) is ir.Mem - return ra.e - elif type(expr) is astnodes.VariableUse: + elif type(expr) is Unop: + if expr.op == '&': + ra = self.genExprCode(expr.a) + expr.typ = PointerType(expr.a.typ) + if not expr.a.lvalue: + raise SemanticError('No valid lvalue', expr.a.loc) + expr.lvalue = False + assert type(ra) is ir.Mem + return ra.e + else: + raise NotImplementedError('Unknown unop {0}'.format(expr.op)) + elif type(expr) is Identifier: + # Generate code for this identifier. + expr.lvalue = True + tg = self.resolveSymbol(expr) + expr.kind = type(tg) + expr.typ = tg.typ # This returns the dereferenced variable. - if expr.target.isParameter: + if type(tg) is Variable: # TODO: now parameters are handled different. Not nice? - return self.varMap[expr.target] + return ir.Mem(self.varMap[tg]) else: - return ir.Mem(self.varMap[expr.target]) - elif type(expr) is astnodes.Deref: + return ir.Mem(self.varMap[tg]) + elif type(expr) is Deref: # dereference pointer type: addr = self.genExprCode(expr.ptr) - return ir.Mem(addr) - elif type(expr) is astnodes.FieldRef: + ptr_typ = self.theType(expr.ptr.typ) + expr.lvalue = True + if type(ptr_typ) is PointerType: + expr.typ = ptr_typ.ptype + return ir.Mem(addr) + else: + raise SemanticError('Cannot deref non-pointer', expr.loc) + expr.typ = self.intType + return ir.Mem(ir.Const(0)) + elif type(expr) is Member: base = self.genExprCode(expr.base) + expr.lvalue = expr.base.lvalue + basetype = self.theType(expr.base.typ) + if type(basetype) is StructureType: + if basetype.hasField(expr.field): + expr.typ = basetype.fieldType(expr.field) + else: + raise SemanticError('{} does not contain field {}' + .format(basetype, expr.field), expr.loc) + else: + raise SemanticError('Cannot select field {} of non-structure type {}' + .format(sym.field, basetype), sym.loc) + assert type(base) is ir.Mem, type(base) base = base.e - bt = theType(expr.base.typ) + bt = self.theType(expr.base.typ) offset = ir.Const(bt.fieldOffset(expr.field)) return ir.Mem(ir.Add(base, offset)) - elif type(expr) is astnodes.Literal: + elif type(expr) is Literal: + expr.lvalue = False + typemap = {int: 'int', float: 'double', bool: 'bool'} + if type(expr.val) in typemap: + expr.typ = self.pkg.scope[typemap[type(expr.val)]] + else: + raise SemanticError('Unknown literal type {}'.format(expr.val)) return ir.Const(expr.val) - elif type(expr) is astnodes.TypeCast: + elif type(expr) is TypeCast: # TODO: improve this mess: ar = self.genExprCode(expr.a) - tt = theType(expr.to_type) - if isinstance(tt, astnodes.PointerType): - if expr.a.typ is expr.scope['int']: - return ar - elif isinstance(expr.a.typ, astnodes.PointerType): - return ar - else: - raise Exception() + ft = self.theType(expr.a.typ) + tt = self.theType(expr.to_type) + if isinstance(ft, PointerType) and isinstance(tt, PointerType): + expr.typ = expr.to_type + return ar + elif type(ft) is BaseType and ft.name == 'int' and \ + isinstance(tt, PointerType): + expr.typ = expr.to_type + return ar else: - raise NotImplementedError("not implemented") - elif type(expr) is astnodes.FunctionCall: + self.error('Cannot cast {} to {}' + .format(ft, tt), expr.loc) + expr.typ = self.intType + return ir.Const(0) + elif type(expr) is FunctionCall: + # Evaluate the arguments: args = [self.genExprCode(e) for e in expr.args] - #fn = self.funcMap[expr.proc] - fn = expr.proc.name - return ir.Call(fn, args) + # Check arguments: + if type(expr.proc) is Identifier: + tg = self.resolveSymbol(expr.proc) + else: + raise Exception() + assert type(tg) is Function + ftyp = tg.typ + fname = tg.name + ptypes = ftyp.parametertypes + if len(expr.args) != len(ptypes): + raise SemanticError('Function {2}: {0} arguments required, {1} given' + .format(len(ptypes), len(expr.args), fname), expr.loc) + for arg, at in zip(expr.args, ptypes): + if not self.equalTypes(arg.typ, at): + raise SemanticError('Got {0}, expected {1}' + .format(arg.typ, at), arg.loc) + # determine return type: + expr.typ = ftyp.returntype + return ir.Call(fname, args) else: raise NotImplementedError('Unknown expr {}'.format(expr)) + + def resolveSymbol(self, sym): + assert type(sym) in [Identifier, Member] + if type(sym) is Member: + base = self.resolveSymbol(sym.base) + scope = base.innerScope + name = sym.field + elif type(sym) is Identifier: + scope = sym.scope + name = sym.target + else: + raise NotImplementedError(str(sym)) + if name in scope: + s = scope[name] + else: + raise SemanticError('{} undefined'.format(name), sym.loc) + assert isinstance(s, Symbol) + return s + + def theType(self, t): + """ Recurse until a 'real' type is found """ + if type(t) is DefinedType: + t = self.theType(t.typ) + elif type(t) is Identifier: + t = self.theType(self.resolveSymbol(t)) + elif type(t) is Member: + # Possible when using types of modules: + t = self.theType(self.resolveSymbol(t)) + elif isinstance(t, Type): + pass + else: + raise NotImplementedError(str(t)) + assert isinstance(t, Type) + return t + + def equalTypes(self, a, b): + """ Compare types a and b for structural equavalence. """ + # Recurse into named types: + a = self.theType(a) + b = self.theType(b) + assert isinstance(a, Type) + assert isinstance(b, Type) + + if type(a) is type(b): + if type(a) is BaseType: + return a.name == b.name + elif type(a) is PointerType: + return self.equalTypes(a.ptype, b.ptype) + elif type(a) is StructureType: + if len(a.mems) != len(b.mems): + return False + return all(self.equalTypes(am.typ, bm.typ) for am, bm in + zip(a.mems, b.mems)) + else: + raise NotImplementedError('{} not implemented'.format(type(a))) + return False
--- a/python/ppci/c3/parser.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/c3/parser.py Thu Dec 12 20:42:56 2013 +0100 @@ -1,8 +1,8 @@ import logging from ppci import CompilerError from .astnodes import Member, Literal, TypeCast, Unop, Binop -from .astnodes import Assignment, ExpressionStatement, CompoundStatement -from .astnodes import ReturnStatement, WhileStatement, IfStatement +from .astnodes import Assignment, ExpressionStatement, Compound +from .astnodes import Return, While, If, Empty from .astnodes import FunctionType, Function, FormalParameter from .astnodes import StructureType, DefinedType, PointerType from .astnodes import Constant, Variable @@ -10,7 +10,6 @@ from .astnodes import Package from .astnodes import Identifier from .astnodes import FunctionCall -from .astnodes import EmptyStatement class Parser: @@ -146,7 +145,7 @@ v.loc = name.loc self.addDeclaration(v) self.Consume(';') - return EmptyStatement() + return Empty() def parseConstDef(self): self.Consume('const') @@ -184,56 +183,56 @@ self.Consume(')') paramtypes = [p.typ for p in parameters] f.typ = FunctionType(paramtypes, returntype) - f.body = self.parseCompoundStatement() + f.body = self.parseCompound() self.currentPart = savePart - def parseIfStatement(self): + def parseIf(self): loc = self.Consume('if').loc self.Consume('(') condition = self.Expression() self.Consume(')') yes = self.Statement() - no = self.Statement() if self.hasConsumed('else') else EmptyStatement() - return IfStatement(condition, yes, no, loc) + no = self.Statement() if self.hasConsumed('else') else Empty() + return If(condition, yes, no, loc) - def parseWhileStatement(self): + def parseWhile(self): loc = self.Consume('while').loc self.Consume('(') condition = self.Expression() self.Consume(')') statements = self.Statement() - return WhileStatement(condition, statements, loc) + return While(condition, statements, loc) - def parseReturnStatement(self): + def parseReturn(self): loc = self.Consume('return').loc if self.Peak == ';': expr = Literal(0, loc) else: expr = self.Expression() self.Consume(';') - return ReturnStatement(expr, loc) + return Return(expr, loc) - def parseCompoundStatement(self): + def parseCompound(self): self.Consume('{') statements = [] while not self.hasConsumed('}'): statements.append(self.Statement()) - return CompoundStatement(statements) + return Compound(statements) def Statement(self): # Determine statement type based on the pending token: if self.Peak == 'if': - return self.parseIfStatement() + return self.parseIf() elif self.Peak == 'while': - return self.parseWhileStatement() + return self.parseWhile() elif self.Peak == '{': - return self.parseCompoundStatement() + return self.parseCompound() elif self.hasConsumed(';'): - return EmptyStatement() + return Empty() elif self.Peak == 'var': return self.parseVarDef() elif self.Peak == 'return': - return self.parseReturnStatement() + return self.parseReturn() else: x = self.UnaryExpression() if self.Peak == '=':
--- a/python/ppci/c3/scope.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/c3/scope.py Thu Dec 12 20:42:56 2013 +0100 @@ -1,4 +1,4 @@ -from .astnodes import Constant, Variable, Function, BaseType +from .astnodes import Constant, Variable, Function, BaseType, Symbol class Scope: @@ -54,6 +54,7 @@ def addSymbol(self, sym): assert sym.name not in self.symbols + assert isinstance(sym, Symbol) self.symbols[sym.name] = sym def __repr__(self):
--- a/python/ppci/c3/visitor.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/c3/visitor.py Thu Dec 12 20:42:56 2013 +0100 @@ -25,21 +25,24 @@ self.do(s) self.do(node.typ) self.do(node.body) - elif type(node) is CompoundStatement: + elif type(node) is Compound: for s in node.statements: self.do(s) - elif type(node) is IfStatement: + elif type(node) is If: self.do(node.condition) self.do(node.truestatement) self.do(node.falsestatement) + elif type(node) is While: + self.do(node.condition) + self.do(node.statement) + elif type(node) is Assignment: + self.do(node.lval) + self.do(node.rval) elif type(node) is FunctionCall: for arg in node.args: self.do(arg) self.do(node.proc) - elif type(node) is Assignment: - self.do(node.lval) - self.do(node.rval) - elif type(node) is ReturnStatement: + elif type(node) is Return: self.do(node.expr) elif type(node) is Binop: self.do(node.a) @@ -50,6 +53,7 @@ self.do(node.ex) elif type(node) is TypeCast: self.do(node.a) + self.do(node.to_type) elif type(node) is Member: self.do(node.base) elif type(node) is Deref: @@ -69,12 +73,9 @@ for pt in node.parametertypes: self.do(pt) self.do(node.returntype) - elif type(node) in [Identifier, Literal, EmptyStatement]: + elif type(node) in [Identifier, Literal, Empty]: # Those nodes do not have child nodes. pass - elif type(node) is WhileStatement: - self.do(node.condition) - self.do(node.statement) else: raise Exception('Could not visit "{0}"'.format(node))
--- a/python/ppci/codegen/canon.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/codegen/canon.py Thu Dec 12 20:42:56 2013 +0100 @@ -1,4 +1,5 @@ from .. import ir +from .. import irutils from itertools import chain def make(function, frame): @@ -44,7 +45,7 @@ else: raise NotImplementedError('STMT NI: {}'.format(stmt)) -newTemp = ir.NamedClassGenerator('canon_reg', ir.Temp).gen +newTemp = irutils.NamedClassGenerator('canon_reg', ir.Temp).gen def rewriteExp(exp, frame): if isinstance(exp, ir.Binop):
--- a/python/ppci/ir.py Mon Dec 09 19:00:21 2013 +0100 +++ b/python/ppci/ir.py Thu Dec 12 20:42:56 2013 +0100 @@ -2,25 +2,6 @@ Intermediate representation (IR) code classes. """ - -def dumpgv(m, outf): - print('digraph G ', file=outf) - print('{', file=outf) - for f in m.Functions: - print('{} [label="{}" shape=box3d]'.format(id(f), f), file=outf) - for bb in f.Blocks: - contents = str(bb) + '\n' - contents += '\n'.join([str(i) for i in bb.Instructions]) - print('{0} [shape=note label="{1}"];' - .format(id(bb), contents), file=outf) - for successor in bb.Successors: - print('"{}" -> "{}"'.format(id(bb), id(successor)), file=outf) - - print('"{}" -> "{}" [label="entry"]' - .format(id(f), id(f.entry)), file=outf) - print('}', file=outf) - - class Module: """ Container unit for variables and functions. """ def __init__(self, name): @@ -422,69 +403,3 @@ def __repr__(self): return 'IF {} {} {} THEN {} ELSE {}'\ .format(self.a, self.cond, self.b, self.lab_yes, self.lab_no) - - -# Constructing IR: - -class NamedClassGenerator: - def __init__(self, prefix, cls): - self.prefix = prefix - self.cls = cls - - def NumGen(): - a = 0 - while True: - yield a - a = a + 1 - self.nums = NumGen() - - def gen(self, prefix=None): - if not prefix: - prefix = self.prefix - return self.cls('{0}{1}'.format(prefix, self.nums.__next__())) - - -class Builder: - """ Base class for ir code generators """ - def __init__(self): - self.prepare() - - def prepare(self): - self.newTemp = NamedClassGenerator('reg', Temp).gen - self.newBlock2 = NamedClassGenerator('block', Block).gen - self.bb = None - self.m = None - self.fn = None - self.loc = None - - # Helpers: - def setModule(self, m): - self.m = m - - def newFunction(self, name): - f = Function(name) - self.m.addFunc(f) - return f - - def newBlock(self): - assert self.fn - b = self.newBlock2() - b.function = self.fn - return b - - def setFunction(self, f): - self.fn = f - self.bb = f.entry if f else None - - def setBlock(self, b): - self.bb = b - - def setLoc(self, l): - self.loc = l - - def emit(self, i): - assert isinstance(i, Statement) - i.debugLoc = self.loc - if not self.bb: - raise Exception('No basic block') - self.bb.addInstruction(i)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/ppci/irutils.py Thu Dec 12 20:42:56 2013 +0100 @@ -0,0 +1,88 @@ + +""" + Some utilities for ir-code. +""" +from .ir import Temp, Block, Function, Statement + +def dumpgv(m, outf): + print('digraph G ', file=outf) + print('{', file=outf) + for f in m.Functions: + print('{} [label="{}" shape=box3d]'.format(id(f), f), file=outf) + for bb in f.Blocks: + contents = str(bb) + '\n' + contents += '\n'.join([str(i) for i in bb.Instructions]) + print('{0} [shape=note label="{1}"];' + .format(id(bb), contents), file=outf) + for successor in bb.Successors: + print('"{}" -> "{}"'.format(id(bb), id(successor)), file=outf) + + print('"{}" -> "{}" [label="entry"]' + .format(id(f), id(f.entry)), file=outf) + print('}', file=outf) + + +# Constructing IR: + +class NamedClassGenerator: + def __init__(self, prefix, cls): + self.prefix = prefix + self.cls = cls + + def NumGen(): + a = 0 + while True: + yield a + a = a + 1 + self.nums = NumGen() + + def gen(self, prefix=None): + if not prefix: + prefix = self.prefix + return self.cls('{0}{1}'.format(prefix, self.nums.__next__())) + + +class Builder: + """ Base class for ir code generators """ + def __init__(self): + self.prepare() + + def prepare(self): + self.newTemp = NamedClassGenerator('reg', Temp).gen + self.newBlock2 = NamedClassGenerator('block', Block).gen + self.bb = None + self.m = None + self.fn = None + self.loc = None + + # Helpers: + def setModule(self, m): + self.m = m + + def newFunction(self, name): + f = Function(name) + self.m.addFunc(f) + return f + + def newBlock(self): + assert self.fn + b = self.newBlock2() + b.function = self.fn + return b + + def setFunction(self, f): + self.fn = f + self.bb = f.entry if f else None + + def setBlock(self, b): + self.bb = b + + def setLoc(self, l): + self.loc = l + + def emit(self, i): + assert isinstance(i, Statement) + i.debugLoc = self.loc + if not self.bb: + raise Exception('No basic block') + self.bb.addInstruction(i)
--- a/test/grind.py Mon Dec 09 19:00:21 2013 +0100 +++ b/test/grind.py Thu Dec 12 20:42:56 2013 +0100 @@ -3,6 +3,10 @@ import cProfile import unittest import pstats +import sys +import os + +sys.path.insert(0, os.path.join('..','python')) if __name__ == '__main__': suite = unittest.TestLoader().discover('.')
--- a/test/testc3.py Mon Dec 09 19:00:21 2013 +0100 +++ b/test/testc3.py Thu Dec 12 20:42:56 2013 +0100 @@ -146,10 +146,9 @@ a2 = b * a; c = a; - c = b > 1; } """ - self.expectErrors(snippet, [8, 9, 10]) + self.expectErrors(snippet, [8, 9]) def testExpression1(self): snippet = """ @@ -186,7 +185,6 @@ def testWhile(self): snippet = """ module tstwhile; - var int a; function void t() { var int i; @@ -194,9 +192,16 @@ while (i < 1054) { i = i + 3; - a = a + i; } + } + """ + self.expectOK(snippet) + def testWhile2(self): + snippet = """ + module tstwhile; + function void t() + { while(true) { } @@ -285,7 +290,7 @@ function void t() { var struct {int x, y;} a; - a.b.c(9); + a.x(9); } """ self.expectOK(snippet) @@ -326,12 +331,12 @@ { pa = 2; // type conflict pa = &a; - pa = &2; + pa = &2; // No valid lvalue &a = pa; // No valid lvalue **pa = 22; // Cannot deref int } """ - self.expectErrors(snippet, [6, 9, 10]) + self.expectErrors(snippet, [6, 8, 9, 10]) def testPointerTypeIr(self): snippet = """
--- a/test/testhexfile.py Mon Dec 09 19:00:21 2013 +0100 +++ b/test/testhexfile.py Thu Dec 12 20:42:56 2013 +0100 @@ -28,11 +28,13 @@ hf.addRegion(0xFFFE, bytes.fromhex('aabbcc')) self.saveload(hf) + @unittest.skip('Takes too long') def testSave4(self): hf = HexFile() hf.addRegion(0xF000, bytes.fromhex('ab')*0x10000) self.saveload(hf) + @unittest.skip('Takes too long') def testSave5(self): hf = HexFile() hf.addRegion(0xF003, bytes.fromhex('ab')*0x10000)
--- a/test/testir.py Mon Dec 09 19:00:21 2013 +0100 +++ b/test/testir.py Thu Dec 12 20:42:56 2013 +0100 @@ -3,12 +3,18 @@ import sys import ppci from ppci import ir +from ppci import irutils from ppci.transform import ConstantFolder class IrCodeTestCase(unittest.TestCase): + def testAdd(self): + v = ir.Add(ir.Const(1), ir.Const(2)) + + +class IrBuilderTestCase(unittest.TestCase): def setUp(self): - self.b = ir.Builder() + self.b = irutils.Builder() self.m = ir.Module('test') self.b.setModule(self.m) @@ -51,7 +57,7 @@ class ConstantFolderTestCase(unittest.TestCase): def setUp(self): - self.b = ir.Builder() + self.b = irutils.Builder() self.cf = ConstantFolder() self.m = ir.Module('test') self.b.setModule(self.m) @@ -77,56 +83,7 @@ v3 = ir.Add(v1, v2) -testsrc = """ -package test2; -function void tesssst(int henkie) -{ - var int a, b, cee; - a = 2 * 33 - 12; - b = a * 2 + 13; - a = b + a; - cee = a; - cee = cee * 2 + a + cee * 2; - if (cee + a > b and b *3 - a+8*b== 3*6-b) - { - var int x = a; - x = b * 2 - a; - a = x * x * (x + 22 - a); - } - else - { - a = b + a + (a + b); - } - var int y; - y = a - b * 53; -} -""" - -testsrc2 = """ -function int add2(int x, int y) -{ - var int res; - res = x + y + 2 - 7 + 2; - //if (y < 2) - //{ - // return y - 33; - //} - - res = res + (x + 2 * y) + (x + 2 * y) + (2*8) + (2*8); - - if (x > 13) - { - while (y > 1337) - { - res = res + 2; - y = y - 12; - } - } - return res; -} - -""" if __name__ == '__main__': unittest.main()