Mercurial > lcfOS
view python/ppci/c3/codegenerator.py @ 396:fb3c1f029b30
Added baselexer into c3 lexer
author | Windel Bouwman |
---|---|
date | Tue, 27 May 2014 22:19:32 +0200 |
parents | 988f3fb861e4 |
children |
line wrap: on
line source
import logging
import struct
from .. import ir
from .. import irutils
from . import astnodes as ast


class SemanticError(Exception):
    """ Error thrown when a semantic issue is observed.

    Attributes:
        msg: human readable description of the problem.
        loc: source location the problem refers to, or None when no
            location is available.
    """
    def __init__(self, msg, loc=None):
        # Forward the message so str(exc) and tracebacks are informative.
        super().__init__(msg)
        self.msg = msg
        self.loc = loc


class CodeGenerator:
    """
      Generates intermediate (IR) code from a package. The entry function is
      'gencode'. The main task of this part is to rewrite complex control
      structures, such as while and for loops into simple conditional
      jump statements. Also complex conditional statements are simplified.
      Such as 'and' and 'or' statements are rewritten in conditional jumps.
      And structured datatypes are rewritten.

      Type checking is done in one run with code generation.
    """
    def __init__(self, diag):
        """ Create a code generator that reports errors to `diag`. """
        self.logger = logging.getLogger('c3cgen')
        self.builder = irutils.Builder()
        self.diag = diag

    def gencode(self, pkg):
        """ Generate code for a single module.

        Checks all types of the package, creates ir global variables for
        the variables in the package's inner scope and generates code for
        every function that has a body.

        Returns the ir module when `pkg.ok` is true afterwards; otherwise
        nothing is returned (implicit None).
        """
        self.builder.prepare()
        assert type(pkg) is ast.Package
        self.pkg = pkg
        self.intType = pkg.scope['int']
        self.boolType = pkg.scope['bool']
        self.pointerSize = 4
        self.logger.debug('Generating ir-code for {}'.format(pkg.name),
                          extra={'c3_ast': pkg})
        self.varMap = {}    # Maps variables to storage locations.
        self.builder.m = ir.Module(pkg.name)
        try:
            for typ in pkg.Types:
                self.check_type(typ)
            # Only generate function if function contains a body:
            real_functions = [f for f in pkg.Functions if f.body]
            for v in pkg.innerScope.Variables:
                v2 = ir.GlobalVariable(v.name, ir.i32)
                self.varMap[v] = v2
                if not v.isLocal:
                    self.builder.m.add_variable(v2)
            for s in real_functions:
                self.gen_function(s)
        except SemanticError as e:
            self.error(e.msg, e.loc)
        if self.pkg.ok:
            return self.builder.m

    def error(self, msg, loc=None):
        """ Mark the package as not ok and report the error to diag. """
        self.pkg.ok = False
        self.diag.error(msg, loc)

    def gen_function(self, fn):
        """ Generate ir-code for function `fn`.

        Creates the ir function, allocates a local variable for every
        symbol in the function's inner scope (parameters are copied into
        a local named '<name>_copy'), generates the body and finally
        emits a default 'return 0' followed by a jump to the epilogue.
        """
        # TODO: handle arguments
        f = self.builder.new_function(fn.name)
        f.return_value = self.builder.newTemp()
        self.builder.setFunction(f)
        l2 = self.builder.newBlock()
        self.builder.emit(ir.Jump(l2))
        self.builder.setBlock(l2)

        # generate room for locals:
        for sym in fn.innerScope:
            self.check_type(sym.typ)
            if sym.isParameter:
                p = ir.Parameter(sym.name, ir.i32)
                variable = ir.LocalVariable(sym.name + '_copy', ir.i32)
                f.addParameter(p)
                f.addLocal(variable)
                # Move parameter into local copy:
                self.builder.emit(ir.Move(ir.Mem(variable), p))
            elif sym.isLocal:
                variable = ir.LocalVariable(sym.name, ir.i32)
                f.addLocal(variable)
            elif isinstance(sym, ast.Variable):
                variable = ir.LocalVariable(sym.name, ir.i32)
                f.addLocal(variable)
            else:
                raise NotImplementedError('{}'.format(sym))
            self.varMap[sym] = variable

        self.gen_stmt(fn.body)
        self.builder.emit(ir.Move(f.return_value, ir.Const(0)))
        self.builder.emit(ir.Jump(f.epiloog))
        self.builder.setFunction(None)

    def gen_stmt(self, code):
        """ Generate code for a statement.

        A SemanticError raised while generating the statement is caught
        here and reported through `error`, so generation can continue
        with the next statement.
        """
        try:
            assert isinstance(code, ast.Statement)
            self.builder.setLoc(code.loc)
            if type(code) is ast.Compound:
                for s in code.statements:
                    self.gen_stmt(s)
            elif type(code) is ast.Empty:
                pass
            elif type(code) is ast.Assignment:
                self.gen_assignment_stmt(code)
            elif type(code) is ast.ExpressionStatement:
                self.builder.emit(ir.Exp(self.gen_expr_code(code.ex)))
            elif type(code) is ast.If:
                self.gen_if_stmt(code)
            elif type(code) is ast.Return:
                ret_val = self.gen_expr_code(code.expr)
                self.builder.emit(
                    ir.Move(self.builder.fn.return_value, ret_val))
                self.builder.emit(ir.Jump(self.builder.fn.epiloog))
                # Code following the return goes into a fresh block:
                b = self.builder.newBlock()
                self.builder.setBlock(b)
            elif type(code) is ast.While:
                self.gen_while(code)
            elif type(code) is ast.For:
                self.gen_for_stmt(code)
            elif type(code) is ast.Switch:
                # Switch statements are recognized but not supported yet.
                raise NotImplementedError('Unknown stmt {}'.format(code))
            else:
                raise NotImplementedError('Unknown stmt {}'.format(code))
        except SemanticError as e:
            self.error(e.msg, e.loc)

    def gen_assignment_stmt(self, code):
        """ Generate code for assignment statement.

        Raises SemanticError when the types of both sides differ or when
        the left hand side is not an lvalue.
        """
        lval = self.gen_expr_code(code.lval)
        rval = self.gen_expr_code(code.rval)
        if not self.equal_types(code.lval.typ, code.rval.typ):
            raise SemanticError('Cannot assign {} to {}'
                                .format(code.rval.typ, code.lval.typ),
                                code.loc)
        if not code.lval.lvalue:
            raise SemanticError('No valid lvalue {}'.format(code.lval),
                                code.lval.loc)
        self.builder.emit(ir.Move(lval, rval))

    def gen_if_stmt(self, code):
        """ Generate code for if statement.

        Both branches end with a jump to a common final block, which
        becomes the current block afterwards.
        """
        true_block = self.builder.newBlock()
        bbfalse = self.builder.newBlock()
        te = self.builder.newBlock()
        self.gen_cond_code(code.condition, true_block, bbfalse)
        self.builder.setBlock(true_block)
        self.gen_stmt(code.truestatement)
        self.builder.emit(ir.Jump(te))
        self.builder.setBlock(bbfalse)
        self.gen_stmt(code.falsestatement)
        self.builder.emit(ir.Jump(te))
        self.builder.setBlock(te)

    def gen_while(self, code):
        """ Generate code for while statement.

        Layout: jump to the test block, which branches to the body or to
        the final block; the body jumps back to the test block.
        """
        bbdo = self.builder.newBlock()
        test_block = self.builder.newBlock()
        final_block = self.builder.newBlock()
        self.builder.emit(ir.Jump(test_block))
        self.builder.setBlock(test_block)
        self.gen_cond_code(code.condition, bbdo, final_block)
        self.builder.setBlock(bbdo)
        self.gen_stmt(code.statement)
        self.builder.emit(ir.Jump(test_block))
        self.builder.setBlock(final_block)

    def gen_for_stmt(self, code):
        """ Generate for statement code.

        The init statement is generated first, followed by the same
        layout as a while loop with the 'final' statement appended to the
        loop body.
        """
        bbdo = self.builder.newBlock()
        test_block = self.builder.newBlock()
        final_block = self.builder.newBlock()
        self.gen_stmt(code.init)
        self.builder.emit(ir.Jump(test_block))
        self.builder.setBlock(test_block)
        self.gen_cond_code(code.condition, bbdo, final_block)
        self.builder.setBlock(bbdo)
        self.gen_stmt(code.statement)
        self.gen_stmt(code.final)
        self.builder.emit(ir.Jump(test_block))
        self.builder.setBlock(final_block)

    def gen_cond_code(self, expr, bbtrue, bbfalse):
        """ Generate conditional logic.
            Implement sequential logical operators.

        Emits code that jumps to `bbtrue` when `expr` holds and to
        `bbfalse` otherwise. 'or' and 'and' are implemented with an
        intermediate block so the second operand is only evaluated when
        needed. Raises SemanticError for non-boolean operands or operand
        type mismatches.
        """
        if type(expr) is ast.Binop:
            if expr.op == 'or':
                l2 = self.builder.newBlock()
                self.gen_cond_code(expr.a, bbtrue, l2)
                if not self.equal_types(expr.a.typ, self.boolType):
                    raise SemanticError('Must be boolean', expr.a.loc)
                self.builder.setBlock(l2)
                self.gen_cond_code(expr.b, bbtrue, bbfalse)
                if not self.equal_types(expr.b.typ, self.boolType):
                    raise SemanticError('Must be boolean', expr.b.loc)
            elif expr.op == 'and':
                l2 = self.builder.newBlock()
                self.gen_cond_code(expr.a, l2, bbfalse)
                if not self.equal_types(expr.a.typ, self.boolType):
                    # NOTE(review): this reports and continues, while the
                    # sibling checks raise SemanticError - confirm intent.
                    self.error('Must be boolean', expr.a.loc)
                self.builder.setBlock(l2)
                self.gen_cond_code(expr.b, bbtrue, bbfalse)
                if not self.equal_types(expr.b.typ, self.boolType):
                    raise SemanticError('Must be boolean', expr.b.loc)
            elif expr.op in ['==', '>', '<', '!=', '<=', '>=']:
                ta = self.gen_expr_code(expr.a)
                tb = self.gen_expr_code(expr.b)
                if not self.equal_types(expr.a.typ, expr.b.typ):
                    raise SemanticError('Types unequal {} != {}'
                                        .format(expr.a.typ, expr.b.typ),
                                        expr.loc)
                self.builder.emit(ir.CJump(ta, expr.op, tb, bbtrue, bbfalse))
            else:
                raise SemanticError('non-bool: {}'.format(expr.op), expr.loc)
            expr.typ = self.boolType
        elif type(expr) is ast.Literal:
            # A literal condition becomes an unconditional jump:
            self.gen_expr_code(expr)
            if expr.val:
                self.builder.emit(ir.Jump(bbtrue))
            else:
                self.builder.emit(ir.Jump(bbfalse))
        else:
            raise NotImplementedError('Unknown cond {}'.format(expr))

        # Check that the condition is a boolean value:
        if not self.equal_types(expr.typ, self.boolType):
            self.error('Condition must be boolean', expr.loc)

    def gen_expr_code(self, expr):
        """ Generate code for an expression. Return the generated ir-value.

        As a side effect the expression node is annotated with its type
        (`expr.typ`) and, for most node kinds, whether it is an lvalue
        (`expr.lvalue`).
        """
        assert isinstance(expr, ast.Expression)
        if type(expr) is ast.Binop:
            expr.lvalue = False
            if expr.op in ['+', '-', '*', '/', '<<', '>>', '|', '&']:
                ra = self.gen_expr_code(expr.a)
                rb = self.gen_expr_code(expr.b)
                if self.equal_types(expr.a.typ, self.intType) and \
                        self.equal_types(expr.b.typ, self.intType):
                    expr.typ = expr.a.typ
                elif self.equal_types(expr.b.typ, self.intType) and \
                        type(expr.a.typ) is ast.PointerType:
                    # Special case for pointer arithmetic TODO: coerce!
                    expr.typ = expr.a.typ
                else:
                    raise SemanticError('Can only add integers', expr.loc)
            else:
                raise NotImplementedError(
                    "Cannot use equality as expressions")
            return ir.Binop(ra, expr.op, rb, "op", ir.i32)
        elif type(expr) is ast.Unop:
            if expr.op == '&':
                ra = self.gen_expr_code(expr.a)
                expr.typ = ast.PointerType(expr.a.typ)
                if not expr.a.lvalue:
                    raise SemanticError('No valid lvalue', expr.a.loc)
                expr.lvalue = False
                # The address is the expression inside the memory access:
                assert type(ra) is ir.Mem
                return ra.e
            else:
                raise NotImplementedError('Unknown unop {0}'.format(expr.op))
        elif type(expr) is ast.Identifier:
            # Generate code for this identifier.
            tg = self.resolveSymbol(expr)
            expr.kind = type(tg)
            expr.typ = tg.typ
            # This returns the dereferenced variable.
            if isinstance(tg, ast.Variable):
                expr.lvalue = True
                return ir.Mem(self.varMap[tg])
            elif isinstance(tg, ast.Constant):
                c_val = self.gen_expr_code(tg.value)
                return self.evalConst(c_val)
            else:
                raise NotImplementedError(str(tg))
        elif type(expr) is ast.Deref:
            # dereference pointer type:
            addr = self.gen_expr_code(expr.ptr)
            ptr_typ = self.the_type(expr.ptr.typ)
            expr.lvalue = True
            if type(ptr_typ) is ast.PointerType:
                expr.typ = ptr_typ.ptype
                return ir.Mem(addr)
            else:
                raise SemanticError('Cannot deref non-pointer', expr.loc)
        elif type(expr) is ast.Member:
            return self.gen_member_expr(expr)
        elif type(expr) is ast.Index:
            return self.gen_index_expr(expr)
        elif type(expr) is ast.Literal:
            return self.gen_literal_expr(expr)
        elif type(expr) is ast.TypeCast:
            return self.gen_type_cast(expr)
        elif type(expr) is ast.Sizeof:
            # The type of this expression is int:
            expr.typ = self.intType
            self.check_type(expr.query_typ)
            type_size = self.size_of(expr.query_typ)
            return ir.Const(type_size)
        elif type(expr) is ast.FunctionCall:
            return self.gen_function_call(expr)
        else:
            raise NotImplementedError('Unknown expr {}'.format(expr))

    def gen_member_expr(self, expr):
        """ Generate code for selecting a field of a structure.

        Returns a memory access at the base address plus the field
        offset. Raises SemanticError when the base is not a structure or
        does not contain the requested field.
        """
        base = self.gen_expr_code(expr.base)
        expr.lvalue = expr.base.lvalue
        basetype = self.the_type(expr.base.typ)
        if type(basetype) is ast.StructureType:
            if basetype.hasField(expr.field):
                expr.typ = basetype.fieldType(expr.field)
            else:
                raise SemanticError('{} does not contain field {}'
                                    .format(basetype, expr.field),
                                    expr.loc)
        else:
            raise SemanticError('Cannot select {} of non-structure type {}'
                                .format(expr.field, basetype), expr.loc)

        assert type(base) is ir.Mem, type(base)
        offset = ir.Const(basetype.fieldOffset(expr.field))
        addr = ir.Add(base.e, offset, "mem_addr", ir.i32)
        return ir.Mem(addr)

    def gen_index_expr(self, expr):
        """ Array indexing.

        Returns a memory access at base address + index * element size.
        Raises SemanticError when the base is not an array or the index
        is not an int.
        """
        base = self.gen_expr_code(expr.base)
        idx = self.gen_expr_code(expr.i)
        base_typ = self.the_type(expr.base.typ)
        if not isinstance(base_typ, ast.ArrayType):
            raise SemanticError('Cannot index non-array type {}'
                                .format(base_typ),
                                expr.base.loc)
        idx_type = self.the_type(expr.i.typ)
        if not self.equal_types(idx_type, self.intType):
            raise SemanticError('Index must be int not {}'
                                .format(idx_type), expr.i.loc)
        assert type(base) is ir.Mem
        element_type = self.the_type(base_typ.element_type)
        element_size = self.size_of(element_type)
        expr.typ = base_typ.element_type
        expr.lvalue = True

        offset = ir.Mul(idx, ir.Const(element_size), "element_offset", ir.i32)
        addr = ir.Add(base.e, offset, "element_address", ir.i32)
        return ir.Mem(addr)

    def gen_literal_expr(self, expr):
        """ Generate code for literal.

        The literal's type is looked up in the package scope by the
        Python type of its value. Strings become the address of a packed
        constant, other values a plain constant.
        """
        expr.lvalue = False
        typemap = {int: 'int',
                   float: 'double',
                   bool: 'bool',
                   str: 'string'}
        if type(expr.val) in typemap:
            expr.typ = self.pkg.scope[typemap[type(expr.val)]]
        else:
            raise SemanticError('Unknown literal type {}'
                                .format(expr.val), expr.loc)
        # Construct correct const value:
        if type(expr.val) is str:
            cval = self.pack_string(expr.val)
            return ir.Addr(ir.Const(cval))
        else:
            return ir.Const(expr.val)

    def pack_string(self, txt):
        """ Pack a string using 4 bytes length followed by text data.

        The length is stored little endian; the text is encoded as ascii.
        """
        data = txt.encode('ascii')
        # For ascii text the byte count equals the character count.
        length = struct.pack('<I', len(data))
        return length + data

    def gen_type_cast(self, expr):
        """ Generate code for type casting.

        Allowed casts are pointer to pointer, int to pointer, pointer to
        int and byte to int. In all these cases the value itself is
        passed on unchanged. Other casts raise SemanticError.
        """
        ar = self.gen_expr_code(expr.a)
        from_type = self.the_type(expr.a.typ)
        to_type = self.the_type(expr.to_type)
        if isinstance(from_type, ast.PointerType) and \
                isinstance(to_type, ast.PointerType):
            expr.typ = expr.to_type
            return ar
        elif self.equal_types(self.intType, from_type) and \
                isinstance(to_type, ast.PointerType):
            expr.typ = expr.to_type
            return ar
        elif self.equal_types(self.intType, to_type) \
                and isinstance(from_type, ast.PointerType):
            expr.typ = expr.to_type
            return ar
        elif type(from_type) is ast.BaseType and from_type.name == 'byte' and \
                type(to_type) is ast.BaseType and to_type.name == 'int':
            expr.typ = expr.to_type
            return ar
        else:
            raise SemanticError('Cannot cast {} to {}'
                                .format(from_type, to_type), expr.loc)

    def gen_function_call(self, expr):
        """ Generate code for a function call.

        Checks that the callee is a function and that the number and
        types of the arguments match its parameter types. Raises
        SemanticError otherwise.
        """
        # Evaluate the arguments:
        args = [self.gen_expr_code(e) for e in expr.args]
        # Check arguments:
        tg = self.resolveSymbol(expr.proc)
        if type(tg) is not ast.Function:
            raise SemanticError('cannot call {}'.format(tg), expr.loc)
        ftyp = tg.typ
        fname = tg.package.name + '_' + tg.name
        ptypes = ftyp.parametertypes
        if len(expr.args) != len(ptypes):
            raise SemanticError('{} requires {} arguments, {} given'
                                .format(fname, len(ptypes), len(expr.args)),
                                expr.loc)
        for arg, at in zip(expr.args, ptypes):
            if not self.equal_types(arg.typ, at):
                raise SemanticError('Got {}, expected {}'
                                    .format(arg.typ, at), arg.loc)
        # determine return type:
        expr.typ = ftyp.returntype
        return ir.Call(fname, args)

    def evalConst(self, c):
        """ Return `c` when it is an ir constant, else raise SemanticError.

        No source location is known here, so the error carries None.
        """
        if isinstance(c, ir.Const):
            return c
        else:
            raise SemanticError('Cannot evaluate constant {}'.format(c),
                                None)

    def resolveSymbol(self, sym):
        """ Look up the symbol that an identifier or member refers to.

        For a member expression the base must resolve to a package and
        the field is looked up in that package's inner scope. Raises
        SemanticError when the name is undefined.
        """
        if type(sym) is ast.Member:
            base = self.resolveSymbol(sym.base)
            if type(base) is not ast.Package:
                raise SemanticError('Base is not a package', sym.loc)
            scope = base.innerScope
            name = sym.field
        elif type(sym) is ast.Identifier:
            scope = sym.scope
            name = sym.target
        else:
            raise NotImplementedError(str(sym))
        if name in scope:
            s = scope[name]
        else:
            raise SemanticError('{} undefined'.format(name), sym.loc)
        assert isinstance(s, ast.Symbol)
        return s

    def size_of(self, t):
        """ Determine the byte size of a type """
        t = self.the_type(t)
        if type(t) is ast.BaseType:
            return t.bytesize
        elif type(t) is ast.StructureType:
            return sum(self.size_of(mem.typ) for mem in t.mems)
        elif type(t) is ast.ArrayType:
            return t.size * self.size_of(t.element_type)
        elif type(t) is ast.PointerType:
            return self.pointerSize
        else:
            raise NotImplementedError(str(t))

    def the_type(self, t, reveil_defined=True):
        """ Recurse until a 'real' type is found
            When reveil_defined is True, defined types are resolved to
            their backing types.
        """
        if type(t) is ast.DefinedType:
            if reveil_defined:
                t = self.the_type(t.typ)
        elif type(t) in [ast.Identifier, ast.Member]:
            t = self.the_type(self.resolveSymbol(t), reveil_defined)
        elif isinstance(t, ast.Type):
            pass
        else:
            raise NotImplementedError(str(t))
        assert isinstance(t, ast.Type)
        return t

    def equal_types(self, a, b, byname=False):
        """ Compare types a and b for structural equivalence.
            if byname is True stop on defined types.
        """
        # Recurse into named types:
        a = self.the_type(a, not byname)
        b = self.the_type(b, not byname)

        # Check types for sanity:
        self.check_type(a)
        self.check_type(b)

        # Do structural equivalence check:
        if type(a) is type(b):
            if type(a) is ast.BaseType:
                return a.name == b.name
            elif type(a) is ast.PointerType:
                # If a pointed type is detected, stop structural
                # equivalence:
                return self.equal_types(a.ptype, b.ptype, byname=True)
            elif type(a) is ast.StructureType:
                if len(a.mems) != len(b.mems):
                    return False
                return all(self.equal_types(am.typ, bm.typ) for am, bm in
                           zip(a.mems, b.mems))
            elif type(a) is ast.ArrayType:
                # Only the element types are compared here.
                return self.equal_types(a.element_type, b.element_type)
            elif type(a) is ast.DefinedType:
                # Try by name in case of defined types:
                return a.name == b.name
            else:
                raise NotImplementedError('{} not implemented'
                                          .format(type(a)))
        return False

    def check_type(self, t, first=True, byname=False):
        """ Determine struct offsets and check for recursiveness by using
            mark and sweep algorithm.
            The calling function could call this function with first set
            to clear the marks.
        """
        # Reset the mark and sweep:
        if first:
            self.got_types = set()

        # Resolve the type:
        t = self.the_type(t, not byname)

        # Check for recursion:
        if t in self.got_types:
            raise SemanticError('Recursive data type {}'.format(t), None)

        if type(t) is ast.BaseType:
            pass
        elif type(t) is ast.PointerType:
            # If a pointed type is detected, stop structural
            # equivalence:
            self.check_type(t.ptype, first=False, byname=True)
        elif type(t) is ast.StructureType:
            # NOTE(review): the mark is never removed, so two sibling
            # fields of the same structure type look like they would be
            # reported as recursive - confirm against astnodes.
            self.got_types.add(t)
            # Setup offsets of fields. Is this the right place?:
            offset = 0
            for struct_member in t.mems:
                self.check_type(struct_member.typ, first=False)
                struct_member.offset = offset
                offset = offset + self.size_of(struct_member.typ)
        elif type(t) is ast.ArrayType:
            self.check_type(t.element_type, first=False)
        elif type(t) is ast.DefinedType:
            pass
        else:
            raise NotImplementedError('{} not implemented'.format(type(t)))