comparison python/ppci/c3/codegenerator.py @ 307:e609d5296ee9

Massive rewrite of codegenerator
author Windel Bouwman
date Thu, 12 Dec 2013 20:42:56 +0100
parents b145f8e6050b
children 2e7f55319858
comparison
equal deleted inserted replaced
306:b145f8e6050b 307:e609d5296ee9
1 import logging 1 import logging
2 from .. import ir 2 from .. import ir
3 from . import astnodes 3 from .. import irutils
4 from .astnodes import Symbol, Package, Variable, Function
5 from .astnodes import Statement, Empty, Compound, If, While, Assignment
6 from .astnodes import ExpressionStatement, Return
7 from .astnodes import Expression, Binop, Unop, Identifier, Deref, Member
8 from .astnodes import Expression, FunctionCall, Literal, TypeCast
9 from .astnodes import Type, DefinedType, BaseType, PointerType, StructureType
10
4 from ppci import CompilerError 11 from ppci import CompilerError
5 from .analyse import theType 12
6 13
7 14 class SemanticError(Exception):
8 class CodeGenerator(ir.Builder): 15 def __init__(self, msg, loc):
16 self.msg = msg
17 self.loc = loc
18
19
20 class CodeGenerator(irutils.Builder):
9 """ 21 """
10 Generates intermediate (IR) code from a package. The entry function is 22 Generates intermediate (IR) code from a package. The entry function is
11 'genModule'. The main task of this part is to rewrite complex control 23 'genModule'. The main task of this part is to rewrite complex control
12 structures, such as while and for loops into simple conditional 24 structures, such as while and for loops into simple conditional
13 jump statements. Also complex conditional statements are simplified. 25 jump statements. Also complex conditional statements are simplified.
14 Such as 'and' and 'or' statements are rewritten in conditional jumps. 26 Such as 'and' and 'or' statements are rewritten in conditional jumps.
15 And structured datatypes are rewritten. 27 And structured datatypes are rewritten.
28
29 Type checking is done in one run with code generation.
16 """ 30 """
17 def __init__(self): 31 def __init__(self, diag):
18 self.logger = logging.getLogger('c3cgen') 32 self.logger = logging.getLogger('c3cgen')
33 self.diag = diag
19 34
20 def gencode(self, pkg): 35 def gencode(self, pkg):
21 self.prepare() 36 self.prepare()
22 assert type(pkg) is astnodes.Package 37 assert type(pkg) is Package
38 self.pkg = pkg
39 self.intType = pkg.scope['int']
40 self.boolType = pkg.scope['bool']
23 self.logger.info('Generating ir-code for {}'.format(pkg.name)) 41 self.logger.info('Generating ir-code for {}'.format(pkg.name))
24 self.varMap = {} # Maps variables to storage locations. 42 self.varMap = {} # Maps variables to storage locations.
25 self.funcMap = {} 43 self.funcMap = {}
26 self.m = ir.Module(pkg.name) 44 self.m = ir.Module(pkg.name)
27 self.genModule(pkg) 45 self.genModule(pkg)
28 return self.m 46 return self.m
29 47
48 def error(self, msg, loc=None):
49 self.pkg.ok = False
50 self.diag.error(msg, loc)
51
30 # inner helpers: 52 # inner helpers:
31 def genModule(self, pkg): 53 def genModule(self, pkg):
32 # Take care of forward declarations: 54 # Take care of forward declarations:
33 for s in pkg.innerScope.Functions: 55 try:
34 f = self.newFunction(s.name) 56 for s in pkg.innerScope.Functions:
35 self.funcMap[s] = f 57 f = self.newFunction(s.name)
36 for v in pkg.innerScope.Variables: 58 self.funcMap[s] = f
37 self.varMap[v] = self.newTemp() 59 for v in pkg.innerScope.Variables:
38 for s in pkg.innerScope.Functions: 60 self.varMap[v] = self.newTemp()
39 self.genFunction(s) 61 for s in pkg.innerScope.Functions:
62 self.genFunction(s)
63 except SemanticError as e:
64 self.error(e.msg, e.loc)
65
66 def checkType(self, t):
67 """ Verify the type is correct """
68 t = self.theType(t)
40 69
41 def genFunction(self, fn): 70 def genFunction(self, fn):
42 # TODO: handle arguments 71 # TODO: handle arguments
43 f = self.funcMap[fn] 72 f = self.funcMap[fn]
44 f.return_value = self.newTemp() 73 f.return_value = self.newTemp()
49 # generate room for locals: 78 # generate room for locals:
50 79
51 for sym in fn.innerScope: 80 for sym in fn.innerScope:
52 # TODO: handle parameters different 81 # TODO: handle parameters different
53 if sym.isParameter: 82 if sym.isParameter:
83 self.checkType(sym.typ)
54 v = ir.Parameter(sym.name) 84 v = ir.Parameter(sym.name)
55 f.addParameter(v) 85 f.addParameter(v)
56 elif sym.isLocal: 86 elif sym.isLocal:
87 self.checkType(sym.typ)
57 v = ir.LocalVariable(sym.name) 88 v = ir.LocalVariable(sym.name)
58 f.addLocal(v) 89 f.addLocal(v)
90 elif isinstance(sym, Variable):
91 self.checkType(sym.typ)
92 v = ir.LocalVariable(sym.name)
93 f.addLocal(v)
59 else: 94 else:
60 #v = self.newTemp() 95 #v = self.newTemp()
61 raise NotImplementedError('{}'.format(sym)) 96 raise NotImplementedError('{}'.format(sym))
62 # TODO: make this ssa here??
63 self.varMap[sym] = v 97 self.varMap[sym] = v
64 98
65 self.genCode(fn.body) 99 self.genCode(fn.body)
66 # Set the default return value to zero: 100 # Set the default return value to zero:
67 # TBD: this may not be required? 101 # TBD: this may not be required?
68 self.emit(ir.Move(f.return_value, ir.Const(0))) 102 self.emit(ir.Move(f.return_value, ir.Const(0)))
69 self.emit(ir.Jump(f.epiloog)) 103 self.emit(ir.Jump(f.epiloog))
70 self.setFunction(None) 104 self.setFunction(None)
71 105
72 def genCode(self, code): 106 def genCode(self, code):
73 assert isinstance(code, astnodes.Statement) 107 try:
108 self.genStmt(code)
109 except SemanticError as e:
110 self.error(e.msg, e.loc)
111
112 def genStmt(self, code):
113 assert isinstance(code, Statement)
74 self.setLoc(code.loc) 114 self.setLoc(code.loc)
75 if type(code) is astnodes.CompoundStatement: 115 if type(code) is Compound:
76 for s in code.statements: 116 for s in code.statements:
77 self.genCode(s) 117 self.genCode(s)
78 elif type(code) is astnodes.EmptyStatement: 118 elif type(code) is Empty:
79 pass 119 pass
80 elif type(code) is astnodes.Assignment: 120 elif type(code) is Assignment:
121 lval = self.genExprCode(code.lval)
81 rval = self.genExprCode(code.rval) 122 rval = self.genExprCode(code.rval)
82 lval = self.genExprCode(code.lval) 123 if not self.equalTypes(code.lval.typ, code.rval.typ):
124 msg = 'Cannot assign {} to {}'.format(code.lval.typ, code.rval.typ)
125 raise SemanticError(msg, code.loc)
126 if not code.lval.lvalue:
127 raise SemanticError('No valid lvalue {}'.format(code.lval), code.lval.loc)
83 self.emit(ir.Move(lval, rval)) 128 self.emit(ir.Move(lval, rval))
84 elif type(code) is astnodes.ExpressionStatement: 129 elif type(code) is ExpressionStatement:
85 self.emit(ir.Exp(self.genExprCode(code.ex))) 130 self.emit(ir.Exp(self.genExprCode(code.ex)))
86 elif type(code) is astnodes.IfStatement: 131 elif type(code) is If:
87 bbtrue = self.newBlock() 132 bbtrue = self.newBlock()
88 bbfalse = self.newBlock() 133 bbfalse = self.newBlock()
89 te = self.newBlock() 134 te = self.newBlock()
90 self.genCondCode(code.condition, bbtrue, bbfalse) 135 self.genCondCode(code.condition, bbtrue, bbfalse)
91 self.setBlock(bbtrue) 136 self.setBlock(bbtrue)
93 self.emit(ir.Jump(te)) 138 self.emit(ir.Jump(te))
94 self.setBlock(bbfalse) 139 self.setBlock(bbfalse)
95 self.genCode(code.falsestatement) 140 self.genCode(code.falsestatement)
96 self.emit(ir.Jump(te)) 141 self.emit(ir.Jump(te))
97 self.setBlock(te) 142 self.setBlock(te)
98 elif type(code) is astnodes.ReturnStatement: 143 elif type(code) is Return:
99 re = self.genExprCode(code.expr) 144 re = self.genExprCode(code.expr)
100 self.emit(ir.Move(self.fn.return_value, re)) 145 self.emit(ir.Move(self.fn.return_value, re))
101 self.emit(ir.Jump(self.fn.epiloog)) 146 self.emit(ir.Jump(self.fn.epiloog))
102 b = self.newBlock() 147 b = self.newBlock()
103 self.setBlock(b) 148 self.setBlock(b)
104 elif type(code) is astnodes.WhileStatement: 149 elif type(code) is While:
105 bbdo = self.newBlock() 150 bbdo = self.newBlock()
106 bbtest = self.newBlock() 151 bbtest = self.newBlock()
107 te = self.newBlock() 152 te = self.newBlock()
108 self.emit(ir.Jump(bbtest)) 153 self.emit(ir.Jump(bbtest))
109 self.setBlock(bbtest) 154 self.setBlock(bbtest)
115 else: 160 else:
116 raise NotImplementedError('Unknown stmt {}'.format(code)) 161 raise NotImplementedError('Unknown stmt {}'.format(code))
117 162
118 def genCondCode(self, expr, bbtrue, bbfalse): 163 def genCondCode(self, expr, bbtrue, bbfalse):
119 # Implement sequential logical operators 164 # Implement sequential logical operators
120 if type(expr) is astnodes.Binop: 165 if type(expr) is Binop:
121 if expr.op == 'or': 166 if expr.op == 'or':
122 l2 = self.newBlock() 167 l2 = self.newBlock()
123 self.genCondCode(expr.a, bbtrue, l2) 168 self.genCondCode(expr.a, bbtrue, l2)
169 if not equalTypes(expr.a.typ, self.boolType):
170 raise SemanticError('Must be boolean', expr.a.loc)
124 self.setBlock(l2) 171 self.setBlock(l2)
125 self.genCondCode(expr.b, bbtrue, bbfalse) 172 self.genCondCode(expr.b, bbtrue, bbfalse)
173 if not equalTypes(expr.b.typ, self.boolType):
174 raise SemanticError('Must be boolean', expr.b.loc)
126 elif expr.op == 'and': 175 elif expr.op == 'and':
127 l2 = self.newBlock() 176 l2 = self.newBlock()
128 self.genCondCode(expr.a, l2, bbfalse) 177 self.genCondCode(expr.a, l2, bbfalse)
178 if not equalTypes(expr.a.typ, self.boolType):
179 self.error('Must be boolean', expr.a.loc)
129 self.setBlock(l2) 180 self.setBlock(l2)
130 self.genCondCode(expr.b, bbtrue, bbfalse) 181 self.genCondCode(expr.b, bbtrue, bbfalse)
182 if not equalTypes(expr.b.typ, self.boolType):
183 raise SemanticError('Must be boolean', expr.b.loc)
131 elif expr.op in ['==', '>', '<', '!=', '<=', '>=']: 184 elif expr.op in ['==', '>', '<', '!=', '<=', '>=']:
132 ta = self.genExprCode(expr.a) 185 ta = self.genExprCode(expr.a)
133 tb = self.genExprCode(expr.b) 186 tb = self.genExprCode(expr.b)
187 if not self.equalTypes(expr.a.typ, expr.b.typ):
188 raise SemanticError('Types unequal {} != {}'
189 .format(expr.a.typ, expr.b.typ), expr.loc)
134 self.emit(ir.CJump(ta, expr.op, tb, bbtrue, bbfalse)) 190 self.emit(ir.CJump(ta, expr.op, tb, bbtrue, bbfalse))
135 else: 191 else:
136 raise NotImplementedError('Unknown condition {}'.format(expr)) 192 raise NotImplementedError('Unknown condition {}'.format(expr))
137 elif type(expr) is astnodes.Literal: 193 expr.typ = self.boolType
194 elif type(expr) is Literal:
138 if expr.val: 195 if expr.val:
139 self.emit(ir.Jump(bbtrue)) 196 self.emit(ir.Jump(bbtrue))
140 else: 197 else:
141 self.emit(ir.Jump(bbfalse)) 198 self.emit(ir.Jump(bbfalse))
199 expr.typ = self.boolType
142 else: 200 else:
143 raise NotImplementedError('Unknown cond {}'.format(expr)) 201 raise NotImplementedError('Unknown cond {}'.format(expr))
202 if not self.equalTypes(expr.typ, self.boolType):
203 self.error('Condition must be boolean', expr.loc)
144 204
145 def genExprCode(self, expr): 205 def genExprCode(self, expr):
146 assert isinstance(expr, astnodes.Expression) 206 assert isinstance(expr, Expression)
147 if type(expr) is astnodes.Binop and expr.op in ir.Binop.ops: 207 if type(expr) is Binop:
148 ra = self.genExprCode(expr.a) 208 expr.lvalue = False
149 rb = self.genExprCode(expr.b) 209 if expr.op in ['+', '-', '*', '/', '<<', '>>', '|', '&']:
210 ra = self.genExprCode(expr.a)
211 rb = self.genExprCode(expr.b)
212 if self.equalTypes(expr.a.typ, self.intType) and \
213 self.equalTypes(expr.b.typ, self.intType):
214 expr.typ = expr.a.typ
215 else:
216 # assume void here? TODO: throw exception!
217 raise SemanticError('Can only add integers', expr.loc)
218 else:
219 raise NotImplementedError("Cannot use equality as expressions")
150 return ir.Binop(ra, expr.op, rb) 220 return ir.Binop(ra, expr.op, rb)
151 elif type(expr) is astnodes.Unop and expr.op == '&': 221 elif type(expr) is Unop:
152 ra = self.genExprCode(expr.a) 222 if expr.op == '&':
153 assert type(ra) is ir.Mem 223 ra = self.genExprCode(expr.a)
154 return ra.e 224 expr.typ = PointerType(expr.a.typ)
155 elif type(expr) is astnodes.VariableUse: 225 if not expr.a.lvalue:
226 raise SemanticError('No valid lvalue', expr.a.loc)
227 expr.lvalue = False
228 assert type(ra) is ir.Mem
229 return ra.e
230 else:
231 raise NotImplementedError('Unknown unop {0}'.format(expr.op))
232 elif type(expr) is Identifier:
233 # Generate code for this identifier.
234 expr.lvalue = True
235 tg = self.resolveSymbol(expr)
236 expr.kind = type(tg)
237 expr.typ = tg.typ
156 # This returns the dereferenced variable. 238 # This returns the dereferenced variable.
157 if expr.target.isParameter: 239 if type(tg) is Variable:
158 # TODO: now parameters are handled different. Not nice? 240 # TODO: now parameters are handled different. Not nice?
159 return self.varMap[expr.target] 241 return ir.Mem(self.varMap[tg])
160 else: 242 else:
161 return ir.Mem(self.varMap[expr.target]) 243 return ir.Mem(self.varMap[tg])
162 elif type(expr) is astnodes.Deref: 244 elif type(expr) is Deref:
163 # dereference pointer type: 245 # dereference pointer type:
164 addr = self.genExprCode(expr.ptr) 246 addr = self.genExprCode(expr.ptr)
165 return ir.Mem(addr) 247 ptr_typ = self.theType(expr.ptr.typ)
166 elif type(expr) is astnodes.FieldRef: 248 expr.lvalue = True
249 if type(ptr_typ) is PointerType:
250 expr.typ = ptr_typ.ptype
251 return ir.Mem(addr)
252 else:
253 raise SemanticError('Cannot deref non-pointer', expr.loc)
254 expr.typ = self.intType
255 return ir.Mem(ir.Const(0))
256 elif type(expr) is Member:
167 base = self.genExprCode(expr.base) 257 base = self.genExprCode(expr.base)
258 expr.lvalue = expr.base.lvalue
259 basetype = self.theType(expr.base.typ)
260 if type(basetype) is StructureType:
261 if basetype.hasField(expr.field):
262 expr.typ = basetype.fieldType(expr.field)
263 else:
264 raise SemanticError('{} does not contain field {}'
265 .format(basetype, expr.field), expr.loc)
266 else:
267 raise SemanticError('Cannot select field {} of non-structure type {}'
268 .format(sym.field, basetype), sym.loc)
269
168 assert type(base) is ir.Mem, type(base) 270 assert type(base) is ir.Mem, type(base)
169 base = base.e 271 base = base.e
170 bt = theType(expr.base.typ) 272 bt = self.theType(expr.base.typ)
171 offset = ir.Const(bt.fieldOffset(expr.field)) 273 offset = ir.Const(bt.fieldOffset(expr.field))
172 return ir.Mem(ir.Add(base, offset)) 274 return ir.Mem(ir.Add(base, offset))
173 elif type(expr) is astnodes.Literal: 275 elif type(expr) is Literal:
276 expr.lvalue = False
277 typemap = {int: 'int', float: 'double', bool: 'bool'}
278 if type(expr.val) in typemap:
279 expr.typ = self.pkg.scope[typemap[type(expr.val)]]
280 else:
281 raise SemanticError('Unknown literal type {}'.format(expr.val))
174 return ir.Const(expr.val) 282 return ir.Const(expr.val)
175 elif type(expr) is astnodes.TypeCast: 283 elif type(expr) is TypeCast:
176 # TODO: improve this mess: 284 # TODO: improve this mess:
177 ar = self.genExprCode(expr.a) 285 ar = self.genExprCode(expr.a)
178 tt = theType(expr.to_type) 286 ft = self.theType(expr.a.typ)
179 if isinstance(tt, astnodes.PointerType): 287 tt = self.theType(expr.to_type)
180 if expr.a.typ is expr.scope['int']: 288 if isinstance(ft, PointerType) and isinstance(tt, PointerType):
181 return ar 289 expr.typ = expr.to_type
182 elif isinstance(expr.a.typ, astnodes.PointerType): 290 return ar
183 return ar 291 elif type(ft) is BaseType and ft.name == 'int' and \
184 else: 292 isinstance(tt, PointerType):
185 raise Exception() 293 expr.typ = expr.to_type
186 else: 294 return ar
187 raise NotImplementedError("not implemented") 295 else:
188 elif type(expr) is astnodes.FunctionCall: 296 self.error('Cannot cast {} to {}'
297 .format(ft, tt), expr.loc)
298 expr.typ = self.intType
299 return ir.Const(0)
300 elif type(expr) is FunctionCall:
301 # Evaluate the arguments:
189 args = [self.genExprCode(e) for e in expr.args] 302 args = [self.genExprCode(e) for e in expr.args]
190 #fn = self.funcMap[expr.proc] 303 # Check arguments:
191 fn = expr.proc.name 304 if type(expr.proc) is Identifier:
192 return ir.Call(fn, args) 305 tg = self.resolveSymbol(expr.proc)
306 else:
307 raise Exception()
308 assert type(tg) is Function
309 ftyp = tg.typ
310 fname = tg.name
311 ptypes = ftyp.parametertypes
312 if len(expr.args) != len(ptypes):
313 raise SemanticError('Function {2}: {0} arguments required, {1} given'
314 .format(len(ptypes), len(expr.args), fname), expr.loc)
315 for arg, at in zip(expr.args, ptypes):
316 if not self.equalTypes(arg.typ, at):
317 raise SemanticError('Got {0}, expected {1}'
318 .format(arg.typ, at), arg.loc)
319 # determine return type:
320 expr.typ = ftyp.returntype
321 return ir.Call(fname, args)
193 else: 322 else:
194 raise NotImplementedError('Unknown expr {}'.format(expr)) 323 raise NotImplementedError('Unknown expr {}'.format(expr))
324
325 def resolveSymbol(self, sym):
326 assert type(sym) in [Identifier, Member]
327 if type(sym) is Member:
328 base = self.resolveSymbol(sym.base)
329 scope = base.innerScope
330 name = sym.field
331 elif type(sym) is Identifier:
332 scope = sym.scope
333 name = sym.target
334 else:
335 raise NotImplementedError(str(sym))
336 if name in scope:
337 s = scope[name]
338 else:
339 raise SemanticError('{} undefined'.format(name), sym.loc)
340 assert isinstance(s, Symbol)
341 return s
342
343 def theType(self, t):
344 """ Recurse until a 'real' type is found """
345 if type(t) is DefinedType:
346 t = self.theType(t.typ)
347 elif type(t) is Identifier:
348 t = self.theType(self.resolveSymbol(t))
349 elif type(t) is Member:
350 # Possible when using types of modules:
351 t = self.theType(self.resolveSymbol(t))
352 elif isinstance(t, Type):
353 pass
354 else:
355 raise NotImplementedError(str(t))
356 assert isinstance(t, Type)
357 return t
358
359 def equalTypes(self, a, b):
360 """ Compare types a and b for structural equavalence. """
361 # Recurse into named types:
362 a = self.theType(a)
363 b = self.theType(b)
364 assert isinstance(a, Type)
365 assert isinstance(b, Type)
366
367 if type(a) is type(b):
368 if type(a) is BaseType:
369 return a.name == b.name
370 elif type(a) is PointerType:
371 return self.equalTypes(a.ptype, b.ptype)
372 elif type(a) is StructureType:
373 if len(a.mems) != len(b.mems):
374 return False
375 return all(self.equalTypes(am.typ, bm.typ) for am, bm in
376 zip(a.mems, b.mems))
377 else:
378 raise NotImplementedError('{} not implemented'.format(type(a)))
379 return False