Mercurial > lcfOS
comparison python/ppci/c3/codegenerator.py @ 307:e609d5296ee9
Massive rewrite of codegenerator
author | Windel Bouwman |
---|---|
date | Thu, 12 Dec 2013 20:42:56 +0100 |
parents | b145f8e6050b |
children | 2e7f55319858 |
comparison
equal
deleted
inserted
replaced
306:b145f8e6050b | 307:e609d5296ee9 |
---|---|
1 import logging | 1 import logging |
2 from .. import ir | 2 from .. import ir |
3 from . import astnodes | 3 from .. import irutils |
4 from .astnodes import Symbol, Package, Variable, Function | |
5 from .astnodes import Statement, Empty, Compound, If, While, Assignment | |
6 from .astnodes import ExpressionStatement, Return | |
7 from .astnodes import Expression, Binop, Unop, Identifier, Deref, Member | |
8 from .astnodes import Expression, FunctionCall, Literal, TypeCast | |
9 from .astnodes import Type, DefinedType, BaseType, PointerType, StructureType | |
10 | |
4 from ppci import CompilerError | 11 from ppci import CompilerError |
5 from .analyse import theType | 12 |
6 | 13 |
7 | 14 class SemanticError(Exception): |
8 class CodeGenerator(ir.Builder): | 15 def __init__(self, msg, loc): |
16 self.msg = msg | |
17 self.loc = loc | |
18 | |
19 | |
20 class CodeGenerator(irutils.Builder): | |
9 """ | 21 """ |
10 Generates intermediate (IR) code from a package. The entry function is | 22 Generates intermediate (IR) code from a package. The entry function is |
11 'genModule'. The main task of this part is to rewrite complex control | 23 'genModule'. The main task of this part is to rewrite complex control |
12 structures, such as while and for loops into simple conditional | 24 structures, such as while and for loops into simple conditional |
13 jump statements. Also complex conditional statements are simplified. | 25 jump statements. Also complex conditional statements are simplified. |
14 Such as 'and' and 'or' statements are rewritten in conditional jumps. | 26 Such as 'and' and 'or' statements are rewritten in conditional jumps. |
15 And structured datatypes are rewritten. | 27 And structured datatypes are rewritten. |
28 | |
29 Type checking is done in one run with code generation. | |
16 """ | 30 """ |
17 def __init__(self): | 31 def __init__(self, diag): |
18 self.logger = logging.getLogger('c3cgen') | 32 self.logger = logging.getLogger('c3cgen') |
33 self.diag = diag | |
19 | 34 |
20 def gencode(self, pkg): | 35 def gencode(self, pkg): |
21 self.prepare() | 36 self.prepare() |
22 assert type(pkg) is astnodes.Package | 37 assert type(pkg) is Package |
38 self.pkg = pkg | |
39 self.intType = pkg.scope['int'] | |
40 self.boolType = pkg.scope['bool'] | |
23 self.logger.info('Generating ir-code for {}'.format(pkg.name)) | 41 self.logger.info('Generating ir-code for {}'.format(pkg.name)) |
24 self.varMap = {} # Maps variables to storage locations. | 42 self.varMap = {} # Maps variables to storage locations. |
25 self.funcMap = {} | 43 self.funcMap = {} |
26 self.m = ir.Module(pkg.name) | 44 self.m = ir.Module(pkg.name) |
27 self.genModule(pkg) | 45 self.genModule(pkg) |
28 return self.m | 46 return self.m |
29 | 47 |
48 def error(self, msg, loc=None): | |
49 self.pkg.ok = False | |
50 self.diag.error(msg, loc) | |
51 | |
30 # inner helpers: | 52 # inner helpers: |
31 def genModule(self, pkg): | 53 def genModule(self, pkg): |
32 # Take care of forward declarations: | 54 # Take care of forward declarations: |
33 for s in pkg.innerScope.Functions: | 55 try: |
34 f = self.newFunction(s.name) | 56 for s in pkg.innerScope.Functions: |
35 self.funcMap[s] = f | 57 f = self.newFunction(s.name) |
36 for v in pkg.innerScope.Variables: | 58 self.funcMap[s] = f |
37 self.varMap[v] = self.newTemp() | 59 for v in pkg.innerScope.Variables: |
38 for s in pkg.innerScope.Functions: | 60 self.varMap[v] = self.newTemp() |
39 self.genFunction(s) | 61 for s in pkg.innerScope.Functions: |
62 self.genFunction(s) | |
63 except SemanticError as e: | |
64 self.error(e.msg, e.loc) | |
65 | |
66 def checkType(self, t): | |
67 """ Verify the type is correct """ | |
68 t = self.theType(t) | |
40 | 69 |
41 def genFunction(self, fn): | 70 def genFunction(self, fn): |
42 # TODO: handle arguments | 71 # TODO: handle arguments |
43 f = self.funcMap[fn] | 72 f = self.funcMap[fn] |
44 f.return_value = self.newTemp() | 73 f.return_value = self.newTemp() |
49 # generate room for locals: | 78 # generate room for locals: |
50 | 79 |
51 for sym in fn.innerScope: | 80 for sym in fn.innerScope: |
52 # TODO: handle parameters different | 81 # TODO: handle parameters different |
53 if sym.isParameter: | 82 if sym.isParameter: |
83 self.checkType(sym.typ) | |
54 v = ir.Parameter(sym.name) | 84 v = ir.Parameter(sym.name) |
55 f.addParameter(v) | 85 f.addParameter(v) |
56 elif sym.isLocal: | 86 elif sym.isLocal: |
87 self.checkType(sym.typ) | |
57 v = ir.LocalVariable(sym.name) | 88 v = ir.LocalVariable(sym.name) |
58 f.addLocal(v) | 89 f.addLocal(v) |
90 elif isinstance(sym, Variable): | |
91 self.checkType(sym.typ) | |
92 v = ir.LocalVariable(sym.name) | |
93 f.addLocal(v) | |
59 else: | 94 else: |
60 #v = self.newTemp() | 95 #v = self.newTemp() |
61 raise NotImplementedError('{}'.format(sym)) | 96 raise NotImplementedError('{}'.format(sym)) |
62 # TODO: make this ssa here?? | |
63 self.varMap[sym] = v | 97 self.varMap[sym] = v |
64 | 98 |
65 self.genCode(fn.body) | 99 self.genCode(fn.body) |
66 # Set the default return value to zero: | 100 # Set the default return value to zero: |
67 # TBD: this may not be required? | 101 # TBD: this may not be required? |
68 self.emit(ir.Move(f.return_value, ir.Const(0))) | 102 self.emit(ir.Move(f.return_value, ir.Const(0))) |
69 self.emit(ir.Jump(f.epiloog)) | 103 self.emit(ir.Jump(f.epiloog)) |
70 self.setFunction(None) | 104 self.setFunction(None) |
71 | 105 |
72 def genCode(self, code): | 106 def genCode(self, code): |
73 assert isinstance(code, astnodes.Statement) | 107 try: |
108 self.genStmt(code) | |
109 except SemanticError as e: | |
110 self.error(e.msg, e.loc) | |
111 | |
112 def genStmt(self, code): | |
113 assert isinstance(code, Statement) | |
74 self.setLoc(code.loc) | 114 self.setLoc(code.loc) |
75 if type(code) is astnodes.CompoundStatement: | 115 if type(code) is Compound: |
76 for s in code.statements: | 116 for s in code.statements: |
77 self.genCode(s) | 117 self.genCode(s) |
78 elif type(code) is astnodes.EmptyStatement: | 118 elif type(code) is Empty: |
79 pass | 119 pass |
80 elif type(code) is astnodes.Assignment: | 120 elif type(code) is Assignment: |
121 lval = self.genExprCode(code.lval) | |
81 rval = self.genExprCode(code.rval) | 122 rval = self.genExprCode(code.rval) |
82 lval = self.genExprCode(code.lval) | 123 if not self.equalTypes(code.lval.typ, code.rval.typ): |
124 msg = 'Cannot assign {} to {}'.format(code.lval.typ, code.rval.typ) | |
125 raise SemanticError(msg, code.loc) | |
126 if not code.lval.lvalue: | |
127 raise SemanticError('No valid lvalue {}'.format(code.lval), code.lval.loc) | |
83 self.emit(ir.Move(lval, rval)) | 128 self.emit(ir.Move(lval, rval)) |
84 elif type(code) is astnodes.ExpressionStatement: | 129 elif type(code) is ExpressionStatement: |
85 self.emit(ir.Exp(self.genExprCode(code.ex))) | 130 self.emit(ir.Exp(self.genExprCode(code.ex))) |
86 elif type(code) is astnodes.IfStatement: | 131 elif type(code) is If: |
87 bbtrue = self.newBlock() | 132 bbtrue = self.newBlock() |
88 bbfalse = self.newBlock() | 133 bbfalse = self.newBlock() |
89 te = self.newBlock() | 134 te = self.newBlock() |
90 self.genCondCode(code.condition, bbtrue, bbfalse) | 135 self.genCondCode(code.condition, bbtrue, bbfalse) |
91 self.setBlock(bbtrue) | 136 self.setBlock(bbtrue) |
93 self.emit(ir.Jump(te)) | 138 self.emit(ir.Jump(te)) |
94 self.setBlock(bbfalse) | 139 self.setBlock(bbfalse) |
95 self.genCode(code.falsestatement) | 140 self.genCode(code.falsestatement) |
96 self.emit(ir.Jump(te)) | 141 self.emit(ir.Jump(te)) |
97 self.setBlock(te) | 142 self.setBlock(te) |
98 elif type(code) is astnodes.ReturnStatement: | 143 elif type(code) is Return: |
99 re = self.genExprCode(code.expr) | 144 re = self.genExprCode(code.expr) |
100 self.emit(ir.Move(self.fn.return_value, re)) | 145 self.emit(ir.Move(self.fn.return_value, re)) |
101 self.emit(ir.Jump(self.fn.epiloog)) | 146 self.emit(ir.Jump(self.fn.epiloog)) |
102 b = self.newBlock() | 147 b = self.newBlock() |
103 self.setBlock(b) | 148 self.setBlock(b) |
104 elif type(code) is astnodes.WhileStatement: | 149 elif type(code) is While: |
105 bbdo = self.newBlock() | 150 bbdo = self.newBlock() |
106 bbtest = self.newBlock() | 151 bbtest = self.newBlock() |
107 te = self.newBlock() | 152 te = self.newBlock() |
108 self.emit(ir.Jump(bbtest)) | 153 self.emit(ir.Jump(bbtest)) |
109 self.setBlock(bbtest) | 154 self.setBlock(bbtest) |
115 else: | 160 else: |
116 raise NotImplementedError('Unknown stmt {}'.format(code)) | 161 raise NotImplementedError('Unknown stmt {}'.format(code)) |
117 | 162 |
118 def genCondCode(self, expr, bbtrue, bbfalse): | 163 def genCondCode(self, expr, bbtrue, bbfalse): |
119 # Implement sequential logical operators | 164 # Implement sequential logical operators |
120 if type(expr) is astnodes.Binop: | 165 if type(expr) is Binop: |
121 if expr.op == 'or': | 166 if expr.op == 'or': |
122 l2 = self.newBlock() | 167 l2 = self.newBlock() |
123 self.genCondCode(expr.a, bbtrue, l2) | 168 self.genCondCode(expr.a, bbtrue, l2) |
169 if not equalTypes(expr.a.typ, self.boolType): | |
170 raise SemanticError('Must be boolean', expr.a.loc) | |
124 self.setBlock(l2) | 171 self.setBlock(l2) |
125 self.genCondCode(expr.b, bbtrue, bbfalse) | 172 self.genCondCode(expr.b, bbtrue, bbfalse) |
173 if not equalTypes(expr.b.typ, self.boolType): | |
174 raise SemanticError('Must be boolean', expr.b.loc) | |
126 elif expr.op == 'and': | 175 elif expr.op == 'and': |
127 l2 = self.newBlock() | 176 l2 = self.newBlock() |
128 self.genCondCode(expr.a, l2, bbfalse) | 177 self.genCondCode(expr.a, l2, bbfalse) |
178 if not equalTypes(expr.a.typ, self.boolType): | |
179 self.error('Must be boolean', expr.a.loc) | |
129 self.setBlock(l2) | 180 self.setBlock(l2) |
130 self.genCondCode(expr.b, bbtrue, bbfalse) | 181 self.genCondCode(expr.b, bbtrue, bbfalse) |
182 if not equalTypes(expr.b.typ, self.boolType): | |
183 raise SemanticError('Must be boolean', expr.b.loc) | |
131 elif expr.op in ['==', '>', '<', '!=', '<=', '>=']: | 184 elif expr.op in ['==', '>', '<', '!=', '<=', '>=']: |
132 ta = self.genExprCode(expr.a) | 185 ta = self.genExprCode(expr.a) |
133 tb = self.genExprCode(expr.b) | 186 tb = self.genExprCode(expr.b) |
187 if not self.equalTypes(expr.a.typ, expr.b.typ): | |
188 raise SemanticError('Types unequal {} != {}' | |
189 .format(expr.a.typ, expr.b.typ), expr.loc) | |
134 self.emit(ir.CJump(ta, expr.op, tb, bbtrue, bbfalse)) | 190 self.emit(ir.CJump(ta, expr.op, tb, bbtrue, bbfalse)) |
135 else: | 191 else: |
136 raise NotImplementedError('Unknown condition {}'.format(expr)) | 192 raise NotImplementedError('Unknown condition {}'.format(expr)) |
137 elif type(expr) is astnodes.Literal: | 193 expr.typ = self.boolType |
194 elif type(expr) is Literal: | |
138 if expr.val: | 195 if expr.val: |
139 self.emit(ir.Jump(bbtrue)) | 196 self.emit(ir.Jump(bbtrue)) |
140 else: | 197 else: |
141 self.emit(ir.Jump(bbfalse)) | 198 self.emit(ir.Jump(bbfalse)) |
199 expr.typ = self.boolType | |
142 else: | 200 else: |
143 raise NotImplementedError('Unknown cond {}'.format(expr)) | 201 raise NotImplementedError('Unknown cond {}'.format(expr)) |
202 if not self.equalTypes(expr.typ, self.boolType): | |
203 self.error('Condition must be boolean', expr.loc) | |
144 | 204 |
145 def genExprCode(self, expr): | 205 def genExprCode(self, expr): |
146 assert isinstance(expr, astnodes.Expression) | 206 assert isinstance(expr, Expression) |
147 if type(expr) is astnodes.Binop and expr.op in ir.Binop.ops: | 207 if type(expr) is Binop: |
148 ra = self.genExprCode(expr.a) | 208 expr.lvalue = False |
149 rb = self.genExprCode(expr.b) | 209 if expr.op in ['+', '-', '*', '/', '<<', '>>', '|', '&']: |
210 ra = self.genExprCode(expr.a) | |
211 rb = self.genExprCode(expr.b) | |
212 if self.equalTypes(expr.a.typ, self.intType) and \ | |
213 self.equalTypes(expr.b.typ, self.intType): | |
214 expr.typ = expr.a.typ | |
215 else: | |
216 # assume void here? TODO: throw exception! | |
217 raise SemanticError('Can only add integers', expr.loc) | |
218 else: | |
219 raise NotImplementedError("Cannot use equality as expressions") | |
150 return ir.Binop(ra, expr.op, rb) | 220 return ir.Binop(ra, expr.op, rb) |
151 elif type(expr) is astnodes.Unop and expr.op == '&': | 221 elif type(expr) is Unop: |
152 ra = self.genExprCode(expr.a) | 222 if expr.op == '&': |
153 assert type(ra) is ir.Mem | 223 ra = self.genExprCode(expr.a) |
154 return ra.e | 224 expr.typ = PointerType(expr.a.typ) |
155 elif type(expr) is astnodes.VariableUse: | 225 if not expr.a.lvalue: |
226 raise SemanticError('No valid lvalue', expr.a.loc) | |
227 expr.lvalue = False | |
228 assert type(ra) is ir.Mem | |
229 return ra.e | |
230 else: | |
231 raise NotImplementedError('Unknown unop {0}'.format(expr.op)) | |
232 elif type(expr) is Identifier: | |
233 # Generate code for this identifier. | |
234 expr.lvalue = True | |
235 tg = self.resolveSymbol(expr) | |
236 expr.kind = type(tg) | |
237 expr.typ = tg.typ | |
156 # This returns the dereferenced variable. | 238 # This returns the dereferenced variable. |
157 if expr.target.isParameter: | 239 if type(tg) is Variable: |
158 # TODO: now parameters are handled different. Not nice? | 240 # TODO: now parameters are handled different. Not nice? |
159 return self.varMap[expr.target] | 241 return ir.Mem(self.varMap[tg]) |
160 else: | 242 else: |
161 return ir.Mem(self.varMap[expr.target]) | 243 return ir.Mem(self.varMap[tg]) |
162 elif type(expr) is astnodes.Deref: | 244 elif type(expr) is Deref: |
163 # dereference pointer type: | 245 # dereference pointer type: |
164 addr = self.genExprCode(expr.ptr) | 246 addr = self.genExprCode(expr.ptr) |
165 return ir.Mem(addr) | 247 ptr_typ = self.theType(expr.ptr.typ) |
166 elif type(expr) is astnodes.FieldRef: | 248 expr.lvalue = True |
249 if type(ptr_typ) is PointerType: | |
250 expr.typ = ptr_typ.ptype | |
251 return ir.Mem(addr) | |
252 else: | |
253 raise SemanticError('Cannot deref non-pointer', expr.loc) | |
254 expr.typ = self.intType | |
255 return ir.Mem(ir.Const(0)) | |
256 elif type(expr) is Member: | |
167 base = self.genExprCode(expr.base) | 257 base = self.genExprCode(expr.base) |
258 expr.lvalue = expr.base.lvalue | |
259 basetype = self.theType(expr.base.typ) | |
260 if type(basetype) is StructureType: | |
261 if basetype.hasField(expr.field): | |
262 expr.typ = basetype.fieldType(expr.field) | |
263 else: | |
264 raise SemanticError('{} does not contain field {}' | |
265 .format(basetype, expr.field), expr.loc) | |
266 else: | |
267 raise SemanticError('Cannot select field {} of non-structure type {}' | |
268 .format(sym.field, basetype), sym.loc) | |
269 | |
168 assert type(base) is ir.Mem, type(base) | 270 assert type(base) is ir.Mem, type(base) |
169 base = base.e | 271 base = base.e |
170 bt = theType(expr.base.typ) | 272 bt = self.theType(expr.base.typ) |
171 offset = ir.Const(bt.fieldOffset(expr.field)) | 273 offset = ir.Const(bt.fieldOffset(expr.field)) |
172 return ir.Mem(ir.Add(base, offset)) | 274 return ir.Mem(ir.Add(base, offset)) |
173 elif type(expr) is astnodes.Literal: | 275 elif type(expr) is Literal: |
276 expr.lvalue = False | |
277 typemap = {int: 'int', float: 'double', bool: 'bool'} | |
278 if type(expr.val) in typemap: | |
279 expr.typ = self.pkg.scope[typemap[type(expr.val)]] | |
280 else: | |
281 raise SemanticError('Unknown literal type {}'.format(expr.val)) | |
174 return ir.Const(expr.val) | 282 return ir.Const(expr.val) |
175 elif type(expr) is astnodes.TypeCast: | 283 elif type(expr) is TypeCast: |
176 # TODO: improve this mess: | 284 # TODO: improve this mess: |
177 ar = self.genExprCode(expr.a) | 285 ar = self.genExprCode(expr.a) |
178 tt = theType(expr.to_type) | 286 ft = self.theType(expr.a.typ) |
179 if isinstance(tt, astnodes.PointerType): | 287 tt = self.theType(expr.to_type) |
180 if expr.a.typ is expr.scope['int']: | 288 if isinstance(ft, PointerType) and isinstance(tt, PointerType): |
181 return ar | 289 expr.typ = expr.to_type |
182 elif isinstance(expr.a.typ, astnodes.PointerType): | 290 return ar |
183 return ar | 291 elif type(ft) is BaseType and ft.name == 'int' and \ |
184 else: | 292 isinstance(tt, PointerType): |
185 raise Exception() | 293 expr.typ = expr.to_type |
186 else: | 294 return ar |
187 raise NotImplementedError("not implemented") | 295 else: |
188 elif type(expr) is astnodes.FunctionCall: | 296 self.error('Cannot cast {} to {}' |
297 .format(ft, tt), expr.loc) | |
298 expr.typ = self.intType | |
299 return ir.Const(0) | |
300 elif type(expr) is FunctionCall: | |
301 # Evaluate the arguments: | |
189 args = [self.genExprCode(e) for e in expr.args] | 302 args = [self.genExprCode(e) for e in expr.args] |
190 #fn = self.funcMap[expr.proc] | 303 # Check arguments: |
191 fn = expr.proc.name | 304 if type(expr.proc) is Identifier: |
192 return ir.Call(fn, args) | 305 tg = self.resolveSymbol(expr.proc) |
306 else: | |
307 raise Exception() | |
308 assert type(tg) is Function | |
309 ftyp = tg.typ | |
310 fname = tg.name | |
311 ptypes = ftyp.parametertypes | |
312 if len(expr.args) != len(ptypes): | |
313 raise SemanticError('Function {2}: {0} arguments required, {1} given' | |
314 .format(len(ptypes), len(expr.args), fname), expr.loc) | |
315 for arg, at in zip(expr.args, ptypes): | |
316 if not self.equalTypes(arg.typ, at): | |
317 raise SemanticError('Got {0}, expected {1}' | |
318 .format(arg.typ, at), arg.loc) | |
319 # determine return type: | |
320 expr.typ = ftyp.returntype | |
321 return ir.Call(fname, args) | |
193 else: | 322 else: |
194 raise NotImplementedError('Unknown expr {}'.format(expr)) | 323 raise NotImplementedError('Unknown expr {}'.format(expr)) |
324 | |
325 def resolveSymbol(self, sym): | |
326 assert type(sym) in [Identifier, Member] | |
327 if type(sym) is Member: | |
328 base = self.resolveSymbol(sym.base) | |
329 scope = base.innerScope | |
330 name = sym.field | |
331 elif type(sym) is Identifier: | |
332 scope = sym.scope | |
333 name = sym.target | |
334 else: | |
335 raise NotImplementedError(str(sym)) | |
336 if name in scope: | |
337 s = scope[name] | |
338 else: | |
339 raise SemanticError('{} undefined'.format(name), sym.loc) | |
340 assert isinstance(s, Symbol) | |
341 return s | |
342 | |
343 def theType(self, t): | |
344 """ Recurse until a 'real' type is found """ | |
345 if type(t) is DefinedType: | |
346 t = self.theType(t.typ) | |
347 elif type(t) is Identifier: | |
348 t = self.theType(self.resolveSymbol(t)) | |
349 elif type(t) is Member: | |
350 # Possible when using types of modules: | |
351 t = self.theType(self.resolveSymbol(t)) | |
352 elif isinstance(t, Type): | |
353 pass | |
354 else: | |
355 raise NotImplementedError(str(t)) | |
356 assert isinstance(t, Type) | |
357 return t | |
358 | |
359 def equalTypes(self, a, b): | |
360 """ Compare types a and b for structural equavalence. """ | |
361 # Recurse into named types: | |
362 a = self.theType(a) | |
363 b = self.theType(b) | |
364 assert isinstance(a, Type) | |
365 assert isinstance(b, Type) | |
366 | |
367 if type(a) is type(b): | |
368 if type(a) is BaseType: | |
369 return a.name == b.name | |
370 elif type(a) is PointerType: | |
371 return self.equalTypes(a.ptype, b.ptype) | |
372 elif type(a) is StructureType: | |
373 if len(a.mems) != len(b.mems): | |
374 return False | |
375 return all(self.equalTypes(am.typ, bm.typ) for am, bm in | |
376 zip(a.mems, b.mems)) | |
377 else: | |
378 raise NotImplementedError('{} not implemented'.format(type(a))) | |
379 return False |