Mercurial > lcfOS
comparison ide/compiler/codegenerator.py @ 1:92df07bc2081
Initial import of compiler
author | windel |
---|---|
date | Sun, 18 Sep 2011 19:00:29 +0200 |
parents | |
children | 0d5ef85b8698 |
comparison
equal
deleted
inserted
replaced
0:1a4faf9ef1ea | 1:92df07bc2081 |
---|---|
1 """ | |
2 Code generation for 64 bits intel processors | |
3 """ | |
4 | |
5 from .nodes import * | |
6 from .errors import Error | |
7 from .builtin import real, integer, boolean, char | |
8 from .assembler import * | |
9 | |
10 class CodeGenerator: | |
11 def __init__(self): | |
12 self.strings = [] | |
13 self.initialize() | |
14 def initialize(self): | |
15 # Register descriptors: | |
16 self.freeregs = 'r8,r9,r10,r11,r12,r13,r14,r15'.split(',') | |
17 self.usedregs = [] | |
18 # Members to accumulate the result into: | |
19 # The result is an image of bytecode and global variable space. | |
20 # Global variables a referenced by RIP relative addressing. | |
21 self.image = [] | |
22 self.rip = 0 # The current instruction pointer location. | |
23 # TODO: backpatch list here? | |
24 | |
25 # Functions to modify the code image | |
26 def addCode(self, code): | |
27 assert(type(code) is list) | |
28 self.image += code | |
29 self.rip += len(code) | |
30 def fixCode(self, position, code): | |
31 self.image[position:position+len(code)] = code | |
32 def align(self, b): | |
33 while (self.rip % b) != 0: | |
34 self.addCode([0]) | |
35 | |
36 def saveAllRegisters(self): | |
37 regs = list(self.usedregs.keys()) | |
38 for reg in regs: | |
39 code += self.saveRegister(reg) | |
40 | |
41 def saveRegister(self, reg): | |
42 code = [] | |
43 if reg in self.usedregs.keys(): | |
44 code.append('mov {0}, {1}'.format(self.usedregs[reg], reg)) | |
45 del self.usedregs[reg] | |
46 self.freeregs.append(reg) | |
47 | |
48 def getreg(self, node): | |
49 """ acquire a working register for a certain node.""" | |
50 # Temporary register bypass action: | |
51 if len(self.freeregs) > 0: | |
52 reg = self.freeregs.pop(0) | |
53 self.usedregs.append(reg) | |
54 else: | |
55 Error('No more free regs') | |
56 node.reg = reg | |
57 | |
58 def freereg(self, node): | |
59 reg = node.reg | |
60 node.reg = None | |
61 self.freeregs.append(reg) | |
62 self.usedregs.remove(reg) | |
63 | |
64 # Helpers to load and retrieve designated objects: | |
65 def storeRegInDesignator(self, reg, designator): | |
66 assert(type(reg) is str) | |
67 assert(type(designator) is Designator) | |
68 if len(designator.selectors) > 0: | |
69 self.gencode( designator ) # Load the pointer into some register | |
70 self.addCode( mov([designator.reg, 0x0], reg) ) | |
71 self.freereg( designator ) | |
72 else: | |
73 if designator.obj.isLocal: | |
74 # Relative from rbp register | |
75 mem = ['rbp', designator.obj.offset] | |
76 self.addCode( mov(mem, reg) ) | |
77 else: | |
78 # Relative from RIP after move | |
79 self.addCode( mov(['RIP', 0x0], reg) ) | |
80 self.fixCode(self.rip - 4, imm32(designator.obj.offset - self.rip) ) | |
81 | |
82 # Code generation functions: | |
83 def genexprcode(self, node): | |
84 """ | |
85 Generate code for expressions! | |
86 Recursively evaluates, and ensures a register contains the answer. | |
87 register is an integer register or a floating point reg | |
88 """ | |
89 if isinstance(node, Binop): | |
90 """ Handle a binary operation (two arguments) of some kind """ | |
91 self.genexprcode(node.a) | |
92 self.genexprcode(node.b) | |
93 | |
94 if node.op == 'mod': | |
95 assert(node.typ.isType(integer)) | |
96 self.addCode(mov('rax', node.a.reg)) | |
97 self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros | |
98 self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg | |
99 node.reg = node.a.reg | |
100 self.freereg(node.b) # give up register that contains b | |
101 self.addCode(mov(node.reg, 'rdx')) # move remainder into result | |
102 elif node.op == 'div': | |
103 assert(node.typ.isType(integer)) | |
104 self.addCode(mov('rax', node.a.reg)) | |
105 self.addCode(xorreg64('rdx', 'rdx')) # Extend divided number with zeros | |
106 self.addCode(idivreg64(node.b.reg)) # divide rdx:rax with reg | |
107 node.reg = node.a.reg | |
108 self.freereg(node.b) # give up register that contains b | |
109 self.addCode(mov(node.reg, 'rax')) # move result into reg | |
110 elif node.op == '*': | |
111 if node.typ.isType(integer): | |
112 self.addCode(imulreg64(node.a.reg, node.b.reg)) | |
113 node.reg = node.a.reg | |
114 self.freereg(node.b) | |
115 else: | |
116 Error('{0} for * not implemented'.format(node.typ)) | |
117 elif node.op == '+': | |
118 if node.typ.isType(integer): | |
119 self.addCode(addreg64(node.a.reg, node.b.reg)) | |
120 node.reg = node.a.reg | |
121 self.freereg(node.b) | |
122 else: | |
123 Error('{0} for + not implemented'.format(node.typ)) | |
124 elif node.op == '-': | |
125 if node.typ.isType(integer): | |
126 self.addCode(subreg64(node.a.reg, node.b.reg)) | |
127 node.reg = node.a.reg | |
128 self.freereg(node.b) | |
129 else: | |
130 Error('{0} for - not implemented'.format(node.typ)) | |
131 else: | |
132 Error('Unknown Binop {0}'.format(node.op)) | |
133 | |
134 elif type(node) is Unop: | |
135 if node.op == 'INTTOREAL': | |
136 self.genexprcode(node.a) | |
137 node.reg = node.a.reg | |
138 # TODO use 'FILD' instruction | |
139 freg = 12 | |
140 code.append('Unop inttoreal TODO') | |
141 elif node.op == 'ABS': | |
142 if isType(node.typ, real): | |
143 code = [0xD9, 0xE1] # st(0) = fabs st(0) | |
144 Error('ABS error integer') | |
145 elif isType(node.typ, integer): | |
146 code = [] | |
147 Error('ABS error integer') | |
148 else: | |
149 Error('ABS error') | |
150 else: | |
151 Error('Unknown Unop {0}'.format(node.op)) | |
152 | |
153 elif isinstance(node, Designator): | |
154 # dereference, array index. Make sure that the result comes into a register | |
155 if len(node.selectors) > 0: | |
156 self.gencode(node) # Load the pointer into some register | |
157 | |
158 # Now we can access the object at location '[node.reg]': | |
159 if node.typ.isType(integer): | |
160 self.addCode( mov(node.reg, [node.reg, 0x0]) ) | |
161 else: | |
162 Error('Only integer types implemented') | |
163 else: | |
164 # No selectors, load variable directly | |
165 if node.obj.typ.isType(integer): | |
166 if type(node.obj) is Constant: | |
167 self.genexprcode(node.obj) | |
168 node.reg = node.obj.reg | |
169 else: | |
170 self.getreg(node) | |
171 # Get a register to store the integer value | |
172 if node.obj.isLocal: | |
173 # relative to rbp: | |
174 self.addCode( mov(node.reg, ['rbp', node.obj.offset]) ) | |
175 else: | |
176 self.addCode(mov(node.reg, ['RIP', 0x0])) | |
177 self.fixCode(self.rip-4, imm32(node.obj.offset - self.rip)) | |
178 else: | |
179 Error('Cannot load variable type {0}'.format(node.typ)) | |
180 | |
181 elif isinstance(node, Relop): | |
182 # Create a boolean from operands | |
183 # TODO create an alternative for expressions used as conditions. | |
184 self.genexprcode(node.a) | |
185 self.genexprcode(node.b) | |
186 | |
187 if node.a.typ.isType(integer): | |
188 instructions = {'<': 'L', '>': 'G', '<>': 'NE', '>=': 'GE', '<=': 'LE', '=':'E'} | |
189 if not node.relop in instructions.keys(): | |
190 Error('Unimplemented relop: '+str(node.relop)) | |
191 instr = instructions[node.relop] | |
192 | |
193 node.reg = node.a.reg | |
194 self.addCode( cmpreg64(node.a.reg, node.b.reg) ) | |
195 self.addCode( shortjump(0x0, condition=instr) ) # jump over 0 code and jmp | |
196 fixloc1 = self.rip - 1 | |
197 rip1 = self.rip | |
198 self.addCode( xorreg64(node.reg, node.reg) ) | |
199 self.addCode( shortjump(0x0) ) # Jump over 1 code | |
200 fixloc2 = self.rip - 1 | |
201 self.fixCode(fixloc1, imm8(self.rip - rip1)) | |
202 rip2 = self.rip | |
203 self.addCode( xorreg64(node.reg, node.reg) ) | |
204 self.addCode( increg64(node.reg) ) | |
205 self.fixCode(fixloc2, imm8(self.rip - rip2)) | |
206 | |
207 self.freereg(node.b) | |
208 else: | |
209 Error('Relop not implemented for {0}'.format(node.a.typ)) | |
210 | |
211 elif type(node) is Constant: | |
212 if node.typ.isType(integer): | |
213 self.getreg(node) | |
214 self.addCode(mov(node.reg, node.value)) | |
215 elif node.typ.isType(real): | |
216 code += self.getreg(node) | |
217 Error('TODO: get real reg') | |
218 # TODO: get a fixed point reg, and load the variable in there | |
219 else: | |
220 Error('Howto generate code for {0}?'.format(node)) | |
221 | |
222 elif type(node) is ProcedureCall: | |
223 if type(node.proc.obj) is BuiltinProcedure: | |
224 # Handle builtin procedures different, these not always call | |
225 # a function, but generate code. | |
226 bi = node.proc.obj | |
227 if bi.name == 'chr': | |
228 arg = node.args[0] | |
229 self.genexprcode(arg) | |
230 # Store character in full width register: | |
231 # TODO: store in char only register | |
232 node.reg = arg.reg | |
233 else: | |
234 Error('Unknown builtin function {0}'.format(bi.name)) | |
235 else: | |
236 # Use generic procedure call first | |
237 self.gencode(node) | |
238 # Retrieve result: | |
239 if node.typ.isType(integer): | |
240 # Store result! | |
241 self.getreg(node) | |
242 self.addCode( mov(node.reg, 'rax') ) | |
243 else: | |
244 Error('Return type not supported {0}'.format(node.typ)) | |
245 else: | |
246 Error('Cannot generate expression code for: {0}'.format(node)) | |
247 | |
248 def gencode(self, node): | |
249 """ Code generation function for AST nodes """ | |
250 if isinstance(node, Module): | |
251 # for all imports make a list of pointer to the actual procedures: | |
252 for imp in node.imports: | |
253 imp.offset = self.rip | |
254 self.addCode( [0x0]*8 ) | |
255 # global variable storage allocation | |
256 variables = node.symtable.getAllLocal(Variable) | |
257 for var in variables: | |
258 var.isLocal = False | |
259 var.offset = self.rip | |
260 self.addCode( [0x00] * var.typ.size ) # TODO initial values here? | |
261 self.align(8) | |
262 # TODO: mark end of data and start of code inside image | |
263 # TODO: round data to page size to enable protection by loader. | |
264 # Procedure code generation: | |
265 procedures = node.symtable.getAllLocal(Procedure) | |
266 node.procs = procedures | |
267 for proc in procedures: | |
268 self.gencode(proc) | |
269 # Module init code: | |
270 node.initcodeentry = self.rip | |
271 self.gencode(node.initcode) | |
272 self.addCode( ret() ) | |
273 # TODO: how to return from module init code? far return?? | |
274 | |
275 elif type(node) is Procedure: | |
276 # calculate offsets for local variables and parameters | |
277 # Variable location relative to 'rbp' register | |
278 variables = node.symtable.getAllLocal(Variable) | |
279 offset = 0 | |
280 paramoffset = 16 | |
281 for var in variables: | |
282 var.isLocal = True | |
283 if not var.isParameter: | |
284 offset += var.typ.size | |
285 # Offset is negative of rbp in stack frame | |
286 var.offset = -offset | |
287 node.framesize = offset | |
288 # Calculate offsets of parameters relative to rbp register | |
289 for par in reversed(node.typ.parameters): | |
290 pvar = node.symtable.getLocal(Variable, par.name) | |
291 pvar.offset = paramoffset | |
292 paramoffset += pvar.typ.size | |
293 | |
294 # code generation | |
295 node.entrypoint = self.rip | |
296 self.addCode(push('rbp')) | |
297 self.addCode(mov('rbp', 'rsp')) # Setup the base pointer | |
298 self.addCode(subreg64('rsp', node.framesize)) # reserve space for locals | |
299 self.gencode(node.block) | |
300 if node.retexpr: | |
301 if node.retexpr.typ.isType(integer): | |
302 self.genexprcode(node.retexpr) | |
303 self.addCode( mov('rax', node.retexpr.reg) ) | |
304 self.freereg(node.retexpr) | |
305 else: | |
306 Error('Cannot return this kind yet {0}'.format(node.retexpr.typ)) | |
307 self.addCode( addreg64('rsp', node.framesize) ) | |
308 self.addCode( pop('rbp') ) | |
309 self.addCode( ret() ) | |
310 assert(len(self.usedregs) == 0) | |
311 | |
312 elif isinstance(node, StatementSequence): | |
313 for s in node.statements: | |
314 self.gencode(s) | |
315 assert(len(self.usedregs) == 0) | |
316 | |
317 elif type(node) is ProcedureCall: | |
318 # Prepare parameters on the stack: | |
319 stacksize = 0 | |
320 assert(len(node.args) == len(node.proc.typ.parameters)) | |
321 for arg, param in zip(node.args, node.proc.typ.parameters): | |
322 | |
323 if param.kind == 'value': | |
324 self.genexprcode(arg) | |
325 self.addCode( push(arg.reg) ) | |
326 self.freereg( arg ) | |
327 stacksize += 8 | |
328 else: | |
329 Error('Parameter kind other than value') | |
330 | |
331 # Calculate address using designator | |
332 if type(node.proc.obj) is Procedure: | |
333 self.addCode( call(0x0) ) | |
334 self.fixCode( self.rip - 4, imm32(node.proc.obj.entrypoint - self.rip)) | |
335 elif type(node.proc.obj) is ImportedSymbol: | |
336 # Load the entry point of the import table | |
337 self.getreg(node.proc.obj) | |
338 # Load the address of the procedure: | |
339 self.addCode( mov(node.proc.obj.reg, ['RIP', 0x0]) ) | |
340 self.fixCode( self.rip - 4, imm32(node.proc.obj.offset - self.rip) ) | |
341 # Call to the address in register: | |
342 self.addCode( call(node.proc.obj.reg) ) | |
343 # Free register that holds the address of the object | |
344 self.freereg( node.proc.obj ) | |
345 elif type(node.proc.obj) is BuiltinProcedure: | |
346 if node.proc.obj.name == 'chr': | |
347 print('int to char') | |
348 else: | |
349 Error('Unknown builtin function {0}'.format(node.proc.obj.name)) | |
350 else: | |
351 Error('Cannot call designator of type {0}'.format(node.proc.obj)) | |
352 | |
353 # Restore stack (pop all arguments of): | |
354 self.addCode(addreg64('rsp', stacksize)) | |
355 | |
356 elif type(node) is Assignment: | |
357 if node.lval.typ.isType(integer): | |
358 # TODO if node.rval is Constant of some datatype, move it to mem directly | |
359 self.genexprcode(node.rval) # Calculate the value that has to be stored. | |
360 self.storeRegInDesignator(node.rval.reg, node.lval) | |
361 self.freereg(node.rval) | |
362 else: | |
363 Error('Assignments of other types not implemented') | |
364 # TODO if left and right are designators, do some sort of memcpy. | |
365 | |
366 elif type(node) is IfStatement: | |
367 self.genexprcode(node.condition) | |
368 self.addCode( cmpreg64(node.condition.reg, 1) ) | |
369 self.freereg(node.condition) | |
370 if node.falsestatement: | |
371 # If with else clause | |
372 self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false | |
373 rip1 = self.rip | |
374 fixloc1 = self.rip - 4 | |
375 self.gencode(node.truestatement) | |
376 self.addCode( nearjump( 0x0 ) ) # jump over false code | |
377 fixloc2 = self.rip - 4 | |
378 self.fixCode(fixloc1, imm32(self.rip - rip1)) | |
379 rip2 = self.rip | |
380 self.gencode(node.falsestatement) | |
381 self.fixCode(fixloc2, imm32(self.rip - rip2)) | |
382 else: | |
383 # If without else clause | |
384 self.addCode( nearjump(0x0, condition='NE') ) # if Not Equal jump to false | |
385 rip1 = self.rip | |
386 fixloc1 = self.rip - 4 | |
387 self.gencode(node.truestatement) | |
388 self.fixCode(fixloc1, imm32(self.rip - rip1)) # Fixup near jump over true code. | |
389 | |
390 elif isinstance(node, WhileStatement): | |
391 rip1 = self.rip # Store the start of the while loop | |
392 self.genexprcode(node.condition) | |
393 self.addCode( cmpreg64(node.condition.reg, 1) ) # Test condition for true-ness | |
394 self.freereg(node.condition) | |
395 self.addCode( nearjump(0x0, condition='NE') ) # If Not Equal jump over while code AND jump back (fix later) | |
396 fixloc1 = self.rip - 4 | |
397 rip2 = self.rip | |
398 self.gencode(node.dostatements) | |
399 self.addCode( nearjump(0x0) ) # JMP to condition, fix exact jump position below | |
400 fixloc2 = self.rip - 4 | |
401 rip3 = self.rip # end of while loop | |
402 self.fixCode(fixloc2, imm32(rip1 - rip3)) # Fixup jump to start of while loop | |
403 self.fixCode(fixloc1, imm32(rip3 - rip2)) # Fixup jump out of while loop | |
404 | |
405 elif type(node) is ForStatement: | |
406 # Initial load of iterator variable: | |
407 self.genexprcode(node.begin) | |
408 self.storeRegInDesignator(node.begin.reg, node.variable) | |
409 self.freereg(node.begin) | |
410 rip1 = self.rip | |
411 self.gencode(node.statements) | |
412 #self.loadDesignatorInReg(node. | |
413 #self.addCode( addreg64(node.variable, node.increment) ) | |
414 Error('No implementation of FOR statement') | |
415 | |
416 elif type(node) is AsmCode: | |
417 def processOperand(op): | |
418 if type(op) is list: | |
419 if type(op[0]) is Variable: | |
420 var = op[0] | |
421 if var.isLocal: | |
422 return ['rbp', var.offset] | |
423 else: | |
424 Error('Can only use local variables in inline assembler') | |
425 return op | |
426 for asmline in node.asmcode: | |
427 opcode, operands = asmline | |
428 operands = [processOperand(opx) for opx in operands] | |
429 print('assembling', opcode, *operands) | |
430 func,nargs = opcodes[opcode] | |
431 code = func(*operands) | |
432 self.addCode(code) | |
433 | |
434 elif isinstance(node, EmptyStatement): | |
435 pass | |
436 | |
437 | |
438 elif type(node) is StringConstant: | |
439 self.strings.append(node) | |
440 self.data.append(node.value) # Add string to the data section | |
441 | |
442 elif type(node) is Designator: | |
443 if len(node.selectors) > 0: | |
444 self.getreg(node) | |
445 # Load starting address | |
446 if node.obj.isLocal: | |
447 self.addCode( leareg64(node.reg, ['rbp', node.obj.offset]) ) | |
448 else: | |
449 # Global variables need to be relocated... | |
450 self.addCode(leareg64(node.reg, ['RIP', 0])) | |
451 self.fixCode(self.rip - 4, imm32(node.obj.offset - self.rip)) | |
452 # Loop over all designators.. | |
453 for selector in node.selectors: | |
454 if type(selector) is Index: | |
455 # Deref an array index | |
456 self.genexprcode(selector.index) | |
457 self.getreg(selector) | |
458 self.addCode( mov(selector.reg, selector.typ.elementType.size) ) | |
459 self.addCode( imulreg64(selector.reg, selector.index.reg ) ) | |
460 self.freereg(selector.index) | |
461 self.addCode(addreg64(node.reg, selector.reg)) | |
462 self.freereg(selector) | |
463 elif type(selector) is Field: | |
464 print('Field') | |
465 Error('Field not implemented') | |
466 else: | |
467 Error('Unknown selector') | |
468 else: | |
469 Error('Can only gencode for designator with selectors') | |
470 | |
471 else: | |
472 print('not generating code for {0}'.format(node)) | |
473 | |
474 def generatecode(self, ast): | |
475 """ code generation front end """ | |
476 self.initialize() | |
477 self.gencode(ast) | |
478 ast.image = self.image | |
479 |