comparison ide/compiler/parser.py @ 4:0d5ef85b8698

Improved link between ast viewer and code edit
author windel-eee
date Wed, 21 Sep 2011 19:05:18 +0200
parents 92df07bc2081
children 818f80afa78b
comparison
equal deleted inserted replaced
3:77202b0e0f40 4:0d5ef85b8698
35 return True 35 return True
36 return False 36 return False
37 37
38 def NextToken(self): 38 def NextToken(self):
39 self.token = self.tokens.__next__() 39 self.token = self.tokens.__next__()
40 40 self.location = (self.token.row, self.token.col)
41 def setLocation(self): 41
42 pass 42 # Helpers to find location of the error in the code:
43 def attachLocation(self, node): 43 def setLocation(self, obj, location):
44 node.row, node.col = self.token.row, self.token.col 44 obj.location = location
45 return node 45 return obj
46 def getLocation(self):
47 return self.location
48
46 """ 49 """
47 Recursive descent parser functions: 50 Recursive descent parser functions:
48 A set of mutual recursive functions. 51 A set of mutual recursive functions.
49 Starting symbol is the Module. 52 Starting symbol is the Module.
50 """ 53 """
51
52 def parseModule(self): 54 def parseModule(self):
53 self.imports = [] 55 self.imports = []
56 loc = self.getLocation()
54 self.Consume('module') 57 self.Consume('module')
55 modname = self.Consume('ID') 58 modname = self.Consume('ID')
56 self.Consume(';') 59 self.Consume(';')
57 mod = Module(modname) 60 mod = Module(modname)
58 61
79 if endname != modname: 82 if endname != modname:
80 self.Error('end denoter must be module name') 83 self.Error('end denoter must be module name')
81 self.Consume('.') 84 self.Consume('.')
82 85
83 mod.imports = self.imports 86 mod.imports = self.imports
84 return mod 87 return self.setLocation(mod, loc)
85 88
86 # Import part 89 # Import part
87 def parseImportList(self): 90 def parseImportList(self):
88 if self.hasConsumed('import'): 91 if self.hasConsumed('import'):
89 self.parseImport() 92 self.parseImport()
90 while self.hasConsumed(','): 93 while self.hasConsumed(','):
91 self.parseImport() 94 self.parseImport()
92 self.Consume(';') 95 self.Consume(';')
93 96
94 def parseImport(self): 97 def parseImport(self):
98 loc = self.getLocation()
95 modname = self.Consume('ID') 99 modname = self.Consume('ID')
96 mod = loadModule(modname) 100 mod = loadModule(modname)
101 self.setLocation(mod, loc)
97 self.cst.addSymbol(mod) 102 self.cst.addSymbol(mod)
98 103
99 # Helper to parse an identifier defenitions 104 # Helper to parse an identifier defenitions
100 def parseIdentDef(self): 105 def parseIdentDef(self):
106 loc = self.getLocation()
101 name = self.Consume('ID') 107 name = self.Consume('ID')
102 ispublic = self.hasConsumed('*') 108 ispublic = self.hasConsumed('*')
103 return (name, ispublic) 109 # Make a node of this thing:
110 i = Id(name)
111 i.ispublic = ispublic
112 return self.setLocation(i, loc)
104 113
105 def parseIdentList(self): 114 def parseIdentList(self):
106 ids = [ self.parseIdentDef() ] 115 ids = [ self.parseIdentDef() ]
107 while self.hasConsumed(','): 116 while self.hasConsumed(','):
108 ids.append( self.parseIdentDef() ) 117 ids.append( self.parseIdentDef() )
136 def parseDesignator(self): 145 def parseDesignator(self):
137 """ A designator designates an object. 146 """ A designator designates an object.
138 The base location in memory is denoted by the qualified identifier 147 The base location in memory is denoted by the qualified identifier
139 The actual address depends on the selector. 148 The actual address depends on the selector.
140 """ 149 """
150 loc = self.getLocation()
141 obj = self.parseQualIdent() 151 obj = self.parseQualIdent()
142 typ = obj.typ 152 typ = obj.typ
143 selectors = [] 153 selectors = []
144 while self.token.typ in ['.', '[', '^']: 154 while self.token.typ in ['.', '[', '^']:
145 if self.hasConsumed('.'): 155 if self.hasConsumed('.'):
162 selectors.append(Index(idx, typ)) 172 selectors.append(Index(idx, typ))
163 typ = typ.elementType 173 typ = typ.elementType
164 elif self.hasConsumed('^'): 174 elif self.hasConsumed('^'):
165 selectors.append(Deref()) 175 selectors.append(Deref())
166 typ = typ.pointedType 176 typ = typ.pointedType
167 return Designator(obj, selectors, typ) 177 return self.setLocation(Designator(obj, selectors, typ), loc)
168 178
169 # Declaration sequence 179 # Declaration sequence
170 def parseDeclarationSequence(self): 180 def parseDeclarationSequence(self):
171 """ 1. constants, 2. types, 3. variables """ 181 """ 1. constants, 2. types, 3. variables """
172 self.parseConstantDeclarations() 182 self.parseConstantDeclarations()
220 230
221 def parseConstantDeclarations(self): 231 def parseConstantDeclarations(self):
222 """ Parse const part of a module """ 232 """ Parse const part of a module """
223 if self.hasConsumed('const'): 233 if self.hasConsumed('const'):
224 while self.token.typ == 'ID': 234 while self.token.typ == 'ID':
225 name, ispublic = self.parseIdentDef() 235 i = self.parseIdentDef()
226 self.Consume('=') 236 self.Consume('=')
227 constvalue, typ = self.parseConstExpression() 237 constvalue, typ = self.parseConstExpression()
228 self.Consume(';') 238 self.Consume(';')
229 c = Constant(constvalue, typ, name=name, public=ispublic) 239 c = Constant(constvalue, typ, name=i.name, public=i.ispublic)
240 self.setLocation(c, i.location)
230 self.cst.addSymbol(c) 241 self.cst.addSymbol(c)
231 242
232 # Type system 243 # Type system
233 def parseTypeDeclarations(self): 244 def parseTypeDeclarations(self):
234 if self.hasConsumed('type'): 245 if self.hasConsumed('type'):
274 # parse a fieldlist: 285 # parse a fieldlist:
275 identifiers = self.parseIdentList() 286 identifiers = self.parseIdentList()
276 self.Consume(':') 287 self.Consume(':')
277 typ = self.parseType() 288 typ = self.parseType()
278 self.Consume(';') 289 self.Consume(';')
279 for id, public in identifiers: 290 for i in identifiers:
280 if id in fields.keys(): 291 if i.name in fields.keys():
281 self.Error('record field "{0}" multiple defined.'.format(id)) 292 self.Error('record field "{0}" multiple defined.'.format(i.name))
282 fields[id] = typ 293 fields[i.name] = typ
283 # TODO store this in another way, symbol table? 294 # TODO store this in another way, symbol table?
284 self.Consume('end') 295 self.Consume('end')
285 return RecordType(fields) 296 return RecordType(fields)
286 elif self.hasConsumed('pointer'): 297 elif self.hasConsumed('pointer'):
287 self.Consume('to') 298 self.Consume('to')
300 while self.token.typ == 'ID': 311 while self.token.typ == 'ID':
301 ids = self.parseIdentList() 312 ids = self.parseIdentList()
302 self.Consume(':') 313 self.Consume(':')
303 typename = self.parseType() 314 typename = self.parseType()
304 self.Consume(';') 315 self.Consume(';')
305 for name, ispublic in ids: 316 for i in ids:
306 v = Variable(name, typename, public=ispublic) 317 v = Variable(i.name, typename, public=i.ispublic)
318 self.setLocation(v, i.location)
307 self.cst.addSymbol(v) 319 self.cst.addSymbol(v)
308 else: 320 else:
309 self.Error('Expected ID, got'+str(self.token)) 321 self.Error('Expected ID, got'+str(self.token))
310 322
311 # Procedures 323 # Procedures
347 self.Consume(';') 359 self.Consume(';')
348 return procedures 360 return procedures
349 361
350 def parseProcedureDeclaration(self): 362 def parseProcedureDeclaration(self):
351 self.Consume('procedure') 363 self.Consume('procedure')
352 name, ispublic = self.parseIdentDef() 364 i = self.parseIdentDef()
365 procname = i.name
353 proctyp = self.parseFormalParameters() 366 proctyp = self.parseFormalParameters()
354 procsymtable = SymbolTable(parent = self.cst) 367 procsymtable = SymbolTable(parent = self.cst)
355 self.cst = procsymtable # Switch symbol table: 368 self.cst = procsymtable # Switch symbol table:
356 # Add parameters as variables to symbol table: 369 # Add parameters as variables to symbol table:
357 for parameter in proctyp.parameters: 370 for parameter in proctyp.parameters:
386 if not isType(returnexpression.typ, proctyp.returntype): 399 if not isType(returnexpression.typ, proctyp.returntype):
387 self.Error('Returned type {0} does not match function return type {1}'.format(returnexpression.typ, proctyp.returntype)) 400 self.Error('Returned type {0} does not match function return type {1}'.format(returnexpression.typ, proctyp.returntype))
388 401
389 self.Consume('end') 402 self.Consume('end')
390 endname = self.Consume('ID') 403 endname = self.Consume('ID')
391 if endname != name: 404 if endname != procname:
392 self.Error('endname should match {0}'.format(name)) 405 self.Error('endname should match {0}'.format(name))
393 self.cst = procsymtable.parent # Switch back to parent symbol table 406 self.cst = procsymtable.parent # Switch back to parent symbol table
394 proc = Procedure(name, proctyp, block, procsymtable, returnexpression) 407 proc = Procedure(procname, proctyp, block, procsymtable, returnexpression)
395 self.cst.addSymbol(proc) 408 self.cst.addSymbol(proc)
396 proc.public = ispublic 409 proc.public = i.ispublic
397 return proc 410 return proc
398 411
399 # Statements: 412 # Statements:
400 def parseAssignment(self, lval): 413 def parseAssignment(self, lval):
414 loc = self.getLocation()
401 self.Consume(':=') 415 self.Consume(':=')
402 rval = self.parseExpression() 416 rval = self.parseExpression()
403 if isType(lval.typ, real) and isType(rval.typ, integer): 417 if isType(lval.typ, real) and isType(rval.typ, integer):
404 rval = Unop(rval, 'INTTOREAL', real) 418 rval = Unop(rval, 'INTTOREAL', real)
405 if type(rval.typ) is NilType: 419 if type(rval.typ) is NilType:
406 if not type(lval.typ) is ProcedureType and not type(lval.typ) is PointerType: 420 if not type(lval.typ) is ProcedureType and not type(lval.typ) is PointerType:
407 self.Error('Can assign nil only to pointers or procedure types, not {0}'.format(lval)) 421 self.Error('Can assign nil only to pointers or procedure types, not {0}'.format(lval))
408 elif not isType(lval.typ, rval.typ): 422 elif not isType(lval.typ, rval.typ):
409 self.Error('Type mismatch {0} != {1}'.format(lval.typ, rval.typ)) 423 self.Error('Type mismatch {0} != {1}'.format(lval.typ, rval.typ))
410 return Assignment(lval, rval) 424 return self.setLocation(Assignment(lval, rval), loc)
411 425
412 def parseExpressionList(self): 426 def parseExpressionList(self):
413 expressions = [ self.parseExpression() ] 427 expressions = [ self.parseExpression() ]
414 while self.hasConsumed(','): 428 while self.hasConsumed(','):
415 expressions.append( self.parseExpression() ) 429 expressions.append( self.parseExpression() )
430 print(arg.typ, param.typ) 444 print(arg.typ, param.typ)
431 self.Error('Mismatch in parameter') 445 self.Error('Mismatch in parameter')
432 return ProcedureCall(procedure, args) 446 return ProcedureCall(procedure, args)
433 447
434 def parseIfStatement(self): 448 def parseIfStatement(self):
449 loc = self.getLocation()
435 self.Consume('if') 450 self.Consume('if')
436 ifs = [] 451 ifs = []
437 condition = self.parseExpression() 452 condition = self.parseExpression()
438 if not isType(condition.typ, boolean): 453 if not isType(condition.typ, boolean):
439 self.Error('condition of if statement must be boolean') 454 self.Error('condition of if statement must be boolean')
452 else: 467 else:
453 statement = None 468 statement = None
454 self.Consume('end') 469 self.Consume('end')
455 for condition, truestatement in reversed(ifs): 470 for condition, truestatement in reversed(ifs):
456 statement = IfStatement(condition, truestatement, statement) 471 statement = IfStatement(condition, truestatement, statement)
457 return statement 472 return self.setLocation(statement, loc)
458 473
459 def parseCase(self): 474 def parseCase(self):
460 # TODO 475 # TODO
461 pass 476 pass
462 477
468 while self.hasConsumed('|'): 483 while self.hasConsumed('|'):
469 self.parseCase() 484 self.parseCase()
470 self.Consume('end') 485 self.Consume('end')
471 486
472 def parseWhileStatement(self): 487 def parseWhileStatement(self):
488 loc = self.getLocation()
473 self.Consume('while') 489 self.Consume('while')
474 condition = self.parseExpression() 490 condition = self.parseExpression()
475 self.Consume('do') 491 self.Consume('do')
476 statements = self.parseStatementSequence() 492 statements = self.parseStatementSequence()
477 if self.hasConsumed('elsif'): 493 if self.hasConsumed('elsif'):
478 self.Error('elsif in while not yet implemented') 494 self.Error('elsif in while not yet implemented')
479 self.Consume('end') 495 self.Consume('end')
480 return WhileStatement(condition, statements) 496 return self.setLocation(WhileStatement(condition, statements), loc)
481 497
482 def parseRepeatStatement(self): 498 def parseRepeatStatement(self):
483 self.Consume('repeat') 499 self.Consume('repeat')
484 stmt = self.parseStatementSequence() 500 stmt = self.parseStatementSequence()
485 self.Consume('until') 501 self.Consume('until')
486 cond = self.parseBoolExpression() 502 cond = self.parseBoolExpression()
487 503
488 def parseForStatement(self): 504 def parseForStatement(self):
505 loc = self.getLocation()
489 self.Consume('for') 506 self.Consume('for')
490 variable = self.parseDesignator() 507 variable = self.parseDesignator()
491 if not variable.typ.isType(integer): 508 if not variable.typ.isType(integer):
492 self.Error('loop variable of for statement must have integer type') 509 self.Error('loop variable of for statement must have integer type')
493 assert(variable.typ.isType(integer)) 510 assert(variable.typ.isType(integer))
507 increment = 1 524 increment = 1
508 assert(type(increment) is int) 525 assert(type(increment) is int)
509 self.Consume('do') 526 self.Consume('do')
510 statements = self.parseStatementSequence() 527 statements = self.parseStatementSequence()
511 self.Consume('end') 528 self.Consume('end')
512 return ForStatement(variable, begin, end, increment, statements) 529 return self.setLocation(ForStatement(variable, begin, end, increment, statements), loc)
513 530
514 def parseAsmcode(self): 531 def parseAsmcode(self):
532 # TODO: move this to seperate file
515 def parseOpcode(): 533 def parseOpcode():
516 return self.Consume('ID') 534 return self.Consume('ID')
517 def parseOperand(): 535 def parseOperand():
518 if self.hasConsumed('['): 536 if self.hasConsumed('['):
519 memref = [] 537 memref = []
620 638
621 # Parsing arithmatic expressions: 639 # Parsing arithmatic expressions:
622 def parseTerm(self): 640 def parseTerm(self):
623 a = self.parseFactor() 641 a = self.parseFactor()
624 while self.token.typ in ['*', '/', 'mod', 'div', 'and']: 642 while self.token.typ in ['*', '/', 'mod', 'div', 'and']:
643 loc = self.getLocation()
625 op = self.Consume() 644 op = self.Consume()
626 b = self.parseTerm() 645 b = self.parseTerm()
627 # Type determination and checking: 646 # Type determination and checking:
628 if op in ['mod', 'div']: 647 if op in ['mod', 'div']:
629 if not isType(a.typ, integer): 648 if not isType(a.typ, integer):
666 self.Error('Second operand of and must be boolean') 685 self.Error('Second operand of and must be boolean')
667 typ = boolean 686 typ = boolean
668 else: 687 else:
669 self.Error('Unknown operand {0}'.format(op)) 688 self.Error('Unknown operand {0}'.format(op))
670 689
671 a = Binop(a, op, b, typ) 690 a = self.setLocation(Binop(a, op, b, typ), loc)
672 return a 691 return a
673 692
674 def parseFactor(self): 693 def parseFactor(self):
675 if self.hasConsumed('('): 694 if self.hasConsumed('('):
676 e = self.parseExpression() 695 e = self.parseExpression()
677 self.Consume(')') 696 self.Consume(')')
678 return e 697 return e
679 elif self.token.typ == 'NUMBER': 698 elif self.token.typ == 'NUMBER':
680 val = self.Consume('NUMBER') 699 loc = self.getLocation()
681 return Constant(val, integer) 700 val = self.Consume('NUMBER')
701 return self.setLocation(Constant(val, integer), loc)
682 elif self.token.typ == 'REAL': 702 elif self.token.typ == 'REAL':
683 val = self.Consume('REAL') 703 loc = self.getLocation()
684 return Constant(val, real) 704 val = self.Consume('REAL')
705 return self.setLocation(Constant(val, real), loc)
685 elif self.token.typ == 'CHAR': 706 elif self.token.typ == 'CHAR':
686 val = self.Consume('CHAR') 707 val = self.Consume('CHAR')
687 return Constant(val, char) 708 return Constant(val, char)
688 elif self.token.typ == 'STRING': 709 elif self.token.typ == 'STRING':
689 txt = self.Consume('STRING') 710 txt = self.Consume('STRING')
721 if op == '-': 742 if op == '-':
722 a = Unop(a, op, typ) 743 a = Unop(a, op, typ)
723 else: 744 else:
724 a = self.parseTerm() 745 a = self.parseTerm()
725 while self.token.typ in ['+', '-', 'or']: 746 while self.token.typ in ['+', '-', 'or']:
747 loc = self.getLocation()
726 op = self.Consume() 748 op = self.Consume()
727 b = self.parseTerm() 749 b = self.parseTerm()
728 if op in ['+', '-']: 750 if op in ['+', '-']:
729 if isType(a.typ, real) or isType(b.typ, real): 751 if isType(a.typ, real) or isType(b.typ, real):
730 typ = real 752 typ = real
747 if not isType(b.typ, boolean): 769 if not isType(b.typ, boolean):
748 self.Error('second operand must be boolean for or operation') 770 self.Error('second operand must be boolean for or operation')
749 typ = boolean 771 typ = boolean
750 else: 772 else:
751 self.Error('Unknown operand {0}'.format(op)) 773 self.Error('Unknown operand {0}'.format(op))
752 a = Binop(a, op, b, typ) 774 a = self.setLocation(Binop(a, op, b, typ), loc)
753 return a 775 return a
754 776