100
|
1 from .symboltable import SymbolTable
|
70
|
2 from .nodes import *
|
100
|
3 from ...core.errors import CompilerException, Error
|
102
|
4 from .builtin import *
|
100
|
5 #from . import assembler
|
101
|
6 from .lexer import tokenize
|
100
|
7
|
|
8 class KsParser:
|
106
|
9 """ This module parses source code into an abstract syntax tree (AST) """
|
101
|
def __init__(self, source):
    """Create a parser for *source*.

    The source text is handed to ``tokenize`` (the lexical stage) and the
    first token is fetched immediately so ``self.token`` is valid before
    any parse routine runs.
    """
    # Lexical stage: tokenize() supplies the token iterator.
    self.tokens = tokenize(source)
    # Errors collected by the statement-level error recovery.
    self.errorlist = []
    self.NextToken()
|
|
15
|
|
def Error(self, msg):
    """Raise a CompilerException with *msg* at the current token's row/col."""
    current = self.token
    raise CompilerException(msg, current.row, current.col)
|
|
18
|
|
19 # Lexer helpers:
|
|
def Consume(self, typ=''):
    """Consume the current token and return its value.

    When *typ* is given (non-empty), the current token must be of that
    type; otherwise an error is reported through ``self.Error`` and the
    token is left in place. With the default ``''`` any token is accepted.
    """
    if typ != '' and self.token.typ != typ:
        # The message previously read 'Excected'; fixed spelling.
        self.Error('Expected: "{0}", got "{1}"'.format(typ, self.token.val))
        return None
    value = self.token.val
    self.NextToken()
    return value
|
|
27
|
|
def hasConsumed(self, typ):
    """Consume the current token if it is of type *typ*.

    Returns True when a token was consumed, False when the current token
    has another type (in which case nothing is consumed).
    """
    matched = self.token.typ == typ
    if matched:
        self.Consume(typ)
        return True
    return False
|
|
33
|
|
def NextToken(self):
    """Fetch the next token from the lexer and remember its (row, col)."""
    upcoming = next(self.tokens)
    self.token = upcoming
    # TODO: store filename in location?
    self.location = (upcoming.row, upcoming.col)
|
|
38
|
|
39 # Helpers to find location of the error in the code:
|
|
def setLocation(self, obj, location):
    """Store *location* on *obj* and return *obj* so calls can be chained."""
    setattr(obj, 'location', location)
    return obj
|
|
def getLocation(self):
    """Return the location recorded for the current token by NextToken."""
    current_location = self.location
    return current_location
|
|
45
|
|
46 """
|
|
47 Recursive descent parser functions:
|
|
48 A set of mutual recursive functions.
|
|
49 Starting symbol is the Module.
|
|
50 """
|
|
def parseModule(self):
    """Top level parsing routine.

    Parses ``module ID ;`` followed by the import list, the declaration
    sequence, procedure declarations, an optional ``begin`` block and the
    closing ``end ID .``. Returns the Module node with its symbol table,
    init code and imports attached.
    """
    self.imports = []
    start = self.getLocation()

    # Module header
    self.Consume('module')
    modname = self.Consume('ID')
    self.Consume(';')
    mod = Module(modname)

    # Construct a symbol table for this program and add the built in
    # types and functions to it.
    mod.symtable = SymbolTable()
    for builtin in [real, integer, boolean, char, chr_func]:
        mod.symtable.addSymbol(builtin)
    self.cst = mod.symtable

    self.parseImportList()
    self.parseDeclarationSequence()
    # Procedures only allowed in this scope
    self.parseProcedureDeclarations()

    # Optional module initialisation code
    if not self.hasConsumed('begin'):
        mod.initcode = EmptyStatement()
    else:
        mod.initcode = self.parseStatementSequence()

    # Module trailer: the name after 'end' must repeat the module name.
    self.Consume('end')
    closing = self.Consume('ID')
    if closing != modname:
        self.Error('end denoter must be module name')
    self.Consume('.')

    mod.imports = self.imports
    return self.setLocation(mod, start)
|
|
86
|
|
87 # Import part
|
|
def parseImportList(self):
    """Parse an optional ``import a, b, c;`` clause."""
    if not self.hasConsumed('import'):
        return
    # At least one import, then any number of comma separated ones.
    self.parseImport()
    while self.hasConsumed(','):
        self.parseImport()
    self.Consume(';')
|
|
94
|
|
def parseImport(self):
    """Parse a single imported module name.

    Module loading is not implemented: the call that should produce the
    module object is disabled, so the previous code crashed with a
    NameError on the unassigned ``mod``. Until loading exists, an import
    is reported as a regular compile error via ``self.Error``.
    """
    modname = self.Consume('ID')
    # TODO: fix -- load the module (mod = loadModule(modname)), give it the
    # location of the import and add it to the current symbol table.
    self.Error('Cannot import module "{0}": module loading is not implemented'.format(modname))
|
|
102
|
|
103 # Helper to parse an identifier definition
|
|
def parseIdentDef(self):
    """Parse ``ID ['*']`` and return an Id node.

    The trailing ``*`` marks the identifier as public; the result is
    stored in the node's ``ispublic`` attribute.
    """
    start = self.getLocation()
    ident_name = self.Consume('ID')
    exported = self.hasConsumed('*')
    # Make a node of this thing:
    node = Id(ident_name)
    node.ispublic = exported
    return self.setLocation(node, start)
|
|
112
|
|
def parseIdentList(self):
    """Parse a comma separated list of identifier definitions."""
    idents = []
    while True:
        idents.append(self.parseIdentDef())
        if not self.hasConsumed(','):
            return idents
|
|
118
|
|
def parseQualIdent(self):
    """Parse a qualified identifier: ``ID`` or ``module '.' ID``.

    A plain identifier is looked up in the current symbol table. When the
    first identifier names a Module, the symbol after the dot is resolved
    to an ImportedSymbol (reusing one already recorded in
    ``self.imports``); an unknown symbol is reported as an error.
    """
    first = self.Consume('ID')
    if not self.cst.has(Module, first):
        return self.cst.getSymbol(first)

    modname = first
    mod = self.cst.get(Module, modname)
    self.Consume('.')
    symname = self.Consume('ID')

    # Try to find existing imported symbol:
    for known in self.imports:
        if known.modname == modname and known.name == symname:
            return known

    # Try to find the symbol in the modules exports:
    for exported in mod.exports:
        if exported.name != symname:
            continue
        impsym = ImportedSymbol(modname, symname)
        impsym.typ = exported.typ
        impsym.signature = mod.signature
        self.imports.append(impsym)
        return impsym

    self.Error("Cannot find symbol {0}".format(symname))
|
70
|
142
|
100
|
143 # Helper to parse a designator
|
|
def parseDesignator(self):
    """Parse a designator: a qualified identifier followed by selectors.

    A designator designates an object. The base location in memory is
    denoted by the qualified identifier, the actual address depends on
    the selectors (``.field``, ``[index, ...]`` and ``^``).

    Returns a Designator node carrying the object, the selector list and
    the resulting type. Type errors are reported through ``self.Error``.
    """
    loc = self.getLocation()
    obj = self.parseQualIdent()
    typ = obj.typ
    selectors = []
    while self.token.typ in ['.', '[', '^']:
        if self.hasConsumed('.'):
            field = self.Consume('ID')
            # Implicit dereference: selecting a field through a pointer.
            # The check must look at the *class* of typ; comparing the
            # instance itself with PointerType was always False.
            if type(typ) is PointerType:
                selectors.append(Deref())
                typ = typ.pointedType
            if not type(typ) is RecordType:
                self.Error("field reference, type not record but {0}".format(typ))
            try:
                typ = typ.fields[field]
            except KeyError:
                # Report an unknown field instead of crashing the parser.
                self.Error('record has no field "{0}"'.format(field))
            selectors.append(Field(field))
        elif self.hasConsumed('['):
            indexes = self.parseExpressionList()
            self.Consume(']')
            # Each index peels off one array dimension.
            for idx in indexes:
                if not type(typ) is ArrayType:
                    self.Error('Cannot index non array type')
                if not isType(idx.typ, integer):
                    self.Error('Only integer expressions can be used as an index')
                selectors.append(Index(idx, typ))
                typ = typ.elementType
        elif self.hasConsumed('^'):
            if not type(typ) is PointerType:
                self.Error('Cannot dereference non pointer type {0}'.format(typ))
            selectors.append(Deref())
            typ = typ.pointedType
    return self.setLocation(Designator(obj, selectors, typ), loc)
|
|
177
|
|
178 # Declaration sequence
|
|
def parseDeclarationSequence(self):
    """Parse declarations in fixed order: constants, types, variables."""
    sections = (
        self.parseConstantDeclarations,
        self.parseTypeDeclarations,
        self.parseVariableDeclarations,
    )
    for parse_section in sections:
        parse_section()
|
|
184
|
|
185 # Constants
|
|
def evalExpression(self, expr):
    """Evaluate a constant expression tree at compile time.

    Handles Binop, Unop, Constant and Designator nodes (the latter only
    when the designated object is itself a Constant). Anything else,
    including an unknown operator or a division by zero, is reported
    through ``self.Error``.
    """
    if type(expr) is Binop:
        a = self.evalExpression(expr.a)
        b = self.evalExpression(expr.b)
        try:
            if expr.op == '+':
                return a + b
            elif expr.op == '-':
                return a - b
            elif expr.op == '*':
                return a * b
            elif expr.op == '/':
                return float(a) / float(b)
            elif expr.op == 'mod':
                return int(a % b)
            elif expr.op == 'div':
                return int(a / b)
            elif expr.op == 'or':
                return a or b
            elif expr.op == 'and':
                return a and b
            else:
                self.Error('Cannot evaluate expression with {0}'.format(expr.op))
        except ZeroDivisionError:
            # A constant like "1 div 0" must be a compile error, not a
            # Python traceback out of the compiler.
            self.Error('Division by zero in constant expression')
    elif type(expr) is Constant:
        return expr.value
    elif type(expr) is Designator:
        if type(expr.obj) is Constant:
            return self.evalExpression(expr.obj)
        else:
            self.Error('Cannot evaluate designated object {0}'.format(expr.obj))
    elif type(expr) is Unop:
        a = self.evalExpression(expr.a)
        if expr.op == 'not':
            return not a
        elif expr.op == '-':
            return -a
        else:
            self.Error('Unimplemented unary operation {0}'.format(expr.op))
    else:
        self.Error('Cannot evaluate expression {0}'.format(expr))
|
|
225
|
110
|
def parseConstant(self):
    """Parse an expression and fold it into a Constant of the same type."""
    expr = self.parseExpression()
    folded = self.evalExpression(expr)
    return Constant(folded, expr.typ)
|
100
|
230
|
|
def parseConstantDeclarations(self):
    """Parse the optional ``const`` part: ``const { identdef = constant ; }``.

    Every constant gets the name, visibility and location of its
    identifier and is added to the current symbol table.
    """
    if not self.hasConsumed('const'):
        return
    while self.token.typ == 'ID':
        ident = self.parseIdentDef()
        self.Consume('=')
        const = self.parseConstant()
        self.Consume(';')
        const.name = ident.name
        const.public = ident.ispublic
        self.setLocation(const, ident.location)
        self.cst.addSymbol(const)
|
|
243
|
|
244 # Type system
|
|
def parseTypeDeclarations(self):
    """Parse the optional ``type`` part: ``type { identdef = type ; }``.

    Each declared type is wrapped in a DefinedType and added to the
    current symbol table.
    """
    if not self.hasConsumed('type'):
        return
    while self.token.typ == 'ID':
        # parseIdentDef returns a single Id node (name, ispublic,
        # location); it cannot be unpacked into two values.
        ident = self.parseIdentDef()
        self.Consume('=')
        typ = self.parseStructuredType()
        self.Consume(';')
        t = DefinedType(ident.name, typ)
        # Record visibility and location like constant declarations do.
        t.public = ident.ispublic
        self.setLocation(t, ident.location)
        self.cst.addSymbol(t)
|
|
254
|
|
def parseType(self):
    """Parse a type: either a named type or a structured type.

    A named type is looked up in the current symbol table and DefinedType
    wrappers are followed until the underlying type is reached.
    """
    if self.token.typ != 'ID':
        return self.parseStructuredType()

    typename = self.Consume('ID')
    if not self.cst.has(Type, typename):
        self.Error('Cannot find type {0}'.format(typename))
        return None

    resolved = self.cst.get(Type, typename)
    while type(resolved) is DefinedType:
        resolved = resolved.typ
    return resolved
|
|
267
|
|
def parseStructuredType(self):
    """Parse an array, record, pointer or procedure type.

    * ``array c1, c2 of T`` builds nested ArrayTypes; every dimension
      must be an integer constant of at least 2.
    * ``record ... end`` builds a RecordType from the field lists.
    * ``pointer to T`` builds a PointerType.
    * ``procedure (...)`` yields the ProcedureType produced by
      parseFormalParameters.

    Anything else is reported through ``self.Error``.
    """
    if self.hasConsumed('array'):
        dimensions = [self.parseConstant()]
        while self.hasConsumed(','):
            dimensions.append(self.parseConstant())
        self.Consume('of')
        arr = self.parseType()
        # Wrap from the innermost (last) dimension outwards.
        for dimension in reversed(dimensions):
            if not isType(dimension.typ, integer):
                # The message used an undefined name; report the
                # offending dimension's type.
                self.Error('array dimension must be an integer type (not {0})'.format(dimension.typ))
            if dimension.value < 2:
                self.Error('array dimension must be bigger than 1 (not {0})'.format(dimension.value))
            arr = ArrayType(dimension.value, arr)
        return arr
    elif self.hasConsumed('record'):
        fields = {}
        while self.token.typ == 'ID':
            # parse a fieldlist:
            identifiers = self.parseIdentList()
            self.Consume(':')
            typ = self.parseType()
            self.Consume(';')
            for i in identifiers:
                if i.name in fields:
                    self.Error('record field "{0}" multiple defined.'.format(i.name))
                fields[i.name] = typ
            # TODO store this in another way, symbol table?
        self.Consume('end')
        return RecordType(fields)
    elif self.hasConsumed('pointer'):
        self.Consume('to')
        typ = self.parseType()
        return PointerType(typ)
    elif self.hasConsumed('procedure'):
        # parseFormalParameters already returns a complete ProcedureType;
        # unpacking it into (parameters, returntype) raised a TypeError.
        return self.parseFormalParameters()
    else:
        self.Error('Unknown structured type "{0}"'.format(self.token.val))
|
70
|
307
|
100
|
308 # Variable declarations:
|
|
def parseVariableDeclarations(self):
    """Parse the optional ``var`` part: ``var identlist : type ; { ... }``.

    At least one identifier list must follow ``var``. Every identifier
    becomes a Variable in the current symbol table.
    """
    if not self.hasConsumed('var'):
        return
    if self.token.typ != 'ID':
        self.Error('Expected ID, got'+str(self.token))
        return
    while self.token.typ == 'ID':
        idents = self.parseIdentList()
        self.Consume(':')
        vartype = self.parseType()
        self.Consume(';')
        for ident in idents:
            var = Variable(ident.name, vartype, public=ident.ispublic)
            self.setLocation(var, ident.location)
            self.cst.addSymbol(var)
|
|
323
|
|
324 # Procedures
|
|
def parseFPsection(self):
    """Parse one formal parameter section: ``[const|var] ID {, ID} : type``.

    Returns a list with one Parameter per name; the kind is 'const',
    'var' or (when no keyword is present) 'value'.
    """
    kind = 'value'
    for keyword in ('const', 'var'):
        if self.hasConsumed(keyword):
            kind = keyword
            break

    names = []
    while True:
        names.append(self.Consume('ID'))
        if not self.hasConsumed(','):
            break

    self.Consume(':')
    typ = self.parseType()

    parameters = []
    for name in names:
        parameters.append(Parameter(kind, name, typ))
    return parameters
|
|
340
|
|
def parseFormalParameters(self):
    """Parse ``( [FPsection {; FPsection}] ) [: qualident]``.

    Returns a ProcedureType built from the collected parameters and the
    return type (``void`` when no return type is given).
    """
    collected = []
    self.Consume('(')
    if not self.hasConsumed(')'):
        collected.extend(self.parseFPsection())
        while self.hasConsumed(';'):
            collected.extend(self.parseFPsection())
        self.Consume(')')
    returntype = self.parseQualIdent() if self.hasConsumed(':') else void
    return ProcedureType(collected, returntype)
|
|
354
|
|
def parseProcedureDeclarations(self):
    """Parse consecutive ``procedure ... ;`` declarations into a list."""
    declared = []
    while self.token.typ == 'procedure':
        declared.append(self.parseProcedureDeclaration())
        self.Consume(';')
    return declared
|
|
362
|
|
def parseProcedureDeclaration(self):
    """Parse one procedure declaration and return its Procedure node.

    Grammar: ``procedure identdef FormalParameters ; DeclarationSequence
    [begin StatementSequence] [return expression] end ID``.

    The body is parsed inside a fresh symbol table whose parent is the
    current one; parameters are entered there as variables ('var'
    parameters as pointers, 'const' parameters read-only). The procedure
    itself is added to the enclosing symbol table afterwards.
    """
    loc = self.getLocation()
    self.Consume('procedure')
    i = self.parseIdentDef()
    procname = i.name
    proctyp = self.parseFormalParameters()
    procsymtable = SymbolTable(parent = self.cst)
    self.cst = procsymtable    # Switch symbol table:
    try:
        # Add parameters as variables to symbol table:
        for parameter in proctyp.parameters:
            vname = parameter.name
            vtyp = parameter.typ
            if parameter.kind == 'var':
                vtyp = PointerType(vtyp)
            variable = Variable(vname, vtyp, False)
            if parameter.kind == 'const':
                variable.isReadOnly = True
            variable.isParameter = True
            self.cst.addSymbol(variable)
        self.Consume(';')
        self.parseDeclarationSequence()
        # Mark all variables as local:
        for variable in self.cst.getAllLocal(Variable):
            variable.isLocal = True

        # A procedure without 'begin' has an empty body; previously
        # 'block' stayed unassigned in that case (NameError below).
        if self.hasConsumed('begin'):
            block = self.parseStatementSequence()
        else:
            block = EmptyStatement()
        if self.hasConsumed('return'):
            returnexpression = self.parseExpression()
        else:
            returnexpression = None

        if proctyp.returntype.isType(void):
            if not returnexpression is None:
                self.Error('Void procedure cannot return a value')
        else:
            if returnexpression is None:
                self.Error('Procedure must return a value')
            if not isType(returnexpression.typ, proctyp.returntype):
                self.Error('Returned type {0} does not match function return type {1}'.format(returnexpression.typ, proctyp.returntype))

        self.Consume('end')
        endname = self.Consume('ID')
        if endname != procname:
            # The message referenced an undefined name.
            self.Error('endname should match {0}'.format(procname))
    finally:
        # Switch back to parent symbol table, also when an error was
        # raised, so the parser is not left in the procedure scope.
        self.cst = procsymtable.parent
    proc = Procedure(procname, proctyp, block, procsymtable, returnexpression)
    self.setLocation(proc, loc)
    self.cst.addSymbol(proc)
    proc.public = i.ispublic
    return proc
|
|
414
|
|
415 # Statements:
|
|
def parseAssignment(self, lval):
    """Parse ``:= expression`` for the already parsed designator *lval*.

    An integer right hand side is converted to real when the target is
    real. ``nil`` may only be assigned to pointer or procedure typed
    targets; otherwise both sides must have the same type.
    """
    loc = self.getLocation()
    self.Consume(':=')
    rval = self.parseExpression()

    # Automatic integer to real conversion
    if isType(lval.typ, real) and isType(rval.typ, integer):
        rval = Unop(rval, 'INTTOREAL', real)

    assigning_nil = type(rval.typ) is NilType
    if assigning_nil:
        target_accepts_nil = type(lval.typ) is ProcedureType or type(lval.typ) is PointerType
        if not target_accepts_nil:
            self.Error('Can assign nil only to pointers or procedure types, not {0}'.format(lval))
    elif not isType(lval.typ, rval.typ):
        self.Error('Type mismatch {0} != {1}'.format(lval.typ, rval.typ))

    assignment = Assignment(lval, rval)
    return self.setLocation(assignment, loc)
|
|
428
|
|
def parseExpressionList(self):
    """Parse one or more comma separated expressions into a list."""
    parsed = []
    while True:
        parsed.append(self.parseExpression())
        if not self.hasConsumed(','):
            return parsed
|
|
434
|
|
def parseProcedureCall(self, procedure):
    """Parse the actual parameters of a call to *procedure*.

    *procedure* is the designator whose type is a ProcedureType. The
    number of arguments and the type of every argument are checked
    against the formal parameters. Returns a ProcedureCall node.
    """
    self.Consume('(')
    if self.token.typ != ')':
        args = self.parseExpressionList()
    else:
        args = []
    self.Consume(')')
    # Type checking:
    parameters = procedure.typ.parameters
    if len(args) != len(parameters):
        self.Error("Procedure requires {0} arguments, {1} given".format(len(parameters), len(args)))
    for arg, param in zip(args, parameters):
        if not arg.typ.isType(param.typ):
            # The offending types go into the error message itself
            # instead of a stray debug print on stdout.
            self.Error('Mismatch in parameter: got {0}, expected {1}'.format(arg.typ, param.typ))
    return ProcedureCall(procedure, args)
|
70
|
451
|
100
|
def parseIfStatement(self):
    """Parse ``if c then s {elsif c then s} [else s] end``.

    The elsif chain is folded from the back into nested IfStatement
    nodes, so each elsif ends up in the else slot of the one before it.
    """
    loc = self.getLocation()
    self.Consume('if')

    # Collect (condition, statements) for 'if' and every 'elsif'.
    branches = []
    while True:
        condition = self.parseExpression()
        if not isType(condition.typ, boolean):
            self.Error('condition of if statement must be boolean')
        self.Consume('then')
        truestatement = self.parseStatementSequence()
        branches.append((condition, truestatement))
        if not self.hasConsumed('elsif'):
            break

    statement = self.parseStatementSequence() if self.hasConsumed('else') else None
    self.Consume('end')

    for condition, truestatement in reversed(branches):
        statement = IfStatement(condition, truestatement, statement)
    return self.setLocation(statement, loc)
|
|
477
|
|
def parseCase(self):
    """Placeholder for parsing a single case label; consumes nothing."""
    # TODO
    return None
|
|
481
|
|
def parseCaseStatement(self):
    """Parse ``case expr of case {| case} end`` and report it as unsupported.

    No AST node is built for case statements yet, and the previous code
    silently returned None, which then ended up as a statement in the
    enclosing statement sequence. The construct is still parsed up to
    its own ``end`` so the error is raised with the parser positioned
    behind the statement.
    """
    self.Consume('case')
    self.parseExpression()
    self.Consume('of')
    self.parseCase()
    while self.hasConsumed('|'):
        self.parseCase()
    self.Consume('end')
    self.Error('case statement not yet implemented')
|
|
490
|
|
def parseWhileStatement(self):
    """Parse ``while condition do statements end``.

    The condition must be boolean, the same requirement the if statement
    enforces. An ``elsif`` branch is recognised but not yet implemented
    and is reported as an error. Returns a WhileStatement node.
    """
    loc = self.getLocation()
    self.Consume('while')
    condition = self.parseExpression()
    if not isType(condition.typ, boolean):
        self.Error('condition of while statement must be boolean')
    self.Consume('do')
    statements = self.parseStatementSequence()
    if self.hasConsumed('elsif'):
        self.Error('elsif in while not yet implemented')
    self.Consume('end')
    return self.setLocation(WhileStatement(condition, statements), loc)
|
|
501
|
|
def parseRepeatStatement(self):
    """Parse ``repeat statements until condition`` and report it as unsupported.

    The condition is parsed with parseExpression; the previously called
    ``parseBoolExpression`` is not defined in the visible part of the
    class. No AST node is built for repeat statements yet (TODO), so the
    statement is reported as an error after it has been parsed
    completely, instead of silently returning None.
    """
    self.Consume('repeat')
    self.parseStatementSequence()
    self.Consume('until')
    self.parseExpression()
    # TODO: build a repeat node once the AST supports it.
    self.Error('repeat statement not yet implemented')
|
100
|
508
|
|
def parseForStatement(self):
    """Parse ``for v := begin to end [by const] do statements end``.

    The loop variable and both bounds must be integer typed; the optional
    increment must be an integer constant and defaults to 1. Returns a
    ForStatement node.
    """
    loc = self.getLocation()
    self.Consume('for')

    # Loop variable
    variable = self.parseDesignator()
    if not variable.typ.isType(integer):
        self.Error('loop variable of for statement must have integer type')
    assert(variable.typ.isType(integer))

    # Lower bound
    self.Consume(':=')
    begin = self.parseExpression()
    if not begin.typ.isType(integer):
        self.Error('begin expression of a for statement must have integer type')

    # Upper bound
    self.Consume('to')
    end = self.parseExpression()
    if not end.typ.isType(integer):
        self.Error('end expression of a for statement must have integer type')

    # Optional constant step
    step = 1
    if self.hasConsumed('by'):
        stepconst = self.parseConstant()
        if not stepconst.typ.isType(integer):
            self.Error('Increment must be integer')
        step = stepconst.value
    assert(type(step) is int)

    self.Consume('do')
    statements = self.parseStatementSequence()
    self.Consume('end')
    loop = ForStatement(variable, begin, end, step, statements)
    return self.setLocation(loop, loc)
|
70
|
536
|
100
|
def parseAsmcode(self):
    """Parse an inline assembly block: ``asm { opcode operands } end``.

    Every instruction is an opcode identifier followed by the number of
    comma separated operands that ``assembler.opcodes`` lists for it.
    Operands are numbers, identifiers (resolved to a Variable when the
    current symbol table knows one) or ``[operand]`` memory references.
    Returns an AsmCode node holding (opcode, operands) tuples.
    """
    # TODO: move this to separate file
    # TODO: determine what to do with inline asm?
    # The module level import of the assembler is disabled, so the name
    # was unbound here. Import it lazily and report a compile error when
    # it is unavailable.
    # NOTE(review): assumes the assembler lives at '.assembler', as the
    # commented-out import at the top of the file suggests -- confirm.
    try:
        from . import assembler
    except ImportError:
        self.Error('inline assembler is not available')

    def parseOpcode():
        return self.Consume('ID')

    def parseOperand():
        if self.hasConsumed('['):
            memref = [parseOperand()]
            self.Consume(']')
            return memref
        if self.token.typ == 'NUMBER':
            return self.Consume('NUMBER')
        ID = self.Consume('ID')
        if self.cst.has(Variable, ID):
            return self.cst.get(Variable, ID)
        return ID

    def parseOperands(n):
        operands = []
        for index in range(n):
            if index:
                self.Consume(',')
            operands.append(parseOperand())
        return operands

    self.Consume('asm')
    asmcode = []
    while self.token.typ != 'end':
        opcode = parseOpcode()
        try:
            func, numargs = assembler.opcodes[opcode]
        except KeyError:
            # Unknown mnemonic: a compile error, not a KeyError traceback.
            self.Error('Unknown opcode "{0}"'.format(opcode))
        operands = parseOperands(numargs)
        asmcode.append( (opcode, operands) )
    self.Consume('end')
    return AsmCode(asmcode)
|
70
|
578
|
100
|
def parseStatement(self):
    """Parse one statement, selected by the pending token.

    Keyword statements are dispatched to their parse routine; an
    identifier starts an assignment or a procedure call; anything else
    yields an EmptyStatement. A CompilerException is printed, recorded in
    ``self.errorlist`` and recovered from by skipping to the next ``;``
    or ``end``.
    """
    try:
        # Determine statement type based on the pending token:
        kind = self.token.typ
        keyword_parsers = {
            'if': self.parseIfStatement,
            'case': self.parseCaseStatement,
            'while': self.parseWhileStatement,
            'repeat': self.parseRepeatStatement,
            'for': self.parseForStatement,
            'asm': self.parseAsmcode,
        }
        if kind in keyword_parsers:
            return keyword_parsers[kind]()
        if kind != 'ID':
            # TODO: return empty statement??:
            return EmptyStatement()

        # Assignment or procedure call
        designator = self.parseDesignator()
        if self.token.typ == '(' and type(designator.typ) is ProcedureType:
            return self.parseProcedureCall(designator)
        if self.token.typ == ':=':
            return self.parseAssignment(designator)
        self.Error('Unknown statement following designator: {0}'.format(self.token))
        # Only reached if the Error call above did not raise.
        self.Error('Unknown statement {0}'.format(self.token))
    except CompilerException as e:
        print(e)
        self.errorlist.append( (e.row, e.col, e.msg))
        # Do error recovery by skipping all tokens until next ; or end
        while self.token.typ not in (';', 'end'):
            self.Consume(self.token.typ)
        return EmptyStatement()
|
|
614
|
|
def parseStatementSequence(self):
    """Parse statements separated by ';' into a StatementSequence."""
    collected = []
    while True:
        collected.append(self.parseStatement())
        if not self.hasConsumed(';'):
            break
    return StatementSequence(collected)
|
70
|
621
|
|
622 # Parsing expressions:
|
|
623 """
|
|
624 grammar of expressions:
|
100
|
625 expression = SimpleExpression [ reloperator SimpleExpression ]
|
|
626 reloperator = '=' | '<' | '>' | '<=' | '>=' | '<>'
|
|
627 Simpleexpression = [ '+' | '-' ] term { addoperator term }
|
|
628 addoperator = '+' | '-' | 'or'
|
70
|
629 term = factor { muloperator factor }
|
100
|
630 muloperator = '*' | '/' | 'div' | 'mod' | 'and'
|
|
631 factor = number | nil | true | false | "(" expression ")" |
|
|
632 designator [ actualparameters ] | 'not' factor
|
70
|
633 """
|
110
|
def getTokenPrecedence(self):
    """Return the binary operator precedence of the current token.

    Higher numbers bind tighter; 0 means the token is not a binary
    operator.
    """
    precedences = {
        'and': 8,
        'or': 6,
        '<': 10, '>': 10, '=': 10, '<=': 10, '>=': 10, '<>': 10,
        '+': 20, '-': 20,
        '*': 40, '/': 40, 'div': 40, 'mod': 40,
    }
    return precedences.get(self.token.typ, 0)
|
|
def parsePrimary(self):
    """Placeholder for precedence based expression parsing; does nothing."""
    return None
|
100
|
def parseExpression(self):
    """Parse ``SimpleExpression [relop SimpleExpression]``.

    The connector between the boolean and expression domain: a relational
    operator turns two simple expressions into a boolean Relop. Integer
    operands are converted to real when the other side is real, and reals
    may not be compared for (in)equality.
    """
    # TODO: implement precedence binding
    #lhs = self.parsePrimary()
    #return self.parseBinopRhs(lhs)

    left = self.parseSimpleExpression()
    if self.token.typ not in ['>=','<=','<','>','<>','=']:
        return left

    relop = self.Consume()
    right = self.parseSimpleExpression()

    # Automatic type convert to reals:
    if isType(left.typ, real) and isType(right.typ, integer):
        right = Unop(right, 'INTTOREAL', real)
    if isType(right.typ, real) and isType(left.typ, integer):
        left = Unop(left, 'INTTOREAL', real)

    # Type check:
    if not isType(left.typ, right.typ):
        self.Error('Type mismatch in relop')
    if isType(left.typ, real) and relop in ['<>', '=']:
        self.Error('Cannot check real values for equality')

    return Relop(left, relop, right, boolean)
|
70
|
679
|
|
680 # Parsing arithmetic expressions:
|
|
def parseTerm(self):
    """Parse ``factor { muloperator factor }`` into left-nested Binops.

    muloperator is one of ``*``, ``/``, ``div``, ``mod`` and ``and``.
    ``div``/``mod`` need integer operands, ``/`` always yields a real,
    ``*`` yields integer or real (with automatic integer to real
    conversion) and ``and`` needs boolean operands.
    """
    a = self.parseFactor()
    while self.token.typ in ['*', '/', 'mod', 'div', 'and']:
        loc = self.getLocation()
        op = self.Consume()
        # The right operand is a single factor, as the grammar says.
        # Recursing into parseTerm here made the operators right
        # associative (8 div 4 div 2 parsed as 8 div (4 div 2)).
        b = self.parseFactor()
        # Type determination and checking:
        if op in ['mod', 'div']:
            if not isType(a.typ, integer):
                self.Error('First operand should be integer, not {0}'.format(a.typ))
            if not isType(b.typ, integer):
                self.Error('Second operand should be integer, not {0}'.format(b.typ))
            typ = integer
        elif op == '*':
            if isType(a.typ, integer) and isType(b.typ, integer):
                typ = integer
            elif isType(a.typ, real) or isType(b.typ, real):
                if isType(a.typ, integer):
                    # Automatic type cast
                    a = Unop(a, 'INTTOREAL', real)
                if isType(b.typ, integer):
                    b = Unop(b, 'INTTOREAL', real)
                if not isType(a.typ, real):
                    self.Error('first operand must be a real!')
                if not isType(b.typ, real):
                    self.Error('second operand must be a real!')
                typ = real
            else:
                self.Error('Unknown operands for multiply: {0}, {1}'.format(a, b))
        elif op == '/':
            # Division always yields a real result, for integer division use div
            if isType(a.typ, integer):
                # Automatic type cast
                a = Unop(a, 'INTTOREAL', real)
            if isType(b.typ, integer):
                b = Unop(b, 'INTTOREAL', real)
            if not isType(a.typ, real):
                self.Error('first operand must be a real!')
            if not isType(b.typ, real):
                self.Error('second operand must be a real!')
            typ = real
        elif op == 'and':
            if not isType(a.typ, boolean):
                self.Error('First operand of and must be boolean')
            if not isType(b.typ, boolean):
                self.Error('Second operand of and must be boolean')
            typ = boolean
        else:
            self.Error('Unknown operand {0}'.format(op))

        a = self.setLocation(Binop(a, op, b, typ), loc)
    return a
|
|
733
|
|
def parseFactor(self):
    """Parse a factor.

    A factor is a parenthesised expression, a NUMBER / REAL / CHAR /
    STRING literal, ``true``, ``false``, ``nil``, ``not factor`` or a
    designator, optionally followed by a procedure call. Anything else
    is reported as an error.
    """
    if self.hasConsumed('('):
        inner = self.parseExpression()
        self.Consume(')')
        return inner

    kind = self.token.typ

    # Numeric literals carry their source location.
    numeric_types = {'NUMBER': integer, 'REAL': real}
    if kind in numeric_types:
        loc = self.getLocation()
        val = self.Consume(kind)
        return self.setLocation(Constant(val, numeric_types[kind]), loc)

    if kind == 'CHAR':
        return Constant(self.Consume('CHAR'), char)
    if kind == 'STRING':
        return StringConstant(self.Consume('STRING'))

    for keyword, value in (('true', True), ('false', False)):
        if self.hasConsumed(keyword):
            return Constant(value, boolean)
    if self.hasConsumed('nil'):
        return Constant(0, NilType())

    if self.hasConsumed('not'):
        operand = self.parseFactor()
        if not isType(operand.typ, boolean):
            self.Error('argument of boolean negation must be boolean type')
        return Unop(operand, 'not', boolean)

    if self.token.typ == 'ID':
        designator = self.parseDesignator()
        # TODO: handle functions different here?
        if self.token.typ == '(' and type(designator.typ) is ProcedureType:
            return self.parseProcedureCall(designator)
        return designator

    self.Error('Expected NUMBER, ID or ( expr ), got'+str(self.token))
|
|
773
|
|
def parseSimpleExpression(self):
    """Parse ``['+' | '-'] term { addoperator term }``.

    addoperator is ``+``, ``-`` or ``or``. A leading sign is only allowed
    on real or integer terms. ``+``/``-`` yield integer for two integers
    and real otherwise (with automatic integer to real conversion);
    ``or`` needs boolean operands.
    """
    if self.token.typ not in ['+', '-']:
        a = self.parseTerm()
    else:
        # Handle the unary minus
        sign = self.Consume()
        a = self.parseTerm()
        signed_typ = a.typ
        if not isType(signed_typ, real) and not isType(signed_typ, integer):
            self.Error('Unary minus or plus can be only applied to real or integers')
        if sign == '-':
            a = Unop(a, sign, signed_typ)

    while self.token.typ in ['+', '-', 'or']:
        loc = self.getLocation()
        op = self.Consume()
        b = self.parseTerm()
        if op == 'or':
            if not isType(a.typ, boolean):
                self.Error('first operand must be boolean for or operation')
            if not isType(b.typ, boolean):
                self.Error('second operand must be boolean for or operation')
            typ = boolean
        elif op in ['+', '-']:
            if isType(a.typ, real) or isType(b.typ, real):
                typ = real
                if isType(a.typ, integer):
                    # Automatic type cast
                    a = Unop(a, 'INTTOREAL', real)
                if not isType(a.typ, real):
                    self.Error('first operand must be a real!')
                if isType(b.typ, integer):
                    b = Unop(b, 'INTTOREAL', real)
                if not isType(b.typ, real):
                    self.Error('second operand must be a real!')
            elif isType(a.typ, integer) and isType(b.typ, integer):
                typ = integer
            else:
                self.Error('Invalid types {0} and {1}'.format(a.typ, b.typ))
        else:
            self.Error('Unknown operand {0}'.format(op))
        a = self.setLocation(Binop(a, op, b, typ), loc)
    return a
|
|
817
|