comparison python/c3/parser.py @ 220:3f6c30a5d234

Major change in expression parsing to enable pointers and structs
author Windel Bouwman
date Sat, 06 Jul 2013 21:32:20 +0200
parents 1fa3e0050b49
children 848c4b15fd0b
comparison
equal deleted inserted replaced
219:1fa3e0050b49 220:3f6c30a5d234
1 from . import astnodes, lexer 1 from . import astnodes, lexer
2 from ppci import CompilerError 2 from ppci import CompilerError
3
4 # binop precedence for expressions:
5 binopPrecs = {'or': 5, 'and': 10, \
6 '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20, \
7 '+': 30, '-': 30, '*': 40, '/': 40 }
8 3
9 class Parser: 4 class Parser:
10 """ Parses sourcecode into an abstract syntax tree (AST) """ 5 """ Parses sourcecode into an abstract syntax tree (AST) """
11 def __init__(self, diag): 6 def __init__(self, diag):
12 self.diag = diag 7 self.diag = diag
16 try: 11 try:
17 self.parsePackage() 12 self.parsePackage()
18 return self.mod 13 return self.mod
19 except CompilerError as e: 14 except CompilerError as e:
20 self.diag.addDiag(e) 15 self.diag.addDiag(e)
16
21 def Error(self, msg): 17 def Error(self, msg):
22 raise CompilerError(msg, self.token.loc) 18 raise CompilerError(msg, self.token.loc)
19
23 # Lexer helpers: 20 # Lexer helpers:
24 def Consume(self, typ): 21 def Consume(self, typ):
25 if self.Peak == typ: 22 if self.Peak == typ:
26 return self.NextToken() 23 return self.NextToken()
27 else: 24 else:
28 self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) 25 self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak))
26
29 @property 27 @property
30 def Peak(self): 28 def Peak(self):
31 return self.token.typ 29 return self.token.typ
32 @property 30
33 def PeakPrec(self):
34 if self.Peak in binopPrecs:
35 return binopPrecs[self.Peak]
36 return -1
37 def hasConsumed(self, typ): 31 def hasConsumed(self, typ):
38 if self.Peak == typ: 32 if self.Peak == typ:
39 self.Consume(typ) 33 self.Consume(typ)
40 return True 34 return True
41 return False 35 return False
42 36
43 def NextToken(self): 37 def NextToken(self):
44 t = self.token 38 t = self.token
45 if t.typ != 'END': 39 if t.typ != 'END':
46 self.token = self.tokens.__next__() 40 self.token = self.tokens.__next__()
47 return t 41 return t
48 42
49 def initLex(self, source): 43 def initLex(self, source):
50 self.tokens = lexer.tokenize(source) # Lexical stage 44 self.tokens = lexer.tokenize(source) # Lexical stage
51 self.token = self.tokens.__next__() 45 self.token = self.tokens.__next__()
46
52 def addDeclaration(self, decl): 47 def addDeclaration(self, decl):
53 self.currentPart.declarations.append(decl) 48 self.currentPart.declarations.append(decl)
54 49
55 def parseUses(self): 50 def parseUses(self):
51 # TODO: parse uses
56 pass 52 pass
57 53
58 def parsePackage(self): 54 def parsePackage(self):
59 self.Consume('package') 55 self.Consume('package')
60 name = self.Consume('ID') 56 name = self.Consume('ID')
61 self.Consume(';') 57 self.Consume(';')
62 self.mod = astnodes.Package(name.val, name.loc) 58 self.mod = astnodes.Package(name.val, name.loc)
63 self.currentPart = self.mod 59 self.currentPart = self.mod
64 self.parseUses() 60 self.parseUses()
65 # TODO: parse uses 61 while self.Peak != 'END':
66 while self.Peak != 'END': 62 self.parseTopLevel()
67 self.parseTopLevel() 63 self.Consume('END')
68 self.Consume('END')
69 64
70 def parseTopLevel(self): 65 def parseTopLevel(self):
71 if self.Peak == 'function': 66 if self.Peak == 'function':
72 self.parseFunctionDef() 67 self.parseFunctionDef()
73 elif self.Peak == 'var': 68 elif self.Peak == 'var':
125 def parseVar(): 120 def parseVar():
126 name = self.Consume('ID') 121 name = self.Consume('ID')
127 v = astnodes.Variable(name.val, t) 122 v = astnodes.Variable(name.val, t)
128 v.loc = name.loc 123 v.loc = name.loc
129 if self.hasConsumed('='): 124 if self.hasConsumed('='):
130 v.ival = self.parseExpression() 125 v.ival = self.Expression()
131 self.addDeclaration(v) 126 self.addDeclaration(v)
132 parseVar() 127 parseVar()
133 while self.hasConsumed(','): 128 while self.hasConsumed(','):
134 parseVar() 129 parseVar()
135 self.Consume(';') 130 self.Consume(';')
138 self.Consume('const') 133 self.Consume('const')
139 t = self.parseTypeSpec() 134 t = self.parseTypeSpec()
140 def parseConst(): 135 def parseConst():
141 name = self.Consume('ID') 136 name = self.Consume('ID')
142 self.Consume('=') 137 self.Consume('=')
143 val = self.parseExpression() 138 val = self.Expression()
144 c = astnodes.Constant(name.val, t, val) 139 c = astnodes.Constant(name.val, t, val)
145 c.loc = name.loc 140 c.loc = name.loc
146 parseConst() 141 parseConst()
147 while self.hasConsumed(','): 142 while self.hasConsumed(','):
148 parseConst() 143 parseConst()
175 f.typ = astnodes.FunctionType(paramtypes, returntype) 170 f.typ = astnodes.FunctionType(paramtypes, returntype)
176 f.body = self.parseCompoundStatement() 171 f.body = self.parseCompoundStatement()
177 self.currentPart = savePart 172 self.currentPart = savePart
178 173
179 # Statements: 174 # Statements:
180 def parseAssignment(self, lval):
181 lval = astnodes.VariableUse(lval, lval.loc)
182 loc = self.Consume('=').loc
183 rval = self.parseExpression()
184 self.Consume(';')
185 return astnodes.Assignment(lval, rval, loc)
186
187 def parseCall(self, func):
188 self.Consume('(')
189 args = []
190 if not self.hasConsumed(')'):
191 args.append(self.parseExpression())
192 while self.hasConsumed(','):
193 args.append(self.parseExpression())
194 self.Consume(')')
195 return astnodes.FunctionCall(func, args, func.loc)
196 175
197 def parseIfStatement(self): 176 def parseIfStatement(self):
198 loc = self.Consume('if').loc 177 loc = self.Consume('if').loc
199 self.Consume('(') 178 self.Consume('(')
200 condition = self.parseExpression() 179 condition = self.Expression()
201 self.Consume(')') 180 self.Consume(')')
202 yes = self.parseCompoundStatement() 181 yes = self.parseCompoundStatement()
203 if self.hasConsumed('else'): 182 if self.hasConsumed('else'):
204 no = self.parseCompoundStatement() 183 no = self.parseCompoundStatement()
205 else: 184 else:
206 no = astnodes.EmptyStatement() 185 no = astnodes.EmptyStatement()
207 return astnodes.IfStatement(condition, yes, no, loc) 186 return astnodes.IfStatement(condition, yes, no, loc)
208 187
209 def parseWhileStatement(self): 188 def parseWhileStatement(self):
210 loc = self.Consume('while').loc 189 loc = self.Consume('while').loc
211 self.Consume('(') 190 self.Consume('(')
212 condition = self.parseExpression() 191 condition = self.Expression()
213 self.Consume(')') 192 self.Consume(')')
214 statements = self.parseCompoundStatement() 193 statements = self.parseCompoundStatement()
215 return astnodes.WhileStatement(condition, statements, loc) 194 return astnodes.WhileStatement(condition, statements, loc)
216 195
217 def parseReturnStatement(self): 196 def parseReturnStatement(self):
218 self.Consume('return') 197 loc = self.Consume('return').loc
219 expr = self.parseExpression() 198 expr = self.Expression()
220 self.Consume(';') 199 self.Consume(';')
221 return astnodes.ReturnStatement(expr) 200 return astnodes.ReturnStatement(expr, loc)
222 201
223 def parseCompoundStatement(self): 202 def parseCompoundStatement(self):
224 self.Consume('{') 203 self.Consume('{')
225 statements = [] 204 statements = []
226 while not self.hasConsumed('}'): 205 while not self.hasConsumed('}'):
227 s = self.parseStatement() 206 s = self.Statement()
228 if not type(s) is astnodes.EmptyStatement: 207 if type(s) is astnodes.EmptyStatement:
208 continue
229 statements.append(s) 209 statements.append(s)
230 return astnodes.CompoundStatement(statements) 210 return astnodes.CompoundStatement(statements)
231 211
232 def parseStatement(self): 212 def Statement(self):
233 # Determine statement type based on the pending token: 213 # Determine statement type based on the pending token:
234 if self.Peak == 'if': 214 if self.Peak == 'if':
235 return self.parseIfStatement() 215 return self.parseIfStatement()
236 elif self.Peak == 'while': 216 elif self.Peak == 'while':
237 return self.parseWhileStatement() 217 return self.parseWhileStatement()
238 elif self.Peak == '{': 218 elif self.Peak == '{':
239 return self.parseCompoundStatement() 219 return self.parseCompoundStatement()
240 elif self.hasConsumed(';'): 220 elif self.hasConsumed(';'):
241 return astnodes.EmptyStatement() 221 return astnodes.EmptyStatement()
242 elif self.Peak == 'var': 222 elif self.Peak == 'var':
243 self.parseVarDef() 223 self.parseVarDef()
244 return astnodes.EmptyStatement() 224 return astnodes.EmptyStatement()
245 elif self.Peak == 'return': 225 elif self.Peak == 'return':
246 return self.parseReturnStatement() 226 return self.parseReturnStatement()
247 else: 227 else:
248 designator = self.parseDesignator() 228 return self.AssignmentOrCall()
249 if self.Peak == '(': 229
250 return self.parseCall(designator) 230 def AssignmentOrCall(self):
251 elif self.Peak == '=': 231 x = self.UnaryExpression()
252 return self.parseAssignment(designator) 232 if self.Peak == '=':
253 else: 233 # We enter assignment mode here.
254 self.Error('Unable to determine statement') 234 loc = self.Consume('=').loc
255 235 rhs = self.Expression()
256 # Parsing expressions: 236 return astnodes.Assignment(x, rhs, loc)
257 def parseExpression(self): 237 else:
258 return self.parseBinopRhs(self.parsePrimary(), 0) 238 return x
259 239
260 def parsePrimary(self): 240 # Expression section:
261 if self.hasConsumed('('): 241 # We not implement these C constructs:
262 e = self.parseExpression() 242 # a(2), f = 2
263 self.Consume(')') 243 # and this:
264 return e 244 # a = 2 < x : 4 ? 1;
265 elif self.Peak == 'NUMBER': 245
266 val = self.Consume('NUMBER') 246 def Expression(self):
267 return astnodes.Literal(val.val, val.loc) 247 exp = self.LogicalAndExpression()
268 elif self.Peak == 'REAL': 248 while self.Peak == 'or':
269 val = self.Consume('REAL') 249 loc = self.Consume('or').loc
270 return astnodes.Literal(val.val, val.loc) 250 e2 = self.LogicalAndExpression()
271 elif self.Peak == 'true': 251 exp = astnodes.Binop(exp, 'or', e2, loc)
272 val = self.Consume('true') 252 return exp
273 return astnodes.Literal(True, val.loc) 253
274 elif self.Peak == 'false': 254 def LogicalAndExpression(self):
275 val = self.Consume('false') 255 o = self.EqualityExpression()
276 return astnodes.Literal(False, val.loc) 256 while self.Peak == 'and':
277 elif self.Peak == 'ID': 257 loc = self.Consume('and').loc
278 d = self.parseDesignator() 258 o2 = self.EqualityExpression()
279 if self.Peak == '(': 259 o = astnodes.Binop(o, 'and', o2, loc)
280 return self.parseCall(d) 260 return o
281 else: 261
262 def EqualityExpression(self):
263 ee = self.SimpleExpression()
264 while self.Peak in ['<', '==', '>']:
265 op = self.Consume(self.Peak)
266 ee2 = self.SimpleExpression()
267 ee = astnodes.Binop(ee, op.typ, ee2, op.loc)
268 return ee
269
270 def SimpleExpression(self):
271 e = self.Term()
272 while self.Peak in ['+', '-']:
273 op = self.Consume(self.Peak)
274 e2 = self.Term()
275 e = astnodes.Binop(e, op.typ, e2, op.loc)
276 return e
277
278 def Term(self):
279 t = self.Factor()
280 while self.Peak in ['*', '/']:
281 op = self.Consume(self.Peak)
282 t2 = self.Factor()
283 t = astnodes.Binop(t, op.typ, t2, op.loc)
284 return t
285
286 def Factor(self):
287 # TODO: eliminate this step?
288 return self.CastExpression()
289
290 # Domain of unary expressions:
291
292 def CastExpression(self):
293 # TODO: cast conflicts with '(' expr ')'
294 if self.Peak == '(ii':
295 self.Consume('(')
296 print('TODO: implement type cast')
297 #rrrrr
298 self.parseTypeSpec()
299
300 # Type
301 self.Consume(')')
302 ce = self.CastExpression()
303 return ce
304 else:
305 return self.UnaryExpression()
306
307 def UnaryExpression(self):
308 if self.Peak in ['&', '*']:
309 op = self.Consume(self.Peak)
310 ce = self.CastExpression()
311 return astnodes.Unop(op.typ, ce, op.loc)
312 else:
313 return self.PostFixExpression()
314
315 def PostFixExpression(self):
316 pfe = self.PrimaryExpression()
317 while self.Peak in ['[', '(', '.', '->']:
318 if self.hasConsumed('['):
319 pass
320 elif self.hasConsumed('('):
321 # Function call
322 args = []
323 if not self.hasConsumed(')'):
324 args.append(self.Expression())
325 while self.hasConsumed(','):
326 args.append(self.Expression())
327 self.Consume(')')
328 pfe = astnodes.FunctionCall(pfe, args, pfe.loc)
329 else:
330 rrrr
331 return pfe
332
333 def PrimaryExpression(self):
334 if self.hasConsumed('('):
335 e = self.Expression()
336 self.Consume(')')
337 return e
338 elif self.Peak == 'NUMBER':
339 val = self.Consume('NUMBER')
340 return astnodes.Literal(val.val, val.loc)
341 elif self.Peak == 'REAL':
342 val = self.Consume('REAL')
343 return astnodes.Literal(val.val, val.loc)
344 elif self.Peak == 'true':
345 val = self.Consume('true')
346 return astnodes.Literal(True, val.loc)
347 elif self.Peak == 'false':
348 val = self.Consume('false')
349 return astnodes.Literal(False, val.loc)
350 elif self.Peak == 'ID':
351 d = self.parseDesignator()
282 return astnodes.VariableUse(d, d.loc) 352 return astnodes.VariableUse(d, d.loc)
283 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) 353 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))
284 354
285 def parseBinopRhs(self, lhs, min_prec): 355
286 while self.PeakPrec >= min_prec:
287 op_prec = self.PeakPrec
288 op = self.Consume(self.Peak)
289 rhs = self.parsePrimary()
290 while self.PeakPrec > op_prec:
291 rhs = self.parseBinopRhs(rhs, self.PeakPrec)
292 lhs = astnodes.Binop(lhs, op.typ, rhs, op.loc)
293 return lhs
294