Mercurial > lcfOS
comparison python/c3/parser.py @ 220:3f6c30a5d234
Major change in expression parsing to enable pointers and structs
author | Windel Bouwman |
---|---|
date | Sat, 06 Jul 2013 21:32:20 +0200 |
parents | 1fa3e0050b49 |
children | 848c4b15fd0b |
comparison
equal
deleted
inserted
replaced
219:1fa3e0050b49 | 220:3f6c30a5d234 |
---|---|
1 from . import astnodes, lexer | 1 from . import astnodes, lexer |
2 from ppci import CompilerError | 2 from ppci import CompilerError |
3 | |
4 # binop precedence for expressions: | |
5 binopPrecs = {'or': 5, 'and': 10, \ | |
6 '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20, \ | |
7 '+': 30, '-': 30, '*': 40, '/': 40 } | |
8 | 3 |
9 class Parser: | 4 class Parser: |
10 """ Parses sourcecode into an abstract syntax tree (AST) """ | 5 """ Parses sourcecode into an abstract syntax tree (AST) """ |
11 def __init__(self, diag): | 6 def __init__(self, diag): |
12 self.diag = diag | 7 self.diag = diag |
16 try: | 11 try: |
17 self.parsePackage() | 12 self.parsePackage() |
18 return self.mod | 13 return self.mod |
19 except CompilerError as e: | 14 except CompilerError as e: |
20 self.diag.addDiag(e) | 15 self.diag.addDiag(e) |
16 | |
21 def Error(self, msg): | 17 def Error(self, msg): |
22 raise CompilerError(msg, self.token.loc) | 18 raise CompilerError(msg, self.token.loc) |
19 | |
23 # Lexer helpers: | 20 # Lexer helpers: |
24 def Consume(self, typ): | 21 def Consume(self, typ): |
25 if self.Peak == typ: | 22 if self.Peak == typ: |
26 return self.NextToken() | 23 return self.NextToken() |
27 else: | 24 else: |
28 self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) | 25 self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) |
26 | |
29 @property | 27 @property |
30 def Peak(self): | 28 def Peak(self): |
31 return self.token.typ | 29 return self.token.typ |
32 @property | 30 |
33 def PeakPrec(self): | |
34 if self.Peak in binopPrecs: | |
35 return binopPrecs[self.Peak] | |
36 return -1 | |
37 def hasConsumed(self, typ): | 31 def hasConsumed(self, typ): |
38 if self.Peak == typ: | 32 if self.Peak == typ: |
39 self.Consume(typ) | 33 self.Consume(typ) |
40 return True | 34 return True |
41 return False | 35 return False |
42 | 36 |
43 def NextToken(self): | 37 def NextToken(self): |
44 t = self.token | 38 t = self.token |
45 if t.typ != 'END': | 39 if t.typ != 'END': |
46 self.token = self.tokens.__next__() | 40 self.token = self.tokens.__next__() |
47 return t | 41 return t |
48 | 42 |
49 def initLex(self, source): | 43 def initLex(self, source): |
50 self.tokens = lexer.tokenize(source) # Lexical stage | 44 self.tokens = lexer.tokenize(source) # Lexical stage |
51 self.token = self.tokens.__next__() | 45 self.token = self.tokens.__next__() |
46 | |
52 def addDeclaration(self, decl): | 47 def addDeclaration(self, decl): |
53 self.currentPart.declarations.append(decl) | 48 self.currentPart.declarations.append(decl) |
54 | 49 |
55 def parseUses(self): | 50 def parseUses(self): |
51 # TODO: parse uses | |
56 pass | 52 pass |
57 | 53 |
58 def parsePackage(self): | 54 def parsePackage(self): |
59 self.Consume('package') | 55 self.Consume('package') |
60 name = self.Consume('ID') | 56 name = self.Consume('ID') |
61 self.Consume(';') | 57 self.Consume(';') |
62 self.mod = astnodes.Package(name.val, name.loc) | 58 self.mod = astnodes.Package(name.val, name.loc) |
63 self.currentPart = self.mod | 59 self.currentPart = self.mod |
64 self.parseUses() | 60 self.parseUses() |
65 # TODO: parse uses | 61 while self.Peak != 'END': |
66 while self.Peak != 'END': | 62 self.parseTopLevel() |
67 self.parseTopLevel() | 63 self.Consume('END') |
68 self.Consume('END') | |
69 | 64 |
70 def parseTopLevel(self): | 65 def parseTopLevel(self): |
71 if self.Peak == 'function': | 66 if self.Peak == 'function': |
72 self.parseFunctionDef() | 67 self.parseFunctionDef() |
73 elif self.Peak == 'var': | 68 elif self.Peak == 'var': |
125 def parseVar(): | 120 def parseVar(): |
126 name = self.Consume('ID') | 121 name = self.Consume('ID') |
127 v = astnodes.Variable(name.val, t) | 122 v = astnodes.Variable(name.val, t) |
128 v.loc = name.loc | 123 v.loc = name.loc |
129 if self.hasConsumed('='): | 124 if self.hasConsumed('='): |
130 v.ival = self.parseExpression() | 125 v.ival = self.Expression() |
131 self.addDeclaration(v) | 126 self.addDeclaration(v) |
132 parseVar() | 127 parseVar() |
133 while self.hasConsumed(','): | 128 while self.hasConsumed(','): |
134 parseVar() | 129 parseVar() |
135 self.Consume(';') | 130 self.Consume(';') |
138 self.Consume('const') | 133 self.Consume('const') |
139 t = self.parseTypeSpec() | 134 t = self.parseTypeSpec() |
140 def parseConst(): | 135 def parseConst(): |
141 name = self.Consume('ID') | 136 name = self.Consume('ID') |
142 self.Consume('=') | 137 self.Consume('=') |
143 val = self.parseExpression() | 138 val = self.Expression() |
144 c = astnodes.Constant(name.val, t, val) | 139 c = astnodes.Constant(name.val, t, val) |
145 c.loc = name.loc | 140 c.loc = name.loc |
146 parseConst() | 141 parseConst() |
147 while self.hasConsumed(','): | 142 while self.hasConsumed(','): |
148 parseConst() | 143 parseConst() |
175 f.typ = astnodes.FunctionType(paramtypes, returntype) | 170 f.typ = astnodes.FunctionType(paramtypes, returntype) |
176 f.body = self.parseCompoundStatement() | 171 f.body = self.parseCompoundStatement() |
177 self.currentPart = savePart | 172 self.currentPart = savePart |
178 | 173 |
179 # Statements: | 174 # Statements: |
180 def parseAssignment(self, lval): | |
181 lval = astnodes.VariableUse(lval, lval.loc) | |
182 loc = self.Consume('=').loc | |
183 rval = self.parseExpression() | |
184 self.Consume(';') | |
185 return astnodes.Assignment(lval, rval, loc) | |
186 | |
187 def parseCall(self, func): | |
188 self.Consume('(') | |
189 args = [] | |
190 if not self.hasConsumed(')'): | |
191 args.append(self.parseExpression()) | |
192 while self.hasConsumed(','): | |
193 args.append(self.parseExpression()) | |
194 self.Consume(')') | |
195 return astnodes.FunctionCall(func, args, func.loc) | |
196 | 175 |
197 def parseIfStatement(self): | 176 def parseIfStatement(self): |
198 loc = self.Consume('if').loc | 177 loc = self.Consume('if').loc |
199 self.Consume('(') | 178 self.Consume('(') |
200 condition = self.parseExpression() | 179 condition = self.Expression() |
201 self.Consume(')') | 180 self.Consume(')') |
202 yes = self.parseCompoundStatement() | 181 yes = self.parseCompoundStatement() |
203 if self.hasConsumed('else'): | 182 if self.hasConsumed('else'): |
204 no = self.parseCompoundStatement() | 183 no = self.parseCompoundStatement() |
205 else: | 184 else: |
206 no = astnodes.EmptyStatement() | 185 no = astnodes.EmptyStatement() |
207 return astnodes.IfStatement(condition, yes, no, loc) | 186 return astnodes.IfStatement(condition, yes, no, loc) |
208 | 187 |
209 def parseWhileStatement(self): | 188 def parseWhileStatement(self): |
210 loc = self.Consume('while').loc | 189 loc = self.Consume('while').loc |
211 self.Consume('(') | 190 self.Consume('(') |
212 condition = self.parseExpression() | 191 condition = self.Expression() |
213 self.Consume(')') | 192 self.Consume(')') |
214 statements = self.parseCompoundStatement() | 193 statements = self.parseCompoundStatement() |
215 return astnodes.WhileStatement(condition, statements, loc) | 194 return astnodes.WhileStatement(condition, statements, loc) |
216 | 195 |
217 def parseReturnStatement(self): | 196 def parseReturnStatement(self): |
218 self.Consume('return') | 197 loc = self.Consume('return').loc |
219 expr = self.parseExpression() | 198 expr = self.Expression() |
220 self.Consume(';') | 199 self.Consume(';') |
221 return astnodes.ReturnStatement(expr) | 200 return astnodes.ReturnStatement(expr, loc) |
222 | 201 |
223 def parseCompoundStatement(self): | 202 def parseCompoundStatement(self): |
224 self.Consume('{') | 203 self.Consume('{') |
225 statements = [] | 204 statements = [] |
226 while not self.hasConsumed('}'): | 205 while not self.hasConsumed('}'): |
227 s = self.parseStatement() | 206 s = self.Statement() |
228 if not type(s) is astnodes.EmptyStatement: | 207 if type(s) is astnodes.EmptyStatement: |
208 continue | |
229 statements.append(s) | 209 statements.append(s) |
230 return astnodes.CompoundStatement(statements) | 210 return astnodes.CompoundStatement(statements) |
231 | 211 |
232 def parseStatement(self): | 212 def Statement(self): |
233 # Determine statement type based on the pending token: | 213 # Determine statement type based on the pending token: |
234 if self.Peak == 'if': | 214 if self.Peak == 'if': |
235 return self.parseIfStatement() | 215 return self.parseIfStatement() |
236 elif self.Peak == 'while': | 216 elif self.Peak == 'while': |
237 return self.parseWhileStatement() | 217 return self.parseWhileStatement() |
238 elif self.Peak == '{': | 218 elif self.Peak == '{': |
239 return self.parseCompoundStatement() | 219 return self.parseCompoundStatement() |
240 elif self.hasConsumed(';'): | 220 elif self.hasConsumed(';'): |
241 return astnodes.EmptyStatement() | 221 return astnodes.EmptyStatement() |
242 elif self.Peak == 'var': | 222 elif self.Peak == 'var': |
243 self.parseVarDef() | 223 self.parseVarDef() |
244 return astnodes.EmptyStatement() | 224 return astnodes.EmptyStatement() |
245 elif self.Peak == 'return': | 225 elif self.Peak == 'return': |
246 return self.parseReturnStatement() | 226 return self.parseReturnStatement() |
247 else: | 227 else: |
248 designator = self.parseDesignator() | 228 return self.AssignmentOrCall() |
249 if self.Peak == '(': | 229 |
250 return self.parseCall(designator) | 230 def AssignmentOrCall(self): |
251 elif self.Peak == '=': | 231 x = self.UnaryExpression() |
252 return self.parseAssignment(designator) | 232 if self.Peak == '=': |
253 else: | 233 # We enter assignment mode here. |
254 self.Error('Unable to determine statement') | 234 loc = self.Consume('=').loc |
255 | 235 rhs = self.Expression() |
256 # Parsing expressions: | 236 return astnodes.Assignment(x, rhs, loc) |
257 def parseExpression(self): | 237 else: |
258 return self.parseBinopRhs(self.parsePrimary(), 0) | 238 return x |
259 | 239 |
260 def parsePrimary(self): | 240 # Expression section: |
261 if self.hasConsumed('('): | 241 # We not implement these C constructs: |
262 e = self.parseExpression() | 242 # a(2), f = 2 |
263 self.Consume(')') | 243 # and this: |
264 return e | 244 # a = 2 < x : 4 ? 1; |
265 elif self.Peak == 'NUMBER': | 245 |
266 val = self.Consume('NUMBER') | 246 def Expression(self): |
267 return astnodes.Literal(val.val, val.loc) | 247 exp = self.LogicalAndExpression() |
268 elif self.Peak == 'REAL': | 248 while self.Peak == 'or': |
269 val = self.Consume('REAL') | 249 loc = self.Consume('or').loc |
270 return astnodes.Literal(val.val, val.loc) | 250 e2 = self.LogicalAndExpression() |
271 elif self.Peak == 'true': | 251 exp = astnodes.Binop(exp, 'or', e2, loc) |
272 val = self.Consume('true') | 252 return exp |
273 return astnodes.Literal(True, val.loc) | 253 |
274 elif self.Peak == 'false': | 254 def LogicalAndExpression(self): |
275 val = self.Consume('false') | 255 o = self.EqualityExpression() |
276 return astnodes.Literal(False, val.loc) | 256 while self.Peak == 'and': |
277 elif self.Peak == 'ID': | 257 loc = self.Consume('and').loc |
278 d = self.parseDesignator() | 258 o2 = self.EqualityExpression() |
279 if self.Peak == '(': | 259 o = astnodes.Binop(o, 'and', o2, loc) |
280 return self.parseCall(d) | 260 return o |
281 else: | 261 |
262 def EqualityExpression(self): | |
263 ee = self.SimpleExpression() | |
264 while self.Peak in ['<', '==', '>']: | |
265 op = self.Consume(self.Peak) | |
266 ee2 = self.SimpleExpression() | |
267 ee = astnodes.Binop(ee, op.typ, ee2, op.loc) | |
268 return ee | |
269 | |
270 def SimpleExpression(self): | |
271 e = self.Term() | |
272 while self.Peak in ['+', '-']: | |
273 op = self.Consume(self.Peak) | |
274 e2 = self.Term() | |
275 e = astnodes.Binop(e, op.typ, e2, op.loc) | |
276 return e | |
277 | |
278 def Term(self): | |
279 t = self.Factor() | |
280 while self.Peak in ['*', '/']: | |
281 op = self.Consume(self.Peak) | |
282 t2 = self.Factor() | |
283 t = astnodes.Binop(t, op.typ, t2, op.loc) | |
284 return t | |
285 | |
286 def Factor(self): | |
287 # TODO: eliminate this step? | |
288 return self.CastExpression() | |
289 | |
290 # Domain of unary expressions: | |
291 | |
292 def CastExpression(self): | |
293 # TODO: cast conflicts with '(' expr ')' | |
294 if self.Peak == '(ii': | |
295 self.Consume('(') | |
296 print('TODO: implement type cast') | |
297 #rrrrr | |
298 self.parseTypeSpec() | |
299 | |
300 # Type | |
301 self.Consume(')') | |
302 ce = self.CastExpression() | |
303 return ce | |
304 else: | |
305 return self.UnaryExpression() | |
306 | |
307 def UnaryExpression(self): | |
308 if self.Peak in ['&', '*']: | |
309 op = self.Consume(self.Peak) | |
310 ce = self.CastExpression() | |
311 return astnodes.Unop(op.typ, ce, op.loc) | |
312 else: | |
313 return self.PostFixExpression() | |
314 | |
315 def PostFixExpression(self): | |
316 pfe = self.PrimaryExpression() | |
317 while self.Peak in ['[', '(', '.', '->']: | |
318 if self.hasConsumed('['): | |
319 pass | |
320 elif self.hasConsumed('('): | |
321 # Function call | |
322 args = [] | |
323 if not self.hasConsumed(')'): | |
324 args.append(self.Expression()) | |
325 while self.hasConsumed(','): | |
326 args.append(self.Expression()) | |
327 self.Consume(')') | |
328 pfe = astnodes.FunctionCall(pfe, args, pfe.loc) | |
329 else: | |
330 rrrr | |
331 return pfe | |
332 | |
333 def PrimaryExpression(self): | |
334 if self.hasConsumed('('): | |
335 e = self.Expression() | |
336 self.Consume(')') | |
337 return e | |
338 elif self.Peak == 'NUMBER': | |
339 val = self.Consume('NUMBER') | |
340 return astnodes.Literal(val.val, val.loc) | |
341 elif self.Peak == 'REAL': | |
342 val = self.Consume('REAL') | |
343 return astnodes.Literal(val.val, val.loc) | |
344 elif self.Peak == 'true': | |
345 val = self.Consume('true') | |
346 return astnodes.Literal(True, val.loc) | |
347 elif self.Peak == 'false': | |
348 val = self.Consume('false') | |
349 return astnodes.Literal(False, val.loc) | |
350 elif self.Peak == 'ID': | |
351 d = self.parseDesignator() | |
282 return astnodes.VariableUse(d, d.loc) | 352 return astnodes.VariableUse(d, d.loc) |
283 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) | 353 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) |
284 | 354 |
285 def parseBinopRhs(self, lhs, min_prec): | 355 |
286 while self.PeakPrec >= min_prec: | |
287 op_prec = self.PeakPrec | |
288 op = self.Consume(self.Peak) | |
289 rhs = self.parsePrimary() | |
290 while self.PeakPrec > op_prec: | |
291 rhs = self.parseBinopRhs(rhs, self.PeakPrec) | |
292 lhs = astnodes.Binop(lhs, op.typ, rhs, op.loc) | |
293 return lhs | |
294 |