Mercurial > lcfOS
comparison python/ppci/c3/parser.py @ 300:158068af716c
yafm
author | Windel Bouwman |
---|---|
date | Tue, 03 Dec 2013 18:00:22 +0100 |
parents | python/c3/parser.py@9417caea2eb3 |
children | 6753763d3bec |
comparison
equal
deleted
inserted
replaced
299:674789d9ff37 | 300:158068af716c |
---|---|
1 import logging | |
2 from .lexer import Lexer | |
3 from .astnodes import FieldRef, Literal, TypeCast, Unop, Binop | |
4 from .astnodes import Assignment, ExpressionStatement, CompoundStatement | |
5 from .astnodes import ReturnStatement, WhileStatement, IfStatement | |
6 from .astnodes import FunctionType, Function, FormalParameter | |
7 from .astnodes import StructureType, DefinedType, PointerType | |
8 from .astnodes import Constant, Variable | |
9 from .astnodes import StructField, Deref | |
10 from .astnodes import Package, ImportDesignator | |
11 from .astnodes import Designator, VariableUse, FunctionCall | |
12 from ppci import CompilerError | |
13 | |
14 | |
class Parser:
    """ Parses sourcecode into an abstract syntax tree (AST).

    This is a hand written recursive descent parser. It keeps exactly one
    token of lookahead in `self.token`; the `Peak` property exposes the type
    of that pending token. Syntax errors are raised as CompilerError and are
    recorded on the diagnostics object by `parseSource`.
    """
    def __init__(self, diag):
        """ Create a parser that reports problems to `diag`. """
        self.logger = logging.getLogger('c3')
        self.diag = diag
        self.lexer = Lexer(diag)

    def parseSource(self, source):
        """ Parse `source` and return the resulting package.

        When a CompilerError is raised during parsing it is added to the
        diagnostics and None is returned.
        """
        self.logger.info('Parsing source')
        self.initLex(source)
        try:
            self.parsePackage()
            return self.mod
        except CompilerError as e:
            self.diag.addDiag(e)
            return None

    def Error(self, msg):
        """ Raise a CompilerError with `msg` at the pending token. """
        raise CompilerError(msg, self.token.loc)

    # Lexer helpers:
    def Consume(self, typ):
        """ Return the pending token if it has type `typ`, and advance.

        Raises CompilerError (via Error) when the pending token has another
        type.
        """
        if self.Peak == typ:
            return self.NextToken()
        else:
            self.Error('Expected: "{0}", got "{1}"'.format(typ, self.Peak))

    @property
    def Peak(self):
        """ The type of the pending (lookahead) token. """
        return self.token.typ

    @property
    def CurLoc(self):
        """ The source location of the pending token. """
        return self.token.loc

    def hasConsumed(self, typ):
        """ Consume the pending token only if it has type `typ`.

        Returns True when a token was consumed, False otherwise.
        """
        if self.Peak == typ:
            self.Consume(typ)
            return True
        return False

    def NextToken(self):
        """ Return the pending token and fetch the next one.

        Once the 'END' token is pending it is never advanced past, so it can
        be returned repeatedly without exhausting the token iterator.
        """
        t = self.token
        if t.typ != 'END':
            self.token = next(self.tokens)
        return t

    def initLex(self, source):
        """ Start tokenizing `source` and load the first token. """
        self.tokens = self.lexer.tokenize(source)
        self.token = next(self.tokens)

    def addDeclaration(self, decl):
        """ Append `decl` to the declarations of the part being parsed. """
        self.currentPart.declarations.append(decl)

    def parseImport(self):
        """ Parse: 'import' ID ';' and record the imported name. """
        self.Consume('import')
        name = self.Consume('ID').val
        self.mod.imports.append(name)
        self.Consume(';')

    def parsePackage(self):
        """ Parse: 'module' ID ';' { toplevel } END into `self.mod`. """
        self.Consume('module')
        name = self.Consume('ID')
        self.Consume(';')
        self.mod = Package(name.val, name.loc)
        self.currentPart = self.mod
        while self.Peak != 'END':
            self.parseTopLevel()
        self.Consume('END')

    def parseTopLevel(self):
        """ Parse one top level construct, selected by the pending token. """
        handlers = {
            'function': self.parseFunctionDef,
            'var': self.parseVarDef,
            'const': self.parseConstDef,
            'type': self.parseTypeDef,
            'import': self.parseImport,
        }
        handler = handlers.get(self.Peak)
        if handler is None:
            self.Error('Expected function, var, const, type or import')
        handler()

    def parseDesignator(self):
        """ A designator designates an object.

        Parses either ID or ID ':' ID; the latter form yields an
        ImportDesignator.
        """
        name = self.Consume('ID')
        if self.hasConsumed(':'):
            name2 = self.Consume('ID')
            return ImportDesignator(name.val, name2.val, name.loc)
        else:
            return Designator(name.val, name.loc)

    # Type system
    def parseTypeSpec(self):
        """ Parse a type specification.

        This is either a struct body or a designator, followed by zero or
        more '*' suffixes that each wrap the type in a PointerType.
        """
        if self.Peak == 'struct':
            self.Consume('struct')
            self.Consume('{')
            mems = []
            while self.Peak != '}':
                # One member line: type name {',' name} ';'
                mem_t = self.parseTypeSpec()
                mem_n = self.Consume('ID').val
                mems.append(StructField(mem_n, mem_t))
                while self.hasConsumed(','):
                    mem_n = self.Consume('ID').val
                    mems.append(StructField(mem_n, mem_t))
                self.Consume(';')
            self.Consume('}')
            theT = StructureType(mems)
        else:
            theT = self.parseDesignator()
        # Check for pointer suffix:
        while self.hasConsumed('*'):
            theT = PointerType(theT)
        return theT

    def parseTypeDef(self):
        """ Parse: 'type' typespec ID ';' and declare the new type. """
        self.Consume('type')
        newtype = self.parseTypeSpec()
        typename = self.Consume('ID')
        self.Consume(';')
        df = DefinedType(typename.val, newtype, typename.loc)
        self.addDeclaration(df)

    # Variable declarations:
    def parseVarDef(self):
        """ Parse: 'var' typespec ID ['=' expr] {',' ID ['=' expr]} ';'

        Every variable is added to the declarations of the current part.
        """
        self.Consume('var')
        t = self.parseTypeSpec()
        while True:
            name = self.Consume('ID')
            v = Variable(name.val, t)
            v.loc = name.loc
            if self.hasConsumed('='):
                v.ival = self.Expression()
            self.addDeclaration(v)
            if not self.hasConsumed(','):
                break
        self.Consume(';')

    def parseConstDef(self):
        """ Parse: 'const' typespec ID '=' expr {',' ID '=' expr} ';'

        Every constant is added to the declarations of the current part.
        """
        self.Consume('const')
        t = self.parseTypeSpec()
        while True:
            name = self.Consume('ID')
            self.Consume('=')
            val = self.Expression()
            c = Constant(name.val, t, val)
            c.loc = name.loc
            # Register the constant, otherwise it is parsed and then lost.
            self.addDeclaration(c)
            if not self.hasConsumed(','):
                break
        self.Consume(';')

    # Procedures
    def parseFunctionDef(self):
        """ Parse: 'function' typespec ID '(' [parameters] ')' compound

        The function is declared in the enclosing part. While its parameters
        and body are parsed the function itself is the current part, so
        nested declarations end up in the function.
        """
        loc = self.Consume('function').loc
        returntype = self.parseTypeSpec()
        fname = self.Consume('ID').val
        f = Function(fname, loc)
        self.addDeclaration(f)
        savePart = self.currentPart
        self.currentPart = f
        self.Consume('(')
        parameters = []
        if not self.hasConsumed(')'):
            while True:
                typ = self.parseTypeSpec()
                name = self.Consume('ID')
                param = FormalParameter(name.val, typ)
                param.loc = name.loc
                self.addDeclaration(param)
                parameters.append(param)
                if not self.hasConsumed(','):
                    break
            self.Consume(')')
        paramtypes = [p.typ for p in parameters]
        f.typ = FunctionType(paramtypes, returntype)
        f.body = self.parseCompoundStatement()
        self.currentPart = savePart

    # Statements:

    def parseIfStatement(self):
        """ Parse: 'if' '(' expr ')' compound ['else' compound] """
        loc = self.Consume('if').loc
        self.Consume('(')
        condition = self.Expression()
        self.Consume(')')
        yes = self.parseCompoundStatement()
        if self.hasConsumed('else'):
            no = self.parseCompoundStatement()
        else:
            no = None
        return IfStatement(condition, yes, no, loc)

    def parseWhileStatement(self):
        """ Parse: 'while' '(' expr ')' compound """
        loc = self.Consume('while').loc
        self.Consume('(')
        condition = self.Expression()
        self.Consume(')')
        statements = self.parseCompoundStatement()
        return WhileStatement(condition, statements, loc)

    def parseReturnStatement(self):
        """ Parse: 'return' [expr] ';'

        A return without expression gets the literal 0 as its expression.
        """
        loc = self.Consume('return').loc
        if self.Peak == ';':
            expr = Literal(0, loc)
        else:
            expr = self.Expression()
        self.Consume(';')
        return ReturnStatement(expr, loc)

    def parseCompoundStatement(self):
        """ Parse: '{' { statement } '}' into a CompoundStatement. """
        self.Consume('{')
        statements = []
        while not self.hasConsumed('}'):
            s = self.Statement()
            # Empty statements and variable definitions yield no node.
            if s is not None:
                statements.append(s)
        return CompoundStatement(statements)

    def Statement(self):
        """ Parse one statement, selected by the pending token.

        Returns None for an empty statement (';') and for a variable
        definition, which only adds declarations.
        """
        if self.Peak == 'if':
            return self.parseIfStatement()
        elif self.Peak == 'while':
            return self.parseWhileStatement()
        elif self.Peak == '{':
            return self.parseCompoundStatement()
        elif self.hasConsumed(';'):
            return None
        elif self.Peak == 'var':
            self.parseVarDef()
            return None
        elif self.Peak == 'return':
            return self.parseReturnStatement()
        else:
            return self.AssignmentOrCall()

    def AssignmentOrCall(self):
        """ Parse a unary expression, optionally followed by '=' expr.

        With '=' this yields an Assignment, otherwise the expression is
        wrapped in an ExpressionStatement. The terminating ';' is not
        consumed here.
        """
        x = self.UnaryExpression()
        if self.Peak == '=':
            # We enter assignment mode here.
            loc = self.Consume('=').loc
            rhs = self.Expression()
            return Assignment(x, rhs, loc)
        else:
            return ExpressionStatement(x, x.loc)

    # Expression section:
    # We do not implement these C constructs:
    # a(2), f = 2
    # and this:
    # a = 2 < x : 4 ? 1;

    def _parseBinopLevel(self, parseOperand, operators):
        """ Parse a left associative chain: operand { operator operand }

        `parseOperand` parses the next tighter binding level and `operators`
        holds the token types accepted at this level.
        """
        lhs = parseOperand()
        while self.Peak in operators:
            op = self.Consume(self.Peak)
            rhs = parseOperand()
            lhs = Binop(lhs, op.typ, rhs, op.loc)
        return lhs

    def Expression(self):
        """ Parse an expression; 'or' is the loosest binding operator. """
        return self._parseBinopLevel(self.LogicalAndExpression, ['or'])

    def LogicalAndExpression(self):
        """ Parse operands joined by 'and'. """
        return self._parseBinopLevel(self.EqualityExpression, ['and'])

    def EqualityExpression(self):
        """ Parse operands joined by comparison operators. """
        return self._parseBinopLevel(
            self.SimpleExpression, ['<', '==', '>', '>=', '<=', '!='])

    def SimpleExpression(self):
        """ Shift operations before + and - ? """
        return self._parseBinopLevel(self.AddExpression, ['>>', '<<'])

    def AddExpression(self):
        """ Parse operands joined by '+' or '-'. """
        return self._parseBinopLevel(self.Term, ['+', '-'])

    def Term(self):
        """ Parse operands joined by '*' or '/'. """
        return self._parseBinopLevel(self.BitwiseOr, ['*', '/'])

    def BitwiseOr(self):
        """ Parse operands joined by '|'. """
        return self._parseBinopLevel(self.BitwiseAnd, ['|'])

    def BitwiseAnd(self):
        """ Parse operands joined by '&'. """
        return self._parseBinopLevel(self.CastExpression, ['&'])

    # Domain of unary expressions:

    def CastExpression(self):
        """
          the C-style type cast conflicts with '(' expr ')'
          so introduce extra keyword 'cast'

          Parses: 'cast' '<' typespec '>' '(' expr ')' or else a unary
          expression.
        """
        if self.Peak == 'cast':
            loc = self.Consume('cast').loc
            self.Consume('<')
            t = self.parseTypeSpec()
            self.Consume('>')
            self.Consume('(')
            ce = self.Expression()
            self.Consume(')')
            return TypeCast(t, ce, loc)
        else:
            return self.UnaryExpression()

    def UnaryExpression(self):
        """ Parse a prefix '*' (Deref) or '&' (Unop), or a postfix expr. """
        if self.Peak in ['&', '*']:
            op = self.Consume(self.Peak)
            ce = self.CastExpression()
            # Dispatch on the token type, the same field Peak selected on.
            if op.typ == '*':
                return Deref(ce, op.loc)
            else:
                return Unop(op.typ, ce, op.loc)
        else:
            return self.PostFixExpression()

    def PostFixExpression(self):
        """ Parse a primary expression followed by postfix operators.

        Supported postfix forms are a call '(' [args] ')', field access
        '.' ID, and '->' ID, which is a dereference followed by a field
        access. Indexing with '[' is reported as an error.
        """
        pfe = self.PrimaryExpression()
        while self.Peak in ['[', '(', '.', '->']:
            if self.Peak == '[':
                # No AST node exists for indexing; silently dropping the
                # '[' would produce a confusing error further on.
                self.Error('Array indexing is not implemented')
            elif self.hasConsumed('('):
                # Function call
                args = []
                if not self.hasConsumed(')'):
                    args.append(self.Expression())
                    while self.hasConsumed(','):
                        args.append(self.Expression())
                    self.Consume(')')
                pfe = FunctionCall(pfe, args, pfe.loc)
            elif self.hasConsumed('->'):
                field = self.Consume('ID')
                pfe = Deref(pfe, pfe.loc)
                pfe = FieldRef(pfe, field.val, field.loc)
            else:
                self.Consume('.')
                field = self.Consume('ID')
                pfe = FieldRef(pfe, field.val, field.loc)
        return pfe

    def PrimaryExpression(self):
        """ Parse '(' expr ')', a literal, or a designator. """
        if self.hasConsumed('('):
            e = self.Expression()
            self.Consume(')')
            return e
        elif self.Peak == 'NUMBER':
            val = self.Consume('NUMBER')
            return Literal(val.val, val.loc)
        elif self.Peak == 'REAL':
            val = self.Consume('REAL')
            return Literal(val.val, val.loc)
        elif self.Peak == 'true':
            val = self.Consume('true')
            return Literal(True, val.loc)
        elif self.Peak == 'false':
            val = self.Consume('false')
            return Literal(False, val.loc)
        elif self.Peak == 'ID':
            d = self.parseDesignator()
            return VariableUse(d, d.loc)
        self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))