comparison python/ppci/c3/parser.py @ 300:158068af716c

yafm
author Windel Bouwman
date Tue, 03 Dec 2013 18:00:22 +0100
parents python/c3/parser.py@9417caea2eb3
children 6753763d3bec
comparison
equal deleted inserted replaced
299:674789d9ff37 300:158068af716c
1 import logging
2 from .lexer import Lexer
3 from .astnodes import FieldRef, Literal, TypeCast, Unop, Binop
4 from .astnodes import Assignment, ExpressionStatement, CompoundStatement
5 from .astnodes import ReturnStatement, WhileStatement, IfStatement
6 from .astnodes import FunctionType, Function, FormalParameter
7 from .astnodes import StructureType, DefinedType, PointerType
8 from .astnodes import Constant, Variable
9 from .astnodes import StructField, Deref
10 from .astnodes import Package, ImportDesignator
11 from .astnodes import Designator, VariableUse, FunctionCall
12 from ppci import CompilerError
13
14
class Parser:
    """ Parses sourcecode into an abstract syntax tree (AST).

    The parser is a hand-written recursive descent parser: every grammar
    construct has its own method, and decisions are taken by looking at the
    type of the single pending token (``self.token``).

    State kept on the instance:
      - ``tokens`` / ``token``: the token iterator and the pending token.
      - ``mod``: the Package under construction (set by parsePackage).
      - ``currentPart``: the object whose ``declarations`` list receives
        new declarations (the package, or the function being parsed).
    """
    def __init__(self, diag):
        """ Create a parser that reports errors to diag. """
        self.logger = logging.getLogger('c3')
        self.diag = diag
        self.lexer = Lexer(diag)

    def parseSource(self, source):
        """ Parse source and return the resulting package.

        When a CompilerError is raised while parsing, it is handed to
        ``diag.addDiag`` and None is returned instead.
        """
        self.logger.info('Parsing source')
        self.initLex(source)
        try:
            self.parsePackage()
            return self.mod
        except CompilerError as e:
            self.diag.addDiag(e)
        return None

    def Error(self, msg):
        """ Raise a CompilerError with msg at the pending token location """
        raise CompilerError(msg, self.token.loc)

    # Lexer helpers:
    def Consume(self, typ):
        """ Take the pending token when it has type typ and return it.

        Raises CompilerError (through Error) when the pending token has
        another type.
        """
        if self.Peak == typ:
            return self.NextToken()
        else:
            self.Error('Expected: "{0}", got "{1}"'.format(typ, self.Peak))

    @property
    def Peak(self):
        """ Type of the pending token (one token of lookahead) """
        return self.token.typ

    @property
    def CurLoc(self):
        """ Source location of the pending token """
        return self.token.loc

    def hasConsumed(self, typ):
        """ Consume the pending token only if it has type typ.

        Returns True when a token was consumed, False otherwise.
        """
        if self.Peak == typ:
            self.Consume(typ)
            return True
        return False

    def NextToken(self):
        """ Return the pending token and advance to the next one.

        The 'END' token is never advanced past, so it can be returned
        any number of times.
        """
        t = self.token
        if t.typ != 'END':
            self.token = next(self.tokens)
        return t

    def initLex(self, source):
        """ Start tokenizing source and load the first token """
        self.tokens = self.lexer.tokenize(source)
        self.token = next(self.tokens)

    def addDeclaration(self, decl):
        """ Append decl to the declarations of the part being parsed """
        self.currentPart.declarations.append(decl)

    def parseImport(self):
        """ Parse ``import ID ;`` and record the name on the package """
        self.Consume('import')
        name = self.Consume('ID').val
        self.mod.imports.append(name)
        self.Consume(';')

    def parsePackage(self):
        """ Parse ``module ID ;`` followed by top level items until END.

        Creates the Package, stores it in ``self.mod`` and makes it the
        current part for declarations.
        """
        self.Consume('module')
        name = self.Consume('ID')
        self.Consume(';')
        self.mod = Package(name.val, name.loc)
        self.currentPart = self.mod
        while self.Peak != 'END':
            self.parseTopLevel()
        self.Consume('END')

    def parseTopLevel(self):
        """ Dispatch on the pending keyword to parse one top level item """
        if self.Peak == 'function':
            self.parseFunctionDef()
        elif self.Peak == 'var':
            self.parseVarDef()
        elif self.Peak == 'const':
            self.parseConstDef()
        elif self.Peak == 'type':
            self.parseTypeDef()
        elif self.Peak == 'import':
            self.parseImport()
        else:
            self.Error('Expected function, var, const or type')

    def parseDesignator(self):
        """ A designator designates an object.

        ``ID`` gives a Designator, ``ID : ID`` gives an ImportDesignator.
        """
        name = self.Consume('ID')
        if self.hasConsumed(':'):
            name2 = self.Consume('ID')
            return ImportDesignator(name.val, name2.val, name.loc)
        else:
            return Designator(name.val, name.loc)

    # Type system
    def parseTypeSpec(self):
        """ Parse a type: a struct definition or a designator, followed by
        any number of ``*`` pointer suffixes.
        """
        if self.Peak == 'struct':
            self.Consume('struct')
            self.Consume('{')
            mems = []
            while self.Peak != '}':
                # One member line: <type> name {, name} ;
                mem_t = self.parseTypeSpec()
                mem_n = self.Consume('ID').val
                mems.append(StructField(mem_n, mem_t))
                while self.hasConsumed(','):
                    # Further names on the same line share the type:
                    mem_n = self.Consume('ID').val
                    mems.append(StructField(mem_n, mem_t))
                self.Consume(';')
            self.Consume('}')
            theT = StructureType(mems)
        else:
            theT = self.parseDesignator()
        # Check for pointer suffix:
        while self.hasConsumed('*'):
            theT = PointerType(theT)
        return theT

    def parseTypeDef(self):
        """ Parse ``type <typespec> ID ;`` and declare the DefinedType """
        self.Consume('type')
        newtype = self.parseTypeSpec()
        typename = self.Consume('ID')
        self.Consume(';')
        df = DefinedType(typename.val, newtype, typename.loc)
        self.addDeclaration(df)

    # Variable declarations:
    def parseVarDef(self):
        """ Parse ``var <typespec> ID [= expr] {, ID [= expr]} ;``.

        Every variable is added to the declarations of the current part.
        """
        self.Consume('var')
        t = self.parseTypeSpec()

        def parseVar():
            name = self.Consume('ID')
            v = Variable(name.val, t)
            v.loc = name.loc
            if self.hasConsumed('='):
                # Optional initial value:
                v.ival = self.Expression()
            self.addDeclaration(v)
        parseVar()
        while self.hasConsumed(','):
            parseVar()
        self.Consume(';')

    def parseConstDef(self):
        """ Parse ``const <typespec> ID = expr {, ID = expr} ;``.

        Every constant is added to the declarations of the current part.
        """
        self.Consume('const')
        t = self.parseTypeSpec()

        def parseConst():
            name = self.Consume('ID')
            self.Consume('=')
            val = self.Expression()
            c = Constant(name.val, t, val)
            c.loc = name.loc
            # Register the constant, just like variables and types are
            # registered; without this the parsed constant is lost.
            self.addDeclaration(c)
        parseConst()
        while self.hasConsumed(','):
            parseConst()
        self.Consume(';')

    # Procedures
    def parseFunctionDef(self):
        """ Parse ``function <typespec> ID ( [params] ) { ... }``.

        The function is declared in the current part. While its parameters
        and body are parsed the function itself is the current part, so
        parameters and local variables end up in its declarations.
        """
        loc = self.Consume('function').loc
        returntype = self.parseTypeSpec()
        fname = self.Consume('ID').val
        f = Function(fname, loc)
        self.addDeclaration(f)
        savePart = self.currentPart
        self.currentPart = f
        self.Consume('(')
        parameters = []
        if not self.hasConsumed(')'):
            def parseParameter():
                typ = self.parseTypeSpec()
                name = self.Consume('ID')
                param = FormalParameter(name.val, typ)
                param.loc = name.loc
                self.addDeclaration(param)
                parameters.append(param)
            parseParameter()
            while self.hasConsumed(','):
                parseParameter()
            self.Consume(')')
        paramtypes = [p.typ for p in parameters]
        f.typ = FunctionType(paramtypes, returntype)
        f.body = self.parseCompoundStatement()
        self.currentPart = savePart

    # Statements:

    def parseIfStatement(self):
        """ Parse ``if ( expr ) { ... } [else { ... }]``.

        Both branches must be compound statements; a missing else branch
        is represented by None.
        """
        loc = self.Consume('if').loc
        self.Consume('(')
        condition = self.Expression()
        self.Consume(')')
        yes = self.parseCompoundStatement()
        if self.hasConsumed('else'):
            no = self.parseCompoundStatement()
        else:
            no = None
        return IfStatement(condition, yes, no, loc)

    def parseWhileStatement(self):
        """ Parse ``while ( expr ) { ... }`` """
        loc = self.Consume('while').loc
        self.Consume('(')
        condition = self.Expression()
        self.Consume(')')
        statements = self.parseCompoundStatement()
        return WhileStatement(condition, statements, loc)

    def parseReturnStatement(self):
        """ Parse ``return [expr] ;``.

        A return without expression returns the literal 0.
        """
        loc = self.Consume('return').loc
        if self.Peak == ';':
            expr = Literal(0, loc)
        else:
            expr = self.Expression()
        self.Consume(';')
        return ReturnStatement(expr, loc)

    def parseCompoundStatement(self):
        """ Parse ``{ statement* }`` into a CompoundStatement """
        self.Consume('{')
        statements = []
        while not self.hasConsumed('}'):
            s = self.Statement()
            # Empty statements and variable definitions give no node:
            if s is None:
                continue
            statements.append(s)
        return CompoundStatement(statements)

    def Statement(self):
        """ Parse one statement.

        Returns the statement node, or None for an empty statement (a
        lone ``;``) and for a variable definition, which only adds
        declarations to the current part.
        """
        # Determine statement type based on the pending token:
        if self.Peak == 'if':
            return self.parseIfStatement()
        elif self.Peak == 'while':
            return self.parseWhileStatement()
        elif self.Peak == '{':
            return self.parseCompoundStatement()
        elif self.hasConsumed(';'):
            return None
        elif self.Peak == 'var':
            self.parseVarDef()
            return None
        elif self.Peak == 'return':
            return self.parseReturnStatement()
        else:
            return self.AssignmentOrCall()

    def AssignmentOrCall(self):
        """ Parse ``unary = expr`` as Assignment, or a lone unary
        expression as ExpressionStatement.

        No ``;`` is consumed here; a following ``;`` is picked up by
        Statement as an empty statement.
        """
        x = self.UnaryExpression()
        if self.Peak == '=':
            # We enter assignment mode here.
            loc = self.Consume('=').loc
            rhs = self.Expression()
            return Assignment(x, rhs, loc)
        else:
            return ExpressionStatement(x, x.loc)

    # Expression section:
    # We do not implement these C constructs:
    #  a(2), f = 2
    # and this:
    #  a = 2 < x : 4 ? 1;
    #
    # Binding strength as implemented by the call chain below, weakest
    # first: or, and, comparisons, shifts, + -, * /, |, &, cast, unary.

    def _parseBinopChain(self, parseOperand, operators):
        """ Parse ``operand {op operand}`` into left-nested Binop nodes.

        parseOperand parses one operand (the next tighter binding level),
        operators holds the token types accepted as operator here.
        """
        lhs = parseOperand()
        while self.Peak in operators:
            op = self.Consume(self.Peak)
            rhs = parseOperand()
            lhs = Binop(lhs, op.typ, rhs, op.loc)
        return lhs

    def Expression(self):
        """ Parse an expression; the weakest binding operator is 'or' """
        return self._parseBinopChain(self.LogicalAndExpression, ('or',))

    def LogicalAndExpression(self):
        """ Parse a chain of 'and' operations """
        return self._parseBinopChain(self.EqualityExpression, ('and',))

    def EqualityExpression(self):
        """ Parse a chain of comparison operations """
        return self._parseBinopChain(
            self.SimpleExpression, ('<', '==', '>', '>=', '<=', '!='))

    def SimpleExpression(self):
        """ Shift operations before + and - ? """
        return self._parseBinopChain(self.AddExpression, ('>>', '<<'))

    def AddExpression(self):
        """ Parse a chain of + and - operations """
        return self._parseBinopChain(self.Term, ('+', '-'))

    def Term(self):
        """ Parse a chain of * and / operations """
        return self._parseBinopChain(self.BitwiseOr, ('*', '/'))

    def BitwiseOr(self):
        """ Parse a chain of | operations """
        return self._parseBinopChain(self.BitwiseAnd, ('|',))

    def BitwiseAnd(self):
        """ Parse a chain of & operations """
        return self._parseBinopChain(self.CastExpression, ('&',))

    # Domain of unary expressions:

    def CastExpression(self):
        """
          the C-style type cast conflicts with '(' expr ')'
          so introduce extra keyword 'cast'

          Syntax: ``cast < typespec > ( expr )``
        """
        if self.Peak == 'cast':
            loc = self.Consume('cast').loc
            self.Consume('<')
            t = self.parseTypeSpec()
            self.Consume('>')
            self.Consume('(')
            ce = self.Expression()
            self.Consume(')')
            return TypeCast(t, ce, loc)
        else:
            return self.UnaryExpression()

    def UnaryExpression(self):
        """ Parse a prefix ``*`` (Deref) or ``&`` (Unop) applied to a cast
        expression, or else a postfix expression.
        """
        if self.Peak in ('&', '*'):
            op = self.Consume(self.Peak)
            ce = self.CastExpression()
            # Decide on the token type, the same attribute that selected
            # this branch and that is used for all other operators.
            if op.typ == '*':
                return Deref(ce, op.loc)
            else:
                return Unop(op.typ, ce, op.loc)
        else:
            return self.PostFixExpression()

    def PostFixExpression(self):
        """ Parse a primary expression followed by any number of call
        ``( args )``, member ``.ID`` and pointer member ``->ID`` suffixes.
        """
        pfe = self.PrimaryExpression()
        while self.Peak in ('[', '(', '.', '->'):
            if self.hasConsumed('['):
                # NOTE(review): indexing is not implemented; the '[' is
                # consumed and ignored, so the index expression and ']'
                # are left for the caller to deal with.
                pass
            elif self.hasConsumed('('):
                # Function call
                args = []
                if not self.hasConsumed(')'):
                    args.append(self.Expression())
                    while self.hasConsumed(','):
                        args.append(self.Expression())
                    self.Consume(')')
                pfe = FunctionCall(pfe, args, pfe.loc)
            elif self.hasConsumed('->'):
                # a->b is parsed as (*a).b
                field = self.Consume('ID')
                pfe = Deref(pfe, pfe.loc)
                pfe = FieldRef(pfe, field.val, field.loc)
            elif self.hasConsumed('.'):
                field = self.Consume('ID')
                pfe = FieldRef(pfe, field.val, field.loc)
            else:
                # Cannot happen: all token types of the loop condition
                # are handled above.
                raise Exception('Unhandled postfix token')
        return pfe

    def PrimaryExpression(self):
        """ Parse ``( expr )``, a NUMBER or REAL literal, true, false or a
        designator used as variable.

        Raises CompilerError (through Error) on any other token.
        """
        if self.hasConsumed('('):
            e = self.Expression()
            self.Consume(')')
            return e
        elif self.Peak == 'NUMBER':
            val = self.Consume('NUMBER')
            return Literal(val.val, val.loc)
        elif self.Peak == 'REAL':
            val = self.Consume('REAL')
            return Literal(val.val, val.loc)
        elif self.Peak == 'true':
            val = self.Consume('true')
            return Literal(True, val.loc)
        elif self.Peak == 'false':
            val = self.Consume('false')
            return Literal(False, val.loc)
        elif self.Peak == 'ID':
            d = self.parseDesignator()
            return VariableUse(d, d.loc)
        self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))