comparison python/ppci/c3/parser.py @ 306:b145f8e6050b

Start on c3 rewrite
author Windel Bouwman
date Mon, 09 Dec 2013 19:00:21 +0100
parents 6753763d3bec
children e609d5296ee9
comparison
equal deleted inserted replaced
305:0615b5308710 306:b145f8e6050b
1 import logging 1 import logging
2 from ppci import CompilerError 2 from ppci import CompilerError
3 from .lexer import Lexer 3 from .astnodes import Member, Literal, TypeCast, Unop, Binop
4 from .astnodes import FieldRef, Literal, TypeCast, Unop, Binop
5 from .astnodes import Assignment, ExpressionStatement, CompoundStatement 4 from .astnodes import Assignment, ExpressionStatement, CompoundStatement
6 from .astnodes import ReturnStatement, WhileStatement, IfStatement 5 from .astnodes import ReturnStatement, WhileStatement, IfStatement
7 from .astnodes import FunctionType, Function, FormalParameter 6 from .astnodes import FunctionType, Function, FormalParameter
8 from .astnodes import StructureType, DefinedType, PointerType 7 from .astnodes import StructureType, DefinedType, PointerType
9 from .astnodes import Constant, Variable 8 from .astnodes import Constant, Variable
10 from .astnodes import StructField, Deref 9 from .astnodes import StructField, Deref
11 from .astnodes import Package, ImportDesignator 10 from .astnodes import Package
12 from .astnodes import Designator, VariableUse, FunctionCall 11 from .astnodes import Identifier
12 from .astnodes import FunctionCall
13 from .astnodes import EmptyStatement
13 14
14 15
15 class Parser: 16 class Parser:
16 """ Parses sourcecode into an abstract syntax tree (AST) """ 17 """ Parses sourcecode into an abstract syntax tree (AST) """
17 def __init__(self, diag): 18 def __init__(self, diag):
18 self.logger = logging.getLogger('c3') 19 self.logger = logging.getLogger('c3')
19 self.diag = diag 20 self.diag = diag
20 self.lexer = Lexer(diag) 21
21 22 def parseSource(self, tokens):
22 def parseSource(self, source):
23 self.logger.info('Parsing source') 23 self.logger.info('Parsing source')
24 self.initLex(source) 24 self.tokens = tokens
25 self.token = self.tokens.__next__()
25 try: 26 try:
26 self.parsePackage() 27 self.parsePackage()
28 self.mod.ok = True # Valid until proven wrong :)
27 return self.mod 29 return self.mod
28 except CompilerError as e: 30 except CompilerError as e:
29 self.diag.addDiag(e) 31 self.diag.addDiag(e)
30 32
31 def Error(self, msg): 33 def Error(self, msg):
56 t = self.token 58 t = self.token
57 if t.typ != 'END': 59 if t.typ != 'END':
58 self.token = self.tokens.__next__() 60 self.token = self.tokens.__next__()
59 return t 61 return t
60 62
61 def initLex(self, source):
62 self.tokens = self.lexer.tokenize(source)
63 self.token = self.tokens.__next__()
64
65 def addDeclaration(self, decl): 63 def addDeclaration(self, decl):
66 self.currentPart.declarations.append(decl) 64 self.currentPart.declarations.append(decl)
67 65
68 def parseImport(self): 66 def parseImport(self):
69 self.Consume('import') 67 self.Consume('import')
84 def parseTopLevel(self): 82 def parseTopLevel(self):
85 if self.Peak == 'function': 83 if self.Peak == 'function':
86 self.parseFunctionDef() 84 self.parseFunctionDef()
87 elif self.Peak == 'var': 85 elif self.Peak == 'var':
88 self.parseVarDef() 86 self.parseVarDef()
87 # TODO handle variable initialization
89 elif self.Peak == 'const': 88 elif self.Peak == 'const':
90 self.parseConstDef() 89 self.parseConstDef()
91 elif self.Peak == 'type': 90 elif self.Peak == 'type':
92 self.parseTypeDef() 91 self.parseTypeDef()
93 elif self.Peak == 'import': 92 elif self.Peak == 'import':
96 self.Error('Expected function, var, const or type') 95 self.Error('Expected function, var, const or type')
97 96
98 def parseDesignator(self): 97 def parseDesignator(self):
99 """ A designator designates an object with a name. """ 98 """ A designator designates an object with a name. """
100 name = self.Consume('ID') 99 name = self.Consume('ID')
101 if self.hasConsumed(':'): 100 return Identifier(name.val, name.loc)
102 name2 = self.Consume('ID') 101
103 return ImportDesignator(name.val, name2.val, name.loc) 102 def parseIdSequence(self):
104 else: 103 ids = [self.Consume('ID')]
105 return Designator(name.val, name.loc) 104 while self.hasConsumed(','):
105 ids.append(self.Consume('ID'))
106 return ids
106 107
107 # Type system 108 # Type system
108 def parseTypeSpec(self): 109 def parseTypeSpec(self):
109 # For now, do simple type spec, just parse an ID: 110 # For now, do simple type spec, just parse an ID:
110 if self.Peak == 'struct': 111 if self.Peak == 'struct':
111 self.Consume('struct') 112 self.Consume('struct')
112 self.Consume('{') 113 self.Consume('{')
113 mems = [] 114 mems = []
114 while self.Peak != '}': 115 while self.Peak != '}':
115 mem_t = self.parseTypeSpec() 116 mem_t = self.parseTypeSpec()
116 mem_n = self.Consume('ID').val 117 for i in self.parseIdSequence():
117 mems.append(StructField(mem_n, mem_t)) 118 mems.append(StructField(i.val, mem_t))
118 while self.hasConsumed(','):
119 mem_n = self.Consume('ID').val
120 mems.append(StructField(mem_n, mem_t))
121 self.Consume(';') 119 self.Consume(';')
122 self.Consume('}') 120 self.Consume('}')
123 theT = StructureType(mems) 121 theT = StructureType(mems)
124 else: 122 elif self.Peak == 'enum':
125 theT = self.parseDesignator() 123 # TODO)
124 raise NotImplementedError()
125 else:
126 theT = self.PostFixExpression()
126 # Check for pointer suffix: 127 # Check for pointer suffix:
127 while self.hasConsumed('*'): 128 while self.hasConsumed('*'):
128 theT = PointerType(theT) 129 theT = PointerType(theT)
129 return theT 130 return theT
130 131
138 139
139 # Variable declarations: 140 # Variable declarations:
140 def parseVarDef(self): 141 def parseVarDef(self):
141 self.Consume('var') 142 self.Consume('var')
142 t = self.parseTypeSpec() 143 t = self.parseTypeSpec()
143 144 for name in self.parseIdSequence():
144 def parseVar():
145 name = self.Consume('ID')
146 v = Variable(name.val, t) 145 v = Variable(name.val, t)
147 v.loc = name.loc 146 v.loc = name.loc
148 if self.hasConsumed('='):
149 v.ival = self.Expression()
150 self.addDeclaration(v) 147 self.addDeclaration(v)
151 parseVar() 148 self.Consume(';')
152 while self.hasConsumed(','): 149 return EmptyStatement()
153 parseVar()
154 self.Consume(';')
155 150
156 def parseConstDef(self): 151 def parseConstDef(self):
157 self.Consume('const') 152 self.Consume('const')
158 t = self.parseTypeSpec() 153 t = self.parseTypeSpec()
159 154 while True:
160 def parseConst():
161 name = self.Consume('ID') 155 name = self.Consume('ID')
162 self.Consume('=') 156 self.Consume('=')
163 val = self.Expression() 157 val = self.Expression()
164 c = Constant(name.val, t, val) 158 c = Constant(name.val, t, val)
165 c.loc = name.loc 159 c.loc = name.loc
166 parseConst() 160 if not self.hasConsumed(','):
167 while self.hasConsumed(','): 161 break
168 parseConst() 162 self.Consume(';')
169 self.Consume(';') 163
170
171 # Procedures
172 def parseFunctionDef(self): 164 def parseFunctionDef(self):
173 loc = self.Consume('function').loc 165 loc = self.Consume('function').loc
174 returntype = self.parseTypeSpec() 166 returntype = self.parseTypeSpec()
175 fname = self.Consume('ID').val 167 fname = self.Consume('ID').val
176 f = Function(fname, loc) 168 f = Function(fname, loc)
178 savePart = self.currentPart 170 savePart = self.currentPart
179 self.currentPart = f 171 self.currentPart = f
180 self.Consume('(') 172 self.Consume('(')
181 parameters = [] 173 parameters = []
182 if not self.hasConsumed(')'): 174 if not self.hasConsumed(')'):
183 def parseParameter(): 175 while True:
184 typ = self.parseTypeSpec() 176 typ = self.parseTypeSpec()
185 name = self.Consume('ID') 177 name = self.Consume('ID')
186 param = FormalParameter(name.val, typ) 178 param = FormalParameter(name.val, typ)
187 param.loc = name.loc 179 param.loc = name.loc
188 self.addDeclaration(param) 180 self.addDeclaration(param)
189 parameters.append(param) 181 parameters.append(param)
190 parseParameter() 182 if not self.hasConsumed(','):
191 while self.hasConsumed(','): 183 break
192 parseParameter()
193 self.Consume(')') 184 self.Consume(')')
194 paramtypes = [p.typ for p in parameters] 185 paramtypes = [p.typ for p in parameters]
195 f.typ = FunctionType(paramtypes, returntype) 186 f.typ = FunctionType(paramtypes, returntype)
196 f.body = self.parseCompoundStatement() 187 f.body = self.parseCompoundStatement()
197 self.currentPart = savePart 188 self.currentPart = savePart
198 189
199 # Statements:
200
201 def parseIfStatement(self): 190 def parseIfStatement(self):
202 loc = self.Consume('if').loc 191 loc = self.Consume('if').loc
203 self.Consume('(') 192 self.Consume('(')
204 condition = self.Expression() 193 condition = self.Expression()
205 self.Consume(')') 194 self.Consume(')')
206 yes = self.parseCompoundStatement() 195 yes = self.Statement()
207 if self.hasConsumed('else'): 196 no = self.Statement() if self.hasConsumed('else') else EmptyStatement()
208 no = self.parseCompoundStatement()
209 else:
210 no = None
211 return IfStatement(condition, yes, no, loc) 197 return IfStatement(condition, yes, no, loc)
212 198
213 def parseWhileStatement(self): 199 def parseWhileStatement(self):
214 loc = self.Consume('while').loc 200 loc = self.Consume('while').loc
215 self.Consume('(') 201 self.Consume('(')
216 condition = self.Expression() 202 condition = self.Expression()
217 self.Consume(')') 203 self.Consume(')')
218 statements = self.parseCompoundStatement() 204 statements = self.Statement()
219 return WhileStatement(condition, statements, loc) 205 return WhileStatement(condition, statements, loc)
220 206
221 def parseReturnStatement(self): 207 def parseReturnStatement(self):
222 loc = self.Consume('return').loc 208 loc = self.Consume('return').loc
223 if self.Peak == ';': 209 if self.Peak == ';':
229 215
230 def parseCompoundStatement(self): 216 def parseCompoundStatement(self):
231 self.Consume('{') 217 self.Consume('{')
232 statements = [] 218 statements = []
233 while not self.hasConsumed('}'): 219 while not self.hasConsumed('}'):
234 s = self.Statement() 220 statements.append(self.Statement())
235 if s is None:
236 continue
237 statements.append(s)
238 return CompoundStatement(statements) 221 return CompoundStatement(statements)
239 222
240 def Statement(self): 223 def Statement(self):
241 # Determine statement type based on the pending token: 224 # Determine statement type based on the pending token:
242 if self.Peak == 'if': 225 if self.Peak == 'if':
244 elif self.Peak == 'while': 227 elif self.Peak == 'while':
245 return self.parseWhileStatement() 228 return self.parseWhileStatement()
246 elif self.Peak == '{': 229 elif self.Peak == '{':
247 return self.parseCompoundStatement() 230 return self.parseCompoundStatement()
248 elif self.hasConsumed(';'): 231 elif self.hasConsumed(';'):
249 pass 232 return EmptyStatement()
250 elif self.Peak == 'var': 233 elif self.Peak == 'var':
251 self.parseVarDef() 234 return self.parseVarDef()
252 elif self.Peak == 'return': 235 elif self.Peak == 'return':
253 return self.parseReturnStatement() 236 return self.parseReturnStatement()
254 else: 237 else:
255 return self.AssignmentOrCall() 238 x = self.UnaryExpression()
256 239 if self.Peak == '=':
257 def AssignmentOrCall(self): 240 # We enter assignment mode here.
258 x = self.UnaryExpression() 241 loc = self.Consume('=').loc
259 if self.Peak == '=': 242 rhs = self.Expression()
260 # We enter assignment mode here. 243 return Assignment(x, rhs, loc)
261 loc = self.Consume('=').loc 244 else:
262 rhs = self.Expression() 245 return ExpressionStatement(x, x.loc)
263 return Assignment(x, rhs, loc)
264 else:
265 return ExpressionStatement(x, x.loc)
266 246
267 # Expression section: 247 # Expression section:
268 # We not implement these C constructs: 248 # We not implement these C constructs:
269 # a(2), f = 2 249 # a(2), f = 2
270 # and this: 250 # and this:
319 t = Binop(t, op.typ, t2, op.loc) 299 t = Binop(t, op.typ, t2, op.loc)
320 return t 300 return t
321 301
322 def BitwiseOr(self): 302 def BitwiseOr(self):
323 a = self.BitwiseAnd() 303 a = self.BitwiseAnd()
324 while self.Peak in ['|']: 304 while self.Peak == '|':
325 op = self.Consume(self.Peak) 305 op = self.Consume(self.Peak)
326 b = self.BitwiseAnd() 306 b = self.BitwiseAnd()
327 a = Binop(a, op.typ, b, op.loc) 307 a = Binop(a, op.typ, b, op.loc)
328 return a 308 return a
329 309
330 def BitwiseAnd(self): 310 def BitwiseAnd(self):
331 a = self.CastExpression() 311 a = self.CastExpression()
332 while self.Peak in ['&']: 312 while self.Peak == '&':
333 op = self.Consume(self.Peak) 313 op = self.Consume(self.Peak)
334 b = self.CastExpression() 314 b = self.CastExpression()
335 a = Binop(a, op.typ, b, op.loc) 315 a = Binop(a, op.typ, b, op.loc)
336 return a 316 return a
337 317
381 if self.hasConsumed('['): 361 if self.hasConsumed('['):
382 raise NotImplementedError('Array not yet implemented') 362 raise NotImplementedError('Array not yet implemented')
383 elif self.hasConsumed('->'): 363 elif self.hasConsumed('->'):
384 field = self.Consume('ID') 364 field = self.Consume('ID')
385 pfe = Deref(pfe, pfe.loc) 365 pfe = Deref(pfe, pfe.loc)
386 pfe = FieldRef(pfe, field.val, field.loc) 366 pfe = Member(pfe, field.val, field.loc)
387 elif self.hasConsumed('.'): 367 elif self.hasConsumed('.'):
388 field = self.Consume('ID') 368 field = self.Consume('ID')
389 pfe = FieldRef(pfe, field.val, field.loc) 369 pfe = Member(pfe, field.val, field.loc)
390 else:
391 raise Exception()
392 return pfe 370 return pfe
393 371
394 def PrimaryExpression(self): 372 def PrimaryExpression(self):
395 if self.hasConsumed('('): 373 if self.hasConsumed('('):
396 e = self.Expression() 374 e = self.Expression()
407 return Literal(True, val.loc) 385 return Literal(True, val.loc)
408 elif self.Peak == 'false': 386 elif self.Peak == 'false':
409 val = self.Consume('false') 387 val = self.Consume('false')
410 return Literal(False, val.loc) 388 return Literal(False, val.loc)
411 elif self.Peak == 'ID': 389 elif self.Peak == 'ID':
412 d = self.parseDesignator() 390 return self.parseDesignator()
413 return VariableUse(d, d.loc)
414 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) 391 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))