Mercurial > lcfOS
comparison python/ppci/c3/parser.py @ 306:b145f8e6050b
Start on c3 rewrite
author | Windel Bouwman |
---|---|
date | Mon, 09 Dec 2013 19:00:21 +0100 |
parents | 6753763d3bec |
children | e609d5296ee9 |
comparison
equal
deleted
inserted
replaced
305:0615b5308710 | 306:b145f8e6050b |
---|---|
1 import logging | 1 import logging |
2 from ppci import CompilerError | 2 from ppci import CompilerError |
3 from .lexer import Lexer | 3 from .astnodes import Member, Literal, TypeCast, Unop, Binop |
4 from .astnodes import FieldRef, Literal, TypeCast, Unop, Binop | |
5 from .astnodes import Assignment, ExpressionStatement, CompoundStatement | 4 from .astnodes import Assignment, ExpressionStatement, CompoundStatement |
6 from .astnodes import ReturnStatement, WhileStatement, IfStatement | 5 from .astnodes import ReturnStatement, WhileStatement, IfStatement |
7 from .astnodes import FunctionType, Function, FormalParameter | 6 from .astnodes import FunctionType, Function, FormalParameter |
8 from .astnodes import StructureType, DefinedType, PointerType | 7 from .astnodes import StructureType, DefinedType, PointerType |
9 from .astnodes import Constant, Variable | 8 from .astnodes import Constant, Variable |
10 from .astnodes import StructField, Deref | 9 from .astnodes import StructField, Deref |
11 from .astnodes import Package, ImportDesignator | 10 from .astnodes import Package |
12 from .astnodes import Designator, VariableUse, FunctionCall | 11 from .astnodes import Identifier |
12 from .astnodes import FunctionCall | |
13 from .astnodes import EmptyStatement | |
13 | 14 |
14 | 15 |
15 class Parser: | 16 class Parser: |
16 """ Parses sourcecode into an abstract syntax tree (AST) """ | 17 """ Parses sourcecode into an abstract syntax tree (AST) """ |
17 def __init__(self, diag): | 18 def __init__(self, diag): |
18 self.logger = logging.getLogger('c3') | 19 self.logger = logging.getLogger('c3') |
19 self.diag = diag | 20 self.diag = diag |
20 self.lexer = Lexer(diag) | 21 |
21 | 22 def parseSource(self, tokens): |
22 def parseSource(self, source): | |
23 self.logger.info('Parsing source') | 23 self.logger.info('Parsing source') |
24 self.initLex(source) | 24 self.tokens = tokens |
25 self.token = self.tokens.__next__() | |
25 try: | 26 try: |
26 self.parsePackage() | 27 self.parsePackage() |
28 self.mod.ok = True # Valid until proven wrong :) | |
27 return self.mod | 29 return self.mod |
28 except CompilerError as e: | 30 except CompilerError as e: |
29 self.diag.addDiag(e) | 31 self.diag.addDiag(e) |
30 | 32 |
31 def Error(self, msg): | 33 def Error(self, msg): |
56 t = self.token | 58 t = self.token |
57 if t.typ != 'END': | 59 if t.typ != 'END': |
58 self.token = self.tokens.__next__() | 60 self.token = self.tokens.__next__() |
59 return t | 61 return t |
60 | 62 |
61 def initLex(self, source): | |
62 self.tokens = self.lexer.tokenize(source) | |
63 self.token = self.tokens.__next__() | |
64 | |
65 def addDeclaration(self, decl): | 63 def addDeclaration(self, decl): |
66 self.currentPart.declarations.append(decl) | 64 self.currentPart.declarations.append(decl) |
67 | 65 |
68 def parseImport(self): | 66 def parseImport(self): |
69 self.Consume('import') | 67 self.Consume('import') |
84 def parseTopLevel(self): | 82 def parseTopLevel(self): |
85 if self.Peak == 'function': | 83 if self.Peak == 'function': |
86 self.parseFunctionDef() | 84 self.parseFunctionDef() |
87 elif self.Peak == 'var': | 85 elif self.Peak == 'var': |
88 self.parseVarDef() | 86 self.parseVarDef() |
87 # TODO handle variable initialization | |
89 elif self.Peak == 'const': | 88 elif self.Peak == 'const': |
90 self.parseConstDef() | 89 self.parseConstDef() |
91 elif self.Peak == 'type': | 90 elif self.Peak == 'type': |
92 self.parseTypeDef() | 91 self.parseTypeDef() |
93 elif self.Peak == 'import': | 92 elif self.Peak == 'import': |
96 self.Error('Expected function, var, const or type') | 95 self.Error('Expected function, var, const or type') |
97 | 96 |
98 def parseDesignator(self): | 97 def parseDesignator(self): |
99 """ A designator designates an object with a name. """ | 98 """ A designator designates an object with a name. """ |
100 name = self.Consume('ID') | 99 name = self.Consume('ID') |
101 if self.hasConsumed(':'): | 100 return Identifier(name.val, name.loc) |
102 name2 = self.Consume('ID') | 101 |
103 return ImportDesignator(name.val, name2.val, name.loc) | 102 def parseIdSequence(self): |
104 else: | 103 ids = [self.Consume('ID')] |
105 return Designator(name.val, name.loc) | 104 while self.hasConsumed(','): |
105 ids.append(self.Consume('ID')) | |
106 return ids | |
106 | 107 |
107 # Type system | 108 # Type system |
108 def parseTypeSpec(self): | 109 def parseTypeSpec(self): |
109 # For now, do simple type spec, just parse an ID: | 110 # For now, do simple type spec, just parse an ID: |
110 if self.Peak == 'struct': | 111 if self.Peak == 'struct': |
111 self.Consume('struct') | 112 self.Consume('struct') |
112 self.Consume('{') | 113 self.Consume('{') |
113 mems = [] | 114 mems = [] |
114 while self.Peak != '}': | 115 while self.Peak != '}': |
115 mem_t = self.parseTypeSpec() | 116 mem_t = self.parseTypeSpec() |
116 mem_n = self.Consume('ID').val | 117 for i in self.parseIdSequence(): |
117 mems.append(StructField(mem_n, mem_t)) | 118 mems.append(StructField(i.val, mem_t)) |
118 while self.hasConsumed(','): | |
119 mem_n = self.Consume('ID').val | |
120 mems.append(StructField(mem_n, mem_t)) | |
121 self.Consume(';') | 119 self.Consume(';') |
122 self.Consume('}') | 120 self.Consume('}') |
123 theT = StructureType(mems) | 121 theT = StructureType(mems) |
124 else: | 122 elif self.Peak == 'enum': |
125 theT = self.parseDesignator() | 123 # TODO: enum types are not implemented yet |
124 raise NotImplementedError() | |
125 else: | |
126 theT = self.PostFixExpression() | |
126 # Check for pointer suffix: | 127 # Check for pointer suffix: |
127 while self.hasConsumed('*'): | 128 while self.hasConsumed('*'): |
128 theT = PointerType(theT) | 129 theT = PointerType(theT) |
129 return theT | 130 return theT |
130 | 131 |
138 | 139 |
139 # Variable declarations: | 140 # Variable declarations: |
140 def parseVarDef(self): | 141 def parseVarDef(self): |
141 self.Consume('var') | 142 self.Consume('var') |
142 t = self.parseTypeSpec() | 143 t = self.parseTypeSpec() |
143 | 144 for name in self.parseIdSequence(): |
144 def parseVar(): | |
145 name = self.Consume('ID') | |
146 v = Variable(name.val, t) | 145 v = Variable(name.val, t) |
147 v.loc = name.loc | 146 v.loc = name.loc |
148 if self.hasConsumed('='): | |
149 v.ival = self.Expression() | |
150 self.addDeclaration(v) | 147 self.addDeclaration(v) |
151 parseVar() | 148 self.Consume(';') |
152 while self.hasConsumed(','): | 149 return EmptyStatement() |
153 parseVar() | |
154 self.Consume(';') | |
155 | 150 |
156 def parseConstDef(self): | 151 def parseConstDef(self): |
157 self.Consume('const') | 152 self.Consume('const') |
158 t = self.parseTypeSpec() | 153 t = self.parseTypeSpec() |
159 | 154 while True: |
160 def parseConst(): | |
161 name = self.Consume('ID') | 155 name = self.Consume('ID') |
162 self.Consume('=') | 156 self.Consume('=') |
163 val = self.Expression() | 157 val = self.Expression() |
164 c = Constant(name.val, t, val) | 158 c = Constant(name.val, t, val) |
165 c.loc = name.loc | 159 c.loc = name.loc |
166 parseConst() | 160 if not self.hasConsumed(','): |
167 while self.hasConsumed(','): | 161 break |
168 parseConst() | 162 self.Consume(';') |
169 self.Consume(';') | 163 |
170 | |
171 # Procedures | |
172 def parseFunctionDef(self): | 164 def parseFunctionDef(self): |
173 loc = self.Consume('function').loc | 165 loc = self.Consume('function').loc |
174 returntype = self.parseTypeSpec() | 166 returntype = self.parseTypeSpec() |
175 fname = self.Consume('ID').val | 167 fname = self.Consume('ID').val |
176 f = Function(fname, loc) | 168 f = Function(fname, loc) |
178 savePart = self.currentPart | 170 savePart = self.currentPart |
179 self.currentPart = f | 171 self.currentPart = f |
180 self.Consume('(') | 172 self.Consume('(') |
181 parameters = [] | 173 parameters = [] |
182 if not self.hasConsumed(')'): | 174 if not self.hasConsumed(')'): |
183 def parseParameter(): | 175 while True: |
184 typ = self.parseTypeSpec() | 176 typ = self.parseTypeSpec() |
185 name = self.Consume('ID') | 177 name = self.Consume('ID') |
186 param = FormalParameter(name.val, typ) | 178 param = FormalParameter(name.val, typ) |
187 param.loc = name.loc | 179 param.loc = name.loc |
188 self.addDeclaration(param) | 180 self.addDeclaration(param) |
189 parameters.append(param) | 181 parameters.append(param) |
190 parseParameter() | 182 if not self.hasConsumed(','): |
191 while self.hasConsumed(','): | 183 break |
192 parseParameter() | |
193 self.Consume(')') | 184 self.Consume(')') |
194 paramtypes = [p.typ for p in parameters] | 185 paramtypes = [p.typ for p in parameters] |
195 f.typ = FunctionType(paramtypes, returntype) | 186 f.typ = FunctionType(paramtypes, returntype) |
196 f.body = self.parseCompoundStatement() | 187 f.body = self.parseCompoundStatement() |
197 self.currentPart = savePart | 188 self.currentPart = savePart |
198 | 189 |
199 # Statements: | |
200 | |
201 def parseIfStatement(self): | 190 def parseIfStatement(self): |
202 loc = self.Consume('if').loc | 191 loc = self.Consume('if').loc |
203 self.Consume('(') | 192 self.Consume('(') |
204 condition = self.Expression() | 193 condition = self.Expression() |
205 self.Consume(')') | 194 self.Consume(')') |
206 yes = self.parseCompoundStatement() | 195 yes = self.Statement() |
207 if self.hasConsumed('else'): | 196 no = self.Statement() if self.hasConsumed('else') else EmptyStatement() |
208 no = self.parseCompoundStatement() | |
209 else: | |
210 no = None | |
211 return IfStatement(condition, yes, no, loc) | 197 return IfStatement(condition, yes, no, loc) |
212 | 198 |
213 def parseWhileStatement(self): | 199 def parseWhileStatement(self): |
214 loc = self.Consume('while').loc | 200 loc = self.Consume('while').loc |
215 self.Consume('(') | 201 self.Consume('(') |
216 condition = self.Expression() | 202 condition = self.Expression() |
217 self.Consume(')') | 203 self.Consume(')') |
218 statements = self.parseCompoundStatement() | 204 statements = self.Statement() |
219 return WhileStatement(condition, statements, loc) | 205 return WhileStatement(condition, statements, loc) |
220 | 206 |
221 def parseReturnStatement(self): | 207 def parseReturnStatement(self): |
222 loc = self.Consume('return').loc | 208 loc = self.Consume('return').loc |
223 if self.Peak == ';': | 209 if self.Peak == ';': |
229 | 215 |
230 def parseCompoundStatement(self): | 216 def parseCompoundStatement(self): |
231 self.Consume('{') | 217 self.Consume('{') |
232 statements = [] | 218 statements = [] |
233 while not self.hasConsumed('}'): | 219 while not self.hasConsumed('}'): |
234 s = self.Statement() | 220 statements.append(self.Statement()) |
235 if s is None: | |
236 continue | |
237 statements.append(s) | |
238 return CompoundStatement(statements) | 221 return CompoundStatement(statements) |
239 | 222 |
240 def Statement(self): | 223 def Statement(self): |
241 # Determine statement type based on the pending token: | 224 # Determine statement type based on the pending token: |
242 if self.Peak == 'if': | 225 if self.Peak == 'if': |
244 elif self.Peak == 'while': | 227 elif self.Peak == 'while': |
245 return self.parseWhileStatement() | 228 return self.parseWhileStatement() |
246 elif self.Peak == '{': | 229 elif self.Peak == '{': |
247 return self.parseCompoundStatement() | 230 return self.parseCompoundStatement() |
248 elif self.hasConsumed(';'): | 231 elif self.hasConsumed(';'): |
249 pass | 232 return EmptyStatement() |
250 elif self.Peak == 'var': | 233 elif self.Peak == 'var': |
251 self.parseVarDef() | 234 return self.parseVarDef() |
252 elif self.Peak == 'return': | 235 elif self.Peak == 'return': |
253 return self.parseReturnStatement() | 236 return self.parseReturnStatement() |
254 else: | 237 else: |
255 return self.AssignmentOrCall() | 238 x = self.UnaryExpression() |
256 | 239 if self.Peak == '=': |
257 def AssignmentOrCall(self): | 240 # We enter assignment mode here. |
258 x = self.UnaryExpression() | 241 loc = self.Consume('=').loc |
259 if self.Peak == '=': | 242 rhs = self.Expression() |
260 # We enter assignment mode here. | 243 return Assignment(x, rhs, loc) |
261 loc = self.Consume('=').loc | 244 else: |
262 rhs = self.Expression() | 245 return ExpressionStatement(x, x.loc) |
263 return Assignment(x, rhs, loc) | |
264 else: | |
265 return ExpressionStatement(x, x.loc) | |
266 | 246 |
267 # Expression section: | 247 # Expression section: |
268 # We do not implement these C constructs: | 248 # We do not implement these C constructs: |
269 # a(2), f = 2 | 249 # a(2), f = 2 |
270 # and this: | 250 # and this: |
319 t = Binop(t, op.typ, t2, op.loc) | 299 t = Binop(t, op.typ, t2, op.loc) |
320 return t | 300 return t |
321 | 301 |
322 def BitwiseOr(self): | 302 def BitwiseOr(self): |
323 a = self.BitwiseAnd() | 303 a = self.BitwiseAnd() |
324 while self.Peak in ['|']: | 304 while self.Peak == '|': |
325 op = self.Consume(self.Peak) | 305 op = self.Consume(self.Peak) |
326 b = self.BitwiseAnd() | 306 b = self.BitwiseAnd() |
327 a = Binop(a, op.typ, b, op.loc) | 307 a = Binop(a, op.typ, b, op.loc) |
328 return a | 308 return a |
329 | 309 |
330 def BitwiseAnd(self): | 310 def BitwiseAnd(self): |
331 a = self.CastExpression() | 311 a = self.CastExpression() |
332 while self.Peak in ['&']: | 312 while self.Peak == '&': |
333 op = self.Consume(self.Peak) | 313 op = self.Consume(self.Peak) |
334 b = self.CastExpression() | 314 b = self.CastExpression() |
335 a = Binop(a, op.typ, b, op.loc) | 315 a = Binop(a, op.typ, b, op.loc) |
336 return a | 316 return a |
337 | 317 |
381 if self.hasConsumed('['): | 361 if self.hasConsumed('['): |
382 raise NotImplementedError('Array not yet implemented') | 362 raise NotImplementedError('Array not yet implemented') |
383 elif self.hasConsumed('->'): | 363 elif self.hasConsumed('->'): |
384 field = self.Consume('ID') | 364 field = self.Consume('ID') |
385 pfe = Deref(pfe, pfe.loc) | 365 pfe = Deref(pfe, pfe.loc) |
386 pfe = FieldRef(pfe, field.val, field.loc) | 366 pfe = Member(pfe, field.val, field.loc) |
387 elif self.hasConsumed('.'): | 367 elif self.hasConsumed('.'): |
388 field = self.Consume('ID') | 368 field = self.Consume('ID') |
389 pfe = FieldRef(pfe, field.val, field.loc) | 369 pfe = Member(pfe, field.val, field.loc) |
390 else: | |
391 raise Exception() | |
392 return pfe | 370 return pfe |
393 | 371 |
394 def PrimaryExpression(self): | 372 def PrimaryExpression(self): |
395 if self.hasConsumed('('): | 373 if self.hasConsumed('('): |
396 e = self.Expression() | 374 e = self.Expression() |
407 return Literal(True, val.loc) | 385 return Literal(True, val.loc) |
408 elif self.Peak == 'false': | 386 elif self.Peak == 'false': |
409 val = self.Consume('false') | 387 val = self.Consume('false') |
410 return Literal(False, val.loc) | 388 return Literal(False, val.loc) |
411 elif self.Peak == 'ID': | 389 elif self.Peak == 'ID': |
412 d = self.parseDesignator() | 390 return self.parseDesignator() |
413 return VariableUse(d, d.loc) | |
414 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) | 391 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) |