Mercurial > lcfOS
comparison python/c3/parser.py @ 213:003c8a976fff
Merge of semantics and parser again ..
author | Windel Bouwman |
---|---|
date | Fri, 05 Jul 2013 11:18:48 +0200 |
parents | 46d62dadd61b |
children | c1ccb1cb4cef |
comparison legend:
- equal
- deleted
- inserted
- replaced
212:62386bcee1ba | 213:003c8a976fff |
---|---|
1 from . import astnodes, lexer, semantics | 1 from . import astnodes, lexer |
2 from ppci import CompilerError | 2 from ppci import CompilerError |
3 | 3 |
4 # binop precedence for expressions: | 4 # binop precedence for expressions: |
5 binopPrecs = {'or': 5, 'and': 10, \ | 5 binopPrecs = {'or': 5, 'and': 10, \ |
6 '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20, \ | 6 '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20, \ |
7 '+': 30, '-': 30, '*': 40, '/': 40 } | 7 '+': 30, '-': 30, '*': 40, '/': 40 } |
8 | 8 |
9 class Parser: | 9 class Parser: |
10 """ Parses sourcecode into an abstract syntax tree (AST) """ | 10 """ Parses sourcecode into an abstract syntax tree (AST) """ |
11 def __init__(self, diag): | 11 def __init__(self, diag): |
12 self.sema = semantics.Semantics(diag) | 12 self.diag = diag |
13 self.diag = diag | 13 |
14 def parseSource(self, source): | 14 def parseSource(self, source): |
15 self.initLex(source) | 15 self.initLex(source) |
16 self.sema.reinit() | |
17 try: | 16 try: |
18 self.parsePackage() | 17 self.parsePackage() |
19 return self.sema.mod | 18 return self.mod |
20 except CompilerError as e: | 19 except CompilerError as e: |
21 self.diag.addDiag(e) | 20 self.diag.addDiag(e) |
22 def Error(self, msg): | 21 def Error(self, msg): |
23 raise CompilerError(msg, self.token.loc) | 22 raise CompilerError(msg, self.token.loc) |
24 # Lexer helpers: | 23 # Lexer helpers: |
25 def Consume(self, typ): | 24 def Consume(self, typ): |
26 if self.Peak == typ: | 25 if self.Peak == typ: |
27 return self.NextToken() | 26 return self.NextToken() |
28 else: | 27 else: |
29 self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) | 28 self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) |
30 @property | 29 @property |
31 def Peak(self): | 30 def Peak(self): |
32 return self.token.typ | 31 return self.token.typ |
33 @property | 32 @property |
34 def PeakPrec(self): | 33 def PeakPrec(self): |
35 if self.Peak in binopPrecs: | 34 if self.Peak in binopPrecs: |
36 return binopPrecs[self.Peak] | 35 return binopPrecs[self.Peak] |
37 return -1 | 36 return -1 |
38 def hasConsumed(self, typ): | 37 def hasConsumed(self, typ): |
39 if self.Peak == typ: | 38 if self.Peak == typ: |
40 self.Consume(typ) | 39 self.Consume(typ) |
41 return True | 40 return True |
42 return False | 41 return False |
43 def NextToken(self): | 42 |
| 43 def NextToken(self): |
44 t = self.token | 44 t = self.token |
45 if t.typ != 'END': | 45 if t.typ != 'END': |
46 self.token = self.tokens.__next__() | 46 self.token = self.tokens.__next__() |
47 return t | 47 return t |
48 def initLex(self, source): | 48 |
| 49 def initLex(self, source): |
49 self.tokens = lexer.tokenize(source) # Lexical stage | 50 self.tokens = lexer.tokenize(source) # Lexical stage |
50 self.token = self.tokens.__next__() | 51 self.token = self.tokens.__next__() |
51 def skipToSemi(self, tt): | 52 |
52 while self.Peak != tt and self.Peak != 'END': | 53 def parseUses(self): |
53 self.NextToken() | 54 pass |
54 if self.Peak == tt: | 55 |
55 self.Consume(tt) | 56 def parsePackage(self): |
56 | |
57 def parsePackage(self): | |
58 self.Consume('package') | 57 self.Consume('package') |
59 name = self.Consume('ID') | 58 name = self.Consume('ID') |
60 self.Consume(';') | 59 self.Consume(';') |
61 self.sema.handlePackage(name.val, name.loc) | 60 self.mod = astnodes.Package(name.val, name.loc) |
| 61 self.parseUses() |
62 # TODO: parse uses | 62 # TODO: parse uses |
63 while self.Peak != 'END': | 63 while self.Peak != 'END': |
64 self.parseTopLevel() | 64 self.parseTopLevel() |
65 self.Consume('END') | 65 self.Consume('END') |
66 | 66 |
67 def parseTopLevel(self): | 67 def parseTopLevel(self): |
68 if self.Peak == 'function': | 68 if self.Peak == 'function': |
69 self.parseFunctionDefinition() | 69 self.parseFunctionDef() |
70 elif self.Peak == 'var': | 70 elif self.Peak == 'var': |
71 self.parseVarDef() | 71 self.parseVarDef() |
72 elif self.Peak == 'const': | 72 elif self.Peak == 'const': |
73 self.parseConstDef() | 73 self.parseConstDef() |
74 else: | 74 elif self.Peak == 'type': |
75 self.Error('Expected function or variable') | 75 self.parseTypeDef() |
76 | 76 else: |
77 def parseDesignator(self): | 77 self.Error('Expected function, var, const or type') |
| 78 |
| 79 def parseDesignator(self): |
78 """ A designator designates an object """ | 80 """ A designator designates an object """ |
79 name = self.Consume('ID') | 81 name = self.Consume('ID') |
80 return self.sema.actOnDesignator(name.val, name.loc) | 82 d = astnodes.Designator(name.val, name.loc) |
81 | |
82 # Type system | |
83 def parseType(self): | |
84 d = self.parseDesignator() | |
85 return d | 83 return d |
86 | 84 |
87 # Variable declarations: | 85 # Type system |
88 def parseVarDef(self): | 86 def parseTypeSpec(self): |
| 87 # For now, do simple type spec, just parse an ID: |
| 88 return self.parseDesignator() |
| 89 if self.Peak == 'struct': |
| 90 self.Consume('struct') |
| 91 self.Consume('{') |
| 92 mems = [] |
| 93 while self.Peak != '}': |
| 94 mem_t = self.parseTypeSpec() |
| 95 mem_n = self.Consume('ID') |
| 96 mems.append((mem_t, mem_n)) |
| 97 while self.hasConsumed(','): |
| 98 mem_n = self.Consume('ID') |
| 99 mems.append((mem_t, mem_n)) |
| 100 self.Consume(';') |
| 101 self.Consume('}') |
| 102 theT = astnodes.StructureType(mems) |
| 103 else: |
| 104 theT = self.parseDesignator() |
| 105 # Check for pointer suffix: |
| 106 while self.hasConsumed('*'): |
| 107 theT = astnodes.PointerType(theT) |
| 108 return theT |
| 109 |
| 110 def parseTypeDef(self): |
| 111 self.Consume('type') |
| 112 newtype = self.parseTypeSpec() |
| 113 typename = self.Consume('ID') |
| 114 # TODO: action here :) |
| 115 self.Consume(';') |
| 116 return astnodes.DefinedType(typename, newtype) |
| 117 |
| 118 # Variable declarations: |
| 119 def parseVarDef(self): |
89 self.Consume('var') | 120 self.Consume('var') |
90 t = self.parseType() | 121 t = self.parseTypeSpec() |
91 def parseVar(): | 122 def parseVar(): |
92 name = self.Consume('ID') | 123 name = self.Consume('ID') |
93 ival = None | 124 v = astnodes.Variable(name.val, t) |
| 125 v.loc = name.loc |
94 if self.hasConsumed('='): | 126 if self.hasConsumed('='): |
95 ival = self.parseExpression() | 127 v.ival = self.parseExpression() |
96 self.sema.actOnVarDef(name.val, name.loc, t, ival) | |
97 parseVar() | 128 parseVar() |
98 while self.hasConsumed(','): | 129 while self.hasConsumed(','): |
99 parseVar() | 130 parseVar() |
100 self.Consume(';') | 131 self.Consume(';') |
101 | 132 |
102 def parseConstDef(self): | 133 def parseConstDef(self): |
103 self.Consume('const') | 134 self.Consume('const') |
104 t = self.parseType() | 135 t = self.parseTypeSpec() |
105 def parseConst(): | 136 def parseConst(): |
106 name = self.Consume('ID') | 137 name = self.Consume('ID') |
107 self.Consume('=') | 138 self.Consume('=') |
108 val = self.parseExpression() | 139 val = self.parseExpression() |
109 self.sema.actOnConstDef(name.val, name.loc, t, val) | 140 c = astnodes.Constant(name.val, t, val) |
| 141 c.loc = name.loc |
110 parseConst() | 142 parseConst() |
111 while self.hasConsumed(','): | 143 while self.hasConsumed(','): |
112 parseConst() | 144 parseConst() |
113 self.Consume(';') | 145 self.Consume(';') |
114 | 146 |
115 # Procedures | 147 # Procedures |
116 def parseFunctionDefinition(self): | 148 def parseFunctionDef(self): |
117 self.Consume('function') | 149 loc = self.Consume('function').loc |
118 returntype = self.parseType() | 150 returntype = self.parseTypeSpec() |
119 pname = self.Consume('ID') | 151 fname = self.Consume('ID').val |
120 self.sema.actOnFuncDef1(pname.val, pname.loc) | 152 f = astnodes.Function(fname, loc) |
121 self.Consume('(') | 153 self.Consume('(') |
122 parameters = [] | 154 parameters = [] |
123 if not self.hasConsumed(')'): | 155 if not self.hasConsumed(')'): |
124 def parseParameter(): | 156 def parseParameter(): |
125 typ = self.parseType() | 157 typ = self.parseTypeSpec() |
126 name = self.Consume('ID') | 158 name = self.Consume('ID') |
127 parameters.append(self.sema.actOnParameter(name.val, name.loc, typ)) | 159 param = astnodes.Variable(name.val, typ) |
| 160 param.loc = name.loc |
| 161 parameters.append(param) |
128 parseParameter() | 162 parseParameter() |
129 while self.hasConsumed(','): | 163 while self.hasConsumed(','): |
130 parseParameter() | 164 parseParameter() |
131 self.Consume(')') | 165 self.Consume(')') |
132 body = self.parseCompoundStatement() | 166 body = self.parseCompoundStatement() |
133 self.sema.actOnFuncDef2(parameters, returntype, body) | 167 |
134 | 168 # Statements: |
135 # Statements: | 169 def parseAssignment(self, lval): |
136 def parseAssignment(self, lval): | 170 lval = astnodes.VariableUse(lval, lval.loc) |
137 lval = self.sema.actOnVariableUse(lval, lval.loc) | |
138 loc = self.Consume('=').loc | 171 loc = self.Consume('=').loc |
139 rval = self.parseExpression() | 172 rval = self.parseExpression() |
140 self.Consume(';') | 173 self.Consume(';') |
141 return self.sema.actOnAssignment(lval, rval, loc) | 174 return astnodes.Assignment(lval, rval, loc) |
142 | 175 |
143 def parseProcedureCall(self, func): | 176 def parseCall(self, func): |
144 self.Consume('(') | 177 self.Consume('(') |
145 args = [] | 178 args = [] |
146 if not self.hasConsumed(')'): | 179 if not self.hasConsumed(')'): |
147 args.append(self.parseExpression()) | 180 args.append(self.parseExpression()) |
148 while self.hasConsumed(','): | 181 while self.hasConsumed(','): |
149 args.append(self.parseExpression()) | 182 args.append(self.parseExpression()) |
150 self.Consume(')') | 183 self.Consume(')') |
151 return self.sema.actOnFunctionCall(func, args, func.loc) | 184 return astnodes.FunctionCall(func, args, func.loc) |
152 | 185 |
153 def parseIfStatement(self): | 186 def parseIfStatement(self): |
154 loc = self.Consume('if').loc | 187 loc = self.Consume('if').loc |
155 self.Consume('(') | 188 self.Consume('(') |
156 condition = self.parseExpression() | 189 condition = self.parseExpression() |
157 self.Consume(')') | 190 self.Consume(')') |
158 yes = self.parseCompoundStatement() | 191 yes = self.parseCompoundStatement() |
159 if self.hasConsumed('else'): | 192 if self.hasConsumed('else'): |
160 no = self.parseCompoundStatement() | 193 no = self.parseCompoundStatement() |
161 else: | 194 else: |
162 no = astnodes.EmptyStatement() | 195 no = astnodes.EmptyStatement() |
163 return self.sema.actOnIfStatement(condition, yes, no, loc) | 196 return astnodes.IfStatement(condition, yes, no, loc) |
164 | 197 |
165 def parseWhileStatement(self): | 198 def parseWhileStatement(self): |
166 self.Consume('while') | 199 loc = self.Consume('while').loc |
167 self.Consume('(') | 200 self.Consume('(') |
168 condition = self.parseExpression() | 201 condition = self.parseExpression() |
169 self.Consume(')') | 202 self.Consume(')') |
170 statements = self.parseCompoundStatement() | 203 statements = self.parseCompoundStatement() |
171 return astnodes.WhileStatement(condition, statements) | 204 return astnodes.WhileStatement(condition, statements, loc) |
172 | 205 |
173 def parseReturnStatement(self): | 206 def parseReturnStatement(self): |
174 self.Consume('return') | 207 self.Consume('return') |
175 expr = self.parseExpression() | 208 expr = self.parseExpression() |
176 self.Consume(';') | 209 self.Consume(';') |
177 return astnodes.ReturnStatement(expr) | 210 return astnodes.ReturnStatement(expr) |
178 | 211 |
179 def parseCompoundStatement(self): | 212 def parseCompoundStatement(self): |
180 self.Consume('{') | 213 self.Consume('{') |
181 statements = [] | 214 statements = [] |
182 while not self.hasConsumed('}'): | 215 while not self.hasConsumed('}'): |
183 s = self.parseStatement() | 216 s = self.parseStatement() |
184 if not type(s) is astnodes.EmptyStatement: | 217 if not type(s) is astnodes.EmptyStatement: |
185 statements.append(s) | 218 statements.append(s) |
186 return astnodes.CompoundStatement(statements) | 219 return astnodes.CompoundStatement(statements) |
187 | 220 |
188 def parseStatement(self): | 221 def parseStatement(self): |
189 # Determine statement type based on the pending token: | 222 # Determine statement type based on the pending token: |
190 if self.Peak == 'if': | 223 if self.Peak == 'if': |
191 return self.parseIfStatement() | 224 return self.parseIfStatement() |
192 elif self.Peak == 'while': | 225 elif self.Peak == 'while': |
193 return self.parseWhileStatement() | 226 return self.parseWhileStatement() |
198 elif self.Peak == 'var': | 231 elif self.Peak == 'var': |
199 self.parseVarDef() | 232 self.parseVarDef() |
200 return astnodes.EmptyStatement() | 233 return astnodes.EmptyStatement() |
201 elif self.Peak == 'return': | 234 elif self.Peak == 'return': |
202 return self.parseReturnStatement() | 235 return self.parseReturnStatement() |
203 elif self.Peak == 'ID': | 236 else: |
204 designator = self.parseDesignator() | 237 designator = self.parseDesignator() |
205 if self.Peak == '(': | 238 if self.Peak == '(': |
206 return self.parseProcedureCall(designator) | 239 return self.parseCall(designator) |
207 elif self.Peak == '=': | 240 elif self.Peak == '=': |
208 return self.parseAssignment(designator) | 241 return self.parseAssignment(designator) |
209 self.Error('Unable to determine statement') | 242 else: |
210 | 243 self.Error('Unable to determine statement') |
211 # Parsing expressions: | 244 |
212 def parseExpression(self): | 245 # Parsing expressions: |
| 246 def parseExpression(self): |
213 return self.parseBinopRhs(self.parsePrimary(), 0) | 247 return self.parseBinopRhs(self.parsePrimary(), 0) |
214 def parsePrimary(self): | 248 |
| 249 def parsePrimary(self): |
215 if self.hasConsumed('('): | 250 if self.hasConsumed('('): |
216 e = self.parseExpression() | 251 e = self.parseExpression() |
217 self.Consume(')') | 252 self.Consume(')') |
218 return e | 253 return e |
219 elif self.Peak == 'NUMBER': | 254 elif self.Peak == 'NUMBER': |
220 val = self.Consume('NUMBER') | 255 val = self.Consume('NUMBER') |
221 return self.sema.actOnNumber(val.val, val.loc) | 256 return astnodes.Literal(val.val, val.loc) |
222 elif self.Peak == 'REAL': | 257 elif self.Peak == 'REAL': |
223 val = self.Consume('REAL') | 258 val = self.Consume('REAL') |
224 return self.sema.actOnNumber(val.val, val.loc) | 259 return astnodes.Literal(val.val, val.loc) |
225 elif self.Peak == 'true': | 260 elif self.Peak == 'true': |
226 val = self.Consume('true') | 261 val = self.Consume('true') |
227 return self.sema.actOnNumber(True, val.loc) | 262 return astnodes.Literal(True, val.loc) |
228 elif self.Peak == 'false': | 263 elif self.Peak == 'false': |
229 val = self.Consume('false') | 264 val = self.Consume('false') |
230 return self.sema.actOnNumber(False, val.loc) | 265 return astnodes.Literal(False, val.loc) |
231 elif self.Peak == 'ID': | 266 elif self.Peak == 'ID': |
232 d = self.parseDesignator() | 267 d = self.parseDesignator() |
233 if self.Peak == '(': | 268 if self.Peak == '(': |
234 return self.parseProcedureCall(d) | 269 return self.parseCall(d) |
235 else: | 270 else: |
236 return self.sema.actOnVariableUse(d, d.loc) | 271 return astnodes.VariableUse(d, d.loc) |
237 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) | 272 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) |
238 | 273 |
239 def parseBinopRhs(self, lhs, min_prec): | 274 def parseBinopRhs(self, lhs, min_prec): |
240 while self.PeakPrec >= min_prec: | 275 while self.PeakPrec >= min_prec: |
241 op_prec = self.PeakPrec | 276 op_prec = self.PeakPrec |
242 op = self.Consume(self.Peak) | 277 op = self.Consume(self.Peak) |
243 rhs = self.parsePrimary() | 278 rhs = self.parsePrimary() |
244 while self.PeakPrec > op_prec: | 279 while self.PeakPrec > op_prec: |
245 rhs = self.parseBinopRhs(rhs, self.PeakPrec) | 280 rhs = self.parseBinopRhs(rhs, self.PeakPrec) |
246 lhs = self.sema.actOnBinop(lhs, op.typ, rhs, op.loc) | 281 lhs = astnodes.Binop(lhs, op.typ, rhs, op.loc) |
247 return lhs | 282 return lhs |
248 | 283 |