comparison python/c3/parser.py @ 213:003c8a976fff

Merge of semantics and parser again ..
author Windel Bouwman
date Fri, 05 Jul 2013 11:18:48 +0200
parents 46d62dadd61b
children c1ccb1cb4cef
comparison (side-by-side: old revision on the left, new revision on the right)
legend: equal | deleted | inserted | replaced
212:62386bcee1ba 213:003c8a976fff
1 from . import astnodes, lexer, semantics 1 from . import astnodes, lexer
2 from ppci import CompilerError 2 from ppci import CompilerError
3 3
4 # binop precedence for expressions: 4 # binop precedence for expressions:
5 binopPrecs = {'or': 5, 'and': 10, \ 5 binopPrecs = {'or': 5, 'and': 10, \
6 '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20, \ 6 '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20, \
7 '+': 30, '-': 30, '*': 40, '/': 40 } 7 '+': 30, '-': 30, '*': 40, '/': 40 }
8 8
9 class Parser: 9 class Parser:
10 """ Parses sourcecode into an abstract syntax tree (AST) """ 10 """ Parses sourcecode into an abstract syntax tree (AST) """
11 def __init__(self, diag): 11 def __init__(self, diag):
12 self.sema = semantics.Semantics(diag) 12 self.diag = diag
13 self.diag = diag 13
14 def parseSource(self, source): 14 def parseSource(self, source):
15 self.initLex(source) 15 self.initLex(source)
16 self.sema.reinit()
17 try: 16 try:
18 self.parsePackage() 17 self.parsePackage()
19 return self.sema.mod 18 return self.mod
20 except CompilerError as e: 19 except CompilerError as e:
21 self.diag.addDiag(e) 20 self.diag.addDiag(e)
22 def Error(self, msg): 21 def Error(self, msg):
23 raise CompilerError(msg, self.token.loc) 22 raise CompilerError(msg, self.token.loc)
24 # Lexer helpers: 23 # Lexer helpers:
25 def Consume(self, typ): 24 def Consume(self, typ):
26 if self.Peak == typ: 25 if self.Peak == typ:
27 return self.NextToken() 26 return self.NextToken()
28 else: 27 else:
29 self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) 28 self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak))
30 @property 29 @property
31 def Peak(self): 30 def Peak(self):
32 return self.token.typ 31 return self.token.typ
33 @property 32 @property
34 def PeakPrec(self): 33 def PeakPrec(self):
35 if self.Peak in binopPrecs: 34 if self.Peak in binopPrecs:
36 return binopPrecs[self.Peak] 35 return binopPrecs[self.Peak]
37 return -1 36 return -1
38 def hasConsumed(self, typ): 37 def hasConsumed(self, typ):
39 if self.Peak == typ: 38 if self.Peak == typ:
40 self.Consume(typ) 39 self.Consume(typ)
41 return True 40 return True
42 return False 41 return False
43 def NextToken(self): 42
43 def NextToken(self):
44 t = self.token 44 t = self.token
45 if t.typ != 'END': 45 if t.typ != 'END':
46 self.token = self.tokens.__next__() 46 self.token = self.tokens.__next__()
47 return t 47 return t
48 def initLex(self, source): 48
49 def initLex(self, source):
49 self.tokens = lexer.tokenize(source) # Lexical stage 50 self.tokens = lexer.tokenize(source) # Lexical stage
50 self.token = self.tokens.__next__() 51 self.token = self.tokens.__next__()
51 def skipToSemi(self, tt): 52
52 while self.Peak != tt and self.Peak != 'END': 53 def parseUses(self):
53 self.NextToken() 54 pass
54 if self.Peak == tt: 55
55 self.Consume(tt) 56 def parsePackage(self):
56
57 def parsePackage(self):
58 self.Consume('package') 57 self.Consume('package')
59 name = self.Consume('ID') 58 name = self.Consume('ID')
60 self.Consume(';') 59 self.Consume(';')
61 self.sema.handlePackage(name.val, name.loc) 60 self.mod = astnodes.Package(name.val, name.loc)
61 self.parseUses()
62 # TODO: parse uses 62 # TODO: parse uses
63 while self.Peak != 'END': 63 while self.Peak != 'END':
64 self.parseTopLevel() 64 self.parseTopLevel()
65 self.Consume('END') 65 self.Consume('END')
66 66
67 def parseTopLevel(self): 67 def parseTopLevel(self):
68 if self.Peak == 'function': 68 if self.Peak == 'function':
69 self.parseFunctionDefinition() 69 self.parseFunctionDef()
70 elif self.Peak == 'var': 70 elif self.Peak == 'var':
71 self.parseVarDef() 71 self.parseVarDef()
72 elif self.Peak == 'const': 72 elif self.Peak == 'const':
73 self.parseConstDef() 73 self.parseConstDef()
74 else: 74 elif self.Peak == 'type':
75 self.Error('Expected function or variable') 75 self.parseTypeDef()
76 76 else:
77 def parseDesignator(self): 77 self.Error('Expected function, var, const or type')
78
79 def parseDesignator(self):
78 """ A designator designates an object """ 80 """ A designator designates an object """
79 name = self.Consume('ID') 81 name = self.Consume('ID')
80 return self.sema.actOnDesignator(name.val, name.loc) 82 d = astnodes.Designator(name.val, name.loc)
81
82 # Type system
83 def parseType(self):
84 d = self.parseDesignator()
85 return d 83 return d
86 84
87 # Variable declarations: 85 # Type system
88 def parseVarDef(self): 86 def parseTypeSpec(self):
87 # For now, do simple type spec, just parse an ID:
88 return self.parseDesignator()
89 if self.Peak == 'struct':
90 self.Consume('struct')
91 self.Consume('{')
92 mems = []
93 while self.Peak != '}':
94 mem_t = self.parseTypeSpec()
95 mem_n = self.Consume('ID')
96 mems.append((mem_t, mem_n))
97 while self.hasConsumed(','):
98 mem_n = self.Consume('ID')
99 mems.append((mem_t, mem_n))
100 self.Consume(';')
101 self.Consume('}')
102 theT = astnodes.StructureType(mems)
103 else:
104 theT = self.parseDesignator()
105 # Check for pointer suffix:
106 while self.hasConsumed('*'):
107 theT = astnodes.PointerType(theT)
108 return theT
109
110 def parseTypeDef(self):
111 self.Consume('type')
112 newtype = self.parseTypeSpec()
113 typename = self.Consume('ID')
114 # TODO: action here :)
115 self.Consume(';')
116 return astnodes.DefinedType(typename, newtype)
117
118 # Variable declarations:
119 def parseVarDef(self):
89 self.Consume('var') 120 self.Consume('var')
90 t = self.parseType() 121 t = self.parseTypeSpec()
91 def parseVar(): 122 def parseVar():
92 name = self.Consume('ID') 123 name = self.Consume('ID')
93 ival = None 124 v = astnodes.Variable(name.val, t)
125 v.loc = name.loc
94 if self.hasConsumed('='): 126 if self.hasConsumed('='):
95 ival = self.parseExpression() 127 v.ival = self.parseExpression()
96 self.sema.actOnVarDef(name.val, name.loc, t, ival)
97 parseVar() 128 parseVar()
98 while self.hasConsumed(','): 129 while self.hasConsumed(','):
99 parseVar() 130 parseVar()
100 self.Consume(';') 131 self.Consume(';')
101 132
102 def parseConstDef(self): 133 def parseConstDef(self):
103 self.Consume('const') 134 self.Consume('const')
104 t = self.parseType() 135 t = self.parseTypeSpec()
105 def parseConst(): 136 def parseConst():
106 name = self.Consume('ID') 137 name = self.Consume('ID')
107 self.Consume('=') 138 self.Consume('=')
108 val = self.parseExpression() 139 val = self.parseExpression()
109 self.sema.actOnConstDef(name.val, name.loc, t, val) 140 c = astnodes.Constant(name.val, t, val)
141 c.loc = name.loc
110 parseConst() 142 parseConst()
111 while self.hasConsumed(','): 143 while self.hasConsumed(','):
112 parseConst() 144 parseConst()
113 self.Consume(';') 145 self.Consume(';')
114 146
115 # Procedures 147 # Procedures
116 def parseFunctionDefinition(self): 148 def parseFunctionDef(self):
117 self.Consume('function') 149 loc = self.Consume('function').loc
118 returntype = self.parseType() 150 returntype = self.parseTypeSpec()
119 pname = self.Consume('ID') 151 fname = self.Consume('ID').val
120 self.sema.actOnFuncDef1(pname.val, pname.loc) 152 f = astnodes.Function(fname, loc)
121 self.Consume('(') 153 self.Consume('(')
122 parameters = [] 154 parameters = []
123 if not self.hasConsumed(')'): 155 if not self.hasConsumed(')'):
124 def parseParameter(): 156 def parseParameter():
125 typ = self.parseType() 157 typ = self.parseTypeSpec()
126 name = self.Consume('ID') 158 name = self.Consume('ID')
127 parameters.append(self.sema.actOnParameter(name.val, name.loc, typ)) 159 param = astnodes.Variable(name.val, typ)
160 param.loc = name.loc
161 parameters.append(param)
128 parseParameter() 162 parseParameter()
129 while self.hasConsumed(','): 163 while self.hasConsumed(','):
130 parseParameter() 164 parseParameter()
131 self.Consume(')') 165 self.Consume(')')
132 body = self.parseCompoundStatement() 166 body = self.parseCompoundStatement()
133 self.sema.actOnFuncDef2(parameters, returntype, body) 167
134 168 # Statements:
135 # Statements: 169 def parseAssignment(self, lval):
136 def parseAssignment(self, lval): 170 lval = astnodes.VariableUse(lval, lval.loc)
137 lval = self.sema.actOnVariableUse(lval, lval.loc)
138 loc = self.Consume('=').loc 171 loc = self.Consume('=').loc
139 rval = self.parseExpression() 172 rval = self.parseExpression()
140 self.Consume(';') 173 self.Consume(';')
141 return self.sema.actOnAssignment(lval, rval, loc) 174 return astnodes.Assignment(lval, rval, loc)
142 175
143 def parseProcedureCall(self, func): 176 def parseCall(self, func):
144 self.Consume('(') 177 self.Consume('(')
145 args = [] 178 args = []
146 if not self.hasConsumed(')'): 179 if not self.hasConsumed(')'):
147 args.append(self.parseExpression()) 180 args.append(self.parseExpression())
148 while self.hasConsumed(','): 181 while self.hasConsumed(','):
149 args.append(self.parseExpression()) 182 args.append(self.parseExpression())
150 self.Consume(')') 183 self.Consume(')')
151 return self.sema.actOnFunctionCall(func, args, func.loc) 184 return astnodes.FunctionCall(func, args, func.loc)
152 185
153 def parseIfStatement(self): 186 def parseIfStatement(self):
154 loc = self.Consume('if').loc 187 loc = self.Consume('if').loc
155 self.Consume('(') 188 self.Consume('(')
156 condition = self.parseExpression() 189 condition = self.parseExpression()
157 self.Consume(')') 190 self.Consume(')')
158 yes = self.parseCompoundStatement() 191 yes = self.parseCompoundStatement()
159 if self.hasConsumed('else'): 192 if self.hasConsumed('else'):
160 no = self.parseCompoundStatement() 193 no = self.parseCompoundStatement()
161 else: 194 else:
162 no = astnodes.EmptyStatement() 195 no = astnodes.EmptyStatement()
163 return self.sema.actOnIfStatement(condition, yes, no, loc) 196 return astnodes.IfStatement(condition, yes, no, loc)
164 197
165 def parseWhileStatement(self): 198 def parseWhileStatement(self):
166 self.Consume('while') 199 loc = self.Consume('while').loc
167 self.Consume('(') 200 self.Consume('(')
168 condition = self.parseExpression() 201 condition = self.parseExpression()
169 self.Consume(')') 202 self.Consume(')')
170 statements = self.parseCompoundStatement() 203 statements = self.parseCompoundStatement()
171 return astnodes.WhileStatement(condition, statements) 204 return astnodes.WhileStatement(condition, statements, loc)
172 205
173 def parseReturnStatement(self): 206 def parseReturnStatement(self):
174 self.Consume('return') 207 self.Consume('return')
175 expr = self.parseExpression() 208 expr = self.parseExpression()
176 self.Consume(';') 209 self.Consume(';')
177 return astnodes.ReturnStatement(expr) 210 return astnodes.ReturnStatement(expr)
178 211
179 def parseCompoundStatement(self): 212 def parseCompoundStatement(self):
180 self.Consume('{') 213 self.Consume('{')
181 statements = [] 214 statements = []
182 while not self.hasConsumed('}'): 215 while not self.hasConsumed('}'):
183 s = self.parseStatement() 216 s = self.parseStatement()
184 if not type(s) is astnodes.EmptyStatement: 217 if not type(s) is astnodes.EmptyStatement:
185 statements.append(s) 218 statements.append(s)
186 return astnodes.CompoundStatement(statements) 219 return astnodes.CompoundStatement(statements)
187 220
188 def parseStatement(self): 221 def parseStatement(self):
189 # Determine statement type based on the pending token: 222 # Determine statement type based on the pending token:
190 if self.Peak == 'if': 223 if self.Peak == 'if':
191 return self.parseIfStatement() 224 return self.parseIfStatement()
192 elif self.Peak == 'while': 225 elif self.Peak == 'while':
193 return self.parseWhileStatement() 226 return self.parseWhileStatement()
198 elif self.Peak == 'var': 231 elif self.Peak == 'var':
199 self.parseVarDef() 232 self.parseVarDef()
200 return astnodes.EmptyStatement() 233 return astnodes.EmptyStatement()
201 elif self.Peak == 'return': 234 elif self.Peak == 'return':
202 return self.parseReturnStatement() 235 return self.parseReturnStatement()
203 elif self.Peak == 'ID': 236 else:
204 designator = self.parseDesignator() 237 designator = self.parseDesignator()
205 if self.Peak == '(': 238 if self.Peak == '(':
206 return self.parseProcedureCall(designator) 239 return self.parseCall(designator)
207 elif self.Peak == '=': 240 elif self.Peak == '=':
208 return self.parseAssignment(designator) 241 return self.parseAssignment(designator)
209 self.Error('Unable to determine statement') 242 else:
210 243 self.Error('Unable to determine statement')
211 # Parsing expressions: 244
212 def parseExpression(self): 245 # Parsing expressions:
246 def parseExpression(self):
213 return self.parseBinopRhs(self.parsePrimary(), 0) 247 return self.parseBinopRhs(self.parsePrimary(), 0)
214 def parsePrimary(self): 248
249 def parsePrimary(self):
215 if self.hasConsumed('('): 250 if self.hasConsumed('('):
216 e = self.parseExpression() 251 e = self.parseExpression()
217 self.Consume(')') 252 self.Consume(')')
218 return e 253 return e
219 elif self.Peak == 'NUMBER': 254 elif self.Peak == 'NUMBER':
220 val = self.Consume('NUMBER') 255 val = self.Consume('NUMBER')
221 return self.sema.actOnNumber(val.val, val.loc) 256 return astnodes.Literal(val.val, val.loc)
222 elif self.Peak == 'REAL': 257 elif self.Peak == 'REAL':
223 val = self.Consume('REAL') 258 val = self.Consume('REAL')
224 return self.sema.actOnNumber(val.val, val.loc) 259 return astnodes.Literal(val.val, val.loc)
225 elif self.Peak == 'true': 260 elif self.Peak == 'true':
226 val = self.Consume('true') 261 val = self.Consume('true')
227 return self.sema.actOnNumber(True, val.loc) 262 return astnodes.Literal(True, val.loc)
228 elif self.Peak == 'false': 263 elif self.Peak == 'false':
229 val = self.Consume('false') 264 val = self.Consume('false')
230 return self.sema.actOnNumber(False, val.loc) 265 return astnodes.Literal(False, val.loc)
231 elif self.Peak == 'ID': 266 elif self.Peak == 'ID':
232 d = self.parseDesignator() 267 d = self.parseDesignator()
233 if self.Peak == '(': 268 if self.Peak == '(':
234 return self.parseProcedureCall(d) 269 return self.parseCall(d)
235 else: 270 else:
236 return self.sema.actOnVariableUse(d, d.loc) 271 return astnodes.VariableUse(d, d.loc)
237 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) 272 self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))
238 273
239 def parseBinopRhs(self, lhs, min_prec): 274 def parseBinopRhs(self, lhs, min_prec):
240 while self.PeakPrec >= min_prec: 275 while self.PeakPrec >= min_prec:
241 op_prec = self.PeakPrec 276 op_prec = self.PeakPrec
242 op = self.Consume(self.Peak) 277 op = self.Consume(self.Peak)
243 rhs = self.parsePrimary() 278 rhs = self.parsePrimary()
244 while self.PeakPrec > op_prec: 279 while self.PeakPrec > op_prec:
245 rhs = self.parseBinopRhs(rhs, self.PeakPrec) 280 rhs = self.parseBinopRhs(rhs, self.PeakPrec)
246 lhs = self.sema.actOnBinop(lhs, op.typ, rhs, op.loc) 281 lhs = astnodes.Binop(lhs, op.typ, rhs, op.loc)
247 return lhs 282 return lhs
248 283