Mercurial > lcfOS
view python/ppci/c3/parser.py @ 306:b145f8e6050b
Start on c3 rewrite
author | Windel Bouwman |
---|---|
date | Mon, 09 Dec 2013 19:00:21 +0100 |
parents | 6753763d3bec |
children | e609d5296ee9 |
line wrap: on
line source
import logging
from ppci import CompilerError
from .astnodes import Member, Literal, TypeCast, Unop, Binop
from .astnodes import Assignment, ExpressionStatement, CompoundStatement
from .astnodes import ReturnStatement, WhileStatement, IfStatement
from .astnodes import FunctionType, Function, FormalParameter
from .astnodes import StructureType, DefinedType, PointerType
from .astnodes import Constant, Variable
from .astnodes import StructField, Deref
from .astnodes import Package
from .astnodes import Identifier
from .astnodes import FunctionCall
from .astnodes import EmptyStatement


class Parser:
    """ Parses sourcecode into an abstract syntax tree (AST)

        This is a hand written recursive descent parser with a single
        token of lookahead (self.token). Errors are raised as
        CompilerError and reported to the diagnostics object that was
        passed to the constructor.
    """
    def __init__(self, diag):
        self.logger = logging.getLogger('c3')
        self.diag = diag

    def parseSource(self, tokens):
        """ Parse the given token iterator into a package.

            Returns the package on success. When a CompilerError is
            raised during parsing, it is added to the diagnostics and
            None is returned.
        """
        self.logger.info('Parsing source')
        self.tokens = tokens
        self.token = next(self.tokens)
        try:
            self.parsePackage()
            self.mod.ok = True  # Valid until proven wrong :)
            return self.mod
        except CompilerError as e:
            self.diag.addDiag(e)
        return None

    def Error(self, msg):
        """ Raise a CompilerError located at the current token """
        raise CompilerError(msg, self.token.loc)

    # Lexer helpers:
    def Consume(self, typ):
        """ Return the current token and advance when it has type typ,
            otherwise raise a CompilerError.
        """
        if self.Peak == typ:
            return self.NextToken()
        self.Error('Expected: "{0}", got "{1}"'.format(typ, self.Peak))

    @property
    def Peak(self):
        """ The type of the current (lookahead) token """
        return self.token.typ

    @property
    def CurLoc(self):
        """ The source location of the current token """
        return self.token.loc

    def hasConsumed(self, typ):
        """ Consume the current token if it has type typ.

            Returns True when a token was consumed, False otherwise.
        """
        if self.Peak == typ:
            self.Consume(typ)
            return True
        return False

    def NextToken(self):
        """ Return the current token and advance to the next one.

            The 'END' token is never advanced past, so it can be
            returned any number of times.
        """
        t = self.token
        if t.typ != 'END':
            self.token = next(self.tokens)
        return t

    def addDeclaration(self, decl):
        """ Append decl to the declarations of the current scope """
        self.currentPart.declarations.append(decl)

    def parseImport(self):
        """ Parse 'import' ID ';' and record the imported name """
        self.Consume('import')
        name = self.Consume('ID').val
        self.mod.imports.append(name)
        self.Consume(';')

    def parsePackage(self):
        """ Parse 'module' ID ';' followed by top level items until END.

            The created package is stored in self.mod.
        """
        self.Consume('module')
        name = self.Consume('ID')
        self.Consume(';')
        self.mod = Package(name.val, name.loc)
        self.currentPart = self.mod
        while self.Peak != 'END':
            self.parseTopLevel()
        self.Consume('END')

    def parseTopLevel(self):
        """ Dispatch on the pending token to a top level construct """
        if self.Peak == 'function':
            self.parseFunctionDef()
        elif self.Peak == 'var':
            self.parseVarDef()
            # TODO handle variable initialization
        elif self.Peak == 'const':
            self.parseConstDef()
        elif self.Peak == 'type':
            self.parseTypeDef()
        elif self.Peak == 'import':
            self.parseImport()
        else:
            self.Error('Expected function, var, const or type')

    def parseDesignator(self):
        """ A designator designates an object with a name. """
        name = self.Consume('ID')
        return Identifier(name.val, name.loc)

    def parseIdSequence(self):
        """ Parse one or more comma separated ID tokens.

            Returns the list of ID tokens.
        """
        ids = [self.Consume('ID')]
        while self.hasConsumed(','):
            ids.append(self.Consume('ID'))
        return ids

    # Type system
    def parseTypeSpec(self):
        """ Parse a type: a struct definition or a postfix expression,
            followed by any number of '*' pointer suffixes.
        """
        # For now, do simple type spec, just parse an ID:
        if self.Peak == 'struct':
            self.Consume('struct')
            self.Consume('{')
            mems = []
            while self.Peak != '}':
                mem_t = self.parseTypeSpec()
                # Several fields may share a single type:
                for i in self.parseIdSequence():
                    mems.append(StructField(i.val, mem_t))
                self.Consume(';')
            self.Consume('}')
            theT = StructureType(mems)
        elif self.Peak == 'enum':
            # TODO
            raise NotImplementedError()
        else:
            theT = self.PostFixExpression()
        # Check for pointer suffix:
        while self.hasConsumed('*'):
            theT = PointerType(theT)
        return theT

    def parseTypeDef(self):
        """ Parse 'type' typespec ID ';' and declare the new type """
        self.Consume('type')
        newtype = self.parseTypeSpec()
        typename = self.Consume('ID')
        self.Consume(';')
        df = DefinedType(typename.val, newtype, typename.loc)
        self.addDeclaration(df)

    # Variable declarations:
    def parseVarDef(self):
        """ Parse 'var' typespec ID {',' ID} ';' and declare the variables.

            Returns an EmptyStatement, such that a variable definition
            can be used at statement level.
        """
        self.Consume('var')
        t = self.parseTypeSpec()
        for name in self.parseIdSequence():
            v = Variable(name.val, t)
            v.loc = name.loc
            self.addDeclaration(v)
        self.Consume(';')
        return EmptyStatement()

    def parseConstDef(self):
        """ Parse 'const' typespec ID '=' expr {',' ID '=' expr} ';'
            and declare each constant in the current scope.
        """
        self.Consume('const')
        t = self.parseTypeSpec()
        while True:
            name = self.Consume('ID')
            self.Consume('=')
            val = self.Expression()
            c = Constant(name.val, t, val)
            c.loc = name.loc
            # Register the constant, otherwise it is lost after parsing:
            self.addDeclaration(c)
            if not self.hasConsumed(','):
                break
        self.Consume(';')

    def parseFunctionDef(self):
        """ Parse a function definition: return type, name, formal
            parameters and body.

            The function is declared in the current scope, and is then
            made the current scope while its parameters and body are
            parsed.
        """
        loc = self.Consume('function').loc
        returntype = self.parseTypeSpec()
        fname = self.Consume('ID').val
        f = Function(fname, loc)
        self.addDeclaration(f)
        savePart = self.currentPart
        self.currentPart = f
        self.Consume('(')
        parameters = []
        if not self.hasConsumed(')'):
            while True:
                typ = self.parseTypeSpec()
                name = self.Consume('ID')
                param = FormalParameter(name.val, typ)
                param.loc = name.loc
                self.addDeclaration(param)
                parameters.append(param)
                if not self.hasConsumed(','):
                    break
            self.Consume(')')
        paramtypes = [p.typ for p in parameters]
        f.typ = FunctionType(paramtypes, returntype)
        f.body = self.parseCompoundStatement()
        self.currentPart = savePart

    def parseIfStatement(self):
        """ Parse 'if' '(' expr ')' statement ['else' statement].

            A missing else branch is represented by an EmptyStatement.
        """
        loc = self.Consume('if').loc
        self.Consume('(')
        condition = self.Expression()
        self.Consume(')')
        yes = self.Statement()
        no = self.Statement() if self.hasConsumed('else') else EmptyStatement()
        return IfStatement(condition, yes, no, loc)

    def parseWhileStatement(self):
        """ Parse 'while' '(' expr ')' statement """
        loc = self.Consume('while').loc
        self.Consume('(')
        condition = self.Expression()
        self.Consume(')')
        statements = self.Statement()
        return WhileStatement(condition, statements, loc)

    def parseReturnStatement(self):
        """ Parse 'return' [expr] ';'.

            A return without expression returns the literal 0.
        """
        loc = self.Consume('return').loc
        if self.Peak == ';':
            expr = Literal(0, loc)
        else:
            expr = self.Expression()
        self.Consume(';')
        return ReturnStatement(expr, loc)

    def parseCompoundStatement(self):
        """ Parse '{' {statement} '}' """
        self.Consume('{')
        statements = []
        while not self.hasConsumed('}'):
            statements.append(self.Statement())
        return CompoundStatement(statements)

    def Statement(self):
        """ Parse a single statement """
        # Determine statement type based on the pending token:
        if self.Peak == 'if':
            return self.parseIfStatement()
        elif self.Peak == 'while':
            return self.parseWhileStatement()
        elif self.Peak == '{':
            return self.parseCompoundStatement()
        elif self.hasConsumed(';'):
            return EmptyStatement()
        elif self.Peak == 'var':
            return self.parseVarDef()
        elif self.Peak == 'return':
            return self.parseReturnStatement()
        else:
            x = self.UnaryExpression()
            if self.Peak == '=':
                # We enter assignment mode here.
                loc = self.Consume('=').loc
                rhs = self.Expression()
                return Assignment(x, rhs, loc)
            else:
                return ExpressionStatement(x, x.loc)

    # Expression section:
    # We do not implement these C constructs:
    # a(2), f = 2
    # and this:
    # a = 2 < x : 4 ? 1;

    def Expression(self):
        """ Parse a sequence of 'or' separated operands """
        exp = self.LogicalAndExpression()
        while self.Peak == 'or':
            loc = self.Consume('or').loc
            e2 = self.LogicalAndExpression()
            exp = Binop(exp, 'or', e2, loc)
        return exp

    def LogicalAndExpression(self):
        """ Parse a sequence of 'and' separated operands """
        o = self.EqualityExpression()
        while self.Peak == 'and':
            loc = self.Consume('and').loc
            o2 = self.EqualityExpression()
            o = Binop(o, 'and', o2, loc)
        return o

    def EqualityExpression(self):
        """ Parse comparison operators, left associative """
        ee = self.SimpleExpression()
        while self.Peak in ['<', '==', '>', '>=', '<=', '!=']:
            op = self.Consume(self.Peak)
            ee2 = self.SimpleExpression()
            ee = Binop(ee, op.typ, ee2, op.loc)
        return ee

    def SimpleExpression(self):
        """ Shift operations before + and - ? """
        e = self.AddExpression()
        while self.Peak in ['>>', '<<']:
            op = self.Consume(self.Peak)
            e2 = self.AddExpression()
            e = Binop(e, op.typ, e2, op.loc)
        return e

    def AddExpression(self):
        """ Parse '+' and '-' operators, left associative """
        e = self.Term()
        while self.Peak in ['+', '-']:
            op = self.Consume(self.Peak)
            e2 = self.Term()
            e = Binop(e, op.typ, e2, op.loc)
        return e

    def Term(self):
        """ Parse '*' and '/' operators, left associative """
        t = self.BitwiseOr()
        while self.Peak in ['*', '/']:
            op = self.Consume(self.Peak)
            t2 = self.BitwiseOr()
            t = Binop(t, op.typ, t2, op.loc)
        return t

    def BitwiseOr(self):
        """ Parse the '|' operator, left associative """
        a = self.BitwiseAnd()
        while self.Peak == '|':
            op = self.Consume(self.Peak)
            b = self.BitwiseAnd()
            a = Binop(a, op.typ, b, op.loc)
        return a

    def BitwiseAnd(self):
        """ Parse the '&' operator, left associative """
        a = self.CastExpression()
        while self.Peak == '&':
            op = self.Consume(self.Peak)
            b = self.CastExpression()
            a = Binop(a, op.typ, b, op.loc)
        return a

    # Domain of unary expressions:
    def CastExpression(self):
        """
          the C-style type cast conflicts with '(' expr ')'
          so introduce extra keyword 'cast'
        """
        if self.Peak == 'cast':
            loc = self.Consume('cast').loc
            self.Consume('<')
            t = self.parseTypeSpec()
            self.Consume('>')
            self.Consume('(')
            ce = self.Expression()
            self.Consume(')')
            return TypeCast(t, ce, loc)
        else:
            return self.UnaryExpression()

    def UnaryExpression(self):
        """ Parse a prefix '&' (address of) or '*' (dereference) applied
            to a cast expression, or else a postfix expression.
        """
        if self.Peak in ['&', '*']:
            op = self.Consume(self.Peak)
            ce = self.CastExpression()
            # The token was selected on its type, so decide on its type
            # as well, like the binary operators do:
            if op.typ == '*':
                return Deref(ce, op.loc)
            else:
                return Unop(op.typ, ce, op.loc)
        else:
            return self.PostFixExpression()

    def PostFixExpression(self):
        """ Parse a primary expression followed by either a function
            call argument list or a chain of member selections.
        """
        pfe = self.PrimaryExpression()
        if self.hasConsumed('('):
            # Function call
            args = []
            if not self.hasConsumed(')'):
                args.append(self.Expression())
                while self.hasConsumed(','):
                    args.append(self.Expression())
                self.Consume(')')
            pfe = FunctionCall(pfe, args, pfe.loc)
        else:
            while self.Peak in ['[', '.', '->']:
                if self.hasConsumed('['):
                    raise NotImplementedError('Array not yet implemented')
                elif self.hasConsumed('->'):
                    # a->b is parsed as (*a).b
                    field = self.Consume('ID')
                    pfe = Deref(pfe, pfe.loc)
                    pfe = Member(pfe, field.val, field.loc)
                elif self.hasConsumed('.'):
                    field = self.Consume('ID')
                    pfe = Member(pfe, field.val, field.loc)
        return pfe

    def PrimaryExpression(self):
        """ Parse a parenthesized expression, a number, a real,
            true, false or an identifier.
        """
        if self.hasConsumed('('):
            e = self.Expression()
            self.Consume(')')
            return e
        elif self.Peak == 'NUMBER':
            val = self.Consume('NUMBER')
            return Literal(val.val, val.loc)
        elif self.Peak == 'REAL':
            val = self.Consume('REAL')
            return Literal(val.val, val.loc)
        elif self.Peak == 'true':
            val = self.Consume('true')
            return Literal(True, val.loc)
        elif self.Peak == 'false':
            val = self.Consume('false')
            return Literal(False, val.loc)
        elif self.Peak == 'ID':
            return self.parseDesignator()
        self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))