Mercurial > lcfOS
view python/ppci/c3/parser.py @ 300:158068af716c
yafm
author | Windel Bouwman |
---|---|
date | Tue, 03 Dec 2013 18:00:22 +0100 |
parents | python/c3/parser.py@9417caea2eb3 |
children | 6753763d3bec |
line wrap: on
line source
import logging
from .lexer import Lexer
from .astnodes import FieldRef, Literal, TypeCast, Unop, Binop
from .astnodes import Assignment, ExpressionStatement, CompoundStatement
from .astnodes import ReturnStatement, WhileStatement, IfStatement
from .astnodes import FunctionType, Function, FormalParameter
from .astnodes import StructureType, DefinedType, PointerType
from .astnodes import Constant, Variable
from .astnodes import StructField, Deref
from .astnodes import Package, ImportDesignator
from .astnodes import Designator, VariableUse, FunctionCall
from ppci import CompilerError


class Parser:
    """ Parses sourcecode into an abstract syntax tree (AST).

    This is a hand written recursive descent parser: every grammar rule
    is a method that inspects the pending token (``Peak``) and consumes
    the tokens that belong to the rule.
    """
    def __init__(self, diag):
        self.logger = logging.getLogger('c3')
        self.diag = diag
        self.lexer = Lexer(diag)

    def parseSource(self, source):
        """ Parse `source` and return the resulting package.

        When a CompilerError is raised during parsing, it is handed to
        the diagnostics object and None is returned.
        """
        self.logger.info('Parsing source')
        self.initLex(source)
        try:
            self.parsePackage()
            return self.mod
        except CompilerError as e:
            self.diag.addDiag(e)
            return None

    def Error(self, msg):
        """ Raise a CompilerError located at the pending token """
        raise CompilerError(msg, self.token.loc)

    # Lexer helpers:
    def Consume(self, typ):
        """ Return the pending token and advance when it has type `typ`,
            raise a CompilerError otherwise """
        if self.Peak == typ:
            return self.NextToken()
        else:
            self.Error('Expected: "{0}", got "{1}"'.format(typ, self.Peak))

    @property
    def Peak(self):
        """ The type of the pending (not yet consumed) token """
        return self.token.typ

    @property
    def CurLoc(self):
        """ The source location of the pending token """
        return self.token.loc

    def hasConsumed(self, typ):
        """ Consume the pending token and return True if it has type
            `typ`, otherwise leave it in place and return False """
        if self.Peak == typ:
            self.Consume(typ)
            return True
        return False

    def NextToken(self):
        """ Return the pending token and fetch the next one """
        t = self.token
        # Stay on the 'END' token once it is reached, so that it can be
        # inspected any number of times.
        if t.typ != 'END':
            self.token = next(self.tokens)
        return t

    def initLex(self, source):
        """ Start tokenizing `source` and load the first token """
        self.tokens = self.lexer.tokenize(source)
        self.token = next(self.tokens)

    def addDeclaration(self, decl):
        """ Add `decl` to the part (package or function) being parsed """
        self.currentPart.declarations.append(decl)

    def parseImport(self):
        """ Parse: 'import' ID ';' and record the imported name """
        self.Consume('import')
        name = self.Consume('ID').val
        self.mod.imports.append(name)
        self.Consume(';')

    def parsePackage(self):
        """ Parse: 'module' ID ';' { toplevel } END

        The created package is stored in self.mod.
        """
        self.Consume('module')
        name = self.Consume('ID')
        self.Consume(';')
        self.mod = Package(name.val, name.loc)
        self.currentPart = self.mod
        while self.Peak != 'END':
            self.parseTopLevel()
        self.Consume('END')

    def parseTopLevel(self):
        """ Parse one top level construct, selected on the pending token """
        if self.Peak == 'function':
            self.parseFunctionDef()
        elif self.Peak == 'var':
            self.parseVarDef()
        elif self.Peak == 'const':
            self.parseConstDef()
        elif self.Peak == 'type':
            self.parseTypeDef()
        elif self.Peak == 'import':
            self.parseImport()
        else:
            self.Error('Expected function, var, const or type')

    def parseDesignator(self):
        """ A designator designates an object.

        Parses: ID [ ':' ID ]. The two-part form yields an
        ImportDesignator, the single ID form a Designator.
        """
        name = self.Consume('ID')
        if self.hasConsumed(':'):
            name2 = self.Consume('ID')
            return ImportDesignator(name.val, name2.val, name.loc)
        else:
            return Designator(name.val, name.loc)

    # Type system
    def parseTypeSpec(self):
        """ Parse a type specification.

        This is either a struct definition:
          'struct' '{' { typespec ID { ',' ID } ';' } '}'
        or a designator. Both may be followed by any number of '*'
        tokens, each wrapping the type in a PointerType.
        """
        if self.Peak == 'struct':
            self.Consume('struct')
            self.Consume('{')
            mems = []
            while self.Peak != '}':
                mem_t = self.parseTypeSpec()
                mem_n = self.Consume('ID').val
                mems.append(StructField(mem_n, mem_t))
                # Additional members that share the same type:
                while self.hasConsumed(','):
                    mem_n = self.Consume('ID').val
                    mems.append(StructField(mem_n, mem_t))
                self.Consume(';')
            self.Consume('}')
            theT = StructureType(mems)
        else:
            theT = self.parseDesignator()
        # Check for pointer suffix:
        while self.hasConsumed('*'):
            theT = PointerType(theT)
        return theT

    def parseTypeDef(self):
        """ Parse: 'type' typespec ID ';' and declare the new type """
        self.Consume('type')
        newtype = self.parseTypeSpec()
        typename = self.Consume('ID')
        self.Consume(';')
        df = DefinedType(typename.val, newtype, typename.loc)
        self.addDeclaration(df)

    # Variable declarations:
    def parseVarDef(self):
        """ Parse: 'var' typespec ID [ '=' expr ] { ',' ID [ '=' expr ] } ';'

        Every variable is added to the current part. Nothing is returned.
        """
        self.Consume('var')
        t = self.parseTypeSpec()

        def parseVar():
            name = self.Consume('ID')
            v = Variable(name.val, t)
            v.loc = name.loc
            if self.hasConsumed('='):
                v.ival = self.Expression()
            self.addDeclaration(v)

        parseVar()
        while self.hasConsumed(','):
            parseVar()
        self.Consume(';')

    def parseConstDef(self):
        """ Parse: 'const' typespec ID '=' expr { ',' ID '=' expr } ';'

        Every constant is added to the current part. Nothing is returned.
        """
        self.Consume('const')
        t = self.parseTypeSpec()

        def parseConst():
            name = self.Consume('ID')
            self.Consume('=')
            val = self.Expression()
            c = Constant(name.val, t, val)
            c.loc = name.loc
            # Register the constant, otherwise it is lost after parsing:
            self.addDeclaration(c)

        parseConst()
        while self.hasConsumed(','):
            parseConst()
        self.Consume(';')

    # Procedures
    def parseFunctionDef(self):
        """ Parse: 'function' typespec ID '(' [ params ] ')' compound

        The function is declared in the enclosing part. While its
        parameters and body are parsed, the function itself is the
        current part, so that nested declarations end up in it.
        """
        loc = self.Consume('function').loc
        returntype = self.parseTypeSpec()
        fname = self.Consume('ID').val
        f = Function(fname, loc)
        self.addDeclaration(f)
        savePart = self.currentPart
        self.currentPart = f
        self.Consume('(')
        parameters = []
        if not self.hasConsumed(')'):
            def parseParameter():
                typ = self.parseTypeSpec()
                name = self.Consume('ID')
                param = FormalParameter(name.val, typ)
                param.loc = name.loc
                self.addDeclaration(param)
                parameters.append(param)

            parseParameter()
            while self.hasConsumed(','):
                parseParameter()
            self.Consume(')')
        paramtypes = [p.typ for p in parameters]
        f.typ = FunctionType(paramtypes, returntype)
        f.body = self.parseCompoundStatement()
        self.currentPart = savePart

    # Statements:
    def parseIfStatement(self):
        """ Parse: 'if' '(' expr ')' compound [ 'else' compound ] """
        loc = self.Consume('if').loc
        self.Consume('(')
        condition = self.Expression()
        self.Consume(')')
        yes = self.parseCompoundStatement()
        if self.hasConsumed('else'):
            no = self.parseCompoundStatement()
        else:
            no = None
        return IfStatement(condition, yes, no, loc)

    def parseWhileStatement(self):
        """ Parse: 'while' '(' expr ')' compound """
        loc = self.Consume('while').loc
        self.Consume('(')
        condition = self.Expression()
        self.Consume(')')
        statements = self.parseCompoundStatement()
        return WhileStatement(condition, statements, loc)

    def parseReturnStatement(self):
        """ Parse: 'return' [ expr ] ';'

        A return without expression returns the literal 0.
        """
        loc = self.Consume('return').loc
        if self.Peak == ';':
            expr = Literal(0, loc)
        else:
            expr = self.Expression()
        self.Consume(';')
        return ReturnStatement(expr, loc)

    def parseCompoundStatement(self):
        """ Parse: '{' { statement } '}' """
        self.Consume('{')
        statements = []
        while not self.hasConsumed('}'):
            s = self.Statement()
            # Empty statements and variable definitions yield None:
            if s is None:
                continue
            statements.append(s)
        return CompoundStatement(statements)

    def Statement(self):
        """ Parse a single statement.

        Returns None for an empty statement (';') and for a variable
        definition, the latter being added to the current part instead.
        """
        # Determine statement type based on the pending token:
        if self.Peak == 'if':
            return self.parseIfStatement()
        elif self.Peak == 'while':
            return self.parseWhileStatement()
        elif self.Peak == '{':
            return self.parseCompoundStatement()
        elif self.hasConsumed(';'):
            pass
        elif self.Peak == 'var':
            self.parseVarDef()
        elif self.Peak == 'return':
            return self.parseReturnStatement()
        else:
            return self.AssignmentOrCall()

    def AssignmentOrCall(self):
        """ Parse an assignment (unary '=' expr) or a bare expression.

        The terminating ';' is not consumed here.
        """
        x = self.UnaryExpression()
        if self.Peak == '=':
            # We enter assignment mode here.
            loc = self.Consume('=').loc
            rhs = self.Expression()
            return Assignment(x, rhs, loc)
        else:
            return ExpressionStatement(x, x.loc)

    # Expression section:
    # We do not implement these C constructs:
    #   a(2), f = 2
    # and this:
    #   a = 2 < x ? 4 : 1;
    def _parseBinopChain(self, parseOperand, operators):
        """ Parse: operand { op operand } for op in `operators`.

        The result is a left associative tree of Binop nodes, or the
        single operand when no operator follows it.
        """
        lhs = parseOperand()
        while self.Peak in operators:
            op = self.Consume(self.Peak)
            rhs = parseOperand()
            lhs = Binop(lhs, op.typ, rhs, op.loc)
        return lhs

    def Expression(self):
        """ Parse the lowest precedence level: 'or' """
        return self._parseBinopChain(self.LogicalAndExpression, ['or'])

    def LogicalAndExpression(self):
        """ Parse 'and' between equality expressions """
        return self._parseBinopChain(self.EqualityExpression, ['and'])

    def EqualityExpression(self):
        """ Parse comparisons between shift expressions """
        return self._parseBinopChain(
            self.SimpleExpression, ['<', '==', '>', '>=', '<=', '!='])

    def SimpleExpression(self):
        """ Parse shifts ('>>' and '<<') between add expressions, which
            means that + and - bind tighter than the shift operators """
        return self._parseBinopChain(self.AddExpression, ['>>', '<<'])

    def AddExpression(self):
        """ Parse '+' and '-' between terms """
        return self._parseBinopChain(self.Term, ['+', '-'])

    def Term(self):
        """ Parse '*' and '/' between bitwise or expressions.

        NOTE(review): as written, '|' and '&' bind tighter than '*' and
        '/', which differs from C. Confirm that this is intended.
        """
        return self._parseBinopChain(self.BitwiseOr, ['*', '/'])

    def BitwiseOr(self):
        """ Parse '|' between bitwise and expressions """
        return self._parseBinopChain(self.BitwiseAnd, ['|'])

    def BitwiseAnd(self):
        """ Parse '&' between cast expressions """
        return self._parseBinopChain(self.CastExpression, ['&'])

    # Domain of unary expressions:
    def CastExpression(self):
        """
          the C-style type cast conflicts with '(' expr ')'
          so introduce extra keyword 'cast'

          Parses: 'cast' '<' typespec '>' '(' expr ')'
          or falls through to a unary expression.
        """
        if self.Peak == 'cast':
            loc = self.Consume('cast').loc
            self.Consume('<')
            t = self.parseTypeSpec()
            self.Consume('>')
            self.Consume('(')
            ce = self.Expression()
            self.Consume(')')
            return TypeCast(t, ce, loc)
        else:
            return self.UnaryExpression()

    def UnaryExpression(self):
        """ Parse a prefix '*' (Deref) or '&' (Unop) applied to a cast
            expression, or else a postfix expression """
        if self.Peak in ['&', '*']:
            op = self.Consume(self.Peak)
            ce = self.CastExpression()
            # Decide on the token type, as that is what Peak matched on:
            if op.typ == '*':
                return Deref(ce, op.loc)
            else:
                return Unop(op.typ, ce, op.loc)
        else:
            return self.PostFixExpression()

    def PostFixExpression(self):
        """ Parse a primary expression followed by any number of
            function calls '(...)' and field selections '.' or '->' """
        pfe = self.PrimaryExpression()
        while self.Peak in ['[', '(', '.', '->']:
            if self.hasConsumed('['):
                # Indexing has no implementation; report this clearly
                # instead of dropping the bracket and failing later on.
                self.Error('Array indexing is not supported')
            elif self.hasConsumed('('):
                # Function call
                args = []
                if not self.hasConsumed(')'):
                    args.append(self.Expression())
                    while self.hasConsumed(','):
                        args.append(self.Expression())
                    self.Consume(')')
                pfe = FunctionCall(pfe, args, pfe.loc)
            elif self.hasConsumed('->'):
                # a->b is handled as (*a).b
                field = self.Consume('ID')
                pfe = Deref(pfe, pfe.loc)
                pfe = FieldRef(pfe, field.val, field.loc)
            elif self.hasConsumed('.'):
                field = self.Consume('ID')
                pfe = FieldRef(pfe, field.val, field.loc)
            else:
                # Cannot happen, the loop condition covers all cases.
                raise Exception()
        return pfe

    def PrimaryExpression(self):
        """ Parse a parenthesized expression, a NUMBER or REAL literal,
            'true', 'false' or a designator. Raises a CompilerError for
            any other token. """
        if self.hasConsumed('('):
            e = self.Expression()
            self.Consume(')')
            return e
        elif self.Peak == 'NUMBER':
            val = self.Consume('NUMBER')
            return Literal(val.val, val.loc)
        elif self.Peak == 'REAL':
            val = self.Consume('REAL')
            return Literal(val.val, val.loc)
        elif self.Peak == 'true':
            val = self.Consume('true')
            return Literal(True, val.loc)
        elif self.Peak == 'false':
            val = self.Consume('false')
            return Literal(False, val.loc)
        elif self.Peak == 'ID':
            d = self.parseDesignator()
            return VariableUse(d, d.loc)
        self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))