diff python/ppci/c3/parser.py @ 300:158068af716c

yafm
author Windel Bouwman
date Tue, 03 Dec 2013 18:00:22 +0100
parents python/c3/parser.py@9417caea2eb3
children 6753763d3bec
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/ppci/c3/parser.py	Tue Dec 03 18:00:22 2013 +0100
@@ -0,0 +1,414 @@
+import logging
+from .lexer import Lexer
+from .astnodes import FieldRef, Literal, TypeCast, Unop, Binop
+from .astnodes import Assignment, ExpressionStatement, CompoundStatement
+from .astnodes import ReturnStatement, WhileStatement, IfStatement
+from .astnodes import FunctionType, Function, FormalParameter
+from .astnodes import StructureType, DefinedType, PointerType
+from .astnodes import Constant, Variable
+from .astnodes import StructField, Deref
+from .astnodes import Package, ImportDesignator
+from .astnodes import Designator, VariableUse, FunctionCall
+from ppci import CompilerError
+
+
+class Parser:
+    """ Parses sourcecode into an abstract syntax tree (AST) """
+    def __init__(self, diag):
+        self.logger = logging.getLogger('c3')
+        self.diag = diag
+        self.lexer = Lexer(diag)
+
+    def parseSource(self, source):
+        self.logger.info('Parsing source')
+        self.initLex(source)
+        try:
+            self.parsePackage()
+            return self.mod
+        except CompilerError as e:
+            self.diag.addDiag(e)
+
+    def Error(self, msg):
+        raise CompilerError(msg, self.token.loc)
+
+    # Lexer helpers:
+    def Consume(self, typ):
+        if self.Peak == typ:
+            return self.NextToken()
+        else:
+            self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak))
+
+    @property
+    def Peak(self):
+        return self.token.typ
+
+    @property
+    def CurLoc(self):
+        return self.token.loc
+
+    def hasConsumed(self, typ):
+        if self.Peak == typ:
+            self.Consume(typ)
+            return True
+        return False
+
+    def NextToken(self):
+        t = self.token
+        if t.typ != 'END':
+            self.token = self.tokens.__next__()
+        return t
+
+    def initLex(self, source):
+        self.tokens = self.lexer.tokenize(source)
+        self.token = self.tokens.__next__()
+
+    def addDeclaration(self, decl):
+        self.currentPart.declarations.append(decl)
+
+    def parseImport(self):
+        self.Consume('import')
+        name = self.Consume('ID').val
+        self.mod.imports.append(name)
+        self.Consume(';')
+
+    def parsePackage(self):
+        self.Consume('module')
+        name = self.Consume('ID')
+        self.Consume(';')
+        self.mod = Package(name.val, name.loc)
+        self.currentPart = self.mod
+        while self.Peak != 'END':
+            self.parseTopLevel()
+        self.Consume('END')
+
+    def parseTopLevel(self):
+        if self.Peak == 'function':
+            self.parseFunctionDef()
+        elif self.Peak == 'var':
+            self.parseVarDef()
+        elif self.Peak == 'const':
+            self.parseConstDef()
+        elif self.Peak == 'type':
+            self.parseTypeDef()
+        elif self.Peak == 'import':
+            self.parseImport()
+        else:
+            self.Error('Expected function, var, const or type')
+
+    def parseDesignator(self):
+        """ A designator designates an object """
+        name = self.Consume('ID')
+        if self.hasConsumed(':'):
+            name2 = self.Consume('ID')
+            return ImportDesignator(name.val, name2.val, name.loc)
+        else:
+            return Designator(name.val, name.loc)
+
+    # Type system
+    def parseTypeSpec(self):
+        # For now, do simple type spec, just parse an ID:
+        #return self.parseDesignator()
+        if self.Peak == 'struct':
+            self.Consume('struct')
+            self.Consume('{')
+            mems = []
+            while self.Peak != '}':
+                mem_t = self.parseTypeSpec()
+                mem_n = self.Consume('ID').val
+                mems.append(StructField(mem_n, mem_t))
+                while self.hasConsumed(','):
+                    mem_n = self.Consume('ID').val
+                    mems.append(StructField(mem_n, mem_t))
+                self.Consume(';')
+            self.Consume('}')
+            theT = StructureType(mems)
+        else:
+            theT = self.parseDesignator()
+        # Check for pointer suffix:
+        while self.hasConsumed('*'):
+            theT = PointerType(theT)
+        return theT
+
+    def parseTypeDef(self):
+        self.Consume('type')
+        newtype = self.parseTypeSpec()
+        typename = self.Consume('ID')
+        self.Consume(';')
+        df = DefinedType(typename.val, newtype, typename.loc)
+        self.addDeclaration(df)
+
+    # Variable declarations:
+    def parseVarDef(self):
+        self.Consume('var')
+        t = self.parseTypeSpec()
+
+        def parseVar():
+            name = self.Consume('ID')
+            v = Variable(name.val, t)
+            v.loc = name.loc
+            if self.hasConsumed('='):
+                v.ival = self.Expression()
+            self.addDeclaration(v)
+        parseVar()
+        while self.hasConsumed(','):
+            parseVar()
+        self.Consume(';')
+
+    def parseConstDef(self):
+        self.Consume('const')
+        t = self.parseTypeSpec()
+
+        def parseConst():
+            name = self.Consume('ID')
+            self.Consume('=')
+            val = self.Expression()
+            c = Constant(name.val, t, val)
+            c.loc = name.loc
+        parseConst()
+        while self.hasConsumed(','):
+            parseConst()
+        self.Consume(';')
+
+    # Procedures
+    def parseFunctionDef(self):
+        loc = self.Consume('function').loc
+        returntype = self.parseTypeSpec()
+        fname = self.Consume('ID').val
+        f = Function(fname, loc)
+        self.addDeclaration(f)
+        savePart = self.currentPart
+        self.currentPart = f
+        self.Consume('(')
+        parameters = []
+        if not self.hasConsumed(')'):
+            def parseParameter():
+                typ = self.parseTypeSpec()
+                name = self.Consume('ID')
+                param = FormalParameter(name.val, typ)
+                param.loc = name.loc
+                self.addDeclaration(param)
+                parameters.append(param)
+            parseParameter()
+            while self.hasConsumed(','):
+                parseParameter()
+            self.Consume(')')
+        paramtypes = [p.typ for p in parameters]
+        f.typ = FunctionType(paramtypes, returntype)
+        f.body = self.parseCompoundStatement()
+        self.currentPart = savePart
+
+    # Statements:
+
+    def parseIfStatement(self):
+        loc = self.Consume('if').loc
+        self.Consume('(')
+        condition = self.Expression()
+        self.Consume(')')
+        yes = self.parseCompoundStatement()
+        if self.hasConsumed('else'):
+            no = self.parseCompoundStatement()
+        else:
+            no = None
+        return IfStatement(condition, yes, no, loc)
+
+    def parseWhileStatement(self):
+        loc = self.Consume('while').loc
+        self.Consume('(')
+        condition = self.Expression()
+        self.Consume(')')
+        statements = self.parseCompoundStatement()
+        return WhileStatement(condition, statements, loc)
+
+    def parseReturnStatement(self):
+        loc = self.Consume('return').loc
+        if self.Peak == ';':
+            expr = Literal(0, loc)
+        else:
+            expr = self.Expression()
+        self.Consume(';')
+        return ReturnStatement(expr, loc)
+
+    def parseCompoundStatement(self):
+        self.Consume('{')
+        statements = []
+        while not self.hasConsumed('}'):
+            s = self.Statement()
+            if s is None:
+                continue
+            statements.append(s)
+        return CompoundStatement(statements)
+
+    def Statement(self):
+        # Determine statement type based on the pending token:
+        if self.Peak == 'if':
+            return self.parseIfStatement()
+        elif self.Peak == 'while':
+            return self.parseWhileStatement()
+        elif self.Peak == '{':
+            return self.parseCompoundStatement()
+        elif self.hasConsumed(';'):
+            pass
+        elif self.Peak == 'var':
+            self.parseVarDef()
+        elif self.Peak == 'return':
+            return self.parseReturnStatement()
+        else:
+            return self.AssignmentOrCall()
+
+    def AssignmentOrCall(self):
+        x = self.UnaryExpression()
+        if self.Peak == '=':
+            # We enter assignment mode here.
+            loc = self.Consume('=').loc
+            rhs = self.Expression()
+            return Assignment(x, rhs, loc)
+        else:
+            return ExpressionStatement(x, x.loc)
+
+    # Expression section:
+    # We not implement these C constructs:
+    # a(2), f = 2
+    # and this:
+    # a = 2 < x : 4 ? 1;
+
+    def Expression(self):
+        exp = self.LogicalAndExpression()
+        while self.Peak == 'or':
+            loc = self.Consume('or').loc
+            e2 = self.LogicalAndExpression()
+            exp = Binop(exp, 'or', e2, loc)
+        return exp
+
+    def LogicalAndExpression(self):
+        o = self.EqualityExpression()
+        while self.Peak == 'and':
+            loc = self.Consume('and').loc
+            o2 = self.EqualityExpression()
+            o = Binop(o, 'and', o2, loc)
+        return o
+
+    def EqualityExpression(self):
+        ee = self.SimpleExpression()
+        while self.Peak in ['<', '==', '>', '>=', '<=', '!=']:
+            op = self.Consume(self.Peak)
+            ee2 = self.SimpleExpression()
+            ee = Binop(ee, op.typ, ee2, op.loc)
+        return ee
+
+    def SimpleExpression(self):
+        """ Shift operations before + and - ? """
+        e = self.AddExpression()
+        while self.Peak in ['>>', '<<']:
+            op = self.Consume(self.Peak)
+            e2 = self.AddExpression()
+            e = Binop(e, op.typ, e2, op.loc)
+        return e
+
+    def AddExpression(self):
+        e = self.Term()
+        while self.Peak in ['+', '-']:
+            op = self.Consume(self.Peak)
+            e2 = self.Term()
+            e = Binop(e, op.typ, e2, op.loc)
+        return e
+
+    def Term(self):
+        t = self.BitwiseOr()
+        while self.Peak in ['*', '/']:
+            op = self.Consume(self.Peak)
+            t2 = self.BitwiseOr()
+            t = Binop(t, op.typ, t2, op.loc)
+        return t
+
+    def BitwiseOr(self):
+        a = self.BitwiseAnd()
+        while self.Peak in ['|']:
+            op = self.Consume(self.Peak)
+            b = self.BitwiseAnd()
+            a = Binop(a, op.typ, b, op.loc)
+        return a
+
+    def BitwiseAnd(self):
+        a = self.CastExpression()
+        while self.Peak in ['&']:
+            op = self.Consume(self.Peak)
+            b = self.CastExpression()
+            a = Binop(a, op.typ, b, op.loc)
+        return a
+
+    # Domain of unary expressions:
+
+    def CastExpression(self):
+        """
+          the C-style type cast conflicts with '(' expr ')'
+          so introduce extra keyword 'cast'
+        """
+        if self.Peak == 'cast':
+            loc = self.Consume('cast').loc
+            self.Consume('<')
+            t = self.parseTypeSpec()
+            self.Consume('>')
+            self.Consume('(')
+            ce = self.Expression()
+            self.Consume(')')
+            return TypeCast(t, ce, loc)
+        else:
+            return self.UnaryExpression()
+
+    def UnaryExpression(self):
+        if self.Peak in ['&', '*']:
+            op = self.Consume(self.Peak)
+            ce = self.CastExpression()
+            if op.val == '*':
+                return Deref(ce, op.loc)
+            else:
+                return Unop(op.typ, ce, op.loc)
+        else:
+            return self.PostFixExpression()
+
+    def PostFixExpression(self):
+        pfe = self.PrimaryExpression()
+        while self.Peak in ['[', '(', '.', '->']:
+            if self.hasConsumed('['):
+                pass
+            elif self.hasConsumed('('):
+                # Function call
+                args = []
+                if not self.hasConsumed(')'):
+                    args.append(self.Expression())
+                    while self.hasConsumed(','):
+                        args.append(self.Expression())
+                    self.Consume(')')
+                pfe = FunctionCall(pfe, args, pfe.loc)
+            elif self.hasConsumed('->'):
+                field = self.Consume('ID')
+                pfe = Deref(pfe, pfe.loc)
+                pfe = FieldRef(pfe, field.val, field.loc)
+            elif self.hasConsumed('.'):
+                field = self.Consume('ID')
+                pfe = FieldRef(pfe, field.val, field.loc)
+            else:
+                raise Exception()
+        return pfe
+
+    def PrimaryExpression(self):
+        if self.hasConsumed('('):
+            e = self.Expression()
+            self.Consume(')')
+            return e
+        elif self.Peak == 'NUMBER':
+            val = self.Consume('NUMBER')
+            return Literal(val.val, val.loc)
+        elif self.Peak == 'REAL':
+            val = self.Consume('REAL')
+            return Literal(val.val, val.loc)
+        elif self.Peak == 'true':
+            val = self.Consume('true')
+            return Literal(True, val.loc)
+        elif self.Peak == 'false':
+            val = self.Consume('false')
+            return Literal(False, val.loc)
+        elif self.Peak == 'ID':
+            d = self.parseDesignator()
+            return VariableUse(d, d.loc)
+        self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))