view python/c3/parser.py @ 220:3f6c30a5d234

Major change in expression parsing to enable pointers and structs
author Windel Bouwman
date Sat, 06 Jul 2013 21:32:20 +0200
parents 1fa3e0050b49
children 848c4b15fd0b
line wrap: on
line source

from . import astnodes, lexer
from ppci import CompilerError

class Parser:
    """ Parses sourcecode into an abstract syntax tree (AST) """
    def __init__(self, diag):
        """ Create a parser that reports problems to `diag`.

        `diag` is stored as-is; parseSource hands every CompilerError it
        catches to diag.addDiag.
        """
        self.diag = diag

    def parseSource(self, source):
      self.initLex(source)
      try:
         self.parsePackage()
         return self.mod
      except CompilerError as e:
         self.diag.addDiag(e)

    def Error(self, msg):
        """ Raise a CompilerError carrying `msg` and the pending token's location. """
        location = self.token.loc
        raise CompilerError(msg, location)

    # Lexer helpers:
    def Consume(self, typ):
        if self.Peak == typ:
            return self.NextToken()
        else:
            self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak))

    @property
    def Peak(self):
        return self.token.typ

    def hasConsumed(self, typ):
        if self.Peak == typ:
            self.Consume(typ)
            return True
        return False

    def NextToken(self):
        t = self.token
        if t.typ != 'END':
            self.token = self.tokens.__next__()
        return t

    def initLex(self, source):
        """ Start the lexical stage on `source` and load the first token. """
        stream = lexer.tokenize(source)
        self.tokens = stream
        self.token = next(stream)

    def addDeclaration(self, decl):
        self.currentPart.declarations.append(decl)
    
    def parseUses(self):
        """ Placeholder for parsing 'uses' clauses; currently consumes nothing. """
        # TODO: parse uses
        pass

    def parsePackage(self):
        """ Parse 'package' ID ';' followed by top level items up to 'END'.

        The created package node is stored in self.mod and becomes the
        current part that receives declarations.
        """
        self.Consume('package')
        name_tok = self.Consume('ID')
        self.Consume(';')
        package = astnodes.Package(name_tok.val, name_tok.loc)
        self.mod = package
        self.currentPart = package
        self.parseUses()
        while True:
            if self.Peak == 'END':
                break
            self.parseTopLevel()
        self.Consume('END')

    def parseTopLevel(self):
      if self.Peak == 'function':
         self.parseFunctionDef()
      elif self.Peak == 'var':
         self.parseVarDef()
      elif self.Peak == 'const':
         self.parseConstDef()
      elif self.Peak == 'type':
         self.parseTypeDef()
      else:
         self.Error('Expected function, var, const or type')

    def parseDesignator(self):
        """ Parse an ID token into a Designator node (a named object). """
        ident = self.Consume('ID')
        return astnodes.Designator(ident.val, ident.loc)

    # Type system
    def parseTypeSpec(self):
        # For now, do simple type spec, just parse an ID:
        #return self.parseDesignator()
        if self.Peak == 'struct':
            self.Consume('struct')
            self.Consume('{')
            mems = []
            while self.Peak != '}':
                mem_t = self.parseTypeSpec()
                mem_n = self.Consume('ID')
                mems.append((mem_t, mem_n))
                while self.hasConsumed(','):
                    mem_n = self.Consume('ID')
                    mems.append((mem_t, mem_n))
                self.Consume(';')
            self.Consume('}')
            theT = astnodes.StructureType(mems)
        else:
            theT = self.parseDesignator()
        # Check for pointer suffix:
        while self.hasConsumed('*'):
            theT = astnodes.PointerType(theT)
        return theT

    def parseTypeDef(self):
        """ Parse 'type' typespec ID ';' and return a DefinedType node.

        NOTE(review): the ID token itself (not its .val) is handed to
        DefinedType, and the node is only returned, not registered with
        addDeclaration -- confirm both are intended.
        """
        self.Consume('type')
        aliased = self.parseTypeSpec()
        name_tok = self.Consume('ID')
        # TODO: action here :)
        self.Consume(';')
        defined = astnodes.DefinedType(name_tok, aliased)
        return defined

    # Variable declarations:
    def parseVarDef(self):
        """ Parse 'var' typespec ID [ '=' expr ] { ',' ID [ '=' expr ] } ';'.

        Each variable is added to the current part; an initial value, when
        given, is stored in the variable's `ival` attribute.
        """
        self.Consume('var')
        var_type = self.parseTypeSpec()
        while True:
            name_tok = self.Consume('ID')
            variable = astnodes.Variable(name_tok.val, var_type)
            variable.loc = name_tok.loc
            if self.hasConsumed('='):
                variable.ival = self.Expression()
            self.addDeclaration(variable)
            if not self.hasConsumed(','):
                break
        self.Consume(';')

    def parseConstDef(self):
        """ Parse 'const' typespec ID '=' expr { ',' ID '=' expr } ';'.

        Each constant is added to the current part through addDeclaration,
        in the same way parseVarDef registers variables.
        """
        self.Consume('const')
        t = self.parseTypeSpec()
        while True:
            name = self.Consume('ID')
            self.Consume('=')
            val = self.Expression()
            c = astnodes.Constant(name.val, t, val)
            c.loc = name.loc
            # Register the node; previously it was created and then dropped.
            self.addDeclaration(c)
            if not self.hasConsumed(','):
                break
        self.Consume(';')
      
    # Procedures
    def parseFunctionDef(self):
        """ Parse 'function' typespec ID '(' [ parameters ] ')' compound.

        The function is added to the enclosing part. While its parameters
        and body are parsed the function itself is the current part, so
        parameters and local variables are added to the function.
        """
        loc = self.Consume('function').loc
        returntype = self.parseTypeSpec()
        fname = self.Consume('ID').val
        func = astnodes.Function(fname, loc)
        self.addDeclaration(func)
        enclosing = self.currentPart
        self.currentPart = func
        self.Consume('(')
        params = []
        if not self.hasConsumed(')'):
            # At least one parameter; more follow after each ','.
            while True:
                param_type = self.parseTypeSpec()
                name_tok = self.Consume('ID')
                param = astnodes.Variable(name_tok.val, param_type)
                param.loc = name_tok.loc
                self.addDeclaration(param)
                params.append(param)
                if not self.hasConsumed(','):
                    break
            self.Consume(')')
        func.typ = astnodes.FunctionType([p.typ for p in params], returntype)
        func.body = self.parseCompoundStatement()
        self.currentPart = enclosing

    # Statements:

    def parseIfStatement(self):
        """ Parse 'if' '(' expr ')' compound [ 'else' compound ].

        A missing else branch is represented by an EmptyStatement.
        """
        if_tok = self.Consume('if')
        self.Consume('(')
        cond = self.Expression()
        self.Consume(')')
        then_part = self.parseCompoundStatement()
        else_part = (self.parseCompoundStatement()
                     if self.hasConsumed('else')
                     else astnodes.EmptyStatement())
        return astnodes.IfStatement(cond, then_part, else_part, if_tok.loc)

    def parseWhileStatement(self):
        """ Parse 'while' '(' expr ')' compound into a WhileStatement. """
        while_tok = self.Consume('while')
        self.Consume('(')
        cond = self.Expression()
        self.Consume(')')
        body = self.parseCompoundStatement()
        return astnodes.WhileStatement(cond, body, while_tok.loc)

    def parseReturnStatement(self):
        """ Parse 'return' expr ';' into a ReturnStatement.

        An expression is always parsed after the 'return' keyword.
        """
        return_tok = self.Consume('return')
        value = self.Expression()
        self.Consume(';')
        return astnodes.ReturnStatement(value, return_tok.loc)

    def parseCompoundStatement(self):
        """ Parse '{' { statement } '}' into a CompoundStatement.

        Statements that are exactly of type EmptyStatement are left out.
        """
        self.Consume('{')
        body = []
        while True:
            if self.hasConsumed('}'):
                break
            stmt = self.Statement()
            if type(stmt) is not astnodes.EmptyStatement:
                body.append(stmt)
        return astnodes.CompoundStatement(body)

    def Statement(self):
        # Determine statement type based on the pending token:
        if self.Peak == 'if':
            return self.parseIfStatement()
        elif self.Peak == 'while':
            return self.parseWhileStatement()
        elif self.Peak == '{':
            return self.parseCompoundStatement()
        elif self.hasConsumed(';'):
            return astnodes.EmptyStatement()
        elif self.Peak == 'var':
            self.parseVarDef()
            return astnodes.EmptyStatement()
        elif self.Peak == 'return':
            return self.parseReturnStatement()
        else:
            return self.AssignmentOrCall()

    def AssignmentOrCall(self):
        x = self.UnaryExpression()
        if self.Peak == '=':
            # We enter assignment mode here.
            loc = self.Consume('=').loc
            rhs = self.Expression()
            return astnodes.Assignment(x, rhs, loc)
        else:
            return x

    # Expression section:
    # We do not implement these C constructs:
    # a(2), f = 2
    # and this:
    # a = 2 < x ? 4 : 1;

    def Expression(self):
        exp = self.LogicalAndExpression()
        while self.Peak == 'or':
            loc = self.Consume('or').loc
            e2 = self.LogicalAndExpression()
            exp = astnodes.Binop(exp, 'or', e2, loc)
        return exp

    def LogicalAndExpression(self):
        o = self.EqualityExpression()
        while self.Peak == 'and':
            loc = self.Consume('and').loc
            o2 = self.EqualityExpression()
            o = astnodes.Binop(o, 'and', o2, loc)
        return o

    def EqualityExpression(self):
        ee = self.SimpleExpression()
        while self.Peak in ['<', '==', '>']:
            op = self.Consume(self.Peak)
            ee2 = self.SimpleExpression()
            ee = astnodes.Binop(ee, op.typ, ee2, op.loc)
        return ee

    def SimpleExpression(self):
        e = self.Term()
        while self.Peak in ['+', '-']:
            op = self.Consume(self.Peak)
            e2 = self.Term()
            e = astnodes.Binop(e, op.typ, e2, op.loc)
        return e

    def Term(self):
        t = self.Factor()
        while self.Peak in ['*', '/']:
            op = self.Consume(self.Peak)
            t2 = self.Factor()
            t = astnodes.Binop(t, op.typ, t2, op.loc)
        return t
        
    def Factor(self):
        """ Parse one operand of a Term; currently just a cast expression. """
        # TODO: eliminate this step?
        return self.CastExpression()

    # Domain of unary expressions:

    def CastExpression(self):
        # TODO: cast conflicts with '(' expr ')'
        if self.Peak == '(ii':
            self.Consume('(')
            print('TODO: implement type cast')
            #rrrrr
            self.parseTypeSpec()

            # Type
            self.Consume(')')
            ce = self.CastExpression()
            return ce 
        else:
            return self.UnaryExpression()
        
    def UnaryExpression(self):
        if self.Peak in ['&', '*']:
            op = self.Consume(self.Peak)
            ce = self.CastExpression()
            return astnodes.Unop(op.typ, ce, op.loc)
        else:
            return self.PostFixExpression()

    def PostFixExpression(self):
        pfe = self.PrimaryExpression()
        while self.Peak in ['[', '(', '.', '->']:
            if self.hasConsumed('['):
                pass
            elif self.hasConsumed('('):
                # Function call
                args = []
                if not self.hasConsumed(')'):
                    args.append(self.Expression())
                    while self.hasConsumed(','):
                        args.append(self.Expression())
                    self.Consume(')')
                pfe = astnodes.FunctionCall(pfe, args, pfe.loc)
            else:
                rrrr
        return pfe

    def PrimaryExpression(self):
        if self.hasConsumed('('):
            e = self.Expression()
            self.Consume(')')
            return e
        elif self.Peak == 'NUMBER':
            val = self.Consume('NUMBER')
            return astnodes.Literal(val.val, val.loc)
        elif self.Peak == 'REAL':
            val = self.Consume('REAL')
            return astnodes.Literal(val.val, val.loc)
        elif self.Peak == 'true':
            val = self.Consume('true')
            return astnodes.Literal(True, val.loc)
        elif self.Peak == 'false':
            val = self.Consume('false')
            return astnodes.Literal(False, val.loc)
        elif self.Peak == 'ID':
            d = self.parseDesignator()
            return astnodes.VariableUse(d, d.loc)
        self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))