view python/c3/parser.py @ 213:003c8a976fff

Merge of semantics and parser again ..
author Windel Bouwman
date Fri, 05 Jul 2013 11:18:48 +0200
parents 46d62dadd61b
children c1ccb1cb4cef
line wrap: on
line source

from . import astnodes, lexer
from ppci import CompilerError

# Binary operator precedences used by the expression parser.
# A larger number binds tighter; operators on one line share a level.
binopPrecs = {
    'or': 5,
    'and': 10,
    '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20,
    '+': 30, '-': 30,
    '*': 40, '/': 40,
}

class Parser:
    """ Parses sourcecode into an abstract syntax tree (AST) """
    def __init__(self, diag):
        self.diag = diag

    def parseSource(self, source):
      self.initLex(source)
      try:
         self.parsePackage()
         return self.mod
      except CompilerError as e:
         self.diag.addDiag(e)
    def Error(self, msg):
        """Abort parsing by raising a ``CompilerError`` carrying ``msg``.

        The error location is that of the current (not yet consumed) token.
        """
        location = self.token.loc
        raise CompilerError(msg, location)
    # Lexer helpers:
    def Consume(self, typ):
      if self.Peak == typ:
         return self.NextToken()
      else:
         self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak))
    @property
    def Peak(self):
      return self.token.typ
    @property
    def PeakPrec(self):
        """Precedence of the current token when it is a binary operator.

        Looks the current token type up in ``binopPrecs``; any token that is
        not listed there yields -1.
        """
        return binopPrecs.get(self.Peak, -1)
    def hasConsumed(self, typ):
      if self.Peak == typ:
         self.Consume(typ)
         return True
      return False

    def NextToken(self):
      t = self.token
      if t.typ != 'END':
         self.token = self.tokens.__next__()
      return t

    def initLex(self, source):
        """Start tokenizing ``source`` and load the first token.

        Stores the token iterator in ``self.tokens`` and the first token it
        yields in ``self.token``.
        """
        self.tokens = lexer.tokenize(source)  # Lexical stage
        # Use the builtin instead of invoking the dunder method directly.
        self.token = next(self.tokens)
    
    def parseUses(self):
        pass

    def parsePackage(self):
        """Parse a complete package.

        Expects ``package <ID> ;``, stores a new Package node in
        ``self.mod`` and then parses top-level definitions until the 'END'
        token is reached.
        """
        self.Consume('package')
        ident = self.Consume('ID')
        self.Consume(';')
        self.mod = astnodes.Package(ident.val, ident.loc)
        # TODO: parse uses (parseUses does nothing yet)
        self.parseUses()
        while not self.Peak == 'END':
            self.parseTopLevel()
        self.Consume('END')

    def parseTopLevel(self):
      if self.Peak == 'function':
         self.parseFunctionDef()
      elif self.Peak == 'var':
         self.parseVarDef()
      elif self.Peak == 'const':
         self.parseConstDef()
      elif self.Peak == 'type':
         self.parseTypeDef()
      else:
         self.Error('Expected function, var, const or type')

    def parseDesignator(self):
        """ Parse an identifier and wrap it in a Designator node.

        A designator designates an object; the node carries the identifier
        text and its source location.
        """
        ident = self.Consume('ID')
        return astnodes.Designator(ident.val, ident.loc)

    # Type system
    def parseTypeSpec(self):
        # For now, do simple type spec, just parse an ID:
        return self.parseDesignator()
        if self.Peak == 'struct':
            self.Consume('struct')
            self.Consume('{')
            mems = []
            while self.Peak != '}':
                mem_t = self.parseTypeSpec()
                mem_n = self.Consume('ID')
                mems.append((mem_t, mem_n))
                while self.hasConsumed(','):
                    mem_n = self.Consume('ID')
                    mems.append((mem_t, mem_n))
                self.Consume(';')
            self.Consume('}')
            theT = astnodes.StructureType(mems)
        else:
            theT = self.parseDesignator()
        # Check for pointer suffix:
        while self.hasConsumed('*'):
            theT = astnodes.PointerType(theT)
        return theT

    def parseTypeDef(self):
        """Parse ``type <typespec> <ID> ;`` and return a DefinedType node.

        The node receives the 'ID' token itself (not its value) together
        with the parsed type specification.
        """
        self.Consume('type')
        spec = self.parseTypeSpec()
        ident = self.Consume('ID')
        self.Consume(';')
        # TODO: action here :)
        return astnodes.DefinedType(ident, spec)

    # Variable declarations:
    def parseVarDef(self):
        """Parse ``var <typespec> <ID> [= <expr>] {, <ID> [= <expr>]} ;``.

        A Variable node is built for every declared name; all of them share
        the one parsed type. An initializer, when present, is stored in the
        node's ``ival`` attribute.
        NOTE(review): the nodes are neither stored nor returned here.
        """
        self.Consume('var')
        var_type = self.parseTypeSpec()
        more = True
        while more:
            ident = self.Consume('ID')
            var = astnodes.Variable(ident.val, var_type)
            var.loc = ident.loc
            if self.hasConsumed('='):
                var.ival = self.parseExpression()
            # A comma announces another declarator of the same type.
            more = self.hasConsumed(',')
        self.Consume(';')

    def parseConstDef(self):
        """Parse ``const <typespec> <ID> = <expr> {, <ID> = <expr>} ;``.

        A Constant node is built for every declared name; all of them share
        the one parsed type.
        NOTE(review): the nodes are neither stored nor returned here.
        """
        self.Consume('const')
        const_type = self.parseTypeSpec()
        more = True
        while more:
            ident = self.Consume('ID')
            self.Consume('=')
            value = self.parseExpression()
            const = astnodes.Constant(ident.val, const_type, value)
            const.loc = ident.loc
            # A comma announces another constant of the same type.
            more = self.hasConsumed(',')
        self.Consume(';')
      
    # Procedures
    def parseFunctionDef(self):
        """Parse ``function <typespec> <ID> ( [<params>] ) <compound>``.

        Each parameter is ``<typespec> <ID>`` and becomes a Variable node.
        NOTE(review): the return type, the parameter list and the body are
        parsed but not attached to the Function node, and the node itself
        is not returned.
        """
        start = self.Consume('function').loc
        returntype = self.parseTypeSpec()
        fname = self.Consume('ID').val
        f = astnodes.Function(fname, start)
        self.Consume('(')
        parameters = []
        if not self.hasConsumed(')'):
            while True:
                ptype = self.parseTypeSpec()
                ident = self.Consume('ID')
                param = astnodes.Variable(ident.val, ptype)
                param.loc = ident.loc
                parameters.append(param)
                if not self.hasConsumed(','):
                    break
            self.Consume(')')
        body = self.parseCompoundStatement()

    # Statements:
    def parseAssignment(self, lval):
        """Parse ``= <expr> ;`` following an already parsed target.

        ``lval`` is wrapped in a VariableUse node; the returned Assignment
        node is located at the '=' token.
        """
        target = astnodes.VariableUse(lval, lval.loc)
        assign_loc = self.Consume('=').loc
        value = self.parseExpression()
        self.Consume(';')
        return astnodes.Assignment(target, value, assign_loc)

    def parseCall(self, func):
        """Parse the parenthesised, comma separated arguments of a call.

        Returns a FunctionCall node for ``func`` located at ``func.loc``.
        Nothing after the closing ')' is consumed.
        """
        self.Consume('(')
        args = []
        if not self.hasConsumed(')'):
            while True:
                args.append(self.parseExpression())
                if not self.hasConsumed(','):
                    break
            self.Consume(')')
        return astnodes.FunctionCall(func, args, func.loc)

    def parseIfStatement(self):
        """Parse ``if ( <expr> ) <compound> [else <compound>]``.

        When there is no else part an EmptyStatement takes its place. The
        returned IfStatement node is located at the 'if' token.
        """
        if_loc = self.Consume('if').loc
        self.Consume('(')
        cond = self.parseExpression()
        self.Consume(')')
        then_branch = self.parseCompoundStatement()
        if not self.hasConsumed('else'):
            else_branch = astnodes.EmptyStatement()
        else:
            else_branch = self.parseCompoundStatement()
        return astnodes.IfStatement(cond, then_branch, else_branch, if_loc)

    def parseWhileStatement(self):
        """Parse ``while ( <expr> ) <compound>``.

        The returned WhileStatement node is located at the 'while' token.
        """
        while_loc = self.Consume('while').loc
        self.Consume('(')
        cond = self.parseExpression()
        self.Consume(')')
        body = self.parseCompoundStatement()
        return astnodes.WhileStatement(cond, body, while_loc)

    def parseReturnStatement(self):
        """Parse ``return <expr> ;`` and return a ReturnStatement node.

        The expression is mandatory in this grammar.
        """
        self.Consume('return')
        value = self.parseExpression()
        self.Consume(';')
        return astnodes.ReturnStatement(value)

    def parseCompoundStatement(self):
        """Parse ``{ <statement>* }`` into a CompoundStatement node.

        Statements whose exact type is EmptyStatement are left out of the
        resulting statement list.
        """
        self.Consume('{')
        collected = []
        while True:
            if self.hasConsumed('}'):
                break
            stmt = self.parseStatement()
            if type(stmt) is astnodes.EmptyStatement:
                continue
            collected.append(stmt)
        return astnodes.CompoundStatement(collected)

    def parseStatement(self):
      # Determine statement type based on the pending token:
      if self.Peak == 'if':
         return self.parseIfStatement()
      elif self.Peak == 'while':
         return self.parseWhileStatement()
      elif self.Peak == '{':
         return self.parseCompoundStatement()
      elif self.hasConsumed(';'):
         return astnodes.EmptyStatement()
      elif self.Peak == 'var':
         self.parseVarDef()
         return astnodes.EmptyStatement()
      elif self.Peak == 'return':
         return self.parseReturnStatement()
      else:
         designator = self.parseDesignator()
         if self.Peak == '(':
            return self.parseCall(designator)
         elif self.Peak == '=':
            return self.parseAssignment(designator)
         else:
              self.Error('Unable to determine statement')

    # Parsing expressions:
    def parseExpression(self):
      return self.parseBinopRhs(self.parsePrimary(), 0)

    def parsePrimary(self):
      if self.hasConsumed('('):
         e = self.parseExpression()
         self.Consume(')')
         return e
      elif self.Peak == 'NUMBER':
         val = self.Consume('NUMBER')
         return astnodes.Literal(val.val, val.loc)
      elif self.Peak == 'REAL':
         val = self.Consume('REAL')
         return astnodes.Literal(val.val, val.loc)
      elif self.Peak == 'true':
         val = self.Consume('true')
         return astnodes.Literal(True, val.loc)
      elif self.Peak == 'false':
         val = self.Consume('false')
         return astnodes.Literal(False, val.loc)
      elif self.Peak == 'ID':
         d = self.parseDesignator()
         if self.Peak == '(':
            return self.parseCall(d)
         else:
            return astnodes.VariableUse(d, d.loc)
      self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))

    def parseBinopRhs(self, lhs, min_prec):
        """Fold binary operators of at least ``min_prec`` onto ``lhs``.

        Repeatedly consumes an operator and a primary; while the operator
        that follows binds tighter than the one just consumed, the right
        operand is extended first. Returns the resulting expression node,
        or ``lhs`` unchanged when no suitable operator is pending.
        """
        while True:
            prec = self.PeakPrec
            if prec < min_prec:
                break
            operator = self.Consume(self.Peak)
            right = self.parsePrimary()
            # A tighter binding operator ahead claims `right` as its own
            # left operand before this operator is applied.
            while self.PeakPrec > prec:
                right = self.parseBinopRhs(right, self.PeakPrec)
            lhs = astnodes.Binop(lhs, operator.typ, right, operator.loc)
        return lhs