view python/c3/parser.py @ 169:ee0d30533dae

Added more tests and improved the diagnostic update
author Windel Bouwman
date Sat, 23 Mar 2013 18:34:41 +0100
parents 0b5b2ee6b435
children 46d62dadd61b
line wrap: on
line source

from . import astnodes, lexer, semantics
from ppci import CompilerError

# binop precedence for expressions:
binopPrecs = {'or': 5, 'and': 10, \
   '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20, \
   '+': 30, '-': 30, '*': 40, '/': 40 }

class Parser:
   """ Parses sourcecode into an abstract syntax tree (AST) """
   def __init__(self, diag):
      self.sema = semantics.Semantics(diag)
      self.diag = diag
   def parseSource(self, source):
      self.initLex(source)
      self.sema.reinit()
      try:
         self.parsePackage()
      except CompilerError as e:
         self.diag.addDiag(e)
      return self.sema.mod
   def Error(self, msg):
      raise CompilerError(msg, self.token.loc)
   # Lexer helpers:
   def Consume(self, typ):
      if self.Peak == typ:
         return self.NextToken()
      else:
         self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak))
   @property
   def Peak(self):
      return self.token.typ
   @property
   def PeakPrec(self):
      if self.Peak in binopPrecs:
         return binopPrecs[self.Peak]
      return -1
   def hasConsumed(self, typ):
      if self.Peak == typ:
         self.Consume(typ)
         return True
      return False
   def NextToken(self):
      t = self.token
      if t.typ != 'END':
         self.token = self.tokens.__next__()
      return t
   def initLex(self, source):
      self.tokens = lexer.tokenize(source) # Lexical stage
      self.token = self.tokens.__next__()
   def skipToSemi(self, tt):
      while self.Peak != tt and self.Peak != 'END':
         self.NextToken()
      if self.Peak == tt:
         self.Consume(tt)
      
   def parsePackage(self):
      self.Consume('package')
      name = self.Consume('ID')
      self.Consume(';')
      self.sema.handlePackage(name.val, name.loc)
      # TODO: parse uses
      while self.Peak != 'END':
         self.parseTopLevel()
      self.Consume('END')

   def parseTopLevel(self):
      if self.Peak == 'function':
         self.parseFunctionDefinition()
      elif self.Peak == 'var':
         self.parseVarDef()
      elif self.Peak == 'const':
         self.parseConstDef()
      else:
         self.Error('Expected function or variable')

   def parseDesignator(self):
      """ A designator designates an object """
      name = self.Consume('ID')
      return self.sema.actOnDesignator(name.val, name.loc)

   # Type system
   def parseType(self):
      d = self.parseDesignator()
      return d

   # Variable declarations:
   def parseVarDef(self):
      self.Consume('var')
      t = self.parseType()
      def parseVar():
         name = self.Consume('ID')
         ival = None
         if self.hasConsumed('='):
            ival = self.parseExpression()
         self.sema.actOnVarDef(name.val, name.loc, t, ival)
      parseVar()
      while self.hasConsumed(','):
         parseVar()
      self.Consume(';')

   def parseConstDef(self):
      self.Consume('const')
      t = self.parseType()
      def parseConst():
         name = self.Consume('ID')
         self.Consume('=')
         val = self.parseExpression()
         self.sema.actOnConstDef(name.val, name.loc, t, val)
      parseConst()
      while self.hasConsumed(','):
         parseConst()
      self.Consume(';')
      
   # Procedures
   def parseFunctionDefinition(self):
      self.Consume('function')
      returntype = self.parseType()
      pname = self.Consume('ID')
      self.sema.actOnFuncDef1(pname.val, pname.loc)
      self.Consume('(')
      parameters = []
      if not self.hasConsumed(')'):
         def parseParameter():
            typ = self.parseType()
            name = self.Consume('ID')
            parameters.append(self.sema.actOnParameter(name.val, name.loc, typ))
         parseParameter()
         while self.hasConsumed(','):
            parseParameter()
         self.Consume(')')
      body = self.parseCompoundStatement()
      self.sema.actOnFuncDef2(parameters, returntype, body)

   # Statements:
   def parseAssignment(self, lval):
      lval = self.sema.actOnVariableUse(lval, lval.loc)
      loc = self.Consume('=').loc
      rval = self.parseExpression()
      self.Consume(';')
      return self.sema.actOnAssignment(lval, rval, loc)

   def parseProcedureCall(self, func):
      self.Consume('(')
      args = [] 
      if not self.hasConsumed(')'):
         args.append(self.parseExpression())
         while self.hasConsumed(','):
            args.append(self.parseExpression())
         self.Consume(')')
      return self.sema.actOnFunctionCall(func, args, func.loc)

   def parseIfStatement(self):
      loc = self.Consume('if').loc
      self.Consume('(')
      condition = self.parseExpression()
      self.Consume(')')
      yes = self.parseCompoundStatement()
      if self.hasConsumed('else'):
         no = self.parseCompoundStatement()
      else:
         no = astnodes.EmptyStatement()
      return self.sema.actOnIfStatement(condition, yes, no, loc)

   def parseWhileStatement(self):
      self.Consume('while')
      self.Consume('(')
      condition = self.parseExpression()
      self.Consume(')')
      statements = self.parseCompoundStatement()
      return astnodes.WhileStatement(condition, statements)

   def parseReturnStatement(self):
      self.Consume('return')
      expr = self.parseExpression()
      self.Consume(';')
      return astnodes.ReturnStatement(expr)

   def parseCompoundStatement(self):
      self.Consume('{')
      statements = []
      while not self.hasConsumed('}'):
         s = self.parseStatement()
         if not type(s) is astnodes.EmptyStatement:
            statements.append(s)
      return astnodes.CompoundStatement(statements)

   def parseStatement(self):
      # Determine statement type based on the pending token:
      if self.Peak == 'if':
         return self.parseIfStatement()
      elif self.Peak == 'while':
         return self.parseWhileStatement()
      elif self.Peak == '{':
         return self.parseCompoundStatement()
      elif self.hasConsumed(';'):
         return astnodes.EmptyStatement()
      elif self.Peak == 'var':
         self.parseVarDef()
         return astnodes.EmptyStatement()
      elif self.Peak == 'return':
         return self.parseReturnStatement()
      elif self.Peak == 'ID':
         designator = self.parseDesignator()
         if self.Peak == '(':
            return self.parseProcedureCall(designator)
         elif self.Peak == '=':
            return self.parseAssignment(designator)
      self.Error('Unable to determine statement')

   # Parsing expressions:
   def parseExpression(self):
      return self.parseBinopRhs(self.parsePrimary(), 0)
   def parsePrimary(self):
      if self.hasConsumed('('):
         e = self.parseExpression()
         self.Consume(')')
         return e
      elif self.Peak == 'NUMBER':
         val = self.Consume('NUMBER')
         return self.sema.actOnNumber(val.val, val.loc)
      elif self.Peak == 'REAL':
         val = self.Consume('REAL')
         return self.sema.actOnNumber(val.val, val.loc)
      elif self.Peak == 'true':
         val = self.Consume('true')
         return self.sema.actOnNumber(True, val.loc)
      elif self.Peak == 'false':
         val = self.Consume('false')
         return self.sema.actOnNumber(False, val.loc)
      elif self.Peak == 'ID':
         d = self.parseDesignator()
         if self.Peak == '(':
            return self.parseProcedureCall(d)
         else:
            return self.sema.actOnVariableUse(d, d.loc)
      self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak))

   def parseBinopRhs(self, lhs, min_prec):
      while self.PeakPrec >= min_prec:
         op_prec = self.PeakPrec
         op = self.Consume(self.Peak)
         rhs = self.parsePrimary()
         while self.PeakPrec > op_prec:
            rhs = self.parseBinopRhs(rhs, self.PeakPrec)
         lhs = self.sema.actOnBinop(lhs, op.typ, rhs, op.loc)
      return lhs