view python/pyyacc.py @ 178:c694ec551f34

Added lex yacc test scripts
author Windel Bouwman
date Sat, 04 May 2013 12:07:17 +0200
parents
children 0f3b1adfd416
line wrap: on
line source


class GrammarError(Exception):
   pass

class Grammar:
   def __init__(self, terminals):
      self.terminals = terminals
      self.nonterminals = []
      self.productions = []
      self.states = None
      self.start_symbol = None
   def add_production(self, name, symbols):
      p = Production(name, symbols)
      self.productions.append(p)
      if not name in self.nonterminals:
         self.nonterminals.append(name)
   def set_start(self, start):
      self.start_symbol = start
   def get_start(self):
      return self.start_symbol
   StartSymbol = property(get_start, set_start)
   def productionsForName(self, name):
      return [p for p in self.productions if p.name == name]
   @property
   def Symbols(self):
      return self.nonterminals + self.terminals
   def calcFollow(self, name):
      """ Determines the follow set for a given name """
      f = set()
      # TODO
      return f

   def e_closure(self, S):
      something_changed = True
      while something_changed:
         something_changed = False
         worklist = list(S)
         for item in worklist:
            if item.IsShift and item.Next in self.nonterminals:
               n = item.Next
               for add_p in self.productionsForName(n):
                  i = Item(add_p, 0)
                  if not i in S:
                     S.add(i)
                     something_changed = True
      return frozenset(S)
   def calcInitialItemSet(self):
      nis = set()
      for p in self.productionsForName(self.StartSymbol):
         nis.add(Item(p, 0))
      return self.e_closure(nis)
   def calcNextItemSet(self, itemset, symbol):
      nis = set()
      for item in itemset:
         if item.IsShift and item.Next == symbol:
            nis.add(Item(item.production, item.dotpos + 1))
      return self.e_closure(nis)
   def calcTables(self):
      print(self.nonterminals + self.terminals)
      self.states = set()
      self.goto_table = {}
      self.action_table = {}
      iis = self.calcInitialItemSet()
      self.s0 = iis
      print(len(iis), iis)
      worklist = [iis]
      self.states.add(iis)
      while len(worklist) > 0:
         itemset = worklist.pop(0)
         has_goto = False
         for symbol in self.Symbols:
            nis = self.calcNextItemSet(itemset, symbol)
            if nis:
               self.goto_table[(itemset, symbol)] = nis
               has_goto = True
               if not nis in self.states:
                  worklist.append(nis)
                  self.states.add(nis)
         if has_goto:
            self.action_table[itemset] = 'shift', 's'
            for item in itemset:
               assert item.IsShift, item
         else:
            # Check for reduce-reduce conflicts:
            assert len(itemset) == 1
            item = list(itemset)[0]
            assert item.IsReduce
            self.action_table[itemset] = 'reduce', item.production
      print(self.Symbols)
      print(len(nis), nis, self.states)
      for s, i in zip(self.states, range(len(self.states))):
         print(i, s)
         
   def parse(self, toks):
      stack = [self.s0]
      while stack[-1] != 'input':
         action, p = self.action_table[stack[-1]]
         print(action, p)
         if action == 'shift':
            stack.append(toks.pop(0))
            s, i = stack[-2:]
            stack.append(self.goto_table[(s, i)])
         else:
            for s in p.symbols:
               stack.pop()
               stack.pop()
            stack.append(p.name)
            s, i = stack[-2:]
            stack.append(self.goto_table[(s, i)])

class Production:
   def __init__(self, name, symbols):
      self.name = name
      self.symbols = symbols
   def __repr__(self):
      return '{0} -> {1}'.format(self.name, self.symbols)
   
class Item:
   def __init__(self, production, dotpos):
      self.production = production
      self.dotpos = dotpos
   def __eq__(self, other):
      if type(other) is type(self):
         return (self.production, self.dotpos) == (other.production, other.dotpos)
      return False
   def __hash__(self):
      return (self.production, self.dotpos).__hash__()
   @property
   def IsReduce(self):
      return self.dotpos == len(self.production.symbols)
   @property
   def IsShift(self):
      return not self.IsReduce
   @property
   def Next(self):
      return self.production.symbols[self.dotpos]
   def matches(self, symbol):
      return self.Next == symbol
   def __repr__(self):
      return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))

# Test it:
# 1. define a simple grammar:
g = Grammar(['EOF', 'identifier', '(', ')', '+'])
g.add_production('input', ['expression', 'EOF'])
g.add_production('expression', ['term'])
g.add_production('expression', ['expression', '+', 'term'])
g.add_production('term', ['identifier'])
g.add_production('term', ['(', 'expression', ')'])
g.StartSymbol = 'input'

# 2. define input:
tokens = ['identifier', '+', 'identifier', 'EOF']

# 3. build tables
g.calcTables()

# 4. feed input
g.parse(tokens)