diff python/pyyacc.py @ 178:c694ec551f34

Added lex yacc test scripts
author Windel Bouwman
date Sat, 04 May 2013 12:07:17 +0200
parents
children 0f3b1adfd416
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/pyyacc.py	Sat May 04 12:07:17 2013 +0200
@@ -0,0 +1,160 @@
+
+class GrammarError(Exception):
+   pass
+
+class Grammar:
+   def __init__(self, terminals):
+      self.terminals = terminals
+      self.nonterminals = []
+      self.productions = []
+      self.states = None
+      self.start_symbol = None
+   def add_production(self, name, symbols):
+      p = Production(name, symbols)
+      self.productions.append(p)
+      if not name in self.nonterminals:
+         self.nonterminals.append(name)
+   def set_start(self, start):
+      self.start_symbol = start
+   def get_start(self):
+      return self.start_symbol
+   StartSymbol = property(get_start, set_start)
+   def productionsForName(self, name):
+      return [p for p in self.productions if p.name == name]
+   @property
+   def Symbols(self):
+      return self.nonterminals + self.terminals
+   def calcFollow(self, name):
+      """ Determines the follow set for a given name """
+      f = set()
+      # TODO
+      return f
+
+   def e_closure(self, S):
+      something_changed = True
+      while something_changed:
+         something_changed = False
+         worklist = list(S)
+         for item in worklist:
+            if item.IsShift and item.Next in self.nonterminals:
+               n = item.Next
+               for add_p in self.productionsForName(n):
+                  i = Item(add_p, 0)
+                  if not i in S:
+                     S.add(i)
+                     something_changed = True
+      return frozenset(S)
+   def calcInitialItemSet(self):
+      nis = set()
+      for p in self.productionsForName(self.StartSymbol):
+         nis.add(Item(p, 0))
+      return self.e_closure(nis)
+   def calcNextItemSet(self, itemset, symbol):
+      nis = set()
+      for item in itemset:
+         if item.IsShift and item.Next == symbol:
+            nis.add(Item(item.production, item.dotpos + 1))
+      return self.e_closure(nis)
+   def calcTables(self):
+      print(self.nonterminals + self.terminals)
+      self.states = set()
+      self.goto_table = {}
+      self.action_table = {}
+      iis = self.calcInitialItemSet()
+      self.s0 = iis
+      print(len(iis), iis)
+      worklist = [iis]
+      self.states.add(iis)
+      while len(worklist) > 0:
+         itemset = worklist.pop(0)
+         has_goto = False
+         for symbol in self.Symbols:
+            nis = self.calcNextItemSet(itemset, symbol)
+            if nis:
+               self.goto_table[(itemset, symbol)] = nis
+               has_goto = True
+               if not nis in self.states:
+                  worklist.append(nis)
+                  self.states.add(nis)
+         if has_goto:
+            self.action_table[itemset] = 'shift', 's'
+            for item in itemset:
+               assert item.IsShift, item
+         else:
+            # Check for reduce-reduce conflicts:
+            assert len(itemset) == 1
+            item = list(itemset)[0]
+            assert item.IsReduce
+            self.action_table[itemset] = 'reduce', item.production
+      print(self.Symbols)
+      print(len(nis), nis, self.states)
+      for s, i in zip(self.states, range(len(self.states))):
+         print(i, s)
+         
+   def parse(self, toks):
+      stack = [self.s0]
+      while stack[-1] != 'input':
+         action, p = self.action_table[stack[-1]]
+         print(action, p)
+         if action == 'shift':
+            stack.append(toks.pop(0))
+            s, i = stack[-2:]
+            stack.append(self.goto_table[(s, i)])
+         else:
+            for s in p.symbols:
+               stack.pop()
+               stack.pop()
+            stack.append(p.name)
+            s, i = stack[-2:]
+            stack.append(self.goto_table[(s, i)])
+
+class Production:
+   def __init__(self, name, symbols):
+      self.name = name
+      self.symbols = symbols
+   def __repr__(self):
+      return '{0} -> {1}'.format(self.name, self.symbols)
+   
+class Item:
+   def __init__(self, production, dotpos):
+      self.production = production
+      self.dotpos = dotpos
+   def __eq__(self, other):
+      if type(other) is type(self):
+         return (self.production, self.dotpos) == (other.production, other.dotpos)
+      return False
+   def __hash__(self):
+      return (self.production, self.dotpos).__hash__()
+   @property
+   def IsReduce(self):
+      return self.dotpos == len(self.production.symbols)
+   @property
+   def IsShift(self):
+      return not self.IsReduce
+   @property
+   def Next(self):
+      return self.production.symbols[self.dotpos]
+   def matches(self, symbol):
+      return self.Next == symbol
+   def __repr__(self):
+      return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))
+
+# Test it:
+# 1. define a simple grammar:
+g = Grammar(['EOF', 'identifier', '(', ')', '+'])
+g.add_production('input', ['expression', 'EOF'])
+g.add_production('expression', ['term'])
+g.add_production('expression', ['expression', '+', 'term'])
+g.add_production('term', ['identifier'])
+g.add_production('term', ['(', 'expression', ')'])
+g.StartSymbol = 'input'
+
+# 2. define input:
+tokens = ['identifier', '+', 'identifier', 'EOF']
+
+# 3. build tables
+g.calcTables()
+
+# 4. feed input
+g.parse(tokens)
+