diff python/pyyacc.py @ 179:0f3b1adfd416

LR(0) parser
author Windel Bouwman
date Sat, 04 May 2013 18:50:36 +0200
parents c694ec551f34
children 25a0753da4cf
line wrap: on
line diff
--- a/python/pyyacc.py	Sat May 04 12:07:17 2013 +0200
+++ b/python/pyyacc.py	Sat May 04 18:50:36 2013 +0200
@@ -1,112 +1,18 @@
-
-class GrammarError(Exception):
-   pass
-
 class Grammar:
    def __init__(self, terminals):
       self.terminals = terminals
       self.nonterminals = []
       self.productions = []
-      self.states = None
-      self.start_symbol = None
    def add_production(self, name, symbols):
       p = Production(name, symbols)
       self.productions.append(p)
       if not name in self.nonterminals:
          self.nonterminals.append(name)
-   def set_start(self, start):
-      self.start_symbol = start
-   def get_start(self):
-      return self.start_symbol
-   StartSymbol = property(get_start, set_start)
    def productionsForName(self, name):
       return [p for p in self.productions if p.name == name]
    @property
    def Symbols(self):
       return self.nonterminals + self.terminals
-   def calcFollow(self, name):
-      """ Determines the follow set for a given name """
-      f = set()
-      # TODO
-      return f
-
-   def e_closure(self, S):
-      something_changed = True
-      while something_changed:
-         something_changed = False
-         worklist = list(S)
-         for item in worklist:
-            if item.IsShift and item.Next in self.nonterminals:
-               n = item.Next
-               for add_p in self.productionsForName(n):
-                  i = Item(add_p, 0)
-                  if not i in S:
-                     S.add(i)
-                     something_changed = True
-      return frozenset(S)
-   def calcInitialItemSet(self):
-      nis = set()
-      for p in self.productionsForName(self.StartSymbol):
-         nis.add(Item(p, 0))
-      return self.e_closure(nis)
-   def calcNextItemSet(self, itemset, symbol):
-      nis = set()
-      for item in itemset:
-         if item.IsShift and item.Next == symbol:
-            nis.add(Item(item.production, item.dotpos + 1))
-      return self.e_closure(nis)
-   def calcTables(self):
-      print(self.nonterminals + self.terminals)
-      self.states = set()
-      self.goto_table = {}
-      self.action_table = {}
-      iis = self.calcInitialItemSet()
-      self.s0 = iis
-      print(len(iis), iis)
-      worklist = [iis]
-      self.states.add(iis)
-      while len(worklist) > 0:
-         itemset = worklist.pop(0)
-         has_goto = False
-         for symbol in self.Symbols:
-            nis = self.calcNextItemSet(itemset, symbol)
-            if nis:
-               self.goto_table[(itemset, symbol)] = nis
-               has_goto = True
-               if not nis in self.states:
-                  worklist.append(nis)
-                  self.states.add(nis)
-         if has_goto:
-            self.action_table[itemset] = 'shift', 's'
-            for item in itemset:
-               assert item.IsShift, item
-         else:
-            # Check for reduce-reduce conflicts:
-            assert len(itemset) == 1
-            item = list(itemset)[0]
-            assert item.IsReduce
-            self.action_table[itemset] = 'reduce', item.production
-      print(self.Symbols)
-      print(len(nis), nis, self.states)
-      for s, i in zip(self.states, range(len(self.states))):
-         print(i, s)
-         
-   def parse(self, toks):
-      stack = [self.s0]
-      while stack[-1] != 'input':
-         action, p = self.action_table[stack[-1]]
-         print(action, p)
-         if action == 'shift':
-            stack.append(toks.pop(0))
-            s, i = stack[-2:]
-            stack.append(self.goto_table[(s, i)])
-         else:
-            for s in p.symbols:
-               stack.pop()
-               stack.pop()
-            stack.append(p.name)
-            s, i = stack[-2:]
-            stack.append(self.goto_table[(s, i)])
 
 class Production:
    def __init__(self, name, symbols):
@@ -114,7 +20,7 @@
       self.symbols = symbols
    def __repr__(self):
       return '{0} -> {1}'.format(self.name, self.symbols)
-   
+
 class Item:
    def __init__(self, production, dotpos):
       self.production = production
@@ -137,24 +43,108 @@
    def matches(self, symbol):
       return self.Next == symbol
    def __repr__(self):
-      return '{0} -> {1} _dot_ {2}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))
+      return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))
+
+class LRgenerator:
+   def __init__(self, g):
+      self.grammar = g
+   def e_closure(self, S):
+      """ Expand itemset by using epsilon moves """
+      something_changed = True
+      while something_changed:
+         something_changed = False
+         worklist = list(S)
+         for item in worklist:
+            if item.IsShift and item.Next in self.grammar.nonterminals:
+               n = item.Next
+               for add_p in self.grammar.productionsForName(n):
+                  i = Item(add_p, 0)
+                  if not i in S:
+                     S.add(i)
+                     something_changed = True
+      return frozenset(S)
+   def initialItemSet(self):
+      nis = set()
+      for p in self.grammar.productionsForName(self.grammar.start_symbol):
+         nis.add(Item(p, 0))
+      return self.e_closure(nis)
+   def nextItemSet(self, itemset, symbol):
+      nis = set()
+      for item in itemset:
+         if item.IsShift and item.Next == symbol:
+            nis.add(Item(item.production, item.dotpos + 1))
+      return self.e_closure(nis)
+   def genParser(self):
+      states = set()
+      goto_table = {}
+      action_table = {}
+      iis = self.initialItemSet()
+      worklist = [iis]
+      states.add(iis)
+      while len(worklist) > 0:
+         itemset = worklist.pop(0)
+         has_goto = False
+         for symbol in self.grammar.Symbols:
+            nis = self.nextItemSet(itemset, symbol)
+            if nis:
+               goto_table[(itemset, symbol)] = nis
+               has_goto = True
+               if not nis in states:
+                  worklist.append(nis)
+                  states.add(nis)
+         if has_goto:
+            action_table[itemset] = 'shift', 's'
+            for item in itemset:
+               assert item.IsShift, item
+         else:
+            # Check for reduce-reduce conflicts:
+            assert len(itemset) == 1
+            item = list(itemset)[0]
+            assert item.IsReduce
+            action_table[itemset] = 'reduce', item.production
+      for s, i in zip(states, range(len(states))):
+         print(i, ' || '.join(str(x) for x in s))
+      p = LRParser()
+      p.goto_table = goto_table
+      p.action_table = action_table
+      p.s0 = iis
+      return p
+
+class LRParser:
+   def parse(self, toks):
+      stack = [self.s0]
+      while stack[-1] != 'input':
+         action, p = self.action_table[stack[-1]]
+         print(action, p)
+         if action == 'shift':
+            stack.append(toks.pop(0))
+            s, i = stack[-2:]
+            stack.append(self.goto_table[(s, i)])
+         elif action == 'reduce':
+            for s in p.symbols:
+               stack.pop()
+               stack.pop()
+            stack.append(p.name)
+            s, i = stack[-2:]
+            if stack[-1] == 'input':
+               break
+            stack.append(self.goto_table[(s, i)])
 
 # Test it:
 # 1. define a simple grammar:
-g = Grammar(['EOF', 'identifier', '(', ')', '+'])
+g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
 g.add_production('input', ['expression', 'EOF'])
 g.add_production('expression', ['term'])
 g.add_production('expression', ['expression', '+', 'term'])
-g.add_production('term', ['identifier'])
+g.add_production('term', ['factor'])
+#g.add_production('term', ['term', '*', 'factor'])
 g.add_production('term', ['(', 'expression', ')'])
-g.StartSymbol = 'input'
-
+g.add_production('factor', ['identifier'])
+g.start_symbol = 'input'
 # 2. define input:
-tokens = ['identifier', '+', 'identifier', 'EOF']
+tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
+# 3. build parser:
+p = LRgenerator(g).genParser()
+# 4. feed input:
+p.parse(tokens)
 
-# 3. build tables
-g.calcTables()
-
-# 4. feed input
-g.parse(tokens)
-