lcfOS: python/pyyacc.py comparison

comparison python/pyyacc.py @ 318:e84047f29c78

Add burg and yacc initial attempts

author	Windel Bouwman
date	Tue, 31 Dec 2013 12:38:15 +0100
parents	6259856841a0
children	8d07a4254f04

comparison

equal deleted inserted replaced

-:e30a77ae359b
+:e84047f29c78
 """
 Parser generator script
 """
-import hashlib
 from ppci import Token
 EPS = 'EPS'
 EOF = 'EOF'
-SHIFT = 1
-REDUCE = 2
-ACCEPT = 3
 class ParserGenerationException(Exception):
 """ Raised when something goes wrong during parser generation """
 pass
 class ParserException(Exception):
 """ Raised during a failure in the parsing process """
 pass
+class Action:
+def __repr__(self):
+return 'Action'
+def __eq__(self, other):
+return str(self) == str(other)
+class Shift(Action):
+def __init__(self, to_state):
+self.to_state = to_state
+def __repr__(self):
+return 'Shift({})'.format(self.to_state)
+class Reduce(Action):
+def __init__(self, rule):
+self.rule = rule
+def __repr__(self):
+return 'Reduce({})'.format(self.rule)
+class Accept(Reduce):
+def __repr__(self):
+return 'Accept({})'.format(self.rule)
+def print_grammar(g):
+""" Pretty print a grammar """
+print(g)
+for production in g.productions:
+print(production)
 class Grammar:
 """ Defines a grammar of a language """
 def __init__(self, terminals):
 self.terminals = terminals
 self.nonterminals = []
 self.productions = []
 self._first = None  # Cached first set
+self.start_symbol = None
+def __repr__(self):
+return 'Grammar with {} rules'.format(len(self.productions))
 def add_production(self, name, symbols, f=None):
 """ Add a production rule to the grammar """
 production = Production(name, symbols, f)
 self.productions.append(production)
 """ Get all the symbols defined by this grammar """
 return self.nonterminals + self.terminals
 @property
 def first(self):
 """
 The first set is a mapping from a grammar symbol to a set of
 set of all terminal symbols that can be the first terminal when
 looking for the grammar symbol
 """
 if not self._first:
 self._first = self.calcFirstSets()
 return self._first
 for p in self.productionsForName(self.start_symbol):
 iis.add(Item(p, 0, EOF))
 return self.closure(iis)
 def nextItemSet(self, itemset, symbol):
 """
 Determines the next itemset for the current set and a symbol
 This is the goto procedure
 """
 next_set = set()
 for item in itemset:
 if item.can_shift_over(symbol):
 next_set.add(item.shifted())
 return self.closure(next_set)
 def genCanonicalSet(self, iis):
 states = []
 worklist = []
 transitions = {}
 def addSt(s):
 if not nis:
 continue
 addSt(nis)
 transitions[(states.index(itemset), symbol)] = states.index(nis)
 return states, transitions
 def checkSymbols(self):
 """ Checks no symbols are undefined """
 for production in self.productions:
 for symbol in production.symbols:
 if symbol not in self.Symbols + [EPS]:
 raise ParserGenerationException('Symbol {0} undefined'.format(symbol))
-def getSignature(self):
-m = hashlib.md5()
-m.update((str(self.productions) + str(self.start_symbol)).encode('ascii'))
-signature = m.hexdigest()
 def genParser(self):
 """ Generates a parser from the grammar (using a caching algorithm) """
-signature = self.getSignature()
 action_table, goto_table = self.doGenerate()
 p = LRParser(action_table, goto_table, self.start_symbol)
 p.grammar = self
 return p
 def doGenerate(self):
+if not self.start_symbol:
+self.start_symbol = self.productions[0].name
 self.checkSymbols()
 action_table = {}
 goto_table = {}
 iis = self.initialItemSet()
 # First generate all item sets by using the nextItemset function:
 states, transitions = self.genCanonicalSet(iis)
-def action_str(act):
-a, p = act
-if a is SHIFT:
-return 'Shift {0}'.format(0)
-elif a is REDUCE:
-return 'Reduce {0}'.format(p)
-return 'Other'
 def setAction(state, t, action):
+assert isinstance(action, Action)
 key = (state, t)
 assert type(state) is int
 assert type(t) is str
 if key in action_table:
 action2 = action_table[key]
 if action != action2:
-if (action2[0] == REDUCE) and (action[0] == SHIFT):
+if (type(action2) is Reduce) and (type(action) is Shift):
 # Automatically resolve and do the shift action!
 # Simple, but almost always what you want!!
 action_table[key] = action
+elif isinstance(action2, Shift) and isinstance(action, Reduce):
+pass
 else:
-if (action2[0] == SHIFT) and (action[0] == REDUCE):
+a1 = str(action)
-pass
+a2 = str(action2)
-else:
+raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2))
-a1 = action_str(action)
-a2 = action_str(action2)
-raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2))
 else:
 action_table[key] = action
 # Fill action table:
 for state in states:
 # Detect conflicts:
 for item in state:
 if item.IsShift and item.Next in self.terminals:
 # Rule 1, a shift item:
 nextstate = transitions[(states.index(state), item.Next)]
-setAction(states.index(state), item.Next, (SHIFT, nextstate))
+setAction(states.index(state), item.Next, Shift(nextstate))
 if item.IsReduce:
 if item.production.name == self.start_symbol and item.look_ahead == EOF:
 # Rule 3: accept:
-setAction(states.index(state), item.look_ahead, (ACCEPT, self.productions.index(item.production)))
+act = Accept(self.productions.index(item.production))
 else:
 # Rule 2, reduce item:
-setAction(states.index(state), item.look_ahead, (REDUCE, self.productions.index(item.production)))
+act = Reduce(self.productions.index(item.production))
+setAction(states.index(state), item.look_ahead, act)
 for nt in self.nonterminals:
 key = (states.index(state), nt)
 if key in transitions:
 goto_table[key] = transitions[key]
 return action_table, goto_table
 self.name = name
 self.symbols = symbols
 self.f = f
 def __repr__(self):
-return '{0} -> {1}'.format(self.name, self.symbols)
+action = ' ' + str(self.f) if self.f else ''
+return '{0} -> {1}'.format(self.name, self.symbols) + action
 class Item:
 """
 Represents a partially parsed item
 It has a production it is looking for, a position
 in this production called the 'dot' and a look ahead
 symbol that must follow this item.
 """
 def __init__(self, production, dotpos, look_ahead):
 args = (name, predot, postdot, self.look_ahead)
 return '[{0} -> {1} . {2} -> {3}]'.format(*args)
 class LRParser:
-""" LR parser """
+""" LR parser automata """
 def __init__(self, action_table, goto_table, start_symbol):
 self.action_table = action_table
 self.goto_table = goto_table
 self.start_symbol = start_symbol
-def parse(self, toks):
+def parse(self, lexer):
 """ Parse an iterable with tokens """
-assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks))
+assert hasattr(lexer, 'next_token'), '{0} is no lexer'.format(type(lexer))
 stack = [0]
 r_data_stack = []
-try:
+look_ahead = lexer.next_token()
-look_ahead = toks.__next__()
-except StopIteration:
-look_ahead = Token(EOF, EOF)
 assert type(look_ahead) is Token
 # TODO: exit on this condition:
 while stack != [0, self.start_symbol, 2222]:
-#print(stack)
 state = stack[-1]   # top of stack
 key = (state, look_ahead.typ)
 if not key in self.action_table:
 raise ParserException('Error parsing at character {0}'.format(look_ahead))
-action, param = self.action_table[key]
+action = self.action_table[key]
-if action == REDUCE:
+if type(action) is Reduce:
 f_args = []
-param = self.grammar.productions[param]
+prod = self.grammar.productions[action.rule]
-for s in param.symbols:
+for s in prod.symbols:
 stack.pop()
 stack.pop()
 f_args.append(r_data_stack.pop())
 f_args.reverse()
 r_data = None
-if param.f:
+if prod.f:
-r_data = param.f(*f_args)
+r_data = prod.f(*f_args)
 state = stack[-1]
-stack.append(param.name)
+stack.append(prod.name)
-stack.append(self.goto_table[(state, param.name)])
+stack.append(self.goto_table[(state, prod.name)])
 r_data_stack.append(r_data)
-elif action == SHIFT:
+elif type(action) is Shift:
 stack.append(look_ahead.typ)
-stack.append(param)
+stack.append(action.to_state)
-r_data_stack.append(look_ahead.val)
+r_data_stack.append(look_ahead)
-try:
+look_ahead = lexer.next_token()
-look_ahead = toks.__next__()
-except StopIteration:
-look_ahead = Token(EOF, EOF)
 assert type(look_ahead) is Token
-elif action == ACCEPT:
+elif type(action) is Accept:
 # Pop last rule data off the stack:
 f_args = []
-param = self.grammar.productions[param]
+param = self.grammar.productions[action.rule]
 for s in param.symbols:
 stack.pop()
 stack.pop()
 f_args.append(r_data_stack.pop())
 f_args.reverse()
 if param.f:
-param.f(*f_args)
+ret_val = param.f(*f_args)
+else:
+ret_val = None
 # Break out!
 break
 # At exit, the stack must be 1 long
 # TODO: fix that this holds:
 #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack)
+return ret_val

Mercurial > lcfOS

comparison python/pyyacc.py @ 318:e84047f29c78