lcfOS: python/pyyacc.py annotate

annotate python/pyyacc.py @ 180:25a0753da4cf

Re-organized files

author	Windel Bouwman
date	Thu, 09 May 2013 17:35:19 +0200
parents	0f3b1adfd416
children	216da5e46efc

rev	line source
178 c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	1 class Grammar:
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	2 def __init__(self, terminals):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	3 self.terminals = terminals
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	4 self.nonterminals = []
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	5 self.productions = []
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	6 def add_production(self, name, symbols):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	7 p = Production(name, symbols)
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	8 self.productions.append(p)
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	9 if not name in self.nonterminals:
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	10 self.nonterminals.append(name)
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	11 def productionsForName(self, name):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	12 return [p for p in self.productions if p.name == name]
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	13 @property
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	14 def Symbols(self):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	15 return self.nonterminals + self.terminals
180 25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	16 def calcFollow(self):
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	17 follow = {}
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	18 for nt in self.nonterminals:
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	19 follow[nt] = set()
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	20 while True:
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	21 change = False
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	22 # 1.
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	23 for p in self.productions:
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	24 pass
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	25 if not change:
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	26 break
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	27 return follow
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	28 def calcFirst(self):
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	29 first = {}
25a0753da4cf Re-organized files Windel Bouwman parents: 179 diff changeset	30 return first
178 c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	31
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	32 class Production:
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	33 def __init__(self, name, symbols):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	34 self.name = name
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	35 self.symbols = symbols
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	36 def __repr__(self):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	37 return '{0} -> {1}'.format(self.name, self.symbols)
179 0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	38
178 c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	39 class Item:
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	40 def __init__(self, production, dotpos):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	41 self.production = production
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	42 self.dotpos = dotpos
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	43 def __eq__(self, other):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	44 if type(other) is type(self):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	45 return (self.production, self.dotpos) == (other.production, other.dotpos)
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	46 return False
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	47 def __hash__(self):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	48 return (self.production, self.dotpos).__hash__()
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	49 @property
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	50 def IsReduce(self):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	51 return self.dotpos == len(self.production.symbols)
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	52 @property
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	53 def IsShift(self):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	54 return not self.IsReduce
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	55 @property
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	56 def Next(self):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	57 return self.production.symbols[self.dotpos]
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	58 def matches(self, symbol):
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	59 return self.Next == symbol
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	60 def __repr__(self):
179 0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	61 return '{0} -> {1} _dot_ {2} {{}}'.format(self.production.name, ' '.join(self.production.symbols[0:self.dotpos]), ' '.join(self.production.symbols[self.dotpos:]))
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	62
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	63 class LRgenerator:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	64 def __init__(self, g):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	65 self.grammar = g
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	66 def e_closure(self, S):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	67 """ Expand itemset by using epsilon moves """
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	68 something_changed = True
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	69 while something_changed:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	70 something_changed = False
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	71 worklist = list(S)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	72 for item in worklist:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	73 if item.IsShift and item.Next in self.grammar.nonterminals:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	74 n = item.Next
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	75 for add_p in self.grammar.productionsForName(n):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	76 i = Item(add_p, 0)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	77 if not i in S:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	78 S.add(i)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	79 something_changed = True
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	80 return frozenset(S)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	81 def initialItemSet(self):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	82 nis = set()
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	83 for p in self.grammar.productionsForName(self.grammar.start_symbol):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	84 nis.add(Item(p, 0))
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	85 return self.e_closure(nis)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	86 def nextItemSet(self, itemset, symbol):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	87 nis = set()
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	88 for item in itemset:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	89 if item.IsShift and item.Next == symbol:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	90 nis.add(Item(item.production, item.dotpos + 1))
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	91 return self.e_closure(nis)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	92 def genParser(self):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	93 states = set()
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	94 goto_table = {}
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	95 action_table = {}
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	96 iis = self.initialItemSet()
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	97 worklist = [iis]
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	98 states.add(iis)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	99 while len(worklist) > 0:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	100 itemset = worklist.pop(0)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	101 has_goto = False
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	102 for symbol in self.grammar.Symbols:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	103 nis = self.nextItemSet(itemset, symbol)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	104 if nis:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	105 goto_table[(itemset, symbol)] = nis
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	106 has_goto = True
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	107 if not nis in states:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	108 worklist.append(nis)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	109 states.add(nis)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	110 if has_goto:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	111 action_table[itemset] = 'shift', 's'
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	112 for item in itemset:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	113 assert item.IsShift, item
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	114 else:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	115 # Check for reduce-reduce conflicts:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	116 assert len(itemset) == 1
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	117 item = list(itemset)[0]
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	118 assert item.IsReduce
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	119 action_table[itemset] = 'reduce', item.production
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	120 for s, i in zip(states, range(len(states))):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	121 print(i, ' \|\| '.join(str(x) for x in s))
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	122 p = LRParser()
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	123 p.goto_table = goto_table
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	124 p.action_table = action_table
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	125 p.s0 = iis
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	126 return p
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	127
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	128 class LRParser:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	129 def parse(self, toks):
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	130 stack = [self.s0]
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	131 while stack[-1] != 'input':
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	132 action, p = self.action_table[stack[-1]]
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	133 print(action, p)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	134 if action == 'shift':
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	135 stack.append(toks.pop(0))
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	136 s, i = stack[-2:]
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	137 stack.append(self.goto_table[(s, i)])
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	138 elif action == 'reduce':
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	139 for s in p.symbols:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	140 stack.pop()
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	141 stack.pop()
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	142 stack.append(p.name)
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	143 s, i = stack[-2:]
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	144 if stack[-1] == 'input':
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	145 break
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	146 stack.append(self.goto_table[(s, i)])
178 c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	147
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	148 # Test it:
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	149 # 1. define a simple grammar:
179 0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	150 g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
178 c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	151 g.add_production('input', ['expression', 'EOF'])
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	152 g.add_production('expression', ['term'])
c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	153 g.add_production('expression', ['expression', '+', 'term'])
179 0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	154 g.add_production('term', ['factor'])
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	155 #g.add_production('term', ['term', '*', 'factor'])
178 c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	156 g.add_production('term', ['(', 'expression', ')'])
179 0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	157 g.add_production('factor', ['identifier'])
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	158 g.start_symbol = 'input'
178 c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	159 # 2. define input:
179 0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	160 tokens = ['identifier', '+', 'identifier', '+', 'identifier', 'EOF']
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	161 # 3. build parser:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	162 p = LRgenerator(g).genParser()
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	163 # 4. feed input:
0f3b1adfd416 LR(0) parser Windel Bouwman parents: 178 diff changeset	164 p.parse(tokens)
178 c694ec551f34 Added lex yacc test scripts Windel Bouwman parents: diff changeset	165

Mercurial > lcfOS

annotate python/pyyacc.py @ 180:25a0753da4cf