comparison python/pyyacc.py @ 319:8d07a4254f04

Work on burg
author Windel Bouwman
date Sat, 18 Jan 2014 18:58:43 +0100
parents e84047f29c78
children c7cc54c0dfdf
comparison
equal deleted inserted replaced
318:e84047f29c78 319:8d07a4254f04
51 """ Pretty print a grammar """ 51 """ Pretty print a grammar """
52 print(g) 52 print(g)
53 for production in g.productions: 53 for production in g.productions:
54 print(production) 54 print(production)
55 55
56
57 def calculate_first_sets(grammar):
58 """
59 Calculate first sets for each grammar symbol
60 This is a dictionary which maps each grammar symbol
61 to a set of terminals that can be encountered first
62 when looking for the symbol.
63 """
64 first = {}
65 nullable = {}
66 for terminal in grammar.terminals | {EOF, EPS}:
67 first[terminal] = set([terminal])
68 nullable[terminal] = False
69 for nt in grammar.nonterminals:
70 first[nt] = set()
71 nullable[nt] = False
72 while True:
73 some_change = False
74 for rule in grammar.productions:
75 # Check for null-ability:
76 if all(nullable[beta] for beta in rule.symbols):
77 if not nullable[rule.name]:
78 nullable[rule.name] = True
79 some_change = True
80 # Update first sets:
81 for beta in rule.symbols:
82 if not nullable[beta]:
83 if first[beta] - first[rule.name]:
84 first[rule.name] |= first[beta]
85 some_change = True
86 break
87 if not some_change:
88 break
89 return first
90
91
56 class Grammar: 92 class Grammar:
57 """ Defines a grammar of a language """ 93 """ Defines a grammar of a language """
58 def __init__(self, terminals): 94 def __init__(self, terminals):
59 self.terminals = terminals 95 self.terminals = set(terminals)
60 self.nonterminals = [] 96 self.nonterminals = set()
61 self.productions = [] 97 self.productions = []
62 self._first = None # Cached first set 98 self._first = None # Cached first set
63 self.start_symbol = None 99 self.start_symbol = None
64 100
65 def __repr__(self): 101 def __repr__(self):
69 """ Add a production rule to the grammar """ 105 """ Add a production rule to the grammar """
70 production = Production(name, symbols, f) 106 production = Production(name, symbols, f)
71 self.productions.append(production) 107 self.productions.append(production)
72 if name in self.terminals: 108 if name in self.terminals:
73 raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) 109 raise ParserGenerationException("Cannot redefine terminal {0}".format(name))
74 if not name in self.nonterminals: 110 self.nonterminals.add(name)
75 self.nonterminals.append(name)
76 self._first = None # Invalidate cached version 111 self._first = None # Invalidate cached version
77 112
78 def productionsForName(self, name): 113 def productionsForName(self, name):
79 """ Retrieve all productions for a non terminal """ 114 """ Retrieve all productions for a non terminal """
80 return [p for p in self.productions if p.name == name] 115 return [p for p in self.productions if p.name == name]
81 116
82 @property 117 @property
83 def Symbols(self): 118 def Symbols(self):
84 """ Get all the symbols defined by this grammar """ 119 """ Get all the symbols defined by this grammar """
85 return self.nonterminals + self.terminals 120 return self.nonterminals | self.terminals
86 121
87 @property 122 @property
88 def first(self): 123 def first(self):
89 """ 124 """
90 The first set is a mapping from a grammar symbol to a set of 125 The first set is a mapping from a grammar symbol to a set of
91 set of all terminal symbols that can be the first terminal when 126 set of all terminal symbols that can be the first terminal when
92 looking for the grammar symbol 127 looking for the grammar symbol
93 """ 128 """
94 if not self._first: 129 if not self._first:
95 self._first = self.calcFirstSets() 130 self._first = calculate_first_sets(self)
96 return self._first 131 return self._first
97
98 def calcFirstSets(self):
99 """
100 Calculate first sets for each grammar symbol
101 This is a dictionary which maps each grammar symbol
102 to a set of terminals that can be encountered first
103 when looking for the symbol.
104 """
105 first = {}
106 for t in self.terminals + [EOF, EPS]:
107 first[t] = set([t])
108 for nt in self.nonterminals:
109 first[nt] = set()
110 epsset = {EPS}
111 while True:
112 some_change = False
113 for p in self.productions:
114 rhs = set()
115 for beta in p.symbols:
116 rhs = rhs | (first[beta] - epsset)
117 if not EPS in first[beta]:
118 break
119 else:
120 if EPS in first[beta]:
121 rhs.add(EPS)
122 if rhs - first[p.name]:
123 first[p.name] |= rhs
124 some_change = True
125 if not some_change:
126 break
127 return first
128 132
129 def closure(self, itemset): 133 def closure(self, itemset):
130 """ Expand itemset by using epsilon moves """ 134 """ Expand itemset by using epsilon moves """
131 worklist = list(itemset) 135 worklist = list(itemset)
132 def addIt(itm): 136 def addIt(itm):
138 f = set(self.first[itm.NextNext]) 142 f = set(self.first[itm.NextNext])
139 if EPS in f: 143 if EPS in f:
140 f.discard(EPS) 144 f.discard(EPS)
141 f.add(itm.look_ahead) 145 f.add(itm.look_ahead)
142 return f 146 return f
143 # Start of algorithm: 147 # Start of algorithm:
144 while worklist: 148 while worklist:
145 item = worklist.pop(0) 149 item = worklist.pop(0)
146 if not item.IsShift: 150 if not item.IsShift:
147 continue 151 continue
148 if not (item.Next in self.nonterminals): 152 if not (item.Next in self.nonterminals):
160 iis.add(Item(p, 0, EOF)) 164 iis.add(Item(p, 0, EOF))
161 return self.closure(iis) 165 return self.closure(iis)
162 166
163 def nextItemSet(self, itemset, symbol): 167 def nextItemSet(self, itemset, symbol):
164 """ 168 """
165 Determines the next itemset for the current set and a symbol 169 Determines the next itemset for the current set and a symbol
166 This is the goto procedure 170 This is the goto procedure
167 """ 171 """
168 next_set = set() 172 next_set = set()
169 for item in itemset: 173 for item in itemset:
170 if item.can_shift_over(symbol): 174 if item.can_shift_over(symbol):
192 196
193 def checkSymbols(self): 197 def checkSymbols(self):
194 """ Checks no symbols are undefined """ 198 """ Checks no symbols are undefined """
195 for production in self.productions: 199 for production in self.productions:
196 for symbol in production.symbols: 200 for symbol in production.symbols:
197 if symbol not in self.Symbols + [EPS]: 201 if symbol not in self.Symbols:
198 raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) 202 raise ParserGenerationException('Symbol {0} undefined'.format(symbol))
199 203
200 def genParser(self): 204 def genParser(self):
201 """ Generates a parser from the grammar (using a caching algorithm) """ 205 """ Generates a parser from the grammar (using a caching algorithm) """
202 action_table, goto_table = self.doGenerate() 206 action_table, goto_table = self.doGenerate()
393 ret_val = None 397 ret_val = None
394 # Break out! 398 # Break out!
395 break 399 break
396 # At exit, the stack must be 1 long 400 # At exit, the stack must be 1 long
397 # TODO: fix that this holds: 401 # TODO: fix that this holds:
398 #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) 402 #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack)
399 return ret_val 403 return ret_val