Mercurial > lcfOS
comparison python/pyyacc.py @ 319:8d07a4254f04
Work on burg
author | Windel Bouwman |
---|---|
date | Sat, 18 Jan 2014 18:58:43 +0100 |
parents | e84047f29c78 |
children | c7cc54c0dfdf |
comparison
equal
deleted
inserted
replaced
318:e84047f29c78 | 319:8d07a4254f04 |
---|---|
51 """ Pretty print a grammar """ | 51 """ Pretty print a grammar """ |
52 print(g) | 52 print(g) |
53 for production in g.productions: | 53 for production in g.productions: |
54 print(production) | 54 print(production) |
55 | 55 |
56 | |
57 def calculate_first_sets(grammar): | |
58 """ | |
59 Calculate first sets for each grammar symbol | |
60 This is a dictionary which maps each grammar symbol | |
61 to a set of terminals that can be encountered first | |
62 when looking for the symbol. | |
63 """ | |
64 first = {} | |
65 nullable = {} | |
66 for terminal in grammar.terminals | {EOF, EPS}: | |
67 first[terminal] = set([terminal]) | |
68 nullable[terminal] = False | |
69 for nt in grammar.nonterminals: | |
70 first[nt] = set() | |
71 nullable[nt] = False | |
72 while True: | |
73 some_change = False | |
74 for rule in grammar.productions: | |
75 # Check for null-ability: | |
76 if all(nullable[beta] for beta in rule.symbols): | |
77 if not nullable[rule.name]: | |
78 nullable[rule.name] = True | |
79 some_change = True | |
80 # Update first sets: | |
81 for beta in rule.symbols: | |
82 if not nullable[beta]: | |
83 if first[beta] - first[rule.name]: | |
84 first[rule.name] |= first[beta] | |
85 some_change = True | |
86 break | |
87 if not some_change: | |
88 break | |
89 return first | |
90 | |
91 | |
56 class Grammar: | 92 class Grammar: |
57 """ Defines a grammar of a language """ | 93 """ Defines a grammar of a language """ |
58 def __init__(self, terminals): | 94 def __init__(self, terminals): |
59 self.terminals = terminals | 95 self.terminals = set(terminals) |
60 self.nonterminals = [] | 96 self.nonterminals = set() |
61 self.productions = [] | 97 self.productions = [] |
62 self._first = None # Cached first set | 98 self._first = None # Cached first set |
63 self.start_symbol = None | 99 self.start_symbol = None |
64 | 100 |
65 def __repr__(self): | 101 def __repr__(self): |
69 """ Add a production rule to the grammar """ | 105 """ Add a production rule to the grammar """ |
70 production = Production(name, symbols, f) | 106 production = Production(name, symbols, f) |
71 self.productions.append(production) | 107 self.productions.append(production) |
72 if name in self.terminals: | 108 if name in self.terminals: |
73 raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) | 109 raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) |
74 if not name in self.nonterminals: | 110 self.nonterminals.add(name) |
75 self.nonterminals.append(name) | |
76 self._first = None # Invalidate cached version | 111 self._first = None # Invalidate cached version |
77 | 112 |
78 def productionsForName(self, name): | 113 def productionsForName(self, name): |
79 """ Retrieve all productions for a non terminal """ | 114 """ Retrieve all productions for a non terminal """ |
80 return [p for p in self.productions if p.name == name] | 115 return [p for p in self.productions if p.name == name] |
81 | 116 |
82 @property | 117 @property |
83 def Symbols(self): | 118 def Symbols(self): |
84 """ Get all the symbols defined by this grammar """ | 119 """ Get all the symbols defined by this grammar """ |
85 return self.nonterminals + self.terminals | 120 return self.nonterminals | self.terminals |
86 | 121 |
87 @property | 122 @property |
88 def first(self): | 123 def first(self): |
89 """ | 124 """ |
90 The first set is a mapping from a grammar symbol to a set of | 125 The first set is a mapping from a grammar symbol to a set of |
91 set of all terminal symbols that can be the first terminal when | 126 set of all terminal symbols that can be the first terminal when |
92 looking for the grammar symbol | 127 looking for the grammar symbol |
93 """ | 128 """ |
94 if not self._first: | 129 if not self._first: |
95 self._first = self.calcFirstSets() | 130 self._first = calculate_first_sets(self) |
96 return self._first | 131 return self._first |
97 | |
98 def calcFirstSets(self): | |
99 """ | |
100 Calculate first sets for each grammar symbol | |
101 This is a dictionary which maps each grammar symbol | |
102 to a set of terminals that can be encountered first | |
103 when looking for the symbol. | |
104 """ | |
105 first = {} | |
106 for t in self.terminals + [EOF, EPS]: | |
107 first[t] = set([t]) | |
108 for nt in self.nonterminals: | |
109 first[nt] = set() | |
110 epsset = {EPS} | |
111 while True: | |
112 some_change = False | |
113 for p in self.productions: | |
114 rhs = set() | |
115 for beta in p.symbols: | |
116 rhs = rhs | (first[beta] - epsset) | |
117 if not EPS in first[beta]: | |
118 break | |
119 else: | |
120 if EPS in first[beta]: | |
121 rhs.add(EPS) | |
122 if rhs - first[p.name]: | |
123 first[p.name] |= rhs | |
124 some_change = True | |
125 if not some_change: | |
126 break | |
127 return first | |
128 | 132 |
129 def closure(self, itemset): | 133 def closure(self, itemset): |
130 """ Expand itemset by using epsilon moves """ | 134 """ Expand itemset by using epsilon moves """ |
131 worklist = list(itemset) | 135 worklist = list(itemset) |
132 def addIt(itm): | 136 def addIt(itm): |
138 f = set(self.first[itm.NextNext]) | 142 f = set(self.first[itm.NextNext]) |
139 if EPS in f: | 143 if EPS in f: |
140 f.discard(EPS) | 144 f.discard(EPS) |
141 f.add(itm.look_ahead) | 145 f.add(itm.look_ahead) |
142 return f | 146 return f |
143 # Start of algorithm: | 147 # Start of algorithm: |
144 while worklist: | 148 while worklist: |
145 item = worklist.pop(0) | 149 item = worklist.pop(0) |
146 if not item.IsShift: | 150 if not item.IsShift: |
147 continue | 151 continue |
148 if not (item.Next in self.nonterminals): | 152 if not (item.Next in self.nonterminals): |
160 iis.add(Item(p, 0, EOF)) | 164 iis.add(Item(p, 0, EOF)) |
161 return self.closure(iis) | 165 return self.closure(iis) |
162 | 166 |
163 def nextItemSet(self, itemset, symbol): | 167 def nextItemSet(self, itemset, symbol): |
164 """ | 168 """ |
165 Determines the next itemset for the current set and a symbol | 169 Determines the next itemset for the current set and a symbol |
166 This is the goto procedure | 170 This is the goto procedure |
167 """ | 171 """ |
168 next_set = set() | 172 next_set = set() |
169 for item in itemset: | 173 for item in itemset: |
170 if item.can_shift_over(symbol): | 174 if item.can_shift_over(symbol): |
192 | 196 |
193 def checkSymbols(self): | 197 def checkSymbols(self): |
194 """ Checks no symbols are undefined """ | 198 """ Checks no symbols are undefined """ |
195 for production in self.productions: | 199 for production in self.productions: |
196 for symbol in production.symbols: | 200 for symbol in production.symbols: |
197 if symbol not in self.Symbols + [EPS]: | 201 if symbol not in self.Symbols: |
198 raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) | 202 raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) |
199 | 203 |
200 def genParser(self): | 204 def genParser(self): |
201 """ Generates a parser from the grammar (using a caching algorithm) """ | 205 """ Generates a parser from the grammar (using a caching algorithm) """ |
202 action_table, goto_table = self.doGenerate() | 206 action_table, goto_table = self.doGenerate() |
393 ret_val = None | 397 ret_val = None |
394 # Break out! | 398 # Break out! |
395 break | 399 break |
396 # At exit, the stack must be 1 long | 400 # At exit, the stack must be 1 long |
397 # TODO: fix that this holds: | 401 # TODO: fix that this holds: |
398 #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) | 402 #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) |
399 return ret_val | 403 return ret_val |