pylearn: doc/v2_planning/plugin_PL.py @ 1253:826d78f0135f
Prototype for "hooks" simpler than full control-flow rewrite.
author: Pascal Lamblin <lamblinp@iro.umontreal.ca>
date:   Fri, 24 Sep 2010 01:46:12 -0400

import numpy
import theano
import theano.tensor
from itertools import izip

# Model, Learner and Optimizer are assumed to come from the pylearn v2
# interfaces this prototype is sketching.

class RBM(Model):
    '''
    Restricted Boltzmann Machine.
    '''
    def __init__(self, n_visible, n_hidden, visible=None, name=None):

        if name is None:
            self.__name__ = self.__class__.__name__
        else:
            self.__name__ = name
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        if visible is None:
            self.visible = theano.tensor.matrix(name='.'.join([self.__name__, 'visible']))
        else:
            self.visible = visible

        self.W = theano.shared(
                numpy.zeros((n_visible, n_hidden), dtype=theano.config.floatX),
                name=self.__name__ + '.W')
        self.b_hid = theano.shared(
                numpy.zeros((n_hidden,), dtype=theano.config.floatX),
                name=self.__name__ + '.b_hid')
        self.b_vis = theano.shared(
                numpy.zeros((n_visible,), dtype=theano.config.floatX),
                name=self.__name__ + '.b_vis')

        self.inputs = [self.visible]
        self.targets = []
        self.parameters = [self.W, self.b_hid, self.b_vis]
        self.outputs = ...
        self.cost = None
        self.gradients = [...]

class LogisticRegression(Model):
    pass

class GradientBasedLearner(Learner):
    '''
    Learner that uses a gradient-based Optimizer to train a Model.
    '''
    def __init__(self, model, optimizer, name=None):
        self.model = model
        self.optimizer = optimizer
        ...

        self.updates = optimizer.iterative_optimizer(
                parameters=model.parameters,
                cost=model.cost)

        # TODO: not sure of how to interface data set with the function
        self.train_fn = theano.function(model.inputs + model.targets,
                model.cost, updates=self.updates)

    def use_dataset(self, dataset):
        self.train_set = dataset

    # The decorator indicates that this function will declare some hooks.
    # More hooks could be automatically declared, for instance
    # 'begin_function' and 'end_function'
    @declare_hooks(['begin_train_iter', 'end_train_iter'])
    def adapt(self, n_steps=1):
        for i in xrange(n_steps):
            self.adapt.hooks.execute(
                    'begin_train_iter',
                    context=dict(iter=i, total=n_steps, locals=locals()))

            data = self.train_set.next()
            self.train_fn(data)

            self.adapt.hooks.execute(
                    'end_train_iter',
                    context=dict(iter=i, locals=locals()))


class SGD(Optimizer):
    '''
    Stochastic gradient descent with fixed learning rate.
    '''
    def __init__(self, step_size):
        self.step_size = step_size

    def iterative_optimizer(
            self,
            parameters,
            cost=None,
            gradients=None,
            stop=None,
            updates=None,
            ):

        if updates is not None:
            ret = updates
        else:
            ret = {}

        if gradients is None:
            if cost is None:
                raise ValueError('SGD needs to be provided either a cost or a gradients list')
            gradients = theano.tensor.grad(cost, parameters)

        for p, g in izip(parameters, gradients):
            if p in ret:
                raise KeyError('Parameter %s already has an update value (%s)' % (p, g))
            ret[p] = p - self.step_size * g

        # never stop
        if stop is not None:
            ret[stop] = False

        return ret


class DBN(Learner):
    '''
    Deep Belief Network.
    '''
    def __init__(self, n_layers, layer_config, n_ft_steps, ft_step_size):
        self.__name__ = self.__class__.__name__
        # Layers are GradientBasedLearners, with DBN as Model
        self.layers = []
        # Pretraining cumulative schedule
        self.pt_cumul_schedule = [0]
        # Build the layers and the fully-connected model
        self.input = theano.tensor.matrix(name='.'.join([self.__name__, 'input']))
        self.output = self.input
        self.ft_params = []
        for i, lconf in enumerate(layer_config):
            rbm = RBM(visible=self.output, ...)
            self.output = rbm.hidden_expectation
            layer = GradientBasedLearner(...)
            self.layers.append(layer)

            self.pt_cumul_schedule.append(
                    self.pt_cumul_schedule[-1] + lconf.n_pretrain_steps)

            self.ft_params.extend([rbm.W, rbm.b_hid])

        # Build the fine-tunable model
        self.target = theano.tensor.ivector(name='.'.join([self.__name__, 'target']))
        logreg = LogisticRegression(...)
        self.output = logreg.output
        self.cost = logreg.nll
        self.ft_params.extend([logreg.W, logreg.b])

        ft_optimizer = SGD(ft_step_size)
        self.ft_updates = ft_optimizer.iterative_optimizer(
                parameters=self.ft_params,
                cost=self.cost)
        self.ft_fn = theano.function(
                [self.input, self.target],
                self.cost,
                updates=self.ft_updates,
                name='.'.join([self.__name__, 'ft_fn']))

        self.stage = 0

    @declare_hooks([
        'begin_pretrain_layer', 'end_pretrain_layer',
        'begin_finetune_iter', 'end_finetune_iter'])
    def adapt(self, n_steps=1):
        '''
        Each "step" is accomplished by the corresponding Learner (either an RBM,
        or the global NNet).
        '''
        train_x, train_y = self.dataset
        n_remaining_steps = n_steps
        # Unsupervised pre-training
        for i, layer in enumerate(self.layers):
            if (self.pt_cumul_schedule[i] <= self.stage
                    and self.stage < self.pt_cumul_schedule[i+1]):

                self.adapt.hooks.execute(
                        'begin_pretrain_layer',
                        context=dict(iter=i, total=len(self.layers), locals=locals()))

                n_pt_steps = min(n_remaining_steps, self.pt_cumul_schedule[i+1] - self.stage)
                layer.use_dataset(train_x)
                layer.adapt(n_steps=n_pt_steps)
                self.stage += n_pt_steps
                n_remaining_steps -= n_pt_steps

                self.adapt.hooks.execute(
                        'end_pretrain_layer',
                        context=dict(iter=i, total=len(self.layers), locals=locals()))

            # For the next layer, the data needs to be preprocessed
            train_x = layer.compute_Eh_given_v(train_x)  # or just compute_output?

        # Supervised fine-tuning
        if n_remaining_steps > 0:
            sup_data = train_x, train_y
            for i in xrange(n_remaining_steps):
                self.adapt.hooks.execute(
                        'begin_finetune_iter',
                        context=dict(iter=i, total=n_steps, locals=locals()))

                data = self.train_set.next()
                self.ft_fn(data)

                self.adapt.hooks.execute(
                        'end_finetune_iter',
                        context=dict(iter=i, locals=locals()))


## TODO: implement k-fold cross-validation

class Hooks:
    def __init__(self):
        # The DB is a dictionary:
        # the keys are the hooks' names (as strings),
        # the values are lists of (function, exec_condition) pairs
        self.db = {}

    def declare(self, name):
        if name in self.db:
            raise KeyError('Hook "%s" is already declared' % name)
        self.db[name] = []

    def execute(self, name, context):
        if name not in self.db:
            raise KeyError('Hook "%s" does not exist' % name)
        #TODO: add contextual information to context, like current time, time of last call,...
        for fn, cond in self.db[name]:
            if cond(**context):
                fn(**context)

    def register(self, name, function, exec_condition):
        if name not in self.db:
            raise KeyError('Hook "%s" does not exist' % name)
        self.db[name].append((function, exec_condition))

#TODO: add __getattr__ to have more intuitive access to the hooks

# Hook declaration mechanism
def declare_hooks(hooks_list):
    def deco(f):
        f.hooks = Hooks()
        for hook_name in hooks_list:
            f.hooks.declare(hook_name)
        return f
    return deco

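# Standalone usage sketch (not part of the original prototype): how declare_hooks
# and Hooks could be used on a plain function, outside of any Learner. The names
# 'count_to' and 'print_progress' are purely illustrative. Note that the Hooks
# instance lives on the function object itself, which is why 'self.adapt.hooks'
# also resolves on the bound methods above.
@declare_hooks(['begin_iter', 'end_iter'])
def count_to(n):
    for i in xrange(n):
        count_to.hooks.execute('begin_iter',
                context=dict(iter=i, total=n, locals=locals()))
        # ... the real work of the iteration would go here ...
        count_to.hooks.execute('end_iter',
                context=dict(iter=i, locals=locals()))

def print_progress(iter, total, **kwargs):
    print 'iteration %d of %d' % (iter, total)

count_to.hooks.register('begin_iter', print_progress,
        exec_condition=lambda **context: True)
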
# Conditions
def always():
    return lambda *args, **kwargs: True

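# Another condition helper, given here only to illustrate the exec_condition
# protocol (not in the original prototype): fire every n-th iteration, based on
# the 'iter' entry of the hook context.
def every_n_iters(n):
    return lambda iter, **kwargs: iter % n == 0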

def main():
    train_data = MNIST.gettrain()
    test_data = MNIST.gettest()

    train_x, train_y = train_data

    preprocessor = PCA(ndim=80)
    preprocessor.train(train_x)

    preprocessed_x = preprocessor.compute_output(train_x)
    ## for more robustness, we can have something like:
    # preprocessed_x = ProcessDataSet(orig_data=train_x, function=preprocessor.compute_output)

    x = theano.tensor.matrix()
    dbn = DBN(
            input=x,
            n_layers=3,
            layer_config=[dict(n_hidden=500, n_unsup_steps=1000)] * 3
            )

    dbn.layers[0].adapt.hooks.register(
            'begin_train_iter',
            function=...,
            exec_condition=always()
            )

    dbn.layers[0].adapt.hooks.register(
            'end_train_iter',
            function=...,
            exec_condition=lambda iter, **kwargs: iter % 20 == 0
            )


if __name__ == '__main__':
    main()