ift6266: comparison deep/rbm/rbm.py @ 369:d81284e13d77
modified to run experiments with PNIST
author | goldfinger
---|---
date | Sat, 24 Apr 2010 11:32:26 -0400
parents | 9685e9d94cc4
children | 1e99dc965b5b
368:d391ad815d89 | 369:d81284e13d77
---|---
3 Boltzmann Machines (BMs) are a particular form of energy-based model which | 3 Boltzmann Machines (BMs) are a particular form of energy-based model which |
4 contain hidden variables. Restricted Boltzmann Machines further restrict BMs | 4 contain hidden variables. Restricted Boltzmann Machines further restrict BMs |
5 to those without visible-visible and hidden-hidden connections. | 5 to those without visible-visible and hidden-hidden connections. |
6 """ | 6 """ |
7 | 7 |
8 | |
9 import numpy, time, cPickle, gzip, PIL.Image | 8 import numpy, time, cPickle, gzip, PIL.Image |
10 | 9 |
11 import theano | 10 import theano |
12 import theano.tensor as T | 11 import theano.tensor as T |
13 import os | 12 import os |
13 import pdb | |
14 import numpy | |
15 import pylab | |
16 import time | |
17 import theano.tensor.nnet | |
18 import pylearn | |
19 import ift6266 | |
20 import theano,pylearn.version,ift6266 | |
21 from pylearn.io import filetensor as ft | |
22 from ift6266 import datasets | |
14 | 23 |
15 from theano.tensor.shared_randomstreams import RandomStreams | 24 from theano.tensor.shared_randomstreams import RandomStreams |
16 | 25 |
17 from utils import tile_raster_images | 26 from utils import tile_raster_images |
18 from logistic_sgd import load_data | 27 from logistic_sgd import load_data |
19 | 28 |
20 | |
21 class RBM(object): | 29 class RBM(object): |
22 """Restricted Boltzmann Machine (RBM) """ | 30 """Restricted Boltzmann Machine (RBM) """ |
23 def __init__(self, input=None, n_visible=784, n_hidden=1000, \ | 31 def __init__(self, input=None, n_visible=32*32, n_hidden=500, \ |
24 W = None, hbias = None, vbias = None, numpy_rng = None, | 32 W = None, hbias = None, vbias = None, numpy_rng = None, |
25 theano_rng = None): | 33 theano_rng = None): |
26 """ | 34 """ |
27 RBM constructor. Defines the parameters of the model along with | 35 RBM constructor. Defines the parameters of the model along with |
28 basic operations for inferring hidden from visible (and vice-versa), | 36 basic operations for inferring hidden from visible (and vice-versa), |
87 | 95 |
88 self.W = W | 96 self.W = W |
89 self.hbias = hbias | 97 self.hbias = hbias |
90 self.vbias = vbias | 98 self.vbias = vbias |
91 self.theano_rng = theano_rng | 99 self.theano_rng = theano_rng |
100 | |
92 # **** WARNING: It is not a good idea to put things in this list | 101 # **** WARNING: It is not a good idea to put things in this list |
93 # other than shared variables created in this function. | 102 # other than shared variables created in this function. |
94 self.params = [self.W, self.hbias, self.vbias] | 103 self.params = [self.W, self.hbias, self.vbias] |
95 self.batch_size = self.input.shape[0] | 104 self.batch_size = self.input.shape[0] |
96 | 105 |
129 starting from the visible state''' | 138 starting from the visible state''' |
130 h1_mean, h1_sample = self.sample_h_given_v(v0_sample) | 139 h1_mean, h1_sample = self.sample_h_given_v(v0_sample) |
131 v1_mean, v1_sample = self.sample_v_given_h(h1_sample) | 140 v1_mean, v1_sample = self.sample_v_given_h(h1_sample) |
132 return [h1_mean, h1_sample, v1_mean, v1_sample] | 141 return [h1_mean, h1_sample, v1_mean, v1_sample] |
133 | 142 |
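For reference, `sample_h_given_v` and `sample_v_given_h` (defined outside the hunks shown here) draw from the standard binary-unit conditionals, with \sigma the logistic sigmoid:

    P(h_j = 1 \mid v) = \sigma\left(c_j + (v^\top W)_j\right), \qquad
    P(v_i = 1 \mid h) = \sigma\left(b_i + (W h)_i\right)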
134 def cd(self, lr = 0.1, persistent=None): | 143 def cd(self, lr = 0.1, persistent=None, k=1): |
135 """ | 144 """ |
136 This function implements one step of CD-1 or PCD-1 | 145 This function implements one step of CD-k or PCD-k |
137 | 146 |
138 :param lr: learning rate used to train the RBM | 147 :param lr: learning rate used to train the RBM |
139 :param persistent: None for CD. For PCD, shared variable containing old state | 148 :param persistent: None for CD. For PCD, shared variable containing old state |
154 if persistent is None: | 163 if persistent is None: |
155 chain_start = ph_sample | 164 chain_start = ph_sample |
156 else: | 165 else: |
157 chain_start = persistent | 166 chain_start = persistent |
158 | 167 |
159 # perform actual negative phase | 168 # perform actual negative phase (first Gibbs step of CD-k) |
160 [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(chain_start) | 169 [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(chain_start) |
170 | |
171 # perform CD-k: run k-1 additional alternating Gibbs steps | |
172 if k > 1: | |
173 for i in range(k-1): | |
174 [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(nh_sample) | |
175 | |
176 | |
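A sketch of how the new `k` argument is meant to be called, mirroring the calls `test_rbm` makes below (`persistent_chain` is the shared variable created there; the k value is illustrative):

    # PCD-k: keep persistent negative particles and run a k-step chain
    cost, updates = rbm.cd(lr=0.1, persistent=persistent_chain, k=5)
    # plain CD-k: restart the negative chain from the data at every update
    cost, updates = rbm.cd(lr=0.1, persistent=None, k=5)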
161 | 177 |
162 # determine gradients on RBM parameters | 178 # determine gradients on RBM parameters |
163 g_vbias = T.sum( self.input - nv_mean, axis = 0)/self.batch_size | 179 g_vbias = T.sum( self.input - nv_mean, axis = 0)/self.batch_size |
164 g_hbias = T.sum( ph_mean - nh_mean, axis = 0)/self.batch_size | 180 g_hbias = T.sum( ph_mean - nh_mean, axis = 0)/self.batch_size |
165 g_W = T.dot(ph_mean.T, self.input )/ self.batch_size - \ | 181 g_W = T.dot(ph_mean.T, self.input )/ self.batch_size - \ |
222 | 238 |
223 return cross_entropy | 239 return cross_entropy |
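The gradient lines above are the usual CD-k approximation to the log-likelihood gradient: the intractable model expectation is replaced by the state at the end of the k-step chain (`ph_mean` against the input for the positive phase, `nh_mean`/`nv_mean` for the negative phase), averaged over the minibatch:

    \partial \log p(v) / \partial W_{ij} \approx \langle v_i h_j \rangle_{data} - \langle v_i h_j \rangle_{k}
    \partial \log p(v) / \partial b_i \approx \langle v_i \rangle_{data} - \langle v_i \rangle_{k}
    \partial \log p(v) / \partial c_j \approx \langle h_j \rangle_{data} - \langle h_j \rangle_{k}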
224 | 240 |
225 | 241 |
226 | 242 |
243 def test_rbm(b_size = 25, nhidden = 1000, kk = 1, persistance = 0, | |
244 dataset = 0): | |
245 """ | |
246 Demonstrate how to train an RBM and afterwards sample from it. | |
247 | |
248 This is demonstrated on NIST, NIST P07 or PNIST07. | |
249 | |
250 :param b_size: size of a minibatch | |
251 :param nhidden: number of hidden units | |
252 :param kk: number of Gibbs steps for CD-k / PCD-k | |
253 :param persistance: 1 to use PCD, 0 for plain CD | |
254 :param dataset: 0 for nist_all, 1 for nist_P07, 2 for PNIST07 | |
255 | |
256 """ | |
257 | |
258 learning_rate=0.1 | |
259 | |
260 if dataset == 0: # result bound to `data` so the `datasets` module is not shadowed | |
261 data = datasets.nist_all() | |
262 elif dataset == 1: | |
263 data = datasets.nist_P07() | |
264 elif dataset == 2: | |
265 data = datasets.PNIST07() | |
266 | |
267 | |
268 # to revisit: how the data is retrieved | |
269 ## dataset = load_data(dataset) | |
270 ## | |
271 ## train_set_x, train_set_y = datasets[0] | |
272 ## test_set_x , test_set_y = datasets[2] | |
273 training_epochs = 10 # to be determined; used by the training loop below | |
274 | |
275 batch_size = b_size # size of the minibatch | |
276 | |
277 # compute number of minibatches for training, validation and testing | |
278 #n_train_batches = train_set_x.value.shape[0] / batch_size | |
279 | |
280 # allocate symbolic variables for the data | |
281 index = T.lscalar() # index to a [mini]batch | |
282 x = T.matrix('x') # the data is presented as rasterized images | |
283 | |
284 rng = numpy.random.RandomState(123) | |
285 theano_rng = RandomStreams( rng.randint(2**30)) | |
286 | |
287 | |
288 # construct the RBM class | |
289 rbm = RBM( input = x, n_visible=32*32, \ | |
290 n_hidden = nhidden, numpy_rng = rng, theano_rng = theano_rng) | |
291 | |
292 | |
293 # initialize storage for the persistent chain (state = hidden layer of chain) | |
294 if persistance == 1: | |
295 persistent_chain = theano.shared(numpy.zeros((batch_size, nhidden))) | |
296 # get the cost and the gradient corresponding to one step of CD | |
297 cost, updates = rbm.cd(lr=learning_rate, persistent=persistent_chain, k= kk) | |
298 | |
299 else: | |
300 # get the cost and the gradient corresponding to one step of CD | |
301 #persistance_chain = None | |
302 cost, updates = rbm.cd(lr=learning_rate, persistent=None, k= kk) | |
303 | |
304 ################################# | |
305 # Training the RBM # | |
306 ################################# | |
307 dirname = 'data=%i'%dataset + ' persistance=%i'%persistance + ' n_hidden=%i'%nhidden + ' batch_size=%i'%b_size | |
308 if not os.path.exists(dirname): os.makedirs(dirname) | |
309 os.chdir(dirname) | |
310 | |
311 # train_rbm returns the cost and, as a side effect, applies | |
312 # the updates that adjust the RBM parameters | |
313 train_rbm = theano.function([x], cost, | |
314 updates = updates, | |
315 ) | |
316 | |
317 plotting_time = 0.0 | |
318 start_time = time.clock() | |
319 bufsize = 1000 | |
320 | |
321 # go through training epochs | |
322 costs = [] | |
323 for epoch in xrange(training_epochs): | |
324 | |
325 # go through the training set | |
326 mean_cost = [] | |
327 for mini_x, mini_y in data.train(b_size): | |
328 mean_cost += [train_rbm(mini_x)] | |
329 ## learning_rate = learning_rate - 0.0001 | |
330 ## learning_rate = learning_rate/(tau+( epoch*batch_index*batch_size)) | |
331 | |
332 #learning_rate = learning_rate/10 | |
333 | |
334 costs.append(numpy.mean(mean_cost)) | |
335 | |
336 # Plot filters after each training epoch | |
337 plotting_start = time.clock() | |
338 # Construct image from the weight matrix | |
339 image = PIL.Image.fromarray(tile_raster_images( X = rbm.W.value.T, | |
340 img_shape = (32,32),tile_shape = (10,10), | |
341 tile_spacing=(1,1))) | |
342 image.save('filters_at_epoch_%i.png'%epoch) | |
343 plotting_stop = time.clock() | |
344 plotting_time += (plotting_stop - plotting_start) | |
345 | |
346 end_time = time.clock() | |
347 | |
348 pretraining_time = (end_time - start_time) - plotting_time | |
349 | |
350 | |
351 | |
352 | |
353 | |
354 | |
355 ################################# | |
356 # Sampling from the RBM # | |
357 ################################# | |
358 | |
359 # fixed number of test samples from which the chain's start state is drawn | |
360 number_of_test_samples = 1000 | |
361 | |
362 test_set_x, test_y = data.test(100*b_size).next() # assumes an (x, y) minibatch iterator | |
363 # pick random test examples, with which to initialize the persistent chain | |
364 test_idx = rng.randint(number_of_test_samples - b_size) | |
365 persistent_vis_chain = theano.shared(test_set_x[test_idx:test_idx+b_size]) | |
366 | |
367 # define one step of Gibbs sampling (mf = mean-field) | |
368 [hid_mf, hid_sample, vis_mf, vis_sample] = rbm.gibbs_vhv(persistent_vis_chain) | |
369 | |
370 # the sample at the end of the chain is returned by ``gibbs_vhv`` as | |
371 # its last output; note that it is computed as a binomial draw, | |
372 # so it is formed of ints (0 and 1) and needs to be converted | |
373 # to the same dtype as ``persistent_vis_chain`` | |
374 vis_sample = T.cast(vis_sample, dtype=theano.config.floatX) | |
375 | |
376 # construct the function that implements our persistent chain | |
377 # we generate the "mean field" activations for plotting and the actual samples for | |
378 # reinitializing the state of our persistent chain | |
379 sample_fn = theano.function([], [vis_mf, vis_sample], | |
380 updates = { persistent_vis_chain:vis_sample}) | |
381 | |
382 # sample the RBM, plotting every `plot_every`-th sample; do this | |
383 # until you plot at least `n_samples` | |
384 n_samples = 10 | |
385 # b_size chains run in parallel (the negative particles); plot every b_size steps | |
386 plot_every = b_size | |
387 | |
388 for idx in xrange(n_samples): | |
389 | |
390 # do `plot_every` intermediate sampling steps, which we discard | |
391 for jdx in xrange(plot_every): | |
392 vis_mf, vis_sample = sample_fn() | |
393 | |
394 # construct image | |
395 image = PIL.Image.fromarray(tile_raster_images( | |
396 X = vis_mf, | |
397 img_shape = (32,32), | |
398 tile_shape = (10,10), | |
399 tile_spacing = (1,1) ) ) | |
400 #print ' ... plotting sample ', idx | |
401 image.save('sample_%i_step_%i.png'%(idx,idx*plot_every)) | |
402 | |
403 # save the model parameters | |
404 model = [rbm.W, rbm.vbias, rbm.hbias] | |
405 f = open('params.txt', 'w') | |
406 cPickle.dump(model, f) | |
407 f.close() | |
408 #os.chdir('./..') | |
409 return numpy.mean(costs), pretraining_time/360 | |
410 | |
411 | |
412 def experiment(state, channel): | |
413 | |
414 (mean_cost, time_execution) = test_rbm(b_size = state.b_size,\ | |
415 nhidden = state.nhidden,\ | |
416 kk = state.kk,\ | |
417 persistance = state.persistance,\ | |
418 dataset = state.dataset) | |
419 | |
420 state.mean_costs = mean_cost | |
421 state.time_execution = time_execution | |
422 pylearn.version.record_versions(state,[theano,ift6266,pylearn]) | |
423 return channel.COMPLETE | |
424 | |
425 if __name__ == '__main__': | |
426 | |
427 test_rbm() |
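A sketch of a direct invocation with explicit arguments (values are illustrative; dataset = 2 selects PNIST07 via the dispatch at the top of `test_rbm`):

    # PCD-5 with 1000 hidden units and minibatches of 25, trained on PNIST07
    mean_cost, train_time = test_rbm(b_size=25, nhidden=1000, kk=5,
                                     persistance=1, dataset=2)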