comparison deep/rbm/rbm.py @ 369:d81284e13d77

modified to run experiments with PNIST
author goldfinger
date Sat, 24 Apr 2010 11:32:26 -0400
parents 9685e9d94cc4
children 1e99dc965b5b
comparing 368:d391ad815d89 with 369:d81284e13d77
 Boltzmann Machines (BMs) are a particular form of energy-based model which
 contain hidden variables. Restricted Boltzmann Machines further restrict BMs
 to those without visible-visible and hidden-hidden connections.
 """

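(For reference: "energy-based" means the RBM assigns every joint configuration
of visible units v and hidden units h an energy, with probability decreasing
exponentially in energy. In the notation of this file, with weights W and
biases vbias, hbias:

    E(v, h) = - vbias'v - hbias'h - h'Wv
    P(v, h) = exp(-E(v, h)) / Z        where Z is the partition function

The missing visible-visible and hidden-hidden connections are what make the
conditionals P(h|v) and P(v|h) factorize over units, so each layer can be
sampled in a single pass given the other.)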
-
 import numpy, time, cPickle, gzip, PIL.Image

 import theano
 import theano.tensor as T
 import os
+import pdb
+import pylab
+import theano.tensor.nnet
+import pylearn
+import pylearn.version
+import ift6266
+from pylearn.io import filetensor as ft
+from ift6266 import datasets

 from theano.tensor.shared_randomstreams import RandomStreams

 from utils import tile_raster_images
 from logistic_sgd import load_data

-
 class RBM(object):
     """Restricted Boltzmann Machine (RBM) """
-    def __init__(self, input=None, n_visible=784, n_hidden=1000, \
+    def __init__(self, input=None, n_visible=32*32, n_hidden=500, \
                  W = None, hbias = None, vbias = None, numpy_rng = None,
                  theano_rng = None):
         """
         RBM constructor. Defines the parameters of the model along with
         basic operations for inferring hidden from visible (and vice-versa),
@@ ... @@

         self.W = W
         self.hbias = hbias
         self.vbias = vbias
         self.theano_rng = theano_rng
+
         # **** WARNING: It is not a good idea to put things in this list
         # other than shared variables created in this function.
         self.params = [self.W, self.hbias, self.vbias]
         self.batch_size = self.input.shape[0]

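The parameters collected in self.params above (W, hbias, vbias) are all that is
needed to evaluate the model's free energy F(v), in terms of which the
log-likelihood gradient (approximated by the CD updates below) is usually
written. A minimal numpy sketch, not part of the changeset (the Theano version
replaces the numpy ops with T.dot/T.log/T.exp):

    import numpy

    def free_energy(v, W, hbias, vbias):
        """F(v) = -vbias'v - sum_j log(1 + exp(hbias_j + (vW)_j))"""
        wx_b = numpy.dot(v, W) + hbias   # pre-sigmoid hidden activations
        return -numpy.dot(v, vbias) \
               - numpy.sum(numpy.log(1 + numpy.exp(wx_b)), axis=1)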
@@ ... @@
             starting from the visible state'''
         h1_mean, h1_sample = self.sample_h_given_v(v0_sample)
         v1_mean, v1_sample = self.sample_v_given_h(h1_sample)
         return [h1_mean, h1_sample, v1_mean, v1_sample]

-    def cd(self, lr = 0.1, persistent=None):
+    def cd(self, lr = 0.1, persistent=None, k=1):
135 """ 144 """
136 This functions implements one step of CD-1 or PCD-1 145 This functions implements one step of CD-1 or PCD-1
137 146
138 :param lr: learning rate used to train the RBM 147 :param lr: learning rate used to train the RBM
139 :param persistent: None for CD. For PCD, shared variable containing old state 148 :param persistent: None for CD. For PCD, shared variable containing old state
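To make the two modes concrete, a minimal sketch of how this method is meant to
be called (illustrative only; `rbm`, `b_size` and `nhidden` are assumed to be
defined as in test_rbm below):

    # CD-k: no persistent state, the chain restarts at the data each update
    cost, updates = rbm.cd(lr=0.1, persistent=None, k=1)

    # PCD-k: the chain state lives in a shared variable and survives updates
    chain = theano.shared(numpy.zeros((b_size, nhidden)))
    cost, updates = rbm.cd(lr=0.1, persistent=chain, k=1)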
@@ ... @@
         if persistent is None:
             chain_start = ph_sample
         else:
             chain_start = persistent

-        # perform actual negative phase
+        # perform actual negative phase (the first Gibbs step)
         [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(chain_start)
+
+        # perform the remaining k-1 Gibbs steps of CD-k
+        for i in range(k-1):
+            [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(nh_sample)

         # determine gradients on RBM parameters
         g_vbias = T.sum( self.input - nv_mean, axis = 0)/self.batch_size
         g_hbias = T.sum( ph_mean - nh_mean, axis = 0)/self.batch_size
         g_W = T.dot(ph_mean.T, self.input )/ self.batch_size - \
@@ ... @@

         return cross_entropy

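The gradients above are the usual contrastive-divergence estimator: each is a
positive-phase statistic under the data minus the corresponding negative-phase
statistic under the k-step reconstruction, e.g. for the weights
dW ~ <v h>_data - <v h>_model. The hand-rolled `for i in range(k-1)` loop in
cd() unrolls the Gibbs chain directly into the graph; an equivalent, more
idiomatic Theano construction uses theano.scan. A sketch under the same
definitions (not part of this changeset):

    # run k steps of the hidden-visible-hidden chain with scan;
    # gibbs_hvh returns [v_mean, v_sample, h_mean, h_sample], and only
    # h_sample (the last output) is fed back as the recurrent state
    chain_outputs, scan_updates = theano.scan(
            fn = self.gibbs_hvh,
            outputs_info = [None, None, None, chain_start],
            n_steps = k)
    nv_mean, nv_sample, nh_mean, nh_sample = \
            [out[-1] for out in chain_outputs]
    # scan_updates carries the RandomStreams updates and must be merged
    # into the updates returned to the caller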
+def test_rbm(b_size = 25, nhidden = 1000, kk = 1, persistance = 0,
+             dataset = 0):
+    """
+    Train an RBM with CD-k or PCD-k, plotting the learned filters after
+    every epoch and samples drawn from the model at the end.
+
+    :param b_size: size of a training minibatch
+    :param nhidden: number of hidden units
+    :param kk: number of Gibbs steps per parameter update
+    :param persistance: 1 to use a persistent chain (PCD), 0 for plain CD
+    :param dataset: 0 for nist_all, 1 for nist_P07, 2 for PNIST07
+    """
+
+    learning_rate = 0.1
+    training_epochs = 10   # to be determined
+
+    if dataset == 0:
+        data = datasets.nist_all()
+    elif dataset == 1:
+        data = datasets.nist_P07()
+    elif dataset == 2:
+        data = datasets.PNIST07()
+
+    # TODO: revisit how the data is fetched
+    ## dataset = load_data(dataset)
+    ##
+    ## train_set_x, train_set_y = datasets[0]
+    ## test_set_x , test_set_y = datasets[2]
+
+    batch_size = b_size    # size of the minibatch
+
+    # compute number of minibatches for training, validation and testing
+    #n_train_batches = train_set_x.value.shape[0] / batch_size
+
+    # allocate symbolic variables for the data
+    index = T.lscalar()    # index to a [mini]batch
+    x = T.matrix('x')      # the data is presented as rasterized images
+
+    rng = numpy.random.RandomState(123)
+    theano_rng = RandomStreams(rng.randint(2**30))
+
+
+    # construct the RBM class
+    rbm = RBM(input = x, n_visible = 32*32,
+              n_hidden = nhidden, numpy_rng = rng, theano_rng = theano_rng)
+
+    # initialize storage for the persistent chain (state = hidden layer of chain)
+    if persistance == 1:
+        persistent_chain = theano.shared(numpy.zeros((batch_size, nhidden)))
+        # get the cost and the gradient corresponding to one step of PCD-k
+        cost, updates = rbm.cd(lr = learning_rate, persistent = persistent_chain, k = kk)
+    else:
+        # get the cost and the gradient corresponding to one step of CD-k
+        cost, updates = rbm.cd(lr = learning_rate, persistent = None, k = kk)
+
+    #################################
+    #     Training the RBM          #
+    #################################
+    dirname = 'data=%i persistance=%i n_hidden=%i batch_size=%i' % \
+              (dataset, persistance, nhidden, b_size)
+    os.makedirs(dirname)
+    os.chdir(dirname)
+
+    # it is ok for a theano function to have no output
+    # the purpose of train_rbm is solely to update the RBM parameters
+    train_rbm = theano.function([x], cost,
+           updates = updates,
+           )
+
+    plotting_time = 0.0
+    start_time = time.clock()
+    bufsize = 1000
+
+    # go through training epochs
+    costs = []
+    for epoch in xrange(training_epochs):
+
+        # go through the training set
+        mean_cost = []
+        for mini_x, mini_y in data.train(b_size):
+            mean_cost += [train_rbm(mini_x)]
+##            learning_rate = learning_rate - 0.0001
+##            learning_rate = learning_rate/(tau+( epoch*batch_index*batch_size))
+
+        #learning_rate = learning_rate/10
+
+        costs.append(numpy.mean(mean_cost))
+
+        # Plot filters after each training epoch
+        plotting_start = time.clock()
+        # Construct image from the weight matrix
+        image = PIL.Image.fromarray(tile_raster_images( X = rbm.W.value.T,
+                 img_shape = (32,32), tile_shape = (10,10),
+                 tile_spacing = (1,1)))
+        image.save('filters_at_epoch_%i.png' % epoch)
+        plotting_stop = time.clock()
+        plotting_time += (plotting_stop - plotting_start)
+
+    end_time = time.clock()
+
+    pretraining_time = (end_time - start_time) - plotting_time
+
+
+    #################################
+    #     Sampling from the RBM     #
+    #################################
+
+    # find out the number of test samples
+    number_of_test_samples = 1000
+
+    # grab one large block of test examples (the dataset iterator yields
+    # (x, y) minibatches, as data.train() does above)
+    test_set_x, test_set_y = data.test(100*b_size).next()
+    # pick random test examples, with which to initialize the persistent chain
+    test_idx = rng.randint(number_of_test_samples - b_size)
+    persistent_vis_chain = theano.shared(test_set_x[test_idx:test_idx+b_size])
+
+    # define one step of Gibbs sampling (mf = mean-field)
+    [hid_mf, hid_sample, vis_mf, vis_sample] = rbm.gibbs_vhv(persistent_vis_chain)
+
+    # the visible sample at the end of the chain is the last output of
+    # ``gibbs_vhv``; note that it is computed as a binomial draw,
+    # therefore it is formed of ints (0 and 1) and needs to be
+    # converted to the same dtype as ``persistent_vis_chain``
+    vis_sample = T.cast(vis_sample, dtype=theano.config.floatX)
+
+    # construct the function that implements our persistent chain
+    # we generate the "mean field" activations for plotting and the actual
+    # samples for reinitializing the state of our persistent chain
+    sample_fn = theano.function([], [vis_mf, vis_sample],
+                      updates = { persistent_vis_chain: vis_sample})
+
+    # sample the RBM, plotting every `plot_every`-th sample; do this
+    # until you plot at least `n_samples`
+    n_samples = 10
+    # run b_size parallel Gibbs chains (the number of negative particles)
+    plot_every = b_size
+
+    for idx in xrange(n_samples):
+
+        # do `plot_every` intermediate samplings of which we do not care
+        for jdx in xrange(plot_every):
+            vis_mf, vis_sample = sample_fn()
+
+        # construct image
+        image = PIL.Image.fromarray(tile_raster_images(
+                                         X = vis_mf,
+                                         img_shape = (32,32),
+                                         tile_shape = (10,10),
+                                         tile_spacing = (1,1) ) )
+        #print ' ... plotting sample ', idx
+        image.save('sample_%i_step_%i.png' % (idx, (idx+1)*plot_every))
+
+    # save the model parameters (as plain arrays rather than shared variables)
+    model = [rbm.W.value, rbm.vbias.value, rbm.hbias.value]
+    f = open('params.txt', 'wb')
+    cPickle.dump(model, f)
+    f.close()
+    os.chdir('./..')
+    return numpy.mean(costs), pretraining_time/360
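A minimal usage sketch (the hyperparameter values are illustrative, not taken
from any recorded experiment):

    # train a PCD-1 RBM on PNIST07 and collect the summary statistics
    mean_cost, train_time = test_rbm(b_size=25, nhidden=1000, kk=1,
                                     persistance=1, dataset=2)
    print 'mean cost:', mean_cost, 'training time:', train_time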
+
+
+def experiment(state, channel):
+
+    (mean_cost, time_execution) = test_rbm(b_size = state.b_size,
+                                           nhidden = state.nhidden,
+                                           kk = state.kk,
+                                           persistance = state.persistance,
+                                           dataset = state.dataset)
+
+    state.mean_costs = mean_cost
+    state.time_execution = time_execution
+    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
+    return channel.COMPLETE
+
+if __name__ == '__main__':
+
+    test_rbm()
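experiment() follows the jobman convention: a state object with attribute
access holding the hyperparameters and results, and a channel whose COMPLETE
value is returned on success. A hypothetical driver, assuming jobman is
installed (DD is its dict-with-attribute-access helper; in some versions it is
imported as `from jobman.tools import DD`):

    from jobman import DD

    class DummyChannel(object):
        # stand-in for a real jobman channel
        COMPLETE = 'complete'

    state = DD(b_size=25, nhidden=1000, kk=1, persistance=0, dataset=2)
    experiment(state, DummyChannel())
    print state.mean_costs, state.time_execution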