comparison deep/rbm/rbm.py @ 369:d81284e13d77

modified to run experiments with PNIST
author goldfinger
date Sat, 24 Apr 2010 11:32:26 -0400
parents 9685e9d94cc4
children 1e99dc965b5b
comparing 368:d391ad815d89 with 369:d81284e13d77
 Boltzmann Machines (BMs) are a particular form of energy-based model which
 contain hidden variables. Restricted Boltzmann Machines further restrict BMs
 to those without visible-visible and hidden-hidden connections.
 """

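(For reference: "energy-based" means the RBM assigns every joint configuration
of visible units v and hidden units h an energy, with probability decreasing
exponentially in energy. In the notation of this file, with weights W and
biases vbias, hbias:

    E(v, h) = - vbias'v - hbias'h - h'Wv
    P(v, h) = exp(-E(v, h)) / Z        where Z is the partition function

The missing visible-visible and hidden-hidden connections are what make the
conditionals P(h|v) and P(v|h) factorize over units, so each layer can be
sampled in a single pass given the other.)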
-
 import numpy, time, cPickle, gzip, PIL.Image

 import theano
 import theano.tensor as T
 import os
+import pdb
+import pylab
+import theano.tensor.nnet
+import pylearn
+import pylearn.version
+import ift6266
+from pylearn.io import filetensor as ft
+from ift6266 import datasets

 from theano.tensor.shared_randomstreams import RandomStreams

 from utils import tile_raster_images
 from logistic_sgd import load_data

-
 class RBM(object):
     """Restricted Boltzmann Machine (RBM) """
-    def __init__(self, input=None, n_visible=784, n_hidden=1000, \
+    def __init__(self, input=None, n_visible=32*32, n_hidden=500, \
                  W = None, hbias = None, vbias = None, numpy_rng = None,
                  theano_rng = None):
         """
         RBM constructor. Defines the parameters of the model along with
         basic operations for inferring hidden from visible (and vice-versa),
@@ ... @@

         self.W = W
         self.hbias = hbias
         self.vbias = vbias
         self.theano_rng = theano_rng
+
         # **** WARNING: It is not a good idea to put things in this list
         # other than shared variables created in this function.
         self.params = [self.W, self.hbias, self.vbias]
         self.batch_size = self.input.shape[0]

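The parameters collected in self.params above (W, hbias, vbias) are all that is
needed to evaluate the model's free energy F(v), in terms of which the
log-likelihood gradient (approximated by the CD updates below) is usually
written. A minimal numpy sketch, not part of the changeset (the Theano version
replaces the numpy ops with T.dot/T.log/T.exp):

    import numpy

    def free_energy(v, W, hbias, vbias):
        """F(v) = -vbias'v - sum_j log(1 + exp(hbias_j + (vW)_j))"""
        wx_b = numpy.dot(v, W) + hbias   # pre-sigmoid hidden activations
        return -numpy.dot(v, vbias) \
               - numpy.sum(numpy.log(1 + numpy.exp(wx_b)), axis=1)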
@@ ... @@
             starting from the visible state'''
         h1_mean, h1_sample = self.sample_h_given_v(v0_sample)
         v1_mean, v1_sample = self.sample_v_given_h(h1_sample)
         return [h1_mean, h1_sample, v1_mean, v1_sample]

-    def cd(self, lr = 0.1, persistent=None):
+    def cd(self, lr = 0.1, persistent=None, k=1):
135 """ 144 """
136 This functions implements one step of CD-1 or PCD-1 145 This functions implements one step of CD-1 or PCD-1
137 146
138 :param lr: learning rate used to train the RBM 147 :param lr: learning rate used to train the RBM
139 :param persistent: None for CD. For PCD, shared variable containing old state 148 :param persistent: None for CD. For PCD, shared variable containing old state
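To make the two modes concrete, a minimal sketch of how this method is meant to
be called (illustrative only; `rbm`, `b_size` and `nhidden` are assumed to be
defined as in test_rbm below):

    # CD-k: no persistent state, the chain restarts at the data each update
    cost, updates = rbm.cd(lr=0.1, persistent=None, k=1)

    # PCD-k: the chain state lives in a shared variable and survives updates
    chain = theano.shared(numpy.zeros((b_size, nhidden)))
    cost, updates = rbm.cd(lr=0.1, persistent=chain, k=1)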
@@ ... @@
         if persistent is None:
             chain_start = ph_sample
         else:
             chain_start = persistent

-        # perform actual negative phase
+        # perform actual negative phase (the first Gibbs step)
         [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(chain_start)
+
+        # perform the remaining k-1 Gibbs steps of CD-k
+        for i in range(k-1):
+            [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(nh_sample)

         # determine gradients on RBM parameters
         g_vbias = T.sum( self.input - nv_mean, axis = 0)/self.batch_size
         g_hbias = T.sum( ph_mean - nh_mean, axis = 0)/self.batch_size
         g_W = T.dot(ph_mean.T, self.input )/ self.batch_size - \
@@ ... @@

         return cross_entropy

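The gradients above are the usual contrastive-divergence estimator: each is a
positive-phase statistic under the data minus the corresponding negative-phase
statistic under the k-step reconstruction, e.g. for the weights
dW ~ <v h>_data - <v h>_model. The hand-rolled `for i in range(k-1)` loop in
cd() unrolls the Gibbs chain directly into the graph; an equivalent, more
idiomatic Theano construction uses theano.scan. A sketch under the same
definitions (not part of this changeset):

    # run k steps of the hidden-visible-hidden chain with scan;
    # gibbs_hvh returns [v_mean, v_sample, h_mean, h_sample], and only
    # h_sample (the last output) is fed back as the recurrent state
    chain_outputs, scan_updates = theano.scan(
            fn = self.gibbs_hvh,
            outputs_info = [None, None, None, chain_start],
            n_steps = k)
    nv_mean, nv_sample, nh_mean, nh_sample = \
            [out[-1] for out in chain_outputs]
    # scan_updates carries the RandomStreams updates and must be merged
    # into the updates returned to the caller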
+def test_rbm(b_size = 25, nhidden = 1000, kk = 1, persistance = 0,
+             dataset = 0):
+    """
+    Train an RBM with CD-k or PCD-k, plotting the learned filters after
+    every epoch and samples drawn from the model at the end.
+
+    :param b_size: size of a training minibatch
+    :param nhidden: number of hidden units
+    :param kk: number of Gibbs steps per parameter update
+    :param persistance: 1 to use a persistent chain (PCD), 0 for plain CD
+    :param dataset: 0 for nist_all, 1 for nist_P07, 2 for PNIST07
+    """
+
+    learning_rate = 0.1
+    training_epochs = 10   # to be determined
+
+    if dataset == 0:
+        data = datasets.nist_all()
+    elif dataset == 1:
+        data = datasets.nist_P07()
+    elif dataset == 2:
+        data = datasets.PNIST07()
+
+    # TODO: revisit how the data is fetched
+    ## dataset = load_data(dataset)
+    ##
+    ## train_set_x, train_set_y = datasets[0]
+    ## test_set_x , test_set_y = datasets[2]
+
+    batch_size = b_size    # size of the minibatch
+
+    # compute number of minibatches for training, validation and testing
+    #n_train_batches = train_set_x.value.shape[0] / batch_size
+
+    # allocate symbolic variables for the data
+    index = T.lscalar()    # index to a [mini]batch
+    x = T.matrix('x')      # the data is presented as rasterized images
+
+    rng = numpy.random.RandomState(123)
+    theano_rng = RandomStreams(rng.randint(2**30))
+
+
+    # construct the RBM class
+    rbm = RBM(input = x, n_visible = 32*32,
+              n_hidden = nhidden, numpy_rng = rng, theano_rng = theano_rng)
+
+    # initialize storage for the persistent chain (state = hidden layer of chain)
+    if persistance == 1:
+        persistent_chain = theano.shared(numpy.zeros((batch_size, nhidden)))
+        # get the cost and the gradient corresponding to one step of PCD-k
+        cost, updates = rbm.cd(lr = learning_rate, persistent = persistent_chain, k = kk)
+    else:
+        # get the cost and the gradient corresponding to one step of CD-k
+        cost, updates = rbm.cd(lr = learning_rate, persistent = None, k = kk)
+
+    #################################
+    #     Training the RBM          #
+    #################################
+    dirname = 'data=%i persistance=%i n_hidden=%i batch_size=%i' % \
+              (dataset, persistance, nhidden, b_size)
+    os.makedirs(dirname)
+    os.chdir(dirname)
+
+    # it is ok for a theano function to have no output
+    # the purpose of train_rbm is solely to update the RBM parameters
+    train_rbm = theano.function([x], cost,
+           updates = updates,
+           )
+
+    plotting_time = 0.0
+    start_time = time.clock()
+    bufsize = 1000
+
+    # go through training epochs
+    costs = []
+    for epoch in xrange(training_epochs):
+
+        # go through the training set
+        mean_cost = []
+        for mini_x, mini_y in data.train(b_size):
+            mean_cost += [train_rbm(mini_x)]
+##            learning_rate = learning_rate - 0.0001
+##            learning_rate = learning_rate/(tau+( epoch*batch_index*batch_size))
+
+        #learning_rate = learning_rate/10
+
+        costs.append(numpy.mean(mean_cost))
+
+        # Plot filters after each training epoch
+        plotting_start = time.clock()
+        # Construct image from the weight matrix
+        image = PIL.Image.fromarray(tile_raster_images( X = rbm.W.value.T,
+                 img_shape = (32,32), tile_shape = (10,10),
+                 tile_spacing = (1,1)))
+        image.save('filters_at_epoch_%i.png' % epoch)
+        plotting_stop = time.clock()
+        plotting_time += (plotting_stop - plotting_start)
+
+    end_time = time.clock()
+
+    pretraining_time = (end_time - start_time) - plotting_time
+
+
+    #################################
+    #     Sampling from the RBM     #
+    #################################
+
+    # find out the number of test samples
+    number_of_test_samples = 1000
+
+    # grab one large block of test examples (the dataset iterator yields
+    # (x, y) minibatches, as data.train() does above)
+    test_set_x, test_set_y = data.test(100*b_size).next()
+    # pick random test examples, with which to initialize the persistent chain
+    test_idx = rng.randint(number_of_test_samples - b_size)
+    persistent_vis_chain = theano.shared(test_set_x[test_idx:test_idx+b_size])
+
+    # define one step of Gibbs sampling (mf = mean-field)
+    [hid_mf, hid_sample, vis_mf, vis_sample] = rbm.gibbs_vhv(persistent_vis_chain)
+
+    # the visible sample at the end of the chain is the last output of
+    # ``gibbs_vhv``; note that it is computed as a binomial draw,
+    # therefore it is formed of ints (0 and 1) and needs to be
+    # converted to the same dtype as ``persistent_vis_chain``
+    vis_sample = T.cast(vis_sample, dtype=theano.config.floatX)
+
+    # construct the function that implements our persistent chain
+    # we generate the "mean field" activations for plotting and the actual
+    # samples for reinitializing the state of our persistent chain
+    sample_fn = theano.function([], [vis_mf, vis_sample],
+                      updates = { persistent_vis_chain: vis_sample})
+
+    # sample the RBM, plotting every `plot_every`-th sample; do this
+    # until you plot at least `n_samples`
+    n_samples = 10
+    # run b_size parallel Gibbs chains (the number of negative particles)
+    plot_every = b_size
+
+    for idx in xrange(n_samples):
+
+        # do `plot_every` intermediate samplings of which we do not care
+        for jdx in xrange(plot_every):
+            vis_mf, vis_sample = sample_fn()
+
+        # construct image
+        image = PIL.Image.fromarray(tile_raster_images(
+                                         X = vis_mf,
+                                         img_shape = (32,32),
+                                         tile_shape = (10,10),
+                                         tile_spacing = (1,1) ) )
+        #print ' ... plotting sample ', idx
+        image.save('sample_%i_step_%i.png' % (idx, (idx+1)*plot_every))
+
+    # save the model parameters (as plain arrays rather than shared variables)
+    model = [rbm.W.value, rbm.vbias.value, rbm.hbias.value]
+    f = open('params.txt', 'wb')
+    cPickle.dump(model, f)
+    f.close()
+    os.chdir('./..')
+    return numpy.mean(costs), pretraining_time/360
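A minimal usage sketch (the hyperparameter values are illustrative, not taken
from any recorded experiment):

    # train a PCD-1 RBM on PNIST07 and collect the summary statistics
    mean_cost, train_time = test_rbm(b_size=25, nhidden=1000, kk=1,
                                     persistance=1, dataset=2)
    print 'mean cost:', mean_cost, 'training time:', train_time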
+
+
+def experiment(state, channel):
+
+    (mean_cost, time_execution) = test_rbm(b_size = state.b_size,
+                                           nhidden = state.nhidden,
+                                           kk = state.kk,
+                                           persistance = state.persistance,
+                                           dataset = state.dataset)
+
+    state.mean_costs = mean_cost
+    state.time_execution = time_execution
+    pylearn.version.record_versions(state, [theano, ift6266, pylearn])
+    return channel.COMPLETE
+
+if __name__ == '__main__':
+
+    test_rbm()
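experiment() follows the jobman convention: a state object with attribute
access holding the hyperparameters and results, and a channel whose COMPLETE
value is returned on success. A hypothetical driver, assuming jobman is
installed (DD is its dict-with-attribute-access helper; in some versions it is
imported as `from jobman.tools import DD`):

    from jobman import DD

    class DummyChannel(object):
        # stand-in for a real jobman channel
        COMPLETE = 'complete'

    state = DD(b_size=25, nhidden=1000, kk=1, persistance=0, dataset=2)
    experiment(state, DummyChannel())
    print state.mean_costs, state.time_execution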