# HG changeset patch # User Arnaud Bergeron # Date 1267753401 18000 # Node ID 43af74a348ac0ad63e4901a20dfbe64a50205792 # Parent e12702b88a2d6fb360297b0e368a6048f86e451a# Parent 10a801240bfcefbd1375101016ffd113b3929476 Merge branches from main repo. diff -r e12702b88a2d -r 43af74a348ac baseline/__init__.py diff -r e12702b88a2d -r 43af74a348ac baseline/conv_mlp/__init__.py diff -r e12702b88a2d -r 43af74a348ac baseline/conv_mlp/convolutional_mlp.py --- a/baseline/conv_mlp/convolutional_mlp.py Thu Mar 04 09:43:23 2010 -0500 +++ b/baseline/conv_mlp/convolutional_mlp.py Thu Mar 04 20:43:21 2010 -0500 @@ -26,7 +26,8 @@ import theano.sandbox.softsign import pylearn.datasets.MNIST from pylearn.io import filetensor as ft -from theano.sandbox import conv, downsample +from theano.tensor.signal import downsample +from theano.tensor.nnet import conv class LeNetConvPoolLayer(object): diff -r e12702b88a2d -r 43af74a348ac baseline/deep_mlp/__init__.py diff -r e12702b88a2d -r 43af74a348ac baseline/log_reg/__init__.py diff -r e12702b88a2d -r 43af74a348ac baseline/log_reg/log_reg.py --- a/baseline/log_reg/log_reg.py Thu Mar 04 09:43:23 2010 -0500 +++ b/baseline/log_reg/log_reg.py Thu Mar 04 20:43:21 2010 -0500 @@ -35,11 +35,11 @@ """ __docformat__ = 'restructedtext en' -import numpy, time, cPickle, gzip +import numpy, time import theano import theano.tensor as T - +from ift6266 import datasets class LogisticRegression(object): """Multi-class Logistic Regression Class @@ -112,6 +112,8 @@ # i.e., the mean log-likelihood across the minibatch. return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] ) + def MSE(self, y): + return -T.mean(abs((self.p_t_given_x)[T.arange(y.shape[0]), y]-y)**2) def errors( self, y ): """Return a float representing the number of errors in the minibatch @@ -135,107 +137,12 @@ else: raise NotImplementedError() -def shared_dataset( data_xy ): - """ Function that loads the dataset into shared variables - - The reason we store our dataset in shared variables is to allow - Theano to copy it into the GPU memory (when code is run on GPU). - Since copying data into the GPU is slow, copying a minibatch everytime - is needed (the default behaviour if the data is not in a shared - variable) would lead to a large decrease in performance. - """ - data_x, data_y = data_xy - shared_x = theano.shared( numpy.asarray( data_x, dtype = theano.config.floatX ) ) - shared_y = theano.shared( numpy.asarray( data_y, dtype = theano.config.floatX ) ) - # When storing data on the GPU it has to be stored as floats - # therefore we will store the labels as ``floatX`` as well - # (``shared_y`` does exactly that). But during our computations - # we need them as ints (we use labels as index, and if they are - # floats it doesn't make sense) therefore instead of returning - # ``shared_y`` we will have to cast it to int. This little hack - # lets ous get around this issue - return shared_x, T.cast( shared_y, 'int32' ) - -def load_data_pkl_gz( dataset ): - ''' Loads the dataset - - :type dataset: string - :param dataset: the path to the dataset (here MNIST) - ''' - - #-------------------------------------------------------------------------------------------------------------------- - # Load Data - #-------------------------------------------------------------------------------------------------------------------- - - - print '... loading data' - - # Load the dataset - f = gzip.open(dataset,'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - - test_set_x, test_set_y = shared_dataset( test_set ) - valid_set_x, valid_set_y = shared_dataset( valid_set ) - train_set_x, train_set_y = shared_dataset( train_set ) - - rval = [ ( train_set_x, train_set_y ), ( valid_set_x,valid_set_y ), ( test_set_x, test_set_y ) ] - return rval - -##def load_data_ft( verbose = False,\ -## data_path = '/data/lisa/data/nist/by_class/'\ -## train_data = 'all/all_train_data.ft',\ -## train_labels = 'all/all_train_labels.ft',\ -## test_data = 'all/all_test_data.ft',\ -## test_labels = 'all/all_test_labels.ft'): -## -## train_data_file = open(data_path + train_data) -## train_labels_file = open(data_path + train_labels) -## test_labels_file = open(data_path + test_data) -## test_data_file = open(data_path + test_labels) -## -## raw_train_data = ft.read( train_data_file) -## raw_train_labels = ft.read(train_labels_file) -## raw_test_data = ft.read( test_labels_file) -## raw_test_labels = ft.read( test_data_file) -## -## f.close() -## g.close() -## i.close() -## h.close() -## -## -## test_set_x, test_set_y = shared_dataset(test_set) -## valid_set_x, valid_set_y = shared_dataset(valid_set) -## train_set_x, train_set_y = shared_dataset(train_set) -## -## rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] -## return rval -## #create a validation set the same size as the test size -## #use the end of the training array for this purpose -## #discard the last remaining so we get a %batch_size number -## test_size=len(raw_test_labels) -## test_size = int(test_size/batch_size) -## test_size*=batch_size -## train_size = len(raw_train_data) -## train_size = int(train_size/batch_size) -## train_size*=batch_size -## validation_size =test_size -## offset = train_size-test_size -## if verbose == True: -## print 'train size = %d' %train_size -## print 'test size = %d' %test_size -## print 'valid size = %d' %validation_size -## print 'offset = %d' %offset -## -## - #-------------------------------------------------------------------------------------------------------------------- # MAIN #-------------------------------------------------------------------------------------------------------------------- def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ - dataset_name = 'mnist.pkl.gz', image_size = 28 * 28, nb_class = 10, \ + dataset=datasets.nist_digits, image_size = 32 * 32, nb_class = 10, \ patience = 5000, patience_increase = 2, improvement_threshold = 0.995): """ @@ -254,9 +161,8 @@ :type batch_size: int :param batch_size: size of the minibatch - :type dataset_name: string - :param dataset: the path of the MNIST dataset file from - http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz + :type dataset: dataset + :param dataset: a dataset instance from ift6266.datasets :type image_size: int :param image_size: size of the input image in pixels (width * height) @@ -275,17 +181,6 @@ """ - datasets = load_data_pkl_gz( dataset_name ) - - train_set_x, train_set_y = datasets[0] - valid_set_x, valid_set_y = datasets[1] - test_set_x , test_set_y = datasets[2] - - # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.value.shape[0] / batch_size - n_valid_batches = valid_set_x.value.shape[0] / batch_size - n_test_batches = test_set_x.value.shape[0] / batch_size - #-------------------------------------------------------------------------------------------------------------------- # Build actual model #-------------------------------------------------------------------------------------------------------------------- @@ -308,17 +203,11 @@ # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch - test_model = theano.function( inputs = [ index ], - outputs = classifier.errors( y ), - givens = { - x:test_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y:test_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + test_model = theano.function( inputs = [ x, y ], + outputs = classifier.errors( y )) - validate_model = theano.function( inputs = [ index ], - outputs = classifier.errors( y ), - givens = { - x:valid_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y:valid_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + validate_model = theano.function( inputs = [ x, y ], + outputs = classifier.errors( y )) # compute the gradient of cost with respect to theta = ( W, b ) g_W = T.grad( cost = cost, wrt = classifier.W ) @@ -331,12 +220,9 @@ # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` - train_model = theano.function( inputs = [ index ], + train_model = theano.function( inputs = [ x, y ], outputs = cost, - updates = updates, - givens = { - x: train_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y: train_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + updates = updates) #-------------------------------------------------------------------------------------------------------------------- # Train model @@ -349,38 +235,38 @@ # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min( n_train_batches, patience * 0.5 ) + validation_frequency = patience * 0.5 # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch - best_params = None + best_params = None best_validation_loss = float('inf') - test_score = 0. - start_time = time.clock() + test_score = 0. + start_time = time.clock() done_looping = False - n_epochs = nb_max_examples / train_set_x.value.shape[0] - epoch = 0 + n_iters = nb_max_examples / batch_size + epoch = 0 + iter = 0 - while ( epoch < n_epochs ) and ( not done_looping ): + while ( iter < n_iters ) and ( not done_looping ): epoch = epoch + 1 - for minibatch_index in xrange( n_train_batches ): + for x, y in dataset.train(batch_size): - minibatch_avg_cost = train_model( minibatch_index ) + minibatch_avg_cost = train_model( x, y ) # iteration number - iter = epoch * n_train_batches + minibatch_index + iter += 1 - if ( iter + 1 ) % validation_frequency == 0: + if iter % validation_frequency == 0: # compute zero-one loss on validation set - validation_losses = [ validate_model( i ) for i in xrange( n_valid_batches ) ] + validation_losses = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ] this_validation_loss = numpy.mean( validation_losses ) - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - ( epoch, minibatch_index + 1,n_train_batches, \ - this_validation_loss*100. ) ) + print('epoch %i, iter %i, validation error %f %%' % \ + ( epoch, iter, this_validation_loss*100. ) ) # if we got the best validation score until now @@ -393,12 +279,12 @@ best_validation_loss = this_validation_loss # test it on the test set - test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] test_score = numpy.mean(test_losses) - print((' epoch %i, minibatch %i/%i, test error of best ' + print((' epoch %i, iter %i, test error of best ' 'model %f %%') % \ - (epoch, minibatch_index+1, n_train_batches,test_score*100.)) + (epoch, iter, test_score*100.)) if patience <= iter : done_looping = True diff -r e12702b88a2d -r 43af74a348ac baseline/mlp/__init__.py diff -r e12702b88a2d -r 43af74a348ac deep/autoencoder/__init__.py diff -r e12702b88a2d -r 43af74a348ac deep/convolutional_dae/__init__.py diff -r e12702b88a2d -r 43af74a348ac deep/convolutional_dae/stacked_convolutional_dae.py --- a/deep/convolutional_dae/stacked_convolutional_dae.py Thu Mar 04 09:43:23 2010 -0500 +++ b/deep/convolutional_dae/stacked_convolutional_dae.py Thu Mar 04 20:43:21 2010 -0500 @@ -7,44 +7,10 @@ from theano.tensor.signal import downsample from theano.tensor.nnet import conv -import gzip -import cPickle - - -class LogisticRegression(object): - - def __init__(self, input, n_in, n_out): - - self.W = theano.shared( value=numpy.zeros((n_in,n_out), - dtype = theano.config.floatX) ) - - self.b = theano.shared( value=numpy.zeros((n_out,), - dtype = theano.config.floatX) ) - - self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) - - self.y_pred=T.argmax(self.p_y_given_x, axis=1) - - self.params = [self.W, self.b] - - def negative_log_likelihood(self, y): - return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) - - def MSE(self, y): - return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]),y]-y)**2) +from ift6266 import datasets - def errors(self, y): - if y.ndim != self.y_pred.ndim: - raise TypeError('y should have the same shape as self.y_pred', - ('y', target.type, 'y_pred', self.y_pred.type)) - - - if y.dtype.startswith('int'): - return T.mean(T.neq(self.y_pred, y)) - else: - raise NotImplementedError() - +from ift6266.baseline.log_reg.log_reg import LogisticRegression class SigmoidalLayer(object): def __init__(self, rng, input, n_in, n_out): @@ -65,8 +31,9 @@ class dA_conv(object): - def __init__(self, corruption_level = 0.1, input = None, shared_W = None,\ - shared_b = None, filter_shape = None, image_shape = None, poolsize = (2,2)): + def __init__(self, input, filter_shape, corruption_level = 0.1, + shared_W = None, shared_b = None, image_shape = None, + poolsize = (2,2)): theano_rng = RandomStreams() @@ -80,13 +47,11 @@ self.W = shared_W self.b = shared_b else: - initial_W = numpy.asarray( numpy.random.uniform( \ - low = -numpy.sqrt(6./(fan_in+fan_out)), \ - high = numpy.sqrt(6./(fan_in+fan_out)), \ + initial_W = numpy.asarray( numpy.random.uniform( + low = -numpy.sqrt(6./(fan_in+fan_out)), + high = numpy.sqrt(6./(fan_in+fan_out)), size = filter_shape), dtype = theano.config.floatX) - initial_b = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX) - - + initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) self.W = theano.shared(value = initial_W, name = "W") self.b = theano.shared(value = initial_b, name = "b") @@ -101,9 +66,8 @@ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x - conv1_out = conv.conv2d(self.tilde_x, self.W, \ - filter_shape=filter_shape, \ - image_shape=image_shape, border_mode='valid') + conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, + image_shape=image_shape, border_mode='valid') self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) @@ -111,19 +75,15 @@ da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ filter_shape[3] ] - da_image_shape = [ image_shape[0],filter_shape[0],image_shape[2]-filter_shape[2]+1, \ - image_shape[3]-filter_shape[3]+1 ] initial_W_prime = numpy.asarray( numpy.random.uniform( \ low = -numpy.sqrt(6./(fan_in+fan_out)), \ high = numpy.sqrt(6./(fan_in+fan_out)), \ size = da_filter_shape), dtype = theano.config.floatX) self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") - #import pdb;pdb.set_trace() - - conv2_out = conv.conv2d(self.y, self.W_prime, \ - filter_shape = da_filter_shape, image_shape = da_image_shape ,\ - border_mode='full') + conv2_out = conv.conv2d(self.y, self.W_prime, + filter_shape = da_filter_shape, + border_mode='full') self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale @@ -134,19 +94,16 @@ self.cost = T.mean(self.L) self.params = [ self.W, self.b, self.b_prime ] - - class LeNetConvPoolLayer(object): - def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)): - assert image_shape[1]==filter_shape[1] + def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)): self.input = input W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX) - self.W = theano.shared(value = W_values) + self.W = theano.shared(value=W_values) - b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX) - self.b = theano.shared(value= b_values) + b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) + self.b = theano.shared(value=b_values) conv_out = conv.conv2d(input, self.W, filter_shape=filter_shape, image_shape=image_shape) @@ -168,67 +125,60 @@ class SdA(): - def __init__(self, input, n_ins_conv, n_ins_mlp, train_set_x, train_set_y, batch_size, \ - conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, \ - rng, n_out, pretrain_lr, finetune_lr): - + def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes, + mlp_hidden_layers_sizes, corruption_levels, rng, n_out, + pretrain_lr, finetune_lr): + self.layers = [] self.pretrain_functions = [] self.params = [] self.conv_n_layers = len(conv_hidden_layers_sizes) self.mlp_n_layers = len(mlp_hidden_layers_sizes) - - index = T.lscalar() # index to a [mini]batch + self.x = T.dmatrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of - - for i in xrange( self.conv_n_layers ): - filter_shape=conv_hidden_layers_sizes[i][0] image_shape=conv_hidden_layers_sizes[i][1] max_poolsize=conv_hidden_layers_sizes[i][2] if i == 0 : - layer_input=self.x.reshape((batch_size,1,28,28)) + layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32)) else: layer_input=self.layers[-1].output - - layer = LeNetConvPoolLayer(rng, input=layer_input, \ - image_shape=image_shape, \ - filter_shape=filter_shape,poolsize=max_poolsize) - print 'Convolutional layer '+str(i+1)+' created' - + + layer = LeNetConvPoolLayer(rng, input=layer_input, + image_shape=image_shape, + filter_shape=filter_shape, + poolsize=max_poolsize) + print 'Convolutional layer', str(i+1), 'created' + self.layers += [layer] self.params += layer.params - - da_layer = dA_conv(corruption_level = corruption_levels[0],\ - input = layer_input, \ - shared_W = layer.W, shared_b = layer.b,\ - filter_shape = filter_shape , image_shape = image_shape ) - - + + da_layer = dA_conv(corruption_level = corruption_levels[0], + input = layer_input, + shared_W = layer.W, shared_b = layer.b, + filter_shape = filter_shape, + image_shape = image_shape ) + gparams = T.grad(da_layer.cost, da_layer.params) - + updates = {} for param, gparam in zip(da_layer.params, gparams): - updates[param] = param - gparam * pretrain_lr - - - update_fn = theano.function([index], da_layer.cost, \ - updates = updates, - givens = { - self.x : train_set_x[index*batch_size:(index+1)*batch_size]} ) - + updates[param] = param - gparam * pretrain_lr + + update_fn = theano.function([self.x], da_layer.cost, updates = updates) + self.pretrain_functions += [update_fn] - + for i in xrange( self.mlp_n_layers ): if i == 0 : input_size = n_ins_mlp else: input_size = mlp_hidden_layers_sizes[i-1] - + if i == 0 : if len( self.layers ) == 0 : layer_input=self.x @@ -236,72 +186,43 @@ layer_input = self.layers[-1].output.flatten(2) else: layer_input = self.layers[-1].output - + layer = SigmoidalLayer(rng, layer_input, input_size, mlp_hidden_layers_sizes[i] ) - + self.layers += [layer] self.params += layer.params - - print 'MLP layer '+str(i+1)+' created' + print 'MLP layer', str(i+1), 'created' self.logLayer = LogisticRegression(input=self.layers[-1].output, \ n_in=mlp_hidden_layers_sizes[-1], n_out=n_out) self.params += self.logLayer.params - + cost = self.logLayer.negative_log_likelihood(self.y) + + gparams = T.grad(cost, self.params) - gparams = T.grad(cost, self.params) updates = {} - for param,gparam in zip(self.params, gparams): updates[param] = param - gparam*finetune_lr - - self.finetune = theano.function([index], cost, - updates = updates, - givens = { - self.x : train_set_x[index*batch_size:(index+1)*batch_size], - self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) - + + self.finetune = theano.function([self.x, self.y], cost, updates = updates) + + self.errors = self.logLayer.errors(self.y) - self.errors = self.logLayer.errors(self.y) - - - def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \ pretrain_lr = 0.01, training_epochs = 1000, \ - dataset='mnist.pkl.gz'): - - f = gzip.open(dataset,'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - - - def shared_dataset(data_xy): - data_x, data_y = data_xy - shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) - shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) - return shared_x, T.cast(shared_y, 'int32') - - - test_set_x, test_set_y = shared_dataset(test_set) - valid_set_x, valid_set_y = shared_dataset(valid_set) - train_set_x, train_set_y = shared_dataset(train_set) - + dataset=datasets.nist_digits): + batch_size = 500 # size of the minibatch - - n_train_batches = train_set_x.value.shape[0] / batch_size - n_valid_batches = valid_set_x.value.shape[0] / batch_size - n_test_batches = test_set_x.value.shape[0] / batch_size - # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1d vector of - # [int] labels - layer0_input = x.reshape((batch_size,1,28,28)) + # [int] labels + layer0_input = x.reshape((x.shape[0],1,32,32)) # Setup the convolutional layers with their DAs(add as many as you want) @@ -310,45 +231,34 @@ ker1=2 ker2=2 conv_layers=[] - conv_layers.append([[ker1,1,5,5], [batch_size,1,28,28], [2,2] ]) - conv_layers.append([[ker2,ker1,5,5], [batch_size,ker1,12,12], [2,2] ]) + conv_layers.append([[ker1,1,5,5], None, [2,2] ]) + conv_layers.append([[ker2,ker1,5,5], None, [2,2] ]) # Setup the MLP layers of the network mlp_layers=[500] - network = SdA(input = layer0_input, n_ins_conv = 28*28, n_ins_mlp = ker2*4*4, \ - train_set_x = train_set_x, train_set_y = train_set_y, batch_size = batch_size, - conv_hidden_layers_sizes = conv_layers, \ - mlp_hidden_layers_sizes = mlp_layers, \ - corruption_levels = corruption_levels , n_out = 10, \ - rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate ) + network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4, + conv_hidden_layers_sizes = conv_layers, + mlp_hidden_layers_sizes = mlp_layers, + corruption_levels = corruption_levels , n_out = 10, + rng = rng , pretrain_lr = pretrain_lr , + finetune_lr = learning_rate ) - test_model = theano.function([index], network.errors, - givens = { - network.x: test_set_x[index*batch_size:(index+1)*batch_size], - network.y: test_set_y[index*batch_size:(index+1)*batch_size]}) + test_model = theano.function([network.x, network.y], network.errors) - validate_model = theano.function([index], network.errors, - givens = { - network.x: valid_set_x[index*batch_size:(index+1)*batch_size], - network.y: valid_set_y[index*batch_size:(index+1)*batch_size]}) - - - start_time = time.clock() for i in xrange(len(network.layers)-len(mlp_layers)): for epoch in xrange(pretraining_epochs): - for batch_index in xrange(n_train_batches): - c = network.pretrain_functions[i](batch_index) - print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch),c + for x, y in dataset.train(batch_size): + c = network.pretrain_functions[i](x) + print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c patience = 10000 # look as this many examples regardless patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS # FOUND improvement_threshold = 0.995 # a relative improvement of this much is - validation_frequency = min(n_train_batches, patience/2) - + validation_frequency = patience/2 best_params = None best_validation_loss = float('inf') @@ -357,23 +267,21 @@ done_looping = False epoch = 0 - + iter = 0 + while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for x, y in dataset.train(batch_size): - cost_ij = network.finetune(minibatch_index) - iter = epoch * n_train_batches + minibatch_index - - if (iter+1) % validation_frequency == 0: + cost_ij = network.finetune(x, y) + iter += 1 + + if iter % validation_frequency == 0: + validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)] + this_validation_loss = numpy.mean(validation_losses) + print('epoch %i, iter %i, validation error %f %%' % \ + (epoch, iter, this_validation_loss*100.)) - validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] - this_validation_loss = numpy.mean(validation_losses) - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - (epoch, minibatch_index+1, n_train_batches, \ - this_validation_loss*100.)) - - # if we got the best validation score until now if this_validation_loss < best_validation_loss: @@ -381,35 +289,28 @@ if this_validation_loss < best_validation_loss * \ improvement_threshold : patience = max(patience, iter * patience_increase) - + # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter - + # test it on the test set - test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] test_score = numpy.mean(test_losses) - print((' epoch %i, minibatch %i/%i, test error of best ' + print((' epoch %i, iter %i, test error of best ' 'model %f %%') % - (epoch, minibatch_index+1, n_train_batches, - test_score*100.)) - - + (epoch, iter, test_score*100.)) + if patience <= iter : - done_looping = True - break - + done_looping = True + break + end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score*100.)) print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) - - - - - if __name__ == '__main__': sgd_optimization_mnist() diff -r e12702b88a2d -r 43af74a348ac deep/stacked_dae/mnist_sda.py --- a/deep/stacked_dae/mnist_sda.py Thu Mar 04 09:43:23 2010 -0500 +++ b/deep/stacked_dae/mnist_sda.py Thu Mar 04 20:43:21 2010 -0500 @@ -1,6 +1,7 @@ #!/usr/bin/python # coding: utf-8 +# TODO: This probably doesn't work anymore, adapt to new code in sgd_opt # Parameterize call to sgd_optimization for MNIST import numpy diff -r e12702b88a2d -r 43af74a348ac deep/stacked_dae/nist_sda.py --- a/deep/stacked_dae/nist_sda.py Thu Mar 04 09:43:23 2010 -0500 +++ b/deep/stacked_dae/nist_sda.py Thu Mar 04 20:43:21 2010 -0500 @@ -21,33 +21,35 @@ import jobman, jobman.sql from pylearn.io import filetensor -from utils import produit_croise_jobs +from utils import produit_cartesien_jobs from sgd_optimization import SdaSgdOptimizer -SERIES_AVAILABLE = False -try: - from scalar_series import * - SERIES_AVAILABLE = True -except ImportError: - print "Could not import Series" +from ift6266.utils.scalar_series import * + +############################################################################## +# GLOBALS TEST_CONFIG = False NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' - -JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda2' +JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4' +EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint" REDUCE_TRAIN_TO = None MAX_FINETUNING_EPOCHS = 1000 -REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc. +# number of minibatches before taking means for valid error etc. +REDUCE_EVERY = 1000 + if TEST_CONFIG: REDUCE_TRAIN_TO = 1000 MAX_FINETUNING_EPOCHS = 2 REDUCE_EVERY = 10 -EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint" - +# Possible values the hyperparameters can take. These are then +# combined with produit_cartesien_jobs so we get a list of all +# possible combinations, each one resulting in a job inserted +# in the jobman DB. JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001], 'pretraining_epochs_per_layer': [10,20], 'hidden_layers_sizes': [300,800], @@ -58,30 +60,36 @@ 'num_hidden_layers':[2,3]} # Just useful for tests... minimal number of epochs -DEFAULT_HP_NIST = DD({'finetuning_lr':0.01, - 'pretraining_lr':0.01, - 'pretraining_epochs_per_layer':1, - 'max_finetuning_epochs':1, - 'hidden_layers_sizes':1000, +DEFAULT_HP_NIST = DD({'finetuning_lr':0.1, + 'pretraining_lr':0.1, + 'pretraining_epochs_per_layer':20, + 'max_finetuning_epochs':2, + 'hidden_layers_sizes':300, 'corruption_levels':0.2, 'minibatch_size':20, - 'reduce_train_to':1000, - 'num_hidden_layers':1}) + #'reduce_train_to':300, + 'num_hidden_layers':2}) +''' +Function called by jobman upon launching each job +Its path is the one given when inserting jobs: +ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint +''' def jobman_entrypoint(state, channel): + # record mercurial versions of each package pylearn.version.record_versions(state,[theano,ift6266,pylearn]) channel.save() workingdir = os.getcwd() print "Will load NIST" - sys.stdout.flush() - nist = NIST(20) + nist = NIST(minibatch_size=20) print "NIST loaded" - sys.stdout.flush() + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. rtt = None if state.has_key('reduce_train_to'): rtt = state['reduce_train_to'] @@ -89,7 +97,7 @@ rtt = REDUCE_TRAIN_TO if rtt: - print "Reducing training set to ", rtt, " examples" + print "Reducing training set to "+str(rtt)+ " examples" nist.reduce_train_set(rtt) train,valid,test = nist.get_tvt() @@ -98,17 +106,13 @@ n_ins = 32*32 n_outs = 62 # 10 digits, 26*2 (lower, capitals) - hls = state.hidden_layers_sizes - cl = state.corruption_levels - nhl = state.num_hidden_layers - state.hidden_layers_sizes = [hls] * nhl - state.corruption_levels = [cl] * nhl + # b,b',W for each hidden layer + # + b,W of last layer (logreg) + numparams = state.num_hidden_layers * 3 + 2 + series_mux = None + series_mux = create_series(workingdir, numparams) - # b,b',W for each hidden layer + b,W of last layer (logreg) - numparams = nhl * 3 + 2 - series_mux = None - if SERIES_AVAILABLE: - series_mux = create_series(workingdir, numparams) + print "Creating optimizer with state, ", state optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=state, \ n_ins=n_ins, n_outs=n_outs,\ @@ -120,11 +124,10 @@ optimizer.finetune() channel.save() - pylearn.version.record_versions(state,[theano,ift6266,pylearn]) - channel.save() - return channel.COMPLETE +# These Series objects are used to save various statistics +# during the training. def create_series(basedir, numparams): mux = SeriesMultiplexer() @@ -146,8 +149,11 @@ return mux +# Perform insertion into the Postgre DB based on combination +# of hyperparameter values above +# (see comment for produit_cartesien_jobs() to know how it works) def jobman_insert_nist(): - jobs = produit_croise_jobs(JOB_VALS) + jobs = produit_cartesien_jobs(JOB_VALS) db = jobman.sql.db(JOBDB) for job in jobs: @@ -233,35 +239,6 @@ raw_input("Press any key") -# hp for hyperparameters -def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'): - global DEFAULT_HP_NIST - hp = hp and hp or DEFAULT_HP_NIST - - print "Will load NIST" - - import time - t1 = time.time() - nist = NIST(20, reduce_train_to=100) - t2 = time.time() - - print "NIST loaded. time delta = ", t2-t1 - - train,valid,test = nist.get_tvt() - dataset = (train,valid,test) - - print train[0][15] - print type(train[0][1]) - - - print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0]) - - n_ins = 32*32 - n_outs = 62 # 10 digits, 26*2 (lower, capitals) - - optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0) - optimizer.train() - if __name__ == '__main__': import sys @@ -275,11 +252,9 @@ jobman_insert_nist() elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': - chanmock = DD({'COMPLETE':0}) + chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) jobman_entrypoint(DEFAULT_HP_NIST, chanmock) - elif len(args) > 0 and args[0] == 'estimate': - estimate_total_time() else: - sgd_optimization_nist() + print "Bad arguments" diff -r e12702b88a2d -r 43af74a348ac deep/stacked_dae/sgd_optimization.py --- a/deep/stacked_dae/sgd_optimization.py Thu Mar 04 09:43:23 2010 -0500 +++ b/deep/stacked_dae/sgd_optimization.py Thu Mar 04 20:43:21 2010 -0500 @@ -6,6 +6,7 @@ import numpy import theano import time +import datetime import theano.tensor as T import sys @@ -59,20 +60,27 @@ # compute number of minibatches for training, validation and testing self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size - self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size + # remove last batch in case it's incomplete + self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 def init_classifier(self): print "Constructing classifier" + # we don't want to save arrays in DD objects, so + # we recreate those arrays here + nhl = self.hp.num_hidden_layers + layers_sizes = [self.hp.hidden_layers_sizes] * nhl + corruption_levels = [self.hp.corruption_levels] * nhl + # construct the stacked denoising autoencoder class self.classifier = SdA( \ train_set_x= self.train_set_x, \ train_set_y = self.train_set_y,\ batch_size = self.hp.minibatch_size, \ n_ins= self.n_ins, \ - hidden_layers_sizes = self.hp.hidden_layers_sizes, \ + hidden_layers_sizes = layers_sizes, \ n_outs = self.n_outs, \ - corruption_levels = self.hp.corruption_levels,\ + corruption_levels = corruption_levels,\ rng = self.rng,\ pretrain_lr = self.hp.pretraining_lr, \ finetune_lr = self.hp.finetuning_lr,\ @@ -85,7 +93,7 @@ self.finetune() def pretrain(self): - print "STARTING PRETRAINING" + print "STARTING PRETRAINING, time = ", datetime.datetime.now() sys.stdout.flush() start_time = time.clock() @@ -101,6 +109,8 @@ print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c sys.stdout.flush() + + self.series_mux.append("params", self.classifier.all_params) end_time = time.clock() @@ -110,7 +120,7 @@ sys.stdout.flush() def finetune(self): - print "STARTING FINETUNING" + print "STARTING FINETUNING, time = ", datetime.datetime.now() index = T.lscalar() # index to a [mini]batch minibatch_size = self.hp.minibatch_size diff -r e12702b88a2d -r 43af74a348ac deep/stacked_dae/stacked_dae.py --- a/deep/stacked_dae/stacked_dae.py Thu Mar 04 09:43:23 2010 -0500 +++ b/deep/stacked_dae/stacked_dae.py Thu Mar 04 20:43:21 2010 -0500 @@ -10,6 +10,15 @@ from utils import update_locals +# taken from LeDeepNet/daa.py +# has a special case when taking log(0) (defined =0) +# modified to not take the mean anymore +from theano.tensor.xlogx import xlogx, xlogy0 +# it's target*log(output) +def binary_cross_entropy(target, output, sum_axis=1): + XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output)) + return -T.sum(XE, axis=sum_axis) + class LogisticRegression(object): def __init__(self, input, n_in, n_out): # initialize with 0 the weights W as a matrix of shape (n_in, n_out) @@ -128,7 +137,16 @@ # Equation (4) # note : we sum over the size of a datapoint; if we are using minibatches, # L will be a vector, with one entry per example in minibatch - self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) + #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) + #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) + + # I added this epsilon to avoid getting log(0) and 1/0 in grad + # This means conceptually that there'd be no probability of 0, but that + # doesn't seem to me as important (maybe I'm wrong?). + eps = 0.00000001 + eps_1 = 1-eps + self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \ + + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) # note : L is now a vector, where each element is the cross-entropy cost # of the reconstruction of the corresponding example of the # minibatch. We need to compute the average of all these to get @@ -138,8 +156,6 @@ self.params = [ self.W, self.b, self.b_prime ] - - class SdA(object): def __init__(self, train_set_x, train_set_y, batch_size, n_ins, hidden_layers_sizes, n_outs, @@ -147,6 +163,7 @@ # Just to make sure those are not modified somewhere else afterwards hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes) corruption_levels = copy.deepcopy(corruption_levels) + update_locals(self, locals()) self.layers = [] @@ -157,6 +174,17 @@ self.all_params = [] self.n_layers = len(hidden_layers_sizes) + print "Creating SdA with params:" + print "batch_size", batch_size + print "hidden_layers_sizes", hidden_layers_sizes + print "corruption_levels", corruption_levels + print "n_ins", n_ins + print "n_outs", n_outs + print "pretrain_lr", pretrain_lr + print "finetune_lr", finetune_lr + print "input_divider", input_divider + print "----" + self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) if len(hidden_layers_sizes) < 1 : diff -r e12702b88a2d -r 43af74a348ac deep/stacked_dae/utils.py --- a/deep/stacked_dae/utils.py Thu Mar 04 09:43:23 2010 -0500 +++ b/deep/stacked_dae/utils.py Thu Mar 04 20:43:21 2010 -0500 @@ -1,14 +1,26 @@ #!/usr/bin/python +# coding: utf-8 + +from __future__ import with_statement from jobman import DD # from pylearn codebase +# useful in __init__(param1, param2, etc.) to save +# values in self.param1, self.param2... just call +# update_locals(self, locals()) def update_locals(obj, dct): if 'self' in dct: del dct['self'] obj.__dict__.update(dct) -def produit_croise_jobs(val_dict): +# from a dictionary of possible values for hyperparameters, e.g. +# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]} +# create a list of other dictionaries representing all the possible +# combinations, thus in this example creating: +# [{'learning_rate': 0.1, 'num_layers': 1}, ...] +# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2)) +def produit_cartesien_jobs(val_dict): job_list = [DD()] all_keys = val_dict.keys() @@ -24,9 +36,9 @@ return job_list -def test_produit_croise_jobs(): +def test_produit_cartesien_jobs(): vals = {'a': [1,2], 'b': [3,4,5]} - print produit_croise_jobs(vals) + print produit_cartesien_jobs(vals) # taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python diff -r e12702b88a2d -r 43af74a348ac scripts/launch_generate100.py --- a/scripts/launch_generate100.py Thu Mar 04 09:43:23 2010 -0500 +++ b/scripts/launch_generate100.py Thu Mar 04 20:43:21 2010 -0500 @@ -3,10 +3,12 @@ import os dir1 = "/data/lisa/data/ift6266h10/" +mach = "brams0c.iro.umontreal.ca,brams02.iro.umontreal.ca,brams03.iro.umontreal.ca,maggie22.iro.umontreal.ca" + for i,s in enumerate(['valid','test']): for j,c in enumerate([0.3,0.5,0.7,1]): l = str(c).replace('.','') - os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) + os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (mach, dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) for i in range(100): - os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i)) + os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i)) diff -r e12702b88a2d -r 43af74a348ac test.py --- a/test.py Thu Mar 04 09:43:23 2010 -0500 +++ b/test.py Thu Mar 04 20:43:21 2010 -0500 @@ -1,8 +1,7 @@ import doctest, sys, pkgutil -def runTests(options = doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1): +def runTests(): import ift6266 - predefs = ift6266.__dict__ for (_, name, ispkg) in pkgutil.walk_packages(ift6266.__path__, ift6266.__name__+'.'): if not ispkg: if name.startswith('ift6266.scripts.') or \ @@ -11,9 +10,21 @@ 'ift6266.data_generation.transformations.testmod', 'ift6266.data_generation.transformations.gimp_script']: continue - print "Testing:", name - __import__(name) - doctest.testmod(sys.modules[name], extraglobs=predefs, optionflags=options) + test(name) + +def test(name): + import ift6266 + predefs = ift6266.__dict__ + options = doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1 + print "Testing:", name + __import__(name) + doctest.testmod(sys.modules[name], extraglobs=predefs, optionflags=options) if __name__ == '__main__': - runTests() + if len(sys.argv) > 1: + for mod in sys.argv[1:]: + if mod.endswith('.py'): + mod = mod[:-3] + test(mod) + else: + runTests()