# HG changeset patch # User fsavard # Date 1267708903 18000 # Node ID 10a801240bfcefbd1375101016ffd113b3929476 # Parent e1f5f66dd7dda8f0c8ff2feaccc7020638f2b516# Parent fd1b5237e49e59538eeaa7e4953f2eae6ccc6420 Merge diff -r e1f5f66dd7dd -r 10a801240bfc baseline/__init__.py diff -r e1f5f66dd7dd -r 10a801240bfc baseline/conv_mlp/__init__.py diff -r e1f5f66dd7dd -r 10a801240bfc baseline/conv_mlp/convolutional_mlp.py --- a/baseline/conv_mlp/convolutional_mlp.py Thu Mar 04 08:18:42 2010 -0500 +++ b/baseline/conv_mlp/convolutional_mlp.py Thu Mar 04 08:21:43 2010 -0500 @@ -26,7 +26,8 @@ import theano.sandbox.softsign import pylearn.datasets.MNIST from pylearn.io import filetensor as ft -from theano.sandbox import conv, downsample +from theano.tensor.signal import downsample +from theano.tensor.nnet import conv class LeNetConvPoolLayer(object): diff -r e1f5f66dd7dd -r 10a801240bfc baseline/deep_mlp/__init__.py diff -r e1f5f66dd7dd -r 10a801240bfc baseline/log_reg/__init__.py diff -r e1f5f66dd7dd -r 10a801240bfc baseline/log_reg/log_reg.py --- a/baseline/log_reg/log_reg.py Thu Mar 04 08:18:42 2010 -0500 +++ b/baseline/log_reg/log_reg.py Thu Mar 04 08:21:43 2010 -0500 @@ -35,11 +35,11 @@ """ __docformat__ = 'restructedtext en' -import numpy, time, cPickle, gzip +import numpy, time import theano import theano.tensor as T - +from ift6266 import datasets class LogisticRegression(object): """Multi-class Logistic Regression Class @@ -112,6 +112,8 @@ # i.e., the mean log-likelihood across the minibatch. return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] ) + def MSE(self, y): + return -T.mean(abs((self.p_t_given_x)[T.arange(y.shape[0]), y]-y)**2) def errors( self, y ): """Return a float representing the number of errors in the minibatch @@ -135,107 +137,12 @@ else: raise NotImplementedError() -def shared_dataset( data_xy ): - """ Function that loads the dataset into shared variables - - The reason we store our dataset in shared variables is to allow - Theano to copy it into the GPU memory (when code is run on GPU). - Since copying data into the GPU is slow, copying a minibatch everytime - is needed (the default behaviour if the data is not in a shared - variable) would lead to a large decrease in performance. - """ - data_x, data_y = data_xy - shared_x = theano.shared( numpy.asarray( data_x, dtype = theano.config.floatX ) ) - shared_y = theano.shared( numpy.asarray( data_y, dtype = theano.config.floatX ) ) - # When storing data on the GPU it has to be stored as floats - # therefore we will store the labels as ``floatX`` as well - # (``shared_y`` does exactly that). But during our computations - # we need them as ints (we use labels as index, and if they are - # floats it doesn't make sense) therefore instead of returning - # ``shared_y`` we will have to cast it to int. This little hack - # lets ous get around this issue - return shared_x, T.cast( shared_y, 'int32' ) - -def load_data_pkl_gz( dataset ): - ''' Loads the dataset - - :type dataset: string - :param dataset: the path to the dataset (here MNIST) - ''' - - #-------------------------------------------------------------------------------------------------------------------- - # Load Data - #-------------------------------------------------------------------------------------------------------------------- - - - print '... loading data' - - # Load the dataset - f = gzip.open(dataset,'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - - test_set_x, test_set_y = shared_dataset( test_set ) - valid_set_x, valid_set_y = shared_dataset( valid_set ) - train_set_x, train_set_y = shared_dataset( train_set ) - - rval = [ ( train_set_x, train_set_y ), ( valid_set_x,valid_set_y ), ( test_set_x, test_set_y ) ] - return rval - -##def load_data_ft( verbose = False,\ -## data_path = '/data/lisa/data/nist/by_class/'\ -## train_data = 'all/all_train_data.ft',\ -## train_labels = 'all/all_train_labels.ft',\ -## test_data = 'all/all_test_data.ft',\ -## test_labels = 'all/all_test_labels.ft'): -## -## train_data_file = open(data_path + train_data) -## train_labels_file = open(data_path + train_labels) -## test_labels_file = open(data_path + test_data) -## test_data_file = open(data_path + test_labels) -## -## raw_train_data = ft.read( train_data_file) -## raw_train_labels = ft.read(train_labels_file) -## raw_test_data = ft.read( test_labels_file) -## raw_test_labels = ft.read( test_data_file) -## -## f.close() -## g.close() -## i.close() -## h.close() -## -## -## test_set_x, test_set_y = shared_dataset(test_set) -## valid_set_x, valid_set_y = shared_dataset(valid_set) -## train_set_x, train_set_y = shared_dataset(train_set) -## -## rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] -## return rval -## #create a validation set the same size as the test size -## #use the end of the training array for this purpose -## #discard the last remaining so we get a %batch_size number -## test_size=len(raw_test_labels) -## test_size = int(test_size/batch_size) -## test_size*=batch_size -## train_size = len(raw_train_data) -## train_size = int(train_size/batch_size) -## train_size*=batch_size -## validation_size =test_size -## offset = train_size-test_size -## if verbose == True: -## print 'train size = %d' %train_size -## print 'test size = %d' %test_size -## print 'valid size = %d' %validation_size -## print 'offset = %d' %offset -## -## - #-------------------------------------------------------------------------------------------------------------------- # MAIN #-------------------------------------------------------------------------------------------------------------------- def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ - dataset_name = 'mnist.pkl.gz', image_size = 28 * 28, nb_class = 10, \ + dataset=datasets.nist_digits, image_size = 32 * 32, nb_class = 10, \ patience = 5000, patience_increase = 2, improvement_threshold = 0.995): """ @@ -254,9 +161,8 @@ :type batch_size: int :param batch_size: size of the minibatch - :type dataset_name: string - :param dataset: the path of the MNIST dataset file from - http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz + :type dataset: dataset + :param dataset: a dataset instance from ift6266.datasets :type image_size: int :param image_size: size of the input image in pixels (width * height) @@ -275,17 +181,6 @@ """ - datasets = load_data_pkl_gz( dataset_name ) - - train_set_x, train_set_y = datasets[0] - valid_set_x, valid_set_y = datasets[1] - test_set_x , test_set_y = datasets[2] - - # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.value.shape[0] / batch_size - n_valid_batches = valid_set_x.value.shape[0] / batch_size - n_test_batches = test_set_x.value.shape[0] / batch_size - #-------------------------------------------------------------------------------------------------------------------- # Build actual model #-------------------------------------------------------------------------------------------------------------------- @@ -308,17 +203,11 @@ # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch - test_model = theano.function( inputs = [ index ], - outputs = classifier.errors( y ), - givens = { - x:test_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y:test_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + test_model = theano.function( inputs = [ x, y ], + outputs = classifier.errors( y )) - validate_model = theano.function( inputs = [ index ], - outputs = classifier.errors( y ), - givens = { - x:valid_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y:valid_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + validate_model = theano.function( inputs = [ x, y ], + outputs = classifier.errors( y )) # compute the gradient of cost with respect to theta = ( W, b ) g_W = T.grad( cost = cost, wrt = classifier.W ) @@ -331,12 +220,9 @@ # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` - train_model = theano.function( inputs = [ index ], + train_model = theano.function( inputs = [ x, y ], outputs = cost, - updates = updates, - givens = { - x: train_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y: train_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + updates = updates) #-------------------------------------------------------------------------------------------------------------------- # Train model @@ -349,38 +235,38 @@ # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min( n_train_batches, patience * 0.5 ) + validation_frequency = patience * 0.5 # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch - best_params = None + best_params = None best_validation_loss = float('inf') - test_score = 0. - start_time = time.clock() + test_score = 0. + start_time = time.clock() done_looping = False - n_epochs = nb_max_examples / train_set_x.value.shape[0] - epoch = 0 + n_iters = nb_max_examples / batch_size + epoch = 0 + iter = 0 - while ( epoch < n_epochs ) and ( not done_looping ): + while ( iter < n_iters ) and ( not done_looping ): epoch = epoch + 1 - for minibatch_index in xrange( n_train_batches ): + for x, y in dataset.train(batch_size): - minibatch_avg_cost = train_model( minibatch_index ) + minibatch_avg_cost = train_model( x, y ) # iteration number - iter = epoch * n_train_batches + minibatch_index + iter += 1 - if ( iter + 1 ) % validation_frequency == 0: + if iter % validation_frequency == 0: # compute zero-one loss on validation set - validation_losses = [ validate_model( i ) for i in xrange( n_valid_batches ) ] + validation_losses = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ] this_validation_loss = numpy.mean( validation_losses ) - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - ( epoch, minibatch_index + 1,n_train_batches, \ - this_validation_loss*100. ) ) + print('epoch %i, iter %i, validation error %f %%' % \ + ( epoch, iter, this_validation_loss*100. ) ) # if we got the best validation score until now @@ -393,12 +279,12 @@ best_validation_loss = this_validation_loss # test it on the test set - test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] test_score = numpy.mean(test_losses) - print((' epoch %i, minibatch %i/%i, test error of best ' + print((' epoch %i, iter %i, test error of best ' 'model %f %%') % \ - (epoch, minibatch_index+1, n_train_batches,test_score*100.)) + (epoch, iter, test_score*100.)) if patience <= iter : done_looping = True diff -r e1f5f66dd7dd -r 10a801240bfc baseline/mlp/__init__.py diff -r e1f5f66dd7dd -r 10a801240bfc deep/autoencoder/__init__.py diff -r e1f5f66dd7dd -r 10a801240bfc deep/convolutional_dae/__init__.py diff -r e1f5f66dd7dd -r 10a801240bfc deep/convolutional_dae/stacked_convolutional_dae.py --- a/deep/convolutional_dae/stacked_convolutional_dae.py Thu Mar 04 08:18:42 2010 -0500 +++ b/deep/convolutional_dae/stacked_convolutional_dae.py Thu Mar 04 08:21:43 2010 -0500 @@ -7,44 +7,10 @@ from theano.tensor.signal import downsample from theano.tensor.nnet import conv -import gzip -import cPickle - - -class LogisticRegression(object): - - def __init__(self, input, n_in, n_out): - - self.W = theano.shared( value=numpy.zeros((n_in,n_out), - dtype = theano.config.floatX) ) - - self.b = theano.shared( value=numpy.zeros((n_out,), - dtype = theano.config.floatX) ) - - self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) - - self.y_pred=T.argmax(self.p_y_given_x, axis=1) - - self.params = [self.W, self.b] - - def negative_log_likelihood(self, y): - return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) - - def MSE(self, y): - return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]),y]-y)**2) +from ift6266 import datasets - def errors(self, y): - if y.ndim != self.y_pred.ndim: - raise TypeError('y should have the same shape as self.y_pred', - ('y', target.type, 'y_pred', self.y_pred.type)) - - - if y.dtype.startswith('int'): - return T.mean(T.neq(self.y_pred, y)) - else: - raise NotImplementedError() - +from ift6266.baseline.log_reg.log_reg import LogisticRegression class SigmoidalLayer(object): def __init__(self, rng, input, n_in, n_out): @@ -65,8 +31,9 @@ class dA_conv(object): - def __init__(self, corruption_level = 0.1, input = None, shared_W = None,\ - shared_b = None, filter_shape = None, image_shape = None, poolsize = (2,2)): + def __init__(self, input, filter_shape, corruption_level = 0.1, + shared_W = None, shared_b = None, image_shape = None, + poolsize = (2,2)): theano_rng = RandomStreams() @@ -80,13 +47,11 @@ self.W = shared_W self.b = shared_b else: - initial_W = numpy.asarray( numpy.random.uniform( \ - low = -numpy.sqrt(6./(fan_in+fan_out)), \ - high = numpy.sqrt(6./(fan_in+fan_out)), \ + initial_W = numpy.asarray( numpy.random.uniform( + low = -numpy.sqrt(6./(fan_in+fan_out)), + high = numpy.sqrt(6./(fan_in+fan_out)), size = filter_shape), dtype = theano.config.floatX) - initial_b = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX) - - + initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) self.W = theano.shared(value = initial_W, name = "W") self.b = theano.shared(value = initial_b, name = "b") @@ -101,9 +66,8 @@ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x - conv1_out = conv.conv2d(self.tilde_x, self.W, \ - filter_shape=filter_shape, \ - image_shape=image_shape, border_mode='valid') + conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, + image_shape=image_shape, border_mode='valid') self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) @@ -111,19 +75,15 @@ da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ filter_shape[3] ] - da_image_shape = [ image_shape[0],filter_shape[0],image_shape[2]-filter_shape[2]+1, \ - image_shape[3]-filter_shape[3]+1 ] initial_W_prime = numpy.asarray( numpy.random.uniform( \ low = -numpy.sqrt(6./(fan_in+fan_out)), \ high = numpy.sqrt(6./(fan_in+fan_out)), \ size = da_filter_shape), dtype = theano.config.floatX) self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") - #import pdb;pdb.set_trace() - - conv2_out = conv.conv2d(self.y, self.W_prime, \ - filter_shape = da_filter_shape, image_shape = da_image_shape ,\ - border_mode='full') + conv2_out = conv.conv2d(self.y, self.W_prime, + filter_shape = da_filter_shape, + border_mode='full') self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale @@ -134,19 +94,16 @@ self.cost = T.mean(self.L) self.params = [ self.W, self.b, self.b_prime ] - - class LeNetConvPoolLayer(object): - def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)): - assert image_shape[1]==filter_shape[1] + def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)): self.input = input W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX) - self.W = theano.shared(value = W_values) + self.W = theano.shared(value=W_values) - b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX) - self.b = theano.shared(value= b_values) + b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) + self.b = theano.shared(value=b_values) conv_out = conv.conv2d(input, self.W, filter_shape=filter_shape, image_shape=image_shape) @@ -168,67 +125,60 @@ class SdA(): - def __init__(self, input, n_ins_conv, n_ins_mlp, train_set_x, train_set_y, batch_size, \ - conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, \ - rng, n_out, pretrain_lr, finetune_lr): - + def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes, + mlp_hidden_layers_sizes, corruption_levels, rng, n_out, + pretrain_lr, finetune_lr): + self.layers = [] self.pretrain_functions = [] self.params = [] self.conv_n_layers = len(conv_hidden_layers_sizes) self.mlp_n_layers = len(mlp_hidden_layers_sizes) - - index = T.lscalar() # index to a [mini]batch + self.x = T.dmatrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of - - for i in xrange( self.conv_n_layers ): - filter_shape=conv_hidden_layers_sizes[i][0] image_shape=conv_hidden_layers_sizes[i][1] max_poolsize=conv_hidden_layers_sizes[i][2] if i == 0 : - layer_input=self.x.reshape((batch_size,1,28,28)) + layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32)) else: layer_input=self.layers[-1].output - - layer = LeNetConvPoolLayer(rng, input=layer_input, \ - image_shape=image_shape, \ - filter_shape=filter_shape,poolsize=max_poolsize) - print 'Convolutional layer '+str(i+1)+' created' - + + layer = LeNetConvPoolLayer(rng, input=layer_input, + image_shape=image_shape, + filter_shape=filter_shape, + poolsize=max_poolsize) + print 'Convolutional layer', str(i+1), 'created' + self.layers += [layer] self.params += layer.params - - da_layer = dA_conv(corruption_level = corruption_levels[0],\ - input = layer_input, \ - shared_W = layer.W, shared_b = layer.b,\ - filter_shape = filter_shape , image_shape = image_shape ) - - + + da_layer = dA_conv(corruption_level = corruption_levels[0], + input = layer_input, + shared_W = layer.W, shared_b = layer.b, + filter_shape = filter_shape, + image_shape = image_shape ) + gparams = T.grad(da_layer.cost, da_layer.params) - + updates = {} for param, gparam in zip(da_layer.params, gparams): - updates[param] = param - gparam * pretrain_lr - - - update_fn = theano.function([index], da_layer.cost, \ - updates = updates, - givens = { - self.x : train_set_x[index*batch_size:(index+1)*batch_size]} ) - + updates[param] = param - gparam * pretrain_lr + + update_fn = theano.function([self.x], da_layer.cost, updates = updates) + self.pretrain_functions += [update_fn] - + for i in xrange( self.mlp_n_layers ): if i == 0 : input_size = n_ins_mlp else: input_size = mlp_hidden_layers_sizes[i-1] - + if i == 0 : if len( self.layers ) == 0 : layer_input=self.x @@ -236,72 +186,43 @@ layer_input = self.layers[-1].output.flatten(2) else: layer_input = self.layers[-1].output - + layer = SigmoidalLayer(rng, layer_input, input_size, mlp_hidden_layers_sizes[i] ) - + self.layers += [layer] self.params += layer.params - - print 'MLP layer '+str(i+1)+' created' + print 'MLP layer', str(i+1), 'created' self.logLayer = LogisticRegression(input=self.layers[-1].output, \ n_in=mlp_hidden_layers_sizes[-1], n_out=n_out) self.params += self.logLayer.params - + cost = self.logLayer.negative_log_likelihood(self.y) + + gparams = T.grad(cost, self.params) - gparams = T.grad(cost, self.params) updates = {} - for param,gparam in zip(self.params, gparams): updates[param] = param - gparam*finetune_lr - - self.finetune = theano.function([index], cost, - updates = updates, - givens = { - self.x : train_set_x[index*batch_size:(index+1)*batch_size], - self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) - + + self.finetune = theano.function([self.x, self.y], cost, updates = updates) + + self.errors = self.logLayer.errors(self.y) - self.errors = self.logLayer.errors(self.y) - - - def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \ pretrain_lr = 0.01, training_epochs = 1000, \ - dataset='mnist.pkl.gz'): - - f = gzip.open(dataset,'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - - - def shared_dataset(data_xy): - data_x, data_y = data_xy - shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) - shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) - return shared_x, T.cast(shared_y, 'int32') - - - test_set_x, test_set_y = shared_dataset(test_set) - valid_set_x, valid_set_y = shared_dataset(valid_set) - train_set_x, train_set_y = shared_dataset(train_set) - + dataset=datasets.nist_digits): + batch_size = 500 # size of the minibatch - - n_train_batches = train_set_x.value.shape[0] / batch_size - n_valid_batches = valid_set_x.value.shape[0] / batch_size - n_test_batches = test_set_x.value.shape[0] / batch_size - # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1d vector of - # [int] labels - layer0_input = x.reshape((batch_size,1,28,28)) + # [int] labels + layer0_input = x.reshape((x.shape[0],1,32,32)) # Setup the convolutional layers with their DAs(add as many as you want) @@ -310,45 +231,34 @@ ker1=2 ker2=2 conv_layers=[] - conv_layers.append([[ker1,1,5,5], [batch_size,1,28,28], [2,2] ]) - conv_layers.append([[ker2,ker1,5,5], [batch_size,ker1,12,12], [2,2] ]) + conv_layers.append([[ker1,1,5,5], None, [2,2] ]) + conv_layers.append([[ker2,ker1,5,5], None, [2,2] ]) # Setup the MLP layers of the network mlp_layers=[500] - network = SdA(input = layer0_input, n_ins_conv = 28*28, n_ins_mlp = ker2*4*4, \ - train_set_x = train_set_x, train_set_y = train_set_y, batch_size = batch_size, - conv_hidden_layers_sizes = conv_layers, \ - mlp_hidden_layers_sizes = mlp_layers, \ - corruption_levels = corruption_levels , n_out = 10, \ - rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate ) + network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4, + conv_hidden_layers_sizes = conv_layers, + mlp_hidden_layers_sizes = mlp_layers, + corruption_levels = corruption_levels , n_out = 10, + rng = rng , pretrain_lr = pretrain_lr , + finetune_lr = learning_rate ) - test_model = theano.function([index], network.errors, - givens = { - network.x: test_set_x[index*batch_size:(index+1)*batch_size], - network.y: test_set_y[index*batch_size:(index+1)*batch_size]}) + test_model = theano.function([network.x, network.y], network.errors) - validate_model = theano.function([index], network.errors, - givens = { - network.x: valid_set_x[index*batch_size:(index+1)*batch_size], - network.y: valid_set_y[index*batch_size:(index+1)*batch_size]}) - - - start_time = time.clock() for i in xrange(len(network.layers)-len(mlp_layers)): for epoch in xrange(pretraining_epochs): - for batch_index in xrange(n_train_batches): - c = network.pretrain_functions[i](batch_index) - print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch),c + for x, y in dataset.train(batch_size): + c = network.pretrain_functions[i](x) + print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c patience = 10000 # look as this many examples regardless patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS # FOUND improvement_threshold = 0.995 # a relative improvement of this much is - validation_frequency = min(n_train_batches, patience/2) - + validation_frequency = patience/2 best_params = None best_validation_loss = float('inf') @@ -357,23 +267,21 @@ done_looping = False epoch = 0 - + iter = 0 + while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for x, y in dataset.train(batch_size): - cost_ij = network.finetune(minibatch_index) - iter = epoch * n_train_batches + minibatch_index - - if (iter+1) % validation_frequency == 0: + cost_ij = network.finetune(x, y) + iter += 1 + + if iter % validation_frequency == 0: + validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)] + this_validation_loss = numpy.mean(validation_losses) + print('epoch %i, iter %i, validation error %f %%' % \ + (epoch, iter, this_validation_loss*100.)) - validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] - this_validation_loss = numpy.mean(validation_losses) - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - (epoch, minibatch_index+1, n_train_batches, \ - this_validation_loss*100.)) - - # if we got the best validation score until now if this_validation_loss < best_validation_loss: @@ -381,35 +289,28 @@ if this_validation_loss < best_validation_loss * \ improvement_threshold : patience = max(patience, iter * patience_increase) - + # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter - + # test it on the test set - test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] test_score = numpy.mean(test_losses) - print((' epoch %i, minibatch %i/%i, test error of best ' + print((' epoch %i, iter %i, test error of best ' 'model %f %%') % - (epoch, minibatch_index+1, n_train_batches, - test_score*100.)) - - + (epoch, iter, test_score*100.)) + if patience <= iter : - done_looping = True - break - + done_looping = True + break + end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score*100.)) print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) - - - - - if __name__ == '__main__': sgd_optimization_mnist() diff -r e1f5f66dd7dd -r 10a801240bfc deep/stacked_dae/nist_sda.py --- a/deep/stacked_dae/nist_sda.py Thu Mar 04 08:18:42 2010 -0500 +++ b/deep/stacked_dae/nist_sda.py Thu Mar 04 08:21:43 2010 -0500 @@ -255,9 +255,6 @@ chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) jobman_entrypoint(DEFAULT_HP_NIST, chanmock) - elif len(args) > 0 and args[0] == 'estimate': - estimate_total_time() - else: print "Bad arguments" diff -r e1f5f66dd7dd -r 10a801240bfc scripts/launch_generate100.py --- a/scripts/launch_generate100.py Thu Mar 04 08:18:42 2010 -0500 +++ b/scripts/launch_generate100.py Thu Mar 04 08:21:43 2010 -0500 @@ -3,10 +3,12 @@ import os dir1 = "/data/lisa/data/ift6266h10/" +mach = "brams0c.iro.umontreal.ca,brams02.iro.umontreal.ca,brams03.iro.umontreal.ca,maggie22.iro.umontreal.ca" + for i,s in enumerate(['valid','test']): for j,c in enumerate([0.3,0.5,0.7,1]): l = str(c).replace('.','') - os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) + os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (mach, dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) for i in range(100): - os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i)) + os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i)) diff -r e1f5f66dd7dd -r 10a801240bfc test.py --- a/test.py Thu Mar 04 08:18:42 2010 -0500 +++ b/test.py Thu Mar 04 08:21:43 2010 -0500 @@ -1,8 +1,7 @@ import doctest, sys, pkgutil -def runTests(options = doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1): +def runTests(): import ift6266 - predefs = ift6266.__dict__ for (_, name, ispkg) in pkgutil.walk_packages(ift6266.__path__, ift6266.__name__+'.'): if not ispkg: if name.startswith('ift6266.scripts.') or \ @@ -11,9 +10,21 @@ 'ift6266.data_generation.transformations.testmod', 'ift6266.data_generation.transformations.gimp_script']: continue - print "Testing:", name - __import__(name) - doctest.testmod(sys.modules[name], extraglobs=predefs, optionflags=options) + test(name) + +def test(name): + import ift6266 + predefs = ift6266.__dict__ + options = doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1 + print "Testing:", name + __import__(name) + doctest.testmod(sys.modules[name], extraglobs=predefs, optionflags=options) if __name__ == '__main__': - runTests() + if len(sys.argv) > 1: + for mod in sys.argv[1:]: + if mod.endswith('.py'): + mod = mod[:-3] + test(mod) + else: + runTests()