Mercurial > ift6266
changeset 256:bd7e50d56d80
merge
author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
---|---|
date | Wed, 17 Mar 2010 14:04:12 -0400 |
parents | 17c72763d574 (current diff) a491d3600a77 (diff) |
children | 0c0f0b3f6a93 6d16a2bf142b |
files | |
diffstat | 5 files changed, 287 insertions(+), 326 deletions(-) [+] |
line wrap: on
line diff
--- a/baseline/conv_mlp/convolutional_mlp.py Wed Mar 17 14:01:23 2010 -0400 +++ b/baseline/conv_mlp/convolutional_mlp.py Wed Mar 17 14:04:12 2010 -0400 @@ -26,8 +26,8 @@ import theano.sandbox.softsign import pylearn.datasets.MNIST from pylearn.io import filetensor as ft -from theano.tensor.signal import downsample -from theano.tensor.nnet import conv +from theano.sandbox import conv, downsample +import theano,pylearn.version,ift6266 class LeNetConvPoolLayer(object): @@ -214,7 +214,7 @@ # Dealing with the training set # get the list of training images (x) and their labels (y) - (train_set_x, train_set_y) = (d[:4000,:],labels[:4000]) + (train_set_x, train_set_y) = (d[:200000,:],labels[:200000]) # initialize the list of training minibatches with empty list train_batches = [] for i in xrange(0, len(train_set_x), batch_size): @@ -229,7 +229,7 @@ #print train_batches[500] # Dealing with the validation set - (valid_set_x, valid_set_y) = (d[4000:5000,:],labels[4000:5000]) + (valid_set_x, valid_set_y) = (d[200000:270000,:],labels[200000:270000]) # initialize the list of validation minibatches valid_batches = [] for i in xrange(0, len(valid_set_x), batch_size): @@ -237,17 +237,18 @@ [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])] # Dealing with the testing set - (test_set_x, test_set_y) = (d[5000:6000,:],labels[5000:6000]) + (test_set_x, test_set_y) = (d[270000:340000,:],labels[270000:340000]) # initialize the list of testing minibatches test_batches = [] for i in xrange(0, len(test_set_x), batch_size): test_batches = test_batches + \ [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])] + return train_batches, valid_batches, test_batches -def evaluate_lenet5(learning_rate=0.1, n_iter=1, batch_size=20, n_kern0=20,n_kern1=50,filter_shape=5,n_layer=3, dataset='mnist.pkl.gz'): +def evaluate_lenet5(learning_rate=0.1, n_iter=200, batch_size=20, n_kern0=20, n_kern1=50, n_layer=3, filter_shape0=5, filter_shape1=5, dataset='mnist.pkl.gz'): rng = numpy.random.RandomState(23455) print 'Before load dataset' @@ -256,6 +257,16 @@ ishape = (32,32) # this is the size of NIST images n_kern2=80 + n_kern3=100 + if n_layer==4: + filter_shape1=3 + filter_shape2=3 + if n_layer==5: + filter_shape0=4 + filter_shape1=2 + filter_shape2=2 + filter_shape3=2 + # allocate symbolic variables for the data x = T.matrix('x') # rasterized images @@ -276,7 +287,7 @@ # 4D output tensor is thus of shape (20,20,14,14) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size,1,32,32), - filter_shape=(n_kern0,1,filter_shape,filter_shape), poolsize=(2,2)) + filter_shape=(n_kern0,1,filter_shape0,filter_shape0), poolsize=(2,2)) if(n_layer>2): @@ -284,17 +295,17 @@ # filtering reduces the image size to (14-5+1,14-5+1)=(10,10) # maxpooling reduces this further to (10/2,10/2) = (5,5) # 4D output tensor is thus of shape (20,50,5,5) - fshape=(32-filter_shape+1)/2 + fshape0=(32-filter_shape0+1)/2 layer1 = LeNetConvPoolLayer(rng, input=layer0.output, - image_shape=(batch_size,n_kern0,fshape,fshape), - filter_shape=(n_kern1,n_kern0,filter_shape,filter_shape), poolsize=(2,2)) + image_shape=(batch_size,n_kern0,fshape0,fshape0), + filter_shape=(n_kern1,n_kern0,filter_shape1,filter_shape1), poolsize=(2,2)) else: - fshape=(32-filter_shape+1)/2 + fshape0=(32-filter_shape0+1)/2 layer1_input = layer0.output.flatten(2) # construct a fully-connected sigmoidal layer - layer1 = SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape*fshape, n_out=500) + layer1 = SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape0*fshape0, n_out=500) layer2 = LogisticRegression(input=layer1.output, n_in=500, n_out=10) cost = layer2.negative_log_likelihood(y) @@ -304,17 +315,46 @@ if(n_layer>3): - fshape=(32-filter_shape+1)/2 - fshape2=(fshape-filter_shape+1)/2 - fshape3=(fshape2-filter_shape+1)/2 + fshape0=(32-filter_shape0+1)/2 + fshape1=(fshape0-filter_shape1+1)/2 layer2 = LeNetConvPoolLayer(rng, input=layer1.output, - image_shape=(batch_size,n_kern1,fshape2,fshape2), - filter_shape=(n_kern2,n_kern1,filter_shape,filter_shape), poolsize=(2,2)) + image_shape=(batch_size,n_kern1,fshape1,fshape1), + filter_shape=(n_kern2,n_kern1,filter_shape2,filter_shape2), poolsize=(2,2)) + + if(n_layer>4): + + + fshape0=(32-filter_shape0+1)/2 + fshape1=(fshape0-filter_shape1+1)/2 + fshape2=(fshape1-filter_shape2+1)/2 + fshape3=(fshape2-filter_shape3+1)/2 + layer3 = LeNetConvPoolLayer(rng, input=layer2.output, + image_shape=(batch_size,n_kern2,fshape2,fshape2), + filter_shape=(n_kern3,n_kern2,filter_shape3,filter_shape3), poolsize=(2,2)) + + layer4_input = layer3.output.flatten(2) + layer4 = SigmoidalLayer(rng, input=layer4_input, + n_in=n_kern3*fshape3*fshape3, n_out=500) + + + layer5 = LogisticRegression(input=layer4.output, n_in=500, n_out=10) + + cost = layer5.negative_log_likelihood(y) + + test_model = theano.function([x,y], layer5.errors(y)) + + params = layer5.params+ layer4.params+ layer3.params+ layer2.params+ layer1.params + layer0.params + + elif(n_layer>3): + + fshape0=(32-filter_shape0+1)/2 + fshape1=(fshape0-filter_shape1+1)/2 + fshape2=(fshape1-filter_shape2+1)/2 layer3_input = layer2.output.flatten(2) layer3 = SigmoidalLayer(rng, input=layer3_input, - n_in=n_kern2*fshape3*fshape3, n_out=500) + n_in=n_kern2*fshape2*fshape2, n_out=500) layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10) @@ -328,8 +368,8 @@ elif(n_layer>2): - fshape=(32-filter_shape+1)/2 - fshape2=(fshape-filter_shape+1)/2 + fshape0=(32-filter_shape0+1)/2 + fshape1=(fshape0-filter_shape1+1)/2 # the SigmoidalLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). @@ -338,7 +378,7 @@ # construct a fully-connected sigmoidal layer layer2 = SigmoidalLayer(rng, input=layer2_input, - n_in=n_kern1*fshape2*fshape2, n_out=500) + n_in=n_kern1*fshape1*fshape1, n_out=500) # classify the values of the fully-connected sigmoidal layer @@ -462,7 +502,7 @@ def experiment(state, channel): print 'start experiment' - (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.filter_shape, state.n_layer) + (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.n_layer, state.filter_shape0, state.filter_shape1) print 'end experiment' state.best_validation_loss = best_validation_loss
--- a/deep/convolutional_dae/stacked_convolutional_dae.py Wed Mar 17 14:01:23 2010 -0400 +++ b/deep/convolutional_dae/stacked_convolutional_dae.py Wed Mar 17 14:04:12 2010 -0400 @@ -1,6 +1,7 @@ import numpy import theano import time +import sys import theano.tensor as T from theano.tensor.shared_randomstreams import RandomStreams import theano.sandbox.softsign @@ -8,15 +9,19 @@ from theano.tensor.signal import downsample from theano.tensor.nnet import conv -from ift6266 import datasets +sys.path.append('../../../') +from ift6266 import datasets from ift6266.baseline.log_reg.log_reg import LogisticRegression - + +batch_size = 100 + + class SigmoidalLayer(object): def __init__(self, rng, input, n_in, n_out): self.input = input - + W_values = numpy.asarray( rng.uniform( \ low = -numpy.sqrt(6./(n_in+n_out)), \ high = numpy.sqrt(6./(n_in+n_out)), \ @@ -32,8 +37,7 @@ class dA_conv(object): def __init__(self, input, filter_shape, corruption_level = 0.1, - shared_W = None, shared_b = None, image_shape = None, - poolsize = (2,2)): + shared_W = None, shared_b = None, image_shape = None): theano_rng = RandomStreams() @@ -57,8 +61,6 @@ initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX) - - self.W_prime=T.dtensor4('W_prime') self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime") @@ -67,7 +69,9 @@ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, - image_shape=image_shape, border_mode='valid') + image_shape=image_shape, + unroll_kern=4,unroll_batch=4, + border_mode='valid') self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) @@ -75,6 +79,8 @@ da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ filter_shape[3] ] + da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ] + #import pdb; pdb.set_trace() initial_W_prime = numpy.asarray( numpy.random.uniform( \ low = -numpy.sqrt(6./(fan_in+fan_out)), \ high = numpy.sqrt(6./(fan_in+fan_out)), \ @@ -82,7 +88,9 @@ self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") conv2_out = conv.conv2d(self.y, self.W_prime, - filter_shape = da_filter_shape, + filter_shape = da_filter_shape,\ + image_shape = da_image_shape, \ + unroll_kern=4,unroll_batch=4, \ border_mode='full') self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale @@ -96,6 +104,7 @@ self.params = [ self.W, self.b, self.b_prime ] class LeNetConvPoolLayer(object): + def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)): self.input = input @@ -106,7 +115,8 @@ self.b = theano.shared(value=b_values) conv_out = conv.conv2d(input, self.W, - filter_shape=filter_shape, image_shape=image_shape) + filter_shape=filter_shape, image_shape=image_shape, + unroll_kern=4,unroll_batch=4) fan_in = numpy.prod(filter_shape[1:]) @@ -144,7 +154,7 @@ max_poolsize=conv_hidden_layers_sizes[i][2] if i == 0 : - layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32)) + layer_input=self.x.reshape((batch_size, 1, 32, 32)) else: layer_input=self.layers[-1].output @@ -160,7 +170,7 @@ da_layer = dA_conv(corruption_level = corruption_levels[0], input = layer_input, shared_W = layer.W, shared_b = layer.b, - filter_shape = filter_shape, + filter_shape=filter_shape, image_shape = image_shape ) gparams = T.grad(da_layer.cost, da_layer.params) @@ -211,36 +221,43 @@ self.errors = self.logLayer.errors(self.y) -def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \ - pretrain_lr = 0.01, training_epochs = 1000, \ +def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 0, \ + pretrain_lr = 0.1, training_epochs = 1000, \ + kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \ + corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \ + max_pool_layers = [ [2,2] , [2,2] ], \ dataset=datasets.nist_digits): - batch_size = 500 # size of the minibatch # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1d vector of # [int] labels - layer0_input = x.reshape((x.shape[0],1,32,32)) + + layer0_input = x.reshape((batch_size,1,32,32)) - - # Setup the convolutional layers with their DAs(add as many as you want) - corruption_levels = [ 0.2, 0.2, 0.2] rng = numpy.random.RandomState(1234) - ker1=2 - ker2=2 conv_layers=[] - conv_layers.append([[ker1,1,5,5], None, [2,2] ]) - conv_layers.append([[ker2,ker1,5,5], None, [2,2] ]) + init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\ + [ batch_size , 1, 32, 32 ], + max_pool_layers[0] ] + conv_layers.append(init_layer) - # Setup the MLP layers of the network - mlp_layers=[500] - - network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4, + conv_n_out = int((32-kernels[0][2]+1)/max_pool_layers[0][0]) + print init_layer[1] + + for i in range(1,len(kernels)): + layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\ + [ batch_size, kernels[i-1][0],conv_n_out,conv_n_out ], + max_pool_layers[i] ] + conv_layers.append(layer) + conv_n_out = int( (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]) + print layer [1] + network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2, conv_hidden_layers_sizes = conv_layers, mlp_hidden_layers_sizes = mlp_layers, - corruption_levels = corruption_levels , n_out = 10, + corruption_levels = corruption_levels , n_out = 62, rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate ) @@ -250,7 +267,9 @@ for i in xrange(len(network.layers)-len(mlp_layers)): for epoch in xrange(pretraining_epochs): for x, y in dataset.train(batch_size): - c = network.pretrain_functions[i](x) + if x.shape[0] == batch_size: + c = network.pretrain_functions[i](x) + print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c patience = 10000 # look as this many examples regardless @@ -272,12 +291,16 @@ while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for x, y in dataset.train(batch_size): - + if x.shape[0] != batch_size: + continue cost_ij = network.finetune(x, y) iter += 1 if iter % validation_frequency == 0: - validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)] + validation_losses = [] + for xv, yv in dataset.valid(batch_size): + if xv.shape[0] == batch_size: + validation_losses.append(test_model(xv, yv)) this_validation_loss = numpy.mean(validation_losses) print('epoch %i, iter %i, validation error %f %%' % \ (epoch, iter, this_validation_loss*100.)) @@ -295,7 +318,10 @@ best_iter = iter # test it on the test set - test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] + test_losses=[] + for xt, yt in dataset.test(batch_size): + if xt.shape[0] == batch_size: + test_losses.append(test_model(xt, yt)) test_score = numpy.mean(test_losses) print((' epoch %i, iter %i, test error of best ' 'model %f %%') %
--- a/deep/stacked_dae/v_sylvain/nist_sda.py Wed Mar 17 14:01:23 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/nist_sda.py Wed Mar 17 14:04:12 2010 -0400 @@ -21,9 +21,8 @@ import jobman, jobman.sql from pylearn.io import filetensor -from ift6266 import datasets - from utils import produit_cartesien_jobs +from copy import copy from sgd_optimization import SdaSgdOptimizer @@ -31,49 +30,8 @@ from ift6266.utils.seriestables import * import tables -############################################################################## -# GLOBALS - -TEST_CONFIG = False - -#NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' -JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/sylvainpl_sda_vsylvain' -EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v_sylvain.nist_sda.jobman_entrypoint" - -REDUCE_TRAIN_TO = None -MAX_FINETUNING_EPOCHS = 1000 -# number of minibatches before taking means for valid error etc. -REDUCE_EVERY = 100 - -if TEST_CONFIG: - REDUCE_TRAIN_TO = 1000 - MAX_FINETUNING_EPOCHS = 2 - REDUCE_EVERY = 10 - MINIBATCH_SIZE=20 - -# Possible values the hyperparameters can take. These are then -# combined with produit_cartesien_jobs so we get a list of all -# possible combinations, each one resulting in a job inserted -# in the jobman DB. -JOB_VALS = {'pretraining_lr': [0.1],#, 0.01],#, 0.001],#, 0.0001], - 'pretraining_epochs_per_layer': [10], - 'hidden_layers_sizes': [500], - 'corruption_levels': [0.1], - 'minibatch_size': [20], - 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS], - 'finetuning_lr':[0.1], #0.001 was very bad, so we leave it out - 'num_hidden_layers':[1,1]} - -# Just useful for tests... minimal number of epochs -DEFAULT_HP_NIST = DD({'finetuning_lr':0.1, - 'pretraining_lr':0.1, - 'pretraining_epochs_per_layer':2, - 'max_finetuning_epochs':2, - 'hidden_layers_sizes':500, - 'corruption_levels':0.2, - 'minibatch_size':20, - 'reduce_train_to':10000, - 'num_hidden_layers':1}) +from ift6266 import datasets +from config import * ''' Function called by jobman upon launching each job @@ -85,48 +43,82 @@ # TODO: remove this, bad for number of simultaneous requests on DB channel.save() - workingdir = os.getcwd() - - ########### Il faudrait arranger ici pour train plus petit - -## print "Will load NIST" -## -## nist = NIST(minibatch_size=20) -## -## print "NIST loaded" -## # For test runs, we don't want to use the whole dataset so # reduce it to fewer elements if asked to. rtt = None if state.has_key('reduce_train_to'): - rtt = int(state['reduce_train_to']/state['minibatch_size']) + rtt = state['reduce_train_to'] elif REDUCE_TRAIN_TO: - rtt = int(REDUCE_TRAIN_TO/MINIBATCH_SIZE) - - if rtt: - print "Reducing training set to "+str(rtt*state['minibatch_size'])+ " examples" - else: - rtt=float('inf') #No reduction -## nist.reduce_train_set(rtt) -## -## train,valid,test = nist.get_tvt() -## dataset = (train,valid,test) - + rtt = REDUCE_TRAIN_TO + n_ins = 32*32 n_outs = 62 # 10 digits, 26*2 (lower, capitals) - + + examples_per_epoch = NIST_ALL_TRAIN_SIZE + series = create_series(state.num_hidden_layers) print "Creating optimizer with state, ", state - optimizer = SdaSgdOptimizer(dataset=datasets.nist_all, hyperparameters=state, \ + optimizer = SdaSgdOptimizer(dataset=datasets.nist_all, + hyperparameters=state, \ n_ins=n_ins, n_outs=n_outs,\ - series=series) + examples_per_epoch=examples_per_epoch, \ + series=series, + max_minibatches=rtt) - optimizer.pretrain(datasets.nist_all,rtt) + parameters=[] + optimizer.pretrain(datasets.nist_all) channel.save() + + #Set some of the parameters used for the finetuning + if state.has_key('finetune_set'): + finetune_choice=state['finetune_set'] + else: + finetune_choice=FINETUNE_SET + + if state.has_key('max_finetuning_epochs'): + max_finetune_epoch_NIST=state['max_finetuning_epochs'] + else: + max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS + + if state.has_key('max_finetuning_epochs_P07'): + max_finetune_epoch_P07=state['max_finetuning_epochs_P07'] + else: + max_finetune_epoch_P07=max_finetune_epoch_NIST + + #Decide how the finetune is done + + if finetune_choice==0: + print('\n\n\tfinetune avec nist\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST) + if finetune_choice==1: + print('\n\n\tfinetune avec P07\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07) + if finetune_choice==2: + print('\n\n\tfinetune avec nist suivi de P07\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST) + optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07) - optimizer.finetune(datasets.nist_all,rtt) + if finetune_choice==-1: + print('\nSerie de 3 essais de fine-tuning') + print('\n\n\tfinetune avec nist\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST) + channel.save() + print('\n\n\tfinetune avec P07\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07) + channel.save() + print('\n\n\tfinetune avec nist suivi de P07\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST) + optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07) + channel.save() + channel.save() return channel.COMPLETE @@ -207,98 +199,19 @@ print "inserted" -class NIST: - def __init__(self, minibatch_size, basepath=None, reduce_train_to=None): - global NIST_ALL_LOCATION - - self.minibatch_size = minibatch_size - self.basepath = basepath and basepath or NIST_ALL_LOCATION - - self.set_filenames() - - # arrays of 2 elements: .x, .y - self.train = [None, None] - self.test = [None, None] - - self.load_train_test() - - self.valid = [[], []] - self.split_train_valid() - if reduce_train_to: - self.reduce_train_set(reduce_train_to) - - def get_tvt(self): - return self.train, self.valid, self.test - - def set_filenames(self): - self.train_files = ['all_train_data.ft', - 'all_train_labels.ft'] - - self.test_files = ['all_test_data.ft', - 'all_test_labels.ft'] - - def load_train_test(self): - self.load_data_labels(self.train_files, self.train) - self.load_data_labels(self.test_files, self.test) - - def load_data_labels(self, filenames, pair): - for i, fn in enumerate(filenames): - f = open(os.path.join(self.basepath, fn)) - pair[i] = filetensor.read(f) - f.close() - - def reduce_train_set(self, max): - self.train[0] = self.train[0][:max] - self.train[1] = self.train[1][:max] - - if max < len(self.test[0]): - for ar in (self.test, self.valid): - ar[0] = ar[0][:max] - ar[1] = ar[1][:max] - - def split_train_valid(self): - test_len = len(self.test[0]) - - new_train_x = self.train[0][:-test_len] - new_train_y = self.train[1][:-test_len] - - self.valid[0] = self.train[0][-test_len:] - self.valid[1] = self.train[1][-test_len:] - - self.train[0] = new_train_x - self.train[1] = new_train_y - -def test_load_nist(): - print "Will load NIST" - - import time - t1 = time.time() - nist = NIST(20) - t2 = time.time() - - print "NIST loaded. time delta = ", t2-t1 - - tr,v,te = nist.get_tvt() - - print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0]) - - raw_input("Press any key") - if __name__ == '__main__': - import sys - args = sys.argv[1:] - if len(args) > 0 and args[0] == 'load_nist': - test_load_nist() + #if len(args) > 0 and args[0] == 'load_nist': + # test_load_nist() - elif len(args) > 0 and args[0] == 'jobman_insert': + if len(args) > 0 and args[0] == 'jobman_insert': jobman_insert_nist() elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) - jobman_entrypoint(DEFAULT_HP_NIST, chanmock) + jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock) else: print "Bad arguments"
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py Wed Mar 17 14:01:23 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py Wed Mar 17 14:04:12 2010 -0400 @@ -12,31 +12,12 @@ from jobman import DD import jobman, jobman.sql +from copy import copy from stacked_dae import SdA from ift6266.utils.seriestables import * -##def shared_dataset(data_xy): -## data_x, data_y = data_xy -## if theano.config.device.startswith("gpu"): -## print "TRANSFERING DATASETS (via shared()) TO GPU" -## shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) -## shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) -## shared_y = T.cast(shared_y, 'int32') -## else: -## print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES" -## shared_x = theano.shared(data_x) -## shared_y = theano.shared(data_y) -## return shared_x, shared_y - - ######Les shared seront remplacees utilisant "given" dans les enonces de fonction plus loin -def shared_dataset(batch_size, n_in): - - shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX)) - shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX)) - return shared_x, shared_y - default_series = { \ 'reconstruction_error' : DummySeries(), 'training_error' : DummySeries(), @@ -45,37 +26,34 @@ 'params' : DummySeries() } +def itermax(iter, max): + for i,it in enumerate(iter): + if i >= max: + break + yield it + class SdaSgdOptimizer: - def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series): + def __init__(self, dataset, hyperparameters, n_ins, n_outs, + examples_per_epoch, series=default_series, max_minibatches=None): self.dataset = dataset self.hp = hyperparameters self.n_ins = n_ins self.n_outs = n_outs - self.input_divider = input_divider + self.parameters_pre=[] + self.max_minibatches = max_minibatches + print "SdaSgdOptimizer, max_minibatches =", max_minibatches + + self.ex_per_epoch = examples_per_epoch + self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size + self.series = series self.rng = numpy.random.RandomState(1234) - self.init_datasets() self.init_classifier() sys.stdout.flush() - - def init_datasets(self): - print "init_datasets" - sys.stdout.flush() - - #train_set, valid_set, test_set = self.dataset - self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - - # compute number of minibatches for training, validation and testing - self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size - self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size - # remove last batch in case it's incomplete - self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 def init_classifier(self): print "Constructing classifier" @@ -88,8 +66,6 @@ # construct the stacked denoising autoencoder class self.classifier = SdA( \ - train_set_x= self.train_set_x, \ - train_set_y = self.train_set_y,\ batch_size = self.hp.minibatch_size, \ n_ins= self.n_ins, \ hidden_layers_sizes = layers_sizes, \ @@ -97,8 +73,7 @@ corruption_levels = corruption_levels,\ rng = self.rng,\ pretrain_lr = self.hp.pretraining_lr, \ - finetune_lr = self.hp.finetuning_lr,\ - input_divider = self.input_divider ) + finetune_lr = self.hp.finetuning_lr) #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") @@ -108,7 +83,7 @@ self.pretrain(self.dataset) self.finetune(self.dataset) - def pretrain(self,dataset,reduce): + def pretrain(self,dataset): print "STARTING PRETRAINING, time = ", datetime.datetime.now() sys.stdout.flush() @@ -118,15 +93,19 @@ # go through pretraining epochs for epoch in xrange(self.hp.pretraining_epochs_per_layer): # go through the training set - batch_index=int(0) + batch_index=0 for x,y in dataset.train(self.hp.minibatch_size): - batch_index+=1 - if batch_index > reduce: #If maximum number of mini-batch is used - break c = self.classifier.pretrain_functions[i](x) - self.series["reconstruction_error"].append((epoch, batch_index), c) + batch_index+=1 + + #if batch_index % 100 == 0: + # print "100 batches" + + # useful when doing tests + if self.max_minibatches and batch_index >= self.max_minibatches: + break print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c sys.stdout.flush() @@ -137,33 +116,33 @@ print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) self.hp.update({'pretraining_time': end_time-start_time}) - + sys.stdout.flush() + + #To be able to load them later for tests on finetune + self.parameters_pre=[copy(x.value) for x in self.classifier.params] - def finetune(self,dataset,reduce): + + def finetune(self,dataset,num_finetune): print "STARTING FINETUNING, time = ", datetime.datetime.now() - #index = T.lscalar() # index to a [mini]batch minibatch_size = self.hp.minibatch_size - ensemble_x = T.matrix('ensemble_x') - ensemble_y = T.ivector('ensemble_y') # create a function to compute the mistakes that are made by the model # on the validation set, or testing set - shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX)) - test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, - givens = { - #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, - #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]}) - self.classifier.x: ensemble_x, - self.classifier.y: ensemble_y}) + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: ensemble_x, + # self.classifier.y: ensemble_y]}) - validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, - givens = { - #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, - #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]}) - self.classifier.x: ensemble_x, - self.classifier.y: ensemble_y}) + validate_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: , + # self.classifier.y: ]}) # early-stopping parameters @@ -172,11 +151,13 @@ # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(self.n_train_batches, patience/2) + validation_frequency = min(self.mb_per_epoch, patience/2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch + if self.max_minibatches and validation_frequency > self.max_minibatches: + validation_frequency = self.max_minibatches / 2 best_params = None best_validation_loss = float('inf') @@ -186,37 +167,31 @@ done_looping = False epoch = 0 - while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): + total_mb_index = 0 + + while (epoch < num_finetune) and (not done_looping): epoch = epoch + 1 - minibatch_index=int(0) + minibatch_index = -1 for x,y in dataset.train(minibatch_size): - minibatch_index +=1 - - if minibatch_index > reduce: #If maximum number of mini-batchs is used - break - + minibatch_index += 1 cost_ij = self.classifier.finetune(x,y) - iter = epoch * self.n_train_batches + minibatch_index + total_mb_index += 1 self.series["training_error"].append((epoch, minibatch_index), cost_ij) - if (iter+1) % validation_frequency == 0: + if (total_mb_index+1) % validation_frequency == 0: - #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)] - test_index=int(0) - validation_losses=[] - for x,y in dataset.valid(minibatch_size): - test_index+=1 - if test_index > reduce: - break - validation_losses.append(validate_model(x,y)) + iter = dataset.valid(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + validation_losses = [validate_model(x,y) for x,y in iter] this_validation_loss = numpy.mean(validation_losses) self.series["validation_error"].\ append((epoch, minibatch_index), this_validation_loss*100.) print('epoch %i, minibatch %i, validation error %f %%' % \ - (epoch, minibatch_index, \ + (epoch, minibatch_index+1, \ this_validation_loss*100.)) @@ -226,21 +201,17 @@ #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold : - patience = max(patience, iter * patience_increase) + patience = max(patience, total_mb_index * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss - best_iter = iter + best_iter = total_mb_index # test it on the test set - #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)] - test_losses=[] - i=0 - for x,y in dataset.test(minibatch_size): - i+=1 - if i > reduce: - break - test_losses.append(test_model(x,y)) + iter = dataset.test(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + test_losses = [test_model(x,y) for x,y in iter] test_score = numpy.mean(test_losses) self.series["test_error"].\ @@ -248,14 +219,18 @@ print((' epoch %i, minibatch %i, test error of best ' 'model %f %%') % - (epoch, minibatch_index, + (epoch, minibatch_index+1, test_score*100.)) sys.stdout.flush() + # useful when doing tests + if self.max_minibatches and minibatch_index >= self.max_minibatches: + break + self.series['params'].append((epoch,), self.classifier.all_params) - if patience <= iter : + if patience <= total_mb_index: done_looping = True break @@ -269,6 +244,15 @@ 'with test performance %f %%') % (best_validation_loss * 100., test_score*100.)) print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) + + + #Set parameters like they where right after pre-train + def reload_parameters(self): + + for idx,x in enumerate(self.parameters_pre): + self.classifier.params[idx].value=copy(x) + +
--- a/deep/stacked_dae/v_sylvain/stacked_dae.py Wed Mar 17 14:01:23 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/stacked_dae.py Wed Mar 17 14:04:12 2010 -0400 @@ -165,9 +165,9 @@ class SdA(object): - def __init__(self, train_set_x, train_set_y, batch_size, n_ins, + def __init__(self, batch_size, n_ins, hidden_layers_sizes, n_outs, - corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0): + corruption_levels, rng, pretrain_lr, finetune_lr): # Just to make sure those are not modified somewhere else afterwards hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes) corruption_levels = copy.deepcopy(corruption_levels) @@ -190,23 +190,17 @@ print "n_outs", n_outs print "pretrain_lr", pretrain_lr print "finetune_lr", finetune_lr - print "input_divider", input_divider print "----" - #self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) - if len(hidden_layers_sizes) < 1 : raiseException (' You must have at least one hidden layer ') # allocate symbolic variables for the data - ##index = T.lscalar() # index to a [mini]batch + #index = T.lscalar() # index to a [mini]batch self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels - ensemble = T.matrix('ensemble') - ensemble_x = T.matrix('ensemble_x') - ensemble_y = T.ivector('ensemble_y') for i in xrange( self.n_layers ): # construct the sigmoidal layer @@ -250,10 +244,15 @@ updates[param] = param - gparam * pretrain_lr # create a function that trains the dA - update_fn = theano.function([ensemble], dA_layer.cost, \ - updates = updates, - givens = { - self.x : ensemble}) + update_fn = theano.function([self.x], dA_layer.cost, \ + updates = updates)#, + # givens = { + # self.x : ensemble}) + # collect this function into a list + #update_fn = theano.function([index], dA_layer.cost, \ + # updates = updates, + # givens = { + # self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider}) # collect this function into a list self.pretrain_functions += [update_fn] @@ -276,18 +275,17 @@ for param,gparam in zip(self.params, gparams): updates[param] = param - gparam*finetune_lr - self.finetune = theano.function([ensemble_x,ensemble_y], cost, - updates = updates, - givens = { - #self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, - #self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) - self.x : ensemble_x, - self.y : ensemble_y} ) + self.finetune = theano.function([self.x,self.y], cost, + updates = updates)#, + # givens = { + # self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, + # self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) + if __name__ == '__main__': import sys