# HG changeset patch # User Arnaud Bergeron # Date 1268854281 14400 # Node ID 0c0f0b3f6a93c11bd279fc829dfff99fc28ed7db # Parent 3919c71e3091d4d1ccf96a67f333a6901f72ba80# Parent bd7e50d56d801b644d8e28e8773ec22067d22bed branch merge. diff -r 3919c71e3091 -r 0c0f0b3f6a93 baseline/conv_mlp/convolutional_mlp.py --- a/baseline/conv_mlp/convolutional_mlp.py Wed Mar 17 15:24:25 2010 -0400 +++ b/baseline/conv_mlp/convolutional_mlp.py Wed Mar 17 15:31:21 2010 -0400 @@ -26,8 +26,8 @@ import theano.sandbox.softsign import pylearn.datasets.MNIST from pylearn.io import filetensor as ft -from theano.tensor.signal import downsample -from theano.tensor.nnet import conv +from theano.sandbox import conv, downsample +import theano,pylearn.version,ift6266 class LeNetConvPoolLayer(object): @@ -214,7 +214,7 @@ # Dealing with the training set # get the list of training images (x) and their labels (y) - (train_set_x, train_set_y) = (d[:4000,:],labels[:4000]) + (train_set_x, train_set_y) = (d[:200000,:],labels[:200000]) # initialize the list of training minibatches with empty list train_batches = [] for i in xrange(0, len(train_set_x), batch_size): @@ -229,7 +229,7 @@ #print train_batches[500] # Dealing with the validation set - (valid_set_x, valid_set_y) = (d[4000:5000,:],labels[4000:5000]) + (valid_set_x, valid_set_y) = (d[200000:270000,:],labels[200000:270000]) # initialize the list of validation minibatches valid_batches = [] for i in xrange(0, len(valid_set_x), batch_size): @@ -237,17 +237,18 @@ [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])] # Dealing with the testing set - (test_set_x, test_set_y) = (d[5000:6000,:],labels[5000:6000]) + (test_set_x, test_set_y) = (d[270000:340000,:],labels[270000:340000]) # initialize the list of testing minibatches test_batches = [] for i in xrange(0, len(test_set_x), batch_size): test_batches = test_batches + \ [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])] + return train_batches, valid_batches, test_batches -def evaluate_lenet5(learning_rate=0.1, n_iter=1, batch_size=20, n_kern0=20,n_kern1=50,filter_shape=5,n_layer=3, dataset='mnist.pkl.gz'): +def evaluate_lenet5(learning_rate=0.1, n_iter=200, batch_size=20, n_kern0=20, n_kern1=50, n_layer=3, filter_shape0=5, filter_shape1=5, dataset='mnist.pkl.gz'): rng = numpy.random.RandomState(23455) print 'Before load dataset' @@ -256,6 +257,16 @@ ishape = (32,32) # this is the size of NIST images n_kern2=80 + n_kern3=100 + if n_layer==4: + filter_shape1=3 + filter_shape2=3 + if n_layer==5: + filter_shape0=4 + filter_shape1=2 + filter_shape2=2 + filter_shape3=2 + # allocate symbolic variables for the data x = T.matrix('x') # rasterized images @@ -276,7 +287,7 @@ # 4D output tensor is thus of shape (20,20,14,14) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size,1,32,32), - filter_shape=(n_kern0,1,filter_shape,filter_shape), poolsize=(2,2)) + filter_shape=(n_kern0,1,filter_shape0,filter_shape0), poolsize=(2,2)) if(n_layer>2): @@ -284,17 +295,17 @@ # filtering reduces the image size to (14-5+1,14-5+1)=(10,10) # maxpooling reduces this further to (10/2,10/2) = (5,5) # 4D output tensor is thus of shape (20,50,5,5) - fshape=(32-filter_shape+1)/2 + fshape0=(32-filter_shape0+1)/2 layer1 = LeNetConvPoolLayer(rng, input=layer0.output, - image_shape=(batch_size,n_kern0,fshape,fshape), - filter_shape=(n_kern1,n_kern0,filter_shape,filter_shape), poolsize=(2,2)) + image_shape=(batch_size,n_kern0,fshape0,fshape0), + filter_shape=(n_kern1,n_kern0,filter_shape1,filter_shape1), poolsize=(2,2)) else: - fshape=(32-filter_shape+1)/2 + fshape0=(32-filter_shape0+1)/2 layer1_input = layer0.output.flatten(2) # construct a fully-connected sigmoidal layer - layer1 = SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape*fshape, n_out=500) + layer1 = SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape0*fshape0, n_out=500) layer2 = LogisticRegression(input=layer1.output, n_in=500, n_out=10) cost = layer2.negative_log_likelihood(y) @@ -304,17 +315,46 @@ if(n_layer>3): - fshape=(32-filter_shape+1)/2 - fshape2=(fshape-filter_shape+1)/2 - fshape3=(fshape2-filter_shape+1)/2 + fshape0=(32-filter_shape0+1)/2 + fshape1=(fshape0-filter_shape1+1)/2 layer2 = LeNetConvPoolLayer(rng, input=layer1.output, - image_shape=(batch_size,n_kern1,fshape2,fshape2), - filter_shape=(n_kern2,n_kern1,filter_shape,filter_shape), poolsize=(2,2)) + image_shape=(batch_size,n_kern1,fshape1,fshape1), + filter_shape=(n_kern2,n_kern1,filter_shape2,filter_shape2), poolsize=(2,2)) + + if(n_layer>4): + + + fshape0=(32-filter_shape0+1)/2 + fshape1=(fshape0-filter_shape1+1)/2 + fshape2=(fshape1-filter_shape2+1)/2 + fshape3=(fshape2-filter_shape3+1)/2 + layer3 = LeNetConvPoolLayer(rng, input=layer2.output, + image_shape=(batch_size,n_kern2,fshape2,fshape2), + filter_shape=(n_kern3,n_kern2,filter_shape3,filter_shape3), poolsize=(2,2)) + + layer4_input = layer3.output.flatten(2) + layer4 = SigmoidalLayer(rng, input=layer4_input, + n_in=n_kern3*fshape3*fshape3, n_out=500) + + + layer5 = LogisticRegression(input=layer4.output, n_in=500, n_out=10) + + cost = layer5.negative_log_likelihood(y) + + test_model = theano.function([x,y], layer5.errors(y)) + + params = layer5.params+ layer4.params+ layer3.params+ layer2.params+ layer1.params + layer0.params + + elif(n_layer>3): + + fshape0=(32-filter_shape0+1)/2 + fshape1=(fshape0-filter_shape1+1)/2 + fshape2=(fshape1-filter_shape2+1)/2 layer3_input = layer2.output.flatten(2) layer3 = SigmoidalLayer(rng, input=layer3_input, - n_in=n_kern2*fshape3*fshape3, n_out=500) + n_in=n_kern2*fshape2*fshape2, n_out=500) layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10) @@ -328,8 +368,8 @@ elif(n_layer>2): - fshape=(32-filter_shape+1)/2 - fshape2=(fshape-filter_shape+1)/2 + fshape0=(32-filter_shape0+1)/2 + fshape1=(fshape0-filter_shape1+1)/2 # the SigmoidalLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). @@ -338,7 +378,7 @@ # construct a fully-connected sigmoidal layer layer2 = SigmoidalLayer(rng, input=layer2_input, - n_in=n_kern1*fshape2*fshape2, n_out=500) + n_in=n_kern1*fshape1*fshape1, n_out=500) # classify the values of the fully-connected sigmoidal layer @@ -462,7 +502,7 @@ def experiment(state, channel): print 'start experiment' - (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.filter_shape, state.n_layer) + (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.n_layer, state.filter_shape0, state.filter_shape1) print 'end experiment' state.best_validation_loss = best_validation_loss diff -r 3919c71e3091 -r 0c0f0b3f6a93 data_generation/pipeline/pipeline.py --- a/data_generation/pipeline/pipeline.py Wed Mar 17 15:24:25 2010 -0400 +++ b/data_generation/pipeline/pipeline.py Wed Mar 17 15:31:21 2010 -0400 @@ -10,6 +10,7 @@ import numpy import ift6266.data_generation.transformations.filetensor as ft import random +import copy # To debug locally, also call with -s 100 (to stop after ~100) # (otherwise we allocate all needed memory, might be loonnng and/or crash @@ -59,6 +60,7 @@ -b, --prob-captcha: probability of using a captcha image -g, --prob-ocr: probability of using an ocr image -y, --seed: the job seed + -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations ''' try: @@ -76,6 +78,12 @@ random.seed(int(a)) numpy.random.seed(int(a)) +for o, a in opts: + if o in ('-t','--type'): + type_pipeline = int(a) + else: + type_pipeline = 0 + if DEBUG_X: import pylab pylab.ion() @@ -104,7 +112,17 @@ VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR, on_screen=False) ###---------------------order of transformation module -MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] +if type_pipeline == 0: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 0 +if type_pipeline == 1: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(False),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 5 + #we disable transformation corresponding to MODULE_INSTANCES[stop_idx:] but we still need to apply them on dummy images + #in order to be sure to have the same random generator state than with the default pipeline. + #This is not optimal (we do more calculus than necessary) but it is a quick hack to produce similar results than previous generation + + # These should have a "after_transform_callback(self, image)" method # (called after each call to transform_image in a module) @@ -155,7 +173,7 @@ sys.stdout.flush() global_idx = img_no - + img = img.reshape(img_size) param_idx = 0 @@ -174,8 +192,13 @@ p = mod.regenerate_parameters(complexity) self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p param_idx += len(p) - - img = mod.transform_image(img) + + if not(stop_idx) or stop_idx > mod_idx: + img = mod.transform_image(img) + else: + tmp = mod.transform_image(copy.copy(img)) + #this is done to be sure to have the same global random generator state + #we don't apply the transformation on the original image but on a copy in case of in-place transformations if should_hook_after_each: for hook in AFTER_EACH_MODULE_HOOK: @@ -349,6 +372,8 @@ prob_ocr = float(a) elif o in ('-y', "--seed"): pass + elif o in ('-t', "--type"): + type_pipeline = int(a) else: assert False, "unhandled option" diff -r 3919c71e3091 -r 0c0f0b3f6a93 data_generation/transformations/gimp_script.py --- a/data_generation/transformations/gimp_script.py Wed Mar 17 15:24:25 2010 -0400 +++ b/data_generation/transformations/gimp_script.py Wed Mar 17 15:31:21 2010 -0400 @@ -38,7 +38,7 @@ return ['mblur_length', 'mblur_angle', 'pinch'] def regenerate_parameters(self, complexity): - if complexity and self.blur_bool: + if complexity: self.mblur_length = abs(int(round(numpy.random.normal(0, 3*complexity)))) else: self.mblur_length = 0 @@ -50,7 +50,7 @@ def transform_image(self, image): if self.mblur_length or self.pinch: setpix(image) - if self.mblur_length: + if self.mblur_length and self.blur_bool: pdb.plug_in_mblur(img, layer1, 0, self.mblur_length, self.mblur_angle, 0, 0) if self.pinch: pdb.plug_in_whirl_pinch(img, layer1, 0.0, self.pinch, 1.0) diff -r 3919c71e3091 -r 0c0f0b3f6a93 deep/convolutional_dae/stacked_convolutional_dae.py diff -r 3919c71e3091 -r 0c0f0b3f6a93 deep/stacked_dae/v_sylvain/nist_sda.py --- a/deep/stacked_dae/v_sylvain/nist_sda.py Wed Mar 17 15:24:25 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/nist_sda.py Wed Mar 17 15:31:21 2010 -0400 @@ -21,9 +21,8 @@ import jobman, jobman.sql from pylearn.io import filetensor -from ift6266 import datasets - from utils import produit_cartesien_jobs +from copy import copy from sgd_optimization import SdaSgdOptimizer @@ -31,49 +30,8 @@ from ift6266.utils.seriestables import * import tables -############################################################################## -# GLOBALS - -TEST_CONFIG = False - -#NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' -JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/sylvainpl_sda_vsylvain' -EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v_sylvain.nist_sda.jobman_entrypoint" - -REDUCE_TRAIN_TO = None -MAX_FINETUNING_EPOCHS = 1000 -# number of minibatches before taking means for valid error etc. -REDUCE_EVERY = 100 - -if TEST_CONFIG: - REDUCE_TRAIN_TO = 1000 - MAX_FINETUNING_EPOCHS = 2 - REDUCE_EVERY = 10 - MINIBATCH_SIZE=20 - -# Possible values the hyperparameters can take. These are then -# combined with produit_cartesien_jobs so we get a list of all -# possible combinations, each one resulting in a job inserted -# in the jobman DB. -JOB_VALS = {'pretraining_lr': [0.1],#, 0.01],#, 0.001],#, 0.0001], - 'pretraining_epochs_per_layer': [10], - 'hidden_layers_sizes': [500], - 'corruption_levels': [0.1], - 'minibatch_size': [20], - 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS], - 'finetuning_lr':[0.1], #0.001 was very bad, so we leave it out - 'num_hidden_layers':[1,1]} - -# Just useful for tests... minimal number of epochs -DEFAULT_HP_NIST = DD({'finetuning_lr':0.1, - 'pretraining_lr':0.1, - 'pretraining_epochs_per_layer':2, - 'max_finetuning_epochs':2, - 'hidden_layers_sizes':500, - 'corruption_levels':0.2, - 'minibatch_size':20, - 'reduce_train_to':10000, - 'num_hidden_layers':1}) +from ift6266 import datasets +from config import * ''' Function called by jobman upon launching each job @@ -85,48 +43,82 @@ # TODO: remove this, bad for number of simultaneous requests on DB channel.save() - workingdir = os.getcwd() - - ########### Il faudrait arranger ici pour train plus petit - -## print "Will load NIST" -## -## nist = NIST(minibatch_size=20) -## -## print "NIST loaded" -## # For test runs, we don't want to use the whole dataset so # reduce it to fewer elements if asked to. rtt = None if state.has_key('reduce_train_to'): - rtt = int(state['reduce_train_to']/state['minibatch_size']) + rtt = state['reduce_train_to'] elif REDUCE_TRAIN_TO: - rtt = int(REDUCE_TRAIN_TO/MINIBATCH_SIZE) - - if rtt: - print "Reducing training set to "+str(rtt*state['minibatch_size'])+ " examples" - else: - rtt=float('inf') #No reduction -## nist.reduce_train_set(rtt) -## -## train,valid,test = nist.get_tvt() -## dataset = (train,valid,test) - + rtt = REDUCE_TRAIN_TO + n_ins = 32*32 n_outs = 62 # 10 digits, 26*2 (lower, capitals) - + + examples_per_epoch = NIST_ALL_TRAIN_SIZE + series = create_series(state.num_hidden_layers) print "Creating optimizer with state, ", state - optimizer = SdaSgdOptimizer(dataset=datasets.nist_all, hyperparameters=state, \ + optimizer = SdaSgdOptimizer(dataset=datasets.nist_all, + hyperparameters=state, \ n_ins=n_ins, n_outs=n_outs,\ - series=series) + examples_per_epoch=examples_per_epoch, \ + series=series, + max_minibatches=rtt) - optimizer.pretrain(datasets.nist_all,rtt) + parameters=[] + optimizer.pretrain(datasets.nist_all) channel.save() + + #Set some of the parameters used for the finetuning + if state.has_key('finetune_set'): + finetune_choice=state['finetune_set'] + else: + finetune_choice=FINETUNE_SET + + if state.has_key('max_finetuning_epochs'): + max_finetune_epoch_NIST=state['max_finetuning_epochs'] + else: + max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS + + if state.has_key('max_finetuning_epochs_P07'): + max_finetune_epoch_P07=state['max_finetuning_epochs_P07'] + else: + max_finetune_epoch_P07=max_finetune_epoch_NIST + + #Decide how the finetune is done + + if finetune_choice==0: + print('\n\n\tfinetune avec nist\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST) + if finetune_choice==1: + print('\n\n\tfinetune avec P07\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07) + if finetune_choice==2: + print('\n\n\tfinetune avec nist suivi de P07\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST) + optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07) - optimizer.finetune(datasets.nist_all,rtt) + if finetune_choice==-1: + print('\nSerie de 3 essais de fine-tuning') + print('\n\n\tfinetune avec nist\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST) + channel.save() + print('\n\n\tfinetune avec P07\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07) + channel.save() + print('\n\n\tfinetune avec nist suivi de P07\n\n') + optimizer.reload_parameters() + optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST) + optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07) + channel.save() + channel.save() return channel.COMPLETE @@ -207,98 +199,19 @@ print "inserted" -class NIST: - def __init__(self, minibatch_size, basepath=None, reduce_train_to=None): - global NIST_ALL_LOCATION - - self.minibatch_size = minibatch_size - self.basepath = basepath and basepath or NIST_ALL_LOCATION - - self.set_filenames() - - # arrays of 2 elements: .x, .y - self.train = [None, None] - self.test = [None, None] - - self.load_train_test() - - self.valid = [[], []] - self.split_train_valid() - if reduce_train_to: - self.reduce_train_set(reduce_train_to) - - def get_tvt(self): - return self.train, self.valid, self.test - - def set_filenames(self): - self.train_files = ['all_train_data.ft', - 'all_train_labels.ft'] - - self.test_files = ['all_test_data.ft', - 'all_test_labels.ft'] - - def load_train_test(self): - self.load_data_labels(self.train_files, self.train) - self.load_data_labels(self.test_files, self.test) - - def load_data_labels(self, filenames, pair): - for i, fn in enumerate(filenames): - f = open(os.path.join(self.basepath, fn)) - pair[i] = filetensor.read(f) - f.close() - - def reduce_train_set(self, max): - self.train[0] = self.train[0][:max] - self.train[1] = self.train[1][:max] - - if max < len(self.test[0]): - for ar in (self.test, self.valid): - ar[0] = ar[0][:max] - ar[1] = ar[1][:max] - - def split_train_valid(self): - test_len = len(self.test[0]) - - new_train_x = self.train[0][:-test_len] - new_train_y = self.train[1][:-test_len] - - self.valid[0] = self.train[0][-test_len:] - self.valid[1] = self.train[1][-test_len:] - - self.train[0] = new_train_x - self.train[1] = new_train_y - -def test_load_nist(): - print "Will load NIST" - - import time - t1 = time.time() - nist = NIST(20) - t2 = time.time() - - print "NIST loaded. time delta = ", t2-t1 - - tr,v,te = nist.get_tvt() - - print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0]) - - raw_input("Press any key") - if __name__ == '__main__': - import sys - args = sys.argv[1:] - if len(args) > 0 and args[0] == 'load_nist': - test_load_nist() + #if len(args) > 0 and args[0] == 'load_nist': + # test_load_nist() - elif len(args) > 0 and args[0] == 'jobman_insert': + if len(args) > 0 and args[0] == 'jobman_insert': jobman_insert_nist() elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) - jobman_entrypoint(DEFAULT_HP_NIST, chanmock) + jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock) else: print "Bad arguments" diff -r 3919c71e3091 -r 0c0f0b3f6a93 deep/stacked_dae/v_sylvain/sgd_optimization.py --- a/deep/stacked_dae/v_sylvain/sgd_optimization.py Wed Mar 17 15:24:25 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py Wed Mar 17 15:31:21 2010 -0400 @@ -12,31 +12,12 @@ from jobman import DD import jobman, jobman.sql +from copy import copy from stacked_dae import SdA from ift6266.utils.seriestables import * -##def shared_dataset(data_xy): -## data_x, data_y = data_xy -## if theano.config.device.startswith("gpu"): -## print "TRANSFERING DATASETS (via shared()) TO GPU" -## shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) -## shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) -## shared_y = T.cast(shared_y, 'int32') -## else: -## print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES" -## shared_x = theano.shared(data_x) -## shared_y = theano.shared(data_y) -## return shared_x, shared_y - - ######Les shared seront remplacees utilisant "given" dans les enonces de fonction plus loin -def shared_dataset(batch_size, n_in): - - shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX)) - shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX)) - return shared_x, shared_y - default_series = { \ 'reconstruction_error' : DummySeries(), 'training_error' : DummySeries(), @@ -45,37 +26,34 @@ 'params' : DummySeries() } +def itermax(iter, max): + for i,it in enumerate(iter): + if i >= max: + break + yield it + class SdaSgdOptimizer: - def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series): + def __init__(self, dataset, hyperparameters, n_ins, n_outs, + examples_per_epoch, series=default_series, max_minibatches=None): self.dataset = dataset self.hp = hyperparameters self.n_ins = n_ins self.n_outs = n_outs - self.input_divider = input_divider + self.parameters_pre=[] + self.max_minibatches = max_minibatches + print "SdaSgdOptimizer, max_minibatches =", max_minibatches + + self.ex_per_epoch = examples_per_epoch + self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size + self.series = series self.rng = numpy.random.RandomState(1234) - self.init_datasets() self.init_classifier() sys.stdout.flush() - - def init_datasets(self): - print "init_datasets" - sys.stdout.flush() - - #train_set, valid_set, test_set = self.dataset - self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - - # compute number of minibatches for training, validation and testing - self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size - self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size - # remove last batch in case it's incomplete - self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 def init_classifier(self): print "Constructing classifier" @@ -88,8 +66,6 @@ # construct the stacked denoising autoencoder class self.classifier = SdA( \ - train_set_x= self.train_set_x, \ - train_set_y = self.train_set_y,\ batch_size = self.hp.minibatch_size, \ n_ins= self.n_ins, \ hidden_layers_sizes = layers_sizes, \ @@ -97,8 +73,7 @@ corruption_levels = corruption_levels,\ rng = self.rng,\ pretrain_lr = self.hp.pretraining_lr, \ - finetune_lr = self.hp.finetuning_lr,\ - input_divider = self.input_divider ) + finetune_lr = self.hp.finetuning_lr) #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") @@ -108,7 +83,7 @@ self.pretrain(self.dataset) self.finetune(self.dataset) - def pretrain(self,dataset,reduce): + def pretrain(self,dataset): print "STARTING PRETRAINING, time = ", datetime.datetime.now() sys.stdout.flush() @@ -118,15 +93,19 @@ # go through pretraining epochs for epoch in xrange(self.hp.pretraining_epochs_per_layer): # go through the training set - batch_index=int(0) + batch_index=0 for x,y in dataset.train(self.hp.minibatch_size): - batch_index+=1 - if batch_index > reduce: #If maximum number of mini-batch is used - break c = self.classifier.pretrain_functions[i](x) - self.series["reconstruction_error"].append((epoch, batch_index), c) + batch_index+=1 + + #if batch_index % 100 == 0: + # print "100 batches" + + # useful when doing tests + if self.max_minibatches and batch_index >= self.max_minibatches: + break print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c sys.stdout.flush() @@ -137,33 +116,33 @@ print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) self.hp.update({'pretraining_time': end_time-start_time}) - + sys.stdout.flush() + + #To be able to load them later for tests on finetune + self.parameters_pre=[copy(x.value) for x in self.classifier.params] - def finetune(self,dataset,reduce): + + def finetune(self,dataset,num_finetune): print "STARTING FINETUNING, time = ", datetime.datetime.now() - #index = T.lscalar() # index to a [mini]batch minibatch_size = self.hp.minibatch_size - ensemble_x = T.matrix('ensemble_x') - ensemble_y = T.ivector('ensemble_y') # create a function to compute the mistakes that are made by the model # on the validation set, or testing set - shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX)) - test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, - givens = { - #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, - #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]}) - self.classifier.x: ensemble_x, - self.classifier.y: ensemble_y}) + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: ensemble_x, + # self.classifier.y: ensemble_y]}) - validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, - givens = { - #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, - #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]}) - self.classifier.x: ensemble_x, - self.classifier.y: ensemble_y}) + validate_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: , + # self.classifier.y: ]}) # early-stopping parameters @@ -172,11 +151,13 @@ # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(self.n_train_batches, patience/2) + validation_frequency = min(self.mb_per_epoch, patience/2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch + if self.max_minibatches and validation_frequency > self.max_minibatches: + validation_frequency = self.max_minibatches / 2 best_params = None best_validation_loss = float('inf') @@ -186,37 +167,31 @@ done_looping = False epoch = 0 - while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): + total_mb_index = 0 + + while (epoch < num_finetune) and (not done_looping): epoch = epoch + 1 - minibatch_index=int(0) + minibatch_index = -1 for x,y in dataset.train(minibatch_size): - minibatch_index +=1 - - if minibatch_index > reduce: #If maximum number of mini-batchs is used - break - + minibatch_index += 1 cost_ij = self.classifier.finetune(x,y) - iter = epoch * self.n_train_batches + minibatch_index + total_mb_index += 1 self.series["training_error"].append((epoch, minibatch_index), cost_ij) - if (iter+1) % validation_frequency == 0: + if (total_mb_index+1) % validation_frequency == 0: - #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)] - test_index=int(0) - validation_losses=[] - for x,y in dataset.valid(minibatch_size): - test_index+=1 - if test_index > reduce: - break - validation_losses.append(validate_model(x,y)) + iter = dataset.valid(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + validation_losses = [validate_model(x,y) for x,y in iter] this_validation_loss = numpy.mean(validation_losses) self.series["validation_error"].\ append((epoch, minibatch_index), this_validation_loss*100.) print('epoch %i, minibatch %i, validation error %f %%' % \ - (epoch, minibatch_index, \ + (epoch, minibatch_index+1, \ this_validation_loss*100.)) @@ -226,21 +201,17 @@ #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold : - patience = max(patience, iter * patience_increase) + patience = max(patience, total_mb_index * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss - best_iter = iter + best_iter = total_mb_index # test it on the test set - #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)] - test_losses=[] - i=0 - for x,y in dataset.test(minibatch_size): - i+=1 - if i > reduce: - break - test_losses.append(test_model(x,y)) + iter = dataset.test(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + test_losses = [test_model(x,y) for x,y in iter] test_score = numpy.mean(test_losses) self.series["test_error"].\ @@ -248,14 +219,18 @@ print((' epoch %i, minibatch %i, test error of best ' 'model %f %%') % - (epoch, minibatch_index, + (epoch, minibatch_index+1, test_score*100.)) sys.stdout.flush() + # useful when doing tests + if self.max_minibatches and minibatch_index >= self.max_minibatches: + break + self.series['params'].append((epoch,), self.classifier.all_params) - if patience <= iter : + if patience <= total_mb_index: done_looping = True break @@ -269,6 +244,15 @@ 'with test performance %f %%') % (best_validation_loss * 100., test_score*100.)) print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) + + + #Set parameters like they where right after pre-train + def reload_parameters(self): + + for idx,x in enumerate(self.parameters_pre): + self.classifier.params[idx].value=copy(x) + + diff -r 3919c71e3091 -r 0c0f0b3f6a93 deep/stacked_dae/v_sylvain/stacked_dae.py --- a/deep/stacked_dae/v_sylvain/stacked_dae.py Wed Mar 17 15:24:25 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/stacked_dae.py Wed Mar 17 15:31:21 2010 -0400 @@ -165,9 +165,9 @@ class SdA(object): - def __init__(self, train_set_x, train_set_y, batch_size, n_ins, + def __init__(self, batch_size, n_ins, hidden_layers_sizes, n_outs, - corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0): + corruption_levels, rng, pretrain_lr, finetune_lr): # Just to make sure those are not modified somewhere else afterwards hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes) corruption_levels = copy.deepcopy(corruption_levels) @@ -190,23 +190,17 @@ print "n_outs", n_outs print "pretrain_lr", pretrain_lr print "finetune_lr", finetune_lr - print "input_divider", input_divider print "----" - #self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) - if len(hidden_layers_sizes) < 1 : raiseException (' You must have at least one hidden layer ') # allocate symbolic variables for the data - ##index = T.lscalar() # index to a [mini]batch + #index = T.lscalar() # index to a [mini]batch self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels - ensemble = T.matrix('ensemble') - ensemble_x = T.matrix('ensemble_x') - ensemble_y = T.ivector('ensemble_y') for i in xrange( self.n_layers ): # construct the sigmoidal layer @@ -250,10 +244,15 @@ updates[param] = param - gparam * pretrain_lr # create a function that trains the dA - update_fn = theano.function([ensemble], dA_layer.cost, \ - updates = updates, - givens = { - self.x : ensemble}) + update_fn = theano.function([self.x], dA_layer.cost, \ + updates = updates)#, + # givens = { + # self.x : ensemble}) + # collect this function into a list + #update_fn = theano.function([index], dA_layer.cost, \ + # updates = updates, + # givens = { + # self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider}) # collect this function into a list self.pretrain_functions += [update_fn] @@ -276,18 +275,17 @@ for param,gparam in zip(self.params, gparams): updates[param] = param - gparam*finetune_lr - self.finetune = theano.function([ensemble_x,ensemble_y], cost, - updates = updates, - givens = { - #self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, - #self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) - self.x : ensemble_x, - self.y : ensemble_y} ) + self.finetune = theano.function([self.x,self.y], cost, + updates = updates)#, + # givens = { + # self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, + # self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) + if __name__ == '__main__': import sys diff -r 3919c71e3091 -r 0c0f0b3f6a93 scripts/launch_generate100.py --- a/scripts/launch_generate100.py Wed Mar 17 15:24:25 2010 -0400 +++ b/scripts/launch_generate100.py Wed Mar 17 15:31:21 2010 -0400 @@ -3,12 +3,17 @@ import os dir1 = "/data/lisa/data/ift6266h10/" -mach = "brams0c.iro.umontreal.ca,brams02.iro.umontreal.ca,brams03.iro.umontreal.ca,maggie22.iro.umontreal.ca" +mach = "maggie16.iro.umontreal.ca,maggie15.iro.umontreal.ca" for i,s in enumerate(['valid','test']): for j,c in enumerate([0.3,0.5,0.7,1]): l = str(c).replace('.','') os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (mach, dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) +#P07 for i in range(100): os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i)) + +#PNIST07 +for i in range(100): + os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/PNIST07_train%d_data.ft -p %sdata/PNIST07_train%d_params -x %sdata/PNIST07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d -t %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i,1))