# HG changeset patch # User SylvainPL # Date 1268673740 14400 # Node ID 9fc641d7adda89273fb5272805e1c6279266ea3e # Parent 9b6e0af062af921fe9b3c2bd07476ed599bc41e1 Possibilite de restreindre la taille des ensemble d'entrainement, valid et test afin de pouvoir tester le code rapidement diff -r 9b6e0af062af -r 9fc641d7adda deep/stacked_dae/v_sylvain/nist_sda.py --- a/deep/stacked_dae/v_sylvain/nist_sda.py Mon Mar 15 10:09:50 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/nist_sda.py Mon Mar 15 13:22:20 2010 -0400 @@ -49,6 +49,7 @@ REDUCE_TRAIN_TO = 1000 MAX_FINETUNING_EPOCHS = 2 REDUCE_EVERY = 10 + MINIBATCH_SIZE=20 # Possible values the hyperparameters can take. These are then # combined with produit_cartesien_jobs so we get a list of all @@ -71,7 +72,7 @@ 'hidden_layers_sizes':500, 'corruption_levels':0.2, 'minibatch_size':20, - #'reduce_train_to':10000, + 'reduce_train_to':10000, 'num_hidden_layers':1}) ''' @@ -94,16 +95,18 @@ ## ## print "NIST loaded" ## -## # For test runs, we don't want to use the whole dataset so -## # reduce it to fewer elements if asked to. -## rtt = None -## if state.has_key('reduce_train_to'): -## rtt = state['reduce_train_to'] -## elif REDUCE_TRAIN_TO: -## rtt = REDUCE_TRAIN_TO -## -## if rtt: -## print "Reducing training set to "+str(rtt)+ " examples" + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. + rtt = None + if state.has_key('reduce_train_to'): + rtt = int(state['reduce_train_to']/state['minibatch_size']) + elif REDUCE_TRAIN_TO: + rtt = int(REDUCE_TRAIN_TO/MINIBATCH_SIZE) + + if rtt: + print "Reducing training set to "+str(rtt*state['minibatch_size'])+ " examples" + else: + rtt=float('inf') #No reduction ## nist.reduce_train_set(rtt) ## ## train,valid,test = nist.get_tvt() @@ -111,7 +114,7 @@ n_ins = 32*32 n_outs = 62 # 10 digits, 26*2 (lower, capitals) - + series = create_series(state.num_hidden_layers) print "Creating optimizer with state, ", state @@ -120,10 +123,10 @@ n_ins=n_ins, n_outs=n_outs,\ series=series) - optimizer.pretrain(datasets.nist_all) + optimizer.pretrain(datasets.nist_all,rtt) channel.save() - optimizer.finetune(datasets.nist_all) + optimizer.finetune(datasets.nist_all,rtt) channel.save() return channel.COMPLETE diff -r 9b6e0af062af -r 9fc641d7adda deep/stacked_dae/v_sylvain/sgd_optimization.py --- a/deep/stacked_dae/v_sylvain/sgd_optimization.py Mon Mar 15 10:09:50 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py Mon Mar 15 13:22:20 2010 -0400 @@ -108,7 +108,7 @@ self.pretrain(self.dataset) self.finetune(self.dataset) - def pretrain(self,dataset): + def pretrain(self,dataset,reduce): print "STARTING PRETRAINING, time = ", datetime.datetime.now() sys.stdout.flush() @@ -120,8 +120,11 @@ # go through the training set batch_index=int(0) for x,y in dataset.train(self.hp.minibatch_size): + batch_index+=1 + if batch_index > reduce: #If maximum number of mini-batch is used + break c = self.classifier.pretrain_functions[i](x) - batch_index+=1 + self.series["reconstruction_error"].append((epoch, batch_index), c) @@ -137,7 +140,7 @@ sys.stdout.flush() - def finetune(self,dataset): + def finetune(self,dataset,reduce): print "STARTING FINETUNING, time = ", datetime.datetime.now() #index = T.lscalar() # index to a [mini]batch @@ -185,10 +188,13 @@ while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): epoch = epoch + 1 - minibatch_index=int(-1) + minibatch_index=int(0) for x,y in dataset.train(minibatch_size): + minibatch_index +=1 - minibatch_index+=1 + if minibatch_index > reduce: #If maximum number of mini-batchs is used + break + cost_ij = self.classifier.finetune(x,y) iter = epoch * self.n_train_batches + minibatch_index @@ -196,14 +202,21 @@ if (iter+1) % validation_frequency == 0: - validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)] + #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)] + test_index=int(0) + validation_losses=[] + for x,y in dataset.valid(minibatch_size): + test_index+=1 + if test_index > reduce: + break + validation_losses.append(validate_model(x,y)) this_validation_loss = numpy.mean(validation_losses) self.series["validation_error"].\ append((epoch, minibatch_index), this_validation_loss*100.) - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - (epoch, minibatch_index+1, self.n_train_batches, \ + print('epoch %i, minibatch %i, validation error %f %%' % \ + (epoch, minibatch_index, \ this_validation_loss*100.)) @@ -220,15 +233,22 @@ best_iter = iter # test it on the test set - test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)] + #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)] + test_losses=[] + i=0 + for x,y in dataset.test(minibatch_size): + i+=1 + if i > reduce: + break + test_losses.append(test_model(x,y)) test_score = numpy.mean(test_losses) self.series["test_error"].\ append((epoch, minibatch_index), test_score*100.) - print((' epoch %i, minibatch %i/%i, test error of best ' + print((' epoch %i, minibatch %i, test error of best ' 'model %f %%') % - (epoch, minibatch_index+1, self.n_train_batches, + (epoch, minibatch_index, test_score*100.)) sys.stdout.flush()