# HG changeset patch
# User fsavard
# Date 1269458064 14400
# Node ID 694e75413413d1ea9711f7dd39e65cf29db63804
# Parent  8a3af19ae2723dfb1a3e4fcf81fcd8d0ee2cdfa6
# Parent  28b628f331b2ff6a99e76089dc4117460e61b616
Merge

diff -r 8a3af19ae272 -r 694e75413413 deep/stacked_dae/v_sylvain/nist_sda.py
--- a/deep/stacked_dae/v_sylvain/nist_sda.py	Wed Mar 24 15:13:48 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/nist_sda.py	Wed Mar 24 15:14:24 2010 -0400
@@ -68,7 +68,21 @@
                     max_minibatches=rtt)
 
     parameters=[]
-    optimizer.pretrain(datasets.nist_P07())
+    #Number of files of P07 used for pretraining
+    nb_file=0
+    if state['pretrain_choice'] == 0:
+        print('\n\tpretraining with NIST\n')
+        optimizer.pretrain(datasets.nist_all())
+    elif state['pretrain_choice'] == 1:
+        #To know how many files will be used during pretraining
+        nb_file = state['pretraining_epochs_per_layer']
+        state['pretraining_epochs_per_layer'] = 1 #Only one pass over the dataset
+        if nb_file >=100:
+            sys.exit("The code does not support that many pretraining epochs (99 max with P07).\n"+
+            "You have to correct the code (and be patient, P07 is huge !!)\n"+
+            "or reduce the number of pretraining epochs to run the code (better idea).\n")
+        print('\n\tpretraining with P07')
+        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file))
     channel.save()
 
     #Set some of the parameters used for the finetuning
@@ -89,34 +103,47 @@
 
    #Decide how the finetune is done
 
-    if finetune_choice==0:
-        print('\n\n\tfinetune avec nist\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1)
-    if finetune_choice==1:
-        print('\n\n\tfinetune avec P07\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
-    if finetune_choice==2:
-        print('\n\n\tfinetune avec nist suivi de P07\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1)
-        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
-
+    if finetune_choice == 0:
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1)
+        channel.save()
+    if finetune_choice == 1:
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
+        channel.save()
+    if finetune_choice == 2:
+        print('\n\n\tfinetune with NIST followed by P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=21)
+        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20)
+        channel.save()
+    if finetune_choice == 3:
+        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\
+        All hidden unit outputs are inputs to the logistic regression\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1,special=1)
+
+
    if finetune_choice==-1:
-        print('\nSerie de 3 essais de fine-tuning')
-        print('\n\n\tfinetune avec nist\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1)
+        print('\nSERIES OF 3 DIFFERENT FINETUNINGS')
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1)
+        channel.save()
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
        channel.save()
-        print('\n\n\tfinetune avec P07\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
+        print('\n\n\tfinetune with NIST (done earlier) followed by P07 (written here)\n\n')
+        optimizer.reload_parameters('params_finetune_NIST.txt')
+        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20)
        channel.save()
-        print('\n\n\tfinetune avec nist suivi de P07\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1)
-        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
+        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\
+        All hidden unit outputs are inputs to the logistic regression\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1,special=1)
        channel.save()
 
    channel.save()

diff -r 8a3af19ae272 -r 694e75413413 deep/stacked_dae/v_sylvain/sgd_optimization.py
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	Wed Mar 24 15:13:48 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	Wed Mar 24 15:14:24 2010 -0400
@@ -95,8 +95,11 @@
         for epoch in xrange(self.hp.pretraining_epochs_per_layer):
             # go through the training set
             batch_index=0
+            count=0
+            num_files=0
             for x,y in dataset.train(self.hp.minibatch_size):
                 c = self.classifier.pretrain_functions[i](x)
+                count +=1
 
                 self.series["reconstruction_error"].append((epoch, batch_index), c)
                 batch_index+=1
@@ -107,11 +110,21 @@
                 # useful when doing tests
                 if self.max_minibatches and batch_index >= self.max_minibatches:
                     break
-
-            print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
-            sys.stdout.flush()
+
+                #When we pass through the data only once (the case with P07)
+                #There are approximately 800*1024=819200 examples per file (1 KB per example and the files are 800 MB)
+                if self.hp.pretraining_epochs_per_layer == 1 and count%819200 == 0:
+                    print 'Pre-training layer %i, epoch %d, cost '%(i,num_files),c
+                    num_files+=1
+                    sys.stdout.flush()
+                    self.series['params'].append((num_files,), self.classifier.all_params)
+
+            #When NIST is used
+            if self.hp.pretraining_epochs_per_layer > 1:
+                print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
+                sys.stdout.flush()
 
-            self.series['params'].append((epoch,), self.classifier.all_params)
+                self.series['params'].append((epoch,), self.classifier.all_params)
 
         end_time = time.clock()
 
@@ -127,14 +140,19 @@
 
        f.close()
 
-    def finetune(self,dataset,dataset_test,num_finetune,ind_test):
+    def finetune(self,dataset,dataset_test,num_finetune,ind_test,special=0):
+
+        if special != 0 and special != 1:
+            sys.exit('Bad value for variable special. Must be in {0,1}')
+
 
        print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
        minibatch_size = self.hp.minibatch_size
-        if ind_test == 0:
+        if ind_test == 0 or ind_test == 20:
            nom_test = "NIST"
+            nom_train="P07"
        else:
            nom_test = "P07"
+            nom_train = "NIST"
 
        # create a function to compute the mistakes that are made by the model
@@ -177,20 +195,28 @@
 
        epoch = 0
        total_mb_index = 0
+        minibatch_index = -1
 
        while (epoch < num_finetune) and (not done_looping):
            epoch = epoch + 1
-            minibatch_index = -1
+
            for x,y in dataset.train(minibatch_size):
                minibatch_index += 1
-                cost_ij = self.classifier.finetune(x,y)
+                if special == 0:
+                    cost_ij = self.classifier.finetune(x,y)
+                elif special == 1:
+                    cost_ij = self.classifier.finetune2(x,y)
                total_mb_index += 1
 
                self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
                if (total_mb_index+1) % validation_frequency == 0:
-
-                    iter = dataset.valid(minibatch_size)
+                    #minibatch_index += 1
+                    #The validation set is always NIST
+                    if ind_test == 0:
+                        iter=dataset_test.valid(minibatch_size)
+                    else:
+                        iter = dataset.valid(minibatch_size)
                    if self.max_minibatches:
                        iter = itermax(iter, self.max_minibatches)
                    validation_losses = [validate_model(x,y) for x,y in iter]
@@ -199,8 +225,8 @@
                    self.series["validation_error"].\
                        append((epoch, minibatch_index), this_validation_loss*100.)
 
-                    print('epoch %i, minibatch %i, validation error %f %%' % \
-                           (epoch, minibatch_index+1, \
+                    print('epoch %i, minibatch %i, validation error on %s : %f %%' % \
+                           (epoch, minibatch_index+1,nom_test, \
                            this_validation_loss*100.))
 
 
@@ -233,16 +259,20 @@
                        self.series["test_error"].\
                            append((epoch, minibatch_index), test_score*100.)
 
-                        print((' epoch %i, minibatch %i, test error of best '
+                        print((' epoch %i, minibatch %i, test error on dataset %s (train data) of best '
                              'model %f %%') %
-                                     (epoch, minibatch_index+1,
+                                     (epoch, minibatch_index+1,nom_train,
                                      test_score*100.))
 
                        print((' epoch %i, minibatch %i, test error on dataset %s of best '
                              'model %f %%') %
                                     (epoch, minibatch_index+1,nom_test,
                                      test_score2*100.))
-
+
+                if patience <= total_mb_index:
+                    done_looping = True
+                    break
+
                sys.stdout.flush()
 
                # useful when doing tests
@@ -251,8 +281,7 @@
 
            self.series['params'].append((epoch,), self.classifier.all_params)
 
-            if patience <= total_mb_index:
-                done_looping = True
+            if done_looping == True: #To exit the fine-tuning completely
                break
 
        end_time = time.clock()
@@ -261,19 +290,45 @@
                                    'test_score':test_score,
                                    'num_finetuning_epochs':epoch})
 
-        print(('Optimization complete with best validation score of %f %%,'
-               'with test performance %f %%') %
-                     (best_validation_loss * 100., test_score*100.))
+        print(('\nOptimization complete with best validation score of %f %%,'
+               'with test performance %f %% on dataset %s ') %
+                     (best_validation_loss * 100., test_score*100.,nom_train))
        print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.))
 
        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+
+        #Save a copy of the parameters in a file so they can be reloaded later
+
+        if special == 1:    #To keep track of the parameter values
+            parameters_finetune=[copy(x.value) for x in self.classifier.params]
+            f = open('params_finetune_stanford.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+        elif ind_test== 0:    #To keep track of the parameter values
+            parameters_finetune=[copy(x.value) for x in self.classifier.params]
+            f = open('params_finetune_P07.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+        elif ind_test== 1:    #For the run with 2 finetunes. It will be faster.
+            parameters_finetune=[copy(x.value) for x in self.classifier.params]
+            f = open('params_finetune_NIST.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+        elif ind_test== 20:    #To keep track of the parameter values
+            parameters_finetune=[copy(x.value) for x in self.classifier.params]
+            f = open('params_finetune_NIST_then_P07.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
 
    #Set parameters like they where right after pre-train
-    def reload_parameters(self):
+    def reload_parameters(self,which):
 
        #self.parameters_pre=pickle.load('params_pretrain.txt')
-        f = open('params_pretrain.txt')
+        f = open(which)
        self.parameters_pre=pickle.load(f)
        f.close()
        for idx,x in enumerate(self.parameters_pre):

diff -r 8a3af19ae272 -r 694e75413413 deep/stacked_dae/v_sylvain/stacked_dae.py
--- a/deep/stacked_dae/v_sylvain/stacked_dae.py	Wed Mar 24 15:13:48 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/stacked_dae.py	Wed Mar 24 15:14:24 2010 -0400
@@ -36,6 +36,7 @@
 
        # list of parameters for this layer
        self.params = [self.W, self.b]
+
 
    def negative_log_likelihood(self, y):
       return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
@@ -181,6 +182,7 @@
        # (not used for finetuning... still using ".params")
        self.all_params = []
        self.n_layers = len(hidden_layers_sizes)
+        self.logistic_params = []
 
        print "Creating SdA with params:"
        print "batch_size", batch_size
@@ -257,7 +259,7 @@
 
            self.pretrain_functions += [update_fn]
 
-        # We now need to add a logistic layer on top of the MLP
+        # We now need to add a logistic layer on top of the SDA
        self.logLayer = LogisticRegression(\
                         input = self.layers[-1].output,\
                         n_in = hidden_layers_sizes[-1], n_out = n_outs)
@@ -277,15 +279,48 @@
 
        self.finetune = theano.function([self.x,self.y], cost,
                updates = updates)#,
-                                      # givens = {
-                                      #   self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
-                                      #   self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
 
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
+
+        #STRUCTURE FOR THE FINETUNING OF THE LOGISTIC REGRESSION ON THE TOP WITH
+        #ALL HIDDEN LAYERS AS INPUT
+
+        all_h=[]
+        for i in xrange(self.n_layers):
+            all_h.append(self.layers[i].output)
+        self.all_hidden=T.concatenate(all_h,axis=1)
+
+
+        self.logLayer2 = LogisticRegression(\
+                         input = self.all_hidden,\
+                         n_in = sum(hidden_layers_sizes), n_out = n_outs)
+                         #n_in=hidden_layers_sizes[0],n_out=n_outs)
+
+        #self.logistic_params+= self.logLayer2.params
+        # construct a function that implements one step of finetuning
+
+        # compute the cost, defined as the negative log likelihood
+        cost2 = self.logLayer2.negative_log_likelihood(self.y)
+        # compute the gradients with respect to the model parameters
+        gparams2 = T.grad(cost2, self.logLayer2.params)
+
+        # compute list of updates
+        updates2 = {}
+        for param,gparam in zip(self.logLayer2.params, gparams2):
+            updates2[param] = param - gparam*finetune_lr
+
+        self.finetune2 = theano.function([self.x,self.y], cost2,
+                updates = updates2)
+
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+
+        self.errors2 = self.logLayer2.errors(self.y)
+
 
if __name__ == '__main__':
    import sys
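
Note on the special=1 / finetune2 path introduced above: it trains only a logistic regression (softmax) layer whose input is the concatenation of every hidden layer's output, while the pre-trained weights are left untouched. The standalone sketch below illustrates that pattern in plain Theano. It is not part of the changeset: the variable names, the two-layer setup, the 32x32/500/500/62 sizes and the 0.01 learning rate are illustrative assumptions, not values taken from this repository.

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')    # minibatch of flattened images
y = T.ivector('y')   # integer class labels

n_in, n_h1, n_h2, n_out = 32*32, 500, 500, 62    # illustrative sizes
rng = numpy.random.RandomState(1234)
floatX = theano.config.floatX

def shared_zeros(shape):
    return theano.shared(numpy.zeros(shape, dtype=floatX))

def shared_uniform(n_rows, n_cols):
    # small random weights, as is usual for sigmoid layers
    values = rng.uniform(-0.1, 0.1, (n_rows, n_cols))
    return theano.shared(numpy.asarray(values, dtype=floatX))

# two "pre-trained" hidden layers; their parameters are not updated below
W1, b1 = shared_uniform(n_in, n_h1), shared_zeros(n_h1)
W2, b2 = shared_uniform(n_h1, n_h2), shared_zeros(n_h2)
h1 = T.nnet.sigmoid(T.dot(x, W1) + b1)
h2 = T.nnet.sigmoid(T.dot(h1, W2) + b2)

# concatenate every hidden representation and feed it to a single softmax layer
all_hidden = T.concatenate([h1, h2], axis=1)
W_log = shared_zeros((n_h1 + n_h2, n_out))
b_log = shared_zeros(n_out)
p_y_given_x = T.nnet.softmax(T.dot(all_hidden, W_log) + b_log)

# negative log-likelihood cost and gradients w.r.t. the softmax parameters only
cost = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
gW, gb = T.grad(cost, [W_log, b_log])

finetune_lr = 0.01    # illustrative learning rate
finetune2 = theano.function(
    [x, y], cost,
    updates={W_log: W_log - finetune_lr * gW,
             b_log: b_log - finetune_lr * gb})

Calling finetune2(batch_x, batch_y) then performs one SGD step on the top classifier alone, which is what the special=1 branch in sgd_optimization.py relies on (there the hidden layers come from the pre-trained SdA rather than the random ones used in this sketch).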