ift6266: diff baseline/mlp/mlp_nist.py @ 377:0b7e64e8e93f
branch merge
| author | Arnaud Bergeron <abergeron@gmail.com> |
| --- | --- |
| date | Sun, 25 Apr 2010 17:12:03 -0400 |
| parents | 76b7182dd32e |
| children | 60a4432b8071 |
line diff
--- a/baseline/mlp/mlp_nist.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/baseline/mlp/mlp_nist.py	Sun Apr 25 17:12:03 2010 -0400
@@ -23,6 +23,7 @@
 """
 __docformat__ = 'restructedtext en'

+import sys
 import pdb
 import numpy
 import pylab
@@ -163,6 +164,75 @@
     else:
         raise NotImplementedError()

+def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz',
+                       data_set=0):
+
+
+
+    # allocate symbolic variables for the data
+    x = T.fmatrix()  # the data is presented as rasterized images
+    y = T.lvector()  # the labels are presented as 1D vector of
+                     # [long int] labels
+
+    # load the data set and create an mlp based on the dimensions of the model
+    model=numpy.load(model_name)
+    W1=model['W1']
+    W2=model['W2']
+    b1=model['b1']
+    b2=model['b2']
+    nb_hidden=b1.shape[0]
+    input_dim=W1.shape[0]
+    nb_targets=b2.shape[0]
+    learning_rate=0.1
+
+
+    if data_set==0:
+        dataset=datasets.nist_all()
+    elif data_set==1:
+        dataset=datasets.nist_P07()
+
+
+    classifier = MLP( input=x,\
+                      n_in=input_dim,\
+                      n_hidden=nb_hidden,\
+                      n_out=nb_targets,
+                      learning_rate=learning_rate)
+
+
+    #overwrite weights with weigths from model
+    classifier.W1.value=W1
+    classifier.W2.value=W2
+    classifier.b1.value=b1
+    classifier.b2.value=b2
+
+
+    cost = classifier.negative_log_likelihood(y) \
+         + 0.0 * classifier.L1 \
+         + 0.0 * classifier.L2_sqr
+
+    # compiling a theano function that computes the mistakes that are made by
+    # the model on a minibatch
+    test_model = theano.function([x,y], classifier.errors(y))
+
+
+
+    #get the test error
+    #use a batch size of 1 so we can get the sub-class error
+    #without messing with matrices (will be upgraded later)
+    test_score=0
+    temp=0
+    for xt,yt in dataset.test(20):
+        test_score += test_model(xt,yt)
+        temp = temp+1
+    test_score /= temp
+
+
+    return test_score*100
+
+
+
+
+

 def mlp_full_nist(      verbose = 1,\
                         adaptive_lr = 0,\
@@ -174,15 +244,19 @@
                         batch_size=20,\
                         nb_hidden = 30,\
                         nb_targets = 62,
-                        tau=1e6,\
-                        lr_t2_factor=0.5):
+                        tau=1e6,\
+                        lr_t2_factor=0.5,\
+                        init_model=0,\
+                        channel=0):

+    if channel!=0:
+        channel.save()

     configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]

     #save initial learning rate if classical adaptive lr is used
     initial_lr=learning_rate
-    max_div_count=3
+    max_div_count=1000


     total_validation_error_list = []
@@ -195,6 +269,8 @@
         dataset=datasets.nist_all()
     elif data_set==1:
         dataset=datasets.nist_P07()
+    elif data_set==2:
+        dataset=datasets.PNIST07()



@@ -215,6 +291,14 @@
                        learning_rate=learning_rate)


+
+    # check if we want to initialise the weights with a previously calculated model
+    # dimensions must be consistent between old model and current configuration!!!!!! (nb_hidden and nb_targets)
+    if init_model!=0:
+        old_model=numpy.load(init_model)
+        classifier.W1.value=old_model['W1']
+        classifier.W2.value=old_model['W2']
+        classifier.b1.value=old_model['b1']
+        classifier.b2.value=old_model['b2']


     # the cost we minimize during training is the negative log likelihood of
@@ -289,8 +373,9 @@



-    if verbose == 1:
-        print 'starting training'
+
+    print 'starting training'
+    sys.stdout.flush()
     while(minibatch_index*batch_size<nb_max_exemples):

         for x, y in dataset.train(batch_size):
@@ -303,10 +388,12 @@
             #train model
             cost_ij = train_model(x,y)

-            if (minibatch_index+1) % validation_frequency == 0:
+            if (minibatch_index) % validation_frequency == 0:
                 #save the current learning rate
                 learning_rate_list.append(classifier.lr.value)
                 divergence_flag_list.append(divergence_flag)
+
+

                 # compute the validation error
                 this_validation_loss = 0.
@@ -319,10 +406,15 @@
                 this_validation_loss /= temp
                 #save the validation loss
                 total_validation_error_list.append(this_validation_loss)
-                if verbose == 1:
-                    print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') %
-                    (epoch, minibatch_index+1,classifier.lr.value,
-                    this_validation_loss*100.))
+
+                print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') %
+                (epoch, minibatch_index+1,classifier.lr.value,
+                this_validation_loss*100.))
+                sys.stdout.flush()
+
+                #save temp results to check during training
+                numpy.savez('temp_results.npy',config=configuration,total_validation_error_list=total_validation_error_list,\
+                learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list)

                 # if we got the best validation score until now
                 if this_validation_loss < best_validation_loss:
@@ -344,11 +436,12 @@
                         test_score += test_model(xt,yt)
                         temp = temp+1
                     test_score /= temp
-                    if verbose == 1:
-                        print(('epoch %i, minibatch %i, test error of best '
-                                'model %f %%') %
-                                (epoch, minibatch_index+1,
-                                test_score*100.))
+
+                    print(('epoch %i, minibatch %i, test error of best '
+                            'model %f %%') %
+                            (epoch, minibatch_index+1,
+                            test_score*100.))
+                    sys.stdout.flush()

                 # if the validation error is going up, we are overfitting (or oscillating)
                 # check if we are allowed to continue and if we will adjust the learning rate
@@ -374,12 +467,13 @@
                         test_score += test_model(xt,yt)
                         temp=temp+1
                     test_score /= temp
-                    if verbose == 1:
-                        print ' validation error is going up, possibly stopping soon'
-                        print(('     epoch %i, minibatch %i, test error of best '
-                                'model %f %%') %
-                                (epoch, minibatch_index+1,
-                                test_score*100.))
+
+                    print ' validation error is going up, possibly stopping soon'
+                    print(('     epoch %i, minibatch %i, test error of best '
+                            'model %f %%') %
+                            (epoch, minibatch_index+1,
+                            test_score*100.))
+                    sys.stdout.flush()


@@ -393,6 +487,9 @@
             #force one epoch at least
             if epoch>0 and minibatch_index*batch_size>nb_max_exemples:
                 break
+
+
+

             time_n= time_n + batch_size

@@ -401,12 +498,13 @@
         # we have finished looping through the training set
         epoch = epoch+1
     end_time = time.clock()
-    if verbose == 1:
-        print(('Optimization complete. Best validation score of %f %% '
-               'obtained at iteration %i, with test performance %f %%') %
-               (best_validation_loss * 100., best_iter, test_score*100.))
-        print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
-        print minibatch_index
+
+    print(('Optimization complete. Best validation score of %f %% '
+           'obtained at iteration %i, with test performance %f %%') %
+           (best_validation_loss * 100., best_iter, test_score*100.))
+    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
+    print minibatch_index
+    sys.stdout.flush()

     #save the model and the weights
     numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value)
@@ -427,7 +525,8 @@
                                                 tau=state.tau,\
                                                 verbose = state.verbose,\
                                                 lr_t2_factor=state.lr_t2_factor,
-                                                data_set=state.data_set)
+                                                data_set=state.data_set,
+                                                channel=channel)
     state.train_error=train_error
     state.validation_error=validation_error
     state.test_error=test_error
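For reference, here is a minimal usage sketch of the two entry points this changeset touches: the new `mlp_get_nist_error` evaluation helper and the `init_model`/`channel`/`data_set=2` options added to `mlp_full_nist`. It is not part of the commit; the checkpoint paths are hypothetical placeholders, it assumes `mlp_nist.py` and the course `datasets` module are importable, and the keyword names used are only those visible in the diff.

```python
# Hypothetical usage sketch (not part of this changeset).
# Assumptions: baseline/mlp/mlp_nist.py and the ift6266 `datasets` module are on
# PYTHONPATH; the checkpoint paths below are placeholders.
import mlp_nist

# Evaluate a previously saved model on NIST all (data_set=0) or P07 (data_set=1).
test_err = mlp_nist.mlp_get_nist_error(model_name='best_model.npy.npz',  # placeholder path
                                       data_set=0)
print 'test error: %f %%' % test_err

# Train, warm-starting the weights from an earlier run via the new init_model
# argument; data_set=2 selects the new datasets.PNIST07() option, and channel=0
# leaves the jobman channel.save() call disabled.
results = mlp_nist.mlp_full_nist(verbose=1,
                                 data_set=2,
                                 init_model='model.npy.npz',  # placeholder checkpoint
                                 channel=0)
```

Passing a jobman channel object instead of 0 (as the `jobman_mlp_full_nist` wrapper now does) triggers the `channel.save()` call added at the top of `mlp_full_nist`.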