diff baseline_algorithms/mlp/mlp_nist.py @ 159:e81241cfc2de
merge
| | |
|---|---|
| author | Myriam Cote <cotemyri@iro.umontreal.ca> |
| date | Thu, 25 Feb 2010 09:05:48 -0500 |
| parents | 8ceaaf812891 |
| children | |
--- a/baseline_algorithms/mlp/mlp_nist.py  Thu Feb 25 09:04:40 2010 -0500
+++ b/baseline_algorithms/mlp/mlp_nist.py  Thu Feb 25 09:05:48 2010 -0500
@@ -30,6 +30,7 @@
 import theano.tensor as T
 import time
 import theano.tensor.nnet
+import pylearn
 from pylearn.io import filetensor as ft
 
 data_path = '/data/lisa/data/nist/by_class/'
@@ -45,7 +46,7 @@
 
 
 
-    def __init__(self, input, n_in, n_hidden, n_out):
+    def __init__(self, input, n_in, n_hidden, n_out,learning_rate):
         """Initialize the parameters for the multilayer perceptron
 
         :param input: symbolic variable that describes the input of the
@@ -94,8 +95,14 @@
         self.b2 = theano.shared( value = numpy.zeros((n_out,),
                                                 dtype= theano.config.floatX))
 
+        #include the learning rate in the classifer so
+        #we can modify it on the fly when we want
+        lr_value=learning_rate
+        self.lr=theano.shared(value=lr_value)
         # symbolic expression computing the values of the hidden layer
         self.hidden = T.tanh(T.dot(input, self.W1)+ self.b1)
+
+
 
         # symbolic expression computing the values of the top layer
         self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2)
@@ -103,6 +110,10 @@
         # compute prediction as class whose probability is maximal in
         # symbolic form
         self.y_pred = T.argmax( self.p_y_given_x, axis =1)
+        self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1)
+
+
+
 
         # L1 norm ; one regularization option is to enforce L1 norm to
         # be small
@@ -150,21 +161,9 @@
     else:
         raise NotImplementedError()
 
-#def jobman_mlp(state,channel):
-#    (validation_error,test_error,nb_exemples,time)=mlp_full_nist(state.learning_rate,\
-#                                                                 state.n_iter,\
-#                                                                 state.batch_size,\
-#                                                                 state.nb_hidden_units)
-#    state.validation_error = validation_error
-#    state.test_error = test_error
-#    state.nb_exemples = nb_exemples
-#    state.time=time
-#    return channel.COMPLETE
-
-
-
 def mlp_full_nist(      verbose = False,\
+                        adaptive_lr = 0,\
                         train_data = 'all/all_train_data.ft',\
                         train_labels = 'all/all_train_labels.ft',\
                         test_data = 'all/all_test_data.ft',\
@@ -178,6 +177,14 @@
                        nb_targets = 62):
 
 
+    configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
+
+    total_validation_error_list = []
+    total_train_error_list = []
+    learning_rate_list=[]
+    best_training_error=float('inf');
+
+
     f = open(data_path+train_data)
     g= open(data_path+train_labels)
@@ -235,11 +242,17 @@
     y = T.lvector()  # the labels are presented as 1D vector of
                      # [long int] labels
 
+    if verbose==True:
+        print 'finished parsing the data'
     # construct the logistic regression class
     classifier = MLP( input=x.reshape((batch_size,32*32)),\
                         n_in=32*32,\
                         n_hidden=nb_hidden,\
-                        n_out=nb_targets)
+                        n_out=nb_targets,
+                        learning_rate=learning_rate)
+
+
+
 
     # the cost we minimize during training is the negative log likelihood of
     # the model plus the regularization terms (L1 and L2); cost is expressed
@@ -260,10 +273,10 @@
 
     # specify how to update the parameters of the model as a dictionary
     updates = \
-        { classifier.W1: classifier.W1 - learning_rate*g_W1 \
-        , classifier.b1: classifier.b1 - learning_rate*g_b1 \
-        , classifier.W2: classifier.W2 - learning_rate*g_W2 \
-        , classifier.b2: classifier.b2 - learning_rate*g_b2 }
+        { classifier.W1: classifier.W1 - classifier.lr*g_W1 \
+        , classifier.b1: classifier.b1 - classifier.lr*g_b1 \
+        , classifier.W2: classifier.W2 - classifier.lr*g_W2 \
+        , classifier.b2: classifier.b2 - classifier.lr*g_b2 }
 
     # compiling a theano function `train_model` that returns the cost, but in
     # the same time updates the parameter of the model based on the rules
@@ -273,13 +286,17 @@
 
 
+
+
     #conditions for stopping the adaptation:
     #1) we have reached nb_max_exemples (this is rounded up to be a multiple of the train size)
-    #2) validation error is going up (probable overfitting)
+    #2) validation error is going up twice in a row(probable overfitting)
     # This means we no longer stop on slow convergence as low learning rates stopped
     # too fast.
+
+    # no longer relevant
     patience              =nb_max_exemples/batch_size
     patience_increase     = 2     # wait this much longer when a new best is
                                   # found
@@ -296,9 +313,9 @@
     test_score           = 0.
     start_time = time.clock()
     n_iter = nb_max_exemples/batch_size  # nb of max times we are allowed to run through all exemples
-    n_iter = n_iter/n_minibatches + 1
+    n_iter = n_iter/n_minibatches + 1 #round up
     n_iter=max(1,n_iter) # run at least once on short debug call
-    # have a maximum of `n_iter` iterations through the entire dataset
+
 
     if verbose == True:
         print 'looping at most %d times through the data set' %n_iter
@@ -307,7 +324,9 @@
         # get epoch and minibatch index
         epoch           = iter / n_minibatches
         minibatch_index =  iter % n_minibatches
-
+
+
+
         # get the minibatches corresponding to `iter` modulo
         # `len(train_batches)`
         x,y = train_batches[ minibatch_index ]
@@ -317,7 +336,7 @@
 
         if (iter+1) % validation_frequency == 0:
             # compute zero-one loss on validation set
-
+
             this_validation_loss = 0.
             for x,y in validation_batches:
                 # sum up the errors for each minibatch
@@ -325,26 +344,40 @@
                 this_validation_loss += test_model(x_float,y)
             # get the average by dividing with the number of minibatches
             this_validation_loss /= len(validation_batches)
+            #save the validation loss
+            total_validation_error_list.append(this_validation_loss)
+
+            #get the training error rate
+            this_train_loss=0
+            for x,y in train_batches:
+                # sum up the errors for each minibatch
+                x_float = x/255.0
+                this_train_loss += test_model(x_float,y)
+            # get the average by dividing with the number of minibatches
+            this_train_loss /= len(train_batches)
+            #save the validation loss
+            total_train_error_list.append(this_train_loss)
+            if(this_train_loss<best_training_error):
+                best_training_error=this_train_loss
+
             if verbose == True:
-                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                print('epoch %i, minibatch %i/%i, validation error %f, training error %f %%' % \
                     (epoch, minibatch_index+1, n_minibatches, \
-                        this_validation_loss*100.))
+                        this_validation_loss*100.,this_train_loss*100))
+
+
+            #save the learning rate
+            learning_rate_list.append(classifier.lr.value)
 
             # if we got the best validation score until now
            if this_validation_loss < best_validation_loss:
-
-                #improve patience if loss improvement is good enough
-                if this_validation_loss < best_validation_loss *  \
-                       improvement_threshold :
-                    patience = max(patience, iter * patience_increase)
-                elif verbose == True:
-                    print 'slow convergence stop'
-
                 # save best validation score and iteration number
                 best_validation_loss = this_validation_loss
                 best_iter = iter
-
+                # reset patience if we are going down again
+                # so we continue exploring
+                patience=nb_max_exemples/batch_size
                 # test it on the test set
                 test_score = 0.
                 for x,y in test_batches:
@@ -357,33 +390,40 @@
                             (epoch, minibatch_index+1, n_minibatches,
                             test_score*100.))
 
-            #if the validation error is going up, we are overfitting
-            #stop converging
-            elif this_validation_loss > best_validation_loss:
+            # if the validation error is going up, we are overfitting (or oscillating)
+            # stop converging but run at least to next validation
+            # to check overfitting or ocsillation
+            # the saved weights of the model will be a bit off in that case
+            elif this_validation_loss >= best_validation_loss:
                 #calculate the test error at this point and exit
                 # test it on the test set
-                if verbose==True:
-                    print ' We are diverging'
-                best_iter = iter
+                # however, if adaptive_lr is true, try reducing the lr to
+                # get us out of an oscilliation
+                if adaptive_lr==1:
+                    classifier.lr.value=classifier.lr.value/2.0
+
                 test_score = 0.
+                #cap the patience so we are allowed one more validation error
+                #calculation before aborting
+                patience = iter+validation_frequency+1
                 for x,y in test_batches:
                     x_float=x/255.0
                     test_score += test_model(x_float,y)
                 test_score /= len(test_batches)
                 if verbose == True:
-                    print ' validation error is going up, stopping now'
+                    print ' validation error is going up, possibly stopping soon'
                     print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index+1, n_minibatches,
                           test_score*100.))
-                break
+
 
-
-        if patience <= iter :
-            break
-
+        if iter>patience:
+            print 'we have diverged'
+            break
+
 
     end_time = time.clock()
     if verbose == True:
@@ -391,17 +431,25 @@
               'obtained at iteration %i, with test performance %f %%') %
                      (best_validation_loss * 100., best_iter, test_score*100.))
         print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
-        print iter
-    return (best_validation_loss * 100.,test_score*100.,best_iter*batch_size,(end_time-start_time)/60)
+        print iter
+
+    #save the model and the weights
+    numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value)
+    numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\
+                learning_rate_list=learning_rate_list)
+
+    return (best_training_error*100.0,best_validation_loss * 100.,test_score*100.,best_iter*batch_size,(end_time-start_time)/60)
 
 if __name__ == '__main__':
     mlp_full_mnist()
 
 def jobman_mlp_full_nist(state,channel):
-    (validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\
+    (train_error,validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\
                                                                  nb_max_exemples=state.nb_max_exemples,\
-                                                                 nb_hidden=state.nb_hidden)
+                                                                 nb_hidden=state.nb_hidden,\
+                                                                 adaptive_lr=state.adaptive_lr)
+    state.train_error=train_error
     state.validation_error=validation_error
    state.test_error=test_error
    state.nb_exemples=nb_exemples
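For readers skimming the patch, the control flow it introduces can be summarised outside of Theano. The sketch below is a hypothetical, framework-free rendering of the adaptive-learning-rate policy described in the comments above (halve the rate whenever the validation error stops improving, reset the patience on a new best, and give up after two non-improving checks in a row). The names `fit_with_adaptive_lr`, `run_interval` and `validation_error` are illustrative stand-ins, not functions from this repository; in the patch itself the rate lives in a `theano.shared` variable (`classifier.lr`) referenced directly in the `updates` dictionary, which is what lets `classifier.lr.value = classifier.lr.value/2.0` take effect without recompiling `train_model`.

```python
# Minimal sketch of the adaptive-lr / patience policy added in this changeset,
# simplified to validation-check granularity. Only the control flow is meant to
# mirror the patch; the training machinery is abstracted into two callables.

def fit_with_adaptive_lr(run_interval, validation_error,
                         initial_lr=0.01, max_checks=100, adaptive_lr=True):
    lr = initial_lr
    best_val = float('inf')
    checks_since_best = 0            # how many validations in a row without improvement
    lr_history, val_history = [], []

    for check in range(max_checks):
        run_interval(lr)             # train for one validation interval at the current lr
        val = validation_error()     # zero-one loss on the validation set
        val_history.append(val)
        lr_history.append(lr)

        if val < best_val:           # still improving: keep exploring at this lr
            best_val = val
            checks_since_best = 0
        else:                        # going up or flat: probable overfitting / oscillation
            if adaptive_lr:
                lr /= 2.0            # try to damp the oscillation
            checks_since_best += 1
            if checks_since_best > 1:   # allow one more check before giving up
                break

    return best_val, lr_history, val_history


if __name__ == '__main__':
    # toy usage: a fake validation curve that improves three times, then rises
    import itertools
    fake_errors = itertools.chain([0.50, 0.40, 0.35, 0.36, 0.37],
                                  itertools.repeat(0.38))
    best, lrs, vals = fit_with_adaptive_lr(run_interval=lambda lr: None,
                                           validation_error=lambda: next(fake_errors))
    print(best, lrs)   # 0.35, with the lr halved on each of the two bad checks
```

This is only a reading aid: the real code also keeps the minibatch-level `patience` counter, records the train error and learning rate at every validation, and saves the weights and error curves with `numpy.savez` at the end of the run.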