comparison baseline_algorithms/mlp/mlp_nist.py @ 159:e81241cfc2de
merge
author | Myriam Cote <cotemyri@iro.umontreal.ca>
date | Thu, 25 Feb 2010 09:05:48 -0500
parents | 8ceaaf812891
children |
158:d1bb6e06497a | 159:e81241cfc2de |
---|---|
28 import pylab | 28 import pylab |
29 import theano | 29 import theano |
30 import theano.tensor as T | 30 import theano.tensor as T |
31 import time | 31 import time |
32 import theano.tensor.nnet | 32 import theano.tensor.nnet |
33 import pylearn | |
33 from pylearn.io import filetensor as ft | 34 from pylearn.io import filetensor as ft |
34 | 35 |
35 data_path = '/data/lisa/data/nist/by_class/' | 36 data_path = '/data/lisa/data/nist/by_class/' |
36 | 37 |
37 class MLP(object): | 38 class MLP(object): |
43 sigmoid function while the top layer is a softamx layer. | 44 sigmoid function while the top layer is a softamx layer. |
44 """ | 45 """ |
45 | 46 |
46 | 47 |
47 | 48 |
48 def __init__(self, input, n_in, n_hidden, n_out): | 49 def __init__(self, input, n_in, n_hidden, n_out,learning_rate): |
49 """Initialize the parameters for the multilayer perceptron | 50 """Initialize the parameters for the multilayer perceptron |
50 | 51 |
51 :param input: symbolic variable that describes the input of the | 52 :param input: symbolic variable that describes the input of the |
52 architecture (one minibatch) | 53 architecture (one minibatch) |
53 | 54 |
92 dtype= theano.config.floatX)) | 93 dtype= theano.config.floatX)) |
93 self.W2 = theano.shared( value = W2_values ) | 94 self.W2 = theano.shared( value = W2_values ) |
94 self.b2 = theano.shared( value = numpy.zeros((n_out,), | 95 self.b2 = theano.shared( value = numpy.zeros((n_out,), |
95 dtype= theano.config.floatX)) | 96 dtype= theano.config.floatX)) |
96 | 97 |
98 #include the learning rate in the classifer so | |
99 #we can modify it on the fly when we want | |
100 lr_value=learning_rate | |
101 self.lr=theano.shared(value=lr_value) | |
97 # symbolic expression computing the values of the hidden layer | 102 # symbolic expression computing the values of the hidden layer |
98 self.hidden = T.tanh(T.dot(input, self.W1)+ self.b1) | 103 self.hidden = T.tanh(T.dot(input, self.W1)+ self.b1) |
104 | |
105 | |
99 | 106 |
100 # symbolic expression computing the values of the top layer | 107 # symbolic expression computing the values of the top layer |
101 self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2) | 108 self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2) |
102 | 109 |
103 # compute prediction as class whose probability is maximal in | 110 # compute prediction as class whose probability is maximal in |
104 # symbolic form | 111 # symbolic form |
105 self.y_pred = T.argmax( self.p_y_given_x, axis =1) | 112 self.y_pred = T.argmax( self.p_y_given_x, axis =1) |
113 self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1) | |
114 | |
115 | |
116 | |
106 | 117 |
107 # L1 norm ; one regularization option is to enforce L1 norm to | 118 # L1 norm ; one regularization option is to enforce L1 norm to |
108 # be small | 119 # be small |
109 self.L1 = abs(self.W1).sum() + abs(self.W2).sum() | 120 self.L1 = abs(self.W1).sum() + abs(self.W2).sum() |
110 | 121 |
148 # represents a mistake in prediction | 159 # represents a mistake in prediction |
149 return T.mean(T.neq(self.y_pred, y)) | 160 return T.mean(T.neq(self.y_pred, y)) |
150 else: | 161 else: |
151 raise NotImplementedError() | 162 raise NotImplementedError() |
152 | 163 |
153 #def jobman_mlp(state,channel): | |
154 # (validation_error,test_error,nb_exemples,time)=mlp_full_nist(state.learning_rate,\ | |
155 # state.n_iter,\ | |
156 # state.batch_size,\ | |
157 # state.nb_hidden_units) | |
158 # state.validation_error = validation_error | |
159 # state.test_error = test_error | |
160 # state.nb_exemples = nb_exemples | |
161 # state.time=time | |
162 # return channel.COMPLETE | |
163 | |
164 | |
165 | |
166 | 164 |
167 def mlp_full_nist( verbose = False,\ | 165 def mlp_full_nist( verbose = False,\ |
166 adaptive_lr = 0,\ | |
168 train_data = 'all/all_train_data.ft',\ | 167 train_data = 'all/all_train_data.ft',\ |
169 train_labels = 'all/all_train_labels.ft',\ | 168 train_labels = 'all/all_train_labels.ft',\ |
170 test_data = 'all/all_test_data.ft',\ | 169 test_data = 'all/all_test_data.ft',\ |
171 test_labels = 'all/all_test_labels.ft',\ | 170 test_labels = 'all/all_test_labels.ft',\ |
172 learning_rate=0.01,\ | 171 learning_rate=0.01,\ |
175 nb_max_exemples=1000000,\ | 174 nb_max_exemples=1000000,\ |
176 batch_size=20,\ | 175 batch_size=20,\ |
177 nb_hidden = 500,\ | 176 nb_hidden = 500,\ |
178 nb_targets = 62): | 177 nb_targets = 62): |
179 | 178 |
179 | |
180 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] | |
181 | |
182 total_validation_error_list = [] | |
183 total_train_error_list = [] | |
184 learning_rate_list=[] | |
185 best_training_error=float('inf'); | |
186 | |
180 | 187 |
181 | 188 |
182 f = open(data_path+train_data) | 189 f = open(data_path+train_data) |
183 g= open(data_path+train_labels) | 190 g= open(data_path+train_labels) |
184 h = open(data_path+test_data) | 191 h = open(data_path+test_data) |
233 # allocate symbolic variables for the data | 240 # allocate symbolic variables for the data |
234 x = T.fmatrix() # the data is presented as rasterized images | 241 x = T.fmatrix() # the data is presented as rasterized images |
235 y = T.lvector() # the labels are presented as 1D vector of | 242 y = T.lvector() # the labels are presented as 1D vector of |
236 # [long int] labels | 243 # [long int] labels |
237 | 244 |
245 if verbose==True: | |
246 print 'finished parsing the data' | |
238 # construct the logistic regression class | 247 # construct the logistic regression class |
239 classifier = MLP( input=x.reshape((batch_size,32*32)),\ | 248 classifier = MLP( input=x.reshape((batch_size,32*32)),\ |
240 n_in=32*32,\ | 249 n_in=32*32,\ |
241 n_hidden=nb_hidden,\ | 250 n_hidden=nb_hidden,\ |
242 n_out=nb_targets) | 251 n_out=nb_targets, |
252 learning_rate=learning_rate) | |
253 | |
254 | |
255 | |
243 | 256 |
244 # the cost we minimize during training is the negative log likelihood of | 257 # the cost we minimize during training is the negative log likelihood of |
245 # the model plus the regularization terms (L1 and L2); cost is expressed | 258 # the model plus the regularization terms (L1 and L2); cost is expressed |
246 # here symbolically | 259 # here symbolically |
247 cost = classifier.negative_log_likelihood(y) \ | 260 cost = classifier.negative_log_likelihood(y) \ |
258 g_W2 = T.grad(cost, classifier.W2) | 271 g_W2 = T.grad(cost, classifier.W2) |
259 g_b2 = T.grad(cost, classifier.b2) | 272 g_b2 = T.grad(cost, classifier.b2) |
260 | 273 |
261 # specify how to update the parameters of the model as a dictionary | 274 # specify how to update the parameters of the model as a dictionary |
262 updates = \ | 275 updates = \ |
263 { classifier.W1: classifier.W1 - learning_rate*g_W1 \ | 276 { classifier.W1: classifier.W1 - classifier.lr*g_W1 \ |
264 , classifier.b1: classifier.b1 - learning_rate*g_b1 \ | 277 , classifier.b1: classifier.b1 - classifier.lr*g_b1 \ |
265 , classifier.W2: classifier.W2 - learning_rate*g_W2 \ | 278 , classifier.W2: classifier.W2 - classifier.lr*g_W2 \ |
266 , classifier.b2: classifier.b2 - learning_rate*g_b2 } | 279 , classifier.b2: classifier.b2 - classifier.lr*g_b2 } |
267 | 280 |
268 # compiling a theano function `train_model` that returns the cost, but in | 281 # compiling a theano function `train_model` that returns the cost, but in |
269 # the same time updates the parameter of the model based on the rules | 282 # the same time updates the parameter of the model based on the rules |
270 # defined in `updates` | 283 # defined in `updates` |
271 train_model = theano.function([x, y], cost, updates = updates ) | 284 train_model = theano.function([x, y], cost, updates = updates ) |
272 n_minibatches = len(train_batches) | 285 n_minibatches = len(train_batches) |
273 | 286 |
274 | 287 |
275 | 288 |
289 | |
290 | |
276 | 291 |
277 #conditions for stopping the adaptation: | 292 #conditions for stopping the adaptation: |
278 #1) we have reached nb_max_exemples (this is rounded up to be a multiple of the train size) | 293 #1) we have reached nb_max_exemples (this is rounded up to be a multiple of the train size) |
279 #2) validation error is going up (probable overfitting) | 294 #2) validation error is going up twice in a row(probable overfitting) |
280 | 295 |
281 # This means we no longer stop on slow convergence as low learning rates stopped | 296 # This means we no longer stop on slow convergence as low learning rates stopped |
282 # too fast. | 297 # too fast. |
298 | |
299 # no longer relevant | |
283 patience =nb_max_exemples/batch_size | 300 patience =nb_max_exemples/batch_size |
284 patience_increase = 2 # wait this much longer when a new best is | 301 patience_increase = 2 # wait this much longer when a new best is |
285 # found | 302 # found |
286 improvement_threshold = 0.995 # a relative improvement of this much is | 303 improvement_threshold = 0.995 # a relative improvement of this much is |
287 # considered significant | 304 # considered significant |
294 best_validation_loss = float('inf') | 311 best_validation_loss = float('inf') |
295 best_iter = 0 | 312 best_iter = 0 |
296 test_score = 0. | 313 test_score = 0. |
297 start_time = time.clock() | 314 start_time = time.clock() |
298 n_iter = nb_max_exemples/batch_size # nb of max times we are allowed to run through all exemples | 315 n_iter = nb_max_exemples/batch_size # nb of max times we are allowed to run through all exemples |
299 n_iter = n_iter/n_minibatches + 1 | 316 n_iter = n_iter/n_minibatches + 1 #round up |
300 n_iter=max(1,n_iter) # run at least once on short debug call | 317 n_iter=max(1,n_iter) # run at least once on short debug call |
301 # have a maximum of `n_iter` iterations through the entire dataset | 318 |
302 | 319 |
303 if verbose == True: | 320 if verbose == True: |
304 print 'looping at most %d times through the data set' %n_iter | 321 print 'looping at most %d times through the data set' %n_iter |
305 for iter in xrange(n_iter* n_minibatches): | 322 for iter in xrange(n_iter* n_minibatches): |
306 | 323 |
307 # get epoch and minibatch index | 324 # get epoch and minibatch index |
308 epoch = iter / n_minibatches | 325 epoch = iter / n_minibatches |
309 minibatch_index = iter % n_minibatches | 326 minibatch_index = iter % n_minibatches |
310 | 327 |
328 | |
329 | |
311 # get the minibatches corresponding to `iter` modulo | 330 # get the minibatches corresponding to `iter` modulo |
312 # `len(train_batches)` | 331 # `len(train_batches)` |
313 x,y = train_batches[ minibatch_index ] | 332 x,y = train_batches[ minibatch_index ] |
314 # convert to float | 333 # convert to float |
315 x_float = x/255.0 | 334 x_float = x/255.0 |
316 cost_ij = train_model(x_float,y) | 335 cost_ij = train_model(x_float,y) |
317 | 336 |
318 if (iter+1) % validation_frequency == 0: | 337 if (iter+1) % validation_frequency == 0: |
319 # compute zero-one loss on validation set | 338 # compute zero-one loss on validation set |
320 | 339 |
321 this_validation_loss = 0. | 340 this_validation_loss = 0. |
322 for x,y in validation_batches: | 341 for x,y in validation_batches: |
323 # sum up the errors for each minibatch | 342 # sum up the errors for each minibatch |
324 x_float = x/255.0 | 343 x_float = x/255.0 |
325 this_validation_loss += test_model(x_float,y) | 344 this_validation_loss += test_model(x_float,y) |
326 # get the average by dividing with the number of minibatches | 345 # get the average by dividing with the number of minibatches |
327 this_validation_loss /= len(validation_batches) | 346 this_validation_loss /= len(validation_batches) |
347 #save the validation loss | |
348 total_validation_error_list.append(this_validation_loss) | |
349 | |
350 #get the training error rate | |
351 this_train_loss=0 | |
352 for x,y in train_batches: | |
353 # sum up the errors for each minibatch | |
354 x_float = x/255.0 | |
355 this_train_loss += test_model(x_float,y) | |
356 # get the average by dividing with the number of minibatches | |
357 this_train_loss /= len(train_batches) | |
358 #save the validation loss | |
359 total_train_error_list.append(this_train_loss) | |
360 if(this_train_loss<best_training_error): | |
361 best_training_error=this_train_loss | |
362 | |
328 if verbose == True: | 363 if verbose == True: |
329 print('epoch %i, minibatch %i/%i, validation error %f %%' % \ | 364 print('epoch %i, minibatch %i/%i, validation error %f, training error %f %%' % \ |
330 (epoch, minibatch_index+1, n_minibatches, \ | 365 (epoch, minibatch_index+1, n_minibatches, \ |
331 this_validation_loss*100.)) | 366 this_validation_loss*100.,this_train_loss*100)) |
367 | |
368 | |
369 #save the learning rate | |
370 learning_rate_list.append(classifier.lr.value) | |
332 | 371 |
333 | 372 |
334 # if we got the best validation score until now | 373 # if we got the best validation score until now |
335 if this_validation_loss < best_validation_loss: | 374 if this_validation_loss < best_validation_loss: |
336 | |
337 #improve patience if loss improvement is good enough | |
338 if this_validation_loss < best_validation_loss * \ | |
339 improvement_threshold : | |
340 patience = max(patience, iter * patience_increase) | |
341 elif verbose == True: | |
342 print 'slow convergence stop' | |
343 | |
344 # save best validation score and iteration number | 375 # save best validation score and iteration number |
345 best_validation_loss = this_validation_loss | 376 best_validation_loss = this_validation_loss |
346 best_iter = iter | 377 best_iter = iter |
347 | 378 # reset patience if we are going down again |
379 # so we continue exploring | |
380 patience=nb_max_exemples/batch_size | |
348 # test it on the test set | 381 # test it on the test set |
349 test_score = 0. | 382 test_score = 0. |
350 for x,y in test_batches: | 383 for x,y in test_batches: |
351 x_float=x/255.0 | 384 x_float=x/255.0 |
352 test_score += test_model(x_float,y) | 385 test_score += test_model(x_float,y) |
355 print((' epoch %i, minibatch %i/%i, test error of best ' | 388 print((' epoch %i, minibatch %i/%i, test error of best ' |
356 'model %f %%') % | 389 'model %f %%') % |
357 (epoch, minibatch_index+1, n_minibatches, | 390 (epoch, minibatch_index+1, n_minibatches, |
358 test_score*100.)) | 391 test_score*100.)) |
359 | 392 |
360 #if the validation error is going up, we are overfitting | 393 # if the validation error is going up, we are overfitting (or oscillating) |
361 #stop converging | 394 # stop converging but run at least to next validation |
362 elif this_validation_loss > best_validation_loss: | 395 # to check overfitting or ocsillation |
396 # the saved weights of the model will be a bit off in that case | |
397 elif this_validation_loss >= best_validation_loss: | |
363 #calculate the test error at this point and exit | 398 #calculate the test error at this point and exit |
364 # test it on the test set | 399 # test it on the test set |
365 if verbose==True: | 400 # however, if adaptive_lr is true, try reducing the lr to |
366 print ' We are diverging' | 401 # get us out of an oscilliation |
367 best_iter = iter | 402 if adaptive_lr==1: |
403 classifier.lr.value=classifier.lr.value/2.0 | |
404 | |
368 test_score = 0. | 405 test_score = 0. |
406 #cap the patience so we are allowed one more validation error | |
407 #calculation before aborting | |
408 patience = iter+validation_frequency+1 | |
369 for x,y in test_batches: | 409 for x,y in test_batches: |
370 x_float=x/255.0 | 410 x_float=x/255.0 |
371 test_score += test_model(x_float,y) | 411 test_score += test_model(x_float,y) |
372 test_score /= len(test_batches) | 412 test_score /= len(test_batches) |
373 if verbose == True: | 413 if verbose == True: |
374 print ' validation error is going up, stopping now' | 414 print ' validation error is going up, possibly stopping soon' |
375 print((' epoch %i, minibatch %i/%i, test error of best ' | 415 print((' epoch %i, minibatch %i/%i, test error of best ' |
376 'model %f %%') % | 416 'model %f %%') % |
377 (epoch, minibatch_index+1, n_minibatches, | 417 (epoch, minibatch_index+1, n_minibatches, |
378 test_score*100.)) | 418 test_score*100.)) |
379 | 419 |
380 break | 420 |
381 | 421 |
382 | 422 |
383 | 423 if iter>patience: |
384 if patience <= iter : | 424 print 'we have diverged' |
385 break | 425 break |
386 | 426 |
387 | 427 |
388 end_time = time.clock() | 428 end_time = time.clock() |
389 if verbose == True: | 429 if verbose == True: |
390 print(('Optimization complete. Best validation score of %f %% ' | 430 print(('Optimization complete. Best validation score of %f %% ' |
391 'obtained at iteration %i, with test performance %f %%') % | 431 'obtained at iteration %i, with test performance %f %%') % |
392 (best_validation_loss * 100., best_iter, test_score*100.)) | 432 (best_validation_loss * 100., best_iter, test_score*100.)) |
393 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) | 433 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) |
394 print iter | 434 print iter |
395 return (best_validation_loss * 100.,test_score*100.,best_iter*batch_size,(end_time-start_time)/60) | 435 |
436 #save the model and the weights | |
437 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) | |
438 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ | |
439 learning_rate_list=learning_rate_list) | |
440 | |
441 return (best_training_error*100.0,best_validation_loss * 100.,test_score*100.,best_iter*batch_size,(end_time-start_time)/60) | |
396 | 442 |
397 | 443 |
398 if __name__ == '__main__': | 444 if __name__ == '__main__': |
399 mlp_full_mnist() | 445 mlp_full_mnist() |
400 | 446 |
401 def jobman_mlp_full_nist(state,channel): | 447 def jobman_mlp_full_nist(state,channel): |
402 (validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\ | 448 (train_error,validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\ |
403 nb_max_exemples=state.nb_max_exemples,\ | 449 nb_max_exemples=state.nb_max_exemples,\ |
404 nb_hidden=state.nb_hidden) | 450 nb_hidden=state.nb_hidden,\ |
451 adaptive_lr=state.adaptive_lr) | |
452 state.train_error=train_error | |
405 state.validation_error=validation_error | 453 state.validation_error=validation_error |
406 state.test_error=test_error | 454 state.test_error=test_error |
407 state.nb_exemples=nb_exemples | 455 state.nb_exemples=nb_exemples |
408 state.time=time | 456 state.time=time |
409 return channel.COMPLETE | 457 return channel.COMPLETE |
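
For readers skimming the diff, the unchanged parts of the MLP class compute a tanh hidden layer followed by a softmax output and an argmax prediction. Below is a numpy-only sketch of that forward pass with toy shapes matching the NIST setup (32*32 inputs, 500 hidden units, 62 classes); the helper names are illustrative and this is not code from the repository.

    import numpy

    def softmax(z):
        # subtract the row-wise max for numerical stability
        e = numpy.exp(z - z.max(axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    def mlp_forward(x, W1, b1, W2, b2):
        hidden = numpy.tanh(numpy.dot(x, W1) + b1)          # self.hidden
        p_y_given_x = softmax(numpy.dot(hidden, W2) + b2)   # self.p_y_given_x
        y_pred = numpy.argmax(p_y_given_x, axis=1)          # self.y_pred
        return p_y_given_x, y_pred

    # toy batch: 20 rasterized 32x32 images, random small weights
    rng = numpy.random.RandomState(0)
    x = rng.rand(20, 32 * 32)
    W1 = rng.uniform(-0.1, 0.1, (32 * 32, 500)); b1 = numpy.zeros(500)
    W2 = rng.uniform(-0.1, 0.1, (500, 62));      b2 = numpy.zeros(62)
    probs, preds = mlp_forward(x, W1, b1, W2, b2)
    print(probs.shape, preds[:5])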
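
The substantive change in this revision is the adaptive learning rate: the rate is stored on the classifier as a Theano shared variable (classifier.lr) so the gradient-descent updates reference it instead of the Python constant, and when the validation error stops improving it is halved rather than aborting training outright, with the patience counter capped so at most one further validation happens before the loop gives up. The following is a framework-free sketch of that control flow; train_one_minibatch and validate are hypothetical stand-ins for the compiled Theano functions.

    def train_with_adaptive_lr(train_batches, train_one_minibatch, validate,
                               learning_rate=0.01, adaptive_lr=1,
                               nb_max_exemples=1000000, batch_size=20):
        n_minibatches = len(train_batches)
        validation_frequency = n_minibatches          # validate once per pass over the data
        patience = nb_max_exemples // batch_size      # budget, counted in minibatches
        best_validation_loss = float('inf')
        lr = learning_rate                            # mutable, like classifier.lr

        n_iter = max(1, nb_max_exemples // (batch_size * n_minibatches) + 1)
        for it in range(n_iter * n_minibatches):
            x, y = train_batches[it % n_minibatches]
            train_one_minibatch(x / 255.0, y, lr)     # pixels rescaled to [0, 1]

            if (it + 1) % validation_frequency == 0:
                this_validation_loss = validate()
                if this_validation_loss < best_validation_loss:
                    best_validation_loss = this_validation_loss
                    patience = nb_max_exemples // batch_size   # keep exploring
                else:
                    # validation error went up: overfitting or oscillation
                    if adaptive_lr == 1:
                        lr = lr / 2.0                 # damp the oscillation first
                    # allow exactly one more validation pass before aborting
                    patience = it + validation_frequency + 1

            if it > patience:
                print('we have diverged')
                break
        return best_validation_loss, lr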
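
The revision also persists the final weights and the recorded error and learning-rate curves with numpy.savez instead of only returning summary numbers. A hedged sketch of reading those archives back, assuming numpy's default behaviour of appending '.npz' to the given names (so 'model.npy' ends up on disk as 'model.npy.npz'):

    import numpy

    # load the archives written at the end of mlp_full_nist (names as in the diff)
    results = numpy.load('results.npy.npz')
    learning_rate, nb_max_exemples, nb_hidden, adaptive_lr = results['config']
    validation_curve = results['total_validation_error_list']
    train_curve = results['total_train_error_list']
    lr_curve = results['learning_rate_list']

    model = numpy.load('model.npy.npz')
    W1, b1 = model['W1'], model['b1']
    W2, b2 = model['W2'], model['b2']
    print(W1.shape, len(validation_curve), lr_curve[-1])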