# HG changeset patch
# User xaviermuller
# Date 1270584052 14400
# Node ID 7439073664766a050410677633b73a019b49c046
# Parent 403b9e6ecfaa10cdf53cadd73fe525ee03773f4c
code clean up in progress

diff -r 403b9e6ecfaa -r 743907366476 baseline/mlp/mlp_nist.py
--- a/baseline/mlp/mlp_nist.py	Fri Apr 02 14:54:05 2010 -0400
+++ b/baseline/mlp/mlp_nist.py	Tue Apr 06 16:00:52 2010 -0400
@@ -33,6 +33,7 @@
 import pylearn
 import theano,pylearn.version,ift6266
 from pylearn.io import filetensor as ft
+from ift6266 import datasets
 
 data_path = '/data/lisa/data/nist/by_class/'
 
@@ -165,16 +166,13 @@
 
 def mlp_full_nist( verbose = 1,\
                    adaptive_lr = 0,\
-                   train_data = 'all/all_train_data.ft',\
-                   train_labels = 'all/all_train_labels.ft',\
-                   test_data = 'all/all_test_data.ft',\
-                   test_labels = 'all/all_test_labels.ft',\
+                   data_set=0,\
                    learning_rate=0.01,\
                    L1_reg = 0.00,\
                    L2_reg = 0.0001,\
                    nb_max_exemples=1000000,\
                    batch_size=20,\
-                   nb_hidden = 500,\
+                   nb_hidden = 30,\
                    nb_targets = 62,
                    tau=1e6,\
                    lr_t2_factor=0.5):
@@ -190,57 +188,11 @@
     learning_rate_list=[]
     best_training_error=float('inf');
     
+    if data_set==0:
+        dataset=datasets.nist_all()
     
-    
-    f = open(data_path+train_data)
-    g = open(data_path+train_labels)
-    h = open(data_path+test_data)
-    i = open(data_path+test_labels)
-    raw_train_data = ft.read(f)
-    raw_train_labels = ft.read(g)
-    raw_test_data = ft.read(h)
-    raw_test_labels = ft.read(i)
-    
-    f.close()
-    g.close()
-    i.close()
-    h.close()
-    #create a validation set the same size as the test size
-    #use the end of the training array for this purpose
-    #discard the last remaining so we get a %batch_size number
-    test_size=len(raw_test_labels)
-    test_size = int(test_size/batch_size)
-    test_size*=batch_size
-    train_size = len(raw_train_data)
-    train_size = int(train_size/batch_size)
-    train_size*=batch_size
-    validation_size = test_size
-    offset = train_size-test_size
-    if verbose == 1:
-        print 'train size = %d' %train_size
-        print 'test size = %d' %test_size
-        print 'valid size = %d' %validation_size
-        print 'offset = %d' %offset
-    
-    
-    train_set = (raw_train_data,raw_train_labels)
-    train_batches = []
-    for i in xrange(0, train_size-test_size, batch_size):
-        train_batches = train_batches + \
-            [(raw_train_data[i:i+batch_size], raw_train_labels[i:i+batch_size])]
-    
-    test_batches = []
-    for i in xrange(0, test_size, batch_size):
-        test_batches = test_batches + \
-            [(raw_test_data[i:i+batch_size], raw_test_labels[i:i+batch_size])]
-    
-    validation_batches = []
-    for i in xrange(0, test_size, batch_size):
-        validation_batches = validation_batches + \
-            [(raw_train_data[offset+i:offset+i+batch_size], raw_train_labels[offset+i:offset+i+batch_size])]
-    
     
     ishape     = (32,32) # this is the size of NIST images
 
@@ -249,10 +201,9 @@
     y = T.lvector()  # the labels are presented as 1D vector of
                      # [long int] labels
 
-    if verbose==1:
-        print 'finished parsing the data'
+    
     # construct the logistic regression class
-    classifier = MLP( input=x.reshape((batch_size,32*32)),\
+    classifier = MLP( input=x,\
                         n_in=32*32,\
                         n_hidden=nb_hidden,\
                         n_out=nb_targets,
@@ -289,7 +240,9 @@
     # the same time updates the parameter of the model based on the rules
    # defined in `updates`
     train_model = theano.function([x, y], cost, updates = updates )
-    n_minibatches = len(train_batches)
+    
+    
+    
 
 
 
@@ -303,8 +256,13 @@
     # This means we no longer stop on slow convergence as low learning rates stopped
     # too fast.
 
-    # no longer relevant
-    patience              =nb_max_exemples/batch_size
+    #approximate number of samples in the training set
+    #this is just to have a validation frequency
+    #roughly proportional to the training set
+    n_minibatches = 650000/batch_size
+    
+    
+    patience              =nb_max_exemples/batch_size #in units of minibatch
     patience_increase     = 2       # wait this much longer when a new best is
                                     # found
     improvement_threshold = 0.995   # a relative improvement of this much is
@@ -314,139 +272,121 @@
 
 
-    best_params          = None
+    
     best_validation_loss = float('inf')
     best_iter            = 0
     test_score           = 0.
     start_time = time.clock()
-    n_iter = nb_max_exemples/batch_size  # nb of max times we are allowed to run through all exemples
-    n_iter = n_iter/n_minibatches + 1 #round up
-    n_iter=max(1,n_iter) # run at least once on short debug call
     time_n=0 #in unit of exemples
+    minibatch_index=0
+    epoch=0
+    temp=0
+    
 
-    if verbose == 1:
-        print 'looping at most %d times through the data set' %n_iter
-    for iter in xrange(n_iter* n_minibatches):
-
-        # get epoch and minibatch index
-        epoch           = iter / n_minibatches
-        minibatch_index =  iter % n_minibatches
-
-
-        if adaptive_lr==2:
-            classifier.lr.value = tau*initial_lr/(tau+time_n)
-
+    print 'looking at at most %i examples' %nb_max_exemples
+    while(minibatch_index*batch_size<nb_max_exemples):
+        
+        for x, y in dataset.train(batch_size):
+            
+            minibatch_index = minibatch_index + 1
+            if adaptive_lr==2:
+                classifier.lr.value = tau*initial_lr/(tau+time_n)
+            
+            #train model
+            cost_ij = train_model(x,y)
+            
+            if (minibatch_index+1) % validation_frequency == 0:
+                
+                #save the current learning rate
+                learning_rate_list.append(classifier.lr.value)
+                
+                # compute the validation error
+                this_validation_loss = 0.
+                temp = 0
+                for xv,yv in dataset.valid(batch_size):
+                    # sum up the errors for each minibatch
+                    this_validation_loss += test_model(xv,yv)
+                    temp = temp+1
+                # get the average by dividing with the number of minibatches
+                this_validation_loss /= temp
 
-        #train model
-        x_float = x/255.0
-        cost_ij = train_model(x_float,y)
-
-        if (iter+1) % validation_frequency == 0:
-
-            #save the current learning rate
-            learning_rate_list.append(classifier.lr.value)
-
-            # compute the validation error
-            this_validation_loss = 0.
-            for x,y in validation_batches:
-                # sum up the errors for each minibatch
-                x_float = x/255.0
-                this_validation_loss += test_model(x_float,y)
-            # get the average by dividing with the number of minibatches
-            this_validation_loss /= len(validation_batches)
-            if verbose == 1:
-                print(('epoch %i, minibatch %i/%i, learning rate %f, validation error %f %%') %
-                    (epoch, minibatch_index+1, n_minibatches,
-                    classifier.lr.value, this_validation_loss*100.))
-
-            # if we got the best validation score until now
-            if this_validation_loss < best_validation_loss:
-                # save best validation score and iteration number
-                best_validation_loss = this_validation_loss
-                best_iter = iter
-                # reset patience if we are going down again
-                # so we continue exploring
-                patience=nb_max_exemples/batch_size
-                # test it on the test set
-                test_score = 0.
-                for x,y in test_batches:
-                    x_float=x/255.0
-                    test_score += test_model(x_float,y)
-                test_score /= len(test_batches)
-                if verbose == 1:
-                    print(('epoch %i, minibatch %i/%i, test error of best '
-                        'model %f %%') %
-                        (epoch, minibatch_index+1, n_minibatches,
-                        test_score*100.))
-
-            # if the validation error is going up, we are overfitting
-            # stop converging but run at least to next validation
-            # to check overfitting or oscillation
-            # the saved weights of the model will be a bit off in that case
-            elif this_validation_loss >= best_validation_loss:
-                #calculate the test error at this point and exit
-                # test it on the test set
-                # however, if adaptive_lr is true, try reducing the lr to
-                # get us out of an oscilliation
-                if adaptive_lr==1:
-                    classifier.lr.value=classifier.lr.value*lr_t2_factor
-
-                test_score = 0.
-                #cap the patience so we are allowed one more validation error
-                #calculation before aborting
-                patience = iter+validation_frequency+1
-                for x,y in test_batches:
-                    x_float=x/255.0
-                    test_score += test_model(x_float,y)
-                test_score /= len(test_batches)
-                if verbose == 1:
-                    print ' validation error is going up, possibly stopping soon'
-                    print(('     epoch %i, minibatch %i/%i, test error of best '
-                        'model %f %%') %
-                        (epoch, minibatch_index+1, n_minibatches,
-                        test_score*100.))
-
-
-
-
-        if iter>patience:
-            print 'we have diverged'
-            break
-
-
-        time_n= time_n + batch_size
+                print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') %
+                        (epoch, minibatch_index+1,classifier.lr.value,
+                        this_validation_loss*100.))
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = minibatch_index
+                    # reset patience if we are going down again
+                    # so we continue exploring
+                    patience=nb_max_exemples/batch_size
+                    # test it on the test set
+                    test_score = 0.
+                    temp =0
+                    for xt,yt in dataset.test(batch_size):
+                        test_score += test_model(xt,yt)
+                        temp = temp+1
+                    test_score /= temp
+                    if verbose == 1:
+                        print(('epoch %i, minibatch %i, test error of best '
+                            'model %f %%') %
+                            (epoch, minibatch_index+1,
+                            test_score*100.))
+
+                # if the validation error is going up, we are overfitting (or oscillating)
+                # stop converging but run at least to next validation
+                # to check overfitting or oscillation
+                # the saved weights of the model will be a bit off in that case
+                elif this_validation_loss >= best_validation_loss:
+                    #calculate the test error at this point and exit
+                    # test it on the test set
+                    # however, if adaptive_lr is true, try reducing the lr to
+                    # get us out of an oscillation
+                    if adaptive_lr==1:
+                        classifier.lr.value=classifier.lr.value*lr_t2_factor
+
+                    test_score = 0.
+                    #cap the patience so we are allowed one more validation error
+                    #calculation before aborting
+                    patience = minibatch_index+validation_frequency+1
+                    temp=0
+                    for xt,yt in dataset.test(batch_size):
+                        test_score += test_model(xt,yt)
+                        temp=temp+1
+                    test_score /= temp
+                    if verbose == 1:
+                        print ' validation error is going up, possibly stopping soon'
+                        print(('     epoch %i, minibatch %i, test error of best '
+                            'model %f %%') %
+                            (epoch, minibatch_index+1,
+                            test_score*100.))
+
+
+
+
+            if minibatch_index>patience:
+                print 'we have diverged'
+                break
+
+
+            time_n= time_n + batch_size
+        epoch = epoch+1
     end_time = time.clock()
     if verbose == 1:
         print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss * 100., best_iter, test_score*100.))
         print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
-    print iter
+    print minibatch_index
 
     #save the model and the weights
     numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value)
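
For reference, a minimal sketch of how the refactored entry point might be invoked once this patch is applied. The import path and the driver itself are illustrative assumptions, not part of the changeset; only the keyword arguments come from the new signature above, and data_set=0 (selecting datasets.nist_all()) is the only value the new code handles so far.

    # Hypothetical Python 2 driver script (illustrative, not part of the patch).
    # Assumes the repository layout makes baseline/mlp/mlp_nist.py importable
    # as shown; adjust the import to your checkout.
    from ift6266.baseline.mlp import mlp_nist

    # adaptive_lr=1 multiplies the learning rate by lr_t2_factor (0.5 here)
    # whenever the validation error rises, as in the elif branch above;
    # nb_hidden=30 mirrors the new default set by this changeset.
    mlp_nist.mlp_full_nist(verbose=1,
                           adaptive_lr=1,
                           data_set=0,
                           learning_rate=0.01,
                           nb_max_exemples=1000000,
                           batch_size=20,
                           nb_hidden=30,
                           tau=1e6,
                           lr_t2_factor=0.5)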