ift6266: comparison of baseline/mlp/mlp_nist.py @ 377:0b7e64e8e93f
branch merge
author:   Arnaud Bergeron <abergeron@gmail.com>
date:     Sun, 25 Apr 2010 17:12:03 -0400
parents:  76b7182dd32e
children: 60a4432b8071
376:01445a75c702 | 377:0b7e64e8e93f |
---|---|
21 to do lr first, then add regularization) | 21 to do lr first, then add regularization) |
22 | 22 |
23 """ | 23 """ |
24 __docformat__ = 'restructuredtext en' | 24 __docformat__ = 'restructuredtext en' |
25 | 25 |
26 import sys | |
26 import pdb | 27 import pdb |
27 import numpy | 28 import numpy |
28 import pylab | 29 import pylab |
29 import theano | 30 import theano |
30 import theano.tensor as T | 31 import theano.tensor as T |
161 # represents a mistake in prediction | 162 # represents a mistake in prediction |
162 return T.mean(T.neq(self.y_pred, y)) | 163 return T.mean(T.neq(self.y_pred, y)) |
163 else: | 164 else: |
164 raise NotImplementedError() | 165 raise NotImplementedError() |
165 | 166 |
167 def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz', | |
168 data_set=0): | |
169 | |
170 | |
171 | |
172 # allocate symbolic variables for the data | |
173 x = T.fmatrix() # the data is presented as rasterized images | |
174 y = T.lvector() # the labels are presented as 1D vector of | |
175 # [long int] labels | |
176 | |
177 # load the data set and create an mlp based on the dimensions of the model | |
178 model=numpy.load(model_name) | |
179 W1=model['W1'] | |
180 W2=model['W2'] | |
181 b1=model['b1'] | |
182 b2=model['b2'] | |
183 nb_hidden=b1.shape[0] | |
184 input_dim=W1.shape[0] | |
185 nb_targets=b2.shape[0] | |
186 learning_rate=0.1 | |
187 | |
188 | |
189 if data_set==0: | |
190 dataset=datasets.nist_all() | |
191 elif data_set==1: | |
192 dataset=datasets.nist_P07() | |
193 | |
194 | |
195 classifier = MLP( input=x,\ | |
196 n_in=input_dim,\ | |
197 n_hidden=nb_hidden,\ | |
198 n_out=nb_targets, | |
199 learning_rate=learning_rate) | |
200 | |
201 | |
202 #overwrite weights with weights from the model |
203 classifier.W1.value=W1 | |
204 classifier.W2.value=W2 | |
205 classifier.b1.value=b1 | |
206 classifier.b2.value=b2 | |
207 | |
208 | |
209 cost = classifier.negative_log_likelihood(y) \ | |
210 + 0.0 * classifier.L1 \ | |
211 + 0.0 * classifier.L2_sqr | |
212 | |
213 # compiling a theano function that computes the mistakes that are made by | |
214 # the model on a minibatch | |
215 test_model = theano.function([x,y], classifier.errors(y)) | |
216 | |
217 | |
218 | |
219 #get the test error | |
220 #use a small batch size (20 here) so we can get the sub-class error |
221 #without messing with matrices (will be upgraded later) | |
222 test_score=0 | |
223 temp=0 | |
224 for xt,yt in dataset.test(20): | |
225 test_score += test_model(xt,yt) | |
226 temp = temp+1 | |
227 test_score /= temp | |
228 | |
229 | |
230 return test_score*100 | |
231 | |
232 | |
233 | |
234 | |
235 | |
166 | 236 |
167 def mlp_full_nist( verbose = 1,\ | 237 def mlp_full_nist( verbose = 1,\ |
168 adaptive_lr = 0,\ | 238 adaptive_lr = 0,\ |
169 data_set=0,\ | 239 data_set=0,\ |
170 learning_rate=0.01,\ | 240 learning_rate=0.01,\ |
172 L2_reg = 0.0001,\ | 242 L2_reg = 0.0001,\ |
173 nb_max_exemples=1000000,\ | 243 nb_max_exemples=1000000,\ |
174 batch_size=20,\ | 244 batch_size=20,\ |
175 nb_hidden = 30,\ | 245 nb_hidden = 30,\ |
176 nb_targets = 62, | 246 nb_targets = 62, |
177 tau=1e6,\ | 247 tau=1e6,\ |
178 lr_t2_factor=0.5): | 248 lr_t2_factor=0.5,\ |
179 | 249 init_model=0,\ |
180 | 250 channel=0): |
251 | |
252 | |
253 if channel!=0: | |
254 channel.save() | |
181 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] | 255 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] |
182 | 256 |
183 #save initial learning rate if classical adaptive lr is used | 257 #save initial learning rate if classical adaptive lr is used |
184 initial_lr=learning_rate | 258 initial_lr=learning_rate |
185 max_div_count=3 | 259 max_div_count=1000 |
186 | 260 |
187 | 261 |
188 total_validation_error_list = [] | 262 total_validation_error_list = [] |
189 total_train_error_list = [] | 263 total_train_error_list = [] |
190 learning_rate_list=[] | 264 learning_rate_list=[] |
193 | 267 |
194 if data_set==0: | 268 if data_set==0: |
195 dataset=datasets.nist_all() | 269 dataset=datasets.nist_all() |
196 elif data_set==1: | 270 elif data_set==1: |
197 dataset=datasets.nist_P07() | 271 dataset=datasets.nist_P07() |
272 elif data_set==2: | |
273 dataset=datasets.PNIST07() | |
198 | 274 |
199 | 275 |
200 | 276 |
201 | 277 |
202 ishape = (32,32) # this is the size of NIST images | 278 ishape = (32,32) # this is the size of NIST images |
213 n_hidden=nb_hidden,\ | 289 n_hidden=nb_hidden,\ |
214 n_out=nb_targets, | 290 n_out=nb_targets, |
215 learning_rate=learning_rate) | 291 learning_rate=learning_rate) |
216 | 292 |
217 | 293 |
294 # check if we want to initialise the weights with a previously calculated model | |
295 # dimensions must be consistent between old model and current configuration!!!!!! (nb_hidden and nb_targets) | |
296 if init_model!=0: | |
297 old_model=numpy.load(init_model) | |
298 classifier.W1.value=old_model['W1'] | |
299 classifier.W2.value=old_model['W2'] | |
300 classifier.b1.value=old_model['b1'] | |
301 classifier.b2.value=old_model['b2'] | |
218 | 302 |
219 | 303 |
220 # the cost we minimize during training is the negative log likelihood of | 304 # the cost we minimize during training is the negative log likelihood of |
221 # the model plus the regularization terms (L1 and L2); cost is expressed | 305 # the model plus the regularization terms (L1 and L2); cost is expressed |
222 # here symbolically | 306 # here symbolically |
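The two new keyword arguments above, `init_model` and `channel`, let a run be warm-started from a previously saved set of weights and report progress through an experiment channel (`channel.save()` is called whenever one is provided). A minimal sketch of such a call, not part of the changeset; the path and the value of `nb_hidden` are hypothetical, and `nb_hidden`/`nb_targets` must match the shapes stored in the archive, as the comment in the diff warns:

```python
# Hypothetical warm-start call; the model path and nb_hidden are illustrative only.
# The archive is expected to hold W1, W2, b1, b2, as written by
# numpy.savez('model.npy', ...) at the end of mlp_full_nist.
mlp_full_nist(verbose=1,
              data_set=0,                                  # 0 = nist_all, 1 = nist_P07, 2 = PNIST07
              nb_hidden=500,                               # must match the saved W1/b1 dimensions
              init_model='some_previous_run/model.npy.npz',
              channel=0)                                   # 0 = no experiment channel
```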
287 temp=0 | 371 temp=0 |
288 divergence_flag=0 | 372 divergence_flag=0 |
289 | 373 |
290 | 374 |
291 | 375 |
292 if verbose == 1: | 376 |
293 print 'starting training' | 377 print 'starting training' |
378 sys.stdout.flush() | |
294 while(minibatch_index*batch_size<nb_max_exemples): | 379 while(minibatch_index*batch_size<nb_max_exemples): |
295 | 380 |
296 for x, y in dataset.train(batch_size): | 381 for x, y in dataset.train(batch_size): |
297 | 382 |
298 #if we are using the classic learning rate decay, adjust it before training on the current mini-batch | 383 #if we are using the classic learning rate decay, adjust it before training on the current mini-batch |
301 | 386 |
302 | 387 |
303 #train model | 388 #train model |
304 cost_ij = train_model(x,y) | 389 cost_ij = train_model(x,y) |
305 | 390 |
306 if (minibatch_index+1) % validation_frequency == 0: | 391 if (minibatch_index) % validation_frequency == 0: |
307 #save the current learning rate | 392 #save the current learning rate |
308 learning_rate_list.append(classifier.lr.value) | 393 learning_rate_list.append(classifier.lr.value) |
309 divergence_flag_list.append(divergence_flag) | 394 divergence_flag_list.append(divergence_flag) |
395 | |
396 | |
310 | 397 |
311 # compute the validation error | 398 # compute the validation error |
312 this_validation_loss = 0. | 399 this_validation_loss = 0. |
313 temp=0 | 400 temp=0 |
314 for xv,yv in dataset.valid(1): | 401 for xv,yv in dataset.valid(1): |
317 temp=temp+1 | 404 temp=temp+1 |
318 # get the average by dividing with the number of minibatches | 405 # get the average by dividing with the number of minibatches |
319 this_validation_loss /= temp | 406 this_validation_loss /= temp |
320 #save the validation loss | 407 #save the validation loss |
321 total_validation_error_list.append(this_validation_loss) | 408 total_validation_error_list.append(this_validation_loss) |
322 if verbose == 1: | 409 |
323 print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % | 410 print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % |
324 (epoch, minibatch_index+1,classifier.lr.value, | 411 (epoch, minibatch_index+1,classifier.lr.value, |
325 this_validation_loss*100.)) | 412 this_validation_loss*100.)) |
413 sys.stdout.flush() | |
414 | |
415 #save temp results to check during training | |
416 numpy.savez('temp_results.npy',config=configuration,total_validation_error_list=total_validation_error_list,\ | |
417 learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list) | |
326 | 418 |
327 # if we got the best validation score until now | 419 # if we got the best validation score until now |
328 if this_validation_loss < best_validation_loss: | 420 if this_validation_loss < best_validation_loss: |
329 # save best validation score and iteration number | 421 # save best validation score and iteration number |
330 best_validation_loss = this_validation_loss | 422 best_validation_loss = this_validation_loss |
342 temp =0 | 434 temp =0 |
343 for xt,yt in dataset.test(batch_size): | 435 for xt,yt in dataset.test(batch_size): |
344 test_score += test_model(xt,yt) | 436 test_score += test_model(xt,yt) |
345 temp = temp+1 | 437 temp = temp+1 |
346 test_score /= temp | 438 test_score /= temp |
347 if verbose == 1: | 439 |
348 print(('epoch %i, minibatch %i, test error of best ' | 440 print(('epoch %i, minibatch %i, test error of best ' |
349 'model %f %%') % | 441 'model %f %%') % |
350 (epoch, minibatch_index+1, | 442 (epoch, minibatch_index+1, |
351 test_score*100.)) | 443 test_score*100.)) |
444 sys.stdout.flush() | |
352 | 445 |
353 # if the validation error is going up, we are overfitting (or oscillating) | 446 # if the validation error is going up, we are overfitting (or oscillating) |
354 # check if we are allowed to continue and if we will adjust the learning rate | 447 # check if we are allowed to continue and if we will adjust the learning rate |
355 elif this_validation_loss >= best_validation_loss: | 448 elif this_validation_loss >= best_validation_loss: |
356 | 449 |
372 temp=0 | 465 temp=0 |
373 for xt,yt in dataset.test(batch_size): | 466 for xt,yt in dataset.test(batch_size): |
374 test_score += test_model(xt,yt) | 467 test_score += test_model(xt,yt) |
375 temp=temp+1 | 468 temp=temp+1 |
376 test_score /= temp | 469 test_score /= temp |
377 if verbose == 1: | 470 |
378 print ' validation error is going up, possibly stopping soon' | 471 print ' validation error is going up, possibly stopping soon' |
379 print((' epoch %i, minibatch %i, test error of best ' | 472 print((' epoch %i, minibatch %i, test error of best ' |
380 'model %f %%') % | 473 'model %f %%') % |
381 (epoch, minibatch_index+1, | 474 (epoch, minibatch_index+1, |
382 test_score*100.)) | 475 test_score*100.)) |
476 sys.stdout.flush() | |
383 | 477 |
384 | 478 |
385 | 479 |
386 # check early stop condition | 480 # check early stop condition |
387 if divergence_flag==max_div_count: | 481 if divergence_flag==max_div_count: |
391 | 485 |
392 #check if we have seen enough examples | 486 #check if we have seen enough examples |
393 #force one epoch at least | 487 #force one epoch at least |
394 if epoch>0 and minibatch_index*batch_size>nb_max_exemples: | 488 if epoch>0 and minibatch_index*batch_size>nb_max_exemples: |
395 break | 489 break |
490 | |
491 | |
492 | |
396 | 493 |
397 | 494 |
398 time_n= time_n + batch_size | 495 time_n= time_n + batch_size |
399 minibatch_index = minibatch_index + 1 | 496 minibatch_index = minibatch_index + 1 |
400 | 497 |
401 # we have finished looping through the training set | 498 # we have finished looping through the training set |
402 epoch = epoch+1 | 499 epoch = epoch+1 |
403 end_time = time.clock() | 500 end_time = time.clock() |
404 if verbose == 1: | 501 |
405 print(('Optimization complete. Best validation score of %f %% ' | 502 print(('Optimization complete. Best validation score of %f %% ' |
406 'obtained at iteration %i, with test performance %f %%') % | 503 'obtained at iteration %i, with test performance %f %%') % |
407 (best_validation_loss * 100., best_iter, test_score*100.)) | 504 (best_validation_loss * 100., best_iter, test_score*100.)) |
408 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) | 505 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) |
409 print minibatch_index | 506 print minibatch_index |
507 sys.stdout.flush() | |
410 | 508 |
411 #save the model and the weights | 509 #save the model and the weights |
412 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) | 510 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) |
413 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ | 511 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ |
414 learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list) | 512 learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list) |
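Both `numpy.savez` calls above append a `.npz` extension to the given filename, so the final curves end up in `results.npy.npz` (and the periodic snapshots in `temp_results.npy.npz`). A quick sketch, not part of the changeset, of how the saved curves could be inspected with pylab, which the file already imports:

```python
import numpy
import pylab

# numpy.savez appends '.npz', so 'results.npy' is stored as 'results.npy.npz'
results = numpy.load('results.npy.npz')

# keys follow the savez call in mlp_full_nist
valid_errors = results['total_validation_error_list']
lr_values = results['learning_rate_list']

pylab.subplot(2, 1, 1)
pylab.plot(valid_errors)
pylab.ylabel('validation error')
pylab.subplot(2, 1, 2)
pylab.plot(lr_values)
pylab.ylabel('learning rate')
pylab.show()
```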
425 nb_hidden=state.nb_hidden,\ | 523 nb_hidden=state.nb_hidden,\ |
426 adaptive_lr=state.adaptive_lr,\ | 524 adaptive_lr=state.adaptive_lr,\ |
427 tau=state.tau,\ | 525 tau=state.tau,\ |
428 verbose = state.verbose,\ | 526 verbose = state.verbose,\ |
429 lr_t2_factor=state.lr_t2_factor, | 527 lr_t2_factor=state.lr_t2_factor, |
430 data_set=state.data_set) | 528 data_set=state.data_set, |
529 channel=channel) | |
431 state.train_error=train_error | 530 state.train_error=train_error |
432 state.validation_error=validation_error | 531 state.validation_error=validation_error |
433 state.test_error=test_error | 532 state.test_error=test_error |
434 state.nb_exemples=nb_exemples | 533 state.nb_exemples=nb_exemples |
435 state.time=time | 534 state.time=time |
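For reference, the new `mlp_get_nist_error` helper added earlier in this changeset reloads such a saved model and returns its NIST test error as a percentage; a minimal usage sketch with a hypothetical path:

```python
# Hypothetical model path; the archive must contain W1, W2, b1 and b2.
err = mlp_get_nist_error(model_name='some_previous_run/model.npy.npz',
                         data_set=0)   # 0 = nist_all, 1 = nist_P07
print 'NIST test error: %f %%' % err
```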