def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz',
                       data_set=0):
    """Compute the test-set classification error (in percent) of a saved MLP.

    Parameters
    ----------
    model_name : str
        Path to a numpy ``.npz`` archive holding the trained parameters
        under the keys ``'W1'``, ``'W2'``, ``'b1'`` and ``'b2'``.
    data_set : int
        Which dataset to evaluate on: ``0`` -> ``datasets.nist_all()``,
        ``1`` -> ``datasets.nist_P07()``.

    Returns
    -------
    float
        Mean per-minibatch misclassification rate on the test set,
        multiplied by 100.

    Raises
    ------
    ValueError
        If ``data_set`` is not 0 or 1.  (The original silently fell
        through, leaving ``dataset`` unbound and crashing later with a
        confusing ``NameError``.)
    """
    # Symbolic inputs: rasterized images and integer class labels.
    x = T.fmatrix()
    y = T.lvector()

    # Load the saved parameters.  The network dimensions are recovered
    # directly from the stored arrays, so any hidden-layer size works.
    model = numpy.load(model_name)
    W1 = model['W1']
    W2 = model['W2']
    b1 = model['b1']
    b2 = model['b2']
    nb_hidden = b1.shape[0]
    input_dim = W1.shape[0]
    nb_targets = b2.shape[0]
    # Unused for evaluation but required by the MLP constructor.
    learning_rate = 0.1

    if data_set == 0:
        dataset = datasets.nist_all()
    elif data_set == 1:
        dataset = datasets.nist_P07()
    else:
        # BUG FIX: guard against an unbound `dataset` for any other value.
        raise ValueError('data_set must be 0 (nist_all) or 1 (nist_P07), '
                         'got %r' % (data_set,))

    classifier = MLP(input=x,
                     n_in=input_dim,
                     n_hidden=nb_hidden,
                     n_out=nb_targets,
                     learning_rate=learning_rate)

    # Overwrite the randomly initialised weights with the saved model.
    # Dimensions are guaranteed consistent since they were read from it.
    classifier.W1.value = W1
    classifier.W2.value = W2
    classifier.b1.value = b1
    classifier.b2.value = b2

    # NOTE: the original also built an (unused) regularised cost with
    # 0.0 * L1 and 0.0 * L2_sqr; dropped here as dead code.

    # Compile a function returning the misclassification rate over one
    # minibatch of (images, labels).
    test_model = theano.function([x, y], classifier.errors(y))

    # Average the per-minibatch error over the whole test set, using
    # minibatches of 20 examples.  NOTE(review): the mean of per-batch
    # means equals the true mean only if every batch has the same size --
    # confirm the test iterator yields uniformly sized batches.
    test_score = 0.
    n_batches = 0
    for xt, yt in dataset.test(20):
        test_score += test_model(xt, yt)
        n_batches += 1
    test_score /= n_batches

    return test_score * 100
@@ -393,6 +478,9 @@ #force one epoch at least if epoch>0 and minibatch_index*batch_size>nb_max_exemples: break + + + time_n= time_n + batch_size @@ -427,7 +515,8 @@ tau=state.tau,\ verbose = state.verbose,\ lr_t2_factor=state.lr_t2_factor, - data_set=state.data_set) + data_set=state.data_set, + channel=channel) state.train_error=train_error state.validation_error=validation_error state.test_error=test_error