comparison baseline/mlp/mlp_nist.py @ 338:fca22114bb23

added async save, restart from old model and independent error calculation based on Arnaud's iterator
author xaviermuller
date Sat, 17 Apr 2010 12:42:48 -0400
parents 1763c64030d1
children 22efb4968054
--- 337:8d116d4a7593
+++ 338:fca22114bb23
@@ -161 +161 @@
             # represents a mistake in prediction
             return T.mean(T.neq(self.y_pred, y))
         else:
             raise NotImplementedError()
 
+def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz',
+                       data_set=0):
+
+    # allocate symbolic variables for the data
+    x = T.fmatrix()   # the data is presented as rasterized images
+    y = T.lvector()   # the labels are presented as a 1D vector of
+                      # [long int] labels
+
+    # load the saved model and create an mlp based on its dimensions
+    model=numpy.load(model_name)
+    W1=model['W1']
+    W2=model['W2']
+    b1=model['b1']
+    b2=model['b2']
+    nb_hidden=b1.shape[0]
+    input_dim=W1.shape[0]
+    nb_targets=b2.shape[0]
+    learning_rate=0.1
+
+    if data_set==0:
+        dataset=datasets.nist_all()
+    elif data_set==1:
+        dataset=datasets.nist_P07()
+
+    classifier = MLP( input=x,\
+                      n_in=input_dim,\
+                      n_hidden=nb_hidden,\
+                      n_out=nb_targets,
+                      learning_rate=learning_rate)
+
+    #overwrite the random initial weights with the weights from the model
+    classifier.W1.value=W1
+    classifier.W2.value=W2
+    classifier.b1.value=b1
+    classifier.b2.value=b2
+
+    cost = classifier.negative_log_likelihood(y) \
+         + 0.0 * classifier.L1 \
+         + 0.0 * classifier.L2_sqr
+
+    # compiling a theano function that computes the mistakes that are made by
+    # the model on a minibatch
+    test_model = theano.function([x,y], classifier.errors(y))
+
+    #get the test error, averaged over minibatches of 20 examples
+    #(the per-class error will need a batch size of 1; to be upgraded later)
+    test_score=0
+    temp=0
+    for xt,yt in dataset.test(20):
+        test_score += test_model(xt,yt)
+        temp = temp+1
+    test_score /= temp
+
+    return test_score*100
 
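Note: the new helper makes it possible to re-score a saved model on either NIST set without retraining. A minimal usage sketch, assuming this file is importable as mlp_nist and that a compatible best_model.npy.npz exists locally (both are assumptions, not part of this changeset):

# Hypothetical usage of the new helper; module name and model path are assumptions.
from mlp_nist import mlp_get_nist_error

# data_set=0 re-scores on datasets.nist_all(), data_set=1 on datasets.nist_P07()
err = mlp_get_nist_error(model_name='best_model.npy.npz', data_set=0)
print('NIST test error: %.2f%%' % err)   # the helper returns a percentage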
@@ -167 +236 @@
 def mlp_full_nist( verbose = 1,\
                    adaptive_lr = 0,\
                    data_set=0,\
                    learning_rate=0.01,\
                    L2_reg = 0.0001,\
                    nb_max_exemples=1000000,\
                    batch_size=20,\
                    nb_hidden = 30,\
                    nb_targets = 62,
                    tau=1e6,\
-                   lr_t2_factor=0.5):
+                   lr_t2_factor=0.5,\
+                   init_model=0,\
+                   channel=0):
 
 
+    if channel!=0:
+        channel.save()
     configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
 
     #save initial learning rate if classical adaptive lr is used
     initial_lr=learning_rate
-    max_div_count=3
+    max_div_count=1000
 
 
     total_validation_error_list = []
     total_train_error_list = []
     learning_rate_list=[]
@@ -213 +286 @@
                       n_hidden=nb_hidden,\
                       n_out=nb_targets,
                       learning_rate=learning_rate)
 
 
+    # check if we want to initialize the weights with a previously trained model
+    # dimensions must be consistent between the old model and the current
+    # configuration (nb_hidden and nb_targets)
+    if init_model!=0:
+        old_model=numpy.load(init_model)
+        classifier.W1.value=old_model['W1']
+        classifier.W2.value=old_model['W2']
+        classifier.b1.value=old_model['b1']
+        classifier.b2.value=old_model['b2']
 
 
     # the cost we minimize during training is the negative log likelihood of
     # the model plus the regularization terms (L1 and L2); cost is expressed
     # here symbolically
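Note: for init_model (and the loader in mlp_get_nist_error above) to work, the file only needs the four parameter arrays stored under the keys W1, W2, b1 and b2. A minimal sketch of producing such a file from a trained classifier; the async-save logic of this changeset is not reproduced here:

# Sketch: save the four parameters under the key names the loaders expect.
# numpy.savez appends .npz, so this writes best_model.npy.npz, matching the
# default path in mlp_get_nist_error.
import numpy
numpy.savez('best_model.npy',
            W1=classifier.W1.value, W2=classifier.W2.value,
            b1=classifier.b1.value, b2=classifier.b2.value)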
@@ -301 +382 @@
 
 
         #train model
         cost_ij = train_model(x,y)
 
-        if (minibatch_index+1) % validation_frequency == 0:
+        if (minibatch_index) % validation_frequency == 0:
             #save the current learning rate
             learning_rate_list.append(classifier.lr.value)
             divergence_flag_list.append(divergence_flag)
+
+            #save temp results so progress can be checked during training
+            numpy.savez('temp_results.npy',config=configuration,total_validation_error_list=total_validation_error_list,\
+                        learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list)
 
             # compute the validation error
             this_validation_loss = 0.
             temp=0
             for xv,yv in dataset.valid(1):
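Note: since numpy.savez appends .npz, the periodic dump above actually lands in temp_results.npy.npz. A sketch of checking on a running job from another process:

# Sketch: inspect the periodic dump while training is still running.
import numpy
r = numpy.load('temp_results.npy.npz')
print(r['config'])                            # [learning_rate, nb_max_exemples, nb_hidden, adaptive_lr]
print(r['total_validation_error_list'][-5:])  # most recent validation errors
print(r['learning_rate_list'][-1])            # current learning rate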
@@ -391 +476 @@
 
         #check if we have seen enough examples
         #force at least one epoch
         if epoch>0 and minibatch_index*batch_size>nb_max_exemples:
             break
+
+
+
 
 
         time_n= time_n + batch_size
         minibatch_index = minibatch_index + 1
 
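Note: the stop test counts examples as minibatch_index*batch_size and only fires after the first full epoch. With the defaults above the cap works out to:

# Illustration of the stopping rule with the default settings.
nb_max_exemples = 1000000
batch_size = 20
print(nb_max_exemples // batch_size)  # -> 50000 minibatches before the cap can trigger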
@@ -425 +513 @@
                                        nb_hidden=state.nb_hidden,\
                                        adaptive_lr=state.adaptive_lr,\
                                        tau=state.tau,\
                                        verbose = state.verbose,\
                                        lr_t2_factor=state.lr_t2_factor,
-                                       data_set=state.data_set)
+                                       data_set=state.data_set,
+                                       channel=channel)
     state.train_error=train_error
     state.validation_error=validation_error
     state.test_error=test_error
     state.nb_exemples=nb_exemples
     state.time=time
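Note: since channel defaults to 0 and is only dereferenced when non-zero, mlp_full_nist still runs outside jobman. A standalone sketch using only keyword arguments from the signature above; the 5-tuple return is inferred from the state assignments and should be treated as an assumption:

# Sketch: standalone run with no jobman channel (channel=0 skips channel.save()).
from mlp_nist import mlp_full_nist   # module name is an assumption

(train_error, validation_error, test_error,
 nb_exemples, elapsed) = mlp_full_nist(verbose=1,
                                       adaptive_lr=1,
                                       data_set=0,
                                       learning_rate=0.01,
                                       nb_hidden=30,
                                       nb_max_exemples=100000)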