comparison deep/stacked_dae/v_sylvain/sgd_optimization.py @ 305:fe5d428c2acc

Added comments, as well as a bug fix so that the parameters get saved
author SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date Wed, 31 Mar 2010 21:00:59 -0400
parents 1cc535f3e254
children a5abd5c8b4b0
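The bug fix referred to above is the usual early-stopping snapshot pattern: the parameters are copied at the moment the validation error reaches a new best, instead of only once after fine-tuning ends, so the pickled file ends up holding the best model rather than the last one. The sketch below is a minimal, self-contained illustration of that pattern, not the project's actual code; the model object with its params, train_step and errors members, and the output file name, are hypothetical stand-ins (the real classifier exposes Theano shared variables with a .value attribute, as the hunks below show).

import pickle
from copy import copy

def finetune_sketch(model, train_batches, valid_batches, num_epochs=30):
    # Assumption: model.params is a list of objects exposing a mutable .value
    # attribute, in the style of old Theano shared variables.
    best_validation_loss = float('inf')
    parameters_finetune = []                      # initialised before the loop

    for epoch in range(1, num_epochs + 1):
        for x, y in train_batches:
            model.train_step(x, y)                # hypothetical update call

        losses = [model.errors(x, y) for x, y in valid_batches]
        this_validation_loss = sum(losses) / float(len(losses))

        if this_validation_loss < best_validation_loss:
            best_validation_loss = this_validation_loss
            # Take the snapshot while these are the best parameters seen so
            # far, not after the loop: that is the bug being corrected.
            parameters_finetune = [copy(p.value) for p in model.params]

    # What ends up on disk is therefore the best model, not the final one.
    f = open('params_finetune_sketch.pkl', 'wb')  # hypothetical file name
    pickle.dump(parameters_finetune, f)
    f.close()

The explicit copy() mirrors the changeset and keeps the snapshot from aliasing storage that later updates would overwrite.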
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	(304:1e4bf5a5b46d)
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	(305:fe5d428c2acc)
@@ -86,11 +86,11 @@
 
     def pretrain(self,dataset):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
 
-        un_fichier=int(819200.0/self.hp.minibatch_size) #Number of batches in a P07 batch
+        un_fichier=int(819200.0/self.hp.minibatch_size) #Number of batches in a P07 file
 
         start_time = time.clock()
         ## Pre-train layer-wise
         for i in xrange(self.classifier.n_layers):
             # go through pretraining epochs
@@ -196,10 +196,11 @@
         done_looping = False
         epoch = 0
 
         total_mb_index = 0
         minibatch_index = -1
+        parameters_finetune=[]
 
         while (epoch < num_finetune) and (not done_looping):
            epoch = epoch + 1
 
            for x,y in dataset.train(minibatch_size):
@@ -212,12 +213,12 @@
 
                 self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
                 if (total_mb_index+1) % validation_frequency == 0:
                     #minibatch_index += 1
-                    #The validation set is always NIST
-                    if ind_test == 0:
+                    #The validation set is always NIST (we want the model to be good on NIST)
+                    if ind_test == 0 | ind_test == 20:
                         iter=dataset_test.valid(minibatch_size)
                     else:
                         iter = dataset.valid(minibatch_size)
                     if self.max_minibatches:
                         iter = itermax(iter, self.max_minibatches)
@@ -225,12 +226,12 @@
                     this_validation_loss = numpy.mean(validation_losses)
 
                     self.series["validation_error"].\
                         append((epoch, minibatch_index), this_validation_loss*100.)
 
-                    print('epoch %i, minibatch %i, validation error on %s : %f %%' % \
-                          (epoch, minibatch_index+1,nom_test, \
+                    print('epoch %i, minibatch %i, validation error on NIST : %f %%' % \
+                          (epoch, minibatch_index+1, \
                            this_validation_loss*100.))
 
 
                     # if we got the best validation score until now
                     if this_validation_loss < best_validation_loss:
@@ -238,13 +239,14 @@
                         #improve patience if loss improvement is good enough
                         if this_validation_loss < best_validation_loss * \
                                improvement_threshold :
                             patience = max(patience, total_mb_index * patience_increase)
 
-                        # save best validation score and iteration number
+                        # save best validation score, iteration number and parameters
                         best_validation_loss = this_validation_loss
                         best_iter = total_mb_index
+                        parameters_finetune=[copy(x.value) for x in self.classifier.params]
 
                         # test it on the test set
                         iter = dataset.test(minibatch_size)
                         if self.max_minibatches:
                             iter = itermax(iter, self.max_minibatches)
@@ -271,22 +273,22 @@
                               (epoch, minibatch_index+1,nom_test,
                                test_score2*100.))
 
                 if patience <= total_mb_index:
                     done_looping = True
-                    break
+                    break #to exit the FOR loop
 
                 sys.stdout.flush()
 
                 # useful when doing tests
                 if self.max_minibatches and minibatch_index >= self.max_minibatches:
                     break
 
             self.series['params'].append((epoch,), self.classifier.all_params)
 
             if done_looping == True: #To exit completly the fine-tuning
-                break
+                break #to exit the WHILE loop
 
         end_time = time.clock()
         self.hp.update({'finetuning_time':end_time-start_time,\
                         'best_validation_error':best_validation_loss,\
                         'test_score':test_score,
@@ -297,38 +299,36 @@
               (best_validation_loss * 100., test_score*100.,nom_train))
         print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.))
 
         print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
 
+        sys.stdout.flush()
+
         #Save a copy of the parameters in a file to be able to get them in the future
 
         if special == 1: #To keep a track of the value of the parameters
-            parameters_finetune=[copy(x.value) for x in self.classifier.params]
             f = open('params_finetune_stanford.txt', 'w')
             pickle.dump(parameters_finetune,f)
             f.close()
 
         elif ind_test== 0: #To keep a track of the value of the parameters
-            parameters_finetune=[copy(x.value) for x in self.classifier.params]
             f = open('params_finetune_P07.txt', 'w')
             pickle.dump(parameters_finetune,f)
             f.close()
 
         elif ind_test== 1: #For the run with 2 finetunes. It will be faster.
-            parameters_finetune=[copy(x.value) for x in self.classifier.params]
             f = open('params_finetune_NIST.txt', 'w')
             pickle.dump(parameters_finetune,f)
             f.close()
 
         elif ind_test== 20: #To keep a track of the value of the parameters
-            parameters_finetune=[copy(x.value) for x in self.classifier.params]
             f = open('params_finetune_NIST_then_P07.txt', 'w')
             pickle.dump(parameters_finetune,f)
             f.close()
 
 
-    #Set parameters like they where right after pre-train
+    #Set parameters like they where right after pre-train or finetune
     def reload_parameters(self,which):
 
         #self.parameters_pre=pickle.load('params_pretrain.txt')
         f = open(which)
         self.parameters_pre=pickle.load(f)
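The last hunk stops just after reload_parameters() reads the pickled snapshot back into self.parameters_pre; the rest of the method is not visible here. Below is a hedged sketch of the remaining round trip, assuming the loaded values are simply pushed back into the matching parameters one by one.

import pickle

def reload_parameters_sketch(model, which):
    # Read the snapshot written at the end of fine-tuning ...
    f = open(which, 'rb')
    saved_values = pickle.load(f)
    f.close()
    # ... and copy each saved array back into the corresponding parameter.
    # Assumption: the parameters accept assignment through .value, as the
    # old Theano shared-variable API did.
    for param, value in zip(model.params, saved_values):
        param.value = value

A call such as reload_parameters_sketch(model, 'params_finetune_P07.txt') would then restore the state saved after the P07 fine-tuning run.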