Mercurial > ift6266
comparison baseline/mlp/mlp_nist.py @ 338:fca22114bb23
added async save, restart from an old model, and independent error calculation based on Arnaud's iterator
author | xaviermuller
date | Sat, 17 Apr 2010 12:42:48 -0400
parents | 1763c64030d1
children | 22efb4968054
337:8d116d4a7593 | 338:fca22114bb23
161 # represents a mistake in prediction | 161 # represents a mistake in prediction |
162 return T.mean(T.neq(self.y_pred, y)) | 162 return T.mean(T.neq(self.y_pred, y)) |
163 else: | 163 else: |
164 raise NotImplementedError() | 164 raise NotImplementedError() |
165 | 165 |
166 def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz', | |
167 data_set=0): | |
168 | |
169 | |
170 | |
171 # allocate symbolic variables for the data | |
172 x = T.fmatrix() # the data is presented as rasterized images | |
173 y = T.lvector() # the labels are presented as 1D vector of | |
174 # [long int] labels | |
175 | |
176 # load the saved model, pick the data set, and create an MLP matching the model's dimensions | |
177 model=numpy.load(model_name) | |
178 W1=model['W1'] | |
179 W2=model['W2'] | |
180 b1=model['b1'] | |
181 b2=model['b2'] | |
182 nb_hidden=b1.shape[0] | |
183 input_dim=W1.shape[0] | |
184 nb_targets=b2.shape[0] | |
185 learning_rate=0.1 | |
186 | |
187 | |
188 if data_set==0: | |
189 dataset=datasets.nist_all() | |
190 elif data_set==1: | |
191 dataset=datasets.nist_P07() | |
192 | |
193 | |
194 classifier = MLP( input=x,\ | |
195 n_in=input_dim,\ | |
196 n_hidden=nb_hidden,\ | |
197 n_out=nb_targets, | |
198 learning_rate=learning_rate) | |
199 | |
200 | |
201 #overwrite weights with weights from the loaded model | |
202 classifier.W1.value=W1 | |
203 classifier.W2.value=W2 | |
204 classifier.b1.value=b1 | |
205 classifier.b2.value=b2 | |
206 | |
207 | |
208 cost = classifier.negative_log_likelihood(y) \ | |
209 + 0.0 * classifier.L1 \ | |
210 + 0.0 * classifier.L2_sqr | |
211 | |
212 # compiling a theano function that computes the mistakes that are made by | |
213 # the model on a minibatch | |
214 test_model = theano.function([x,y], classifier.errors(y)) | |
215 | |
216 | |
217 | |
218 #get the test error | |
219 #iterate over the test set in small minibatches (here 20 examples) | |
220 #per sub-class error reporting will be added later | |
221 test_score=0 | |
222 temp=0 | |
223 for xt,yt in dataset.test(20): | |
224 test_score += test_model(xt,yt) | |
225 temp = temp+1 | |
226 test_score /= temp | |
227 | |
228 | |
229 return test_score*100 | |
230 | |
231 | |
232 | |
233 | |
234 | |
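A minimal usage sketch for the new mlp_get_nist_error helper added above. The import path and the model file below are assumptions; only the two keyword arguments, the data_set branches, and the percentage return value come from the diff.

# Usage sketch (assumption: the file is importable as a module named mlp_nist).
# data_set=0 selects datasets.nist_all(); data_set=1 selects datasets.nist_P07(),
# mirroring the branches inside the function.
from mlp_nist import mlp_get_nist_error

err = mlp_get_nist_error(model_name='my_run/best_model.npy.npz',  # hypothetical path
                         data_set=0)
print('NIST test error: %.2f%%' % err)  # the function already returns a percentage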
166 | 235 |
167 def mlp_full_nist( verbose = 1,\ | 236 def mlp_full_nist( verbose = 1,\ |
168 adaptive_lr = 0,\ | 237 adaptive_lr = 0,\ |
169 data_set=0,\ | 238 data_set=0,\ |
170 learning_rate=0.01,\ | 239 learning_rate=0.01,\ |
172 L2_reg = 0.0001,\ | 241 L2_reg = 0.0001,\ |
173 nb_max_exemples=1000000,\ | 242 nb_max_exemples=1000000,\ |
174 batch_size=20,\ | 243 batch_size=20,\ |
175 nb_hidden = 30,\ | 244 nb_hidden = 30,\ |
176 nb_targets = 62, | 245 nb_targets = 62, |
177 tau=1e6,\ | 246 tau=1e6,\ |
178 lr_t2_factor=0.5): | 247 lr_t2_factor=0.5,\ |
179 | 248 init_model=0,\ |
180 | 249 channel=0): |
250 | |
251 | |
252 if channel!=0: | |
253 channel.save() | |
181 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] | 254 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] |
182 | 255 |
183 #save initial learning rate if classical adaptive lr is used | 256 #save initial learning rate if classical adaptive lr is used |
184 initial_lr=learning_rate | 257 initial_lr=learning_rate |
185 max_div_count=3 | 258 max_div_count=1000 |
186 | 259 |
187 | 260 |
188 total_validation_error_list = [] | 261 total_validation_error_list = [] |
189 total_train_error_list = [] | 262 total_train_error_list = [] |
190 learning_rate_list=[] | 263 learning_rate_list=[] |
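The two new keyword arguments in the signature above, init_model and channel, let a run restart from a saved model and cooperate with a jobman-style experiment. A hedged sketch of a stand-alone call using them; the checkpoint path and hyperparameter values are illustrative only.

# Sketch: resuming training from a previous model outside of jobman.
# channel=0 keeps the default behaviour (no channel.save() call);
# init_model must point to an .npz file whose W1/W2/b1/b2 are consistent
# with nb_hidden and nb_targets.
results = mlp_full_nist(verbose=1,
                        adaptive_lr=1,
                        data_set=0,
                        nb_hidden=500,                                 # must match the saved model
                        init_model='previous_run/best_model.npy.npz',  # hypothetical path
                        channel=0)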
213 n_hidden=nb_hidden,\ | 286 n_hidden=nb_hidden,\ |
214 n_out=nb_targets, | 287 n_out=nb_targets, |
215 learning_rate=learning_rate) | 288 learning_rate=learning_rate) |
216 | 289 |
217 | 290 |
291 # check if we want to initialise the weights with a previously calculated model | |
292 # dimensions (nb_hidden and nb_targets) must be consistent between the old model and the current configuration | |
293 if init_model!=0: | |
294 old_model=numpy.load(init_model) | |
295 classifier.W1.value=old_model['W1'] | |
296 classifier.W2.value=old_model['W2'] | |
297 classifier.b1.value=old_model['b1'] | |
298 classifier.b2.value=old_model['b2'] | |
218 | 299 |
219 | 300 |
220 # the cost we minimize during training is the negative log likelihood of | 301 # the cost we minimize during training is the negative log likelihood of |
221 # the model plus the regularization terms (L1 and L2); cost is expressed | 302 # the model plus the regularization terms (L1 and L2); cost is expressed |
222 # here symbolically | 303 # here symbolically |
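The init_model branch above indexes the loaded .npz file with the keys W1, W2, b1 and b2, so any checkpoint intended for restarting has to be written with those names. A small sketch of producing such a file; the helper name is hypothetical, and numpy.savez appends the .npz suffix itself.

import numpy

def save_mlp_checkpoint(classifier, path='best_model.npy'):   # hypothetical helper
    # numpy.savez adds '.npz', giving 'best_model.npy.npz' on disk,
    # which matches the default path used by mlp_get_nist_error above.
    numpy.savez(path,
                W1=classifier.W1.value, W2=classifier.W2.value,
                b1=classifier.b1.value, b2=classifier.b2.value)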
301 | 382 |
302 | 383 |
303 #train model | 384 #train model |
304 cost_ij = train_model(x,y) | 385 cost_ij = train_model(x,y) |
305 | 386 |
306 if (minibatch_index+1) % validation_frequency == 0: | 387 if (minibatch_index) % validation_frequency == 0: |
307 #save the current learning rate | 388 #save the current learning rate |
308 learning_rate_list.append(classifier.lr.value) | 389 learning_rate_list.append(classifier.lr.value) |
309 divergence_flag_list.append(divergence_flag) | 390 divergence_flag_list.append(divergence_flag) |
391 | |
392 #save temp results to check during training | |
393 numpy.savez('temp_results.npy',config=configuration,total_validation_error_list=total_validation_error_list,\ | |
394 learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list) | |
310 | 395 |
311 # compute the validation error | 396 # compute the validation error |
312 this_validation_loss = 0. | 397 this_validation_loss = 0. |
313 temp=0 | 398 temp=0 |
314 for xv,yv in dataset.valid(1): | 399 for xv,yv in dataset.valid(1): |
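The interim numpy.savez dump above makes it possible to monitor a long run from another process. A quick sketch of reading it back; the file name and array keys come from the diff, and numpy appends .npz to the name passed to savez.

import numpy

# Inspect the interim results written at each validation checkpoint.
interim = numpy.load('temp_results.npy.npz')
print(interim['config'])                        # [learning_rate, nb_max_exemples, nb_hidden, adaptive_lr]
print(interim['total_validation_error_list'])   # validation error recorded so far
print(interim['learning_rate_list'])            # learning-rate schedule so far
print(interim['divergence_flag_list'])          # divergence flags so far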
391 | 476 |
392 #check if we have seen enough examples | 477 #check if we have seen enough examples |
393 #force one epoch at least | 478 #force one epoch at least |
394 if epoch>0 and minibatch_index*batch_size>nb_max_exemples: | 479 if epoch>0 and minibatch_index*batch_size>nb_max_exemples: |
395 break | 480 break |
481 | |
482 | |
483 | |
396 | 484 |
397 | 485 |
398 time_n= time_n + batch_size | 486 time_n= time_n + batch_size |
399 minibatch_index = minibatch_index + 1 | 487 minibatch_index = minibatch_index + 1 |
400 | 488 |
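For reference, the stopping rule above amounts to a simple minibatch count once at least one epoch has completed; with the signature defaults it works out as follows.

# Worked example of the stopping condition, using the defaults from the signature:
batch_size = 20
nb_max_exemples = 1000000
# after the first epoch, training breaks out of the loop once
# minibatch_index * batch_size exceeds nb_max_exemples, i.e. after roughly
print(nb_max_exemples // batch_size)   # 50000 minibatches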
425 nb_hidden=state.nb_hidden,\ | 513 nb_hidden=state.nb_hidden,\ |
426 adaptive_lr=state.adaptive_lr,\ | 514 adaptive_lr=state.adaptive_lr,\ |
427 tau=state.tau,\ | 515 tau=state.tau,\ |
428 verbose = state.verbose,\ | 516 verbose = state.verbose,\ |
429 lr_t2_factor=state.lr_t2_factor, | 517 lr_t2_factor=state.lr_t2_factor, |
430 data_set=state.data_set) | 518 data_set=state.data_set, |
519 channel=channel) | |
431 state.train_error=train_error | 520 state.train_error=train_error |
432 state.validation_error=validation_error | 521 state.validation_error=validation_error |
433 state.test_error=test_error | 522 state.test_error=test_error |
434 state.nb_exemples=nb_exemples | 523 state.nb_exemples=nb_exemples |
435 state.time=time | 524 state.time=time |