comparison baseline/mlp/mlp_nist.py @ 445:868f82777839
added jobman all test + val error and sigmoid output
author:   xaviermuller
date:     Tue, 04 May 2010 11:17:27 -0400
parents:  3dba84c0fbc1
children: (none)
comparing 444:18841eeb433f (old, left) with 445:868f82777839 (new, right)
47 sigmoid function while the top layer is a softmax layer. | 47 sigmoid function while the top layer is a softmax layer. |
48 """ | 48 """ |
49 | 49 |
50 | 50 |
51 | 51 |
52 def __init__(self, input, n_in, n_hidden, n_out,learning_rate): | 52 def __init__(self, input, n_in, n_hidden, n_out,learning_rate,detection_mode): |
53 """Initialize the parameters for the multilayer perceptron | 53 """Initialize the parameters for the multilayer perceptron |
54 | 54 |
55 :param input: symbolic variable that describes the input of the | 55 :param input: symbolic variable that describes the input of the |
56 architecture (one minibatch) | 56 architecture (one minibatch) |
57 | 57 |
106 self.hidden = T.tanh(T.dot(input, self.W1)+ self.b1) | 106 self.hidden = T.tanh(T.dot(input, self.W1)+ self.b1) |
107 | 107 |
108 | 108 |
109 | 109 |
110 # symbolic expression computing the values of the top layer | 110 # symbolic expression computing the values of the top layer |
111 self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2) | 111 if(detection_mode==0): |
112 self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2) | |
113 else: | |
114 self.p_y_given_x= T.nnet.sigmoid(T.dot(self.hidden, self.W2)+self.b2) | |
115 | |
116 | |
117 | |
118 # self.y_out_sig= T.sigmoid(T.dot(self.hidden, self.W2)+self.b2) | |
112 | 119 |
113 # compute prediction as class whose probability is maximal in | 120 # compute prediction as class whose probability is maximal in |
114 # symbolic form | 121 # symbolic form |
115 self.y_pred = T.argmax( self.p_y_given_x, axis =1) | 122 self.y_pred = T.argmax( self.p_y_given_x, axis =1) |
116 self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1) | 123 |
124 # self.y_pred_sig = T.argmax( self.y_out_sig, axis =1) | |
125 | |
117 | 126 |
118 | 127 |
119 | 128 |
120 | 129 |
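
A note on the branch above: with detection_mode==0 the top layer stays a softmax, which couples the 62 class scores into a single normalized distribution, while detection_mode==1 replaces it with per-class sigmoids, so each output can be read as an independent detector for its class. A minimal NumPy sketch of the difference (illustrative only; these helper names are not part of the patch):

    import numpy as np

    def softmax(a):
        e = np.exp(a - a.max())   # shift by the max for numerical stability
        return e / e.sum()

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    scores = np.array([2.0, 1.0, 0.1])
    print(softmax(scores))   # sums to 1: classes compete with one another
    print(sigmoid(scores))   # each in (0, 1) on its own: detection scores
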
121 # L1 norm ; one regularization option is to enforce L1 norm to | 130 # L1 norm ; one regularization option is to enforce L1 norm to |
141 | 150 |
142 :param y: corresponds to a vector that gives for each example the | 151 :param y: corresponds to a vector that gives for each example the |
143 :correct label | 152 :correct label |
144 """ | 153 """ |
145 return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) | 154 return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) |
155 | |
156 | |
157 def cross_entropy(self, y): | |
158 return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]+T.sum(T.log(1-self.p_y_given_x), axis=1)-T.log(1-self.p_y_given_x)[T.arange(y.shape[0]),y]) | |
159 | |
160 | |
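
The one-line cross_entropy above looks like a rearrangement of the standard multi-label cross-entropy for a one-hot target at class y; writing p_c for the c-th sigmoid output, the identity it seems to exploit (my reading, but it matches the code term for term) is

    -\sum_c \left[ t_c \log p_c + (1 - t_c) \log(1 - p_c) \right]
        = -\left( \log p_y + \sum_c \log(1 - p_c) - \log(1 - p_y) \right)

so every sigmoid output is pushed toward its own 0/1 target independently, instead of being normalized against the other classes as in negative_log_likelihood.
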
146 | 161 |
147 | 162 |
148 | 163 |
149 | 164 |
150 def errors(self, y): | 165 def errors(self, y): |
208 vmin_exemple_count=0.0 | 223 vmin_exemple_count=0.0 |
209 | 224 |
210 vmaj_error_count=0.0 | 225 vmaj_error_count=0.0 |
211 vmaj_exemple_count=0.0 | 226 vmaj_exemple_count=0.0 |
212 | 227 |
228 nbc_error_count=0.0 | |
229 vnbc_error_count=0.0 | |
230 | |
213 | 231 |
214 | 232 |
215 if data_set==0: | 233 if data_set==0: |
216 dataset=datasets.nist_all() | 234 print 'using nist' |
235 dataset=datasets.nist_all() | |
217 elif data_set==1: | 236 elif data_set==1: |
237 print 'using p07' | |
218 dataset=datasets.nist_P07() | 238 dataset=datasets.nist_P07() |
239 elif data_set==2: | |
240 print 'using pnist' | |
241 dataset=datasets.PNIST07() | |
242 | |
243 | |
219 | 244 |
220 | 245 |
221 | 246 |
222 #get the test error | 247 #get the test error |
223 #use a batch size of 1 so we can get the sub-class error | 248 #use a batch size of 1 so we can get the sub-class error |
241 | 266 |
242 predicted_class=numpy.argmax(a1_out) | 267 predicted_class=numpy.argmax(a1_out) |
243 wanted_class=yt[0] | 268 wanted_class=yt[0] |
244 if(predicted_class!=wanted_class): | 269 if(predicted_class!=wanted_class): |
245 total_error_count = total_error_count +1 | 270 total_error_count = total_error_count +1 |
246 | 271 |
272 | |
273 if(not(predicted_class==wanted_class or ( (((predicted_class+26)==wanted_class) or ((predicted_class-26)==wanted_class)) and wanted_class>9) )): | |
274 nbc_error_count = nbc_error_count +1 | |
275 | |
276 | |
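
The new condition above is dense: it counts an error only when the prediction is wrong even up to letter case. Assuming the label layout used elsewhere in this project (0-9 digits, 10-35 uppercase, 36-61 lowercase, so the two cases of a letter sit 26 apart), a more readable restatement would be:

    def is_36class_error(predicted_class, wanted_class):
        # letters (labels > 9) also match their counterpart 26 positions
        # away, i.e. the same letter in the other case
        same_letter = (wanted_class > 9 and
                       abs(predicted_class - wanted_class) == 26)
        return not (predicted_class == wanted_class or same_letter)

This is what the "36 error" printed further down measures: the 62-way output collapsed to 10 digits plus 26 case-insensitive letters.
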
247 #treat digit error | 277 #treat digit error |
248 if(wanted_class<10): | 278 if(wanted_class<10): |
249 nb_exemple_count=nb_exemple_count + 1 | 279 nb_exemple_count=nb_exemple_count + 1 |
250 predicted_class=numpy.argmax(a1_out[0:10]) | 280 predicted_class=numpy.argmax(a1_out[0:10]) |
251 if(predicted_class!=wanted_class): | 281 if(predicted_class!=wanted_class): |
292 predicted_class=numpy.argmax(a1_out) | 322 predicted_class=numpy.argmax(a1_out) |
293 wanted_class=yt[0] | 323 wanted_class=yt[0] |
294 if(predicted_class!=wanted_class): | 324 if(predicted_class!=wanted_class): |
295 vtotal_error_count = vtotal_error_count +1 | 325 vtotal_error_count = vtotal_error_count +1 |
296 | 326 |
327 if(not(predicted_class==wanted_class or ( (((predicted_class+26)==wanted_class) or ((predicted_class-26)==wanted_class)) and wanted_class>9) )): | |
328 vnbc_error_count = vnbc_error_count + 1 |
329 | |
297 #treat digit error | 330 #treat digit error |
298 if(wanted_class<10): | 331 if(wanted_class<10): |
299 vnb_exemple_count=vnb_exemple_count + 1 | 332 vnb_exemple_count=vnb_exemple_count + 1 |
300 predicted_class=numpy.argmax(a1_out[0:10]) | 333 predicted_class=numpy.argmax(a1_out[0:10]) |
301 if(predicted_class!=wanted_class): | 334 if(predicted_class!=wanted_class): |
324 print (('total error = %f') % ((total_error_count/total_exemple_count)*100.0)) | 357 print (('total error = %f') % ((total_error_count/total_exemple_count)*100.0)) |
325 print (('number error = %f') % ((nb_error_count/nb_exemple_count)*100.0)) | 358 print (('number error = %f') % ((nb_error_count/nb_exemple_count)*100.0)) |
326 print (('char error = %f') % ((char_error_count/char_exemple_count)*100.0)) | 359 print (('char error = %f') % ((char_error_count/char_exemple_count)*100.0)) |
327 print (('min error = %f') % ((min_error_count/min_exemple_count)*100.0)) | 360 print (('min error = %f') % ((min_error_count/min_exemple_count)*100.0)) |
328 print (('maj error = %f') % ((maj_error_count/maj_exemple_count)*100.0)) | 361 print (('maj error = %f') % ((maj_error_count/maj_exemple_count)*100.0)) |
362 print (('36 error = %f') % ((nbc_error_count/total_exemple_count)*100.0)) | |
329 | 363 |
330 print (('valid total error = %f') % ((vtotal_error_count/vtotal_exemple_count)*100.0)) | 364 print (('valid total error = %f') % ((vtotal_error_count/vtotal_exemple_count)*100.0)) |
331 print (('valid number error = %f') % ((vnb_error_count/vnb_exemple_count)*100.0)) | 365 print (('valid number error = %f') % ((vnb_error_count/vnb_exemple_count)*100.0)) |
332 print (('valid char error = %f') % ((vchar_error_count/vchar_exemple_count)*100.0)) | 366 print (('valid char error = %f') % ((vchar_error_count/vchar_exemple_count)*100.0)) |
333 print (('valid min error = %f') % ((vmin_error_count/vmin_exemple_count)*100.0)) | 367 print (('valid min error = %f') % ((vmin_error_count/vmin_exemple_count)*100.0)) |
334 print (('valid maj error = %f') % ((vmaj_error_count/vmaj_exemple_count)*100.0)) | 368 print (('valid maj error = %f') % ((vmaj_error_count/vmaj_exemple_count)*100.0)) |
335 | 369 print (('valid 36 error = %f') % ((vnbc_error_count/vtotal_exemple_count)*100.0)) |
336 print ((' num total = %d,%d') % (total_exemple_count,total_error_count)) | 370 |
337 print ((' num nb = %d,%d') % (nb_exemple_count,nb_error_count)) | 371 print (('num total = %d,%d') % (total_exemple_count,total_error_count)) |
338 print ((' num min = %d,%d') % (min_exemple_count,min_error_count)) | 372 print (('num nb = %d,%d') % (nb_exemple_count,nb_error_count)) |
339 print ((' num maj = %d,%d') % (maj_exemple_count,maj_error_count)) | 373 print (('num min = %d,%d') % (min_exemple_count,min_error_count)) |
340 print ((' num char = %d,%d') % (char_exemple_count,char_error_count)) | 374 print (('num maj = %d,%d') % (maj_exemple_count,maj_error_count)) |
341 return (total_error_count/total_exemple_count)*100.0 | 375 print (('num char = %d,%d') % (char_exemple_count,char_error_count)) |
342 | 376 |
377 | |
378 | |
379 total_error_count/=total_exemple_count | |
380 nb_error_count/=nb_exemple_count | |
381 char_error_count/=char_exemple_count | |
382 min_error_count/=min_exemple_count | |
383 maj_error_count/=maj_exemple_count | |
384 nbc_error_count/=total_exemple_count | |
385 | |
386 vtotal_error_count/=vtotal_exemple_count | |
387 vnb_error_count/=vnb_exemple_count | |
388 vchar_error_count/=vchar_exemple_count | |
389 vmin_error_count/=vmin_exemple_count | |
390 vmaj_error_count/=vmaj_exemple_count | |
391 vnbc_error_count/=vtotal_exemple_count | |
392 | |
393 | |
394 | |
395 return (total_error_count,nb_error_count,char_error_count,min_error_count,maj_error_count,nbc_error_count,\ | |
396 vtotal_error_count,vnb_error_count,vchar_error_count,vmin_error_count,vmaj_error_count,vnbc_error_count) | |
397 | |
398 def jobman_get_error(state,channel): | |
399 (all_t_error,nb_t_error,char_t_error,min_t_error,maj_t_error,nbc_t_error, | |
400 all_v_error,nb_v_error,char_v_error,min_v_error,maj_v_error,nbc_v_error)=mlp_get_nist_error(data_set=state.data_set,\ | |
401 model_name=state.model_name) | |
402 | |
403 state.all_t_error=all_t_error*100.0 | |
404 state.nb_t_error=nb_t_error*100.0 | |
405 state.char_t_error=char_t_error*100.0 | |
406 state.min_t_error=min_t_error*100.0 | |
407 state.maj_t_error=maj_t_error*100.0 | |
408 state.nbc_t_error=nbc_t_error*100.0 | |
409 | |
410 state.all_v_error=all_v_error*100.0 | |
411 state.nb_v_error=nb_v_error*100.0 | |
412 state.char_v_error=char_v_error*100.0 | |
413 state.min_v_error=min_v_error*100.0 | |
414 state.maj_v_error=maj_v_error*100.0 | |
415 state.nbc_v_error=nbc_v_error*100.0 | |
416 | |
417 pylearn.version.record_versions(state,[theano,ift6266,pylearn]) | |
418 return channel.COMPLETE | |
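
jobman_get_error follows the usual jobman contract: read the job's parameters off state, run the evaluation, write the twelve error figures back onto state as percentages, record library versions, and return channel.COMPLETE so the scheduler marks the job done. If I remember jobman's command-line front end correctly (treat the exact syntax, and the model file name, as assumptions), such a job would be launched roughly as:

    jobman cmdline ift6266.baseline.mlp.mlp_nist.jobman_get_error \
        data_set=0 model_name=my_saved_model.npy

with the key=value pairs landing in state and channel supplied by jobman itself.
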
343 | 419 |
344 | 420 |
345 | 421 |
346 | 422 |
347 | 423 |
356 nb_hidden = 30,\ | 432 nb_hidden = 30,\ |
357 nb_targets = 62, | 433 nb_targets = 62, |
358 tau=1e6,\ | 434 tau=1e6,\ |
359 lr_t2_factor=0.5,\ | 435 lr_t2_factor=0.5,\ |
360 init_model=0,\ | 436 init_model=0,\ |
361 channel=0): | 437 channel=0,\ |
438 detection_mode=0): | |
362 | 439 |
363 | 440 |
364 if channel!=0: | 441 if channel!=0: |
365 channel.save() | 442 channel.save() |
366 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] | 443 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] |
401 # construct the logistic regression class | 478 # construct the logistic regression class |
402 classifier = MLP( input=x,\ | 479 classifier = MLP( input=x,\ |
403 n_in=32*32,\ | 480 n_in=32*32,\ |
404 n_hidden=nb_hidden,\ | 481 n_hidden=nb_hidden,\ |
405 n_out=nb_targets, | 482 n_out=nb_targets, |
406 learning_rate=learning_rate) | 483 learning_rate=learning_rate, |
484 detection_mode=detection_mode) | |
407 | 485 |
408 | 486 |
409 # check if we want to initialise the weights with a previously calculated model | 487 # check if we want to initialise the weights with a previously calculated model |
410 # dimensions must be consistent between old model and current configuration!!!!!! (nb_hidden and nb_targets) | 488 # dimensions must be consistent between old model and current configuration!!!!!! (nb_hidden and nb_targets) |
411 if init_model!=0: | 489 if init_model!=0: |
419 | 497 |
420 | 498 |
421 # the cost we minimize during training is the negative log likelihood of | 499 # the cost we minimize during training is the negative log likelihood of |
422 # the model plus the regularization terms (L1 and L2); cost is expressed | 500 # the model plus the regularization terms (L1 and L2); cost is expressed |
423 # here symbolically | 501 # here symbolically |
424 cost = classifier.negative_log_likelihood(y) \ | 502 if(detection_mode==0): |
503 cost = classifier.negative_log_likelihood(y) \ | |
425 + L1_reg * classifier.L1 \ | 504 + L1_reg * classifier.L1 \ |
426 + L2_reg * classifier.L2_sqr | 505 + L2_reg * classifier.L2_sqr |
506 else: | |
507 cost = classifier.cross_entropy(y) \ | |
508 + L1_reg * classifier.L1 \ | |
509 + L2_reg * classifier.L2_sqr | |
510 | |
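
With both new pieces in place, enabling the sigmoid path is just a matter of flipping the flag. A hypothetical call (the keyword names come from the signature earlier in this diff; the function name mlp_full_nist and the specific values are my guesses for illustration):

    mlp_full_nist(nb_hidden=500,       # illustrative; the default above is 30
                  nb_targets=62,
                  detection_mode=1)    # sigmoid outputs + cross_entropy cost
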
427 | 511 |
428 # compiling a theano function that computes the mistakes that are made by | 512 # compiling a theano function that computes the mistakes that are made by |
429 # the model on a minibatch | 513 # the model on a minibatch |
430 test_model = theano.function([x,y], classifier.errors(y)) | 514 test_model = theano.function([x,y], classifier.errors(y)) |
431 | 515 |
502 classifier.lr.value = tau*initial_lr/(tau+time_n) | 586 classifier.lr.value = tau*initial_lr/(tau+time_n) |
503 | 587 |
504 | 588 |
505 #train model | 589 #train model |
506 cost_ij = train_model(x,y) | 590 cost_ij = train_model(x,y) |
507 | |
508 if (minibatch_index) % validation_frequency == 0: | 591 if (minibatch_index) % validation_frequency == 0: |
509 #save the current learning rate | 592 #save the current learning rate |
510 learning_rate_list.append(classifier.lr.value) | 593 learning_rate_list.append(classifier.lr.value) |
511 divergence_flag_list.append(divergence_flag) | 594 divergence_flag_list.append(divergence_flag) |
512 | 595 |
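
The classifier.lr.value update at the top of this hunk is the familiar 1/t annealing schedule, lr(t) = tau * lr0 / (tau + t), with time_n counting examples seen and tau = 1e6 by default per the signature above. A quick check of its behaviour (lr0 = 0.1 is illustrative, not the file's default):

    tau, lr0 = 1e6, 0.1
    for t in (0.0, 1e6, 9e6):
        print(tau * lr0 / (tau + t))   # -> 0.1, 0.05, 0.01
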
643 tau=state.tau,\ | 726 tau=state.tau,\ |
644 verbose = state.verbose,\ | 727 verbose = state.verbose,\ |
645 lr_t2_factor=state.lr_t2_factor, | 728 lr_t2_factor=state.lr_t2_factor, |
646 data_set=state.data_set, | 729 data_set=state.data_set, |
647 init_model=state.init_model, | 730 init_model=state.init_model, |
731 detection_mode = state.detection_mode,\ | |
648 channel=channel) | 732 channel=channel) |
649 state.train_error=train_error | 733 state.train_error=train_error |
650 state.validation_error=validation_error | 734 state.validation_error=validation_error |
651 state.test_error=test_error | 735 state.test_error=test_error |
652 state.nb_exemples=nb_exemples | 736 state.nb_exemples=nb_exemples |