comparison baseline/mlp/mlp_nist.py @ 445:868f82777839

added jobman all test + val error and sigmoid output
author xaviermuller
date Tue, 04 May 2010 11:17:27 -0400
parents 3dba84c0fbc1
children
comparing 444:18841eeb433f with 445:868f82777839
@@ -47,11 +47,11 @@
     sigmoid function while the top layer is a softmax layer.
     """



-    def __init__(self, input, n_in, n_hidden, n_out, learning_rate):
+    def __init__(self, input, n_in, n_hidden, n_out, learning_rate, detection_mode):
         """Initialize the parameters for the multilayer perceptron

         :param input: symbolic variable that describes the input of the
         architecture (one minibatch)

@@ -106,16 +106,25 @@
         self.hidden = T.tanh(T.dot(input, self.W1) + self.b1)



         # symbolic expression computing the values of the top layer
-        self.p_y_given_x = T.nnet.softmax(T.dot(self.hidden, self.W2) + self.b2)
+        if(detection_mode==0):
+            self.p_y_given_x = T.nnet.softmax(T.dot(self.hidden, self.W2) + self.b2)
+        else:
+            self.p_y_given_x = T.nnet.sigmoid(T.dot(self.hidden, self.W2) + self.b2)
+
+
+
+        # self.y_out_sig = T.sigmoid(T.dot(self.hidden, self.W2) + self.b2)

         # compute prediction as class whose probability is maximal in
         # symbolic form
         self.y_pred = T.argmax(self.p_y_given_x, axis=1)
-        self.y_pred_num = T.argmax(self.p_y_given_x[0:9], axis=1)
+
+        # self.y_pred_sig = T.argmax(self.y_out_sig, axis=1)
+




         # L1 norm ; one regularization option is to enforce L1 norm to
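
The new detection_mode switch changes what the output layer means: softmax couples the 62 outputs into a single probability distribution, while sigmoid scores each class independently, which suits detection. A minimal numpy sketch (not part of the patch) of the difference:

    import numpy

    def softmax(a):
        e = numpy.exp(a - a.max())
        return e / e.sum()                    # outputs compete, sum to 1

    def sigmoid(a):
        return 1.0 / (1.0 + numpy.exp(-a))    # independent scores in (0, 1)

    a = numpy.array([2.0, -1.0, 0.5])
    print softmax(a).sum()   # 1.0: pick the argmax as the single label
    print sigmoid(a)         # per-class detections; several can be "on"
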
@@ -141,10 +150,16 @@

         :param y: corresponds to a vector that gives for each example the
         :correct label
         """
         return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
+
+
+    def cross_entropy(self, y):
+        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]
+                       + T.sum(T.log(1 - self.p_y_given_x), axis=1)
+                       - T.log(1 - self.p_y_given_x)[T.arange(y.shape[0]), y])




     def errors(self, y):
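
The new cross_entropy cost is the multi-label binary cross-entropy with a one-hot target: the -log(1-p)[y] term cancels the correct class out of the sum over all units, leaving log p_y plus the sum over j != y of log(1-p_j). A numpy check (not in the patch) of that identity:

    import numpy

    p = numpy.array([[0.8, 0.1, 0.1],
                     [0.2, 0.7, 0.3]])   # sigmoid outputs, one row per example
    y = numpy.array([0, 1])              # correct class index per example
    n = numpy.arange(y.shape[0])

    # the patch's expression, transcribed from Theano to numpy
    patched = -numpy.mean(numpy.log(p)[n, y]
                          + numpy.sum(numpy.log(1 - p), axis=1)
                          - numpy.log(1 - p)[n, y])

    # per-unit binary cross-entropy against a one-hot target
    t = numpy.zeros_like(p); t[n, y] = 1.0
    per_unit = -numpy.mean(numpy.sum(t*numpy.log(p) + (1-t)*numpy.log(1-p),
                                     axis=1))
    assert numpy.allclose(patched, per_unit)
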
@@ -208,16 +223,26 @@
     vmin_exemple_count=0.0

     vmaj_error_count=0.0
     vmaj_exemple_count=0.0

+    nbc_error_count=0.0
+    vnbc_error_count=0.0
+


     if data_set==0:
-        dataset=datasets.nist_all()
+        print 'using nist'
+        dataset=datasets.nist_all()
     elif data_set==1:
+        print 'using p07'
         dataset=datasets.nist_P07()
+    elif data_set==2:
+        print 'using pnist'
+        dataset=datasets.PNIST07()
+
+



     #get the test error
     #use a batch size of 1 so we can get the sub-class error
@@ -241,11 +266,16 @@

         predicted_class=numpy.argmax(a1_out)
         wanted_class=yt[0]
         if(predicted_class!=wanted_class):
             total_error_count = total_error_count +1

+
+        if(not(predicted_class==wanted_class or ( (((predicted_class+26)==wanted_class) or ((predicted_class-26)==wanted_class)) and wanted_class>9) )):
+            nbc_error_count = nbc_error_count +1
+
+
         #treat digit error
         if(wanted_class<10):
             nb_exemple_count=nb_exemple_count + 1
             predicted_class=numpy.argmax(a1_out[0:10])
             if(predicted_class!=wanted_class):
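
The new test counts a "36 error": assuming the usual 62-class NIST layout (0-9 digits, 10-35 one case of the letters, 36-61 the other), a prediction that differs from the target by exactly 26 is the same letter in the other case and is forgiven. A hypothetical helper (not in the patch) restating the condition:

    def is_36class_error(predicted_class, wanted_class):
        # case-insensitive letter match: off by exactly 26 with a letter target
        same_letter = (abs(predicted_class - wanted_class) == 26
                       and wanted_class > 9)
        return predicted_class != wanted_class and not same_letter

    assert not is_36class_error(12, 38)   # same letter, other case: forgiven
    assert is_36class_error(12, 13)       # different letter: still an error
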
@@ -292,10 +322,13 @@
         predicted_class=numpy.argmax(a1_out)
         wanted_class=yt[0]
         if(predicted_class!=wanted_class):
             vtotal_error_count = vtotal_error_count +1

+        if(not(predicted_class==wanted_class or ( (((predicted_class+26)==wanted_class) or ((predicted_class-26)==wanted_class)) and wanted_class>9) )):
+            vnbc_error_count = vnbc_error_count +1
+
         #treat digit error
         if(wanted_class<10):
             vnb_exemple_count=vnb_exemple_count + 1
             predicted_class=numpy.argmax(a1_out[0:10])
             if(predicted_class!=wanted_class):
@@ -324,24 +357,67 @@
     print (('total error = %f') % ((total_error_count/total_exemple_count)*100.0))
     print (('number error = %f') % ((nb_error_count/nb_exemple_count)*100.0))
     print (('char error = %f') % ((char_error_count/char_exemple_count)*100.0))
     print (('min error = %f') % ((min_error_count/min_exemple_count)*100.0))
     print (('maj error = %f') % ((maj_error_count/maj_exemple_count)*100.0))
+    print (('36 error = %f') % ((nbc_error_count/total_exemple_count)*100.0))

     print (('valid total error = %f') % ((vtotal_error_count/vtotal_exemple_count)*100.0))
     print (('valid number error = %f') % ((vnb_error_count/vnb_exemple_count)*100.0))
     print (('valid char error = %f') % ((vchar_error_count/vchar_exemple_count)*100.0))
     print (('valid min error = %f') % ((vmin_error_count/vmin_exemple_count)*100.0))
     print (('valid maj error = %f') % ((vmaj_error_count/vmaj_exemple_count)*100.0))
+    print (('valid 36 error = %f') % ((vnbc_error_count/vtotal_exemple_count)*100.0))

-    print ((' num total = %d,%d') % (total_exemple_count,total_error_count))
-    print ((' num nb = %d,%d') % (nb_exemple_count,nb_error_count))
-    print ((' num min = %d,%d') % (min_exemple_count,min_error_count))
-    print ((' num maj = %d,%d') % (maj_exemple_count,maj_error_count))
-    print ((' num char = %d,%d') % (char_exemple_count,char_error_count))
-    return (total_error_count/total_exemple_count)*100.0
+    print (('num total = %d,%d') % (total_exemple_count,total_error_count))
+    print (('num nb = %d,%d') % (nb_exemple_count,nb_error_count))
+    print (('num min = %d,%d') % (min_exemple_count,min_error_count))
+    print (('num maj = %d,%d') % (maj_exemple_count,maj_error_count))
+    print (('num char = %d,%d') % (char_exemple_count,char_error_count))
+
+

+    total_error_count/=total_exemple_count
+    nb_error_count/=nb_exemple_count
+    char_error_count/=char_exemple_count
+    min_error_count/=min_exemple_count
+    maj_error_count/=maj_exemple_count
+    nbc_error_count/=total_exemple_count
+
+    vtotal_error_count/=vtotal_exemple_count
+    vnb_error_count/=vnb_exemple_count
+    vchar_error_count/=vchar_exemple_count
+    vmin_error_count/=vmin_exemple_count
+    vmaj_error_count/=vmaj_exemple_count
+    vnbc_error_count/=vtotal_exemple_count
+
+
+
+    return (total_error_count,nb_error_count,char_error_count,min_error_count,maj_error_count,nbc_error_count,\
+            vtotal_error_count,vnb_error_count,vchar_error_count,vmin_error_count,vmaj_error_count,vnbc_error_count)
+
+def jobman_get_error(state,channel):
+    (all_t_error,nb_t_error,char_t_error,min_t_error,maj_t_error,nbc_t_error,
+     all_v_error,nb_v_error,char_v_error,min_v_error,maj_v_error,nbc_v_error) = \
+        mlp_get_nist_error(data_set=state.data_set, model_name=state.model_name)
+
+    state.all_t_error=all_t_error*100.0
+    state.nb_t_error=nb_t_error*100.0
+    state.char_t_error=char_t_error*100.0
+    state.min_t_error=min_t_error*100.0
+    state.maj_t_error=maj_t_error*100.0
+    state.nbc_t_error=nbc_t_error*100.0
+
+    state.all_v_error=all_v_error*100.0
+    state.nb_v_error=nb_v_error*100.0
+    state.char_v_error=char_v_error*100.0
+    state.min_v_error=min_v_error*100.0
+    state.maj_v_error=maj_v_error*100.0
+    state.nbc_v_error=nbc_v_error*100.0
+
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    return channel.COMPLETE




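
jobman_get_error is meant to be launched through jobman, which supplies the state and channel objects; a hypothetical smoke test (none of these stand-in names are in the patch) can fake both:

    class FakeState(dict):
        # attribute access backed by the dict, like a jobman state
        __getattr__ = dict.__getitem__
        def __setattr__(self, key, value): self[key] = value

    class FakeChannel(object):
        COMPLETE = 'complete'

    state = FakeState(data_set=0, model_name='best_model.npy')  # hypothetical path
    print jobman_get_error(state, FakeChannel())  # fills state.*_t_error / *_v_error
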
@@ -356,11 +432,12 @@
                   nb_hidden = 30,\
                   nb_targets = 62,
                   tau=1e6,\
                   lr_t2_factor=0.5,\
                   init_model=0,\
-                  channel=0):
+                  channel=0,\
+                  detection_mode=0):


     if channel!=0:
         channel.save()
     configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
@@ -401,11 +478,12 @@
     # construct the logistic regression class
     classifier = MLP( input=x,\
                       n_in=32*32,\
                       n_hidden=nb_hidden,\
                       n_out=nb_targets,
-                      learning_rate=learning_rate)
+                      learning_rate=learning_rate,
+                      detection_mode=detection_mode)


     # check if we want to initialise the weights with a previously calculated model
     # dimensions must be consistent between old model and current configuration!!!!!! (nb_hidden and nb_targets)
     if init_model!=0:
@@ -419,13 +497,19 @@


     # the cost we minimize during training is the negative log likelihood of
     # the model plus the regularization terms (L1 and L2); cost is expressed
     # here symbolically
-    cost = classifier.negative_log_likelihood(y) \
-         + L1_reg * classifier.L1 \
-         + L2_reg * classifier.L2_sqr
+    if(detection_mode==0):
+        cost = classifier.negative_log_likelihood(y) \
+             + L1_reg * classifier.L1 \
+             + L2_reg * classifier.L2_sqr
+    else:
+        cost = classifier.cross_entropy(y) \
+             + L1_reg * classifier.L1 \
+             + L2_reg * classifier.L2_sqr
+

     # compiling a theano function that computes the mistakes that are made by
     # the model on a minibatch
     test_model = theano.function([x,y], classifier.errors(y))

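
Both cost branches add the same penalties; in this file (as in the MLP tutorial code it is based on) classifier.L1 and classifier.L2_sqr appear to be the summed absolute and squared weights of both layers. A numpy restatement under that assumption, with illustrative coefficients:

    import numpy

    W1 = numpy.random.randn(32*32, 30)   # hidden weights, as configured above
    W2 = numpy.random.randn(30, 62)      # output weights
    L1_penalty = abs(W1).sum() + abs(W2).sum()
    L2_sqr_penalty = (W1**2).sum() + (W2**2).sum()

    L1_reg, L2_reg = 0.00, 0.0001        # illustrative values, not from the run
    penalty = L1_reg*L1_penalty + L2_reg*L2_sqr_penalty
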
@@ -502,11 +586,10 @@
                 classifier.lr.value = tau*initial_lr/(tau+time_n)


                 #train model
                 cost_ij = train_model(x,y)
-
                 if (minibatch_index) % validation_frequency == 0:
                     #save the current learning rate
                     learning_rate_list.append(classifier.lr.value)
                     divergence_flag_list.append(divergence_flag)

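
The learning-rate line above implements a 1/t decay: lr(t) = tau*lr0/(tau + t), so the rate stays near lr0 for roughly the first tau examples and is halved at t = tau. A two-line illustration (values are illustrative, not from the run):

    initial_lr, tau = 0.1, 1e6
    for time_n in (0.0, 1e6, 4e6):
        print time_n, tau*initial_lr/(tau + time_n)   # 0.1, 0.05, 0.02
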
@@ -643,10 +726,11 @@
                         tau=state.tau,\
                         verbose = state.verbose,\
                         lr_t2_factor=state.lr_t2_factor,
                         data_set=state.data_set,
                         init_model=state.init_model,
+                        detection_mode = state.detection_mode,\
                         channel=channel)
     state.train_error=train_error
     state.validation_error=validation_error
     state.test_error=test_error
     state.nb_exemples=nb_exemples