comparison baseline/mlp/ratio_classes/mlp_nist_ratio.py @ 443:89a49dae6cf3

merge
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Mon, 03 May 2010 18:38:58 -0400
parents d8129a09ffb1
children
comparison
comparing 442:d5b2b6397a5a (old) with 443:89a49dae6cf3 (new)
@@ -22,12 +22,11 @@
 to do lr first, then add regularization)
 
 """
 __docformat__ = 'restructedtext en'
 
-import ift6266
-from scripts import setup_batches
+import setup_batches
 import pdb
 import numpy
 
 import theano
 import theano.tensor as T
@@ -48,11 +47,11 @@
     sigmoid function while the top layer is a softmax layer.
     """
 
 
 
-    def __init__(self, input, n_in, n_hidden, n_out,learning_rate):
+    def __init__(self, input, n_in, n_hidden, n_out,learning_rate, test_subclass):
         """Initialize the parameters for the multilayer perceptron
 
         :param input: symbolic variable that describes the input of the
         architecture (one minibatch)
 
@@ -111,16 +110,24 @@
         # symbolic expression computing the values of the top layer
         self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2)
 
         # compute prediction as class whose probability is maximal in
         # symbolic form
-        self.y_pred = T.argmax( self.p_y_given_x, axis =1)
-        self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1)
+        #self.y_pred = T.argmax( self.p_y_given_x, axis =1)
+        #self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1)
 
+        self.test_subclass = test_subclass
 
-
-
+        #if (self.test_subclass == "u"):
+        #    self.y_pred = T.argmax( self.p_y_given_x[10:35], axis =1) + 10
+        #elif (self.test_subclass == "l"):
+        #    self.y_pred = T.argmax( self.p_y_given_x[35:], axis =1) + 35
+        #elif (self.test_subclass == "d"):
+        #    self.y_pred = T.argmax( self.p_y_given_x[0:9], axis =1)
+        #else:
+        self.y_pred = T.argmax( self.p_y_given_x, axis =1)
+
         # L1 norm ; one regularization option is to enforce L1 norm to
         # be small
         self.L1 = abs(self.W1).sum() + abs(self.W2).sum()
 
         # square of L2 norm ; one regularization option is to enforce
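The commented-out branches above sketch restricting the prediction to one band of the 62 NIST classes. A minimal illustration of that idea in plain numpy (the names probs, lo, hi are illustrative, not from the file); note that restricting by class means slicing the class axis (columns), whereas p_y_given_x[10:35] as written slices examples (rows), which is presumably part of why those lines stay commented out:

    import numpy as np

    def subrange_argmax(probs, lo, hi):
        """Predict only among classes lo..hi-1 of a (batch, n_classes) matrix."""
        # slice the class axis, then shift the argmax back to absolute labels
        return np.argmax(probs[:, lo:hi], axis=1) + lo

    probs = np.random.rand(20, 62)               # one minibatch of softmax outputs
    pred_upper = subrange_argmax(probs, 10, 36)  # uppercase letters, assuming labels 10-35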
@@ -176,13 +183,13 @@
                        L1_reg = 0.00,\
                        L2_reg = 0.0001,\
                        nb_max_exemples=1000000,\
                        batch_size=20,\
                        nb_hidden = 500,\
-                       nb_targets = 62,\
+                       nb_targets = 26,\
                        tau=1e6,\
-                       main_class="d",\
+                       main_class="l",\
                        start_ratio=1,\
                        end_ratio=1):
 
 
     configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
@@ -214,12 +221,13 @@
     print 'finished parsing the data'
     # construct the logistic regression class
     classifier = MLP( input=x.reshape((batch_size,32*32)),\
                         n_in=32*32,\
                         n_hidden=nb_hidden,\
-                        n_out=nb_targets,
-                        learning_rate=learning_rate)
+                        n_out=nb_targets,\
+                        learning_rate=learning_rate,\
+                        test_subclass=main_class)
 
 
 
 
     # the cost we minimize during training is the negative log likelihood of
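The truncated comment above refers to the usual training cost for this family of scripts: the negative log-likelihood plus the L1 and squared-L2 penalties defined in the class, weighted by L1_reg and L2_reg. A hedged numpy re-statement of that expression (not the Theano graph the script actually builds; array arguments are illustrative):

    import numpy as np

    def regularized_nll(p_y_given_x, y, W1, W2, l1=0.00, l2=0.0001):
        """Mean negative log-likelihood plus L1/L2 penalties on both weight matrices."""
        nll = -np.mean(np.log(p_y_given_x[np.arange(len(y)), y]))
        L1 = np.abs(W1).sum() + np.abs(W2).sum()
        L2_sqr = (W1 ** 2).sum() + (W2 ** 2).sum()
        return nll + l1 * L1 + l2 * L2_sqr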
@@ -283,11 +291,17 @@
     n_iter = nb_max_exemples/batch_size # nb of max times we are allowed to run through all exemples
     n_iter = n_iter/n_minibatches + 1 #round up
     n_iter=max(1,n_iter) # run at least once on short debug call
     time_n=0 #in unit of exemples
 
+    if (main_class == "u"):
+        class_offset = 10
+    elif (main_class == "l"):
+        class_offset = 36
+    else:
+        class_offset = 0
 
 
     if verbose == True:
         print 'looping at most %d times through the data set' %n_iter
     for iter in xrange(n_iter* n_minibatches):
 
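The new class_offset block maps raw NIST labels onto the reduced output range: with nb_targets = 26 and main_class = "l", lowercase labels become 0-25 before they reach the model. A small sketch of the remapping (pure numpy; the 10/36 offsets are the ones used above, and the layout digits 0-9 / uppercase 10-35 / lowercase 36-61 is an assumption consistent with them):

    import numpy as np

    OFFSETS = {"d": 0, "u": 10, "l": 36}   # start label of each subclass

    def to_subclass_labels(y, main_class):
        """Shift raw NIST labels so the chosen subclass starts at 0 (as y - class_offset does)."""
        return np.asarray(y) - OFFSETS[main_class]

    print(to_subclass_labels([36, 40, 61], "l"))   # -> [ 0  4 25]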
@@ -300,20 +314,24 @@
         classifier.lr.value = tau*initial_lr/(tau+time_n)
 
         # get the minibatches corresponding to `iter` modulo
         # `len(train_batches)`
         x,y = train_batches[ minibatch_index ]
+
+        y = y - class_offset
+
         # convert to float
         x_float = x/255.0
         cost_ij = train_model(x_float,y)
 
         if (iter+1) % validation_frequency == 0:
             # compute zero-one loss on validation set
 
             this_validation_loss = 0.
             for x,y in validation_batches:
                 # sum up the errors for each minibatch
+                y = y - class_offset
                 x_float = x/255.0
                 this_validation_loss += test_model(x_float,y)
             # get the average by dividing with the number of minibatches
             this_validation_loss /= len(validation_batches)
             #save the validation loss
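The first line of this hunk is the adaptive learning rate: a 1/t decay that stays near initial_lr while time_n is much smaller than tau (tau defaults to 1e6 examples above) and then shrinks roughly as tau/time_n. A minimal sketch of that schedule (plain Python, illustrative names):

    def decayed_lr(initial_lr, time_n, tau=1e6):
        """Same schedule as classifier.lr.value = tau*initial_lr/(tau+time_n)."""
        return tau * initial_lr / (tau + time_n)

    for seen in (0, 1e6, 4e6):
        print(seen, decayed_lr(0.01, seen))   # 0.01, then 0.005, then 0.002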
@@ -321,10 +339,11 @@
 
             #get the training error rate
             this_train_loss=0
             for x,y in train_batches:
                 # sum up the errors for each minibatch
+                y = y - class_offset
                 x_float = x/255.0
                 this_train_loss += test_model(x_float,y)
             # get the average by dividing with the number of minibatches
             this_train_loss /= len(train_batches)
             #save the training error
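Validation, training and test errors are all computed the same way in this revision: shift the labels by class_offset, rescale pixels to [0, 1], and average the per-minibatch error returned by test_model. A compact sketch of that pattern (the helper name and the callable are illustrative, not from the file):

    def mean_error(batches, test_model, class_offset):
        """Average zero-one loss over a list of (x, y) minibatches."""
        total = 0.
        for x, y in batches:
            y = y - class_offset      # map raw labels into the subclass range
            x_float = x / 255.0       # pixel values to [0, 1]
            total += test_model(x_float, y)
        return total / len(batches)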
@@ -353,10 +372,11 @@
                 # so we continue exploring
                 patience=nb_max_exemples/batch_size
                 # test it on the test set
                 test_score = 0.
                 for x,y in test_batches:
+                    y = y - class_offset
                     x_float=x/255.0
                     test_score += test_model(x_float,y)
                 test_score /= len(test_batches)
                 if verbose == True:
                     print((' epoch %i, minibatch %i/%i, test error of best '
@@ -379,10 +399,11 @@
                 test_score = 0.
                 #cap the patience so we are allowed one more validation error
                 #calculation before aborting
                 patience = iter+validation_frequency+1
                 for x,y in test_batches:
+                    y = y - class_offset
                     x_float=x/255.0
                     test_score += test_model(x_float,y)
                 test_score /= len(test_batches)
                 if verbose == True:
                     print ' validation error is going up, possibly stopping soon'
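Both test-set passes above sit inside the early-stopping logic: on an improvement, patience is reset to nb_max_exemples/batch_size so exploration continues; when validation error rises, patience is capped at iter+validation_frequency+1 so at most one more validation round runs before the loop aborts. A stripped-down sketch of that control flow (illustrative, not the script's exact branching):

    def update_patience(patience, iter, validation_frequency, improved,
                        nb_max_exemples, batch_size):
        """Reset patience on improvement; otherwise allow roughly one more validation pass."""
        if improved:
            return nb_max_exemples / batch_size      # keep exploring
        return iter + validation_frequency + 1       # abort soon after the next check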
@@ -419,17 +440,14 @@
 if __name__ == '__main__':
     mlp_full_nist(True)
 
 def jobman_mlp_full_nist(state,channel):
     (train_error,validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\
-                                                nb_max_exemples=state.nb_max_exemples,\
                                                 nb_hidden=state.nb_hidden,\
-                                                adaptive_lr=state.adaptive_lr,\
-                                                tau=state.tau,\
                                                 main_class=state.main_class,\
-                                                start_ratio=state.start_ratio,\
-                                                end_ratio=state.end_ratio)
+                                                start_ratio=state.ratio,\
+                                                end_ratio=state.ratio)
     state.train_error=train_error
     state.validation_error=validation_error
     state.test_error=test_error
     state.nb_exemples=nb_exemples
     state.time=time
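The jobman hook reads its hyperparameters from state and writes the results back onto it. A hypothetical driver showing the attributes this revision reads and writes (the SimpleNamespace stand-in for a jobman state is an assumption, not part of the project):

    from types import SimpleNamespace

    # attributes read by jobman_mlp_full_nist in this revision
    state = SimpleNamespace(learning_rate=0.01, nb_hidden=500,
                            main_class="l", ratio=1)

    # assuming this module's jobman_mlp_full_nist is in scope
    jobman_mlp_full_nist(state, channel=None)

    # attributes written back: train_error, validation_error, test_error,
    # nb_exemples, time
    print(state.test_error)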