comparison baseline/log_reg/log_reg.py @ 205:10a801240bfc

Merge
author fsavard
date Thu, 04 Mar 2010 08:21:43 -0500
parents 777f48ba30df
children 7be1f086a89e
--- baseline/log_reg/log_reg.py    204:e1f5f66dd7dd
+++ baseline/log_reg/log_reg.py    205:10a801240bfc
@@ -33 +33 @@
 Christopher M. Bishop, section 4.3.2

 """
 __docformat__ = 'restructedtext en'

-import numpy, time, cPickle, gzip
+import numpy, time

 import theano
 import theano.tensor as T
-
+from ift6266 import datasets

 class LogisticRegression(object):
     """Multi-class Logistic Regression Class

     The logistic regression is fully described by a weight matrix :math:`W`
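
The new import pulls minibatches from ift6266.datasets instead of unpickling mnist.pkl.gz by hand. The exact API of that module is not shown in this file; judging from the calls made further down (dataset.train(batch_size), dataset.valid(batch_size) and dataset.test(batch_size), each yielding (x, y) minibatch pairs), it behaves roughly like this plain-NumPy sketch (all names below are illustrative):

    import numpy

    class ArrayDataset(object):
        """Stand-in for an ift6266.datasets object (assumed interface)."""
        def __init__(self, train_xy, valid_xy, test_xy):
            self._sets = {'train': train_xy, 'valid': valid_xy, 'test': test_xy}

        def _iter(self, which, batch_size):
            x, y = self._sets[which]
            # yield successive (x, y) minibatches of batch_size examples
            for i in xrange(0, len(y) - batch_size + 1, batch_size):
                yield x[i:i + batch_size], y[i:i + batch_size]

        def train(self, batch_size):
            return self._iter('train', batch_size)

        def valid(self, batch_size):
            return self._iter('valid', batch_size)

        def test(self, batch_size):
            return self._iter('test', batch_size)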
@@ -110 +110 @@
         # LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]]
         # and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v,
         # i.e., the mean log-likelihood across the minibatch.
         return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] )

+    def MSE(self, y):
+        return -T.mean(abs(self.p_y_given_x[T.arange(y.shape[0]), y] - y)**2)

     def errors( self, y ):
         """Return a float representing the number of errors in the minibatch
         over the total number of examples of the minibatch; zero-one
         loss over the size of the minibatch
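
The comments in negative_log_likelihood above describe NumPy-style advanced indexing: LP[T.arange(n), y] picks out, for each row i, the log-probability that row i assigns to its correct class y[i]. A minimal NumPy sketch of the same computation (the array values below are made up purely for illustration):

    import numpy

    LP = numpy.log(numpy.array([[0.7, 0.2, 0.1],
                                [0.1, 0.8, 0.1],
                                [0.3, 0.3, 0.4]]))    # log p(y|x), 3 examples, 3 classes
    y = numpy.array([0, 1, 2])                        # correct class of each example
    v = LP[numpy.arange(y.shape[0]), y]               # [LP[0,0], LP[1,1], LP[2,2]]
    nll = -v.mean()                                   # mean negative log-likelihood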
@@ -133 +135 @@
             # represents a mistake in prediction
             return T.mean( T.neq( self.y_pred, y ) )
         else:
             raise NotImplementedError()

-def shared_dataset( data_xy ):
-    """ Function that loads the dataset into shared variables
-
-    The reason we store our dataset in shared variables is to allow
-    Theano to copy it into the GPU memory (when code is run on GPU).
-    Since copying data into the GPU is slow, copying a minibatch every time
-    one is needed (the default behaviour if the data is not in a shared
-    variable) would lead to a large decrease in performance.
-    """
-    data_x, data_y = data_xy
-    shared_x = theano.shared( numpy.asarray( data_x, dtype = theano.config.floatX ) )
-    shared_y = theano.shared( numpy.asarray( data_y, dtype = theano.config.floatX ) )
-    # When storing data on the GPU it has to be stored as floats,
-    # therefore we will store the labels as ``floatX`` as well
-    # (``shared_y`` does exactly that). But during our computations
-    # we need them as ints (we use labels as indices, and if they are
-    # floats it doesn't make sense), therefore instead of returning
-    # ``shared_y`` we will have to cast it to int. This little hack
-    # lets us get around this issue.
-    return shared_x, T.cast( shared_y, 'int32' )
-
-def load_data_pkl_gz( dataset ):
-    ''' Loads the dataset
-
-    :type dataset: string
-    :param dataset: the path to the dataset (here MNIST)
-    '''
-
-    #--------------------------------------------------------------------------------------------------------------------
-    # Load Data
-    #--------------------------------------------------------------------------------------------------------------------
-
-
-    print '... loading data'
-
-    # Load the dataset
-    f = gzip.open( dataset, 'rb' )
-    train_set, valid_set, test_set = cPickle.load(f)
-    f.close()
-
-    test_set_x, test_set_y   = shared_dataset( test_set )
-    valid_set_x, valid_set_y = shared_dataset( valid_set )
-    train_set_x, train_set_y = shared_dataset( train_set )
-
-    rval = [ ( train_set_x, train_set_y ), ( valid_set_x, valid_set_y ), ( test_set_x, test_set_y ) ]
-    return rval
-
-##def load_data_ft( verbose = False,\
-##                  data_path = '/data/lisa/data/nist/by_class/'\
-##                  train_data = 'all/all_train_data.ft',\
-##                  train_labels = 'all/all_train_labels.ft',\
-##                  test_data = 'all/all_test_data.ft',\
-##                  test_labels = 'all/all_test_labels.ft'):
-##
-##    train_data_file = open(data_path + train_data)
-##    train_labels_file = open(data_path + train_labels)
-##    test_labels_file = open(data_path + test_data)
-##    test_data_file = open(data_path + test_labels)
-##
-##    raw_train_data = ft.read(train_data_file)
-##    raw_train_labels = ft.read(train_labels_file)
-##    raw_test_data = ft.read(test_labels_file)
-##    raw_test_labels = ft.read(test_data_file)
-##
-##    f.close()
-##    g.close()
-##    i.close()
-##    h.close()
-##
-##
-##    test_set_x, test_set_y = shared_dataset(test_set)
-##    valid_set_x, valid_set_y = shared_dataset(valid_set)
-##    train_set_x, train_set_y = shared_dataset(train_set)
-##
-##    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
-##    return rval
-##    #create a validation set the same size as the test size
-##    #use the end of the training array for this purpose
-##    #discard the last remaining so we get a %batch_size number
-##    test_size = len(raw_test_labels)
-##    test_size = int(test_size / batch_size)
-##    test_size *= batch_size
-##    train_size = len(raw_train_data)
-##    train_size = int(train_size / batch_size)
-##    train_size *= batch_size
-##    validation_size = test_size
-##    offset = train_size - test_size
-##    if verbose == True:
-##        print 'train size = %d' % train_size
-##        print 'test size = %d' % test_size
-##        print 'valid size = %d' % validation_size
-##        print 'offset = %d' % offset
-##
-##
-
 #--------------------------------------------------------------------------------------------------------------------
 # MAIN
 #--------------------------------------------------------------------------------------------------------------------

 def log_reg( learning_rate = 0.13, nb_max_examples = 1000000, batch_size = 50, \
-             dataset_name = 'mnist.pkl.gz', image_size = 28 * 28, nb_class = 10, \
+             dataset = datasets.nist_digits, image_size = 32 * 32, nb_class = 10, \
              patience = 5000, patience_increase = 2, improvement_threshold = 0.995 ):

     """
     Demonstrate stochastic gradient descent optimization of a log-linear
     model
@@ -252 +159 @@
     :param nb_max_examples: maximal number of training examples processed by the optimizer

     :type batch_size: int
     :param batch_size: size of the minibatch

-    :type dataset_name: string
-    :param dataset: the path of the MNIST dataset file from
-                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
+    :type dataset: dataset
+    :param dataset: a dataset instance from ift6266.datasets

     :type image_size: int
     :param image_size: size of the input image in pixels (width * height)

     :type nb_class: int
@@ -273 +179 @@
     :type improvement_threshold: float
     :param improvement_threshold: a relative improvement of this much is considered significant


     """
-    datasets = load_data_pkl_gz( dataset_name )
-
-    train_set_x, train_set_y = datasets[0]
-    valid_set_x, valid_set_y = datasets[1]
-    test_set_x,  test_set_y  = datasets[2]
-
-    # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.value.shape[0] / batch_size
-    n_valid_batches = valid_set_x.value.shape[0] / batch_size
-    n_test_batches  = test_set_x.value.shape[0]  / batch_size
-
     #--------------------------------------------------------------------------------------------------------------------
     # Build actual model
     #--------------------------------------------------------------------------------------------------------------------

     print '... building the model'
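
The deleted block above read the training arrays back out of Theano shared variables just to count minibatches; with the streaming dataset objects used now, no such counts are needed. For reference, the `.value` attribute it used is the older spelling of what current Theano exposes as get_value(); a small illustrative sketch (the array shape is arbitrary):

    import numpy
    import theano

    data = theano.shared(numpy.zeros((1000, 32 * 32),
                                     dtype=theano.config.floatX))
    batch_size = 50
    n_batches = data.get_value(borrow=True).shape[0] / batch_size   # -> 20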
@@ -306 +201 @@
     # the model in symbolic format
     cost = classifier.negative_log_likelihood( y )

     # compiling a Theano function that computes the mistakes that are made by
     # the model on a minibatch
-    test_model = theano.function( inputs = [ index ],
-                                  outputs = classifier.errors( y ),
-                                  givens = {
-                                      x: test_set_x[ index * batch_size: ( index + 1 ) * batch_size ],
-                                      y: test_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } )
-
-    validate_model = theano.function( inputs = [ index ],
-                                      outputs = classifier.errors( y ),
-                                      givens = {
-                                          x: valid_set_x[ index * batch_size: ( index + 1 ) * batch_size ],
-                                          y: valid_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } )
+    test_model = theano.function( inputs = [ x, y ],
+                                  outputs = classifier.errors( y ) )
+
+    validate_model = theano.function( inputs = [ x, y ],
+                                      outputs = classifier.errors( y ) )

     # compute the gradient of cost with respect to theta = ( W, b )
     g_W = T.grad( cost = cost, wrt = classifier.W )
     g_b = T.grad( cost = cost, wrt = classifier.b )

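
With inputs = [ x, y ], the compiled functions are handed the minibatch arrays directly, instead of receiving a minibatch index and having `givens` splice a slice of a shared dataset into the graph at compile time. A self-contained sketch of what the removed `givens` mechanism does (toy data; every name below is illustrative):

    import numpy
    import theano
    import theano.tensor as T

    data = theano.shared(numpy.arange(10, dtype=theano.config.floatX))
    index = T.lscalar('index')
    x = T.vector('x')
    # x is replaced at compile time by a slice of the shared data,
    # so the compiled function only needs the minibatch index
    f = theano.function([index], T.sum(x),
                        givens={x: data[index * 5: (index + 1) * 5]})
    f(0)   # sums elements 0..4 of the shared array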
@@ -329 +218 @@
                 classifier.b: classifier.b - learning_rate * g_b }

     # compiling a Theano function `train_model` that returns the cost and, at
     # the same time, updates the parameters of the model based on the rules
     # defined in `updates`
-    train_model = theano.function( inputs = [ index ],
+    train_model = theano.function( inputs = [ x, y ],
                                    outputs = cost,
-                                   updates = updates,
-                                   givens = {
-                                       x: train_set_x[ index * batch_size: ( index + 1 ) * batch_size ],
-                                       y: train_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } )
+                                   updates = updates )

     #--------------------------------------------------------------------------------------------------------------------
     # Train model
     #--------------------------------------------------------------------------------------------------------------------

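
train_model bundles the cost, its gradients and the `updates` dictionary into a single compiled step that now consumes (x, y) minibatches directly. The same pattern on a one-parameter toy problem (nothing below is part of this file; it only illustrates the grad/updates mechanism):

    import numpy
    import theano
    import theano.tensor as T

    w = theano.shared(numpy.float64(0.0), name='w')
    x = T.dscalar('x')
    y = T.dscalar('y')
    cost = (w * x - y) ** 2
    g_w = T.grad(cost = cost, wrt = w)
    train_step = theano.function(inputs = [x, y], outputs = cost,
                                 updates = {w: w - 0.1 * g_w})
    for _ in xrange(100):
        train_step(1.0, 2.0)    # w converges towards 2.0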
@@ -347 +233 @@
     patience = 5000                 # look at this many minibatches regardless
     patience_increase = 2           # wait this much longer when a new best is
                                     # found
     improvement_threshold = 0.995   # a relative improvement of this much is
                                     # considered significant
-    validation_frequency = min( n_train_batches, patience * 0.5 )
+    validation_frequency = patience * 0.5
                                     # go through this many
                                     # minibatches before checking the network
                                     # on the validation set; with the defaults
                                     # this means every 2500 minibatches

     best_params = None
     best_validation_loss = float('inf')
     test_score = 0.
     start_time = time.clock()

     done_looping = False
-    n_epochs = nb_max_examples / train_set_x.value.shape[0]
+    n_iters = nb_max_examples / batch_size
     epoch = 0
+    iter = 0

-    while ( epoch < n_epochs ) and ( not done_looping ):
+    while ( iter < n_iters ) and ( not done_looping ):

         epoch = epoch + 1
-        for minibatch_index in xrange( n_train_batches ):
+        for x, y in dataset.train(batch_size):

-            minibatch_avg_cost = train_model( minibatch_index )
+            minibatch_avg_cost = train_model( x, y )
             # iteration number
-            iter = epoch * n_train_batches + minibatch_index
+            iter += 1

-            if ( iter + 1 ) % validation_frequency == 0:
+            if iter % validation_frequency == 0:
                 # compute zero-one loss on validation set
-                validation_losses = [ validate_model( i ) for i in xrange( n_valid_batches ) ]
+                validation_losses = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ]
                 this_validation_loss = numpy.mean( validation_losses )

-                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                      ( epoch, minibatch_index + 1, n_train_batches, \
-                        this_validation_loss*100. ) )
+                print('epoch %i, iter %i, validation error %f %%' % \
+                      ( epoch, iter, this_validation_loss*100. ) )


                 # if we got the best validation score until now
                 if this_validation_loss < best_validation_loss:
                     #improve patience if loss improvement is good enough
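
With the default arguments the stopping quantities work out as follows; patience and validation_frequency are counted in minibatch iterations, not in individual examples (plain arithmetic, shown only to make the units explicit):

    nb_max_examples = 1000000
    batch_size = 50
    patience = 5000                           # minibatch iterations

    n_iters = nb_max_examples / batch_size    # at most 20000 minibatch updates
    validation_frequency = patience * 0.5     # validate every 2500 iterations
    # hence at most 20000 / 2500 = 8 validations unless patience gets extended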
@@ -391 +277 @@
                     patience = max( patience, iter * patience_increase )

                     best_validation_loss = this_validation_loss
                     # test it on the test set

-                    test_losses = [ test_model( i ) for i in xrange( n_test_batches ) ]
+                    test_losses = [ test_model( xt, yt ) for xt, yt in dataset.test(batch_size) ]
                     test_score = numpy.mean( test_losses )

-                    print(('     epoch %i, minibatch %i/%i, test error of best '
+                    print(('     epoch %i, iter %i, test error of best '
                            'model %f %%') % \
-                          ( epoch, minibatch_index + 1, n_train_batches, test_score*100. ))
+                          ( epoch, iter, test_score*100. ))

             if patience <= iter:
                 done_looping = True
                 break

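
A minimal way to run the new version, assuming the ift6266 package and its datasets module are importable; the import path for log_reg itself is inferred from this file's location in the repository and may differ:

    from ift6266 import datasets
    from baseline.log_reg.log_reg import log_reg   # hypothetical import path

    log_reg(learning_rate = 0.13, nb_max_examples = 1000000, batch_size = 50,
            dataset = datasets.nist_digits, image_size = 32 * 32, nb_class = 10)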