comparison baseline/log_reg/log_reg.py @ 205:10a801240bfc
Merge
author | fsavard |
date | Thu, 04 Mar 2010 08:21:43 -0500 |
parents | 777f48ba30df |
children | 7be1f086a89e |
204:e1f5f66dd7dd | 205:10a801240bfc |
---|---|
33 Christopher M. Bishop, section 4.3.2 | 33 Christopher M. Bishop, section 4.3.2 |
34 | 34 |
35 """ | 35 """ |
36 __docformat__ = 'restructedtext en' | 36 __docformat__ = 'restructedtext en' |
37 | 37 |
38 import numpy, time, cPickle, gzip | 38 import numpy, time |
39 | 39 |
40 import theano | 40 import theano |
41 import theano.tensor as T | 41 import theano.tensor as T |
42 | 42 from ift6266 import datasets |
43 | 43 |
44 class LogisticRegression(object): | 44 class LogisticRegression(object): |
45 """Multi-class Logistic Regression Class | 45 """Multi-class Logistic Regression Class |
46 | 46 |
47 The logistic regression is fully described by a weight matrix :math:`W` | 47 The logistic regression is fully described by a weight matrix :math:`W` |
110 # LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]] | 110 # LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]] |
111 # and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v, | 111 # and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v, |
112 # i.e., the mean log-likelihood across the minibatch. | 112 # i.e., the mean log-likelihood across the minibatch. |
113 return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] ) | 113 return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] ) |
114 | 114 |
| 115 def MSE(self, y): |
| 116 return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]), y]-y)**2) |
115 | 117 |
116 def errors( self, y ): | 118 def errors( self, y ): |
117 """Return a float representing the number of errors in the minibatch | 119 """Return a float representing the number of errors in the minibatch |
118 over the total number of examples of the minibatch ; zero one | 120 over the total number of examples of the minibatch ; zero one |
119 loss over the size of the minibatch | 121 loss over the size of the minibatch |
133 # represents a mistake in prediction | 135 # represents a mistake in prediction |
134 return T.mean( T.neq( self.y_pred, y ) ) | 136 return T.mean( T.neq( self.y_pred, y ) ) |
135 else: | 137 else: |
136 raise NotImplementedError() | 138 raise NotImplementedError() |
137 | 139 |
138 def shared_dataset( data_xy ): | |
139 """ Function that loads the dataset into shared variables | |
140 | |
141 The reason we store our dataset in shared variables is to allow | |
142 Theano to copy it into the GPU memory (when code is run on GPU). | |
143 Since copying data into the GPU is slow, copying a minibatch everytime | |
144 is needed (the default behaviour if the data is not in a shared | |
145 variable) would lead to a large decrease in performance. | |
146 """ | |
147 data_x, data_y = data_xy | |
148 shared_x = theano.shared( numpy.asarray( data_x, dtype = theano.config.floatX ) ) | |
149 shared_y = theano.shared( numpy.asarray( data_y, dtype = theano.config.floatX ) ) | |
150 # When storing data on the GPU it has to be stored as floats | |
151 # therefore we will store the labels as ``floatX`` as well | |
152 # (``shared_y`` does exactly that). But during our computations | |
153 # we need them as ints (we use labels as index, and if they are | |
154 # floats it doesn't make sense) therefore instead of returning | |
155 # ``shared_y`` we will have to cast it to int. This little hack | |
156 # lets us get around this issue |
157 return shared_x, T.cast( shared_y, 'int32' ) | |
158 | |
159 def load_data_pkl_gz( dataset ): | |
160 ''' Loads the dataset | |
161 | |
162 :type dataset: string | |
163 :param dataset: the path to the dataset (here MNIST) | |
164 ''' | |
165 | |
166 #-------------------------------------------------------------------------------------------------------------------- | |
167 # Load Data | |
168 #-------------------------------------------------------------------------------------------------------------------- | |
169 | |
170 | |
171 print '... loading data' | |
172 | |
173 # Load the dataset | |
174 f = gzip.open(dataset,'rb') | |
175 train_set, valid_set, test_set = cPickle.load(f) | |
176 f.close() | |
177 | |
178 test_set_x, test_set_y = shared_dataset( test_set ) | |
179 valid_set_x, valid_set_y = shared_dataset( valid_set ) | |
180 train_set_x, train_set_y = shared_dataset( train_set ) | |
181 | |
182 rval = [ ( train_set_x, train_set_y ), ( valid_set_x,valid_set_y ), ( test_set_x, test_set_y ) ] | |
183 return rval | |
184 | |
185 ##def load_data_ft( verbose = False,\ | |
186 ## data_path = '/data/lisa/data/nist/by_class/'\ | |
187 ## train_data = 'all/all_train_data.ft',\ | |
188 ## train_labels = 'all/all_train_labels.ft',\ | |
189 ## test_data = 'all/all_test_data.ft',\ | |
190 ## test_labels = 'all/all_test_labels.ft'): | |
191 ## | |
192 ## train_data_file = open(data_path + train_data) | |
193 ## train_labels_file = open(data_path + train_labels) | |
194 ## test_labels_file = open(data_path + test_data) | |
195 ## test_data_file = open(data_path + test_labels) | |
196 ## | |
197 ## raw_train_data = ft.read( train_data_file) | |
198 ## raw_train_labels = ft.read(train_labels_file) | |
199 ## raw_test_data = ft.read( test_labels_file) | |
200 ## raw_test_labels = ft.read( test_data_file) | |
201 ## | |
202 ## f.close() | |
203 ## g.close() | |
204 ## i.close() | |
205 ## h.close() | |
206 ## | |
207 ## | |
208 ## test_set_x, test_set_y = shared_dataset(test_set) | |
209 ## valid_set_x, valid_set_y = shared_dataset(valid_set) | |
210 ## train_set_x, train_set_y = shared_dataset(train_set) | |
211 ## | |
212 ## rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] | |
213 ## return rval | |
214 ## #create a validation set the same size as the test size | |
215 ## #use the end of the training array for this purpose | |
216 ## #discard the last remaining so we get a %batch_size number | |
217 ## test_size=len(raw_test_labels) | |
218 ## test_size = int(test_size/batch_size) | |
219 ## test_size*=batch_size | |
220 ## train_size = len(raw_train_data) | |
221 ## train_size = int(train_size/batch_size) | |
222 ## train_size*=batch_size | |
223 ## validation_size =test_size | |
224 ## offset = train_size-test_size | |
225 ## if verbose == True: | |
226 ## print 'train size = %d' %train_size | |
227 ## print 'test size = %d' %test_size | |
228 ## print 'valid size = %d' %validation_size | |
229 ## print 'offset = %d' %offset | |
230 ## | |
231 ## | |
232 | |
233 #-------------------------------------------------------------------------------------------------------------------- | 140 #-------------------------------------------------------------------------------------------------------------------- |
234 # MAIN | 141 # MAIN |
235 #-------------------------------------------------------------------------------------------------------------------- | 142 #-------------------------------------------------------------------------------------------------------------------- |
236 | 143 |
237 def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ | 144 def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ |
238 dataset_name = 'mnist.pkl.gz', image_size = 28 * 28, nb_class = 10, \ | 145 dataset=datasets.nist_digits, image_size = 32 * 32, nb_class = 10, \ |
239 patience = 5000, patience_increase = 2, improvement_threshold = 0.995): | 146 patience = 5000, patience_increase = 2, improvement_threshold = 0.995): |
240 | 147 |
241 """ | 148 """ |
242 Demonstrate stochastic gradient descent optimization of a log-linear | 149 Demonstrate stochastic gradient descent optimization of a log-linear |
243 model | 150 model |
252 :param nb_max_examples: maximal number of examples to run the optimizer on | 159 :param nb_max_examples: maximal number of examples to run the optimizer on |
253 | 160 |
254 :type batch_size: int | 161 :type batch_size: int |
255 :param batch_size: size of the minibatch | 162 :param batch_size: size of the minibatch |
256 | 163 |
257 :type dataset_name: string | 164 :type dataset: dataset |
258 :param dataset: the path of the MNIST dataset file from | 165 :param dataset: a dataset instance from ift6266.datasets |
259 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz | |
260 | 166 |
261 :type image_size: int | 167 :type image_size: int |
262 :param image_size: size of the input image in pixels (width * height) | 168 :param image_size: size of the input image in pixels (width * height) |
263 | 169 |
264 :type nb_class: int | 170 :type nb_class: int |
273 :type improvement_threshold: float | 179 :type improvement_threshold: float |
274 :param improvement_threshold: a relative improvement of this much is considered significant | 180 :param improvement_threshold: a relative improvement of this much is considered significant |
275 | 181 |
276 | 182 |
277 """ | 183 """ |
278 datasets = load_data_pkl_gz( dataset_name ) | |
279 | |
280 train_set_x, train_set_y = datasets[0] | |
281 valid_set_x, valid_set_y = datasets[1] | |
282 test_set_x , test_set_y = datasets[2] | |
283 | |
284 # compute number of minibatches for training, validation and testing | |
285 n_train_batches = train_set_x.value.shape[0] / batch_size | |
286 n_valid_batches = valid_set_x.value.shape[0] / batch_size | |
287 n_test_batches = test_set_x.value.shape[0] / batch_size | |
288 | |
289 #-------------------------------------------------------------------------------------------------------------------- | 184 #-------------------------------------------------------------------------------------------------------------------- |
290 # Build actual model | 185 # Build actual model |
291 #-------------------------------------------------------------------------------------------------------------------- | 186 #-------------------------------------------------------------------------------------------------------------------- |
292 | 187 |
293 print '... building the model' | 188 print '... building the model' |
306 # the model in symbolic format | 201 # the model in symbolic format |
307 cost = classifier.negative_log_likelihood( y ) | 202 cost = classifier.negative_log_likelihood( y ) |
308 | 203 |
309 # compiling a Theano function that computes the mistakes that are made by | 204 # compiling a Theano function that computes the mistakes that are made by |
310 # the model on a minibatch | 205 # the model on a minibatch |
311 test_model = theano.function( inputs = [ index ], | 206 test_model = theano.function( inputs = [ x, y ], |
312 outputs = classifier.errors( y ), | 207 outputs = classifier.errors( y )) |
313 givens = { | 208 |
314 x:test_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | 209 validate_model = theano.function( inputs = [ x, y ], |
315 y:test_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | 210 outputs = classifier.errors( y )) |
316 | |
317 validate_model = theano.function( inputs = [ index ], | |
318 outputs = classifier.errors( y ), | |
319 givens = { | |
320 x:valid_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | |
321 y:valid_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | |
322 | 211 |
323 # compute the gradient of cost with respect to theta = ( W, b ) | 212 # compute the gradient of cost with respect to theta = ( W, b ) |
324 g_W = T.grad( cost = cost, wrt = classifier.W ) | 213 g_W = T.grad( cost = cost, wrt = classifier.W ) |
325 g_b = T.grad( cost = cost, wrt = classifier.b ) | 214 g_b = T.grad( cost = cost, wrt = classifier.b ) |
326 | 215 |
329 classifier.b: classifier.b - learning_rate * g_b} | 218 classifier.b: classifier.b - learning_rate * g_b} |
330 | 219 |
331 # compiling a Theano function `train_model` that returns the cost, but in | 220 # compiling a Theano function `train_model` that returns the cost, but in |
332 # the same time updates the parameter of the model based on the rules | 221 # the same time updates the parameter of the model based on the rules |
333 # defined in `updates` | 222 # defined in `updates` |
334 train_model = theano.function( inputs = [ index ], | 223 train_model = theano.function( inputs = [ x, y ], |
335 outputs = cost, | 224 outputs = cost, |
336 updates = updates, | 225 updates = updates) |
337 givens = { | |
338 x: train_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | |
339 y: train_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | |
340 | 226 |
341 #-------------------------------------------------------------------------------------------------------------------- | 227 #-------------------------------------------------------------------------------------------------------------------- |
342 # Train model | 228 # Train model |
343 #-------------------------------------------------------------------------------------------------------------------- | 229 #-------------------------------------------------------------------------------------------------------------------- |
344 | 230 |
347 patience = 5000 # look as this many examples regardless | 233 patience = 5000 # look as this many examples regardless |
348 patience_increase = 2 # wait this much longer when a new best is | 234 patience_increase = 2 # wait this much longer when a new best is |
349 # found | 235 # found |
350 improvement_threshold = 0.995 # a relative improvement of this much is | 236 improvement_threshold = 0.995 # a relative improvement of this much is |
351 # considered significant | 237 # considered significant |
352 validation_frequency = min( n_train_batches, patience * 0.5 ) | 238 validation_frequency = patience * 0.5 |
353 # go through this many | 239 # go through this many |
354 # minibatches before checking the network | 240 # minibatches before checking the network |
355 # on the validation set; in this case we | 241 # on the validation set; in this case we |
356 # check every epoch | 242 # check every epoch |
357 | 243 |
358 best_params = None | 244 best_params = None |
359 best_validation_loss = float('inf') | 245 best_validation_loss = float('inf') |
360 test_score = 0. | 246 test_score = 0. |
361 start_time = time.clock() | 247 start_time = time.clock() |
362 | 248 |
363 done_looping = False | 249 done_looping = False |
364 n_epochs = nb_max_examples / train_set_x.value.shape[0] | 250 n_iters = nb_max_examples / batch_size |
365 epoch = 0 | 251 epoch = 0 |
366 | 252 iter = 0 |
367 while ( epoch < n_epochs ) and ( not done_looping ): | 253 |
| 254 while ( iter < n_iters ) and ( not done_looping ): |
368 | 255 |
369 epoch = epoch + 1 | 256 epoch = epoch + 1 |
370 for minibatch_index in xrange( n_train_batches ): | 257 for x, y in dataset.train(batch_size): |
371 | 258 |
372 minibatch_avg_cost = train_model( minibatch_index ) | 259 minibatch_avg_cost = train_model( x, y ) |
373 # iteration number | 260 # iteration number |
374 iter = epoch * n_train_batches + minibatch_index | 261 iter += 1 |
375 | 262 |
376 if ( iter + 1 ) % validation_frequency == 0: | 263 if iter % validation_frequency == 0: |
377 # compute zero-one loss on validation set | 264 # compute zero-one loss on validation set |
378 validation_losses = [ validate_model( i ) for i in xrange( n_valid_batches ) ] | 265 validation_losses = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ] |
379 this_validation_loss = numpy.mean( validation_losses ) | 266 this_validation_loss = numpy.mean( validation_losses ) |
380 | 267 |
381 print('epoch %i, minibatch %i/%i, validation error %f %%' % \ | 268 print('epoch %i, iter %i, validation error %f %%' % \ |
382 ( epoch, minibatch_index + 1,n_train_batches, \ | 269 ( epoch, iter, this_validation_loss*100. ) ) |
383 this_validation_loss*100. ) ) | |
384 | 270 |
385 | 271 |
386 # if we got the best validation score until now | 272 # if we got the best validation score until now |
387 if this_validation_loss < best_validation_loss: | 273 if this_validation_loss < best_validation_loss: |
388 #improve patience if loss improvement is good enough | 274 #improve patience if loss improvement is good enough |
391 patience = max( patience, iter * patience_increase ) | 277 patience = max( patience, iter * patience_increase ) |
392 | 278 |
393 best_validation_loss = this_validation_loss | 279 best_validation_loss = this_validation_loss |
394 # test it on the test set | 280 # test it on the test set |
395 | 281 |
396 test_losses = [test_model(i) for i in xrange(n_test_batches)] | 282 test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] |
397 test_score = numpy.mean(test_losses) | 283 test_score = numpy.mean(test_losses) |
398 | 284 |
399 print((' epoch %i, minibatch %i/%i, test error of best ' | 285 print((' epoch %i, iter %i, test error of best ' |
400 'model %f %%') % \ | 286 'model %f %%') % \ |
401 (epoch, minibatch_index+1, n_train_batches,test_score*100.)) | 287 (epoch, iter, test_score*100.)) |
402 | 288 |
403 if patience <= iter : | 289 if patience <= iter : |
404 done_looping = True | 290 done_looping = True |
405 break | 291 break |
406 | 292 |
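The right-hand column assumes a dataset object from `ift6266.datasets` (here `datasets.nist_digits`) whose `train`, `valid` and `test` methods yield `(x, y)` minibatches that are passed straight to the compiled Theano functions, replacing the old index/`givens` slicing of shared variables. That interface is not part of this changeset; the sketch below is only an assumed, minimal in-memory stand-in with hypothetical names, not the project's implementation:

```python
class ArrayDataset(object):
    """Hypothetical stand-in for an ift6266.datasets object: it only
    provides the train/valid/test minibatch iterators the new loop uses."""

    def __init__(self, train_xy, valid_xy, test_xy):
        # each *_xy is an (examples, labels) pair of numpy arrays
        self._splits = {'train': train_xy, 'valid': valid_xy, 'test': test_xy}

    def _minibatches(self, split, batch_size):
        x, y = self._splits[split]
        # drop the last incomplete minibatch, as the old slicing code did
        for i in xrange(0, x.shape[0] - batch_size + 1, batch_size):
            yield x[i:i + batch_size], y[i:i + batch_size]

    def train(self, batch_size):
        return self._minibatches('train', batch_size)

    def valid(self, batch_size):
        return self._minibatches('valid', batch_size)

    def test(self, batch_size):
        return self._minibatches('test', batch_size)
```

Because `train_model`, `validate_model` and `test_model` now take `x` and `y` as direct inputs, anything that yields minibatches this way can drive the same loop, including datasets too large to sit in GPU shared variables.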
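A side note on the indexing described in the comments around old/new line 110: `T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]` relies on advanced indexing, which behaves like NumPy's. A small NumPy sketch with made-up probabilities (purely illustrative, not data from the project) shows what gets selected and how the cost is formed:

```python
import numpy as np

# Made-up log-probabilities for a minibatch of 3 examples over 4 classes,
# and the integer class labels y for those examples.
LP = np.log(np.array([[0.7, 0.1, 0.1, 0.1],
                      [0.2, 0.5, 0.2, 0.1],
                      [0.1, 0.1, 0.2, 0.6]]))
y = np.array([0, 1, 3])

# LP[np.arange(3), y] picks out LP[0, y[0]], LP[1, y[1]], LP[2, y[2]]:
# the log-probability each example assigns to its correct class.
v = LP[np.arange(y.shape[0]), y]   # [log 0.7, log 0.5, log 0.6]

# The negative log-likelihood cost is the negated mean of that vector.
nll = -v.mean()                    # about 0.52 for these numbers
```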