ift6266: comparison of baseline/log_reg/log_reg.py @ 198:5d88ed99c0af
Modify the log_reg.py tutorial code to use the datasets module.
author | Arnaud Bergeron <abergeron@gmail.com> |
date | Tue, 02 Mar 2010 18:16:49 -0500 |
parents | d37c944133c3 |
children | 777f48ba30df |
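
In essence, the changeset removes the `gzip`/`cPickle` loading path and the index-plus-`givens` minibatching over Theano shared variables, and instead compiles the model functions to take minibatch data directly as inputs, fed from the `(x, y)` generators provided by `ift6266.datasets`. The following is a minimal, self-contained sketch of that pattern, not the committed file; `toy_batches` is a hypothetical stand-in for `dataset.train(batch_size)`.

```python
import numpy
import theano
import theano.tensor as T

image_size, nb_class, batch_size, learning_rate = 32 * 32, 10, 50, 0.13

x = T.matrix('x')    # a minibatch of flattened images
y = T.ivector('y')   # the corresponding integer labels

# model parameters, shared so the SGD updates can modify them in place
W = theano.shared(numpy.zeros((image_size, nb_class), dtype=theano.config.floatX))
b = theano.shared(numpy.zeros((nb_class,), dtype=theano.config.floatX))

p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)
cost = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])

updates = {W: W - learning_rate * T.grad(cost, W),
           b: b - learning_rate * T.grad(cost, b)}

# no `index`/`givens` pair any more: the minibatch is a plain function argument
train_model = theano.function(inputs=[x, y], outputs=cost, updates=updates)

def toy_batches(n_batches=5):
    # hypothetical random minibatches; ift6266.datasets yields (x, y) pairs the same way
    for _ in xrange(n_batches):
        xb = numpy.random.rand(batch_size, image_size).astype(theano.config.floatX)
        yb = numpy.random.randint(0, nb_class, size=batch_size).astype('int32')
        yield xb, yb

for xb, yb in toy_batches():
    print train_model(xb, yb)
```

The diff below shows the same compiled-function signature reused for validation and testing by iterating over `dataset.valid(batch_size)` and `dataset.test(batch_size)`.
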
197:9116cfe8e4ab (before) | 198:5d88ed99c0af (after) |
---|---|
33 Christopher M. Bishop, section 4.3.2 | 33 Christopher M. Bishop, section 4.3.2 |
34 | 34 |
35 """ | 35 """ |
36 __docformat__ = 'restructedtext en' | 36 __docformat__ = 'restructedtext en' |
37 | 37 |
38 import numpy, time, cPickle, gzip | 38 import numpy, time |
39 | 39 |
40 import theano | 40 import theano |
41 import theano.tensor as T | 41 import theano.tensor as T |
42 | 42 from ift6266 import datasets |
43 | 43 |
44 class LogisticRegression(object): | 44 class LogisticRegression(object): |
45 """Multi-class Logistic Regression Class | 45 """Multi-class Logistic Regression Class |
46 | 46 |
47 The logistic regression is fully described by a weight matrix :math:`W` | 47 The logistic regression is fully described by a weight matrix :math:`W` |
133 # represents a mistake in prediction | 133 # represents a mistake in prediction |
134 return T.mean( T.neq( self.y_pred, y ) ) | 134 return T.mean( T.neq( self.y_pred, y ) ) |
135 else: | 135 else: |
136 raise NotImplementedError() | 136 raise NotImplementedError() |
137 | 137 |
138 def shared_dataset( data_xy ): | |
139 """ Function that loads the dataset into shared variables | |
140 | |
141 The reason we store our dataset in shared variables is to allow | |
142 Theano to copy it into the GPU memory (when code is run on GPU). | |
143 Since copying data into the GPU is slow, copying a minibatch every time | 
144 it is needed (the default behaviour if the data is not in a shared | 
145 variable) would lead to a large decrease in performance. | 
146 """ | |
147 data_x, data_y = data_xy | |
148 shared_x = theano.shared( numpy.asarray( data_x, dtype = theano.config.floatX ) ) | |
149 shared_y = theano.shared( numpy.asarray( data_y, dtype = theano.config.floatX ) ) | |
150 # When storing data on the GPU it has to be stored as floats | |
151 # therefore we will store the labels as ``floatX`` as well | |
152 # (``shared_y`` does exactly that). But during our computations | |
153 # we need them as ints (we use labels as index, and if they are | |
154 # floats it doesn't make sense) therefore instead of returning | |
155 # ``shared_y`` we will have to cast it to int. This little hack | |
156 # lets us get around this issue | 
157 return shared_x, T.cast( shared_y, 'int32' ) | |
158 | |
159 def load_data_pkl_gz( dataset ): | |
160 ''' Loads the dataset | |
161 | |
162 :type dataset: string | |
163 :param dataset: the path to the dataset (here MNIST) | |
164 ''' | |
165 | |
166 #-------------------------------------------------------------------------------------------------------------------- | |
167 # Load Data | |
168 #-------------------------------------------------------------------------------------------------------------------- | |
169 | |
170 | |
171 print '... loading data' | |
172 | |
173 # Load the dataset | |
174 f = gzip.open(dataset,'rb') | |
175 train_set, valid_set, test_set = cPickle.load(f) | |
176 f.close() | |
177 | |
178 test_set_x, test_set_y = shared_dataset( test_set ) | |
179 valid_set_x, valid_set_y = shared_dataset( valid_set ) | |
180 train_set_x, train_set_y = shared_dataset( train_set ) | |
181 | |
182 rval = [ ( train_set_x, train_set_y ), ( valid_set_x,valid_set_y ), ( test_set_x, test_set_y ) ] | |
183 return rval | |
184 | |
185 ##def load_data_ft( verbose = False,\ | |
186 ## data_path = '/data/lisa/data/nist/by_class/'\ | |
187 ## train_data = 'all/all_train_data.ft',\ | |
188 ## train_labels = 'all/all_train_labels.ft',\ | |
189 ## test_data = 'all/all_test_data.ft',\ | |
190 ## test_labels = 'all/all_test_labels.ft'): | |
191 ## | |
192 ## train_data_file = open(data_path + train_data) | |
193 ## train_labels_file = open(data_path + train_labels) | |
194 ## test_labels_file = open(data_path + test_data) | |
195 ## test_data_file = open(data_path + test_labels) | |
196 ## | |
197 ## raw_train_data = ft.read( train_data_file) | |
198 ## raw_train_labels = ft.read(train_labels_file) | |
199 ## raw_test_data = ft.read( test_labels_file) | |
200 ## raw_test_labels = ft.read( test_data_file) | |
201 ## | |
202 ## f.close() | |
203 ## g.close() | |
204 ## i.close() | |
205 ## h.close() | |
206 ## | |
207 ## | |
208 ## test_set_x, test_set_y = shared_dataset(test_set) | |
209 ## valid_set_x, valid_set_y = shared_dataset(valid_set) | |
210 ## train_set_x, train_set_y = shared_dataset(train_set) | |
211 ## | |
212 ## rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] | |
213 ## return rval | |
214 ## #create a validation set the same size as the test size | |
215 ## #use the end of the training array for this purpose | |
216 ## #discard the last remaining so we get a %batch_size number | |
217 ## test_size=len(raw_test_labels) | |
218 ## test_size = int(test_size/batch_size) | |
219 ## test_size*=batch_size | |
220 ## train_size = len(raw_train_data) | |
221 ## train_size = int(train_size/batch_size) | |
222 ## train_size*=batch_size | |
223 ## validation_size =test_size | |
224 ## offset = train_size-test_size | |
225 ## if verbose == True: | |
226 ## print 'train size = %d' %train_size | |
227 ## print 'test size = %d' %test_size | |
228 ## print 'valid size = %d' %validation_size | |
229 ## print 'offset = %d' %offset | |
230 ## | |
231 ## | |
232 | |
233 #-------------------------------------------------------------------------------------------------------------------- | 138 #-------------------------------------------------------------------------------------------------------------------- |
234 # MAIN | 139 # MAIN |
235 #-------------------------------------------------------------------------------------------------------------------- | 140 #-------------------------------------------------------------------------------------------------------------------- |
236 | 141 |
237 def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ | 142 def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ |
238 dataset_name = 'mnist.pkl.gz', image_size = 28 * 28, nb_class = 10, \ | 143 dataset=datasets.nist_digits, image_size = 32 * 32, nb_class = 10, \ |
239 patience = 5000, patience_increase = 2, improvement_threshold = 0.995): | 144 patience = 5000, patience_increase = 2, improvement_threshold = 0.995): |
240 | 145 |
241 """ | 146 """ |
242 Demonstrate stochastic gradient descent optimization of a log-linear | 147 Demonstrate stochastic gradient descent optimization of a log-linear |
243 model | 148 model |
252 :param nb_max_examples: maximal number of training examples to use | 157 :param nb_max_examples: maximal number of training examples to use |
253 | 158 |
254 :type batch_size: int | 159 :type batch_size: int |
255 :param batch_size: size of the minibatch | 160 :param batch_size: size of the minibatch |
256 | 161 |
257 :type dataset_name: string | 162 :type dataset: dataset |
258 :param dataset: the path of the MNIST dataset file from | 163 :param dataset: a dataset instance from ift6266.datasets |
259 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz | |
260 | 164 |
261 :type image_size: int | 165 :type image_size: int |
262 :param image_size: size of the input image in pixels (width * height) | 166 :param image_size: size of the input image in pixels (width * height) |
263 | 167 |
264 :type nb_class: int | 168 :type nb_class: int |
273 :type improvement_threshold: float | 177 :type improvement_threshold: float |
274 :param improvement_threshold: a relative improvement of this much is considered significant | 178 :param improvement_threshold: a relative improvement of this much is considered significant |
275 | 179 |
276 | 180 |
277 """ | 181 """ |
278 datasets = load_data_pkl_gz( dataset_name ) | |
279 | |
280 train_set_x, train_set_y = datasets[0] | |
281 valid_set_x, valid_set_y = datasets[1] | |
282 test_set_x , test_set_y = datasets[2] | |
283 | |
284 # compute number of minibatches for training, validation and testing | |
285 n_train_batches = train_set_x.value.shape[0] / batch_size | |
286 n_valid_batches = valid_set_x.value.shape[0] / batch_size | |
287 n_test_batches = test_set_x.value.shape[0] / batch_size | |
288 | |
289 #-------------------------------------------------------------------------------------------------------------------- | 182 #-------------------------------------------------------------------------------------------------------------------- |
290 # Build actual model | 183 # Build actual model |
291 #-------------------------------------------------------------------------------------------------------------------- | 184 #-------------------------------------------------------------------------------------------------------------------- |
292 | 185 |
293 print '... building the model' | 186 print '... building the model' |
306 # the model in symbolic format | 199 # the model in symbolic format |
307 cost = classifier.negative_log_likelihood( y ) | 200 cost = classifier.negative_log_likelihood( y ) |
308 | 201 |
309 # compiling a Theano function that computes the mistakes that are made by | 202 # compiling a Theano function that computes the mistakes that are made by |
310 # the model on a minibatch | 203 # the model on a minibatch |
311 test_model = theano.function( inputs = [ index ], | 204 test_model = theano.function( inputs = [ x, y ], |
312 outputs = classifier.errors( y ), | 205 outputs = classifier.errors( y )) |
313 givens = { | 206 |
314 x:test_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | 207 validate_model = theano.function( inputs = [ x, y ], |
315 y:test_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | 208 outputs = classifier.errors( y )) |
316 | |
317 validate_model = theano.function( inputs = [ index ], | |
318 outputs = classifier.errors( y ), | |
319 givens = { | |
320 x:valid_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | |
321 y:valid_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | |
322 | 209 |
323 # compute the gradient of cost with respect to theta = ( W, b ) | 210 # compute the gradient of cost with respect to theta = ( W, b ) |
324 g_W = T.grad( cost = cost, wrt = classifier.W ) | 211 g_W = T.grad( cost = cost, wrt = classifier.W ) |
325 g_b = T.grad( cost = cost, wrt = classifier.b ) | 212 g_b = T.grad( cost = cost, wrt = classifier.b ) |
326 | 213 |
329 classifier.b: classifier.b - learning_rate * g_b} | 216 classifier.b: classifier.b - learning_rate * g_b} |
330 | 217 |
331 # compiling a Theano function `train_model` that returns the cost, but at | 218 # compiling a Theano function `train_model` that returns the cost, but at |
332 # the same time updates the parameters of the model based on the rules | 219 # the same time updates the parameters of the model based on the rules |
333 # defined in `updates` | 220 # defined in `updates` |
334 train_model = theano.function( inputs = [ index ], | 221 train_model = theano.function( inputs = [ x, y ], |
335 outputs = cost, | 222 outputs = cost, |
336 updates = updates, | 223 updates = updates) |
337 givens = { | |
338 x: train_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | |
339 y: train_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | |
340 | 224 |
341 #-------------------------------------------------------------------------------------------------------------------- | 225 #-------------------------------------------------------------------------------------------------------------------- |
342 # Train model | 226 # Train model |
343 #-------------------------------------------------------------------------------------------------------------------- | 227 #-------------------------------------------------------------------------------------------------------------------- |
344 | 228 |
347 patience = 5000 # look at this many examples regardless | 231 patience = 5000 # look at this many examples regardless |
348 patience_increase = 2 # wait this much longer when a new best is | 232 patience_increase = 2 # wait this much longer when a new best is |
349 # found | 233 # found |
350 improvement_threshold = 0.995 # a relative improvement of this much is | 234 improvement_threshold = 0.995 # a relative improvement of this much is |
351 # considered significant | 235 # considered significant |
352 validation_frequency = min( n_train_batches, patience * 0.5 ) | 236 validation_frequency = patience * 0.5 |
353 # go through this many | 237 # go through this many |
354 # minibatches before checking the network | 238 # minibatches before checking the network |
355 # on the validation set; in this case we | 239 # on the validation set; in this case we |
356 # check every epoch | 240 # check every epoch |
357 | 241 |
358 best_params = None | 242 best_params = None |
359 best_validation_loss = float('inf') | 243 best_validation_loss = float('inf') |
360 test_score = 0. | 244 test_score = 0. |
361 start_time = time.clock() | 245 start_time = time.clock() |
362 | 246 |
363 done_looping = False | 247 done_looping = False |
364 n_epochs = nb_max_examples / train_set_x.value.shape[0] | 248 n_iters = nb_max_examples / batch_size |
365 epoch = 0 | 249 epoch = 0 |
366 | 250 iter = 0 |
367 while ( epoch < n_epochs ) and ( not done_looping ): | 251 |
| 252 while ( iter < n_iters ) and ( not done_looping ): |
368 | 253 |
369 epoch = epoch + 1 | 254 epoch = epoch + 1 |
370 for minibatch_index in xrange( n_train_batches ): | 255 for x, y in dataset.train(batch_size): |
371 | 256 |
372 minibatch_avg_cost = train_model( minibatch_index ) | 257 minibatch_avg_cost = train_model( x, y ) |
373 # iteration number | 258 # iteration number |
374 iter = epoch * n_train_batches + minibatch_index | 259 iter += 1 |
375 | 260 |
376 if ( iter + 1 ) % validation_frequency == 0: | 261 if iter % validation_frequency == 0: |
377 # compute zero-one loss on validation set | 262 # compute zero-one loss on validation set |
378 validation_losses = [ validate_model( i ) for i in xrange( n_valid_batches ) ] | 263 validation_losses = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ] |
379 this_validation_loss = numpy.mean( validation_losses ) | 264 this_validation_loss = numpy.mean( validation_losses ) |
380 | 265 |
381 print('epoch %i, minibatch %i/%i, validation error %f %%' % \ | 266 print('epoch %i, iter %i, validation error %f %%' % \ |
382 ( epoch, minibatch_index + 1,n_train_batches, \ | 267 ( epoch, iter, this_validation_loss*100. ) ) |
383 this_validation_loss*100. ) ) | |
384 | 268 |
385 | 269 |
386 # if we got the best validation score until now | 270 # if we got the best validation score until now |
387 if this_validation_loss < best_validation_loss: | 271 if this_validation_loss < best_validation_loss: |
388 #improve patience if loss improvement is good enough | 272 #improve patience if loss improvement is good enough |
391 patience = max( patience, iter * patience_increase ) | 275 patience = max( patience, iter * patience_increase ) |
392 | 276 |
393 best_validation_loss = this_validation_loss | 277 best_validation_loss = this_validation_loss |
394 # test it on the test set | 278 # test it on the test set |
395 | 279 |
396 test_losses = [test_model(i) for i in xrange(n_test_batches)] | 280 test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] |
397 test_score = numpy.mean(test_losses) | 281 test_score = numpy.mean(test_losses) |
398 | 282 |
399 print((' epoch %i, minibatch %i/%i, test error of best ' | 283 print((' epoch %i, iter %i, test error of best ' |
400 'model %f %%') % \ | 284 'model %f %%') % \ |
401 (epoch, minibatch_index+1, n_train_batches,test_score*100.)) | 285 (epoch, iter, test_score*100.)) |
402 | 286 |
403 if patience <= iter : | 287 if patience <= iter : |
404 done_looping = True | 288 done_looping = True |
405 break | 289 break |
406 | 290 |
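
For completeness, a hypothetical driver for the refactored script, assuming the import path implied by the file's location `baseline/log_reg/log_reg.py` and the `nist_digits` default taken from the new signature above:

```python
# hypothetical usage; the module path is inferred from the file's place in the repo
from ift6266 import datasets
from ift6266.baseline.log_reg.log_reg import log_reg

log_reg(learning_rate=0.13, nb_max_examples=1000000, batch_size=50,
        dataset=datasets.nist_digits, image_size=32 * 32, nb_class=10,
        patience=5000, patience_increase=2, improvement_threshold=0.995)
```
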