comparison baseline/log_reg/log_reg.py @ 205:10a801240bfc
Merge
author | fsavard |
date | Thu, 04 Mar 2010 08:21:43 -0500 |
parents | 777f48ba30df |
children | 7be1f086a89e |
204:e1f5f66dd7dd | 205:10a801240bfc |
---|---|
33 Christopher M. Bishop, section 4.3.2 | 33 Christopher M. Bishop, section 4.3.2 |
34 | 34 |
35 """ | 35 """ |
36 __docformat__ = 'restructedtext en' | 36 __docformat__ = 'restructedtext en' |
37 | 37 |
38 import numpy, time, cPickle, gzip | 38 import numpy, time |
39 | 39 |
40 import theano | 40 import theano |
41 import theano.tensor as T | 41 import theano.tensor as T |
42 | 42 from ift6266 import datasets |
43 | 43 |
44 class LogisticRegression(object): | 44 class LogisticRegression(object): |
45 """Multi-class Logistic Regression Class | 45 """Multi-class Logistic Regression Class |
46 | 46 |
47 The logistic regression is fully described by a weight matrix :math:`W` | 47 The logistic regression is fully described by a weight matrix :math:`W` |
110 # LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]] | 110 # LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]] |
111 # and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v, | 111 # and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v, |
112 # i.e., the mean log-likelihood across the minibatch. | 112 # i.e., the mean log-likelihood across the minibatch. |
113 return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] ) | 113 return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] ) |
114 | 114 |
| 115 def MSE(self, y): |
| 116 return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]), y]-y)**2) |
115 | 117 |
116 def errors( self, y ): | 118 def errors( self, y ): |
117 """Return a float representing the number of errors in the minibatch | 119 """Return a float representing the number of errors in the minibatch |
118 over the total number of examples of the minibatch ; zero one | 120 over the total number of examples of the minibatch ; zero one |
119 loss over the size of the minibatch | 121 loss over the size of the minibatch |
133 # represents a mistake in prediction | 135 # represents a mistake in prediction |
134 return T.mean( T.neq( self.y_pred, y ) ) | 136 return T.mean( T.neq( self.y_pred, y ) ) |
135 else: | 137 else: |
136 raise NotImplementedError() | 138 raise NotImplementedError() |
137 | 139 |
138 def shared_dataset( data_xy ): | |
139 """ Function that loads the dataset into shared variables | |
140 | |
141 The reason we store our dataset in shared variables is to allow | |
142 Theano to copy it into the GPU memory (when code is run on GPU). | |
143 Since copying data into the GPU is slow, copying a minibatch everytime | |
144 is needed (the default behaviour if the data is not in a shared | |
145 variable) would lead to a large decrease in performance. | |
146 """ | |
147 data_x, data_y = data_xy | |
148 shared_x = theano.shared( numpy.asarray( data_x, dtype = theano.config.floatX ) ) | |
149 shared_y = theano.shared( numpy.asarray( data_y, dtype = theano.config.floatX ) ) | |
150 # When storing data on the GPU it has to be stored as floats | |
151 # therefore we will store the labels as ``floatX`` as well | |
152 # (``shared_y`` does exactly that). But during our computations | |
153 # we need them as ints (we use labels as index, and if they are | |
154 # floats it doesn't make sense) therefore instead of returning | |
155 # ``shared_y`` we will have to cast it to int. This little hack | |
156 # lets us get around this issue |
157 return shared_x, T.cast( shared_y, 'int32' ) | |
158 | |
159 def load_data_pkl_gz( dataset ): | |
160 ''' Loads the dataset | |
161 | |
162 :type dataset: string | |
163 :param dataset: the path to the dataset (here MNIST) | |
164 ''' | |
165 | |
166 #-------------------------------------------------------------------------------------------------------------------- | |
167 # Load Data | |
168 #-------------------------------------------------------------------------------------------------------------------- | |
169 | |
170 | |
171 print '... loading data' | |
172 | |
173 # Load the dataset | |
174 f = gzip.open(dataset,'rb') | |
175 train_set, valid_set, test_set = cPickle.load(f) | |
176 f.close() | |
177 | |
178 test_set_x, test_set_y = shared_dataset( test_set ) | |
179 valid_set_x, valid_set_y = shared_dataset( valid_set ) | |
180 train_set_x, train_set_y = shared_dataset( train_set ) | |
181 | |
182 rval = [ ( train_set_x, train_set_y ), ( valid_set_x,valid_set_y ), ( test_set_x, test_set_y ) ] | |
183 return rval | |
184 | |
185 ##def load_data_ft( verbose = False,\ | |
186 ## data_path = '/data/lisa/data/nist/by_class/'\ | |
187 ## train_data = 'all/all_train_data.ft',\ | |
188 ## train_labels = 'all/all_train_labels.ft',\ | |
189 ## test_data = 'all/all_test_data.ft',\ | |
190 ## test_labels = 'all/all_test_labels.ft'): | |
191 ## | |
192 ## train_data_file = open(data_path + train_data) | |
193 ## train_labels_file = open(data_path + train_labels) | |
194 ## test_labels_file = open(data_path + test_data) | |
195 ## test_data_file = open(data_path + test_labels) | |
196 ## | |
197 ## raw_train_data = ft.read( train_data_file) | |
198 ## raw_train_labels = ft.read(train_labels_file) | |
199 ## raw_test_data = ft.read( test_labels_file) | |
200 ## raw_test_labels = ft.read( test_data_file) | |
201 ## | |
202 ## f.close() | |
203 ## g.close() | |
204 ## i.close() | |
205 ## h.close() | |
206 ## | |
207 ## | |
208 ## test_set_x, test_set_y = shared_dataset(test_set) | |
209 ## valid_set_x, valid_set_y = shared_dataset(valid_set) | |
210 ## train_set_x, train_set_y = shared_dataset(train_set) | |
211 ## | |
212 ## rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] | |
213 ## return rval | |
214 ## #create a validation set the same size as the test size | |
215 ## #use the end of the training array for this purpose | |
216 ## #discard the last remaining so we get a %batch_size number | |
217 ## test_size=len(raw_test_labels) | |
218 ## test_size = int(test_size/batch_size) | |
219 ## test_size*=batch_size | |
220 ## train_size = len(raw_train_data) | |
221 ## train_size = int(train_size/batch_size) | |
222 ## train_size*=batch_size | |
223 ## validation_size =test_size | |
224 ## offset = train_size-test_size | |
225 ## if verbose == True: | |
226 ## print 'train size = %d' %train_size | |
227 ## print 'test size = %d' %test_size | |
228 ## print 'valid size = %d' %validation_size | |
229 ## print 'offset = %d' %offset | |
230 ## | |
231 ## | |
232 | |
233 #-------------------------------------------------------------------------------------------------------------------- | 140 #-------------------------------------------------------------------------------------------------------------------- |
234 # MAIN | 141 # MAIN |
235 #-------------------------------------------------------------------------------------------------------------------- | 142 #-------------------------------------------------------------------------------------------------------------------- |
236 | 143 |
237 def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ | 144 def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ |
238 dataset_name = 'mnist.pkl.gz', image_size = 28 * 28, nb_class = 10, \ | 145 dataset=datasets.nist_digits, image_size = 32 * 32, nb_class = 10, \ |
239 patience = 5000, patience_increase = 2, improvement_threshold = 0.995): | 146 patience = 5000, patience_increase = 2, improvement_threshold = 0.995): |
240 | 147 |
241 """ | 148 """ |
242 Demonstrate stochastic gradient descent optimization of a log-linear | 149 Demonstrate stochastic gradient descent optimization of a log-linear |
243 model | 150 model |
252 :param nb_max_examples: maximal number of examples to run the optimizer on | 159 :param nb_max_examples: maximal number of examples to run the optimizer on |
253 | 160 |
254 :type batch_size: int | 161 :type batch_size: int |
255 :param batch_size: size of the minibatch | 162 :param batch_size: size of the minibatch |
256 | 163 |
257 :type dataset_name: string | 164 :type dataset: dataset |
258 :param dataset: the path of the MNIST dataset file from | 165 :param dataset: a dataset instance from ift6266.datasets |
259 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz | |
260 | 166 |
261 :type image_size: int | 167 :type image_size: int |
262 :param image_size: size of the input image in pixels (width * height) | 168 :param image_size: size of the input image in pixels (width * height) |
263 | 169 |
264 :type nb_class: int | 170 :type nb_class: int |
273 :type improvement_threshold: float | 179 :type improvement_threshold: float |
274 :param improvement_threshold: a relative improvement of this much is considered significant | 180 :param improvement_threshold: a relative improvement of this much is considered significant |
275 | 181 |
276 | 182 |
277 """ | 183 """ |
278 datasets = load_data_pkl_gz( dataset_name ) | |
279 | |
280 train_set_x, train_set_y = datasets[0] | |
281 valid_set_x, valid_set_y = datasets[1] | |
282 test_set_x , test_set_y = datasets[2] | |
283 | |
284 # compute number of minibatches for training, validation and testing | |
285 n_train_batches = train_set_x.value.shape[0] / batch_size | |
286 n_valid_batches = valid_set_x.value.shape[0] / batch_size | |
287 n_test_batches = test_set_x.value.shape[0] / batch_size | |
288 | |
289 #-------------------------------------------------------------------------------------------------------------------- | 184 #-------------------------------------------------------------------------------------------------------------------- |
290 # Build actual model | 185 # Build actual model |
291 #-------------------------------------------------------------------------------------------------------------------- | 186 #-------------------------------------------------------------------------------------------------------------------- |
292 | 187 |
293 print '... building the model' | 188 print '... building the model' |
306 # the model in symbolic format | 201 # the model in symbolic format |
307 cost = classifier.negative_log_likelihood( y ) | 202 cost = classifier.negative_log_likelihood( y ) |
308 | 203 |
309 # compiling a Theano function that computes the mistakes that are made by | 204 # compiling a Theano function that computes the mistakes that are made by |
310 # the model on a minibatch | 205 # the model on a minibatch |
311 test_model = theano.function( inputs = [ index ], | 206 test_model = theano.function( inputs = [ x, y ], |
312 outputs = classifier.errors( y ), | 207 outputs = classifier.errors( y )) |
313 givens = { | 208 |
314 x:test_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | 209 validate_model = theano.function( inputs = [ x, y ], |
315 y:test_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | 210 outputs = classifier.errors( y )) |
316 | |
317 validate_model = theano.function( inputs = [ index ], | |
318 outputs = classifier.errors( y ), | |
319 givens = { | |
320 x:valid_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | |
321 y:valid_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | |
322 | 211 |
323 # compute the gradient of cost with respect to theta = ( W, b ) | 212 # compute the gradient of cost with respect to theta = ( W, b ) |
324 g_W = T.grad( cost = cost, wrt = classifier.W ) | 213 g_W = T.grad( cost = cost, wrt = classifier.W ) |
325 g_b = T.grad( cost = cost, wrt = classifier.b ) | 214 g_b = T.grad( cost = cost, wrt = classifier.b ) |
326 | 215 |
329 classifier.b: classifier.b - learning_rate * g_b} | 218 classifier.b: classifier.b - learning_rate * g_b} |
330 | 219 |
331 # compiling a Theano function `train_model` that returns the cost, but in | 220 # compiling a Theano function `train_model` that returns the cost, but in |
332 # the same time updates the parameter of the model based on the rules | 221 # the same time updates the parameter of the model based on the rules |
333 # defined in `updates` | 222 # defined in `updates` |
334 train_model = theano.function( inputs = [ index ], | 223 train_model = theano.function( inputs = [ x, y ], |
335 outputs = cost, | 224 outputs = cost, |
336 updates = updates, | 225 updates = updates) |
337 givens = { | |
338 x: train_set_x[ index * batch_size: ( index + 1 ) * batch_size ], | |
339 y: train_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) | |
340 | 226 |
341 #-------------------------------------------------------------------------------------------------------------------- | 227 #-------------------------------------------------------------------------------------------------------------------- |
342 # Train model | 228 # Train model |
343 #-------------------------------------------------------------------------------------------------------------------- | 229 #-------------------------------------------------------------------------------------------------------------------- |
344 | 230 |
347 patience = 5000 # look as this many examples regardless | 233 patience = 5000 # look as this many examples regardless |
348 patience_increase = 2 # wait this much longer when a new best is | 234 patience_increase = 2 # wait this much longer when a new best is |
349 # found | 235 # found |
350 improvement_threshold = 0.995 # a relative improvement of this much is | 236 improvement_threshold = 0.995 # a relative improvement of this much is |
351 # considered significant | 237 # considered significant |
352 validation_frequency = min( n_train_batches, patience * 0.5 ) | 238 validation_frequency = patience * 0.5 |
353 # go through this many | 239 # go through this many |
354 # minibatches before checking the network | 240 # minibatches before checking the network |
355 # on the validation set; in this case we | 241 # on the validation set; in this case we |
356 # check every epoch | 242 # check every epoch |
357 | 243 |
358 best_params = None | 244 best_params = None |
359 best_validation_loss = float('inf') | 245 best_validation_loss = float('inf') |
360 test_score = 0. | 246 test_score = 0. |
361 start_time = time.clock() | 247 start_time = time.clock() |
362 | 248 |
363 done_looping = False | 249 done_looping = False |
364 n_epochs = nb_max_examples / train_set_x.value.shape[0] | 250 n_iters = nb_max_examples / batch_size |
365 epoch = 0 | 251 epoch = 0 |
366 | 252 iter = 0 |
367 while ( epoch < n_epochs ) and ( not done_looping ): | 253 |
| 254 while ( iter < n_iters ) and ( not done_looping ): |
368 | 255 |
369 epoch = epoch + 1 | 256 epoch = epoch + 1 |
370 for minibatch_index in xrange( n_train_batches ): | 257 for x, y in dataset.train(batch_size): |
371 | 258 |
372 minibatch_avg_cost = train_model( minibatch_index ) | 259 minibatch_avg_cost = train_model( x, y ) |
373 # iteration number | 260 # iteration number |
374 iter = epoch * n_train_batches + minibatch_index | 261 iter += 1 |
375 | 262 |
376 if ( iter + 1 ) % validation_frequency == 0: | 263 if iter % validation_frequency == 0: |
377 # compute zero-one loss on validation set | 264 # compute zero-one loss on validation set |
378 validation_losses = [ validate_model( i ) for i in xrange( n_valid_batches ) ] | 265 validation_losses = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ] |
379 this_validation_loss = numpy.mean( validation_losses ) | 266 this_validation_loss = numpy.mean( validation_losses ) |
380 | 267 |
381 print('epoch %i, minibatch %i/%i, validation error %f %%' % \ | 268 print('epoch %i, iter %i, validation error %f %%' % \ |
382 ( epoch, minibatch_index + 1,n_train_batches, \ | 269 ( epoch, iter, this_validation_loss*100. ) ) |
383 this_validation_loss*100. ) ) | |
384 | 270 |
385 | 271 |
386 # if we got the best validation score until now | 272 # if we got the best validation score until now |
387 if this_validation_loss < best_validation_loss: | 273 if this_validation_loss < best_validation_loss: |
388 #improve patience if loss improvement is good enough | 274 #improve patience if loss improvement is good enough |
391 patience = max( patience, iter * patience_increase ) | 277 patience = max( patience, iter * patience_increase ) |
392 | 278 |
393 best_validation_loss = this_validation_loss | 279 best_validation_loss = this_validation_loss |
394 # test it on the test set | 280 # test it on the test set |
395 | 281 |
396 test_losses = [test_model(i) for i in xrange(n_test_batches)] | 282 test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] |
397 test_score = numpy.mean(test_losses) | 283 test_score = numpy.mean(test_losses) |
398 | 284 |
399 print((' epoch %i, minibatch %i/%i, test error of best ' | 285 print((' epoch %i, iter %i, test error of best ' |
400 'model %f %%') % \ | 286 'model %f %%') % \ |
401 (epoch, minibatch_index+1, n_train_batches,test_score*100.)) | 287 (epoch, iter, test_score*100.)) |
402 | 288 |
403 if patience <= iter : | 289 if patience <= iter : |
404 done_looping = True | 290 done_looping = True |
405 break | 291 break |
406 | 292 |
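The right-hand column assumes a dataset object from `ift6266.datasets` (here `datasets.nist_digits`) whose `train`, `valid` and `test` methods yield `(x, y)` minibatches that are passed straight to the compiled Theano functions, replacing the old index/`givens` slicing of shared variables. That interface is not part of this changeset; the sketch below is only an assumed, minimal in-memory stand-in with hypothetical names, not the project's implementation:

```python
class ArrayDataset(object):
    """Hypothetical stand-in for an ift6266.datasets object: it only
    provides the train/valid/test minibatch iterators the new loop uses."""

    def __init__(self, train_xy, valid_xy, test_xy):
        # each *_xy is an (examples, labels) pair of numpy arrays
        self._splits = {'train': train_xy, 'valid': valid_xy, 'test': test_xy}

    def _minibatches(self, split, batch_size):
        x, y = self._splits[split]
        # drop the last incomplete minibatch, as the old slicing code did
        for i in xrange(0, x.shape[0] - batch_size + 1, batch_size):
            yield x[i:i + batch_size], y[i:i + batch_size]

    def train(self, batch_size):
        return self._minibatches('train', batch_size)

    def valid(self, batch_size):
        return self._minibatches('valid', batch_size)

    def test(self, batch_size):
        return self._minibatches('test', batch_size)
```

Because `train_model`, `validate_model` and `test_model` now take `x` and `y` as direct inputs, anything that yields minibatches this way can drive the same loop, including datasets too large to sit in GPU shared variables.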
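A side note on the indexing described in the comments around old/new line 110: `T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]` relies on advanced indexing, which behaves like NumPy's. A small NumPy sketch with made-up probabilities (purely illustrative, not data from the project) shows what gets selected and how the cost is formed:

```python
import numpy as np

# Made-up log-probabilities for a minibatch of 3 examples over 4 classes,
# and the integer class labels y for those examples.
LP = np.log(np.array([[0.7, 0.1, 0.1, 0.1],
                      [0.2, 0.5, 0.2, 0.1],
                      [0.1, 0.1, 0.2, 0.6]]))
y = np.array([0, 1, 3])

# LP[np.arange(3), y] picks out LP[0, y[0]], LP[1, y[1]], LP[2, y[2]]:
# the log-probability each example assigns to its correct class.
v = LP[np.arange(y.shape[0]), y]   # [log 0.7, log 0.5, log 0.6]

# The negative log-likelihood cost is the negated mean of that vector.
nll = -v.mean()                    # about 0.52 for these numbers
```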