comparison deep/deep_mlp/logistic_sgd.py @ 626:75dbbe409578

Added code for the deep MLP, plus experiment code to go along with it. Also added the code I used to filter the P07 / PNIST07 datasets to keep only digits.
author fsavard
date Wed, 16 Mar 2011 13:43:32 -0400
parents 625:128bc92897f2
import numpy, time, cPickle, gzip, sys, os

import theano
import theano.tensor as T

class LogisticRegression(object):
    def __init__(self, input, n_in, n_out):
        self.W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                 dtype=theano.config.floatX),
                               name='W')
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b')

        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

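    # Added note (not in the original): the class implements multinomial
    # logistic regression, P(Y = i | x, W, b) = softmax_i(x.W + b), and
    # y_pred picks the most probable class. In negative_log_likelihood,
    # T.log(self.p_y_given_x) is an (n_examples, n_out) matrix of log
    # probabilities; indexing it with [T.arange(y.shape[0]), y] selects, for
    # each example, the log probability of its correct label, so the method
    # returns -1/N * sum_i log P(Y = y_i | x_i, W, b).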

    def errors(self, y):
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))

        if y.dtype.startswith('int'):
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############
    print '... loading data'

    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    def shared_dataset(data_xy):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into GPU memory (when the code is run on a GPU).
        Since copying data to the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
        # When storing data on the GPU it has to be stored as floats,
        # so we store the labels as ``floatX`` as well (``shared_y`` does
        # exactly that). But during our computations we need them as ints
        # (we use the labels as indices, which makes no sense for floats),
        # so instead of returning ``shared_y`` we cast it to int. This
        # little hack lets us get around the issue.
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval


def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='../data/mnist.pkl.gz',
                           batch_size=600):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.value.shape[0] / batch_size
    n_valid_batches = valid_set_x.value.shape[0] / batch_size
    n_test_batches = test_set_x.value.shape[0] / batch_size

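    # Added worked example (assumes the standard MNIST pickle split of
    # 50000/10000/10000 examples, not stated in this file): with
    # batch_size = 600, n_train_batches = 50000 / 600 = 83 and
    # n_valid_batches = n_test_batches = 10000 / 600 = 16; the integer
    # division drops the incomplete final batch.
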
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images
    y = T.ivector('y')     # the labels are presented as a 1D vector of
                           # [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

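    # Added note: the `givens` dictionaries substitute minibatch slices of the
    # shared datasets for the symbolic x and y, so each call to these
    # functions only passes an integer index instead of copying data in.
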
    # compute the gradient of cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a dictionary
    updates = {classifier.W: classifier.W - learning_rate * g_W,
               classifier.b: classifier.b - learning_rate * g_b}

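    # Added note: the `updates` dictionary above is one step of plain SGD,
    # theta <- theta - learning_rate * grad(cost, theta) for theta in (W, b);
    # Theano applies it each time `train_model` (defined below) is called.
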
    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000                # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best
                                   # is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

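    # Added note (same MNIST-split assumption as above): with
    # n_train_batches = 83, validation_frequency = min(83, 2500) = 83, so the
    # validation error is computed once per epoch; patience is only extended
    # when the validation loss improves by more than 0.5% relative to the
    # previous best, and training stops once `iter` exceeds `patience`.
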
    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % (end_time - start_time))

if __name__ == '__main__':
    sgd_optimization_mnist()
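
# Added usage sketch (hypothetical hyper-parameter values, not part of the
# original changeset):
#     python logistic_sgd.py
# trains on '../data/mnist.pkl.gz' with the defaults above, or, from another
# script:
#     from logistic_sgd import sgd_optimization_mnist
#     sgd_optimization_mnist(learning_rate=0.1, n_epochs=100, batch_size=500)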