ift6266: comparison of deep/deep_mlp/logistic_sgd.py @ 626:75dbbe409578

Added code for deep mlp, experiment code to go along with it. Also added code I used to filter the P07 / PNIST07 datasets to keep only digits.

author: fsavard
date:   Wed, 16 Mar 2011 13:43:32 -0400
comparing revision 625:128bc92897f2 with revision 626:75dbbe409578

import numpy, time, cPickle, gzip, sys, os

import theano
import theano.tensor as T

class LogisticRegression(object):
    def __init__(self, input, n_in, n_out):
        self.W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                 dtype=theano.config.floatX),
                               name='W')
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b')

        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))

        if y.dtype.startswith('int'):
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()

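
# --- Illustrative sketch, not part of the original changeset ---------------
# A minimal example of wiring a LogisticRegression instance to symbolic
# inputs and compiling a prediction function. The 28*28 / 10 sizes assume
# MNIST-shaped data, and `images` is a hypothetical (n_examples, 784) float
# array supplied by the caller; with freshly initialised (zero) weights the
# predictions are meaningless until the model has been trained.
def _logistic_regression_usage_sketch(images):
    x = T.matrix('x')
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
    # compile a function mapping raw images to the most probable class
    predict = theano.function(inputs=[x], outputs=classifier.y_pred)
    return predict(images)
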
def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############
    print '... loading data'

    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    def shared_dataset(data_xy):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX))
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX))
        # When storing data on the GPU it has to be stored as floats,
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use the labels as indices, and if they
        # are floats it doesn't make sense), therefore instead of returning
        # ``shared_y`` we cast it to int. This little hack lets us get
        # around this issue.
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval

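
# --- Illustrative sketch, not part of the original changeset ---------------
# What load_data returns: a list of (inputs, labels) pairs for the train,
# validation and test splits. The inputs are Theano shared variables whose
# underlying arrays are reachable through `.value` with the Theano version
# this file targets (the training code below relies on the same attribute);
# the labels are int32 casts of shared variables, as explained inside
# shared_dataset. The default path matches sgd_optimization_mnist below.
def _load_data_usage_sketch(dataset='../data/mnist.pkl.gz'):
    datasets = load_data(dataset)
    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = datasets
    print 'train/valid/test examples:', train_x.value.shape[0], \
        valid_x.value.shape[0], test_x.value.shape[0]
    return datasets
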
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='../data/mnist.pkl.gz', batch_size=600):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.value.shape[0] / batch_size
    n_valid_batches = valid_set_x.value.shape[0] / batch_size
    n_test_batches = test_set_x.value.shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images
    y = T.ivector('y')     # the labels are presented as 1D vector of
                           # [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # compute the gradient of cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a dictionary
    # (a plain-numpy sketch of this update step is given after this function)
    updates = {classifier.W: classifier.W - learning_rate * g_W,
               classifier.b: classifier.b - learning_rate * g_b}

    # compiling a Theano function `train_model` that returns the cost but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000                # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is
                                   # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # (a standalone sketch of this patience rule is given after this function)
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many
                                   # minibatches before checking the network
                                   # on the validation set; in this case we
                                   # check every epoch

    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % (end_time - start_time))

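
# --- Illustrative sketches, not part of the original changeset -------------
# A plain-numpy version of the step performed by the `updates` dictionary
# above: one gradient descent update on the mean negative log-likelihood of a
# softmax classifier. `X` is a hypothetical (n_examples, n_in) float array,
# `y` an int vector of labels, `W` and `b` numpy arrays shaped like the model
# parameters; Theano derives the same gradients symbolically through T.grad.
def _numpy_sgd_step_sketch(X, y, W, b, learning_rate):
    # forward pass: softmax class probabilities (shifted for stability)
    scores = numpy.dot(X, W) + b
    scores = scores - scores.max(axis=1)[:, numpy.newaxis]
    exp_scores = numpy.exp(scores)
    p_y_given_x = exp_scores / exp_scores.sum(axis=1)[:, numpy.newaxis]
    # gradient of the mean NLL: (p - onehot(y)) averaged over the minibatch
    delta = p_y_given_x.copy()
    delta[numpy.arange(y.shape[0]), y] -= 1.0
    g_W = numpy.dot(X.T, delta) / X.shape[0]
    g_b = delta.mean(axis=0)
    # the same rule as `updates`: theta <- theta - learning_rate * gradient
    return W - learning_rate * g_W, b - learning_rate * g_b


# A standalone rehearsal of the patience-based early stopping used in the
# training loop above, driven by a made-up sequence of validation losses
# instead of a real model, so the bookkeeping can be read in isolation.
def _early_stopping_sketch():
    fake_validation_losses = [0.9, 0.7, 0.5, 0.45, 0.44, 0.44, 0.44,
                              0.44, 0.44, 0.44]
    patience = 4                  # always look at at least this many checks
    patience_increase = 2         # wait this much longer after a new best
    improvement_threshold = 0.995
    best_loss = float('inf')
    for it, loss in enumerate(fake_validation_losses):
        if loss < best_loss:
            if loss < best_loss * improvement_threshold:
                # significant improvement: extend the patience horizon
                patience = max(patience, it * patience_increase)
            best_loss = loss
        if patience <= it:
            print 'stopping at check %i, best loss %f' % (it, best_loss)
            break
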

if __name__ == '__main__':
    sgd_optimization_mnist()