comparison deep/deep_mlp/mlp.py @ 626:75dbbe409578
Added code for deep mlp, experiment code to go along with it. Also added code I used to filter the P07 / PNIST07 datasets to keep only digits.
author: fsavard
date: Wed, 16 Mar 2011 13:43:32 -0400
parents:
children:
comparison of revisions 625:128bc92897f2 and 626:75dbbe409578
__docformat__ = 'restructuredtext en'

import numpy, time, cPickle, gzip, sys, os

import theano
import theano.tensor as T

from logistic_sgd import LogisticRegression, load_data
class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, activation=T.tanh):
        print "Creating HiddenLayer with params"
        print locals()

        self.input = input

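        # Scaled uniform ("normalized") initialization: weights are drawn from
        # U[-sqrt(6/(n_in+n_out)), sqrt(6/(n_in+n_out))], which keeps activation
        # variances roughly constant across layers for tanh units; the 4x factor
        # below is the usual adjustment for sigmoid units.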
        W_values = numpy.asarray(rng.uniform(
                low=-numpy.sqrt(6. / (n_in + n_out)),
                high=numpy.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)), dtype=theano.config.floatX)
        if activation == theano.tensor.nnet.sigmoid:
            W_values *= 4

        self.W = theano.shared(value=W_values, name='W')

        b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='b')

        self.output = activation(T.dot(input, self.W) + self.b)

        self.params = [self.W, self.b]


class MLP(object):
    def __init__(self, rng, input, n_in, n_hidden_layers, n_hidden, n_out):
        print "Creating MLP with params"
        print locals()

        self.input = input

        self.hiddenLayers = []

        last_input = input
        last_n_out = n_in
        for i in range(n_hidden_layers):
            self.hiddenLayers.append(
                HiddenLayer(rng=rng, input=last_input,
                            n_in=last_n_out,
                            n_out=n_hidden,
                            activation=T.tanh))
            last_input = self.hiddenLayers[-1].output
            last_n_out = n_hidden

        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayers[-1].output,
            n_in=n_hidden,
            n_out=n_out)

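        # L1 and L2 regularization terms: the sums of absolute values and of
        # squares of every weight matrix (all hidden layers plus the output
        # layer). They are added to the training cost in test_mlp below.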
        self.L1 = abs(self.logRegressionLayer.W).sum()
        for h in self.hiddenLayers:
            self.L1 += abs(h.W).sum()

        self.L2_sqr = (self.logRegressionLayer.W ** 2).sum()
        for h in self.hiddenLayers:
            self.L2_sqr += (h.W ** 2).sum()

        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood

        self.errors = self.logRegressionLayer.errors

        self.params = []
        for hl in self.hiddenLayers:
            self.params += hl.params
        self.params += self.logRegressionLayer.params

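# A minimal usage sketch (hypothetical values, not part of this changeset):
# a three-hidden-layer network over 32x32 inputs with 10 output classes could
# be built symbolically as
#
#     rng = numpy.random.RandomState(1234)
#     x = T.matrix('x')
#     net = MLP(rng=rng, input=x, n_in=32*32,
#               n_hidden_layers=3, n_hidden=500, n_out=10)
#
# net.params then gathers the W and b of every layer for use in the SGD
# updates built in test_mlp below.
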
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='../data/mnist.pkl.gz', batch_size=20):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.value.shape[0] / batch_size
    n_valid_batches = valid_set_x.value.shape[0] / batch_size
    n_test_batches = test_set_x.value.shape[0] / batch_size
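    # Note: this is Python 2 integer division, so examples that do not fill a
    # complete minibatch at the end of each set are simply ignored.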

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images
    y = T.ivector('y')     # the labels are presented as a 1D vector of
                           # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    # (n_hidden_layers is a required argument of MLP.__init__ above; the
    # depth of 3 used here is an assumed example value)
    classifier = MLP(rng=rng, input=x, n_in=28*28, n_hidden_layers=3,
                     n_hidden=500, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
         + L1_reg * classifier.L1 \
         + L2_reg * classifier.L2_sqr

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
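    # In test_model and validate_model (and train_model below), `givens`
    # replaces x and y in the graph with minibatch slices of the shared
    # datasets selected by `index`, so the full datasets can stay in shared
    # variables (e.g. on the GPU) instead of being passed in at every call.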

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)


    # specify how to update the parameters of the model as a dictionary
    updates = {}
    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same length, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    for param, gparam in zip(classifier.params, gparams):
        updates[param] = param - learning_rate * gparam
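    # this is plain minibatch stochastic gradient descent: each parameter is
    # updated as theta <- theta - learning_rate * d(cost)/d(theta)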

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index], outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000               # look at this many minibatches regardless
    patience_increase = 2          # wait this much longer when a new best is
                                   # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many
                                   # minibatches before checking the network
                                   # on the validation set; in this case we
                                   # check every epoch
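    # With the standard mnist.pkl.gz split assumed by the default `dataset`
    # argument (50,000 training examples) and batch_size = 20, n_train_batches
    # is 2500, so validation_frequency = min(2500, 5000) = 2500: the model is
    # validated once per epoch.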


    best_params = None
    best_validation_loss = float('inf')
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))


                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
