comparison deep/deep_mlp/mlp.py @ 626:75dbbe409578

Added code for the deep MLP, along with its experiment code. Also added the code I used to filter the P07 / PNIST07 datasets to keep only digits.
author fsavard
date Wed, 16 Mar 2011 13:43:32 -0400
__docformat__ = 'restructuredtext en'

import numpy, time, cPickle, gzip, sys, os

import theano
import theano.tensor as T

from logistic_sgd import LogisticRegression, load_data

class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, activation=T.tanh):
        print "Creating HiddenLayer with params"
        print locals()

        self.input = input

        W_values = numpy.asarray(rng.uniform(
                low=-numpy.sqrt(6. / (n_in + n_out)),
                high=numpy.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)), dtype=theano.config.floatX)
        if activation == theano.tensor.nnet.sigmoid:
            W_values *= 4
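        # note: the uniform range above is the tanh-appropriate
        # initialization of Glorot & Bengio (AISTATS 2010); the 4x rescaling
        # is their recommended adjustment for sigmoid units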

        self.W = theano.shared(value=W_values, name='W')

        b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='b')

        self.output = activation(T.dot(input, self.W) + self.b)

        self.params = [self.W, self.b]

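
# Usage sketch (illustrative only, not part of the experiment code): a
# HiddenLayer can be compiled and run on its own, e.g.
#
#   rng = numpy.random.RandomState(1234)
#   x = T.matrix('x')
#   layer = HiddenLayer(rng=rng, input=x, n_in=784, n_out=500)
#   f = theano.function([x], layer.output)
#   out = f(numpy.zeros((20, 784), dtype=theano.config.floatX))
#
# `out` then has shape (20, 500), one row of tanh activations per example.
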
class MLP(object):
    def __init__(self, rng, input, n_in, n_hidden_layers, n_hidden, n_out):
        print "Creating MLP with params"
        print locals()

        self.input = input

        self.hiddenLayers = []

        # stack n_hidden_layers hidden layers, each one taking the previous
        # layer's output as its input
        last_input = input
        last_n_out = n_in
        for i in range(n_hidden_layers):
            self.hiddenLayers.append(
                HiddenLayer(rng=rng, input=last_input,
                            n_in=last_n_out,
                            n_out=n_hidden,
                            activation=T.tanh))
            last_input = self.hiddenLayers[-1].output
            last_n_out = n_hidden

        self.logRegressionLayer = LogisticRegression(
                input=self.hiddenLayers[-1].output,
                n_in=n_hidden,
                n_out=n_out)

        # L1 and squared-L2 norms of all weight matrices, used as
        # regularization terms in the training cost
        self.L1 = abs(self.logRegressionLayer.W).sum()
        for h in self.hiddenLayers:
            self.L1 += abs(h.W).sum()

        self.L2_sqr = (self.logRegressionLayer.W ** 2).sum()
        for h in self.hiddenLayers:
            self.L2_sqr += (h.W ** 2).sum()

        self.negative_log_likelihood = \
            self.logRegressionLayer.negative_log_likelihood

        self.errors = self.logRegressionLayer.errors

        self.params = []
        for hl in self.hiddenLayers:
            self.params += hl.params
        self.params += self.logRegressionLayer.params

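
# Usage sketch (illustrative parameters): a 3-hidden-layer MLP over 28x28
# inputs with 10 output classes would be built as
#
#   classifier = MLP(rng=numpy.random.RandomState(1234), input=T.matrix('x'),
#                    n_in=28 * 28, n_hidden_layers=3, n_hidden=500, n_out=10)
#
# after which classifier.params holds the W and b of each hidden layer
# followed by those of the logistic regression layer.
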
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='../data/mnist.pkl.gz', batch_size=20):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.value.shape[0] / batch_size
    n_valid_batches = valid_set_x.value.shape[0] / batch_size
    n_test_batches = test_set_x.value.shape[0] / batch_size
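
    # Note: Python 2 integer division, so examples that do not fill a
    # complete minibatch are dropped. With the usual mnist.pkl.gz split
    # (50000/10000/10000 examples) and batch_size=20 this yields 2500,
    # 500 and 500 batches respectively.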

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images
    y = T.ivector('y')     # the labels are presented as a 1D vector of
                           # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class; n_hidden_layers is required by MLP.__init__
    # and the value 1 is an assumption reproducing the single-hidden-layer
    # tutorial setup
    classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden_layers=1,
                     n_hidden=500, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
         + L1_reg * classifier.L1 \
         + L2_reg * classifier.L2_sqr
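
    # spelled out, the training criterion is
    #   cost = NLL(y) + L1_reg * sum_k |W_k| + L2_reg * sum_k ||W_k||^2
    # where the sums run over the hidden-layer and logistic-regression
    # weight matrices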

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
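
    # The `givens` mapping substitutes a minibatch slice of the dataset for
    # x and y inside the compiled graph. A rough sketch of the equivalent
    # call without `givens` (x_batch and y_batch being hypothetical numpy
    # arrays) would be
    #
    #   errors = theano.function([x, y], classifier.errors(y))
    #   err = errors(x_batch, y_batch)
    #
    # but slicing shared variables via `givens` lets the data stay on the
    # device (e.g. the GPU) rather than being transferred at every call.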

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in the list gparams
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    # specify how to update the parameters of the model as a dictionary
    updates = {}
    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    for param, gparam in zip(classifier.params, gparams):
        updates[param] = param - learning_rate * gparam
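
    # i.e. plain stochastic gradient descent: every parameter theta moves as
    #   theta <- theta - learning_rate * d(cost)/d(theta)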

    # compiling a Theano function `train_model` that returns the cost and,
    # at the same time, updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(inputs=[index], outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000    # look at this many minibatches regardless
    patience_increase = 2    # wait this much longer when a new best
                             # is found
    improvement_threshold = 0.995    # a relative improvement of this much
                                     # is considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                             # go through this many minibatches before
                             # checking the network on the validation set;
                             # in this case we check every epoch

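    # Worked example with the defaults: batch_size=20 on the 50000-example
    # MNIST training set gives n_train_batches = 2500, so
    # validation_frequency = min(2500, 10000 / 2) = 2500, i.e. exactly one
    # validation pass per epoch.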

    best_params = None
    best_validation_loss = float('inf')
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number (epoch is 1-based by this point, hence the -1)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
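                        # e.g. a new best at iter 6000 raises patience to
                        # max(10000, 6000 * 2) = 12000 minibatches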

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

210