Mercurial > ift6266
annotate deep/autoencoder/DA_training.py @ 501:5927432d8b8d
-
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Tue, 01 Jun 2010 12:28:05 -0400 |
parents | e12702b88a2d |
children |
rev | line source |
---|---|
190 | 1 """ |
2 This tutorial introduces stacked denoising auto-encoders (SdA) using Theano. | |
3 | |
4 Denoising autoencoders are the building blocks for SDAE. | |
5 They are based on auto-encoders as the ones used in Bengio et al. 2007. | |
6 An autoencoder takes an input x and first maps it to a hidden representation | |
7 y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting | |
8 latent representation y is then mapped back to a "reconstructed" vector | |
9 z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight | |
10 matrix W' can optionally be constrained such that W' = W^T, in which case | |
11 the autoencoder is said to have tied weights. The network is trained | |
12 to minimize the reconstruction error (the error between x and z). | |
13 | |
14 For the denoising autoencoder, during training, first x is corrupted into | |
15 \tilde{x}, where \tilde{x} is a partially destroyed version of x by means | |
16 of a stochastic mapping. Afterwards y is computed as before (using | |
17 \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction | |
18 error is now measured between z and the uncorrupted input x, which is | |
19 computed as the cross-entropy : | |
20 - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)] | |
21 | |
22 For X iteration of the main program loop it takes *** minutes on an | |
23 Intel Core i7 and *** minutes on GPU (NVIDIA GTX 285 graphics processor). | |
24 | |
25 | |
26 References : | |
27 - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and | |
28 Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, | |
29 2008 | |
30 - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise | |
31 Training of Deep Networks, Advances in Neural Information Processing | |
32 Systems 19, 2007 | |
33 | |
34 """ | |
35 | |
36 import numpy | |
37 import theano | |
38 import time | |
39 import theano.tensor as T | |
40 from theano.tensor.shared_randomstreams import RandomStreams | |
41 | |
42 import gzip | |
43 import cPickle | |
44 | |
45 from pylearn.io import filetensor as ft | |
46 | |
class dA(object):
    r"""Denoising Auto-Encoder class (dA)

    A denoising autoencoder tries to reconstruct the input from a corrupted
    version of it by projecting it first in a latent space and reprojecting
    it afterwards back in the input space. Please refer to Vincent et al.,
    2008 for more details. If x is the input then equation (1) computes a
    partially destroyed version of x by means of a stochastic mapping q_D.
    Equation (2) computes the projection of the input into the latent space.
    Equation (3) computes the reconstruction of the input, while equation (4)
    computes the reconstruction error.

    .. math::

        \tilde{x} ~ q_D(\tilde{x}|x)                                     (1)

        y = s(W \tilde{x} + b)                                           (2)

        z = s(W' y + b')                                                 (3)

        L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]      (4)

    Attributes set by the constructor:

    - ``W``, ``b``, ``b_prime``: Theano shared variables (trainable)
    - ``W_prime``: ``W.T`` (tied weights, not a separate parameter)
    - ``y``: hidden code computed from the corrupted input
    - ``L``: per-example cross-entropy reconstruction cost (vector)
    - ``cost``: mean of ``L`` over the minibatch
    - ``hidden_values``: hidden code computed from the uncorrupted input
    """

    def __init__(self, n_visible=784, n_hidden=500, complexity=0.1,
                 input=None):
        """
        Initialize the dA class by specifying the number of visible units
        (the dimension d of the input), the number of hidden units (the
        dimension d' of the latent or hidden space), the corruption level
        and, optionally, a symbolic variable for the input. Such a symbolic
        variable is useful when the input is the result of some
        computations. For example when dealing with SdAs, the dA on layer 2
        gets as input the output of the dA on layer 1.

        :param n_visible: number of visible units

        :param n_hidden: number of hidden units

        :param complexity: corruption level; each component of the input is
                           set to zero with this probability

        :param input: a symbolic description of the input or None (in which
                      case a new matrix variable named 'input' is created)
        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # Theano random generator that gives symbolic random values
        theano_rng = RandomStreams()

        # print the parameters of the dA
        print('input size = %d' % n_visible)
        print('hidden size = %d' % n_hidden)
        print('complexity = %2.2f' % complexity)

        # initial values for weights and biases
        # note : W' is written as `W_prime` and b' as `b_prime`

        # W is uniformly sampled between -sqrt(6./(n_visible+n_hidden)) and
        # sqrt(6./(n_visible+n_hidden)). All initial values are stored with
        # dtype theano.config.floatX so that the code is runnable on GPU.
        w_bound = numpy.sqrt(6. / (n_visible + n_hidden))
        initial_W = numpy.asarray(
            numpy.random.uniform(low=-w_bound, high=w_bound,
                                 size=(n_visible, n_hidden)),
            dtype=theano.config.floatX)
        initial_b = numpy.zeros(n_hidden, dtype=theano.config.floatX)
        initial_b_prime = numpy.zeros(n_visible, dtype=theano.config.floatX)

        # theano shared variables for weights and biases
        self.W = theano.shared(value=initial_W, name="W")
        self.b = theano.shared(value=initial_b, name="b")
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
        self.b_prime = theano.shared(value=initial_b_prime, name="b'")

        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            x = T.dmatrix(name='input')
        else:
            x = input

        # Equation (1)
        # note : first argument of theano_rng.binomial is the shape (size)
        #        of the random numbers that it should produce,
        #        second argument is the number of trials,
        #        third argument is the probability of success of any trial.
        #
        #        This produces a mask of 0s and 1s where 1 has probability
        #        (1 - complexity); multiplying by x zeroes the other entries.
        tilde_x = theano_rng.binomial(x.shape, 1, 1 - complexity) * x

        # Equation (2)
        # note : y is stored as an attribute of the class so that it can be
        #        used later when stacking dAs.
        self.y = T.nnet.sigmoid(T.dot(tilde_x, self.W) + self.b)

        # Equation (3)
        z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)

        # Equation (4)
        # note : L is a vector, where each element is the cross-entropy cost
        #        of the reconstruction of the corresponding example of the
        #        minibatch; the minibatch cost is the mean of its elements.
        self.L = - T.sum(x * T.log(z) + (1 - x) * T.log(1 - z), axis=1)
        self.cost = T.mean(self.L)

        # note : y is computed from the corrupted `tilde_x`. The hidden layer
        #        obtained from the uncorrupted input is kept separately, e.g.
        #        to be passed as input to a layer stacked above.
        self.hidden_values = T.nnet.sigmoid(T.dot(x, self.W) + self.b)
165 | |
166 | |
167 | |
def _nist_read_filetensor(path):
    """Read one filetensor (.ft) file and return its content.

    The file is opened in binary mode and closed even if reading fails.
    """
    handle = open(path, 'rb')
    try:
        return ft.read(handle)
    finally:
        handle.close()


def _nist_slice_batches(data_x, data_y, start, stop, batch_size):
    """Cut rows [start, stop) into a list of (inputs, labels) minibatches."""
    return [(data_x[pos:pos + batch_size], data_y[pos:pos + batch_size])
            for pos in xrange(start, stop, batch_size)]


def sgd_optimization_nist(learning_rate=0.01,
                          n_iter=300, n_code_layer=400,
                          complexity=0.1):
    """
    Train a denoising autoencoder on NIST with stochastic gradient descent.

    The validation set is carved out of the end of the training array and has
    the same size as the test set. The validation cost of every epoch is
    appended to a text file whose name encodes the hyper-parameters.

    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :param n_iter: maximal number of passes over the training minibatches

    :param n_code_layer: number of hidden units of the autoencoder

    :param complexity: corruption level given to the `dA` class

    :returns: tuple (best validation cost, test cost measured when that best
              validation cost was reached, training time in minutes,
              iteration index of the best validation cost)
    """
    # file in which the validation curve is saved
    filename = 'lr_' + str(learning_rate) + 'ni_' + str(n_iter) + \
        'nc_' + str(n_code_layer) + 'c_' + str(complexity) + '.txt'
    result_file = open(filename, 'w')

    # try/finally so the result file is closed even if loading or training
    # raises
    try:
        data_path = '/data/lisa/data/nist/by_class/'
        train_set_x = _nist_read_filetensor(
            data_path + 'all/all_train_data.ft')
        train_set_y = _nist_read_filetensor(
            data_path + 'all/all_train_labels.ft')
        test_set_x = _nist_read_filetensor(
            data_path + 'all/all_test_data.ft')
        test_set_y = _nist_read_filetensor(
            data_path + 'all/all_test_labels.ft')

        batch_size = 20    # size of the minibatch

        # create a validation set the same size as the test set, using the
        # end of the training array for this purpose; the remainder is
        # discarded so that every size is a multiple of batch_size
        test_size = (len(test_set_y) // batch_size) * batch_size
        train_size = (len(train_set_x) // batch_size) * batch_size
        validation_size = test_size
        offset = train_size - test_size
        print('train size = %d' % train_size)
        print('test size = %d' % test_size)
        print('valid size = %d' % validation_size)
        print('offset = %d' % offset)

        train_batches = _nist_slice_batches(train_set_x, train_set_y,
                                            0, offset, batch_size)
        test_batches = _nist_slice_batches(test_set_x, test_set_y,
                                           0, test_size, batch_size)
        valid_batches = _nist_slice_batches(train_set_x, train_set_y,
                                            offset, offset + test_size,
                                            batch_size)

        # symbolic variable for the data, presented as rasterized images
        x = T.fmatrix()

        # construct the denoising autoencoder; NIST images are 32x32
        n_ins = 32 * 32
        encoder = dA(n_ins, n_code_layer, complexity,
                     input=x.reshape((batch_size, n_ins)))

        # gradients of the cost with respect to the layer parameters
        gW = T.grad(encoder.cost, encoder.W)
        gb = T.grad(encoder.cost, encoder.b)
        gb_prime = T.grad(encoder.cost, encoder.b_prime)

        # function performing one SGD update step and returning the cost
        train_model = theano.function([x], encoder.cost, updates={
            encoder.W: encoder.W - gW * learning_rate,
            encoder.b: encoder.b - gb * learning_rate,
            encoder.b_prime: encoder.b_prime - gb_prime * learning_rate})

        # function returning the reconstruction cost of a minibatch without
        # updating the parameters
        test_model = theano.function([x], encoder.cost)

        # inputs are divided by 255 before being fed to the model
        # (presumably raw pixel values in 0..255 -- TODO confirm)
        normalize = numpy.asarray(255, dtype=theano.config.floatX)

        def mean_cost(batches):
            # average reconstruction cost over a list of minibatches
            total = 0.
            for batch_x, _ in batches:
                total += test_model(batch_x / normalize)
            return total / len(batches)

        n_minibatches = len(train_batches)

        # early-stopping parameters
        patience = 10000000 // batch_size  # minimal number of iterations
        patience_increase = 2     # wait this much longer when a new best
                                  # is found
        improvement_threshold = 0.995  # a relative improvement of this much
                                       # is considered significant
        validation_frequency = n_minibatches  # check the validation set
                                              # once per epoch

        best_validation_loss = float('inf')
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
        # have a maximum of `n_iter` passes through the entire dataset
        for step in xrange(n_iter * n_minibatches):

            epoch = step // n_minibatches
            minibatch_index = step % n_minibatches

            batch_x, _ = train_batches[minibatch_index]
            train_model(batch_x / normalize)

            if (step + 1) % validation_frequency == 0:
                # average reconstruction cost on the validation set
                this_validation_loss = mean_cost(valid_batches)

                print('epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_minibatches,
                       this_validation_loss))

                # save value in file
                result_file.write(str(epoch) + ' ' +
                                  str(this_validation_loss) + '\n')

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, step * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # evaluate the new best model on the test set
                    test_score = mean_cost(test_batches)
                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f ') %
                          (epoch, minibatch_index + 1, n_minibatches,
                           test_score))

            if patience <= step:
                # the message is %-formatted (the original printed the raw
                # template together with a tuple)
                print('iter (%i) is superior than patience(%i). break' %
                      (step, patience))
                break

        end_time = time.clock()
        print(('Optimization complete with best validation score of %f ,'
               'with test performance %f ') %
              (best_validation_loss, test_score))
        print('The code ran for %f minutes' % ((end_time - start_time) / 60.))
    finally:
        result_file.close()

    return (best_validation_loss, test_score,
            (end_time - start_time) / 60, best_iter)
386 | |
def _mnist_split_into_batches(dataset, batch_size):
    """Turn an (inputs, labels) pair into a list of minibatch pairs.

    A minibatch is a pair; its first element holds `batch_size` consecutive
    datapoints and its second element the corresponding labels.
    """
    data_x, data_y = dataset
    return [(data_x[pos:pos + batch_size], data_y[pos:pos + batch_size])
            for pos in xrange(0, len(data_x), batch_size)]


def sgd_optimization_mnist(learning_rate=0.01,
                           n_iter=1, n_code_layer=400,
                           complexity=0.1):
    """
    Train a denoising autoencoder on MNIST with stochastic gradient descent.

    The validation cost of every epoch is appended to a text file whose name
    encodes the hyper-parameters.

    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :param n_iter: maximal number of passes over the training minibatches

    :param n_code_layer: number of hidden units of the autoencoder

    :param complexity: corruption level given to the `dA` class

    :returns: tuple (best validation cost, test cost measured when that best
              validation cost was reached, training time in minutes,
              iteration index of the best validation cost)
    """
    # file in which the validation curve is saved
    filename = 'lr_' + str(learning_rate) + 'ni_' + str(n_iter) + \
        'nc_' + str(n_code_layer) + 'c_' + str(complexity) + '.txt'
    result_file = open(filename, 'w')

    # try/finally so the result file is closed even if loading or training
    # raises
    try:
        # Load the dataset (a pickled train / validation / test triple)
        f = gzip.open('/u/lisa/HTML/deep/data/mnist/mnist.pkl.gz', 'rb')
        try:
            train_set, valid_set, test_set = cPickle.load(f)
        finally:
            f.close()

        batch_size = 20    # size of the minibatch

        train_batches = _mnist_split_into_batches(train_set, batch_size)
        valid_batches = _mnist_split_into_batches(valid_set, batch_size)
        test_batches = _mnist_split_into_batches(test_set, batch_size)

        # symbolic variable for the data, presented as rasterized images
        x = T.fmatrix()

        # construct the denoising autoencoder; MNIST images are 28x28
        n_ins = 28 * 28
        encoder = dA(n_ins, n_code_layer, complexity,
                     input=x.reshape((batch_size, n_ins)))

        # gradients of the cost with respect to the layer parameters
        gW = T.grad(encoder.cost, encoder.W)
        gb = T.grad(encoder.cost, encoder.b)
        gb_prime = T.grad(encoder.cost, encoder.b_prime)

        # function performing one SGD update step and returning the cost
        train_model = theano.function([x], encoder.cost, updates={
            encoder.W: encoder.W - gW * learning_rate,
            encoder.b: encoder.b - gb * learning_rate,
            encoder.b_prime: encoder.b_prime - gb_prime * learning_rate})

        # function returning the reconstruction cost of a minibatch without
        # updating the parameters
        test_model = theano.function([x], encoder.cost)

        def mean_cost(batches):
            # average reconstruction cost over a list of minibatches
            total = 0.
            for batch_x, _ in batches:
                total += test_model(batch_x)
            return total / len(batches)

        n_minibatches = len(train_batches)

        # early-stopping parameters
        patience = 10000          # minimal number of iterations
        patience_increase = 2     # wait this much longer when a new best
                                  # is found
        improvement_threshold = 0.995  # a relative improvement of this much
                                       # is considered significant
        validation_frequency = n_minibatches  # check the validation set
                                              # once per epoch

        best_validation_loss = float('inf')
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
        # have a maximum of `n_iter` passes through the entire dataset
        for step in xrange(n_iter * n_minibatches):

            epoch = step // n_minibatches
            minibatch_index = step % n_minibatches

            batch_x, _ = train_batches[minibatch_index]
            train_model(batch_x)

            if (step + 1) % validation_frequency == 0:
                # average reconstruction cost on the validation set
                this_validation_loss = mean_cost(valid_batches)

                print('epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_minibatches,
                       this_validation_loss))

                # save value in file
                result_file.write(str(epoch) + ' ' +
                                  str(this_validation_loss) + '\n')

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, step * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # evaluate the new best model on the test set
                    test_score = mean_cost(test_batches)
                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f ') %
                          (epoch, minibatch_index + 1, n_minibatches,
                           test_score))

            if patience <= step:
                # the message is %-formatted (the original printed the raw
                # template followed by the two numbers)
                print('iter (%i) is superior than patience(%i). break' %
                      (step, patience))
                break

        end_time = time.clock()
        print(('Optimization complete with best validation score of %f ,'
               'with test performance %f ') %
              (best_validation_loss, test_score))
        print('The code ran for %f minutes' % ((end_time - start_time) / 60.))
    finally:
        result_file.close()

    return (best_validation_loss, test_score,
            (end_time - start_time) / 60, best_iter)
573 | |
574 | |
def experiment(state, channel):
    """Run the MNIST training with the hyper-parameters held by `state`.

    Reads `learning_rate`, `n_iter`, `n_code_layer` and `complexity` from
    `state`, stores the four results back on `state` and returns
    `channel.COMPLETE`.
    (presumably a job-scheduler entry point -- confirm against the caller)
    """
    results = sgd_optimization_mnist(state.learning_rate, state.n_iter,
                                     state.n_code_layer, state.complexity)
    best_loss, score, minutes, best_iteration = results

    state.best_validation_loss = best_loss
    state.test_score = score
    state.minutes_trained = minutes
    state.iter = best_iteration

    return channel.COMPLETE
587 | |
def experiment_nist(state, channel):
    """Run the NIST training with the hyper-parameters held by `state`.

    Reads `learning_rate`, `n_iter`, `n_code_layer` and `complexity` from
    `state`, stores the four results back on `state` and returns
    `channel.COMPLETE`.
    (presumably a job-scheduler entry point -- confirm against the caller)
    """
    results = sgd_optimization_nist(state.learning_rate, state.n_iter,
                                    state.n_code_layer, state.complexity)
    best_loss, score, minutes, best_iteration = results

    state.best_validation_loss = best_loss
    state.test_score = score
    state.minutes_trained = minutes
    state.iter = best_iteration

    return channel.COMPLETE
600 | |
601 | |
# When executed as a script, train on NIST with the default hyper-parameters.
if __name__ == "__main__":
    sgd_optimization_nist()
605 | |
606 |