comparison deep/stacked_dae/old/sgd_optimization.py @ 265:c8fe09a65039

Move the new stacked_dae code from v2 to the base 'stacked_dae' directory, and move the old code to the 'old' directory
author fsavard
date Fri, 19 Mar 2010 10:54:39 -0400
parents deep/stacked_dae/sgd_optimization.py@acb942530923
#!/usr/bin/python
# coding: utf-8

# Generic SdA optimization loop, adapted from the deeplearning.net tutorial
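#
# The optimizer below runs the standard two-phase SdA recipe: unsupervised,
# greedy layer-wise pretraining of each denoising autoencoder, followed by
# supervised finetuning of the whole stack with early stopping on the
# validation error.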

import numpy
import theano
import time
import datetime
import theano.tensor as T
import sys

from jobman import DD
import jobman, jobman.sql

from stacked_dae import SdA
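
# (DD is jobman's dictionary subclass with attribute-style access; the
# `hyperparameters` object used throughout is expected to behave like one.)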

def shared_dataset(data_xy):
    data_x, data_y = data_xy
    #shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
    #shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
    #shared_y = T.cast(shared_y, 'int32')
    shared_x = theano.shared(data_x)
    shared_y = theano.shared(data_y)
    return shared_x, shared_y
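
# Wrapping the arrays in theano.shared variables lets Theano keep the whole
# dataset on the compute device, so minibatches are sliced there instead of
# being copied in at every call. Illustrative use (names hypothetical):
#   train_set_x, train_set_y = shared_dataset((train_x, train_y))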
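
# Null-object stand-in used when no series multiplexer is supplied: anything
# logged through it (reconstruction error, params, ...) is discarded.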
class DummyMux():
    def append(self, param1, param2):
        pass

class SdaSgdOptimizer:
    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series_mux=None):
        self.dataset = dataset
        self.hp = hyperparameters
        self.n_ins = n_ins
        self.n_outs = n_outs
        self.input_divider = input_divider

        if not series_mux:
            series_mux = DummyMux()
            print "No series multiplexer set"
        self.series_mux = series_mux

        self.rng = numpy.random.RandomState(1234)
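        # (fixed seed, so repeated runs with identical hyperparameters are
        # reproducible)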

        self.init_datasets()
        self.init_classifier()

        sys.stdout.flush()

    def init_datasets(self):
        print "init_datasets"
        sys.stdout.flush()

        train_set, valid_set, test_set = self.dataset
        self.test_set_x, self.test_set_y = shared_dataset(test_set)
        self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
        self.train_set_x, self.train_set_y = shared_dataset(train_set)

        # compute number of minibatches for training, validation and testing
        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
        # remove last batch in case it's incomplete
        self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1
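        # (Python 2 integer division: e.g. 50000 examples at minibatch_size=20
        # gives 2500 batches, and any trailing partial batch is already
        # floored away; the extra -1 on the test set drops one more batch to
        # be safe.)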

    def init_classifier(self):
        print "Constructing classifier"

        # we don't want to save arrays in DD objects, so
        # we recreate those arrays here
        nhl = self.hp.num_hidden_layers
        layers_sizes = [self.hp.hidden_layers_sizes] * nhl
        corruption_levels = [self.hp.corruption_levels] * nhl

        # construct the stacked denoising autoencoder class
        self.classifier = SdA(
            train_set_x=self.train_set_x,
            train_set_y=self.train_set_y,
            batch_size=self.hp.minibatch_size,
            n_ins=self.n_ins,
            hidden_layers_sizes=layers_sizes,
            n_outs=self.n_outs,
            corruption_levels=corruption_levels,
            rng=self.rng,
            pretrain_lr=self.hp.pretraining_lr,
            finetune_lr=self.hp.finetuning_lr,
            input_divider=self.input_divider)

        #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")

        sys.stdout.flush()
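        # (the SdA object exposes one compiled pretraining function per layer
        # plus a finetune function and an `errors` expression; pretrain() and
        # finetune() below rely on these)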

    def train(self):
        self.pretrain()
        self.finetune()

    def pretrain(self):
        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
        sys.stdout.flush()

        #time_acc_func = 0.0
        #time_acc_total = 0.0

        start_time = time.clock()
        ## Pre-train layer-wise
        for i in xrange(self.classifier.n_layers):
            # go through pretraining epochs
            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                # go through the training set
                for batch_index in xrange(self.n_train_batches):
                    #t1 = time.clock()
                    c = self.classifier.pretrain_functions[i](batch_index)
                    #t2 = time.clock()

                    #time_acc_func += t2 - t1

                    #if batch_index % 500 == 0:
                    #    print "acc / total", time_acc_func / (t2 - start_time), time_acc_func

                    self.series_mux.append("reconstruction_error", c)

                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), c
                sys.stdout.flush()
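                # (the cost printed above is that of the epoch's last
                # minibatch, not an average over the epoch)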

                self.series_mux.append("params", self.classifier.all_params)

        end_time = time.clock()

        print ('Pretraining took %f minutes' % ((end_time-start_time)/60.))
        self.hp.update({'pretraining_time': end_time-start_time})

        sys.stdout.flush()

    def finetune(self):
        print "STARTING FINETUNING, time = ", datetime.datetime.now()

        index = T.lscalar()  # index to a [mini]batch
        minibatch_size = self.hp.minibatch_size

        # create a function to compute the mistakes that are made by the model
        # on the validation set, or testing set
        shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
        test_model = theano.function([index], self.classifier.errors,
            givens={
                self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
                self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})

        validate_model = theano.function([index], self.classifier.errors,
            givens={
                self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
                self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
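        # (the `givens` substitutions splice minibatch slices of the shared
        # datasets into the graph, so data stays on the device instead of
        # being passed in at every call; the division rescales inputs by
        # input_divider)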

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2.  # wait this much longer when a new best is
                                # found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(self.n_train_batches, patience/2)
                                # go through this many
                                # minibatches before checking the network
                                # on the validation set; in this case we
                                # check every epoch
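        # (e.g. with patience=10000 and patience_increase=2, a new best found
        # at iteration 7000 extends patience to max(10000, 7000*2) = 14000)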

        best_params = None
        best_validation_loss = float('inf')
        test_score = 0.
        start_time = time.clock()

        done_looping = False
        epoch = 0

        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(self.n_train_batches):

                cost_ij = self.classifier.finetune(minibatch_index)
                iter = epoch * self.n_train_batches + minibatch_index

                self.series_mux.append("training_error", cost_ij)

                if (iter+1) % validation_frequency == 0:

                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    self.series_mux.append("validation_error", this_validation_loss)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index+1, self.n_train_batches,
                           this_validation_loss*100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * \
                                improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                        # test it on the test set
                        test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
                        test_score = numpy.mean(test_losses)
                        self.series_mux.append("test_error", test_score)
                        print(('     epoch %i, minibatch %i/%i, test error of best '
                               'model %f %%') %
                              (epoch, minibatch_index+1, self.n_train_batches,
                               test_score*100.))

                    sys.stdout.flush()

                self.series_mux.append("params", self.classifier.all_params)

                if patience <= iter:
                    done_looping = True
                    break

        end_time = time.clock()
        self.hp.update({'finetuning_time': end_time-start_time,
                        'best_validation_error': best_validation_loss,
                        'test_score': test_score,
                        'num_finetuning_epochs': epoch})

        print(('Optimization complete with best validation score of %f %%, '
               'with test performance %f %%') %
              (best_validation_loss * 100., test_score*100.))
        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
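
# Minimal driver sketch (hypothetical; the real entry points are the
# jobman-driven scripts elsewhere in this repository). `load_datasets` and
# the hyperparameter values below are placeholders, not part of this module:
#
# if __name__ == '__main__':
#     datasets = load_datasets()  # (train, valid, test) pairs of numpy arrays
#     hp = DD(minibatch_size=20, num_hidden_layers=3,
#             hidden_layers_sizes=1000, corruption_levels=0.2,
#             pretraining_lr=0.001, finetuning_lr=0.1,
#             pretraining_epochs_per_layer=15, max_finetuning_epochs=1000)
#     optimizer = SdaSgdOptimizer(datasets, hp, n_ins=32*32, n_outs=10)
#     optimizer.train()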