comparison deep/stacked_dae/v_sylvain/sgd_optimization.py @ 260:0c0f0b3f6a93
branch merge.
author | Arnaud Bergeron <abergeron@gmail.com> |
---|---|
date | Wed, 17 Mar 2010 15:31:21 -0400 |
parents | 7dd43ef66d15 |
children | a0264184684e |
259:3919c71e3091 (old) | 260:0c0f0b3f6a93 (new) |
---|---|
10 import theano.tensor as T | 10 import theano.tensor as T |
11 import sys | 11 import sys |
12 | 12 |
13 from jobman import DD | 13 from jobman import DD |
14 import jobman, jobman.sql | 14 import jobman, jobman.sql |
| 15 from copy import copy |
15 | 16 |
16 from stacked_dae import SdA | 17 from stacked_dae import SdA |
17 | 18 |
18 from ift6266.utils.seriestables import * | 19 from ift6266.utils.seriestables import * |
19 | |
20 ##def shared_dataset(data_xy): | |
21 ## data_x, data_y = data_xy | |
22 ## if theano.config.device.startswith("gpu"): | |
23 ## print "TRANSFERING DATASETS (via shared()) TO GPU" | |
24 ## shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) | |
25 ## shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) | |
26 ## shared_y = T.cast(shared_y, 'int32') | |
27 ## else: | |
28 ## print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES" | |
29 ## shared_x = theano.shared(data_x) | |
30 ## shared_y = theano.shared(data_y) | |
31 ## return shared_x, shared_y | |
32 | |
33 ###### The shared variables will be replaced using "given" in the function definitions later in this file | |
34 def shared_dataset(batch_size, n_in): | |
35 | |
36 shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX)) | |
37 shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX)) | |
38 return shared_x, shared_y | |
39 | 20 |
40 default_series = { \ | 21 default_series = { \ |
41 'reconstruction_error' : DummySeries(), | 22 'reconstruction_error' : DummySeries(), |
42 'training_error' : DummySeries(), | 23 'training_error' : DummySeries(), |
43 'validation_error' : DummySeries(), | 24 'validation_error' : DummySeries(), |
44 'test_error' : DummySeries(), | 25 'test_error' : DummySeries(), |
45 'params' : DummySeries() | 26 'params' : DummySeries() |
46 } | 27 } |
47 | 28 |
| 29 def itermax(iter, max): |
| 30 for i,it in enumerate(iter): |
| 31 if i >= max: |
| 32 break |
| 33 yield it |
| 34 |
48 class SdaSgdOptimizer: | 35 class SdaSgdOptimizer: |
49 def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series): | 36 def __init__(self, dataset, hyperparameters, n_ins, n_outs, |
| 37 examples_per_epoch, series=default_series, max_minibatches=None): |
50 self.dataset = dataset | 38 self.dataset = dataset |
51 self.hp = hyperparameters | 39 self.hp = hyperparameters |
52 self.n_ins = n_ins | 40 self.n_ins = n_ins |
53 self.n_outs = n_outs | 41 self.n_outs = n_outs |
54 self.input_divider = input_divider | 42 self.parameters_pre=[] |
55 | 43 |
| 44 self.max_minibatches = max_minibatches |
| 45 print "SdaSgdOptimizer, max_minibatches =", max_minibatches |
| 46 |
| 47 self.ex_per_epoch = examples_per_epoch |
| 48 self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size |
| 49 |
56 self.series = series | 50 self.series = series |
57 | 51 |
58 self.rng = numpy.random.RandomState(1234) | 52 self.rng = numpy.random.RandomState(1234) |
59 | 53 |
60 self.init_datasets() | |
61 self.init_classifier() | 54 self.init_classifier() |
62 | 55 |
63 sys.stdout.flush() | 56 sys.stdout.flush() |
64 | |
65 def init_datasets(self): | |
66 print "init_datasets" | |
67 sys.stdout.flush() | |
68 | |
69 #train_set, valid_set, test_set = self.dataset | |
70 self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) | |
71 self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) | |
72 self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) | |
73 | |
74 # compute number of minibatches for training, validation and testing | |
75 self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size | |
76 self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size | |
77 # remove last batch in case it's incomplete | |
78 self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 | |
79 | 57 |
80 def init_classifier(self): | 58 def init_classifier(self): |
81 print "Constructing classifier" | 59 print "Constructing classifier" |
82 | 60 |
83 # we don't want to save arrays in DD objects, so | 61 # we don't want to save arrays in DD objects, so |
86 layers_sizes = [self.hp.hidden_layers_sizes] * nhl | 64 layers_sizes = [self.hp.hidden_layers_sizes] * nhl |
87 corruption_levels = [self.hp.corruption_levels] * nhl | 65 corruption_levels = [self.hp.corruption_levels] * nhl |
88 | 66 |
89 # construct the stacked denoising autoencoder class | 67 # construct the stacked denoising autoencoder class |
90 self.classifier = SdA( \ | 68 self.classifier = SdA( \ |
91 train_set_x= self.train_set_x, \ | |
92 train_set_y = self.train_set_y,\ | |
93 batch_size = self.hp.minibatch_size, \ | 69 batch_size = self.hp.minibatch_size, \ |
94 n_ins= self.n_ins, \ | 70 n_ins= self.n_ins, \ |
95 hidden_layers_sizes = layers_sizes, \ | 71 hidden_layers_sizes = layers_sizes, \ |
96 n_outs = self.n_outs, \ | 72 n_outs = self.n_outs, \ |
97 corruption_levels = corruption_levels,\ | 73 corruption_levels = corruption_levels,\ |
98 rng = self.rng,\ | 74 rng = self.rng,\ |
99 pretrain_lr = self.hp.pretraining_lr, \ | 75 pretrain_lr = self.hp.pretraining_lr, \ |
100 finetune_lr = self.hp.finetuning_lr,\ | 76 finetune_lr = self.hp.finetuning_lr) |
101 input_divider = self.input_divider ) | |
102 | 77 |
103 #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") | 78 #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") |
104 | 79 |
105 sys.stdout.flush() | 80 sys.stdout.flush() |
106 | 81 |
107 def train(self): | 82 def train(self): |
108 self.pretrain(self.dataset) | 83 self.pretrain(self.dataset) |
109 self.finetune(self.dataset) | 84 self.finetune(self.dataset) |
110 | 85 |
111 def pretrain(self,dataset,reduce): | 86 def pretrain(self,dataset): |
112 print "STARTING PRETRAINING, time = ", datetime.datetime.now() | 87 print "STARTING PRETRAINING, time = ", datetime.datetime.now() |
113 sys.stdout.flush() | 88 sys.stdout.flush() |
114 | 89 |
115 start_time = time.clock() | 90 start_time = time.clock() |
116 ## Pre-train layer-wise | 91 ## Pre-train layer-wise |
117 for i in xrange(self.classifier.n_layers): | 92 for i in xrange(self.classifier.n_layers): |
118 # go through pretraining epochs | 93 # go through pretraining epochs |
119 for epoch in xrange(self.hp.pretraining_epochs_per_layer): | 94 for epoch in xrange(self.hp.pretraining_epochs_per_layer): |
120 # go through the training set | 95 # go through the training set |
121 batch_index=int(0) | 96 batch_index=0 |
122 for x,y in dataset.train(self.hp.minibatch_size): | 97 for x,y in dataset.train(self.hp.minibatch_size): |
| 98 c = self.classifier.pretrain_functions[i](x) |
| 99 |
| 100 self.series["reconstruction_error"].append((epoch, batch_index), c) |
123 batch_index+=1 | 101 batch_index+=1 |
124 if batch_index > reduce: #If maximum number of mini-batch is used | 102 |
| 103 #if batch_index % 100 == 0: |
| 104 # print "100 batches" |
| 105 |
| 106 # useful when doing tests |
| 107 if self.max_minibatches and batch_index >= self.max_minibatches: |
125 break | 108 break |
126 c = self.classifier.pretrain_functions[i](x) | |
127 | |
128 | |
129 self.series["reconstruction_error"].append((epoch, batch_index), c) | |
130 | 109 |
131 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c | 110 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c |
132 sys.stdout.flush() | 111 sys.stdout.flush() |
133 | 112 |
134 self.series['params'].append((epoch,), self.classifier.all_params) | 113 self.series['params'].append((epoch,), self.classifier.all_params) |
135 | 114 |
136 end_time = time.clock() | 115 end_time = time.clock() |
137 | 116 |
138 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) | 117 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) |
139 self.hp.update({'pretraining_time': end_time-start_time}) | 118 self.hp.update({'pretraining_time': end_time-start_time}) |
140 | 119 |
141 sys.stdout.flush() | 120 sys.stdout.flush() |
142 | 121 |
143 def finetune(self,dataset,reduce): | 122 #To be able to load them later for tests on finetune |
| 123 self.parameters_pre=[copy(x.value) for x in self.classifier.params] |
| 124 |
| 125 |
| 126 def finetune(self,dataset,num_finetune): |
144 print "STARTING FINETUNING, time = ", datetime.datetime.now() | 127 print "STARTING FINETUNING, time = ", datetime.datetime.now() |
145 | 128 |
146 #index = T.lscalar() # index to a [mini]batch | |
147 minibatch_size = self.hp.minibatch_size | 129 minibatch_size = self.hp.minibatch_size |
148 ensemble_x = T.matrix('ensemble_x') | |
149 ensemble_y = T.ivector('ensemble_y') | |
150 | 130 |
151 # create a function to compute the mistakes that are made by the model | 131 # create a function to compute the mistakes that are made by the model |
152 # on the validation set, or testing set | 132 # on the validation set, or testing set |
153 shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX)) | 133 test_model = \ |
154 test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, | 134 theano.function( |
155 givens = { | 135 [self.classifier.x,self.classifier.y], self.classifier.errors) |
156 #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, | 136 # givens = { |
157 #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]}) | 137 # self.classifier.x: ensemble_x, |
158 self.classifier.x: ensemble_x, | 138 # self.classifier.y: ensemble_y]}) |
159 self.classifier.y: ensemble_y}) | 139 |
160 | 140 validate_model = \ |
161 validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, | 141 theano.function( |
162 givens = { | 142 [self.classifier.x,self.classifier.y], self.classifier.errors) |
163 #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, | 143 # givens = { |
164 #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]}) | 144 # self.classifier.x: , |
165 self.classifier.x: ensemble_x, | 145 # self.classifier.y: ]}) |
166 self.classifier.y: ensemble_y}) | |
167 | 146 |
168 | 147 |
169 # early-stopping parameters | 148 # early-stopping parameters |
170 patience = 10000 # look at this many examples regardless | 149 patience = 10000 # look at this many examples regardless |
171 patience_increase = 2. # wait this much longer when a new best is | 150 patience_increase = 2. # wait this much longer when a new best is |
172 # found | 151 # found |
173 improvement_threshold = 0.995 # a relative improvement of this much is | 152 improvement_threshold = 0.995 # a relative improvement of this much is |
174 # considered significant | 153 # considered significant |
175 validation_frequency = min(self.n_train_batches, patience/2) | 154 validation_frequency = min(self.mb_per_epoch, patience/2) |
176 # go through this many | 155 # go through this many |
177 # minibatches before checking the network | 156 # minibatches before checking the network |
178 # on the validation set; in this case we | 157 # on the validation set; in this case we |
179 # check every epoch | 158 # check every epoch |
| 159 if self.max_minibatches and validation_frequency > self.max_minibatches: |
| 160 validation_frequency = self.max_minibatches / 2 |
180 | 161 |
181 best_params = None | 162 best_params = None |
182 best_validation_loss = float('inf') | 163 best_validation_loss = float('inf') |
183 test_score = 0. | 164 test_score = 0. |
184 start_time = time.clock() | 165 start_time = time.clock() |
185 | 166 |
186 done_looping = False | 167 done_looping = False |
187 epoch = 0 | 168 epoch = 0 |
188 | 169 |
189 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): | 170 total_mb_index = 0 |
| 171 |
| 172 while (epoch < num_finetune) and (not done_looping): |
190 epoch = epoch + 1 | 173 epoch = epoch + 1 |
191 minibatch_index=int(0) | 174 minibatch_index = -1 |
192 for x,y in dataset.train(minibatch_size): | 175 for x,y in dataset.train(minibatch_size): |
193 minibatch_index +=1 | 176 minibatch_index += 1 |
194 | |
195 if minibatch_index > reduce: #If maximum number of mini-batchs is used | |
196 break | |
197 | |
198 cost_ij = self.classifier.finetune(x,y) | 177 cost_ij = self.classifier.finetune(x,y) |
199 iter = epoch * self.n_train_batches + minibatch_index | 178 total_mb_index += 1 |
200 | 179 |
201 self.series["training_error"].append((epoch, minibatch_index), cost_ij) | 180 self.series["training_error"].append((epoch, minibatch_index), cost_ij) |
202 | 181 |
203 if (iter+1) % validation_frequency == 0: | 182 if (total_mb_index+1) % validation_frequency == 0: |
204 | 183 |
205 #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)] | 184 iter = dataset.valid(minibatch_size) |
206 test_index=int(0) | 185 if self.max_minibatches: |
207 validation_losses=[] | 186 iter = itermax(iter, self.max_minibatches) |
208 for x,y in dataset.valid(minibatch_size): | 187 validation_losses = [validate_model(x,y) for x,y in iter] |
209 test_index+=1 | |
210 if test_index > reduce: | |
211 break | |
212 validation_losses.append(validate_model(x,y)) | |
213 this_validation_loss = numpy.mean(validation_losses) | 188 this_validation_loss = numpy.mean(validation_losses) |
214 | 189 |
215 self.series["validation_error"].\ | 190 self.series["validation_error"].\ |
216 append((epoch, minibatch_index), this_validation_loss*100.) | 191 append((epoch, minibatch_index), this_validation_loss*100.) |
217 | 192 |
218 print('epoch %i, minibatch %i, validation error %f %%' % \ | 193 print('epoch %i, minibatch %i, validation error %f %%' % \ |
219 (epoch, minibatch_index, \ | 194 (epoch, minibatch_index+1, \ |
220 this_validation_loss*100.)) | 195 this_validation_loss*100.)) |
221 | 196 |
222 | 197 |
223 # if we got the best validation score until now | 198 # if we got the best validation score until now |
224 if this_validation_loss < best_validation_loss: | 199 if this_validation_loss < best_validation_loss: |
225 | 200 |
226 #improve patience if loss improvement is good enough | 201 #improve patience if loss improvement is good enough |
227 if this_validation_loss < best_validation_loss * \ | 202 if this_validation_loss < best_validation_loss * \ |
228 improvement_threshold : | 203 improvement_threshold : |
229 patience = max(patience, iter * patience_increase) | 204 patience = max(patience, total_mb_index * patience_increase) |
230 | 205 |
231 # save best validation score and iteration number | 206 # save best validation score and iteration number |
232 best_validation_loss = this_validation_loss | 207 best_validation_loss = this_validation_loss |
233 best_iter = iter | 208 best_iter = total_mb_index |
234 | 209 |
235 # test it on the test set | 210 # test it on the test set |
236 #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)] | 211 iter = dataset.test(minibatch_size) |
237 test_losses=[] | 212 if self.max_minibatches: |
238 i=0 | 213 iter = itermax(iter, self.max_minibatches) |
239 for x,y in dataset.test(minibatch_size): | 214 test_losses = [test_model(x,y) for x,y in iter] |
240 i+=1 | |
241 if i > reduce: | |
242 break | |
243 test_losses.append(test_model(x,y)) | |
244 test_score = numpy.mean(test_losses) | 215 test_score = numpy.mean(test_losses) |
245 | 216 |
246 self.series["test_error"].\ | 217 self.series["test_error"].\ |
247 append((epoch, minibatch_index), test_score*100.) | 218 append((epoch, minibatch_index), test_score*100.) |
248 | 219 |
249 print((' epoch %i, minibatch %i, test error of best ' | 220 print((' epoch %i, minibatch %i, test error of best ' |
250 'model %f %%') % | 221 'model %f %%') % |
251 (epoch, minibatch_index, | 222 (epoch, minibatch_index+1, |
252 test_score*100.)) | 223 test_score*100.)) |
253 | 224 |
254 sys.stdout.flush() | 225 sys.stdout.flush() |
255 | 226 |
| 227 # useful when doing tests |
| 228 if self.max_minibatches and minibatch_index >= self.max_minibatches: |
| 229 break |
| 230 |
256 self.series['params'].append((epoch,), self.classifier.all_params) | 231 self.series['params'].append((epoch,), self.classifier.all_params) |
257 | 232 |
258 if patience <= iter : | 233 if patience <= total_mb_index: |
259 done_looping = True | 234 done_looping = True |
260 break | 235 break |
261 | 236 |
262 end_time = time.clock() | 237 end_time = time.clock() |
263 self.hp.update({'finetuning_time':end_time-start_time,\ | 238 self.hp.update({'finetuning_time':end_time-start_time,\ |
267 | 242 |
268 print(('Optimization complete with best validation score of %f %%,' | 243 print(('Optimization complete with best validation score of %f %%,' |
269 'with test performance %f %%') % | 244 'with test performance %f %%') % |
270 (best_validation_loss * 100., test_score*100.)) | 245 (best_validation_loss * 100., test_score*100.)) |
271 print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) | 246 print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) |
272 | 247 |
273 | 248 |
274 | 249 #Set parameters like they were right after pre-training |
| 250 def reload_parameters(self): |
| 251 |
| 252 for idx,x in enumerate(self.parameters_pre): |
| 253 self.classifier.params[idx].value=copy(x) |
| 254 |
| 255 |
| 256 |
| 257 |
| 258 |
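
The `itermax` helper added in this changeset is what lets `max_minibatches` cap the work done per epoch: any minibatch iterator is simply cut off after a fixed number of items. Below is a minimal, runnable sketch of the same idea; the generator standing in for `dataset.valid(minibatch_size)` is illustrative only, not repository code.

```python
def itermax(iterable, max_count):
    # Yield at most max_count items from any iterable, then stop.
    for i, item in enumerate(iterable):
        if i >= max_count:
            break
        yield item

# Stand-in for a minibatch stream such as dataset.valid(minibatch_size).
batches = ((x, x + 1) for x in range(10))

# Cap the stream at 3 minibatches, as the optimizer does when max_minibatches is set.
print(list(itermax(batches, 3)))   # [(0, 1), (1, 2), (2, 3)]
```

The committed version shadows the built-ins `iter` and `max` with its argument names; the sketch only renames them for readability, the behaviour is the same.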
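`pretrain()` now ends by snapshotting the model with `self.parameters_pre = [copy(x.value) for x in self.classifier.params]`, and the new `reload_parameters()` copies those values back, so several finetuning runs can restart from the same pre-trained weights. Here is a minimal sketch of that snapshot/restore round-trip; plain NumPy arrays stand in for the Theano shared variables, and `TinyModel` is a made-up placeholder, not repository code.

```python
import numpy
from copy import copy

class TinyModel(object):
    def __init__(self):
        # Stand-ins for the Theano shared variables in classifier.params.
        self.params = [numpy.zeros((2, 2)), numpy.zeros(2)]

model = TinyModel()

# ... pre-training would update the parameters in place ...
model.params[0] += 1.0

# Snapshot right after pre-training; copy() so later updates cannot touch the snapshot.
parameters_pre = [copy(p) for p in model.params]

# ... finetuning keeps updating the live parameters ...
model.params[0] += 5.0

# Restore the pre-trained values before a second finetuning run.
for idx, saved in enumerate(parameters_pre):
    model.params[idx] = copy(saved)

print(model.params[0][0, 0])   # 1.0 again, not 6.0
```

The changeset itself goes through the old-style `.value` attribute of Theano shared variables; on later Theano releases the equivalent accessors are `get_value()` and `set_value()`.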
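The finetuning loop keeps the tutorial-style early stopping, now driven by `total_mb_index` instead of the per-epoch `iter` counter: `patience` is extended (scaled by `patience_increase`) whenever the validation loss beats the previous best by more than `improvement_threshold`, and training stops once the minibatch counter reaches `patience`. A compact, runnable sketch of that rule with made-up validation losses:

```python
# Illustrative numbers only; the repository uses patience = 10000 and real losses.
patience = 6                   # look at this many minibatches regardless
patience_increase = 2.0        # extend patience when a clearly better model is found
improvement_threshold = 0.995  # "clearly better" means beating the best by 0.5%

best = float('inf')
losses = [0.50, 0.40, 0.39, 0.395, 0.394, 0.393, 0.392, 0.391, 0.390]

for mb_index, loss in enumerate(losses):
    if loss < best:
        if loss < best * improvement_threshold:
            patience = max(patience, mb_index * patience_increase)
        best = loss
    if patience <= mb_index:
        print("stopping at minibatch %d, best loss %.3f" % (mb_index, best))
        break
```

With these numbers patience is never extended beyond its initial value, so the sketch stops at minibatch index 6 even though two minibatches remain.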