comparison deep/stacked_dae/v_sylvain/sgd_optimization.py @ 260:0c0f0b3f6a93

branch merge.
author Arnaud Bergeron <abergeron@gmail.com>
date Wed, 17 Mar 2010 15:31:21 -0400
parents 7dd43ef66d15
children a0264184684e

diff -r 3919c71e3091 -r 0c0f0b3f6a93 deep/stacked_dae/v_sylvain/sgd_optimization.py
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py
@@ -10,74 +10,52 @@
 import theano.tensor as T
 import sys
 
 from jobman import DD
 import jobman, jobman.sql
+from copy import copy
 
 from stacked_dae import SdA
 
 from ift6266.utils.seriestables import *
-
-##def shared_dataset(data_xy):
-##    data_x, data_y = data_xy
-##    if theano.config.device.startswith("gpu"):
-##        print "TRANSFERING DATASETS (via shared()) TO GPU"
-##        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-##        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-##        shared_y = T.cast(shared_y, 'int32')
-##    else:
-##        print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES"
-##        shared_x = theano.shared(data_x)
-##        shared_y = theano.shared(data_y)
-##    return shared_x, shared_y
-
-###### The shared variables will be replaced using "given" in the function definitions further on
-def shared_dataset(batch_size, n_in):
-
-    shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX))
-    shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX))
-    return shared_x, shared_y
 
 default_series = { \
        'reconstruction_error' : DummySeries(),
        'training_error' : DummySeries(),
        'validation_error' : DummySeries(),
        'test_error' : DummySeries(),
        'params' : DummySeries()
        }
 
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
+
 class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series):
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
+                 examples_per_epoch, series=default_series, max_minibatches=None):
         self.dataset = dataset
         self.hp = hyperparameters
         self.n_ins = n_ins
         self.n_outs = n_outs
-        self.input_divider = input_divider
+        self.parameters_pre=[]
 
+        self.max_minibatches = max_minibatches
+        print "SdaSgdOptimizer, max_minibatches =", max_minibatches
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
         self.series = series
 
         self.rng = numpy.random.RandomState(1234)
 
-        self.init_datasets()
         self.init_classifier()
 
         sys.stdout.flush()
-
-    def init_datasets(self):
-        print "init_datasets"
-        sys.stdout.flush()
-
-        #train_set, valid_set, test_set = self.dataset
-        self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-        self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-        self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-
-        # compute number of minibatches for training, validation and testing
-        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
-        # remove last batch in case it's incomplete
-        self.n_test_batches  = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1
 
     def init_classifier(self):
         print "Constructing classifier"
 
         # we don't want to save arrays in DD objects, so
@@ -86,178 +64,175 @@
         layers_sizes = [self.hp.hidden_layers_sizes] * nhl
         corruption_levels = [self.hp.corruption_levels] * nhl
 
         # construct the stacked denoising autoencoder class
         self.classifier = SdA( \
-                          train_set_x= self.train_set_x, \
-                          train_set_y = self.train_set_y,\
                           batch_size = self.hp.minibatch_size, \
                           n_ins= self.n_ins, \
                           hidden_layers_sizes = layers_sizes, \
                           n_outs = self.n_outs, \
                           corruption_levels = corruption_levels,\
                           rng = self.rng,\
                           pretrain_lr = self.hp.pretraining_lr, \
-                          finetune_lr = self.hp.finetuning_lr,\
-                          input_divider = self.input_divider )
+                          finetune_lr = self.hp.finetuning_lr)
 
         #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
 
         sys.stdout.flush()
 
     def train(self):
        self.pretrain(self.dataset)
        self.finetune(self.dataset)
 
-    def pretrain(self,dataset,reduce):
+    def pretrain(self,dataset):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
 
         start_time = time.clock()
         ## Pre-train layer-wise
         for i in xrange(self.classifier.n_layers):
             # go through pretraining epochs
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
-                batch_index=int(0)
+                batch_index=0
                 for x,y in dataset.train(self.hp.minibatch_size):
+                    c = self.classifier.pretrain_functions[i](x)
+
+                    self.series["reconstruction_error"].append((epoch, batch_index), c)
                     batch_index+=1
-                    if batch_index > reduce:    #If maximum number of mini-batch is used
+
+                    #if batch_index % 100 == 0:
+                    #    print "100 batches"
+
+                    # useful when doing tests
+                    if self.max_minibatches and batch_index >= self.max_minibatches:
                         break
-                    c = self.classifier.pretrain_functions[i](x)
-
-
-                    self.series["reconstruction_error"].append((epoch, batch_index), c)
 
                 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
                 sys.stdout.flush()
 
                 self.series['params'].append((epoch,), self.classifier.all_params)
 
         end_time = time.clock()
 
         print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
         self.hp.update({'pretraining_time': end_time-start_time})
 
         sys.stdout.flush()
 
-    def finetune(self,dataset,reduce):
+        #To be able to load them later for tests on finetune
+        self.parameters_pre=[copy(x.value) for x in self.classifier.params]
+
+
+    def finetune(self,dataset,num_finetune):
         print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
-        #index   = T.lscalar()    # index to a [mini]batch
         minibatch_size = self.hp.minibatch_size
-        ensemble_x = T.matrix('ensemble_x')
-        ensemble_y = T.ivector('ensemble_y')
 
         # create a function to compute the mistakes that are made by the model
         # on the validation set, or testing set
-        shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
-        test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
-            givens = {
-              #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-              #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-              self.classifier.x: ensemble_x,
-              self.classifier.y: ensemble_y})
-
-        validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
-            givens = {
-              #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-              #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-              self.classifier.x: ensemble_x,
-              self.classifier.y: ensemble_y})
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #     givens = {
+        #       self.classifier.x: ensemble_x,
+        #       self.classifier.y: ensemble_y]})
+
+        validate_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #     givens = {
+        #       self.classifier.x: ,
+        #       self.classifier.y: ]})
 
 
         # early-stopping parameters
         patience              = 10000 # look as this many examples regardless
         patience_increase     = 2.    # wait this much longer when a new best is
                                       # found
         improvement_threshold = 0.995 # a relative improvement of this much is
                                       # considered significant
-        validation_frequency  = min(self.n_train_batches, patience/2)
+        validation_frequency  = min(self.mb_per_epoch, patience/2)
                                       # go through this many
                                       # minibatche before checking the network
                                       # on the validation set; in this case we
                                       # check every epoch
+        if self.max_minibatches and validation_frequency > self.max_minibatches:
+            validation_frequency = self.max_minibatches / 2
 
         best_params          = None
         best_validation_loss = float('inf')
         test_score           = 0.
         start_time = time.clock()
 
         done_looping = False
         epoch = 0
 
-        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
+        total_mb_index = 0
+
+        while (epoch < num_finetune) and (not done_looping):
             epoch = epoch + 1
-            minibatch_index=int(0)
+            minibatch_index = -1
             for x,y in dataset.train(minibatch_size):
-                minibatch_index +=1
-
-                if minibatch_index > reduce:  #If maximum number of mini-batchs is used
-                    break
-
+                minibatch_index += 1
                 cost_ij = self.classifier.finetune(x,y)
-                iter    = epoch * self.n_train_batches + minibatch_index
+                total_mb_index += 1
 
                 self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
-                if (iter+1) % validation_frequency == 0:
+                if (total_mb_index+1) % validation_frequency == 0:
 
-                    #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)]
-                    test_index=int(0)
-                    validation_losses=[]
-                    for x,y in dataset.valid(minibatch_size):
-                        test_index+=1
-                        if test_index > reduce:
-                            break
-                        validation_losses.append(validate_model(x,y))
+                    iter = dataset.valid(minibatch_size)
+                    if self.max_minibatches:
+                        iter = itermax(iter, self.max_minibatches)
+                    validation_losses = [validate_model(x,y) for x,y in iter]
                     this_validation_loss = numpy.mean(validation_losses)
 
                    self.series["validation_error"].\
                        append((epoch, minibatch_index), this_validation_loss*100.)
 
                     print('epoch %i, minibatch %i, validation error %f %%' % \
-                           (epoch, minibatch_index, \
+                           (epoch, minibatch_index+1, \
                             this_validation_loss*100.))
 
 
                     # if we got the best validation score until now
                     if this_validation_loss < best_validation_loss:
 
                         #improve patience if loss improvement is good enough
                         if this_validation_loss < best_validation_loss *  \
                                improvement_threshold :
-                            patience = max(patience, iter * patience_increase)
+                            patience = max(patience, total_mb_index * patience_increase)
 
                         # save best validation score and iteration number
                         best_validation_loss = this_validation_loss
-                        best_iter = iter
+                        best_iter = total_mb_index
 
                         # test it on the test set
-                        #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)]
-                        test_losses=[]
-                        i=0
-                        for x,y in dataset.test(minibatch_size):
-                            i+=1
-                            if i > reduce:
-                                break
-                            test_losses.append(test_model(x,y))
+                        iter = dataset.test(minibatch_size)
+                        if self.max_minibatches:
+                            iter = itermax(iter, self.max_minibatches)
+                        test_losses = [test_model(x,y) for x,y in iter]
                         test_score = numpy.mean(test_losses)
 
                         self.series["test_error"].\
                             append((epoch, minibatch_index), test_score*100.)
 
                         print((' epoch %i, minibatch %i, test error of best '
                               'model %f %%') %
-                                     (epoch, minibatch_index,
+                                     (epoch, minibatch_index+1,
                                       test_score*100.))
 
                     sys.stdout.flush()
 
+                # useful when doing tests
+                if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                    break
+
             self.series['params'].append((epoch,), self.classifier.all_params)
 
-            if patience <= iter :
+            if patience <= total_mb_index:
                 done_looping = True
                 break
 
         end_time = time.clock()
         self.hp.update({'finetuning_time':end_time-start_time,\
@@ -267,8 +242,17 @@
 
         print(('Optimization complete with best validation score of %f %%,'
                'with test performance %f %%') %
                      (best_validation_loss * 100., test_score*100.))
         print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
 
 
-
+    #Set parameters like they were right after pre-train
+    def reload_parameters(self):
+
+        for idx,x in enumerate(self.parameters_pre):
+            self.classifier.params[idx].value=copy(x)
+
+
+
+
+
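
A minimal standalone sketch of the minibatch-capping pattern introduced above: itermax() stops an iterator after `max` items, which is how the new max_minibatches option limits how much of dataset.train()/valid()/test() is consumed during tests. toy_minibatches below is a hypothetical stand-in for those dataset iterators, not part of the changeset.

# Sketch only: cap an iterator at `max` items, mirroring itermax() in the diff.
def itermax(iter, max):
    for i, it in enumerate(iter):
        if i >= max:
            break
        yield it

# Hypothetical stand-in for dataset.train()/valid()/test(): yields fake (x, y) pairs.
def toy_minibatches(n):
    for k in range(n):
        yield ([k], [k % 2])

capped = list(itermax(toy_minibatches(1000), 3))
print(len(capped))   # -> 3, only the first 3 minibatches are ever drawn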
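The other new piece is a snapshot/restore pattern: pretrain() now stores a copy of each parameter's .value in self.parameters_pre, and reload_parameters() writes those copies back so finetuning can be rerun from the pretrained weights. A minimal sketch of that pattern, using a hypothetical Param class in place of the Theano shared variables the real code operates on:

from copy import copy

# Hypothetical stand-in for a Theano shared variable exposing a .value attribute.
class Param(object):
    def __init__(self, value):
        self.value = value

params = [Param([1.0, 2.0]), Param([3.0])]

# Snapshot after pretraining (what pretrain() keeps in self.parameters_pre).
parameters_pre = [copy(p.value) for p in params]

# ... finetuning mutates the parameters ...
params[0].value = [9.0, 9.0]

# Restore the pretrained values (what reload_parameters() does).
for idx, x in enumerate(parameters_pre):
    params[idx].value = copy(x)

print(params[0].value)   # -> [1.0, 2.0]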