comparison deep/stacked_dae/v2/sgd_optimization.py @ 239:42005ec87747

Manually merged Sylvain's changes to use Arnaud's dataset code, with the one difference that I don't use the givens. I probably also take a different approach to limiting the dataset size in my debugging.
author fsavard
date Mon, 15 Mar 2010 18:30:21 -0400
parents 02eb98d051fe
children f213a0fb2b08
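
The commit message refers to an iterator-style dataset API and to capping the dataset size during debugging, but the diff below only shows the call sites. The following is a minimal sketch of the interface the optimizer now expects from dataset.train/valid/test, together with the itermax helper this changeset adds; ArrayDataset and the array shapes are hypothetical stand-ins, not the actual ift6266 dataset code. Because minibatches are passed directly as theano function inputs, no givens (shared-variable indexing) are needed.

    import numpy

    class ArrayDataset(object):
        # Hypothetical stand-in for the real dataset code: each of
        # train/valid/test takes a minibatch size and yields (x, y)
        # pairs of numpy arrays.
        def __init__(self, x, y):
            self.x, self.y = x, y

        def _minibatches(self, minibatch_size):
            for start in xrange(0, len(self.x), minibatch_size):
                yield (self.x[start:start+minibatch_size],
                       self.y[start:start+minibatch_size])

        def train(self, minibatch_size):
            return self._minibatches(minibatch_size)
        valid = train
        test = train

    def itermax(iter, max):
        # Cap an iterator at `max` items (the helper added in this changeset).
        for i, it in enumerate(iter):
            if i >= max:
                break
            yield it

    dataset = ArrayDataset(numpy.zeros((100, 1024)), numpy.zeros(100))
    for x, y in itermax(dataset.train(10), 3):
        print x.shape, y.shape  # stops after 3 of the 10 minibatches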
--- a/deep/stacked_dae/v2/sgd_optimization.py (238:9fc641d7adda)
+++ b/deep/stacked_dae/v2/sgd_optimization.py (239:42005ec87747)
@@ -14,63 +14,46 @@
 import jobman, jobman.sql
 
 from stacked_dae import SdA
 
 from ift6266.utils.seriestables import *
-
-def shared_dataset(data_xy):
-    data_x, data_y = data_xy
-    if theano.config.device.startswith("gpu"):
-        print "TRANSFERING DATASETS (via shared()) TO GPU"
-        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-        shared_y = T.cast(shared_y, 'int32')
-    else:
-        print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES"
-        shared_x = theano.shared(data_x)
-        shared_y = theano.shared(data_y)
-    return shared_x, shared_y
 
 default_series = { \
         'reconstruction_error' : DummySeries(),
         'training_error' : DummySeries(),
         'validation_error' : DummySeries(),
         'test_error' : DummySeries(),
         'params' : DummySeries()
         }
 
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
+
 class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series):
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
+                 examples_per_epoch, series=default_series, max_minibatches=None):
         self.dataset = dataset
         self.hp = hyperparameters
         self.n_ins = n_ins
         self.n_outs = n_outs
-        self.input_divider = input_divider
 
+        self.max_minibatches = max_minibatches
+        print "SdaSgdOptimizer, max_minibatches =", max_minibatches
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
         self.series = series
 
         self.rng = numpy.random.RandomState(1234)
 
-        self.init_datasets()
         self.init_classifier()
 
         sys.stdout.flush()
-
-    def init_datasets(self):
-        print "init_datasets"
-        sys.stdout.flush()
-
-        train_set, valid_set, test_set = self.dataset
-        self.test_set_x, self.test_set_y = shared_dataset(test_set)
-        self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
-        self.train_set_x, self.train_set_y = shared_dataset(train_set)
-
-        # compute number of minibatches for training, validation and testing
-        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
-        # remove last batch in case it's incomplete
-        self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1
 
     def init_classifier(self):
         print "Constructing classifier"
 
         # we don't want to save arrays in DD objects, so
@@ -79,44 +62,50 @@
         layers_sizes = [self.hp.hidden_layers_sizes] * nhl
         corruption_levels = [self.hp.corruption_levels] * nhl
 
         # construct the stacked denoising autoencoder class
         self.classifier = SdA( \
-                          train_set_x= self.train_set_x, \
-                          train_set_y = self.train_set_y,\
                           batch_size = self.hp.minibatch_size, \
                           n_ins= self.n_ins, \
                           hidden_layers_sizes = layers_sizes, \
                           n_outs = self.n_outs, \
                           corruption_levels = corruption_levels,\
                           rng = self.rng,\
                           pretrain_lr = self.hp.pretraining_lr, \
-                          finetune_lr = self.hp.finetuning_lr,\
-                          input_divider = self.input_divider )
+                          finetune_lr = self.hp.finetuning_lr)
 
         #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
 
         sys.stdout.flush()
 
     def train(self):
-        self.pretrain()
-        self.finetune()
+        self.pretrain(self.dataset)
+        self.finetune(self.dataset)
 
-    def pretrain(self):
+    def pretrain(self,dataset):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
 
         start_time = time.clock()
         ## Pre-train layer-wise
         for i in xrange(self.classifier.n_layers):
             # go through pretraining epochs
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
-                for batch_index in xrange(self.n_train_batches):
-                    c = self.classifier.pretrain_functions[i](batch_index)
+                batch_index=0
+                for x,y in dataset.train(self.hp.minibatch_size):
+                    c = self.classifier.pretrain_functions[i](x)
 
                     self.series["reconstruction_error"].append((epoch, batch_index), c)
+                    batch_index+=1
+
+                    if batch_index % 10000 == 0:
+                        print "10000 batches"
+
+                    # useful when doing tests
+                    if self.max_minibatches and batch_index >= self.max_minibatches:
+                        break
 
                 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
                 sys.stdout.flush()
 
                 self.series['params'].append((epoch,), self.classifier.all_params)
@@ -126,37 +115,39 @@
         print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
         self.hp.update({'pretraining_time': end_time-start_time})
 
         sys.stdout.flush()
 
-    def finetune(self):
+    def finetune(self,dataset):
         print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
-        index = T.lscalar()    # index to a [mini]batch
         minibatch_size = self.hp.minibatch_size
 
         # create a function to compute the mistakes that are made by the model
        # on the validation set, or testing set
-        shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
-        test_model = theano.function([index], self.classifier.errors,
-                 givens = {
-                   self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-
-        validate_model = theano.function([index], self.classifier.errors,
-                 givens = {
-                   self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #         givens = {
+        #           self.classifier.x: ensemble_x,
+        #           self.classifier.y: ensemble_y]})
+
+        validate_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #         givens = {
+        #           self.classifier.x: ,
+        #           self.classifier.y: ]})
 
 
         # early-stopping parameters
         patience              = 10000 # look at this many examples regardless
         patience_increase     = 2.    # wait this much longer when a new best is
                                       # found
         improvement_threshold = 0.995 # a relative improvement of this much is
                                       # considered significant
-        validation_frequency  = min(self.n_train_batches, patience/2)
+        validation_frequency  = min(self.mb_per_epoch, patience/2)
                                       # go through this many
                                       # minibatches before checking the network
                                       # on the validation set; in this case we
                                       # check every epoch
 
@@ -166,22 +157,28 @@
         start_time = time.clock()
 
         done_looping = False
         epoch = 0
 
+        total_mb_index = 0
+
         while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
             epoch = epoch + 1
-            for minibatch_index in xrange(self.n_train_batches):
-
-                cost_ij = self.classifier.finetune(minibatch_index)
-                iter = epoch * self.n_train_batches + minibatch_index
+            minibatch_index = -1
+            for x,y in dataset.train(minibatch_size):
+                minibatch_index += 1
+                cost_ij = self.classifier.finetune(x,y)
+                total_mb_index += 1
 
                 self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
-                if (iter+1) % validation_frequency == 0:
+                if (total_mb_index+1) % validation_frequency == 0:
 
-                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
+                    iter = dataset.valid(minibatch_size)
+                    if self.max_minibatches:
+                        iter = itermax(iter, self.max_minibatches)
+                    validation_losses = [validate_model(x,y) for x,y in iter]
                     this_validation_loss = numpy.mean(validation_losses)
 
                     self.series["validation_error"].\
                         append((epoch, minibatch_index), this_validation_loss*100.)
 
@@ -194,18 +191,21 @@
                     if this_validation_loss < best_validation_loss:
 
                         #improve patience if loss improvement is good enough
                         if this_validation_loss < best_validation_loss * \
                                improvement_threshold :
-                            patience = max(patience, iter * patience_increase)
+                            patience = max(patience, total_mb_index * patience_increase)
 
                         # save best validation score and iteration number
                         best_validation_loss = this_validation_loss
-                        best_iter = iter
+                        best_iter = total_mb_index
 
                         # test it on the test set
-                        test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
+                        iter = dataset.test(minibatch_size)
+                        if self.max_minibatches:
+                            iter = itermax(iter, self.max_minibatches)
+                        test_losses = [test_model(x,y) for x,y in iter]
                         test_score = numpy.mean(test_losses)
 
                         self.series["test_error"].\
                             append((epoch, minibatch_index), test_score*100.)
 
@@ -214,13 +214,17 @@
-                              (epoch, minibatch_index+1, self.n_train_batches,
+                              (epoch, minibatch_index+1, self.mb_per_epoch,
                                test_score*100.))
 
                     sys.stdout.flush()
 
+                # useful when doing tests
+                if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                    break
+
             self.series['params'].append((epoch,), self.classifier.all_params)
 
-            if patience <= iter :
+            if patience <= total_mb_index:
                 done_looping = True
                 break
 
         end_time = time.clock()
         self.hp.update({'finetuning_time':end_time-start_time,\