ift6266 (Mercurial): comparison deep/stacked_dae/v2/sgd_optimization.py @ 239:42005ec87747
Merged (manually) Sylvain's changes to use Arnaud's dataset code, with the difference that I don't use the givens. I probably also take a different approach to limiting the dataset size in my debugging.
author | fsavard |
---|---|
date | Mon, 15 Mar 2010 18:30:21 -0400 |
parents | 02eb98d051fe |
children | f213a0fb2b08 |
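
The change drops the shared-variable/`givens` machinery: rather than compiling Theano functions that slice into shared datasets by minibatch index, the new code compiles the error functions with the minibatch itself as an explicit input, so batches can be fed straight from Arnaud's dataset iterators. Below is a minimal sketch contrasting the two styles; it is not taken from this changeset, and the `errors` expression, array shapes, and variable names are placeholders.

```python
import numpy
import theano
import theano.tensor as T

minibatch_size = 20
x = T.matrix('x')                      # a minibatch of inputs
y = T.ivector('y')                     # the corresponding labels
# stand-in for self.classifier.errors (any expression built on x and y works)
errors = T.mean(T.neq(T.argmax(x, axis=1), y))

# --- old style: shared datasets sliced through `givens`, indexed by minibatch number ---
train_set_x = theano.shared(numpy.zeros((1000, 784), dtype=theano.config.floatX))
train_set_y = theano.shared(numpy.zeros((1000,), dtype='int32'))
index = T.lscalar()                    # minibatch index
model_with_givens = theano.function(
    [index], errors,
    givens={
        x: train_set_x[index * minibatch_size:(index + 1) * minibatch_size],
        y: train_set_y[index * minibatch_size:(index + 1) * minibatch_size]})

# --- new style: the minibatch is passed in directly ---
model_with_inputs = theano.function([x, y], errors)

# called as model_with_givens(0) versus model_with_inputs(batch_x, batch_y),
# where batch_x, batch_y come from a dataset iterator such as dataset.valid(...)
```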
238:9fc641d7adda | 239:42005ec87747 |
---|---|
14 import jobman, jobman.sql | 14 import jobman, jobman.sql |
15 | 15 |
16 from stacked_dae import SdA | 16 from stacked_dae import SdA |
17 | 17 |
18 from ift6266.utils.seriestables import * | 18 from ift6266.utils.seriestables import * |
19 | |
20 def shared_dataset(data_xy): | |
21 data_x, data_y = data_xy | |
22 if theano.config.device.startswith("gpu"): | |
23 print "TRANSFERING DATASETS (via shared()) TO GPU" | |
24 shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) | |
25 shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) | |
26 shared_y = T.cast(shared_y, 'int32') | |
27 else: | |
28 print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES" | |
29 shared_x = theano.shared(data_x) | |
30 shared_y = theano.shared(data_y) | |
31 return shared_x, shared_y | |
32 | 19 |
33 default_series = { \ | 20 default_series = { \ |
34 'reconstruction_error' : DummySeries(), | 21 'reconstruction_error' : DummySeries(), |
35 'training_error' : DummySeries(), | 22 'training_error' : DummySeries(), |
36 'validation_error' : DummySeries(), | 23 'validation_error' : DummySeries(), |
37 'test_error' : DummySeries(), | 24 'test_error' : DummySeries(), |
38 'params' : DummySeries() | 25 'params' : DummySeries() |
39 } | 26 } |
40 | 27 |
28 def itermax(iter, max): | |
29 for i,it in enumerate(iter): | |
30 if i >= max: | |
31 break | |
32 yield it | |
33 | |
41 class SdaSgdOptimizer: | 34 class SdaSgdOptimizer: |
42 def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series): | 35 def __init__(self, dataset, hyperparameters, n_ins, n_outs, |
36 examples_per_epoch, series=default_series, max_minibatches=None): | |
43 self.dataset = dataset | 37 self.dataset = dataset |
44 self.hp = hyperparameters | 38 self.hp = hyperparameters |
45 self.n_ins = n_ins | 39 self.n_ins = n_ins |
46 self.n_outs = n_outs | 40 self.n_outs = n_outs |
47 self.input_divider = input_divider | |
48 | 41 |
42 self.max_minibatches = max_minibatches | |
43 print "SdaSgdOptimizer, max_minibatches =", max_minibatches | |
44 | |
45 self.ex_per_epoch = examples_per_epoch | |
46 self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size | |
47 | |
49 self.series = series | 48 self.series = series |
50 | 49 |
51 self.rng = numpy.random.RandomState(1234) | 50 self.rng = numpy.random.RandomState(1234) |
52 | 51 |
53 self.init_datasets() | |
54 self.init_classifier() | 52 self.init_classifier() |
55 | 53 |
56 sys.stdout.flush() | 54 sys.stdout.flush() |
57 | |
58 def init_datasets(self): | |
59 print "init_datasets" | |
60 sys.stdout.flush() | |
61 | |
62 train_set, valid_set, test_set = self.dataset | |
63 self.test_set_x, self.test_set_y = shared_dataset(test_set) | |
64 self.valid_set_x, self.valid_set_y = shared_dataset(valid_set) | |
65 self.train_set_x, self.train_set_y = shared_dataset(train_set) | |
66 | |
67 # compute number of minibatches for training, validation and testing | |
68 self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size | |
69 self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size | |
70 # remove last batch in case it's incomplete | |
71 self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 | |
72 | 55 |
73 def init_classifier(self): | 56 def init_classifier(self): |
74 print "Constructing classifier" | 57 print "Constructing classifier" |
75 | 58 |
76 # we don't want to save arrays in DD objects, so | 59 # we don't want to save arrays in DD objects, so |
79 layers_sizes = [self.hp.hidden_layers_sizes] * nhl | 62 layers_sizes = [self.hp.hidden_layers_sizes] * nhl |
80 corruption_levels = [self.hp.corruption_levels] * nhl | 63 corruption_levels = [self.hp.corruption_levels] * nhl |
81 | 64 |
82 # construct the stacked denoising autoencoder class | 65 # construct the stacked denoising autoencoder class |
83 self.classifier = SdA( \ | 66 self.classifier = SdA( \ |
84 train_set_x= self.train_set_x, \ | |
85 train_set_y = self.train_set_y,\ | |
86 batch_size = self.hp.minibatch_size, \ | 67 batch_size = self.hp.minibatch_size, \ |
87 n_ins= self.n_ins, \ | 68 n_ins= self.n_ins, \ |
88 hidden_layers_sizes = layers_sizes, \ | 69 hidden_layers_sizes = layers_sizes, \ |
89 n_outs = self.n_outs, \ | 70 n_outs = self.n_outs, \ |
90 corruption_levels = corruption_levels,\ | 71 corruption_levels = corruption_levels,\ |
91 rng = self.rng,\ | 72 rng = self.rng,\ |
92 pretrain_lr = self.hp.pretraining_lr, \ | 73 pretrain_lr = self.hp.pretraining_lr, \ |
93 finetune_lr = self.hp.finetuning_lr,\ | 74 finetune_lr = self.hp.finetuning_lr) |
94 input_divider = self.input_divider ) | |
95 | 75 |
96 #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") | 76 #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") |
97 | 77 |
98 sys.stdout.flush() | 78 sys.stdout.flush() |
99 | 79 |
100 def train(self): | 80 def train(self): |
101 self.pretrain() | 81 self.pretrain(self.dataset) |
102 self.finetune() | 82 self.finetune(self.dataset) |
103 | 83 |
104 def pretrain(self): | 84 def pretrain(self,dataset): |
105 print "STARTING PRETRAINING, time = ", datetime.datetime.now() | 85 print "STARTING PRETRAINING, time = ", datetime.datetime.now() |
106 sys.stdout.flush() | 86 sys.stdout.flush() |
107 | 87 |
108 start_time = time.clock() | 88 start_time = time.clock() |
109 ## Pre-train layer-wise | 89 ## Pre-train layer-wise |
110 for i in xrange(self.classifier.n_layers): | 90 for i in xrange(self.classifier.n_layers): |
111 # go through pretraining epochs | 91 # go through pretraining epochs |
112 for epoch in xrange(self.hp.pretraining_epochs_per_layer): | 92 for epoch in xrange(self.hp.pretraining_epochs_per_layer): |
113 # go through the training set | 93 # go through the training set |
114 for batch_index in xrange(self.n_train_batches): | 94 batch_index=0 |
115 c = self.classifier.pretrain_functions[i](batch_index) | 95 for x,y in dataset.train(self.hp.minibatch_size): |
96 c = self.classifier.pretrain_functions[i](x) | |
116 | 97 |
117 self.series["reconstruction_error"].append((epoch, batch_index), c) | 98 self.series["reconstruction_error"].append((epoch, batch_index), c) |
99 batch_index+=1 | |
100 | |
101 if batch_index % 10000 == 0: | |
102 print "10000 batches" | |
103 | |
104 # useful when doing tests | |
105 if self.max_minibatches and batch_index >= self.max_minibatches: | |
106 break | |
118 | 107 |
119 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c | 108 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c |
120 sys.stdout.flush() | 109 sys.stdout.flush() |
121 | 110 |
122 self.series['params'].append((epoch,), self.classifier.all_params) | 111 self.series['params'].append((epoch,), self.classifier.all_params) |
126 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) | 115 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) |
127 self.hp.update({'pretraining_time': end_time-start_time}) | 116 self.hp.update({'pretraining_time': end_time-start_time}) |
128 | 117 |
129 sys.stdout.flush() | 118 sys.stdout.flush() |
130 | 119 |
131 def finetune(self): | 120 def finetune(self,dataset): |
132 print "STARTING FINETUNING, time = ", datetime.datetime.now() | 121 print "STARTING FINETUNING, time = ", datetime.datetime.now() |
133 | 122 |
134 index = T.lscalar() # index to a [mini]batch | |
135 minibatch_size = self.hp.minibatch_size | 123 minibatch_size = self.hp.minibatch_size |
136 | 124 |
137 # create a function to compute the mistakes that are made by the model | 125 # create a function to compute the mistakes that are made by the model |
138 # on the validation set, or testing set | 126 # on the validation set, or testing set |
139 shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX)) | 127 test_model = \ |
140 test_model = theano.function([index], self.classifier.errors, | 128 theano.function( |
141 givens = { | 129 [self.classifier.x,self.classifier.y], self.classifier.errors) |
142 self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, | 130 # givens = { |
143 self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]}) | 131 # self.classifier.x: ensemble_x, |
144 | 132 # self.classifier.y: ensemble_y]}) |
145 validate_model = theano.function([index], self.classifier.errors, | 133 |
146 givens = { | 134 validate_model = \ |
147 self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, | 135 theano.function( |
148 self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]}) | 136 [self.classifier.x,self.classifier.y], self.classifier.errors) |
137 # givens = { | |
138 # self.classifier.x: , | |
139 # self.classifier.y: ]}) | |
149 | 140 |
150 | 141 |
151 # early-stopping parameters | 142 # early-stopping parameters |
152 patience = 10000 # look as this many examples regardless | 143 patience = 10000 # look as this many examples regardless |
153 patience_increase = 2. # wait this much longer when a new best is | 144 patience_increase = 2. # wait this much longer when a new best is |
154 # found | 145 # found |
155 improvement_threshold = 0.995 # a relative improvement of this much is | 146 improvement_threshold = 0.995 # a relative improvement of this much is |
156 # considered significant | 147 # considered significant |
157 validation_frequency = min(self.n_train_batches, patience/2) | 148 validation_frequency = min(self.mb_per_epoch, patience/2) |
158 # go through this many | 149 # go through this many |
159 # minibatche before checking the network | 150 # minibatche before checking the network |
160 # on the validation set; in this case we | 151 # on the validation set; in this case we |
161 # check every epoch | 152 # check every epoch |
162 | 153 |
166 start_time = time.clock() | 157 start_time = time.clock() |
167 | 158 |
168 done_looping = False | 159 done_looping = False |
169 epoch = 0 | 160 epoch = 0 |
170 | 161 |
162 total_mb_index = 0 | |
163 | |
171 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): | 164 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): |
172 epoch = epoch + 1 | 165 epoch = epoch + 1 |
173 for minibatch_index in xrange(self.n_train_batches): | 166 minibatch_index = -1 |
174 | 167 for x,y in dataset.train(minibatch_size): |
175 cost_ij = self.classifier.finetune(minibatch_index) | 168 minibatch_index += 1 |
176 iter = epoch * self.n_train_batches + minibatch_index | 169 cost_ij = self.classifier.finetune(x,y) |
170 total_mb_index += 1 | |
177 | 171 |
178 self.series["training_error"].append((epoch, minibatch_index), cost_ij) | 172 self.series["training_error"].append((epoch, minibatch_index), cost_ij) |
179 | 173 |
180 if (iter+1) % validation_frequency == 0: | 174 if (total_mb_index+1) % validation_frequency == 0: |
181 | 175 |
182 validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)] | 176 iter = dataset.valid(minibatch_size) |
177 if self.max_minibatches: | |
178 iter = itermax(iter, self.max_minibatches) | |
179 validation_losses = [validate_model(x,y) for x,y in iter] | |
183 this_validation_loss = numpy.mean(validation_losses) | 180 this_validation_loss = numpy.mean(validation_losses) |
184 | 181 |
185 self.series["validation_error"].\ | 182 self.series["validation_error"].\ |
186 append((epoch, minibatch_index), this_validation_loss*100.) | 183 append((epoch, minibatch_index), this_validation_loss*100.) |
187 | 184 |
194 if this_validation_loss < best_validation_loss: | 191 if this_validation_loss < best_validation_loss: |
195 | 192 |
196 #improve patience if loss improvement is good enough | 193 #improve patience if loss improvement is good enough |
197 if this_validation_loss < best_validation_loss * \ | 194 if this_validation_loss < best_validation_loss * \ |
198 improvement_threshold : | 195 improvement_threshold : |
199 patience = max(patience, iter * patience_increase) | 196 patience = max(patience, total_mb_index * patience_increase) |
200 | 197 |
201 # save best validation score and iteration number | 198 # save best validation score and iteration number |
202 best_validation_loss = this_validation_loss | 199 best_validation_loss = this_validation_loss |
203 best_iter = iter | 200 best_iter = total_mb_index |
204 | 201 |
205 # test it on the test set | 202 # test it on the test set |
206 test_losses = [test_model(i) for i in xrange(self.n_test_batches)] | 203 iter = dataset.test(minibatch_size) |
204 if self.max_minibatches: | |
205 iter = itermax(iter, self.max_minibatches) | |
206 test_losses = [test_model(x,y) for x,y in iter] | |
207 test_score = numpy.mean(test_losses) | 207 test_score = numpy.mean(test_losses) |
208 | 208 |
209 self.series["test_error"].\ | 209 self.series["test_error"].\ |
210 append((epoch, minibatch_index), test_score*100.) | 210 append((epoch, minibatch_index), test_score*100.) |
211 | 211 |
214 (epoch, minibatch_index+1, self.n_train_batches, | 214 (epoch, minibatch_index+1, self.n_train_batches, |
215 test_score*100.)) | 215 test_score*100.)) |
216 | 216 |
217 sys.stdout.flush() | 217 sys.stdout.flush() |
218 | 218 |
219 # useful when doing tests | |
220 if self.max_minibatches and minibatch_index >= self.max_minibatches: | |
221 break | |
222 | |
219 self.series['params'].append((epoch,), self.classifier.all_params) | 223 self.series['params'].append((epoch,), self.classifier.all_params) |
220 | 224 |
221 if patience <= iter : | 225 if patience <= total_mb_index: |
222 done_looping = True | 226 done_looping = True |
223 break | 227 break |
224 | 228 |
225 end_time = time.clock() | 229 end_time = time.clock() |
226 self.hp.update({'finetuning_time':end_time-start_time,\ | 230 self.hp.update({'finetuning_time':end_time-start_time,\ |
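
For the debugging cap mentioned in the commit message, the new `itermax` helper (new lines 28-32 above) simply truncates any minibatch iterator after a fixed number of items; `max_minibatches` then bounds the pretraining, finetuning, validation and test loops. A self-contained sketch of the pattern, where the `fake_valid` generator is only a hypothetical stand-in for `dataset.valid(minibatch_size)`:

```python
def itermax(it, max_count):
    """Yield at most max_count items from the iterator it."""
    for i, item in enumerate(it):
        if i >= max_count:
            break
        yield item

# hypothetical stand-in for dataset.valid(minibatch_size)
def fake_valid(minibatch_size):
    for start in range(0, 100, minibatch_size):
        x = [[0.0] * 784] * minibatch_size   # dummy minibatch of inputs
        y = [0] * minibatch_size             # dummy minibatch of labels
        yield x, y

# cap the loop at 3 minibatches, as the optimizer does when max_minibatches is set
sizes = [len(y) for x, y in itermax(fake_valid(10), 3)]
print sizes   # [10, 10, 10]
```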