Mercurial > ift6266

comparison: deep/stacked_dae/sgd_optimization.py @ 275:7b4507295eba ("merge")

author:   Xavier Glorot <glorotxa@iro.umontreal.ca>
date:     Mon, 22 Mar 2010 10:20:10 -0400
parents:  acb942530923 c8fe09a65039
children: 8a3af19ae272

Comparison of 274:44409b6652aa (old) against 275:7b4507295eba (new). Unchanged lines appear as context; lines deleted from the old revision are prefixed with "-" and lines inserted in the new revision are prefixed with "+".
```diff
 from jobman import DD
 import jobman, jobman.sql
 
 from stacked_dae import SdA
 
-def shared_dataset(data_xy):
-    data_x, data_y = data_xy
-    #shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-    #shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-    #shared_y = T.cast(shared_y, 'int32')
-    shared_x = theano.shared(data_x)
-    shared_y = theano.shared(data_y)
-    return shared_x, shared_y
-
-class DummyMux():
-    def append(self, param1, param2):
-        pass
+from ift6266.utils.seriestables import *
+
+default_series = { \
+        'reconstruction_error' : DummySeries(),
+        'training_error' : DummySeries(),
+        'validation_error' : DummySeries(),
+        'test_error' : DummySeries(),
+        'params' : DummySeries()
+        }
+
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
 
```
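`DummySeries` comes from `ift6266.utils.seriestables` (pulled in by the wildcard import above) and is not defined in this file. Judging only from how the optimizer calls it below, e.g. `self.series["reconstruction_error"].append((epoch, batch_index), c)`, a minimal no-op stand-in would look like the sketch that follows; the class body here is an assumption for illustration, not the library's actual code. The same sketch exercises `itermax`, which truncates an iterator after `max` items so that validation and test passes can be shortened when `max_minibatches` is set.

```python
class DummySeries(object):
    # Assumed minimal interface: accept an index tuple and a value and discard them,
    # mirroring the series.append((epoch, batch_index), value) calls made by the optimizer.
    def append(self, index, value):
        pass

# itermax (copied from the hunk above) caps any iterator at `max` items.
def itermax(iter, max):
    for i, it in enumerate(iter):
        if i >= max:
            break
        yield it

capped = list(itermax(xrange(1000), 3))
assert capped == [0, 1, 2]

DummySeries().append((0, 5), 0.42)   # silently ignored, as with default_series
```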
```diff
 class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series_mux=None):
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
+                    examples_per_epoch, series=default_series, max_minibatches=None):
         self.dataset = dataset
         self.hp = hyperparameters
         self.n_ins = n_ins
         self.n_outs = n_outs
-        self.input_divider = input_divider
 
-        if not series_mux:
-            series_mux = DummyMux()
-            print "No series multiplexer set"
-        self.series_mux = series_mux
+        self.max_minibatches = max_minibatches
+        print "SdaSgdOptimizer, max_minibatches =", max_minibatches
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
+        self.series = series
 
         self.rng = numpy.random.RandomState(1234)
 
-        self.init_datasets()
         self.init_classifier()
 
         sys.stdout.flush()
```
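With the new constructor, the caller supplies the dataset object, the number of training examples per epoch and, optionally, a cap on minibatches; the `series` argument defaults to the `DummySeries` placeholders. A hypothetical call could look like the following sketch, where the hyperparameter names mirror the `self.hp.*` fields used in this file but every value (and `my_dataset` itself) is invented for illustration:

```python
from jobman import DD

# Hypothetical hyperparameters; jobman's DD is a dict with attribute access,
# so the optimizer can read hp.minibatch_size, hp.num_hidden_layers, etc.
hp = DD({'minibatch_size': 20,
         'num_hidden_layers': 3,
         'hidden_layers_sizes': 1000,
         'corruption_levels': 0.2,
         'pretraining_lr': 0.001,
         'finetuning_lr': 0.01,
         'pretraining_epochs_per_layer': 10,
         'max_finetuning_epochs': 1000})

optimizer = SdaSgdOptimizer(dataset=my_dataset,      # must expose train()/valid()/test(); see the sketch further below
                            hyperparameters=hp,
                            n_ins=32*32, n_outs=62,   # placeholder input/output sizes
                            examples_per_epoch=100000,
                            max_minibatches=None)
optimizer.train()
```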
```diff
-
-    def init_datasets(self):
-        print "init_datasets"
-        sys.stdout.flush()
-
-        train_set, valid_set, test_set = self.dataset
-        self.test_set_x, self.test_set_y = shared_dataset(test_set)
-        self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
-        self.train_set_x, self.train_set_y = shared_dataset(train_set)
-
-        # compute number of minibatches for training, validation and testing
-        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
-        # remove last batch in case it's incomplete
-        self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1
 
     def init_classifier(self):
         print "Constructing classifier"
 
         # we don't want to save arrays in DD objects, so
```

[old lines 70-71 / new lines 60-61 not shown in the comparison]

```diff
         layers_sizes = [self.hp.hidden_layers_sizes] * nhl
         corruption_levels = [self.hp.corruption_levels] * nhl
 
         # construct the stacked denoising autoencoder class
         self.classifier = SdA( \
-                      train_set_x= self.train_set_x, \
-                      train_set_y = self.train_set_y,\
                       batch_size = self.hp.minibatch_size, \
                       n_ins= self.n_ins, \
                       hidden_layers_sizes = layers_sizes, \
                       n_outs = self.n_outs, \
                       corruption_levels = corruption_levels,\
                       rng = self.rng,\
                       pretrain_lr = self.hp.pretraining_lr, \
-                      finetune_lr = self.hp.finetuning_lr,\
-                      input_divider = self.input_divider )
+                      finetune_lr = self.hp.finetuning_lr)
 
         #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
 
         sys.stdout.flush()
 
```
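The comment above explains that arrays are not stored in the DD hyperparameters, so per-layer settings are kept as scalars and replicated here into one entry per hidden layer, for example (values hypothetical):

```python
# With num_hidden_layers = 3, hidden_layers_sizes = 1000 and corruption_levels = 0.2
# stored as scalars in the DD, the expansion above yields per-layer lists:
nhl = 3
layers_sizes = [1000] * nhl         # [1000, 1000, 1000]
corruption_levels = [0.2] * nhl     # [0.2, 0.2, 0.2]
```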
```diff
     def train(self):
-        self.pretrain()
-        self.finetune()
+        self.pretrain(self.dataset)
+        self.finetune(self.dataset)
 
-    def pretrain(self):
+    def pretrain(self,dataset):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
-
-        #time_acc_func = 0.0
-        #time_acc_total = 0.0
 
         start_time = time.clock()
         ## Pre-train layer-wise
         for i in xrange(self.classifier.n_layers):
             # go through pretraining epochs
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
-                for batch_index in xrange(self.n_train_batches):
-                    #t1 = time.clock()
-                    c = self.classifier.pretrain_functions[i](batch_index)
-                    #t2 = time.clock()
-
-                    #time_acc_func += t2 - t1
+                batch_index=0
+                for x,y in dataset.train(self.hp.minibatch_size):
+                    c = self.classifier.pretrain_functions[i](x)
+
+                    self.series["reconstruction_error"].append((epoch, batch_index), c)
+                    batch_index+=1
 
-                    #if batch_index % 500 == 0:
-                    #    print "acc / total", time_acc_func / (t2 - start_time), time_acc_func
+                    #if batch_index % 100 == 0:
+                    #    print "100 batches"
 
-                    self.series_mux.append("reconstruction_error", c)
+                    # useful when doing tests
+                    if self.max_minibatches and batch_index >= self.max_minibatches:
+                        break
 
                 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
                 sys.stdout.flush()
 
-            self.series_mux.append("params", self.classifier.all_params)
+            self.series['params'].append((epoch,), self.classifier.all_params)
 
         end_time = time.clock()
 
         print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
         self.hp.update({'pretraining_time': end_time-start_time})
 
         sys.stdout.flush()
 
```
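The rewritten pretraining loop no longer indexes shared Theano variables by minibatch; it iterates over `(x, y)` pairs produced by `dataset.train(minibatch_size)`, and the finetuning code below does the same with `dataset.valid(...)` and `dataset.test(...)`. The actual dataset classes live elsewhere in the ift6266 repository; purely for illustration, a minimal in-memory object with the same generator interface might look like this (class name and shapes are assumptions):

```python
import numpy

class ArrayDataset(object):
    """Hypothetical stand-in exposing the train()/valid()/test() generators
    that SdaSgdOptimizer consumes: each call yields (x, y) minibatches."""

    def __init__(self, train_xy, valid_xy, test_xy):
        self._sets = {'train': train_xy, 'valid': valid_xy, 'test': test_xy}

    def _iter(self, name, minibatch_size):
        x, y = self._sets[name]
        for start in range(0, len(x), minibatch_size):
            yield x[start:start + minibatch_size], y[start:start + minibatch_size]

    def train(self, minibatch_size):
        return self._iter('train', minibatch_size)

    def valid(self, minibatch_size):
        return self._iter('valid', minibatch_size)

    def test(self, minibatch_size):
        return self._iter('test', minibatch_size)

# Tiny smoke test with random data (sizes arbitrary):
rng = numpy.random.RandomState(0)
def make(n):
    return rng.rand(n, 1024).astype('float32'), rng.randint(0, 62, n).astype('int32')

dataset = ArrayDataset(make(100), make(20), make(20))
for x, y in dataset.train(10):
    assert x.shape == (10, 1024) and y.shape == (10,)
```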
```diff
-    def finetune(self):
+    def finetune(self,dataset):
         print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
-        index = T.lscalar()    # index to a [mini]batch
         minibatch_size = self.hp.minibatch_size
 
         # create a function to compute the mistakes that are made by the model
         # on the validation set, or testing set
-        shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
-        test_model = theano.function([index], self.classifier.errors,
-                 givens = {
-                   self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-
-        validate_model = theano.function([index], self.classifier.errors,
-                 givens = {
-                   self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+#                 givens = {
+#                   self.classifier.x: ensemble_x,
+#                   self.classifier.y: ensemble_y]})
+
+        validate_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+#                 givens = {
+#                   self.classifier.x: ,
+#                   self.classifier.y: ]})
 
 
         # early-stopping parameters
         patience = 10000                  # look as this many examples regardless
         patience_increase = 2.            # wait this much longer when a new best is
                                           # found
         improvement_threshold = 0.995     # a relative improvement of this much is
                                           # considered significant
-        validation_frequency = min(self.n_train_batches, patience/2)
+        validation_frequency = min(self.mb_per_epoch, patience/2)
                                           # go through this many
                                           # minibatche before checking the network
                                           # on the validation set; in this case we
                                           # check every epoch
+        if self.max_minibatches and validation_frequency > self.max_minibatches:
+            validation_frequency = self.max_minibatches / 2
 
         best_params = None
         best_validation_loss = float('inf')
         test_score = 0.
         start_time = time.clock()
 
         done_looping = False
         epoch = 0
 
+        total_mb_index = 0
+
```
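The early-stopping bookkeeping above is unchanged in spirit, but it now counts minibatches through `total_mb_index` instead of deriving an iteration number from `epoch * n_train_batches`. A rough worked example of the arithmetic, assuming 100000 examples per epoch and a minibatch size of 20 (both values invented for illustration):

```python
# Assumed: examples_per_epoch = 100000, minibatch_size = 20
mb_per_epoch = 100000 / 20                                 # 5000 minibatches per epoch
patience = 10000
validation_frequency = min(mb_per_epoch, patience / 2)     # min(5000, 5000) = 5000 -> validate once per epoch

# With max_minibatches = 300 (a short test run), validation happens more often:
max_minibatches = 300
if max_minibatches and validation_frequency > max_minibatches:
    validation_frequency = max_minibatches / 2             # 150

# If validation improves sufficiently at total_mb_index = 12000, patience is extended:
patience = max(patience, 12000 * 2.)                       # 24000.0
```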
```diff
         while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
             epoch = epoch + 1
-            for minibatch_index in xrange(self.n_train_batches):
-
-                cost_ij = self.classifier.finetune(minibatch_index)
-                iter = epoch * self.n_train_batches + minibatch_index
-
-                self.series_mux.append("training_error", cost_ij)
-
-                if (iter+1) % validation_frequency == 0:
+            minibatch_index = -1
+            for x,y in dataset.train(minibatch_size):
+                minibatch_index += 1
+                cost_ij = self.classifier.finetune(x,y)
+                total_mb_index += 1
+
+                self.series["training_error"].append((epoch, minibatch_index), cost_ij)
+
+                if (total_mb_index+1) % validation_frequency == 0:
 
-                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
+                    iter = dataset.valid(minibatch_size)
+                    if self.max_minibatches:
+                        iter = itermax(iter, self.max_minibatches)
+                    validation_losses = [validate_model(x,y) for x,y in iter]
                     this_validation_loss = numpy.mean(validation_losses)
-                    self.series_mux.append("validation_error", this_validation_loss)
+
+                    self.series["validation_error"].\
+                        append((epoch, minibatch_index), this_validation_loss*100.)
+
                     print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                           (epoch, minibatch_index+1, self.n_train_batches, \
+                           (epoch, minibatch_index+1, self.mb_per_epoch, \
                             this_validation_loss*100.))
 
 
                     # if we got the best validation score until now
                     if this_validation_loss < best_validation_loss:
 
                         #improve patience if loss improvement is good enough
                         if this_validation_loss < best_validation_loss * \
                                improvement_threshold :
-                            patience = max(patience, iter * patience_increase)
+                            patience = max(patience, total_mb_index * patience_increase)
 
                         # save best validation score and iteration number
                         best_validation_loss = this_validation_loss
-                        best_iter = iter
+                        best_iter = total_mb_index
 
                         # test it on the test set
-                        test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
+                        iter = dataset.test(minibatch_size)
+                        if self.max_minibatches:
+                            iter = itermax(iter, self.max_minibatches)
+                        test_losses = [test_model(x,y) for x,y in iter]
                         test_score = numpy.mean(test_losses)
-                        self.series_mux.append("test_error", test_score)
+
+                        self.series["test_error"].\
+                            append((epoch, minibatch_index), test_score*100.)
+
                         print((' epoch %i, minibatch %i/%i, test error of best '
                               'model %f %%') %
-                             (epoch, minibatch_index+1, self.n_train_batches,
+                             (epoch, minibatch_index+1, self.mb_per_epoch,
                               test_score*100.))
 
                     sys.stdout.flush()
 
-                self.series_mux.append("params", self.classifier.all_params)
-
-                if patience <= iter :
+                # useful when doing tests
+                if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                    break
+
+            self.series['params'].append((epoch,), self.classifier.all_params)
+
+            if patience <= total_mb_index:
                 done_looping = True
                 break
 
         end_time = time.clock()
         self.hp.update({'finetuning_time':end_time-start_time,\
```