ift6266: comparison deep/stacked_dae/sgd_optimization.py @ 185:b9ea8e2d071a
Removed the code for reusing pretraining results (too complicated for too little benefit: it's the finetuning that really takes a long time).
author | fsavard |
---|---|
date | Fri, 26 Feb 2010 17:45:52 -0500 |
parents | 1f5937e9e530 |
children | d364a130b221 |
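
This change drops the job_tree / results_db / experiment machinery from SdaSgdOptimizer: the constructor now only takes an optional series_mux, falling back to a DummyMux that ignores every append(), and train() simply runs pretrain() followed by finetune(). A minimal driver sketch for the simplified class is shown below; the import path, the random data and the hyperparameter values are placeholder assumptions, not taken from the repository.

```python
# Hypothetical driver for the simplified SdaSgdOptimizer; the random data and
# hyperparameter values below are placeholders, not taken from the repository.
import numpy
from jobman import DD
from sgd_optimization import SdaSgdOptimizer  # assumed import path for this file

rng = numpy.random.RandomState(0)

def random_split(n, n_ins, n_outs):
    # Stand-in for a real (x, y) split; shared_dataset() wraps these arrays.
    x = rng.uniform(size=(n, n_ins)).astype('float32')
    y = rng.randint(0, n_outs, size=n)
    return (x, y)

n_ins, n_outs = 32 * 32, 10
dataset = (random_split(1000, n_ins, n_outs),   # train
           random_split(200, n_ins, n_outs),    # valid
           random_split(200, n_ins, n_outs))    # test

hyperparameters = DD({'minibatch_size': 20,
                      'pretraining_lr': 0.001,
                      'pretraining_epochs_per_layer': 2,
                      'finetuning_lr': 0.1,
                      'max_finetuning_epochs': 5,
                      'hidden_layers_sizes': [500, 500, 500],
                      'corruption_levels': [0.1, 0.2, 0.3]})

# With series_mux=None the optimizer falls back to DummyMux, which silently
# drops every append(); pass a real multiplexer to actually record the series.
optimizer = SdaSgdOptimizer(dataset, hyperparameters,
                            n_ins=n_ins, n_outs=n_outs,
                            input_divider=1.0, series_mux=None)
optimizer.train()   # pretrain() followed by finetune()
```
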
172:4d3d3627df3e | 185:b9ea8e2d071a |
---|---|
5 | 5 |
6 import numpy | 6 import numpy |
7 import theano | 7 import theano |
8 import time | 8 import time |
9 import theano.tensor as T | 9 import theano.tensor as T |
10 import copy | |
11 import sys | 10 import sys |
12 | 11 |
13 from jobman import DD | 12 from jobman import DD |
14 import jobman, jobman.sql | 13 import jobman, jobman.sql |
15 | 14 |
22 #shared_y = T.cast(shared_y, 'int32') | 21 #shared_y = T.cast(shared_y, 'int32') |
23 shared_x = theano.shared(data_x) | 22 shared_x = theano.shared(data_x) |
24 shared_y = theano.shared(data_y) | 23 shared_y = theano.shared(data_y) |
25 return shared_x, shared_y | 24 return shared_x, shared_y |
26 | 25 |
| 26 class DummyMux(): |
| 27 def append(self, param1, param2): |
| 28 pass |
| 29 |
27 class SdaSgdOptimizer: | 30 class SdaSgdOptimizer: |
28 def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0,\ | 31 def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series_mux=None): |
29 job_tree=False, results_db=None,\ | |
30 experiment="",\ | |
31 num_hidden_layers_to_try=[1,2,3], \ | |
32 finetuning_lr_to_try=[0.1, 0.01, 0.001, 0.0001, 0.00001]): | |
33 | |
34 self.dataset = dataset | 32 self.dataset = dataset |
35 self.hp = copy.copy(hyperparameters) | 33 self.hp = hyperparameters |
36 self.n_ins = n_ins | 34 self.n_ins = n_ins |
37 self.n_outs = n_outs | 35 self.n_outs = n_outs |
38 self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX) | 36 self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX) |
39 | 37 |
40 self.job_tree = job_tree | 38 if not series_mux: |
41 self.results_db = results_db | 39 series_mux = DummyMux() |
42 self.experiment = experiment | 40 print "No series multiplexer set" |
43 if self.job_tree: | 41 self.series_mux = series_mux |
44 assert(not results_db is None) | |
45 # these hp should not be there, so we insert default values | |
46 # we use 3 hidden layers as we'll iterate through 1,2,3 | |
47 self.hp.finetuning_lr = 0.1 # dummy value, will be replaced anyway | |
48 cl = self.hp.corruption_levels | |
49 nh = self.hp.hidden_layers_sizes | |
50 self.hp.corruption_levels = [cl,cl,cl] | |
51 self.hp.hidden_layers_sizes = [nh,nh,nh] | |
52 | |
53 self.num_hidden_layers_to_try = num_hidden_layers_to_try | |
54 self.finetuning_lr_to_try = finetuning_lr_to_try | |
55 | |
56 self.printout_frequency = 1000 | |
57 | 42 |
58 self.rng = numpy.random.RandomState(1234) | 43 self.rng = numpy.random.RandomState(1234) |
59 | 44 |
60 self.init_datasets() | 45 self.init_datasets() |
61 self.init_classifier() | 46 self.init_classifier() |
| 47 |
| 48 sys.stdout.flush() |
62 | 49 |
63 def init_datasets(self): | 50 def init_datasets(self): |
64 print "init_datasets" | 51 print "init_datasets" |
| 52 sys.stdout.flush() |
| 53 |
65 train_set, valid_set, test_set = self.dataset | 54 train_set, valid_set, test_set = self.dataset |
66 self.test_set_x, self.test_set_y = shared_dataset(test_set) | 55 self.test_set_x, self.test_set_y = shared_dataset(test_set) |
67 self.valid_set_x, self.valid_set_y = shared_dataset(valid_set) | 56 self.valid_set_x, self.valid_set_y = shared_dataset(valid_set) |
68 self.train_set_x, self.train_set_y = shared_dataset(train_set) | 57 self.train_set_x, self.train_set_y = shared_dataset(train_set) |
69 | 58 |
72 self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size | 61 self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size |
73 self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size | 62 self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size |
74 | 63 |
75 def init_classifier(self): | 64 def init_classifier(self): |
76 print "Constructing classifier" | 65 print "Constructing classifier" |
| 66 |
77 # construct the stacked denoising autoencoder class | 67 # construct the stacked denoising autoencoder class |
78 self.classifier = SdA( \ | 68 self.classifier = SdA( \ |
79 train_set_x= self.train_set_x, \ | 69 train_set_x= self.train_set_x, \ |
80 train_set_y = self.train_set_y,\ | 70 train_set_y = self.train_set_y,\ |
81 batch_size = self.hp.minibatch_size, \ | 71 batch_size = self.hp.minibatch_size, \ |
86 rng = self.rng,\ | 76 rng = self.rng,\ |
87 pretrain_lr = self.hp.pretraining_lr, \ | 77 pretrain_lr = self.hp.pretraining_lr, \ |
88 finetune_lr = self.hp.finetuning_lr,\ | 78 finetune_lr = self.hp.finetuning_lr,\ |
89 input_divider = self.input_divider ) | 79 input_divider = self.input_divider ) |
90 | 80 |
| 81 sys.stdout.flush() |
| 82 |
91 def train(self): | 83 def train(self): |
92 self.pretrain() | 84 self.pretrain() |
93 if not self.job_tree: | 85 self.finetune() |
94 # if job_tree is True, finetuning was already performed | |
95 self.finetune() | |
96 | 86 |
97 def pretrain(self): | 87 def pretrain(self): |
98 print "STARTING PRETRAINING" | 88 print "STARTING PRETRAINING" |
99 | 89 sys.stdout.flush() |
100 printout_acc = 0.0 | |
101 last_error = 0.0 | |
102 | 90 |
103 start_time = time.clock() | 91 start_time = time.clock() |
104 ## Pre-train layer-wise | 92 ## Pre-train layer-wise |
105 for i in xrange(self.classifier.n_layers): | 93 for i in xrange(self.classifier.n_layers): |
106 # go through pretraining epochs | 94 # go through pretraining epochs |
107 for epoch in xrange(self.hp.pretraining_epochs_per_layer): | 95 for epoch in xrange(self.hp.pretraining_epochs_per_layer): |
108 # go through the training set | 96 # go through the training set |
109 for batch_index in xrange(self.n_train_batches): | 97 for batch_index in xrange(self.n_train_batches): |
110 c = self.classifier.pretrain_functions[i](batch_index) | 98 c = self.classifier.pretrain_functions[i](batch_index) |
111 | 99 |
112 printout_acc += c / self.printout_frequency | 100 self.series_mux.append("reconstruction_error", c) |
113 if (batch_index+1) % self.printout_frequency == 0: | |
114 print batch_index, "reconstruction cost avg=", printout_acc | |
115 last_error = printout_acc | |
116 printout_acc = 0.0 | |
117 | 101 |
118 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c | 102 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c |
119 | 103 sys.stdout.flush() |
120 self.job_splitter(i+1, time.clock()-start_time, last_error) | |
121 | 104 |
122 end_time = time.clock() | 105 end_time = time.clock() |
123 | 106 |
124 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) | 107 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) |
125 | 108 self.hp.update({'pretraining_time': end_time-start_time}) |
126 # Save time by reusing intermediate results | 109 |
127 def job_splitter(self, current_pretraining_layer, pretraining_time, last_error): | 110 sys.stdout.flush() |
128 | |
129 state_copy = None | |
130 original_classifier = None | |
131 | |
132 if self.job_tree and current_pretraining_layer in self.num_hidden_layers_to_try: | |
133 for lr in self.finetuning_lr_to_try: | |
134 sys.stdout.flush() | |
135 sys.stderr.flush() | |
136 | |
137 state_copy = copy.copy(self.hp) | |
138 | |
139 self.hp.update({'num_hidden_layers':current_pretraining_layer, \ | |
140 'finetuning_lr':lr,\ | |
141 'pretraining_time':pretraining_time,\ | |
142 'last_reconstruction_error':last_error}) | |
143 | |
144 original_classifier = self.classifier | |
145 print "ORIGINAL CLASSIFIER MEANS",original_classifier.get_params_means() | |
146 self.classifier = SdA.copy_reusing_lower_layers(original_classifier, current_pretraining_layer, new_finetuning_lr=lr) | |
147 | |
148 self.finetune() | |
149 | |
150 self.insert_finished_job() | |
151 | |
152 print "NEW CLASSIFIER MEANS AFTERWARDS",self.classifier.get_params_means() | |
153 print "ORIGINAL CLASSIFIER MEANS AFTERWARDS",original_classifier.get_params_means() | |
154 self.classifier = original_classifier | |
155 self.hp = state_copy | |
156 | |
157 def insert_finished_job(self): | |
158 job = copy.copy(self.hp) | |
159 job[jobman.sql.STATUS] = jobman.sql.DONE | |
160 job[jobman.sql.EXPERIMENT] = self.experiment | |
161 | |
162 # don,t try to store arrays in db | |
163 job['hidden_layers_sizes'] = job.hidden_layers_sizes[0] | |
164 job['corruption_levels'] = job.corruption_levels[0] | |
165 | |
166 print "Will insert finished job", job | |
167 jobman.sql.insert_dict(jobman.flatten(job), self.results_db) | |
168 | 111 |
169 def finetune(self): | 112 def finetune(self): |
170 print "STARTING FINETUNING" | 113 print "STARTING FINETUNING" |
171 | 114 |
172 index = T.lscalar() # index to a [mini]batch | 115 index = T.lscalar() # index to a [mini]batch |
203 start_time = time.clock() | 146 start_time = time.clock() |
204 | 147 |
205 done_looping = False | 148 done_looping = False |
206 epoch = 0 | 149 epoch = 0 |
207 | 150 |
208 printout_acc = 0.0 | |
209 | |
210 if not self.hp.has_key('max_finetuning_epochs'): | |
211 self.hp.max_finetuning_epochs = 1000 | |
212 | |
213 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): | 151 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): |
214 epoch = epoch + 1 | 152 epoch = epoch + 1 |
215 for minibatch_index in xrange(self.n_train_batches): | 153 for minibatch_index in xrange(self.n_train_batches): |
216 | 154 |
217 cost_ij = self.classifier.finetune(minibatch_index) | 155 cost_ij = self.classifier.finetune(minibatch_index) |
218 iter = epoch * self.n_train_batches + minibatch_index | 156 iter = epoch * self.n_train_batches + minibatch_index |
219 | 157 |
220 printout_acc += cost_ij / float(self.printout_frequency * minibatch_size) | 158 self.series_mux.append("training_error", cost_ij) |
221 if (iter+1) % self.printout_frequency == 0: | |
222 print iter, "cost avg=", printout_acc | |
223 printout_acc = 0.0 | |
224 | 159 |
225 if (iter+1) % validation_frequency == 0: | 160 if (iter+1) % validation_frequency == 0: |
226 | 161 |
227 validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)] | 162 validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)] |
228 this_validation_loss = numpy.mean(validation_losses) | 163 this_validation_loss = numpy.mean(validation_losses) |
249 print((' epoch %i, minibatch %i/%i, test error of best ' | 184 print((' epoch %i, minibatch %i/%i, test error of best ' |
250 'model %f %%') % | 185 'model %f %%') % |
251 (epoch, minibatch_index+1, self.n_train_batches, | 186 (epoch, minibatch_index+1, self.n_train_batches, |
252 test_score*100.)) | 187 test_score*100.)) |
253 | 188 |
| 189 sys.stdout.flush() |
| 190 |
| 191 self.series_mux.append("params", self.classifier.params) |
254 | 192 |
255 if patience <= iter : | 193 if patience <= iter : |
256 done_looping = True | 194 done_looping = True |
257 break | 195 break |
258 | 196 |
259 end_time = time.clock() | 197 end_time = time.clock() |
260 self.hp.update({'finetuning_time':end_time-start_time,\ | 198 self.hp.update({'finetuning_time':end_time-start_time,\ |
261 'best_validation_error':best_validation_loss,\ | 199 'best_validation_error':best_validation_loss,\ |
262 'test_score':test_score, | 200 'test_score':test_score, |
263 'num_finetuning_epochs':epoch}) | 201 'num_finetuning_epochs':epoch}) |
| 202 |
264 print(('Optimization complete with best validation score of %f %%,' | 203 print(('Optimization complete with best validation score of %f %%,' |
265 'with test performance %f %%') % | 204 'with test performance %f %%') % |
266 (best_validation_loss * 100., test_score*100.)) | 205 (best_validation_loss * 100., test_score*100.)) |
267 print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) | 206 print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) |
268 | 207 |
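
The only thing the new right-hand version requires of series_mux is an append(name, value) method, which is exactly the interface DummyMux stubs out. As a purely illustrative stand-in (not the repository's actual series module), an in-memory multiplexer could look like this:

```python
# Illustrative stand-in for a series multiplexer; the repository's real
# series module is not shown in this diff and is likely more elaborate.
from collections import defaultdict

class ListSeriesMux(object):
    """Records every appended value in a per-name list."""

    def __init__(self):
        self.series = defaultdict(list)

    def append(self, name, value):
        # Same signature as DummyMux.append(param1, param2) above.
        self.series[name].append(value)

# Usage sketch:
#   mux = ListSeriesMux()
#   optimizer = SdaSgdOptimizer(dataset, hyperparameters, n_ins, n_outs,
#                               series_mux=mux)
#   optimizer.train()
#   print mux.series["reconstruction_error"][:10]
```
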