comparison deep/stacked_dae/sgd_optimization.py @ 185:b9ea8e2d071a

Removed the code that dealt with reusing pretraining results (too complicated for too little benefit: it's the finetuning that really takes long).
author fsavard
date Fri, 26 Feb 2010 17:45:52 -0500
parents 1f5937e9e530
children d364a130b221
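
This changeset drops the job_tree/results_db bookkeeping and routes all progress reporting through a single series_mux object: the optimizer now calls series_mux.append(name, value) for "reconstruction_error", "training_error" and "params", and falls back to the no-op DummyMux when no multiplexer is given. The real multiplexer is defined outside this file; the sketch below only illustrates the interface assumed by the diff (the InMemorySeriesMux name and its dict-of-lists storage are invented for the example, not taken from the repository):

# Minimal stand-in for the series multiplexer interface used by SdaSgdOptimizer.
# Hypothetical example class: any object exposing append(name, value) would do,
# e.g. one writing to files or to a jobman database instead of memory.
class InMemorySeriesMux(object):
    def __init__(self):
        self.series = {}   # series name -> list of recorded values

    def append(self, name, value):
        # record one observation (a reconstruction cost, a training error,
        # or the current classifier parameters)
        self.series.setdefault(name, []).append(value)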
--- deep/stacked_dae/sgd_optimization.py (172:4d3d3627df3e)
+++ deep/stacked_dae/sgd_optimization.py (185:b9ea8e2d071a)
@@ -5,11 +5,10 @@
 
 import numpy
 import theano
 import time
 import theano.tensor as T
-import copy
 import sys
 
 from jobman import DD
 import jobman, jobman.sql
 
@@ -22,48 +21,38 @@
     #shared_y = T.cast(shared_y, 'int32')
     shared_x = theano.shared(data_x)
     shared_y = theano.shared(data_y)
     return shared_x, shared_y
 
+class DummyMux():
+    def append(self, param1, param2):
+        pass
+
 class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0,\
-                 job_tree=False, results_db=None,\
-                 experiment="",\
-                 num_hidden_layers_to_try=[1,2,3], \
-                 finetuning_lr_to_try=[0.1, 0.01, 0.001, 0.0001, 0.00001]):
-
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series_mux=None):
         self.dataset = dataset
-        self.hp = copy.copy(hyperparameters)
+        self.hp = hyperparameters
         self.n_ins = n_ins
         self.n_outs = n_outs
         self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
 
-        self.job_tree = job_tree
-        self.results_db = results_db
-        self.experiment = experiment
-        if self.job_tree:
-            assert(not results_db is None)
-            # these hp should not be there, so we insert default values
-            # we use 3 hidden layers as we'll iterate through 1,2,3
-            self.hp.finetuning_lr = 0.1 # dummy value, will be replaced anyway
-            cl = self.hp.corruption_levels
-            nh = self.hp.hidden_layers_sizes
-            self.hp.corruption_levels = [cl,cl,cl]
-            self.hp.hidden_layers_sizes = [nh,nh,nh]
-
-        self.num_hidden_layers_to_try = num_hidden_layers_to_try
-        self.finetuning_lr_to_try = finetuning_lr_to_try
-
-        self.printout_frequency = 1000
+        if not series_mux:
+            series_mux = DummyMux()
+            print "No series multiplexer set"
+        self.series_mux = series_mux
 
         self.rng = numpy.random.RandomState(1234)
 
         self.init_datasets()
         self.init_classifier()
+
+        sys.stdout.flush()
 
     def init_datasets(self):
         print "init_datasets"
+        sys.stdout.flush()
+
         train_set, valid_set, test_set = self.dataset
         self.test_set_x, self.test_set_y = shared_dataset(test_set)
         self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
         self.train_set_x, self.train_set_y = shared_dataset(train_set)
 
@@ -72,10 +61,11 @@
         self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
         self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size
 
     def init_classifier(self):
         print "Constructing classifier"
+
         # construct the stacked denoising autoencoder class
         self.classifier = SdA( \
                           train_set_x= self.train_set_x, \
                           train_set_y = self.train_set_y,\
                           batch_size = self.hp.minibatch_size, \
@@ -86,87 +76,40 @@
                           rng = self.rng,\
                           pretrain_lr = self.hp.pretraining_lr, \
                           finetune_lr = self.hp.finetuning_lr,\
                           input_divider = self.input_divider )
 
+        sys.stdout.flush()
+
     def train(self):
         self.pretrain()
-        if not self.job_tree:
-            # if job_tree is True, finetuning was already performed
-            self.finetune()
+        self.finetune()
 
     def pretrain(self):
         print "STARTING PRETRAINING"
-
-        printout_acc = 0.0
-        last_error = 0.0
+        sys.stdout.flush()
 
         start_time = time.clock()
         ## Pre-train layer-wise
         for i in xrange(self.classifier.n_layers):
             # go through pretraining epochs
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
                 for batch_index in xrange(self.n_train_batches):
                     c = self.classifier.pretrain_functions[i](batch_index)
 
-                    printout_acc += c / self.printout_frequency
-                    if (batch_index+1) % self.printout_frequency == 0:
-                        print batch_index, "reconstruction cost avg=", printout_acc
-                        last_error = printout_acc
-                        printout_acc = 0.0
+                    self.series_mux.append("reconstruction_error", c)
 
                 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
-
-            self.job_splitter(i+1, time.clock()-start_time, last_error)
+                sys.stdout.flush()
 
         end_time = time.clock()
 
         print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
-
-        # Save time by reusing intermediate results
-    def job_splitter(self, current_pretraining_layer, pretraining_time, last_error):
-
-        state_copy = None
-        original_classifier = None
-
-        if self.job_tree and current_pretraining_layer in self.num_hidden_layers_to_try:
-            for lr in self.finetuning_lr_to_try:
-                sys.stdout.flush()
-                sys.stderr.flush()
-
-                state_copy = copy.copy(self.hp)
-
-                self.hp.update({'num_hidden_layers':current_pretraining_layer, \
-                                'finetuning_lr':lr,\
-                                'pretraining_time':pretraining_time,\
-                                'last_reconstruction_error':last_error})
-
-                original_classifier = self.classifier
-                print "ORIGINAL CLASSIFIER MEANS",original_classifier.get_params_means()
-                self.classifier = SdA.copy_reusing_lower_layers(original_classifier, current_pretraining_layer, new_finetuning_lr=lr)
-
-                self.finetune()
-
-                self.insert_finished_job()
-
-                print "NEW CLASSIFIER MEANS AFTERWARDS",self.classifier.get_params_means()
-                print "ORIGINAL CLASSIFIER MEANS AFTERWARDS",original_classifier.get_params_means()
-                self.classifier = original_classifier
-                self.hp = state_copy
-
-    def insert_finished_job(self):
-        job = copy.copy(self.hp)
-        job[jobman.sql.STATUS] = jobman.sql.DONE
-        job[jobman.sql.EXPERIMENT] = self.experiment
-
-        # don,t try to store arrays in db
-        job['hidden_layers_sizes'] = job.hidden_layers_sizes[0]
-        job['corruption_levels'] = job.corruption_levels[0]
-
-        print "Will insert finished job", job
-        jobman.sql.insert_dict(jobman.flatten(job), self.results_db)
+        self.hp.update({'pretraining_time': end_time-start_time})
+
+        sys.stdout.flush()
 
     def finetune(self):
         print "STARTING FINETUNING"
 
         index = T.lscalar()    # index to a [mini]batch
@@ -203,26 +146,18 @@
         start_time = time.clock()
 
         done_looping = False
        epoch = 0
 
-        printout_acc = 0.0
-
-        if not self.hp.has_key('max_finetuning_epochs'):
-            self.hp.max_finetuning_epochs = 1000
-
        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(self.n_train_batches):
 
                cost_ij = self.classifier.finetune(minibatch_index)
                iter = epoch * self.n_train_batches + minibatch_index
 
-                printout_acc += cost_ij / float(self.printout_frequency * minibatch_size)
-                if (iter+1) % self.printout_frequency == 0:
-                    print iter, "cost avg=", printout_acc
-                    printout_acc = 0.0
+                self.series_mux.append("training_error", cost_ij)
 
                if (iter+1) % validation_frequency == 0:
 
                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
@@ -249,20 +184,24 @@
                        print((' epoch %i, minibatch %i/%i, test error of best '
                               'model %f %%') %
                              (epoch, minibatch_index+1, self.n_train_batches,
                               test_score*100.))
 
+                    sys.stdout.flush()
+
+                    self.series_mux.append("params", self.classifier.params)
+
                if patience <= iter :
                    done_looping = True
                    break
 
        end_time = time.clock()
        self.hp.update({'finetuning_time':end_time-start_time,\
                        'best_validation_error':best_validation_loss,\
                        'test_score':test_score,
                        'num_finetuning_epochs':epoch})
+
        print(('Optimization complete with best validation score of %f %%,'
               'with test performance %f %%') %
                     (best_validation_loss * 100., test_score*100.))
        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
 
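
For context, a hedged sketch of how the slimmed-down constructor is meant to be driven after this changeset; load_dataset, the hyperparameter values and the 32x32-input / 10-class shapes are placeholders for illustration, not values taken from the repository:

from jobman import DD

# Hypothetical driver; 'dataset' must be a (train, valid, test) triple of
# (x, y) pairs, as expected by init_datasets() above.
dataset = load_dataset()   # placeholder loader, not part of this module

hp = DD(minibatch_size=20,
        pretraining_lr=0.001, pretraining_epochs_per_layer=15,
        finetuning_lr=0.1, max_finetuning_epochs=1000,
        hidden_layers_sizes=[500, 500], corruption_levels=[0.1, 0.2])

optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=hp,
                            n_ins=32*32, n_outs=10,
                            input_divider=255.0,
                            series_mux=InMemorySeriesMux())
optimizer.train()   # pretrain() then finetune(), logging through series_mux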