comparison deep/stacked_dae/sgd_optimization.py @ 275:7b4507295eba

merge
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Mon, 22 Mar 2010 10:20:10 -0400
parents acb942530923 c8fe09a65039
children 8a3af19ae272
comparison
equal deleted inserted replaced
274:44409b6652aa 275:7b4507295eba
13 from jobman import DD 13 from jobman import DD
14 import jobman, jobman.sql 14 import jobman, jobman.sql
15 15
16 from stacked_dae import SdA 16 from stacked_dae import SdA
17 17
18 def shared_dataset(data_xy): 18 from ift6266.utils.seriestables import *
19 data_x, data_y = data_xy 19
20 #shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) 20 default_series = { \
21 #shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) 21 'reconstruction_error' : DummySeries(),
22 #shared_y = T.cast(shared_y, 'int32') 22 'training_error' : DummySeries(),
23 shared_x = theano.shared(data_x) 23 'validation_error' : DummySeries(),
24 shared_y = theano.shared(data_y) 24 'test_error' : DummySeries(),
25 return shared_x, shared_y 25 'params' : DummySeries()
26 26 }
27 class DummyMux(): 27
28 def append(self, param1, param2): 28 def itermax(iter, max):
29 pass 29 for i,it in enumerate(iter):
30 if i >= max:
31 break
32 yield it
30 33
31 class SdaSgdOptimizer: 34 class SdaSgdOptimizer:
32 def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series_mux=None): 35 def __init__(self, dataset, hyperparameters, n_ins, n_outs,
36 examples_per_epoch, series=default_series, max_minibatches=None):
33 self.dataset = dataset 37 self.dataset = dataset
34 self.hp = hyperparameters 38 self.hp = hyperparameters
35 self.n_ins = n_ins 39 self.n_ins = n_ins
36 self.n_outs = n_outs 40 self.n_outs = n_outs
37 self.input_divider = input_divider
38 41
39 if not series_mux: 42 self.max_minibatches = max_minibatches
40 series_mux = DummyMux() 43 print "SdaSgdOptimizer, max_minibatches =", max_minibatches
41 print "No series multiplexer set" 44
42 self.series_mux = series_mux 45 self.ex_per_epoch = examples_per_epoch
46 self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
47
48 self.series = series
43 49
44 self.rng = numpy.random.RandomState(1234) 50 self.rng = numpy.random.RandomState(1234)
45 51
46 self.init_datasets()
47 self.init_classifier() 52 self.init_classifier()
48 53
49 sys.stdout.flush() 54 sys.stdout.flush()
50
51 def init_datasets(self):
52 print "init_datasets"
53 sys.stdout.flush()
54
55 train_set, valid_set, test_set = self.dataset
56 self.test_set_x, self.test_set_y = shared_dataset(test_set)
57 self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
58 self.train_set_x, self.train_set_y = shared_dataset(train_set)
59
60 # compute number of minibatches for training, validation and testing
61 self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
62 self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
63 # remove last batch in case it's incomplete
64 self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1
65 55
66 def init_classifier(self): 56 def init_classifier(self):
67 print "Constructing classifier" 57 print "Constructing classifier"
68 58
69 # we don't want to save arrays in DD objects, so 59 # we don't want to save arrays in DD objects, so
72 layers_sizes = [self.hp.hidden_layers_sizes] * nhl 62 layers_sizes = [self.hp.hidden_layers_sizes] * nhl
73 corruption_levels = [self.hp.corruption_levels] * nhl 63 corruption_levels = [self.hp.corruption_levels] * nhl
74 64
75 # construct the stacked denoising autoencoder class 65 # construct the stacked denoising autoencoder class
76 self.classifier = SdA( \ 66 self.classifier = SdA( \
77 train_set_x= self.train_set_x, \
78 train_set_y = self.train_set_y,\
79 batch_size = self.hp.minibatch_size, \ 67 batch_size = self.hp.minibatch_size, \
80 n_ins= self.n_ins, \ 68 n_ins= self.n_ins, \
81 hidden_layers_sizes = layers_sizes, \ 69 hidden_layers_sizes = layers_sizes, \
82 n_outs = self.n_outs, \ 70 n_outs = self.n_outs, \
83 corruption_levels = corruption_levels,\ 71 corruption_levels = corruption_levels,\
84 rng = self.rng,\ 72 rng = self.rng,\
85 pretrain_lr = self.hp.pretraining_lr, \ 73 pretrain_lr = self.hp.pretraining_lr, \
86 finetune_lr = self.hp.finetuning_lr,\ 74 finetune_lr = self.hp.finetuning_lr)
87 input_divider = self.input_divider )
88 75
89 #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") 76 #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
90 77
91 sys.stdout.flush() 78 sys.stdout.flush()
92 79
93 def train(self): 80 def train(self):
94 self.pretrain() 81 self.pretrain(self.dataset)
95 self.finetune() 82 self.finetune(self.dataset)
96 83
97 def pretrain(self): 84 def pretrain(self,dataset):
98 print "STARTING PRETRAINING, time = ", datetime.datetime.now() 85 print "STARTING PRETRAINING, time = ", datetime.datetime.now()
99 sys.stdout.flush() 86 sys.stdout.flush()
100
101 #time_acc_func = 0.0
102 #time_acc_total = 0.0
103 87
104 start_time = time.clock() 88 start_time = time.clock()
105 ## Pre-train layer-wise 89 ## Pre-train layer-wise
106 for i in xrange(self.classifier.n_layers): 90 for i in xrange(self.classifier.n_layers):
107 # go through pretraining epochs 91 # go through pretraining epochs
108 for epoch in xrange(self.hp.pretraining_epochs_per_layer): 92 for epoch in xrange(self.hp.pretraining_epochs_per_layer):
109 # go through the training set 93 # go through the training set
110 for batch_index in xrange(self.n_train_batches): 94 batch_index=0
111 #t1 = time.clock() 95 for x,y in dataset.train(self.hp.minibatch_size):
112 c = self.classifier.pretrain_functions[i](batch_index) 96 c = self.classifier.pretrain_functions[i](x)
113 #t2 = time.clock() 97
114 98 self.series["reconstruction_error"].append((epoch, batch_index), c)
115 #time_acc_func += t2 - t1 99 batch_index+=1
116 100
117 #if batch_index % 500 == 0: 101 #if batch_index % 100 == 0:
118 # print "acc / total", time_acc_func / (t2 - start_time), time_acc_func 102 # print "100 batches"
119 103
120 self.series_mux.append("reconstruction_error", c) 104 # useful when doing tests
105 if self.max_minibatches and batch_index >= self.max_minibatches:
106 break
121 107
122 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c 108 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
123 sys.stdout.flush() 109 sys.stdout.flush()
124 110
125 self.series_mux.append("params", self.classifier.all_params) 111 self.series['params'].append((epoch,), self.classifier.all_params)
126 112
127 end_time = time.clock() 113 end_time = time.clock()
128 114
129 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) 115 print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
130 self.hp.update({'pretraining_time': end_time-start_time}) 116 self.hp.update({'pretraining_time': end_time-start_time})
131 117
132 sys.stdout.flush() 118 sys.stdout.flush()
133 119
134 def finetune(self): 120 def finetune(self,dataset):
135 print "STARTING FINETUNING, time = ", datetime.datetime.now() 121 print "STARTING FINETUNING, time = ", datetime.datetime.now()
136 122
137 index = T.lscalar() # index to a [mini]batch
138 minibatch_size = self.hp.minibatch_size 123 minibatch_size = self.hp.minibatch_size
139 124
140 # create a function to compute the mistakes that are made by the model 125 # create a function to compute the mistakes that are made by the model
141 # on the validation set, or testing set 126 # on the validation set, or testing set
142 shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX)) 127 test_model = \
143 test_model = theano.function([index], self.classifier.errors, 128 theano.function(
144 givens = { 129 [self.classifier.x,self.classifier.y], self.classifier.errors)
145 self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, 130 # givens = {
146 self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]}) 131 # self.classifier.x: ensemble_x,
147 132 # self.classifier.y: ensemble_y]})
148 validate_model = theano.function([index], self.classifier.errors, 133
149 givens = { 134 validate_model = \
150 self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, 135 theano.function(
151 self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]}) 136 [self.classifier.x,self.classifier.y], self.classifier.errors)
137 # givens = {
138 # self.classifier.x: ,
139 # self.classifier.y: ]})
152 140
153 141
154 # early-stopping parameters 142 # early-stopping parameters
155 patience = 10000 # look at this many examples regardless 143 patience = 10000 # look at this many examples regardless
156 patience_increase = 2. # wait this much longer when a new best is 144 patience_increase = 2. # wait this much longer when a new best is
157 # found 145 # found
158 improvement_threshold = 0.995 # a relative improvement of this much is 146 improvement_threshold = 0.995 # a relative improvement of this much is
159 # considered significant 147 # considered significant
160 validation_frequency = min(self.n_train_batches, patience/2) 148 validation_frequency = min(self.mb_per_epoch, patience/2)
161 # go through this many 149 # go through this many
162 # minibatches before checking the network 150 # minibatches before checking the network
163 # on the validation set; in this case we 151 # on the validation set; in this case we
164 # check every epoch 152 # check every epoch
153 if self.max_minibatches and validation_frequency > self.max_minibatches:
154 validation_frequency = self.max_minibatches / 2
165 155
166 best_params = None 156 best_params = None
167 best_validation_loss = float('inf') 157 best_validation_loss = float('inf')
168 test_score = 0. 158 test_score = 0.
169 start_time = time.clock() 159 start_time = time.clock()
170 160
171 done_looping = False 161 done_looping = False
172 epoch = 0 162 epoch = 0
173 163
164 total_mb_index = 0
165
174 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): 166 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
175 epoch = epoch + 1 167 epoch = epoch + 1
176 for minibatch_index in xrange(self.n_train_batches): 168 minibatch_index = -1
177 169 for x,y in dataset.train(minibatch_size):
178 cost_ij = self.classifier.finetune(minibatch_index) 170 minibatch_index += 1
179 iter = epoch * self.n_train_batches + minibatch_index 171 cost_ij = self.classifier.finetune(x,y)
180 172 total_mb_index += 1
181 self.series_mux.append("training_error", cost_ij) 173
182 174 self.series["training_error"].append((epoch, minibatch_index), cost_ij)
183 if (iter+1) % validation_frequency == 0: 175
176 if (total_mb_index+1) % validation_frequency == 0:
184 177
185 validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)] 178 iter = dataset.valid(minibatch_size)
179 if self.max_minibatches:
180 iter = itermax(iter, self.max_minibatches)
181 validation_losses = [validate_model(x,y) for x,y in iter]
186 this_validation_loss = numpy.mean(validation_losses) 182 this_validation_loss = numpy.mean(validation_losses)
187 self.series_mux.append("validation_error", this_validation_loss) 183
184 self.series["validation_error"].\
185 append((epoch, minibatch_index), this_validation_loss*100.)
186
188 print('epoch %i, minibatch %i/%i, validation error %f %%' % \ 187 print('epoch %i, minibatch %i/%i, validation error %f %%' % \
189 (epoch, minibatch_index+1, self.n_train_batches, \ 188 (epoch, minibatch_index+1, self.mb_per_epoch, \
190 this_validation_loss*100.)) 189 this_validation_loss*100.))
191 190
192 191
193 # if we got the best validation score until now 192 # if we got the best validation score until now
194 if this_validation_loss < best_validation_loss: 193 if this_validation_loss < best_validation_loss:
195 194
196 #improve patience if loss improvement is good enough 195 #improve patience if loss improvement is good enough
197 if this_validation_loss < best_validation_loss * \ 196 if this_validation_loss < best_validation_loss * \
198 improvement_threshold : 197 improvement_threshold :
199 patience = max(patience, iter * patience_increase) 198 patience = max(patience, total_mb_index * patience_increase)
200 199
201 # save best validation score and iteration number 200 # save best validation score and iteration number
202 best_validation_loss = this_validation_loss 201 best_validation_loss = this_validation_loss
203 best_iter = iter 202 best_iter = total_mb_index
204 203
205 # test it on the test set 204 # test it on the test set
206 test_losses = [test_model(i) for i in xrange(self.n_test_batches)] 205 iter = dataset.test(minibatch_size)
206 if self.max_minibatches:
207 iter = itermax(iter, self.max_minibatches)
208 test_losses = [test_model(x,y) for x,y in iter]
207 test_score = numpy.mean(test_losses) 209 test_score = numpy.mean(test_losses)
208 self.series_mux.append("test_error", test_score) 210
211 self.series["test_error"].\
212 append((epoch, minibatch_index), test_score*100.)
213
209 print((' epoch %i, minibatch %i/%i, test error of best ' 214 print((' epoch %i, minibatch %i/%i, test error of best '
210 'model %f %%') % 215 'model %f %%') %
211 (epoch, minibatch_index+1, self.n_train_batches, 216 (epoch, minibatch_index+1, self.mb_per_epoch,
212 test_score*100.)) 217 test_score*100.))
213 218
214 sys.stdout.flush() 219 sys.stdout.flush()
215 220
216 self.series_mux.append("params", self.classifier.all_params) 221 # useful when doing tests
217 222 if self.max_minibatches and minibatch_index >= self.max_minibatches:
218 if patience <= iter : 223 break
224
225 self.series['params'].append((epoch,), self.classifier.all_params)
226
227 if patience <= total_mb_index:
219 done_looping = True 228 done_looping = True
220 break 229 break
221 230
222 end_time = time.clock() 231 end_time = time.clock()
223 self.hp.update({'finetuning_time':end_time-start_time,\ 232 self.hp.update({'finetuning_time':end_time-start_time,\