comparison scripts/stacked_dae/stacked_dae.py @ 139:7d8366fb90bf

Added __init__.py files in the directory tree so the scripts can be used with paths for jobman, and made quite a few changes to stacked_dae so the pretraining work can be reused across tests where the pretraining is the same.
author fsavard
date Mon, 22 Feb 2010 13:38:25 -0500
parents 5c79a2557f2f
children
--- a/scripts/stacked_dae/stacked_dae.py    138:128507ac4edf
+++ b/scripts/stacked_dae/stacked_dae.py    139:7d8366fb90bf
@@ -4,10 +4,13 @@
 import numpy
 import theano
 import time
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+from utils import update_locals
 
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
         # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
         self.W = theano.shared( value=numpy.zeros((n_in,n_out),
@@ -138,16 +141,19 @@
 
 
 class SdA(object):
     def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
                  hidden_layers_sizes, n_outs,
-                 corruption_levels, rng, pretrain_lr, finetune_lr):
-
+                 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
+        update_locals(self, locals())
+
         self.layers = []
         self.pretrain_functions = []
         self.params = []
         self.n_layers = len(hidden_layers_sizes)
+
+        self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
 
         if len(hidden_layers_sizes) < 1 :
             raiseException (' You must have at least one hidden layer ')
 
 
@@ -198,11 +204,11 @@
 
             # create a function that trains the dA
             update_fn = theano.function([index], dA_layer.cost, \
                   updates = updates,
                   givens = {
-                     self.x : train_set_x[index*batch_size:(index+1)*batch_size]})
+                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider})
             # collect this function into a list
             self.pretrain_functions += [update_fn]
 
 
         # We now need to add a logistic layer on top of the MLP
@@ -223,33 +229,59 @@
             updates[param] = param - gparam*finetune_lr
 
         self.finetune = theano.function([index], cost,
                 updates = updates,
                 givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size],
+                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider,
                   self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
 
         # symbolic variable that points to the number of errors made on the
         # minibatch given by self.x and self.y
 
         self.errors = self.logLayer.errors(self.y)
+
+    @classmethod
+    def copy_reusing_lower_layers(cls, obj, num_hidden_layers, new_finetuning_lr=None):
+        assert(num_hidden_layers <= obj.n_layers)
+
+        if not new_finetuning_lr:
+            new_finetuning_lr = obj.finetune_lr
+
+        new_sda = cls(train_set_x= obj.train_set_x, \
+                      train_set_y = obj.train_set_y,\
+                      batch_size = obj.batch_size, \
+                      n_ins= obj.n_ins, \
+                      hidden_layers_sizes = obj.hidden_layers_sizes[:num_hidden_layers], \
+                      n_outs = obj.n_outs, \
+                      corruption_levels = obj.corruption_levels[:num_hidden_layers],\
+                      rng = obj.rng,\
+                      pretrain_lr = obj.pretrain_lr, \
+                      finetune_lr = new_finetuning_lr, \
+                      input_divider = obj.input_divider )
+
+        # new_sda.layers contains only the hidden layers actually
+        for i, layer in enumerate(new_sda.layers):
+            original_layer = obj.layers[i]
+            for p1,p2 in zip(layer.params, original_layer.params):
+                p1.value = p2.value.copy()
+
+        return new_sda
+
+    def get_params_copy(self):
+        return copy.deepcopy(self.params)
+
+    def set_params_from_copy(self, copy):
+        # We don't want to replace the var, as the functions have pointers in there
+        # We only want to replace values.
+        for i, p in enumerate(self.params):
+            p.value = copy[i].value
+
+    def get_params_means(self):
+        s = []
+        for p in self.params:
+            s.append(numpy.mean(p.value))
+        return s
 
 if __name__ == '__main__':
     import sys
     args = sys.argv[1:]
 
-    if len(args) < 1:
-        print "Options: mnist, jobman_add, load_nist"
-        sys.exit(0)
-
-    if args[0] == "jobman_add":
-        jobman_add()
-    elif args[0] == "mnist":
-        sgd_optimization_mnist(dataset=MNIST_LOCATION)
-    elif args[0] == "load_nist":
-        load_nist_test()
-    elif args[0] == "nist":
-        sgd_optimization_nist()
-    elif args[0] == "pc":
-        test_produit_croise_jobs()
-
-
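
The changeset above adds an input_divider constructor argument, the snapshot/restore helpers get_params_copy and set_params_from_copy, and the copy_reusing_lower_layers classmethod, which is what makes the pretraining work reusable across tests as the commit message describes. The following is a hypothetical usage sketch, not part of the changeset: the import path, dataset shapes, and hyperparameter values are placeholders; only the SdA call signatures come from the diff.

# Hypothetical sketch -- illustrates the API added in this changeset.
# Assumes SdA is importable from the stacked_dae module shown above and that
# the training set is given as Theano shared variables, as the constructor
# expects; all numeric values below are placeholder hyperparameters.
import numpy
import theano

from stacked_dae import SdA   # import path is an assumption

rng = numpy.random.RandomState(1234)

# Placeholder data: 1000 examples of 32x32 pixels, integer class labels.
train_set_x = theano.shared(numpy.zeros((1000, 32*32), dtype=theano.config.floatX))
train_set_y = theano.shared(numpy.zeros((1000,), dtype='int32'))

sda = SdA(train_set_x=train_set_x, train_set_y=train_set_y,
          batch_size=20, n_ins=32*32,
          hidden_layers_sizes=[500, 500, 500], n_outs=10,
          corruption_levels=[0.1, 0.2, 0.3], rng=rng,
          pretrain_lr=0.01, finetune_lr=0.1,
          input_divider=255.0)   # new argument: inputs are divided on the fly

# ... pretrain by looping over sda.pretrain_functions ...

# Snapshot the parameters once pretraining is done, finetune, then restore
# them so the same pretraining can be reused for another test.
pretrained_params = sda.get_params_copy()
# ... finetune by calling sda.finetune(minibatch_index) in a loop ...
sda.set_params_from_copy(pretrained_params)

# Or build a shallower network that reuses the two lower pretrained layers,
# with a different finetuning learning rate.
smaller_sda = SdA.copy_reusing_lower_layers(sda, 2, new_finetuning_lr=0.05)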