# HG changeset patch
# User SylvainPL
# Date 1268593637 14400
# Node ID 02ed13244133ebf3fa41898613984d16eaabe88a
# Parent 4ce1fc11f4b24117facae1a2a2d6c9a3895dc737
version for using the dataset module

diff -r 4ce1fc11f4b2 -r 02ed13244133 deep/stacked_dae/v_sylvain/sgd_optimization.py
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	Sat Mar 13 15:45:43 2010 -0500
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	Sun Mar 14 15:07:17 2010 -0400
@@ -105,10 +105,10 @@
         sys.stdout.flush()

     def train(self):
-        self.pretrain()
-        self.finetune()
+        self.pretrain(self.dataset)
+        self.finetune(self.dataset)

-    def pretrain(self):
+    def pretrain(self,dataset):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()

@@ -118,8 +118,8 @@
             # go through pretraining epochs
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
-                for batch_index in xrange(self.n_train_batches):
-                    c = self.classifier.pretrain_functions[i](batch_index)
+                for x,y in dataset.train(self.hp.minibatch_size):
+                    c = self.classifier.pretrain_functions[i](x)

                     self.series["reconstruction_error"].append((epoch, batch_index), c)

@@ -135,24 +135,28 @@
         sys.stdout.flush()

-    def finetune(self):
+    def finetune(self,dataset):
         print "STARTING FINETUNING, time = ", datetime.datetime.now()

-        index   = T.lscalar()    # index to a [mini]batch
+        #index   = T.lscalar()    # index to a [mini]batch
         minibatch_size = self.hp.minibatch_size

         # create a function to compute the mistakes that are made by the model
         # on the validation set, or testing set
         shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
-        test_model = theano.function([index], self.classifier.errors,
+        test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
                 givens = {
-                  self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                  self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+                  #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
+                  #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+                  self.classifier.x: ensemble_x,
+                  self.classifier.y: ensemble_y})

-        validate_model = theano.function([index], self.classifier.errors,
+        validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
                 givens = {
-                  self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                  self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+                  #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
+                  #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+                  self.classifier.x: ensemble_x,
+                  self.classifier.y: ensemble_y})

         # early-stopping parameters
@@ -177,16 +181,18 @@
         while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
             epoch = epoch + 1
-            for minibatch_index in xrange(self.n_train_batches):
-
-                cost_ij = self.classifier.finetune(minibatch_index)
+            minibatch_index=int(-1)
+            for x,y in dataset.train(minibatch_size):
+
+                minibatch_index+=1
+                cost_ij = self.classifier.finetune(x,y)
                 iter = epoch * self.n_train_batches + minibatch_index

                 self.series["training_error"].append((epoch, minibatch_index), cost_ij)

                 if (iter+1) % validation_frequency == 0:

-                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
+                    validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)]
                     this_validation_loss = numpy.mean(validation_losses)

                     self.series["validation_error"].\
@@ -210,7 +216,7 @@
                         best_iter = iter

                         # test it on the test set
-                        test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
+                        test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)]
                         test_score = numpy.mean(test_losses)

                         self.series["test_error"].\
diff -r 4ce1fc11f4b2 -r 02ed13244133 deep/stacked_dae/v_sylvain/stacked_dae.py
--- a/deep/stacked_dae/v_sylvain/stacked_dae.py	Sat Mar 13 15:45:43 2010 -0500
+++ b/deep/stacked_dae/v_sylvain/stacked_dae.py	Sun Mar 14 15:07:17 2010 -0400
@@ -193,14 +193,14 @@
         print "input_divider", input_divider
         print "----"

-        self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
+        #self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))

         if len(hidden_layers_sizes) < 1 :
             raiseException (' You must have at least one hidden layer ')


         # allocate symbolic variables for the data
-        index   = T.lscalar()    # index to a [mini]batch
+        ##index   = T.lscalar()    # index to a [mini]batch
         self.x  = T.matrix('x')  # the data is presented as rasterized images
         self.y  = T.ivector('y') # the labels are presented as 1D vector of
                                  # [int] labels
@@ -247,10 +247,10 @@
                 updates[param] = param - gparam * pretrain_lr

             # create a function that trains the dA
-            update_fn = theano.function([index], dA_layer.cost, \
+            update_fn = theano.function([ensemble], dA_layer.cost, \
                   updates = updates,
                   givens = {
-                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider})
+                     self.x : ensemble})
             # collect this function into a list
             self.pretrain_functions += [update_fn]

@@ -273,11 +273,13 @@
         for param,gparam in zip(self.params, gparams):
             updates[param] = param - gparam*finetune_lr

-        self.finetune = theano.function([index], cost,
+        self.finetune = theano.function([ensemble_x,ensemble_y], cost,
                 updates = updates,
                 givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
-                  self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
+                  #self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
+                  #self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
+                  self.x : ensemble_x,
+                  self.y : ensemble_y} )

         # symbolic variable that points to the number of errors made on the
         # minibatch given by self.x and self.y
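
Note: the new loops in sgd_optimization.py only assume that the object supplied by the dataset module exposes train(), valid() and test() methods yielding (x, y) minibatches of a requested size. The following is a minimal, self-contained sketch of such an object; the class name ArrayDataset, the in-memory numpy arrays and the 784-feature shape are assumptions made for illustration, not the project's actual dataset implementation.

import numpy

class ArrayDataset(object):
    """Hypothetical in-memory stand-in for the dataset module's interface.

    The patched code only requires train()/valid()/test() methods that
    yield (x, y) minibatches; this concrete class is an illustration,
    not the project's actual implementation.
    """
    def __init__(self, train_xy, valid_xy, test_xy):
        self._splits = {'train': train_xy, 'valid': valid_xy, 'test': test_xy}

    def _iterate(self, split, minibatch_size):
        x, y = self._splits[split]
        for start in range(0, len(y), minibatch_size):
            yield x[start:start + minibatch_size], y[start:start + minibatch_size]

    def train(self, minibatch_size):
        return self._iterate('train', minibatch_size)

    def valid(self, minibatch_size):
        return self._iterate('valid', minibatch_size)

    def test(self, minibatch_size):
        return self._iterate('test', minibatch_size)

# Example: each (x, y) pair is one minibatch, exactly as consumed by
# pretrain() and finetune() after this patch.
rng = numpy.random.RandomState(0)
def make_split(n):
    return (rng.rand(n, 784).astype('float32'),
            rng.randint(0, 10, size=n).astype('int32'))

dataset = ArrayDataset(make_split(100), make_split(20), make_split(20))
for x, y in dataset.train(10):
    pass  # x, y are numpy arrays passed straight to the compiled functions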
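The companion change in stacked_dae.py replaces the integer index input (which sliced the shared train_set_x/train_set_y variables inside the givens clause) with symbolic minibatch inputs, so the compiled Theano functions receive the arrays produced by the dataset iterator directly. Below is a minimal sketch of that pattern; the declarations of ensemble_x/ensemble_y and the toy error expression are assumptions standing in for the real definitions in the full file.

import numpy
import theano
import theano.tensor as T

# Symbolic minibatch inputs that take the place of the old integer 'index'.
ensemble_x = T.matrix('ensemble_x')
ensemble_y = T.ivector('ensemble_y')

# Stand-ins for the model's own symbolic variables (self.x / self.y).
model_x = T.matrix('x')
model_y = T.ivector('y')

# Toy error expression so the sketch compiles; the real code uses
# self.classifier.errors.
errors = T.mean(T.neq(T.argmax(model_x, axis=1), model_y))

# givens substitutes the model's variables with the function's inputs,
# so minibatches are passed in as ordinary numpy arrays at call time.
test_model = theano.function([ensemble_x, ensemble_y], errors,
        givens={model_x: ensemble_x, model_y: ensemble_y})

xb = numpy.random.rand(10, 5).astype(theano.config.floatX)
yb = numpy.random.randint(0, 5, size=10).astype('int32')
print(test_model(xb, yb))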