ift6266: changeset 358:31641a84e0ae
Initial commit for the experimental setup of the denoising convolutional network
author | humel
date | Thu, 22 Apr 2010 00:49:42 -0400
parents | 9a7b74927f7d
children | 969ad25e78cc c05680f8c92f
files | deep/convolutional_dae/salah_exp/nist_csda.py deep/convolutional_dae/salah_exp/sgd_optimization.py deep/convolutional_dae/salah_exp/stacked_convolutional_dae_uit.py deep/convolutional_dae/salah_exp/utils.py
diffstat | 4 files changed, 948 insertions(+), 0 deletions(-)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/convolutional_dae/salah_exp/nist_csda.py Thu Apr 22 00:49:42 2010 -0400 @@ -0,0 +1,266 @@ +#!/usr/bin/python +# coding: utf-8 + +import ift6266 +import pylearn + +import numpy +import theano +import time + +import pylearn.version +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +import copy +import sys +import os +import os.path + +from jobman import DD +import jobman, jobman.sql +from pylearn.io import filetensor + +from utils import produit_cartesien_jobs +from copy import copy + +from sgd_optimization_new import CSdASgdOptimizer + +#from ift6266.utils.scalar_series import * +from ift6266.utils.seriestables import * +import tables + +from ift6266 import datasets +from config import * + +''' +Function called by jobman upon launching each job +Its path is the one given when inserting jobs: see EXPERIMENT_PATH +''' +def jobman_entrypoint(state, channel): + # record mercurial versions of each package + pylearn.version.record_versions(state,[theano,ift6266,pylearn]) + # TODO: remove this, bad for number of simultaneous requests on DB + channel.save() + + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. + rtt = None + #REDUCE_TRAIN_TO = 40000 + if state.has_key('reduce_train_to'): + rtt = state['reduce_train_to'] + elif REDUCE_TRAIN_TO: + rtt = REDUCE_TRAIN_TO + + if state.has_key('decrease_lr'): + decrease_lr = state['decrease_lr'] + else : + decrease_lr = 0 + + n_ins = 32*32 + n_outs = 62 # 10 digits, 26*2 (lower, capitals) + + examples_per_epoch = 100000#NIST_ALL_TRAIN_SIZE + + #To be sure variables will not be only in the if statement + PATH = '' + nom_reptrain = '' + nom_serie = "" + if state['pretrain_choice'] == 0: + nom_serie="series_NIST.h5" + elif state['pretrain_choice'] == 1: + nom_serie="series_P07.h5" + + series = create_series(state.num_hidden_layers,nom_serie) + + + print "Creating optimizer with state, ", state + + optimizer = CSdASgdOptimizer(dataset=datasets.nist_all(), + hyperparameters=state, \ + n_ins=n_ins, n_outs=n_outs,\ + examples_per_epoch=examples_per_epoch, \ + series=series, + max_minibatches=rtt) + + parameters=[] + #Number of files of P07 used for pretraining + nb_file=0 + if state['pretrain_choice'] == 0: + print('\n\tpretraining with NIST\n') + optimizer.pretrain(datasets.nist_all()) + elif state['pretrain_choice'] == 1: + #To know how many file will be used during pretraining + nb_file = int(state['pretraining_epochs_per_layer']) + state['pretraining_epochs_per_layer'] = 1 #Only 1 time over the dataset + if nb_file >=100: + sys.exit("The code does not support this much pretraining epoch (99 max with P07).\n"+ + "You have to correct the code (and be patient, P07 is huge !!)\n"+ + "or reduce the number of pretraining epoch to run the code (better idea).\n") + print('\n\tpretraining with P07') + optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file)) + channel.save() + + #Set some of the parameters used for the finetuning + if state.has_key('finetune_set'): + finetune_choice=state['finetune_set'] + else: + finetune_choice=FINETUNE_SET + + if state.has_key('max_finetuning_epochs'): + max_finetune_epoch_NIST=state['max_finetuning_epochs'] + else: + max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS + + if state.has_key('max_finetuning_epochs_P07'): + max_finetune_epoch_P07=state['max_finetuning_epochs_P07'] + else: + max_finetune_epoch_P07=max_finetune_epoch_NIST + + #Decide how the finetune is done + + if 
finetune_choice == 0: + print('\n\n\tfinetune with NIST\n\n') + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr) + channel.save() + if finetune_choice == 1: + print('\n\n\tfinetune with P07\n\n') + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr) + channel.save() + if finetune_choice == 2: + print('\n\n\tfinetune with P07 followed by NIST\n\n') + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr) + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr) + channel.save() + if finetune_choice == 3: + print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\ + All hidden units output are input of the logistic regression\n\n') + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr) + + + if finetune_choice==-1: + print('\nSERIE OF 4 DIFFERENT FINETUNINGS') + print('\n\n\tfinetune with NIST\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with P07\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_finetune_P07.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\ + All hidden units output are input of the logistic regression\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr) + channel.save() + + channel.save() + + return channel.COMPLETE + +# These Series objects are used to save various statistics +# during the training. +def create_series(num_hidden_layers, nom_serie): + + # Replace series we don't want to save with DummySeries, e.g. 
+ # series['training_error'] = DummySeries() + + series = {} + + basedir = os.getcwd() + + h5f = tables.openFile(os.path.join(basedir, nom_serie), "w") + + #REDUCE_EVERY=10 + # reconstruction + reconstruction_base = \ + ErrorSeries(error_name="reconstruction_error", + table_name="reconstruction_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['reconstruction_error'] = \ + AccumulatorSeriesWrapper(base_series=reconstruction_base, + reduce_every=REDUCE_EVERY) + + # train + training_base = \ + ErrorSeries(error_name="training_error", + table_name="training_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['training_error'] = \ + AccumulatorSeriesWrapper(base_series=training_base, + reduce_every=REDUCE_EVERY) + + # valid and test are not accumulated/mean, saved directly + series['validation_error'] = \ + ErrorSeries(error_name="validation_error", + table_name="validation_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + series['test_error'] = \ + ErrorSeries(error_name="test_error", + table_name="test_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + param_names = [] + for i in range(num_hidden_layers): + param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i] + param_names += ['logreg_layer_W', 'logreg_layer_b'] + + # comment out series we don't want to save + series['params'] = SharedParamsStatisticsWrapper( + new_group_name="params", + base_group="/", + arrays_names=param_names, + hdf5_file=h5f, + index_names=('epoch',)) + + return series + +# Perform insertion into the Postgre DB based on combination +# of hyperparameter values above +# (see comment for produit_cartesien_jobs() to know how it works) +def jobman_insert_nist(): + jobs = produit_cartesien_jobs(JOB_VALS) + + db = jobman.sql.db(JOBDB) + for job in jobs: + job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) + jobman.sql.insert_dict(job, db) + + print "inserted" + +if __name__ == '__main__': + + args = sys.argv[1:] + + #if len(args) > 0 and args[0] == 'load_nist': + # test_load_nist() + + if len(args) > 0 and args[0] == 'jobman_insert': + jobman_insert_nist() + + elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': + chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) + jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock) + + else: + print "Bad arguments" +
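nist_csda.py is the jobman driver: it records package versions, chooses between NIST and P07 pretraining, then runs one of several finetuning schedules, logging everything through the series dictionary built by create_series(). Note that the reconstruction and training errors are not written to the HDF5 file one minibatch at a time; AccumulatorSeriesWrapper buffers them and stores only the mean over REDUCE_EVERY minibatches, as the series titles say. A minimal, self-contained sketch of that buffering behaviour (illustrative names only, not the ift6266.utils.seriestables API):

```python
# Sketch: buffer appended values and emit only their mean every
# `reduce_every` calls, which is what AccumulatorSeriesWrapper is used
# for in create_series() above.  MeanAccumulator and `sink` are made up.
class MeanAccumulator(object):
    def __init__(self, reduce_every, sink):
        self.reduce_every = reduce_every   # e.g. REDUCE_EVERY minibatches
        self.sink = sink                   # callable receiving (index, mean)
        self.buffer = []

    def append(self, index, value):
        self.buffer.append(value)
        if len(self.buffer) == self.reduce_every:
            self.sink(index, sum(self.buffer) / float(len(self.buffer)))
            self.buffer = []

# usage: log a per-minibatch cost, but keep only one averaged row per 10 steps
rows = []
acc = MeanAccumulator(10, sink=lambda index, mean: rows.append((index, mean)))
for minibatch in range(100):
    acc.append((0, minibatch), 0.5)
assert len(rows) == 10
```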
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/convolutional_dae/salah_exp/sgd_optimization.py Thu Apr 22 00:49:42 2010 -0400 @@ -0,0 +1,371 @@ +#!/usr/bin/python +# coding: utf-8 + +# Generic SdA optimization loop, adapted from the deeplearning.net tutorial + +import numpy +import theano +import time +import datetime +import theano.tensor as T +import sys +import pickle + +from jobman import DD +import jobman, jobman.sql +from copy import copy + +from stacked_dae import SdA + +from ift6266.utils.seriestables import * + +#For test purpose only +buffersize=1000 + +default_series = { \ + 'reconstruction_error' : DummySeries(), + 'training_error' : DummySeries(), + 'validation_error' : DummySeries(), + 'test_error' : DummySeries(), + 'params' : DummySeries() + } + +def itermax(iter, max): + for i,it in enumerate(iter): + if i >= max: + break + yield it + +class SdaSgdOptimizer: + def __init__(self, dataset, hyperparameters, n_ins, n_outs, + examples_per_epoch, series=default_series, max_minibatches=None): + self.dataset = dataset + self.hp = hyperparameters + self.n_ins = n_ins + self.n_outs = n_outs + self.parameters_pre=[] + + self.max_minibatches = max_minibatches + print "SdaSgdOptimizer, max_minibatches =", max_minibatches + + self.ex_per_epoch = examples_per_epoch + self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size + + self.series = series + + self.rng = numpy.random.RandomState(1234) + + self.init_classifier() + + sys.stdout.flush() + + def init_classifier(self): + print "Constructing classifier" + + # we don't want to save arrays in DD objects, so + # we recreate those arrays here + nhl = self.hp.num_hidden_layers + layers_sizes = [self.hp.hidden_layers_sizes] * nhl + corruption_levels = [self.hp.corruption_levels] * nhl + + # construct the stacked denoising autoencoder class + self.classifier = SdA( \ + batch_size = self.hp.minibatch_size, \ + n_ins= self.n_ins, \ + hidden_layers_sizes = layers_sizes, \ + n_outs = self.n_outs, \ + corruption_levels = corruption_levels,\ + rng = self.rng,\ + pretrain_lr = self.hp.pretraining_lr, \ + finetune_lr = self.hp.finetuning_lr) + + #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") + + sys.stdout.flush() + + def train(self): + self.pretrain(self.dataset) + self.finetune(self.dataset) + + def pretrain(self,dataset): + print "STARTING PRETRAINING, time = ", datetime.datetime.now() + sys.stdout.flush() + + un_fichier=int(819200.0/self.hp.minibatch_size) #Number of batches in a P07 file + + start_time = time.clock() + ## Pre-train layer-wise + for i in xrange(self.classifier.n_layers): + # go through pretraining epochs + for epoch in xrange(self.hp.pretraining_epochs_per_layer): + # go through the training set + batch_index=0 + count=0 + num_files=0 + for x,y in dataset.train(self.hp.minibatch_size): + c = self.classifier.pretrain_functions[i](x) + count +=1 + + self.series["reconstruction_error"].append((epoch, batch_index), c) + batch_index+=1 + + #if batch_index % 100 == 0: + # print "100 batches" + + # useful when doing tests + if self.max_minibatches and batch_index >= self.max_minibatches: + break + + #When we pass through the data only once (the case with P07) + #There is approximately 800*1024=819200 examples per file (1k per example and files are 800M) + if self.hp.pretraining_epochs_per_layer == 1 and count%un_fichier == 0: + print 'Pre-training layer %i, epoch %d, cost '%(i,num_files),c + num_files+=1 + sys.stdout.flush() + self.series['params'].append((num_files,), 
self.classifier.all_params) + + #When NIST is used + if self.hp.pretraining_epochs_per_layer > 1: + print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c + sys.stdout.flush() + + self.series['params'].append((epoch,), self.classifier.all_params) + + end_time = time.clock() + + print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) + self.hp.update({'pretraining_time': end_time-start_time}) + + sys.stdout.flush() + + #To be able to load them later for tests on finetune + self.parameters_pre=[copy(x.value) for x in self.classifier.params] + f = open('params_pretrain.txt', 'w') + pickle.dump(self.parameters_pre,f) + f.close() + + + def finetune(self,dataset,dataset_test,num_finetune,ind_test,special=0,decrease=0): + + if special != 0 and special != 1: + sys.exit('Bad value for variable special. Must be in {0,1}') + print "STARTING FINETUNING, time = ", datetime.datetime.now() + + minibatch_size = self.hp.minibatch_size + if ind_test == 0 or ind_test == 20: + nom_test = "NIST" + nom_train="P07" + else: + nom_test = "P07" + nom_train = "NIST" + + + # create a function to compute the mistakes that are made by the model + # on the validation set, or testing set + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: ensemble_x, + # self.classifier.y: ensemble_y]}) + + validate_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: , + # self.classifier.y: ]}) + + + # early-stopping parameters + patience = 10000 # look as this many examples regardless + patience_increase = 2. # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(self.mb_per_epoch, patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + if self.max_minibatches and validation_frequency > self.max_minibatches: + validation_frequency = self.max_minibatches / 2 + + best_params = None + best_validation_loss = float('inf') + test_score = 0. + start_time = time.clock() + + done_looping = False + epoch = 0 + + total_mb_index = 0 + minibatch_index = 0 + parameters_finetune=[] + + if ind_test == 21: + learning_rate = self.hp.finetuning_lr / 10.0 + else: + learning_rate = self.hp.finetuning_lr #The initial finetune lr + + + while (epoch < num_finetune) and (not done_looping): + epoch = epoch + 1 + + for x,y in dataset.train(minibatch_size,bufsize=buffersize): + minibatch_index += 1 + + + if special == 0: + cost_ij = self.classifier.finetune(x,y,learning_rate) + elif special == 1: + cost_ij = self.classifier.finetune2(x,y) + total_mb_index += 1 + + self.series["training_error"].append((epoch, minibatch_index), cost_ij) + + if (total_mb_index+1) % validation_frequency == 0: + #minibatch_index += 1 + #The validation set is always NIST (we want the model to be good on NIST) + if ind_test == 0 | ind_test == 20: + iter=dataset_test.valid(minibatch_size,bufsize=buffersize) + else: + iter = dataset.valid(minibatch_size,bufsize=buffersize) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + validation_losses = [validate_model(x,y) for x,y in iter] + this_validation_loss = numpy.mean(validation_losses) + + self.series["validation_error"].\ + append((epoch, minibatch_index), this_validation_loss*100.) 
+ + print('epoch %i, minibatch %i, validation error on NIST : %f %%' % \ + (epoch, minibatch_index+1, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, total_mb_index * patience_increase) + + # save best validation score, iteration number and parameters + best_validation_loss = this_validation_loss + best_iter = total_mb_index + parameters_finetune=[copy(x.value) for x in self.classifier.params] + + # test it on the test set + iter = dataset.test(minibatch_size,bufsize=buffersize) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + test_losses = [test_model(x,y) for x,y in iter] + test_score = numpy.mean(test_losses) + + #test it on the second test set + iter2 = dataset_test.test(minibatch_size,bufsize=buffersize) + if self.max_minibatches: + iter2 = itermax(iter2, self.max_minibatches) + test_losses2 = [test_model(x,y) for x,y in iter2] + test_score2 = numpy.mean(test_losses2) + + self.series["test_error"].\ + append((epoch, minibatch_index), test_score*100.) + + print((' epoch %i, minibatch %i, test error on dataset %s (train data) of best ' + 'model %f %%') % + (epoch, minibatch_index+1,nom_train, + test_score*100.)) + + print((' epoch %i, minibatch %i, test error on dataset %s of best ' + 'model %f %%') % + (epoch, minibatch_index+1,nom_test, + test_score2*100.)) + + if patience <= total_mb_index: + done_looping = True + break #to exit the FOR loop + + sys.stdout.flush() + + # useful when doing tests + if self.max_minibatches and minibatch_index >= self.max_minibatches: + break + + if decrease == 1: + learning_rate /= 2 #divide the learning rate by 2 for each new epoch + + self.series['params'].append((epoch,), self.classifier.all_params) + + if done_looping == True: #To exit completly the fine-tuning + break #to exit the WHILE loop + + end_time = time.clock() + self.hp.update({'finetuning_time':end_time-start_time,\ + 'best_validation_error':best_validation_loss,\ + 'test_score':test_score, + 'num_finetuning_epochs':epoch}) + + print(('\nOptimization complete with best validation score of %f %%,' + 'with test performance %f %% on dataset %s ') % + (best_validation_loss * 100., test_score*100.,nom_train)) + print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.)) + + print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) + + sys.stdout.flush() + + #Save a copy of the parameters in a file to be able to get them in the future + + if special == 1: #To keep a track of the value of the parameters + f = open('params_finetune_stanford.txt', 'w') + pickle.dump(parameters_finetune,f) + f.close() + + elif ind_test == 0 | ind_test == 20: #To keep a track of the value of the parameters + f = open('params_finetune_P07.txt', 'w') + pickle.dump(parameters_finetune,f) + f.close() + + + elif ind_test== 1: #For the run with 2 finetunes. It will be faster. 
+ f = open('params_finetune_NIST.txt', 'w') + pickle.dump(parameters_finetune,f) + f.close() + + elif ind_test== 21: #To keep a track of the value of the parameters + f = open('params_finetune_P07_then_NIST.txt', 'w') + pickle.dump(parameters_finetune,f) + f.close() + + + #Set parameters like they where right after pre-train or finetune + def reload_parameters(self,which): + + #self.parameters_pre=pickle.load('params_pretrain.txt') + f = open(which) + self.parameters_pre=pickle.load(f) + f.close() + for idx,x in enumerate(self.parameters_pre): + if x.dtype=='float64': + self.classifier.params[idx].value=theano._asarray(copy(x),dtype=theano.config.floatX) + else: + self.classifier.params[idx].value=copy(x) + + def training_error(self,dataset): + # create a function to compute the mistakes that are made by the model + # on the validation set, or testing set + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + + iter2 = dataset.train(self.hp.minibatch_size,bufsize=buffersize) + train_losses2 = [test_model(x,y) for x,y in iter2] + train_score2 = numpy.mean(train_losses2) + print "Training error is: " + str(train_score2) + + + +
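The finetune() loop above follows the deeplearning.net early-stopping recipe the file header mentions: validate every validation_frequency minibatches, and extend the patience budget only when the validation loss improves by a significant relative margin (improvement_threshold). A condensed sketch of just that schedule, with validate() standing in for running validate_model over the validation iterator; the real loop also evaluates both test sets, snapshots parameters, and optionally halves the learning rate each epoch:

```python
def early_stopping_schedule(validate, mb_per_epoch, max_epochs,
                            patience=10000, patience_increase=2.,
                            improvement_threshold=0.995):
    """Return the best validation loss seen before patience runs out."""
    best = float('inf')
    validation_frequency = min(mb_per_epoch, patience / 2)
    total_mb = 0
    for epoch in range(max_epochs):
        for _ in range(mb_per_epoch):
            total_mb += 1
            if total_mb % validation_frequency == 0:
                loss = validate()
                if loss < best:
                    # only buy more patience on a *significant* improvement
                    if loss < best * improvement_threshold:
                        patience = max(patience, total_mb * patience_increase)
                    best = loss
            if patience <= total_mb:
                return best   # out of patience: stop early
    return best
```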
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/convolutional_dae/salah_exp/stacked_convolutional_dae_uit.py Thu Apr 22 00:49:42 2010 -0400 @@ -0,0 +1,242 @@ +import numpy +import theano +import time +import sys +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams +import theano.sandbox.softsign +import copy +from theano.tensor.signal import downsample +from theano.tensor.nnet import conv + + +import ift6266.datasets +from ift6266.baseline.log_reg.log_reg import LogisticRegression + +from theano.tensor.xlogx import xlogx, xlogy0 +# it's target*log(output) +def binary_cross_entropy(target, output, sum_axis=1): + XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output)) + return -T.sum(XE, axis=sum_axis) + + + +class SigmoidalLayer(object): + def __init__(self, rng, input, n_in, n_out): + + self.input = input + + W_values = numpy.asarray( rng.uniform( \ + low = -numpy.sqrt(6./(n_in+n_out)), \ + high = numpy.sqrt(6./(n_in+n_out)), \ + size = (n_in, n_out)), dtype = theano.config.floatX) + self.W = theano.shared(value = W_values) + + b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) + self.b = theano.shared(value= b_values) + + self.output = T.tanh(T.dot(input, self.W) + self.b) + self.params = [self.W, self.b] + +class dA_conv(object): + + def __init__(self, input, filter_shape, corruption_level = 0.1, + shared_W = None, shared_b = None, image_shape = None, num = 0,batch_size=20): + + theano_rng = RandomStreams() + + fan_in = numpy.prod(filter_shape[1:]) + fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) + + center = theano.shared(value = 1, name="center") + scale = theano.shared(value = 2, name="scale") + + if shared_W != None and shared_b != None : + self.W = shared_W + self.b = shared_b + else: + initial_W = numpy.asarray( numpy.random.uniform( + low = -numpy.sqrt(6./(fan_in+fan_out)), + high = numpy.sqrt(6./(fan_in+fan_out)), + size = filter_shape), dtype = theano.config.floatX) + initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) + self.W = theano.shared(value = initial_W, name = "W") + self.b = theano.shared(value = initial_b, name = "b") + + + initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX) + + self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime") + + self.x = input + + self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x + + conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, + image_shape=image_shape, + unroll_kern=4,unroll_batch=4, + border_mode='valid') + + + self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) + + + da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ + filter_shape[3] ] + da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, + image_shape[3]-filter_shape[3]+1 ] + #import pdb; pdb.set_trace() + initial_W_prime = numpy.asarray( numpy.random.uniform( \ + low = -numpy.sqrt(6./(fan_in+fan_out)), \ + high = numpy.sqrt(6./(fan_in+fan_out)), \ + size = da_filter_shape), dtype = theano.config.floatX) + self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") + + conv2_out = conv.conv2d(self.y, self.W_prime, + filter_shape = da_filter_shape,\ + image_shape = da_image_shape, \ + unroll_kern=4,unroll_batch=4, \ + border_mode='full') + + self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale + + if num != 0 : + scaled_x = (self.x + center) / scale + else: + 
scaled_x = self.x + self.L = - T.sum( scaled_x*T.log(self.z) + (1-scaled_x)*T.log(1-self.z), axis=1 ) + + self.cost = T.mean(self.L) + + self.params = [ self.W, self.b, self.b_prime ] + +class LeNetConvPoolLayer(object): + + def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)): + self.input = input + + W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX) + self.W = theano.shared(value=W_values) + + b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) + self.b = theano.shared(value=b_values) + + conv_out = conv.conv2d(input, self.W, + filter_shape=filter_shape, image_shape=image_shape, + unroll_kern=4,unroll_batch=4) + + + fan_in = numpy.prod(filter_shape[1:]) + fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize) + + W_bound = numpy.sqrt(6./(fan_in + fan_out)) + self.W.value = numpy.asarray( + rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), + dtype = theano.config.floatX) + + + pooled_out = downsample.max_pool2D(conv_out, poolsize, ignore_border=True) + + self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) + self.params = [self.W, self.b] + + +class CSdA(): + def __init__(self, n_ins_mlp,batch_size, conv_hidden_layers_sizes, + mlp_hidden_layers_sizes, corruption_levels, rng, n_out, + pretrain_lr, finetune_lr): + + # Just to make sure those are not modified somewhere else afterwards + hidden_layers_sizes = copy.deepcopy(mlp_hidden_layers_sizes) + corruption_levels = copy.deepcopy(corruption_levels) + + #update_locals(self, locals()) + + + + self.layers = [] + self.pretrain_functions = [] + self.params = [] + self.n_layers = len(conv_hidden_layers_sizes) + self.mlp_n_layers = len(mlp_hidden_layers_sizes) + + self.x = T.matrix('x') # the data is presented as rasterized images + self.y = T.ivector('y') # the labels are presented as 1D vector of + + for i in xrange( self.n_layers ): + filter_shape=conv_hidden_layers_sizes[i][0] + image_shape=conv_hidden_layers_sizes[i][1] + max_poolsize=conv_hidden_layers_sizes[i][2] + + if i == 0 : + layer_input=self.x.reshape((batch_size, 1, 32, 32)) + else: + layer_input=self.layers[-1].output + + layer = LeNetConvPoolLayer(rng, input=layer_input, + image_shape=image_shape, + filter_shape=filter_shape, + poolsize=max_poolsize) + print 'Convolutional layer', str(i+1), 'created' + + self.layers += [layer] + self.params += layer.params + + da_layer = dA_conv(corruption_level = corruption_levels[0], + input = layer_input, + shared_W = layer.W, shared_b = layer.b, + filter_shape=filter_shape, + image_shape = image_shape, num=i , batch_size=batch_size) + + gparams = T.grad(da_layer.cost, da_layer.params) + + updates = {} + for param, gparam in zip(da_layer.params, gparams): + updates[param] = param - gparam * pretrain_lr + + update_fn = theano.function([self.x], da_layer.cost, updates = updates) + + self.pretrain_functions += [update_fn] + + for i in xrange( self.mlp_n_layers ): + if i == 0 : + input_size = n_ins_mlp + else: + input_size = mlp_hidden_layers_sizes[i-1] + + if i == 0 : + if len( self.layers ) == 0 : + layer_input=self.x + else : + layer_input = self.layers[-1].output.flatten(2) + else: + layer_input = self.layers[-1].output + + layer = SigmoidalLayer(rng, layer_input, input_size, + mlp_hidden_layers_sizes[i] ) + + self.layers += [layer] + self.params += layer.params + + print 'MLP layer', str(i+1), 'created' + + self.logLayer = LogisticRegression(input=self.layers[-1].output, \ + n_in=mlp_hidden_layers_sizes[-1], n_out=n_out) + + + 
self.params += self.logLayer.params + self.all_params = self.params + cost = self.logLayer.negative_log_likelihood(self.y) + + gparams = T.grad(cost, self.params) + + updates = {} + for param,gparam in zip(self.params, gparams): + updates[param] = param - gparam*finetune_lr + + self.finetune = theano.function([self.x, self.y], cost, updates = updates) + + self.errors = self.logLayer.errors(self.y) + + +
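A detail worth spelling out in dA_conv: the encoder convolves the corrupted input with border_mode='valid', and the decoder convolves the code with border_mode='full' using the filter shape transposed in its first two dimensions (da_filter_shape), which is exactly what makes the reconstruction z the same spatial size as the input x. The center=1 / scale=2 constants then map tanh activations from [-1, 1] into [0, 1] so the cross-entropy cost is well defined. A small plain-Python sketch of the shape arithmetic; the concrete kernel sizes are illustrative, not taken from the experiment's configuration:

```python
# 'valid' convolution shrinks the maps, 'full' convolution grows them back.
def valid_shape(image_shape, filter_shape):
    b, c, h, w = image_shape
    nk, _, fh, fw = filter_shape
    return (b, nk, h - fh + 1, w - fw + 1)

def full_shape(image_shape, filter_shape):
    b, c, h, w = image_shape
    nk, _, fh, fw = filter_shape
    return (b, nk, h + fh - 1, w + fw - 1)

image_shape = (20, 1, 32, 32)    # batch_size=20, 32x32 NIST images
filter_shape = (4, 1, 5, 5)      # 4 kernels of 5x5 (illustrative values)
hidden = valid_shape(image_shape, filter_shape)       # (20, 4, 28, 28)
decoder_filters = (1, 4, 5, 5)   # filter_shape with dims 0 and 1 swapped
recon = full_shape(hidden, decoder_filters)           # (20, 1, 32, 32)
assert recon[2:] == image_shape[2:]                   # z matches x spatially
```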
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/convolutional_dae/salah_exp/utils.py Thu Apr 22 00:49:42 2010 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/python
+# coding: utf-8
+
+from __future__ import with_statement
+
+from jobman import DD
+
+# from pylearn codebase
+# useful in __init__(param1, param2, etc.) to save
+# values in self.param1, self.param2... just call
+# update_locals(self, locals())
+def update_locals(obj, dct):
+    if 'self' in dct:
+        del dct['self']
+    obj.__dict__.update(dct)
+
+# from a dictionary of possible values for hyperparameters, e.g.
+# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]}
+# create a list of other dictionaries representing all the possible
+# combinations, thus in this example creating:
+# [{'learning_rate': 0.1, 'num_layers': 1}, ...]
+# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2))
+def produit_cartesien_jobs(val_dict):
+    job_list = [DD()]
+    all_keys = val_dict.keys()
+
+    for key in all_keys:
+        possible_values = val_dict[key]
+        new_job_list = []
+        for val in possible_values:
+            for job in job_list:
+                to_insert = job.copy()
+                to_insert.update({key: val})
+                new_job_list.append(to_insert)
+        job_list = new_job_list
+
+    return job_list
+
+def test_produit_cartesien_jobs():
+    vals = {'a': [1,2], 'b': [3,4,5]}
+    print produit_cartesien_jobs(vals)
+
+
+# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
+"""Simple module for getting amount of memory used by a specified user's
+processes on a UNIX system.
+It uses UNIX ps utility to get the memory usage for a specified username and
+pipe it to awk for summing up per application memory usage and return the total.
+Python's Popen() from subprocess module is used for spawning ps and awk.
+
+"""
+
+import subprocess
+
+class MemoryMonitor(object):
+
+    def __init__(self, username):
+        """Create new MemoryMonitor instance."""
+        self.username = username
+
+    def usage(self):
+        """Return int containing memory used by user's processes."""
+        self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username,
+                                        shell=True,
+                                        stdout=subprocess.PIPE,
+                                        )
+        self.stdout_list = self.process.communicate()[0].split('\n')
+        return int(self.stdout_list[0])
+
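produit_cartesien_jobs() is what turns the JOB_VALS grid (imported via `from config import *` in nist_csda.py) into one jobman job per hyperparameter combination before insertion into the database. For readers more familiar with the standard library, an equivalent sketch built on itertools.product, returning plain dicts rather than jobman DD objects and equal to the original up to ordering; the hyperparameter names below are made up for the example:

```python
import itertools

def grid(val_dict):
    """All combinations of the listed hyperparameter values, as dicts."""
    keys = sorted(val_dict)
    return [dict(zip(keys, combo))
            for combo in itertools.product(*(val_dict[k] for k in keys))]

vals = {'learning_rate': [0.1, 0.01], 'num_hidden_layers': [2, 3]}
jobs = grid(vals)
# -> [{'learning_rate': 0.1,  'num_hidden_layers': 2},
#     {'learning_rate': 0.1,  'num_hidden_layers': 3},
#     {'learning_rate': 0.01, 'num_hidden_layers': 2},
#     {'learning_rate': 0.01, 'num_hidden_layers': 3}]
```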