# HG changeset patch
# User fsavard
# Date 1267638700 18000
# Node ID e656edaedb486c452835e3c1fbe10e38b578858a
# Parent 3632e6258642499640868362bfe9bd294429b5c2
Commented a few things, renamed the produit_croise_jobs function, replaced the cost function (NOT TESTED YET).

diff -r 3632e6258642 -r e656edaedb48 deep/stacked_dae/nist_sda.py
--- a/deep/stacked_dae/nist_sda.py	Tue Mar 02 14:47:18 2010 -0500
+++ b/deep/stacked_dae/nist_sda.py	Wed Mar 03 12:51:40 2010 -0500
@@ -21,7 +21,7 @@
 import jobman, jobman.sql
 from pylearn.io import filetensor
 
-from utils import produit_croise_jobs
+from utils import produit_cartesien_jobs
 
 from sgd_optimization import SdaSgdOptimizer
 
@@ -31,7 +31,7 @@
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
 
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/fsavard_sda2'
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4'
 
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
@@ -43,6 +43,10 @@
 
 EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
 
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
 JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
         'pretraining_epochs_per_layer': [10,20],
         'hidden_layers_sizes': [300,800],
@@ -63,7 +67,11 @@
         #'reduce_train_to':300,
         'num_hidden_layers':2})
 
+# Function called by jobman upon launching each job
+# Its path is the one given when inserting jobs:
+# ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
 def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
     channel.save()
 
@@ -71,10 +79,12 @@
 
     print "Will load NIST"
 
-    nist = NIST(20)
+    nist = NIST(minibatch_size=20)
 
     print "NIST loaded"
 
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
     rtt = None
    if state.has_key('reduce_train_to'):
         rtt = state['reduce_train_to']
@@ -82,7 +92,7 @@
         rtt = REDUCE_TRAIN_TO
 
     if rtt:
-        print "Reducing training set to "+str( rtt)+ " examples"
+        print "Reducing training set to "+str(rtt)+ " examples"
         nist.reduce_train_set(rtt)
 
     train,valid,test = nist.get_tvt()
@@ -91,14 +101,9 @@
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
 
-    hls = state.hidden_layers_sizes
-    cl = state.corruption_levels
-    nhl = state.num_hidden_layers
-    state.hidden_layers_sizes = [hls] * nhl
-    state.corruption_levels = [cl] * nhl
-
-    # b,b',W for each hidden layer + b,W of last layer (logreg)
-    numparams = nhl * 3 + 2
+    # b,b',W for each hidden layer
+    # + b,W of last layer (logreg)
+    numparams = state.num_hidden_layers * 3 + 2
 
     series_mux = None
     series_mux = create_series(workingdir, numparams)
@@ -114,11 +119,10 @@
     optimizer.finetune()
     channel.save()
 
-    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
-    channel.save()
-
     return channel.COMPLETE
 
+# These Series objects are used to save various statistics
+# during the training.
 def create_series(basedir, numparams):
 
     mux = SeriesMultiplexer()
@@ -140,8 +144,11 @@
 
     return mux
 
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
 def jobman_insert_nist():
-    jobs = produit_croise_jobs(JOB_VALS)
+    jobs = produit_cartesien_jobs(JOB_VALS)
     db = jobman.sql.db(JOBDB)
 
     for job in jobs:
@@ -227,35 +234,6 @@
 
     raw_input("Press any key")
 
-# hp for hyperparameters
-def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
-    global DEFAULT_HP_NIST
-    hp = hp and hp or DEFAULT_HP_NIST
-
-    print "Will load NIST"
-
-    import time
-    t1 = time.time()
-    nist = NIST(20, reduce_train_to=100)
-    t2 = time.time()
-
-    print "NIST loaded. time delta = ", t2-t1
-
-    train,valid,test = nist.get_tvt()
-    dataset = (train,valid,test)
-
-    print train[0][15]
-    print type(train[0][1])
-
-
-    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
-
-    n_ins = 32*32
-    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
-    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
-    optimizer.train()
-
 if __name__ == '__main__':
     import sys
@@ -277,5 +255,4 @@
     elif len(args) > 0 and args[0] == 'estimate':
         estimate_total_time()
     else:
-        sgd_optimization_nist()
-
+        print "Bad arguments"
diff -r 3632e6258642 -r e656edaedb48 deep/stacked_dae/sgd_optimization.py
--- a/deep/stacked_dae/sgd_optimization.py	Tue Mar 02 14:47:18 2010 -0500
+++ b/deep/stacked_dae/sgd_optimization.py	Wed Mar 03 12:51:40 2010 -0500
@@ -60,20 +60,27 @@
         # compute number of minibatches for training, validation and testing
         self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
         self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size
+        # remove last batch in case it's incomplete
+        self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1
 
     def init_classifier(self):
         print "Constructing classifier"
 
+        # we don't want to save arrays in DD objects, so
+        # we recreate those arrays here
+        nhl = self.hp.num_hidden_layers
+        layers_sizes = [self.hp.hidden_layers_sizes] * nhl
+        corruption_levels = [self.hp.corruption_levels] * nhl
+
         # construct the stacked denoising autoencoder class
         self.classifier = SdA( \
                           train_set_x= self.train_set_x, \
                           train_set_y = self.train_set_y,\
                           batch_size = self.hp.minibatch_size, \
                           n_ins= self.n_ins, \
-                          hidden_layers_sizes = self.hp.hidden_layers_sizes, \
+                          hidden_layers_sizes = layers_sizes, \
                           n_outs = self.n_outs, \
-                          corruption_levels = self.hp.corruption_levels,\
+                          corruption_levels = corruption_levels,\
                           rng = self.rng,\
                           pretrain_lr = self.hp.pretraining_lr, \
                           finetune_lr = self.hp.finetuning_lr,\
diff -r 3632e6258642 -r e656edaedb48 deep/stacked_dae/stacked_dae.py
--- a/deep/stacked_dae/stacked_dae.py	Tue Mar 02 14:47:18 2010 -0500
+++ b/deep/stacked_dae/stacked_dae.py	Wed Mar 03 12:51:40 2010 -0500
@@ -10,6 +10,15 @@
 
 from utils import update_locals
 
+# taken from LeDeepNet/daa.py
+# has a special case when taking log(0) (defined =0)
+# modified to not take the mean anymore
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
+
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
         # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
@@ -128,7 +137,8 @@
         # Equation (4)
         # note : we sum over the size of a datapoint; if we are using minibatches,
         #        L will be a vector, with one entry per example in minibatch
-        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
@@ -156,6 +166,17 @@
         self.all_params = []
         self.n_layers = len(hidden_layers_sizes)
 
+        print "Creating SdA with params:"
+        print "batch_size", batch_size
+        print "hidden_layers_sizes", hidden_layers_sizes
+        print "corruption_levels", corruption_levels
+        print "n_ins", n_ins
+        print "n_outs", n_outs
+        print "pretrain_lr", pretrain_lr
+        print "finetune_lr", finetune_lr
+        print "input_divider", input_divider
+        print "----"
+
        self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
 
        if len(hidden_layers_sizes) < 1 :
diff -r 3632e6258642 -r e656edaedb48 deep/stacked_dae/utils.py
--- a/deep/stacked_dae/utils.py	Tue Mar 02 14:47:18 2010 -0500
+++ b/deep/stacked_dae/utils.py	Wed Mar 03 12:51:40 2010 -0500
@@ -6,12 +6,21 @@
 
 from jobman import DD
 
 # from pylearn codebase
+# useful in __init__(param1, param2, etc.) to save
+# values in self.param1, self.param2... just call
+# update_locals(self, locals())
 def update_locals(obj, dct):
     if 'self' in dct:
         del dct['self']
     obj.__dict__.update(dct)
 
-def produit_croise_jobs(val_dict):
+# from a dictionary of possible values for hyperparameters, e.g.
+# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]}
+# create a list of other dictionaries representing all the possible
+# combinations, thus in this example creating:
+# [{'learning_rate': 0.1, 'num_layers': 1}, ...]
+# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2))
+def produit_cartesien_jobs(val_dict):
     job_list = [DD()]
     all_keys = val_dict.keys()
@@ -27,9 +36,9 @@
 
     return job_list
 
-def test_produit_croise_jobs():
+def test_produit_cartesien_jobs():
     vals = {'a': [1,2], 'b': [3,4,5]}
-    print produit_croise_jobs(vals)
+    print produit_cartesien_jobs(vals)
 
 # taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
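
The comment block added above produit_cartesien_jobs in utils.py describes expanding a dictionary of per-hyperparameter value lists into one job per combination of values. As a rough illustration of that idea only (the body of the real function is mostly elided from this patch, and it builds jobman DD objects rather than plain dicts), a minimal sketch could look like the following; the name cartesian_jobs and the use of itertools.product are assumptions made for this example, not code from the repository.

from itertools import product

def cartesian_jobs(val_dict):
    # expand e.g. {'learning_rate': [0.1, 0.01], 'num_layers': [1, 2]}
    # into one dict per combination of values (4 dicts in that example)
    keys = sorted(val_dict.keys())
    return [dict(zip(keys, combo))
            for combo in product(*[val_dict[k] for k in keys])]

if __name__ == '__main__':
    for job in cartesian_jobs({'learning_rate': [0.1, 0.01], 'num_layers': [1, 2]}):
        print job

Applied to JOB_VALS in nist_sda.py, each resulting dict would correspond to one job inserted into the jobman DB by jobman_insert_nist().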
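
The replacement cost in stacked_dae.py (flagged NOT TESTED YET in the commit message) relies on Theano's xlogy0 so that terms with a zero factor contribute 0 instead of NaN when the reconstruction hits exactly 0 or 1. A small NumPy sketch of that convention, with helper names assumed for illustration and not taken from the patch:

import numpy

def xlogy0(x, y):
    # elementwise x*log(y), defined as 0 wherever x == 0,
    # so 0*log(0) does not poison the sum with NaN
    out = numpy.zeros(y.shape)
    mask = x != 0.
    out[mask] = x[mask] * numpy.log(y[mask])
    return out

def binary_cross_entropy_np(target, output, sum_axis=1):
    # XE = t*log(o) + (1-t)*log(1-o), summed per example (no mean taken)
    XE = xlogy0(target, output) + xlogy0(1. - target, 1. - output)
    return -numpy.sum(XE, axis=sum_axis)

if __name__ == '__main__':
    t = numpy.array([[0., 1., 1.]])
    o = numpy.array([[0., 0.8, 1.]])
    print binary_cross_entropy_np(t, o)   # ~[0.223], finite despite the exact 0s and 1s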