# HG changeset patch # User youssouf # Date 1272213082 14400 # Node ID 8cf52a1c8055e9e7ec290660bd3ff448339f2200 # Parent a79db7cee0355e25f370b08836e5d0512056dd50 initial commit of sda with 36 classes diff -r a79db7cee035 -r 8cf52a1c8055 deep/stacked_dae/v_youssouf/__init__.py diff -r a79db7cee035 -r 8cf52a1c8055 deep/stacked_dae/v_youssouf/config.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_youssouf/config.py Sun Apr 25 12:31:22 2010 -0400 @@ -0,0 +1,98 @@ +''' +These are parameters used by nist_sda_retrieve.py. They'll end up as globals in there. + +Rename this file to config.py and configure as needed. +DON'T add the renamed file to the repository, as others might use it +without realizing it, with dire consequences. +''' + +# Set this to True when you want to run cluster tests, ie. you want +# to run on the cluster, many jobs, but want to reduce the training +# set size and the number of epochs, so you know everything runs +# fine on the cluster. +# Set this PRIOR to inserting your test jobs in the DB. +TEST_CONFIG = False + +NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' +NIST_ALL_TRAIN_SIZE = 649081 +# valid et test =82587 82587 + +#Path of two pre-train done earlier +PATH_NIST = '/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/NIST_big' +PATH_P07 = '/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/P07_big/' + +''' +# change "sandbox" when you're ready +JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_SDA_retrieve' +EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v_sylvain.nist_sda_retrieve.jobman_entrypoint" +''' + +##Pour lancer des travaux sur le cluster: (il faut etre ou se trouve les fichiers) +##python nist_sda_retrieve.py jobman_insert +##dbidispatch --condor --repeat_jobs=2 jobman sql 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_finetuningSDA0' . #C'est le path dans config.py + +##Pour lancer sur GPU sur boltzmann (changer device=gpuX pour X le bon assigne) +##THEANO_FLAGS=floatX=float32,device=gpu2 python nist_sda_retrieve.py test_jobman_entrypoint + + +# reduce training set to that many examples +REDUCE_TRAIN_TO = None +# that's a max, it usually doesn't get to that point +MAX_FINETUNING_EPOCHS = 1000 +# number of minibatches before taking means for valid error etc. +REDUCE_EVERY = 100 +#Set the finetune dataset +FINETUNE_SET=0 +#Set the pretrain dataset used. 0: NIST, 1:P07 +PRETRAIN_CHOICE=0 + + +if TEST_CONFIG: + REDUCE_TRAIN_TO = 1000 + MAX_FINETUNING_EPOCHS = 2 + REDUCE_EVERY = 10 + +# select detection or classification +DETECTION_MODE = 0 +# consider maj and minuscule as the same +REDUCE_LABEL = 1 + + +# This is to configure insertion of jobs on the cluster. +# Possible values the hyperparameters can take. These are then +# combined with produit_cartesien_jobs so we get a list of all +# possible combinations, each one resulting in a job inserted +# in the jobman DB. +JOB_VALS = {'pretraining_lr': [0.1],#, 0.001],#, 0.0001], + 'pretraining_epochs_per_layer': [10], + 'hidden_layers_sizes': [800], + 'corruption_levels': [0.2], + 'minibatch_size': [100], + 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS], + 'max_finetuning_epochs_P07':[1], + 'finetuning_lr':[0.01], #0.001 was very bad, so we leave it out + 'num_hidden_layers':[4], + 'finetune_set':[-1], + 'pretrain_choice':[0,1] + } + +# Just useful for tests... minimal number of epochs +# (This is used when running a single job, locally, when +# calling ./nist_sda.py test_jobman_entrypoint +DEFAULT_HP_NIST = {'finetuning_lr':0.05, + 'pretraining_lr':0.01, + 'pretraining_epochs_per_layer':15, + 'max_finetuning_epochs':MAX_FINETUNING_EPOCHS, + #'max_finetuning_epochs':1, + 'max_finetuning_epochs_P07':7, + 'hidden_layers_sizes':1500, + 'corruption_levels':0.2, + 'minibatch_size':100, + #'reduce_train_to':2000, + 'decrease_lr':1, + 'num_hidden_layers':4, + 'finetune_set':2, + 'pretrain_choice':1, + 'detection_mode':DETECTION_MODE, + 'reduce_label':REDUCE_LABEL} + diff -r a79db7cee035 -r 8cf52a1c8055 deep/stacked_dae/v_youssouf/nist_sda.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_youssouf/nist_sda.py Sun Apr 25 12:31:22 2010 -0400 @@ -0,0 +1,266 @@ +#!/usr/bin/python +# coding: utf-8 + +import ift6266 +import pylearn + +import numpy +import theano +import time + +import pylearn.version +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +import copy +import sys +import os +import os.path + +from jobman import DD +import jobman, jobman.sql +from pylearn.io import filetensor + +from utils import produit_cartesien_jobs +from copy import copy + +from sgd_optimization import SdaSgdOptimizer + +#from ift6266.utils.scalar_series import * +from ift6266.utils.seriestables import * +import tables + +from ift6266 import datasets +from config import * + + + +''' +Function called by jobman upon launching each job +Its path is the one given when inserting jobs: see EXPERIMENT_PATH +''' +def jobman_entrypoint(state, channel): + # record mercurial versions of each package + pylearn.version.record_versions(state,[theano,ift6266,pylearn]) + # TODO: remove this, bad for number of simultaneous requests on DB + channel.save() + + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. + rtt = None + if state.has_key('reduce_train_to'): + rtt = state['reduce_train_to'] + elif REDUCE_TRAIN_TO: + rtt = REDUCE_TRAIN_TO + + if state.has_key('decrease_lr'): + decrease_lr = state['decrease_lr'] + else : + decrease_lr = 0 + + n_ins = 32*32 + n_outs = 36 # 10 digits, 26 characters (merged lower and capitals) + + examples_per_epoch = NIST_ALL_TRAIN_SIZE + + #To be sure variables will not be only in the if statement + PATH = '' + nom_reptrain = '' + nom_serie = "" + if state['pretrain_choice'] == 0: + nom_serie="series_NIST.h5" + elif state['pretrain_choice'] == 1: + nom_serie="series_P07.h5" + + series = create_series(state.num_hidden_layers,nom_serie) + + + print "Creating optimizer with state, ", state + + optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), + hyperparameters=state, \ + n_ins=n_ins, n_outs=n_outs,\ + examples_per_epoch=examples_per_epoch, \ + series=series, + max_minibatches=rtt) + + parameters=[] + #Number of files of P07 used for pretraining + nb_file=0 + if state['pretrain_choice'] == 0: + print('\n\tpretraining with NIST\n') + optimizer.pretrain(datasets.nist_all()) + elif state['pretrain_choice'] == 1: + #To know how many file will be used during pretraining + nb_file = int(state['pretraining_epochs_per_layer']) + state['pretraining_epochs_per_layer'] = 1 #Only 1 time over the dataset + if nb_file >=100: + sys.exit("The code does not support this much pretraining epoch (99 max with P07).\n"+ + "You have to correct the code (and be patient, P07 is huge !!)\n"+ + "or reduce the number of pretraining epoch to run the code (better idea).\n") + print('\n\tpretraining with P07') + optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file)) + channel.save() + + #Set some of the parameters used for the finetuning + if state.has_key('finetune_set'): + finetune_choice=state['finetune_set'] + else: + finetune_choice=FINETUNE_SET + + if state.has_key('max_finetuning_epochs'): + max_finetune_epoch_NIST=state['max_finetuning_epochs'] + else: + max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS + + if state.has_key('max_finetuning_epochs_P07'): + max_finetune_epoch_P07=state['max_finetuning_epochs_P07'] + else: + max_finetune_epoch_P07=max_finetune_epoch_NIST + + #Decide how the finetune is done + + if finetune_choice == 0: + print('\n\n\tfinetune with NIST\n\n') + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr) + channel.save() + if finetune_choice == 1: + print('\n\n\tfinetune with P07\n\n') + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr) + channel.save() + if finetune_choice == 2: + print('\n\n\tfinetune with P07\n\n') + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr) + #optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr) + channel.save() + if finetune_choice == 3: + print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\ + All hidden units output are input of the logistic regression\n\n') + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr) + + + if finetune_choice==-1: + print('\nSERIE OF 4 DIFFERENT FINETUNINGS') + print('\n\n\tfinetune with NIST\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with P07\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_finetune_P07.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\ + All hidden units output are input of the logistic regression\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr) + channel.save() + + channel.save() + + return channel.COMPLETE + +# These Series objects are used to save various statistics +# during the training. +def create_series(num_hidden_layers, nom_serie): + + # Replace series we don't want to save with DummySeries, e.g. + # series['training_error'] = DummySeries() + + series = {} + + basedir = os.getcwd() + + h5f = tables.openFile(os.path.join(basedir, nom_serie), "w") + + # reconstruction + reconstruction_base = \ + ErrorSeries(error_name="reconstruction_error", + table_name="reconstruction_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['reconstruction_error'] = \ + AccumulatorSeriesWrapper(base_series=reconstruction_base, + reduce_every=REDUCE_EVERY) + + # train + training_base = \ + ErrorSeries(error_name="training_error", + table_name="training_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['training_error'] = \ + AccumulatorSeriesWrapper(base_series=training_base, + reduce_every=REDUCE_EVERY) + + # valid and test are not accumulated/mean, saved directly + series['validation_error'] = \ + ErrorSeries(error_name="validation_error", + table_name="validation_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + series['test_error'] = \ + ErrorSeries(error_name="test_error", + table_name="test_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + param_names = [] + for i in range(num_hidden_layers): + param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i] + param_names += ['logreg_layer_W', 'logreg_layer_b'] + + # comment out series we don't want to save + series['params'] = SharedParamsStatisticsWrapper( + new_group_name="params", + base_group="/", + arrays_names=param_names, + hdf5_file=h5f, + index_names=('epoch',)) + + return series + +# Perform insertion into the Postgre DB based on combination +# of hyperparameter values above +# (see comment for produit_cartesien_jobs() to know how it works) +def jobman_insert_nist(): + jobs = produit_cartesien_jobs(JOB_VALS) + + db = jobman.sql.db(JOBDB) + for job in jobs: + job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) + jobman.sql.insert_dict(job, db) + + print "inserted" + +if __name__ == '__main__': + + args = sys.argv[1:] + + #if len(args) > 0 and args[0] == 'load_nist': + # test_load_nist() + + if len(args) > 0 and args[0] == 'jobman_insert': + jobman_insert_nist() + + elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': + chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) + jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock) + + else: + print "Bad arguments" + diff -r a79db7cee035 -r 8cf52a1c8055 deep/stacked_dae/v_youssouf/nist_sda_retrieve.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_youssouf/nist_sda_retrieve.py Sun Apr 25 12:31:22 2010 -0400 @@ -0,0 +1,272 @@ +#!/usr/bin/python +# coding: utf-8 + +import ift6266 +import pylearn + +import numpy +import theano +import time + +import pylearn.version +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +import copy +import sys +import os +import os.path + +from jobman import DD +import jobman, jobman.sql +from pylearn.io import filetensor + +from utils import produit_cartesien_jobs +from copy import copy + +from sgd_optimization import SdaSgdOptimizer + +#from ift6266.utils.scalar_series import * +from ift6266.utils.seriestables import * +import tables + +from ift6266 import datasets +from config2 import * + +''' +Function called by jobman upon launching each job +Its path is the one given when inserting jobs: see EXPERIMENT_PATH +''' +def jobman_entrypoint(state, channel): + # record mercurial versions of each package + pylearn.version.record_versions(state,[theano,ift6266,pylearn]) + # TODO: remove this, bad for number of simultaneous requests on DB + channel.save() + + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. + rtt = None + if state.has_key('reduce_train_to'): + rtt = state['reduce_train_to'] + elif REDUCE_TRAIN_TO: + rtt = REDUCE_TRAIN_TO + + if state.has_key('decrease_lr'): + decrease_lr = state['decrease_lr'] + else : + decrease_lr = 0 + + n_ins = 32*32 + n_outs = 62 # 10 digits, 26*2 (lower, capitals) + + examples_per_epoch = NIST_ALL_TRAIN_SIZE + #To be sure variables will not be only in the if statement + PATH = '' + nom_reptrain = '' + nom_serie = "" + if state['pretrain_choice'] == 0: + PATH=PATH_NIST + nom_pretrain='NIST' + nom_serie="series_NIST.h5" + elif state['pretrain_choice'] == 1: + PATH=PATH_P07 + nom_pretrain='P07' + nom_serie="series_P07.h5" + + series = create_series(state.num_hidden_layers,nom_serie) + + print "Creating optimizer with state, ", state + + optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), + hyperparameters=state, \ + n_ins=n_ins, n_outs=n_outs,\ + examples_per_epoch=examples_per_epoch, \ + series=series, + max_minibatches=rtt) + + parameters=[] + #Number of files of P07 used for pretraining + nb_file=0 +## if state['pretrain_choice'] == 0: +## print('\n\tpretraining with NIST\n') +## optimizer.pretrain(datasets.nist_all()) +## elif state['pretrain_choice'] == 1: +## #To know how many file will be used during pretraining +## nb_file = state['pretraining_epochs_per_layer'] +## state['pretraining_epochs_per_layer'] = 1 #Only 1 time over the dataset +## if nb_file >=100: +## sys.exit("The code does not support this much pretraining epoch (99 max with P07).\n"+ +## "You have to correct the code (and be patient, P07 is huge !!)\n"+ +## "or reduce the number of pretraining epoch to run the code (better idea).\n") +## print('\n\tpretraining with P07') +## optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file)) + + print ('Retrieve pre-train done earlier ( '+nom_pretrain+' )') + + + + sys.stdout.flush() + channel.save() + + #Set some of the parameters used for the finetuning + if state.has_key('finetune_set'): + finetune_choice=state['finetune_set'] + else: + finetune_choice=FINETUNE_SET + + if state.has_key('max_finetuning_epochs'): + max_finetune_epoch_NIST=state['max_finetuning_epochs'] + else: + max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS + + if state.has_key('max_finetuning_epochs_P07'): + max_finetune_epoch_P07=state['max_finetuning_epochs_P07'] + else: + max_finetune_epoch_P07=max_finetune_epoch_NIST + + #Decide how the finetune is done + + if finetune_choice == 0: + print('\n\n\tfinetune with NIST\n\n') + optimizer.reload_parameters(PATH+'params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr) + channel.save() + if finetune_choice == 1: + print('\n\n\tfinetune with P07\n\n') + optimizer.reload_parameters(PATH+'params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr) + channel.save() + if finetune_choice == 2: + print('\n\n\tfinetune with P07 followed by NIST\n\n') + optimizer.reload_parameters(PATH+'params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr) + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr) + channel.save() + if finetune_choice == 3: + print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\ + All hidden units output are input of the logistic regression\n\n') + optimizer.reload_parameters(PATH+'params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr) + + + if finetune_choice==-1: + print('\nSERIE OF 4 DIFFERENT FINETUNINGS') + print('\n\n\tfinetune with NIST\n\n') + sys.stdout.flush() + optimizer.reload_parameters(PATH+'params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with P07\n\n') + sys.stdout.flush() + optimizer.reload_parameters(PATH+'params_pretrain.txt') + optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n') + sys.stdout.flush() + optimizer.reload_parameters('params_finetune_P07.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr) + channel.save() + print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\ + All hidden units output are input of the logistic regression\n\n') + sys.stdout.flush() + optimizer.reload_parameters(PATH+'params_pretrain.txt') + optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr) + channel.save() + + channel.save() + + return channel.COMPLETE + +# These Series objects are used to save various statistics +# during the training. +def create_series(num_hidden_layers, nom_serie): + + # Replace series we don't want to save with DummySeries, e.g. + # series['training_error'] = DummySeries() + + series = {} + + basedir = os.getcwd() + + h5f = tables.openFile(os.path.join(basedir, nom_serie), "w") + + # reconstruction + reconstruction_base = \ + ErrorSeries(error_name="reconstruction_error", + table_name="reconstruction_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['reconstruction_error'] = \ + AccumulatorSeriesWrapper(base_series=reconstruction_base, + reduce_every=REDUCE_EVERY) + + # train + training_base = \ + ErrorSeries(error_name="training_error", + table_name="training_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['training_error'] = \ + AccumulatorSeriesWrapper(base_series=training_base, + reduce_every=REDUCE_EVERY) + + # valid and test are not accumulated/mean, saved directly + series['validation_error'] = \ + ErrorSeries(error_name="validation_error", + table_name="validation_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + series['test_error'] = \ + ErrorSeries(error_name="test_error", + table_name="test_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + param_names = [] + for i in range(num_hidden_layers): + param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i] + param_names += ['logreg_layer_W', 'logreg_layer_b'] + + # comment out series we don't want to save + series['params'] = SharedParamsStatisticsWrapper( + new_group_name="params", + base_group="/", + arrays_names=param_names, + hdf5_file=h5f, + index_names=('epoch',)) + + return series + +# Perform insertion into the Postgre DB based on combination +# of hyperparameter values above +# (see comment for produit_cartesien_jobs() to know how it works) +def jobman_insert_nist(): + jobs = produit_cartesien_jobs(JOB_VALS) + + db = jobman.sql.db(JOBDB) + for job in jobs: + job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) + jobman.sql.insert_dict(job, db) + + print "inserted" + +if __name__ == '__main__': + + args = sys.argv[1:] + + #if len(args) > 0 and args[0] == 'load_nist': + # test_load_nist() + + if len(args) > 0 and args[0] == 'jobman_insert': + jobman_insert_nist() + + elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': + chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) + jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock) + + else: + print "Bad arguments" + diff -r a79db7cee035 -r 8cf52a1c8055 deep/stacked_dae/v_youssouf/sgd_optimization.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_youssouf/sgd_optimization.py Sun Apr 25 12:31:22 2010 -0400 @@ -0,0 +1,376 @@ +#!/usr/bin/python +# coding: utf-8 + +# Generic SdA optimization loop, adapted from the deeplearning.net tutorial + +import numpy +import theano +import time +import datetime +import theano.tensor as T +import sys +import pickle + +from jobman import DD +import jobman, jobman.sql +from copy import copy + +from stacked_dae import SdA + +from ift6266.utils.seriestables import * + +#For test purpose only +buffersize=1000 + +default_series = { \ + 'reconstruction_error' : DummySeries(), + 'training_error' : DummySeries(), + 'validation_error' : DummySeries(), + 'test_error' : DummySeries(), + 'params' : DummySeries() + } + +def itermax(iter, max): + for i,it in enumerate(iter): + if i >= max: + break + yield it + +class SdaSgdOptimizer: + def __init__(self, dataset, hyperparameters, n_ins, n_outs, + examples_per_epoch, series=default_series, max_minibatches=None): + self.dataset = dataset + self.hp = hyperparameters + self.n_ins = n_ins + self.n_outs = n_outs + self.parameters_pre=[] + + self.max_minibatches = max_minibatches + print "SdaSgdOptimizer, max_minibatches =", max_minibatches + print "Reduce Label: ", self.hp.reduce_label + + self.ex_per_epoch = examples_per_epoch + self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size + + self.series = series + + self.rng = numpy.random.RandomState(1234) + + self.init_classifier() + + sys.stdout.flush() + + def init_classifier(self): + print "Constructing classifier" + + # we don't want to save arrays in DD objects, so + # we recreate those arrays here + nhl = self.hp.num_hidden_layers + layers_sizes = [self.hp.hidden_layers_sizes] * nhl + corruption_levels = [self.hp.corruption_levels] * nhl + + # construct the stacked denoising autoencoder class + self.classifier = SdA( \ + batch_size = self.hp.minibatch_size, \ + n_ins= self.n_ins, \ + hidden_layers_sizes = layers_sizes, \ + n_outs = self.n_outs, \ + corruption_levels = corruption_levels,\ + rng = self.rng,\ + pretrain_lr = self.hp.pretraining_lr, \ + finetune_lr = self.hp.finetuning_lr, \ + detection_mode = self.hp.detection_mode, \ + ) + + #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") + + sys.stdout.flush() + + def train(self): + self.pretrain(self.dataset) + self.finetune(self.dataset) + + def pretrain(self,dataset): + print "STARTING PRETRAINING, time = ", datetime.datetime.now() + sys.stdout.flush() + + un_fichier=int(819200.0/self.hp.minibatch_size) #Number of batches in a P07 file + + start_time = time.clock() + ## Pre-train layer-wise + for i in xrange(self.classifier.n_layers): + # go through pretraining epochs + for epoch in xrange(self.hp.pretraining_epochs_per_layer): + # go through the training set + batch_index=0 + count=0 + num_files=0 + for x,y in dataset.train(self.hp.minibatch_size): + c = self.classifier.pretrain_functions[i](x) + count +=1 + + self.series["reconstruction_error"].append((epoch, batch_index), c) + batch_index+=1 + + #if batch_index % 100 == 0: + # print "100 batches" + + # useful when doing tests + if self.max_minibatches and batch_index >= self.max_minibatches: + break + + #When we pass through the data only once (the case with P07) + #There is approximately 800*1024=819200 examples per file (1k per example and files are 800M) + if self.hp.pretraining_epochs_per_layer == 1 and count%un_fichier == 0: + print 'Pre-training layer %i, epoch %d, cost '%(i,num_files),c + num_files+=1 + sys.stdout.flush() + self.series['params'].append((num_files,), self.classifier.all_params) + + #When NIST is used + if self.hp.pretraining_epochs_per_layer > 1: + print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c + sys.stdout.flush() + + self.series['params'].append((epoch,), self.classifier.all_params) + + end_time = time.clock() + + print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) + self.hp.update({'pretraining_time': end_time-start_time}) + + sys.stdout.flush() + + #To be able to load them later for tests on finetune + self.parameters_pre=[copy(x.value) for x in self.classifier.params] + f = open('params_pretrain.txt', 'w') + pickle.dump(self.parameters_pre,f) + f.close() + + + def finetune(self,dataset,dataset_test,num_finetune,ind_test,special=0,decrease=0): + + if special != 0 and special != 1: + sys.exit('Bad value for variable special. Must be in {0,1}') + print "STARTING FINETUNING, time = ", datetime.datetime.now() + + minibatch_size = self.hp.minibatch_size + if ind_test == 0 or ind_test == 20: + nom_test = "NIST" + nom_train="P07" + else: + nom_test = "P07" + nom_train = "NIST" + + + # create a function to compute the mistakes that are made by the model + # on the validation set, or testing set + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: ensemble_x, + # self.classifier.y: ensemble_y]}) + + validate_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: , + # self.classifier.y: ]}) + + + # early-stopping parameters + patience = 10000 # look as this many examples regardless + patience_increase = 2. # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(self.mb_per_epoch, patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + if self.max_minibatches and validation_frequency > self.max_minibatches: + validation_frequency = self.max_minibatches / 2 + + best_params = None + best_validation_loss = float('inf') + test_score = 0. + start_time = time.clock() + + done_looping = False + epoch = 0 + + total_mb_index = 0 + minibatch_index = 0 + parameters_finetune=[] + + if ind_test == 21: + learning_rate = self.hp.finetuning_lr / 10.0 + else: + learning_rate = self.hp.finetuning_lr #The initial finetune lr + + + while (epoch < num_finetune) and (not done_looping): + epoch = epoch + 1 + + for x,y in dataset.train(minibatch_size,bufsize=buffersize): + minibatch_index += 1 + + if self.hp.reduce_label: + y[y > 35] = y[y > 35]-26 + + if special == 0: + cost_ij = self.classifier.finetune(x,y,learning_rate) + elif special == 1: + cost_ij = self.classifier.finetune2(x,y) + total_mb_index += 1 + + self.series["training_error"].append((epoch, minibatch_index), cost_ij) + + if (total_mb_index+1) % validation_frequency == 0: + #minibatch_index += 1 + #The validation set is always NIST (we want the model to be good on NIST) + if ind_test == 0 | ind_test == 20: + iter=dataset_test.valid(minibatch_size,bufsize=buffersize) + else: + iter = dataset.valid(minibatch_size,bufsize=buffersize) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + validation_losses = [validate_model(x,y) for x,y in iter] + this_validation_loss = numpy.mean(validation_losses) + + self.series["validation_error"].\ + append((epoch, minibatch_index), this_validation_loss*100.) + + print('epoch %i, minibatch %i, validation error on NIST : %f %%' % \ + (epoch, minibatch_index+1, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, total_mb_index * patience_increase) + + # save best validation score, iteration number and parameters + best_validation_loss = this_validation_loss + best_iter = total_mb_index + parameters_finetune=[copy(x.value) for x in self.classifier.params] + + # test it on the test set + iter = dataset.test(minibatch_size,bufsize=buffersize) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + test_losses = [test_model(x,y) for x,y in iter] + test_score = numpy.mean(test_losses) + + #test it on the second test set + iter2 = dataset_test.test(minibatch_size,bufsize=buffersize) + if self.max_minibatches: + iter2 = itermax(iter2, self.max_minibatches) + test_losses2 = [test_model(x,y) for x,y in iter2] + test_score2 = numpy.mean(test_losses2) + + self.series["test_error"].\ + append((epoch, minibatch_index), test_score*100.) + + print((' epoch %i, minibatch %i, test error on dataset %s (train data) of best ' + 'model %f %%') % + (epoch, minibatch_index+1,nom_train, + test_score*100.)) + + print((' epoch %i, minibatch %i, test error on dataset %s of best ' + 'model %f %%') % + (epoch, minibatch_index+1,nom_test, + test_score2*100.)) + + if patience <= total_mb_index: + done_looping = True + break #to exit the FOR loop + + sys.stdout.flush() + + # useful when doing tests + if self.max_minibatches and minibatch_index >= self.max_minibatches: + break + + if decrease == 1: + learning_rate /= 2 #divide the learning rate by 2 for each new epoch + + self.series['params'].append((epoch,), self.classifier.all_params) + + if done_looping == True: #To exit completly the fine-tuning + break #to exit the WHILE loop + + end_time = time.clock() + self.hp.update({'finetuning_time':end_time-start_time,\ + 'best_validation_error':best_validation_loss,\ + 'test_score':test_score, + 'num_finetuning_epochs':epoch}) + + print(('\nOptimization complete with best validation score of %f %%,' + 'with test performance %f %% on dataset %s ') % + (best_validation_loss * 100., test_score*100.,nom_train)) + print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.)) + + print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) + + sys.stdout.flush() + + #Save a copy of the parameters in a file to be able to get them in the future + + if special == 1: #To keep a track of the value of the parameters + f = open('params_finetune_stanford.txt', 'w') + pickle.dump(parameters_finetune,f) + f.close() + + elif ind_test == 0 | ind_test == 20: #To keep a track of the value of the parameters + f = open('params_finetune_P07.txt', 'w') + pickle.dump(parameters_finetune,f) + f.close() + + + elif ind_test== 1: #For the run with 2 finetunes. It will be faster. + f = open('params_finetune_NIST.txt', 'w') + pickle.dump(parameters_finetune,f) + f.close() + + elif ind_test== 21: #To keep a track of the value of the parameters + f = open('params_finetune_P07_then_NIST.txt', 'w') + pickle.dump(parameters_finetune,f) + f.close() + + + #Set parameters like they where right after pre-train or finetune + def reload_parameters(self,which): + + #self.parameters_pre=pickle.load('params_pretrain.txt') + f = open(which) + self.parameters_pre=pickle.load(f) + f.close() + for idx,x in enumerate(self.parameters_pre): + if x.dtype=='float64': + self.classifier.params[idx].value=theano._asarray(copy(x),dtype=theano.config.floatX) + else: + self.classifier.params[idx].value=copy(x) + + def training_error(self,dataset): + # create a function to compute the mistakes that are made by the model + # on the validation set, or testing set + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + + iter2 = dataset.train(self.hp.minibatch_size,bufsize=buffersize) + train_losses2 = [test_model(x,y) for x,y in iter2] + train_score2 = numpy.mean(train_losses2) + print "Training error is: " + str(train_score2) + + + + diff -r a79db7cee035 -r 8cf52a1c8055 deep/stacked_dae/v_youssouf/stacked_dae.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_youssouf/stacked_dae.py Sun Apr 25 12:31:22 2010 -0400 @@ -0,0 +1,353 @@ +#!/usr/bin/python +# coding: utf-8 + +import numpy +import theano +import time +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams +import copy + +from utils import update_locals + +# taken from LeDeepNet/daa.py +# has a special case when taking log(0) (defined =0) +# modified to not take the mean anymore +from theano.tensor.xlogx import xlogx, xlogy0 +# it's target*log(output) +def binary_cross_entropy(target, output, sum_axis=1): + XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output)) + return -T.sum(XE, axis=sum_axis) + +class LogisticRegression(object): + def __init__(self, input, n_in, n_out, detection_mode): + # initialize with 0 the weights W as a matrix of shape (n_in, n_out) + self.W = theano.shared( value=numpy.zeros((n_in,n_out), + dtype = theano.config.floatX) ) + # initialize the baises b as a vector of n_out 0s + self.b = theano.shared( value=numpy.zeros((n_out,), + dtype = theano.config.floatX) ) + # compute vector of class-membership probabilities in symbolic form + if detection_mode: + self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b) + else: + self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) + + # compute prediction as class whose probability is maximal in + # symbolic form + self.y_pred=T.argmax(self.p_y_given_x, axis=1) + + # list of parameters for this layer + self.params = [self.W, self.b] + + + def negative_log_likelihood(self, y): + return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) + + def cross_entropy(self, y): + return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]+T.sum(T.log(1-self.p_y_given_x), axis=1)-T.log(1-self.p_y_given_x)[T.arange(y.shape[0]),y]) + + def errors(self, y): + # check if y has same dimension of y_pred + if y.ndim != self.y_pred.ndim: + raise TypeError('y should have the same shape as self.y_pred', + ('y', target.type, 'y_pred', self.y_pred.type)) + + # check if y is of the correct datatype + if y.dtype.startswith('int'): + # the T.neq operator returns a vector of 0s and 1s, where 1 + # represents a mistake in prediction + return T.mean(T.neq(self.y_pred, y)) + else: + raise NotImplementedError() + + +class SigmoidalLayer(object): + def __init__(self, rng, input, n_in, n_out): + self.input = input + + W_values = numpy.asarray( rng.uniform( \ + low = -numpy.sqrt(6./(n_in+n_out)), \ + high = numpy.sqrt(6./(n_in+n_out)), \ + size = (n_in, n_out)), dtype = theano.config.floatX) + self.W = theano.shared(value = W_values) + + b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) + self.b = theano.shared(value= b_values) + + self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b) + self.params = [self.W, self.b] + + + +class dA(object): + def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\ + input = None, shared_W = None, shared_b = None): + self.n_visible = n_visible + self.n_hidden = n_hidden + + # create a Theano random generator that gives symbolic random values + theano_rng = RandomStreams() + + if shared_W != None and shared_b != None : + self.W = shared_W + self.b = shared_b + else: + # initial values for weights and biases + # note : W' was written as `W_prime` and b' as `b_prime` + + # W is initialized with `initial_W` which is uniformely sampled + # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible) + # the output of uniform if converted using asarray to dtype + # theano.config.floatX so that the code is runable on GPU + initial_W = numpy.asarray( numpy.random.uniform( \ + low = -numpy.sqrt(6./(n_hidden+n_visible)), \ + high = numpy.sqrt(6./(n_hidden+n_visible)), \ + size = (n_visible, n_hidden)), dtype = theano.config.floatX) + initial_b = numpy.zeros(n_hidden, dtype = theano.config.floatX) + + + # theano shared variables for weights and biases + self.W = theano.shared(value = initial_W, name = "W") + self.b = theano.shared(value = initial_b, name = "b") + + + initial_b_prime= numpy.zeros(n_visible) + # tied weights, therefore W_prime is W transpose + self.W_prime = self.W.T + self.b_prime = theano.shared(value = initial_b_prime, name = "b'") + + # if no input is given, generate a variable representing the input + if input == None : + # we use a matrix because we expect a minibatch of several examples, + # each example being a row + self.x = T.matrix(name = 'input') + else: + self.x = input + # Equation (1) + # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs + # note : first argument of theano.rng.binomial is the shape(size) of + # random numbers that it should produce + # second argument is the number of trials + # third argument is the probability of success of any trial + # + # this will produce an array of 0s and 1s where 1 has a + # probability of 1 - ``corruption_level`` and 0 with + # ``corruption_level`` + self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x + # Equation (2) + # note : y is stored as an attribute of the class so that it can be + # used later when stacking dAs. + self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) + # Equation (3) + #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) + # Equation (4) + # note : we sum over the size of a datapoint; if we are using minibatches, + # L will be a vector, with one entry per example in minibatch + #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) + #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) + + # bypassing z to avoid running to log(0) + z_a = T.dot(self.y, self.W_prime) + self.b_prime + log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a)) + # log(1-sigmoid(z_a)) + log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a)) + self.L = -T.sum( self.x * (log_sigmoid) \ + + (1.0-self.x) * (log_1_sigmoid), axis=1 ) + + # I added this epsilon to avoid getting log(0) and 1/0 in grad + # This means conceptually that there'd be no probability of 0, but that + # doesn't seem to me as important (maybe I'm wrong?). + #eps = 0.00000001 + #eps_1 = 1-eps + #self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \ + # + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) + # note : L is now a vector, where each element is the cross-entropy cost + # of the reconstruction of the corresponding example of the + # minibatch. We need to compute the average of all these to get + # the cost of the minibatch + self.cost = T.mean(self.L) + + self.params = [ self.W, self.b, self.b_prime ] + + +class SdA(object): + def __init__(self, batch_size, n_ins, + hidden_layers_sizes, n_outs, + corruption_levels, rng, pretrain_lr, finetune_lr, detection_mode): + # Just to make sure those are not modified somewhere else afterwards + hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes) + corruption_levels = copy.deepcopy(corruption_levels) + + update_locals(self, locals()) + + self.layers = [] + self.pretrain_functions = [] + self.params = [] + # MODIF: added this so we also get the b_primes + # (not used for finetuning... still using ".params") + self.all_params = [] + self.n_layers = len(hidden_layers_sizes) + self.logistic_params = [] + + print "Creating SdA with params:" + print "batch_size", batch_size + print "hidden_layers_sizes", hidden_layers_sizes + print "corruption_levels", corruption_levels + print "n_ins", n_ins + print "n_outs", n_outs + print "pretrain_lr", pretrain_lr + print "finetune_lr", finetune_lr + print "detection_mode", detection_mode + print "----" + + if len(hidden_layers_sizes) < 1 : + raiseException (' You must have at least one hidden layer ') + + + # allocate symbolic variables for the data + #index = T.lscalar() # index to a [mini]batch + self.x = T.matrix('x') # the data is presented as rasterized images + self.y = T.ivector('y') # the labels are presented as 1D vector of + # [int] labels + self.finetune_lr = T.fscalar('finetune_lr') #To get a dynamic finetune learning rate + + for i in xrange( self.n_layers ): + # construct the sigmoidal layer + + # the size of the input is either the number of hidden units of + # the layer below or the input size if we are on the first layer + if i == 0 : + input_size = n_ins + else: + input_size = hidden_layers_sizes[i-1] + + # the input to this layer is either the activation of the hidden + # layer below or the input of the SdA if you are on the first + # layer + if i == 0 : + layer_input = self.x + else: + layer_input = self.layers[-1].output + + layer = SigmoidalLayer(rng, layer_input, input_size, + hidden_layers_sizes[i] ) + # add the layer to the + self.layers += [layer] + self.params += layer.params + + # Construct a denoising autoencoder that shared weights with this + # layer + dA_layer = dA(input_size, hidden_layers_sizes[i], \ + corruption_level = corruption_levels[0],\ + input = layer_input, \ + shared_W = layer.W, shared_b = layer.b) + + self.all_params += dA_layer.params + + # Construct a function that trains this dA + # compute gradients of layer parameters + gparams = T.grad(dA_layer.cost, dA_layer.params) + # compute the list of updates + updates = {} + for param, gparam in zip(dA_layer.params, gparams): + updates[param] = param - gparam * pretrain_lr + + # create a function that trains the dA + update_fn = theano.function([self.x], dA_layer.cost, \ + updates = updates)#, + # givens = { + # self.x : ensemble}) + # collect this function into a list + #update_fn = theano.function([index], dA_layer.cost, \ + # updates = updates, + # givens = { + # self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider}) + # collect this function into a list + self.pretrain_functions += [update_fn] + + + # We now need to add a logistic layer on top of the SDA + self.logLayer = LogisticRegression(\ + input = self.layers[-1].output,\ + n_in = hidden_layers_sizes[-1], n_out = n_outs, detection_mode = detection_mode) + + self.params += self.logLayer.params + self.all_params += self.logLayer.params + # construct a function that implements one step of finetunining + + + if detection_mode: + # compute the cost, defined as the negative log likelihood + cost = self.logLayer.cross_entropy(self.y) + # compute the gradients with respect to the logistic regression parameters + gparams = T.grad(cost, self.logLayer.params) + # compute list of updates + updates = {} + for param,gparam in zip(self.logLayer.params, gparams): + updates[param] = param - gparam*finetune_lr + + else: + # compute the cost, defined as the negative log likelihood + cost = self.logLayer.negative_log_likelihood(self.y) + # compute the gradients with respect to the model parameters + gparams = T.grad(cost, self.params) + # compute list of updates + updates = {} + for param,gparam in zip(self.params, gparams): + updates[param] = param - gparam*self.finetune_lr + + self.finetune = theano.function([self.x,self.y,self.finetune_lr], cost, + updates = updates)#, + + # symbolic variable that points to the number of errors made on the + # minibatch given by self.x and self.y + + self.errors = self.logLayer.errors(self.y) + + + #STRUCTURE FOR THE FINETUNING OF THE LOGISTIC REGRESSION ON THE TOP WITH + #ALL HIDDEN LAYERS AS INPUT + ''' + + all_h=[] + for i in xrange(self.n_layers): + all_h.append(self.layers[i].output) + self.all_hidden=T.concatenate(all_h,axis=1) + + + self.logLayer2 = LogisticRegression(\ + input = self.all_hidden,\ + n_in = sum(hidden_layers_sizes), n_out = n_outs) + #n_in=hidden_layers_sizes[0],n_out=n_outs) + + #self.logistic_params+= self.logLayer2.params + # construct a function that implements one step of finetunining + + self.logistic_params+=self.logLayer2.params + # compute the cost, defined as the negative log likelihood + if DETECTION_MODE: + cost2 = self.logLayer2.cross_entropy(self.y) + else: + cost2 = self.logLayer2.negative_log_likelihood(self.y) + # compute the gradients with respect to the model parameters + gparams2 = T.grad(cost2, self.logistic_params) + + # compute list of updates + updates2 = {} + for param,gparam in zip(self.logistic_params, gparams2): + updates2[param] = param - gparam*finetune_lr + + self.finetune2 = theano.function([self.x,self.y], cost2, + updates = updates2) + + # symbolic variable that points to the number of errors made on the + # minibatch given by self.x and self.y + + self.errors2 = self.logLayer2.errors(self.y) + ''' + +if __name__ == '__main__': + import sys + args = sys.argv[1:] + diff -r a79db7cee035 -r 8cf52a1c8055 deep/stacked_dae/v_youssouf/train_error.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_youssouf/train_error.py Sun Apr 25 12:31:22 2010 -0400 @@ -0,0 +1,118 @@ +#!/usr/bin/python +# coding: utf-8 + +import ift6266 +import pylearn + +import numpy +import theano +import time + +import pylearn.version +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +import copy +import sys +import os +import os.path + +from jobman import DD +import jobman, jobman.sql +from pylearn.io import filetensor + +from utils import produit_cartesien_jobs +from copy import copy + +from sgd_optimization import SdaSgdOptimizer + +#from ift6266.utils.scalar_series import * +from ift6266.utils.seriestables import * +import tables + +from ift6266 import datasets +from config import * + +''' +Function called by jobman upon launching each job +Its path is the one given when inserting jobs: see EXPERIMENT_PATH +''' +def jobman_entrypoint(state, channel): + # record mercurial versions of each package + pylearn.version.record_versions(state,[theano,ift6266,pylearn]) + # TODO: remove this, bad for number of simultaneous requests on DB + channel.save() + + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. + rtt = None + if state.has_key('reduce_train_to'): + rtt = state['reduce_train_to'] + elif REDUCE_TRAIN_TO: + rtt = REDUCE_TRAIN_TO + + n_ins = 32*32 + n_outs = 62 # 10 digits, 26*2 (lower, capitals) + + examples_per_epoch = NIST_ALL_TRAIN_SIZE + + PATH = '' + maximum_exemples=int(500000) #Maximum number of exemples seen + + + + print "Creating optimizer with state, ", state + + optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), + hyperparameters=state, \ + n_ins=n_ins, n_outs=n_outs,\ + examples_per_epoch=examples_per_epoch, \ + max_minibatches=rtt) + + + + + + if os.path.exists(PATH+'params_finetune_NIST.txt'): + print ('\n finetune = NIST ') + optimizer.reload_parameters(PATH+'params_finetune_NIST.txt') + print "For" + str(maximum_exemples) + "over the NIST training set: " + optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples)) + + + if os.path.exists(PATH+'params_finetune_P07.txt'): + print ('\n finetune = P07 ') + optimizer.reload_parameters(PATH+'params_finetune_P07.txt') + print "For" + str(maximum_exemples) + "over the P07 training set: " + optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples)) + + + if os.path.exists(PATH+'params_finetune_NIST_then_P07.txt'): + print ('\n finetune = NIST then P07') + optimizer.reload_parameters(PATH+'params_finetune_NIST_then_P07.txt') + print "For" + str(maximum_exemples) + "over the NIST training set: " + optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples)) + print "For" + str(maximum_exemples) + "over the P07 training set: " + optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples)) + + if os.path.exists(PATH+'params_finetune_P07_then_NIST.txt'): + print ('\n finetune = P07 then NIST') + optimizer.reload_parameters(PATH+'params_finetune_P07_then_NIST.txt') + print "For" + str(maximum_exemples) + "over the P07 training set: " + optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples)) + print "For" + str(maximum_exemples) + "over the NIST training set: " + optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples)) + + channel.save() + + return channel.COMPLETE + + + +if __name__ == '__main__': + + + chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) + jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock) + + diff -r a79db7cee035 -r 8cf52a1c8055 deep/stacked_dae/v_youssouf/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_youssouf/utils.py Sun Apr 25 12:31:22 2010 -0400 @@ -0,0 +1,69 @@ +#!/usr/bin/python +# coding: utf-8 + +from __future__ import with_statement + +from jobman import DD + +# from pylearn codebase +# useful in __init__(param1, param2, etc.) to save +# values in self.param1, self.param2... just call +# update_locals(self, locals()) +def update_locals(obj, dct): + if 'self' in dct: + del dct['self'] + obj.__dict__.update(dct) + +# from a dictionary of possible values for hyperparameters, e.g. +# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]} +# create a list of other dictionaries representing all the possible +# combinations, thus in this example creating: +# [{'learning_rate': 0.1, 'num_layers': 1}, ...] +# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2)) +def produit_cartesien_jobs(val_dict): + job_list = [DD()] + all_keys = val_dict.keys() + + for key in all_keys: + possible_values = val_dict[key] + new_job_list = [] + for val in possible_values: + for job in job_list: + to_insert = job.copy() + to_insert.update({key: val}) + new_job_list.append(to_insert) + job_list = new_job_list + + return job_list + +def test_produit_cartesien_jobs(): + vals = {'a': [1,2], 'b': [3,4,5]} + print produit_cartesien_jobs(vals) + + +# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python +"""Simple module for getting amount of memory used by a specified user's +processes on a UNIX system. +It uses UNIX ps utility to get the memory usage for a specified username and +pipe it to awk for summing up per application memory usage and return the total. +Python's Popen() from subprocess module is used for spawning ps and awk. + +""" + +import subprocess + +class MemoryMonitor(object): + + def __init__(self, username): + """Create new MemoryMonitor instance.""" + self.username = username + + def usage(self): + """Return int containing memory used by user's processes.""" + self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username, + shell=True, + stdout=subprocess.PIPE, + ) + self.stdout_list = self.process.communicate()[0].split('\n') + return int(self.stdout_list[0]) +