changeset 443:89a49dae6cf3

merge
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Mon, 03 May 2010 18:38:58 -0400
parents d5b2b6397a5a (current diff) 1272dc84a30c (diff)
children 18841eeb433f
files writeup/techreport.tex
diffstat 12 files changed, 1840 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/baseline/mlp/ratio_classes/mlp_nist_ratio.py	Mon May 03 18:38:27 2010 -0400
+++ b/baseline/mlp/ratio_classes/mlp_nist_ratio.py	Mon May 03 18:38:58 2010 -0400
@@ -24,8 +24,7 @@
 """
 __docformat__ = 'restructedtext en'
 
-import ift6266
-from scripts import setup_batches
+import setup_batches
 import pdb
 import numpy
 
@@ -50,7 +49,7 @@
 
 
 
-    def __init__(self, input, n_in, n_hidden, n_out,learning_rate):
+    def __init__(self, input, n_in, n_hidden, n_out,learning_rate, test_subclass):
         """Initialize the parameters for the multilayer perceptron
 
         :param input: symbolic variable that describes the input of the 
@@ -113,12 +112,20 @@
 
         # compute prediction as class whose probability is maximal in 
         # symbolic form
-        self.y_pred = T.argmax( self.p_y_given_x, axis =1)
-        self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1)
+        #self.y_pred = T.argmax( self.p_y_given_x, axis =1)
+        #self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1)
         
-        
-        
-        
+	self.test_subclass = test_subclass
+
+	#if (self.test_subclass == "u"):
+	#  self.y_pred = T.argmax( self.p_y_given_x[10:35], axis =1) + 10
+        #elif (self.test_subclass == "l"):
+	#  self.y_pred = T.argmax( self.p_y_given_x[35:], axis =1) + 35
+        #elif (self.test_subclass == "d"):
+	#  self.y_pred = T.argmax( self.p_y_given_x[0:9], axis =1)
+        #else:
+	self.y_pred = T.argmax( self.p_y_given_x, axis =1)
+
         # L1 norm ; one regularization option is to enforce L1 norm to 
         # be small 
         self.L1     = abs(self.W1).sum() + abs(self.W2).sum()
@@ -178,9 +185,9 @@
                         nb_max_exemples=1000000,\
                         batch_size=20,\
                         nb_hidden = 500,\
-                        nb_targets = 62,\
+                        nb_targets = 26,\
 			tau=1e6,\
-			main_class="d",\
+			main_class="l",\
 			start_ratio=1,\
 			end_ratio=1):
    
@@ -216,8 +223,9 @@
     classifier = MLP( input=x.reshape((batch_size,32*32)),\
                         n_in=32*32,\
                         n_hidden=nb_hidden,\
-                        n_out=nb_targets,
-                        learning_rate=learning_rate)
+                        n_out=nb_targets,\
+                        learning_rate=learning_rate,\
+			test_subclass=main_class)
                         
                         
    
@@ -285,7 +293,13 @@
     n_iter=max(1,n_iter) # run at least once on short debug call
     time_n=0 #in unit of exemples
     
-    
+    if (main_class == "u"):
+      class_offset = 10
+    elif (main_class == "l"):
+      class_offset = 36
+    else:
+      class_offset = 0
+
    
     if verbose == True:
         print 'looping at most %d times through the data set' %n_iter
@@ -302,6 +316,9 @@
         # get the minibatches corresponding to `iter` modulo
         # `len(train_batches)`
         x,y = train_batches[ minibatch_index ]
+
+	y = y - class_offset
+
         # convert to float
         x_float = x/255.0
         cost_ij = train_model(x_float,y)
@@ -312,6 +329,7 @@
             this_validation_loss = 0.
             for x,y in validation_batches:
                 # sum up the errors for each minibatch
+		y = y - class_offset
                 x_float = x/255.0
                 this_validation_loss += test_model(x_float,y)
             # get the average by dividing with the number of minibatches
@@ -323,6 +341,7 @@
             this_train_loss=0
             for x,y in train_batches:
                 # sum up the errors for each minibatch
+		y = y - class_offset
                 x_float = x/255.0
                 this_train_loss += test_model(x_float,y)
             # get the average by dividing with the number of minibatches
@@ -355,6 +374,7 @@
                 # test it on the test set
                 test_score = 0.
                 for x,y in test_batches:
+		    y = y - class_offset
                     x_float=x/255.0
                     test_score += test_model(x_float,y)
                 test_score /= len(test_batches)
@@ -381,6 +401,7 @@
                 #calculation before aborting
                 patience = iter+validation_frequency+1
                 for x,y in test_batches:
+		    y = y - class_offset
                     x_float=x/255.0
                     test_score += test_model(x_float,y)
                 test_score /= len(test_batches)
@@ -421,13 +442,10 @@
 
 def jobman_mlp_full_nist(state,channel):
     (train_error,validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\
-                                                                nb_max_exemples=state.nb_max_exemples,\
                                                                 nb_hidden=state.nb_hidden,\
-                                                                adaptive_lr=state.adaptive_lr,\
-								tau=state.tau,\
-								main_class=state.main_class,\
-								start_ratio=state.start_ratio,\
-								end_ratio=state.end_ratio)
+                                                                main_class=state.main_class,\
+								start_ratio=state.ratio,\
+								end_ratio=state.ratio)
     state.train_error=train_error
     state.validation_error=validation_error
     state.test_error=test_error
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_guillaume/config.py	Mon May 03 18:38:58 2010 -0400
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+'''
+These are parameters used by nist_sda_retrieve.py. They'll end up as globals in there.
+
+Rename this file to config.py and configure as needed.
+DON'T add the renamed file to the repository, as others might use it
+without realizing it, with dire consequences.
+'''
+
+# Set this to True when you want to run cluster tests, ie. you want
+# to run on the cluster, many jobs, but want to reduce the training
+# set size and the number of epochs, so you know everything runs
+# fine on the cluster.
+# Set this PRIOR to inserting your test jobs in the DB.
+TEST_CONFIG = False
+
+NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
+NIST_UPPER_LOCATION = '/data/lisa/data/nist/by_class/upper'
+NIST_LOWER_LOCATION = '/data/lisa/data/nist/by_class/lower'
+NIST_DIGITS_LOCATION = '/data/lisa/data/nist/by_class/digits'
+
+NIST_ALL_TRAIN_SIZE = 649081
+# valid et test =82587 82587 
+NIST_UPPER_TRAIN_SIZE = 196422
+NIST_LOWER_TRAIN_SIZE = 166998
+NIST_DIGITS_TRAIN_SIZE = 285661
+
+SUBDATASET_NIST = 'all'
+
+#Path of two pre-train done earlier
+PATH_NIST = '/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/NIST_big'
+PATH_P07 = '/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/P07_big/'
+
+# change "sandbox" when you're ready
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_SDA_retrieve'
+EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v_sylvain.nist_sda_retrieve.jobman_entrypoint"
+
+##Pour lancer des travaux sur le cluster: (il faut etre ou se trouve les fichiers)
+##python nist_sda_retrieve.py jobman_insert
+##dbidispatch --condor --repeat_jobs=2 jobman sql 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_finetuningSDA0' .  #C'est le path dans config.py
+
+##Pour lancer sur GPU sur boltzmann (changer device=gpuX pour X le bon assigne)
+##THEANO_FLAGS=floatX=float32,device=gpu2 python nist_sda_retrieve.py test_jobman_entrypoint
+
+
+# reduce training set to that many examples
+REDUCE_TRAIN_TO = None
+# that's a max, it usually doesn't get to that point
+MAX_FINETUNING_EPOCHS = 1000
+# number of minibatches before taking means for valid error etc.
+REDUCE_EVERY = 100
+#Set the finetune dataset
+FINETUNE_SET=0
+#Set the pretrain dataset used. 0: NIST, 1:P07
+PRETRAIN_CHOICE=0
+
+
+if TEST_CONFIG:
+    REDUCE_TRAIN_TO = 1000
+    MAX_FINETUNING_EPOCHS = 2
+    REDUCE_EVERY = 10
+
+
+# This is to configure insertion of jobs on the cluster.
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
+JOB_VALS = {'pretraining_lr': [0.1],#, 0.001],#, 0.0001],
+        'pretraining_epochs_per_layer': [10],
+        'hidden_layers_sizes': [800],
+        'corruption_levels': [0.2],
+        'minibatch_size': [100],
+        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
+        'max_finetuning_epochs_P07':[1],
+        'finetuning_lr':[0.01], #0.001 was very bad, so we leave it out
+        'num_hidden_layers':[4],
+        'finetune_set':[-1],
+        'pretrain_choice':[0,1]
+        }
+
+# Just useful for tests... minimal number of epochs
+# (This is used when running a single job, locally, when
+# calling ./nist_sda.py test_jobman_entrypoint
+DEFAULT_HP_NIST = {'finetuning_lr':0.1,
+                       'pretraining_lr':0.01,
+                       'pretraining_epochs_per_layer':15,
+                       'max_finetuning_epochs':MAX_FINETUNING_EPOCHS,
+                       #'max_finetuning_epochs':1,
+                       'max_finetuning_epochs_P07':7,
+                       'hidden_layers_sizes':1000,
+                       'corruption_levels':0.2,
+                       'minibatch_size':100,
+                       #'reduce_train_to':2000,
+		       'decrease_lr':1,
+                       'num_hidden_layers':3,
+                       'finetune_set':0,
+                       'pretrain_choice':0}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_guillaume/config2.py	Mon May 03 18:38:58 2010 -0400
@@ -0,0 +1,90 @@
+'''
+These are parameters used by nist_sda_retrieve.py. They'll end up as globals in there.
+
+Rename this file to config.py and configure as needed.
+DON'T add the renamed file to the repository, as others might use it
+without realizing it, with dire consequences.
+'''
+
+# Set this to True when you want to run cluster tests, ie. you want
+# to run on the cluster, many jobs, but want to reduce the training
+# set size and the number of epochs, so you know everything runs
+# fine on the cluster.
+# Set this PRIOR to inserting your test jobs in the DB.
+TEST_CONFIG = False
+
+NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
+NIST_ALL_TRAIN_SIZE = 649081
+# valid et test =82587 82587 
+
+#Path of two pre-train done earlier
+PATH_NIST = '/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/NIST_big'
+PATH_P07 = '/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/P07_demo/'
+
+# change "sandbox" when you're ready
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_SDA_retrieve'
+EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v_sylvain.nist_sda_retrieve.jobman_entrypoint"
+
+##Pour lancer des travaux sur le cluster: (il faut etre ou se trouve les fichiers)
+##python nist_sda_retrieve.py jobman_insert
+##dbidispatch --condor --repeat_jobs=2 jobman sql 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_finetuningSDA0' .  #C'est le path dans config.py
+
+##Pour lancer sur GPU sur boltzmann (changer device=gpuX pour X le bon assigne)
+##THEANO_FLAGS=floatX=float32,device=gpu2 python nist_sda_retrieve.py test_jobman_entrypoint
+
+
+# reduce training set to that many examples
+REDUCE_TRAIN_TO = None
+# that's a max, it usually doesn't get to that point
+MAX_FINETUNING_EPOCHS = 1000
+# number of minibatches before taking means for valid error etc.
+REDUCE_EVERY = 100
+#Set the finetune dataset
+FINETUNE_SET=0
+#Set the pretrain dataset used. 0: NIST, 1:P07
+PRETRAIN_CHOICE=0
+
+
+if TEST_CONFIG:
+    REDUCE_TRAIN_TO = 1000
+    MAX_FINETUNING_EPOCHS = 2
+    REDUCE_EVERY = 10
+
+
+# This is to configure insertion of jobs on the cluster.
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
+JOB_VALS = {'pretraining_lr': [0.1],#, 0.001],#, 0.0001],
+        'pretraining_epochs_per_layer': [10],
+        'hidden_layers_sizes': [800],
+        'corruption_levels': [0.2],
+        'minibatch_size': [100],
+        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
+        'max_finetuning_epochs_P07':[1],
+        'finetuning_lr':[0.01], #0.001 was very bad, so we leave it out
+        'num_hidden_layers':[4],
+        'finetune_set':[-1],
+        'pretrain_choice':[0,1]
+        }
+
+# Just useful for tests... minimal number of epochs
+# (This is used when running a single job, locally, when
+# calling ./nist_sda.py test_jobman_entrypoint
+DEFAULT_HP_NIST = {'finetuning_lr':0.1,
+                       'pretraining_lr':0.01,
+                       'pretraining_epochs_per_layer':15,
+                       'max_finetuning_epochs':MAX_FINETUNING_EPOCHS,
+                       #'max_finetuning_epochs':1,
+                       'max_finetuning_epochs_P07':7,
+                       'hidden_layers_sizes':1000,
+                       'corruption_levels':0.2,
+                       'minibatch_size':100,
+                       #'reduce_train_to':2000,
+		       'decrease_lr':1,
+                       'num_hidden_layers':3,
+                       'finetune_set':2,
+                       'pretrain_choice':1}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_guillaume/nist_sda.py	Mon May 03 18:38:58 2010 -0400
@@ -0,0 +1,285 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# coding: utf-8
+
+import ift6266
+import pylearn
+
+import numpy 
+import theano
+import time
+
+import pylearn.version
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+import copy
+import sys
+import os
+import os.path
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_cartesien_jobs
+from copy import copy
+
+from sgd_optimization import SdaSgdOptimizer
+
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
+
+from ift6266 import datasets
+from config import *
+
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
+'''
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
+    channel.save()
+
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+        
+    if state.has_key('decrease_lr'):
+        decrease_lr = state['decrease_lr']
+    else :
+        decrease_lr = 0
+        
+    if state.has_key('decrease_lr_pretrain'):
+        dec=state['decrease_lr_pretrain']
+    else :
+        dec=0
+ 
+    n_ins = 32*32
+
+    if state.has_key('subdataset'):
+        subdataset_name=state['subdataset']
+    else:
+        subdataset_name=SUBDATASET_NIST
+
+    #n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+    if subdataset_name == "upper":
+	n_outs = 26
+	subdataset = datasets.nist_upper()
+	examples_per_epoch = NIST_UPPER_TRAIN_SIZE
+    elif subdataset_name == "lower":
+	n_outs = 26
+	subdataset = datasets.nist_lower()
+	examples_per_epoch = NIST_LOWER_TRAIN_SIZE
+    elif subdataset_name == "digits":
+	n_outs = 10
+	subdataset = datasets.nist_digits()
+	examples_per_epoch = NIST_DIGITS_TRAIN_SIZE
+    else:
+	n_outs = 62
+	subdataset = datasets.nist_all()
+	examples_per_epoch = NIST_ALL_TRAIN_SIZE
+    
+    print 'Using subdataset ', subdataset_name
+
+    #To be sure variables will not be only in the if statement
+    PATH = ''
+    nom_reptrain = ''
+    nom_serie = ""
+    if state['pretrain_choice'] == 0:
+        nom_serie="series_NIST.h5"
+    elif state['pretrain_choice'] == 1:
+        nom_serie="series_P07.h5"
+
+    series = create_series(state.num_hidden_layers,nom_serie)
+
+
+    print "Creating optimizer with state, ", state
+
+    optimizer = SdaSgdOptimizer(dataset_name=subdataset_name,\
+				    dataset=subdataset,\
+                                    hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    examples_per_epoch=examples_per_epoch, \
+                                    series=series,
+                                    max_minibatches=rtt)
+
+    parameters=[]
+    #Number of files of P07 used for pretraining
+    nb_file=0
+
+    print('\n\tpretraining with NIST\n')
+
+    optimizer.pretrain(subdataset, decrease = dec) 
+
+    channel.save()
+    
+    #Set some of the parameters used for the finetuning
+    if state.has_key('finetune_set'):
+        finetune_choice=state['finetune_set']
+    else:
+        finetune_choice=FINETUNE_SET
+    
+    if state.has_key('max_finetuning_epochs'):
+        max_finetune_epoch_NIST=state['max_finetuning_epochs']
+    else:
+        max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS
+    
+    if state.has_key('max_finetuning_epochs_P07'):
+        max_finetune_epoch_P07=state['max_finetuning_epochs_P07']
+    else:
+        max_finetune_epoch_P07=max_finetune_epoch_NIST
+    
+    #Decide how the finetune is done
+    
+    if finetune_choice == 0:
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(subdataset,subdataset,max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 1:
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 2:
+        print('\n\n\tfinetune with P07 followed by NIST\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr)
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 3:
+        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\
+        All hidden units output are input of the logistic regression\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        
+        
+    if finetune_choice==-1:
+        print('\nSERIE OF 4 DIFFERENT FINETUNINGS')
+        print('\n\n\tfinetune with NIST\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_finetune_P07.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\
+        All hidden units output are input of the logistic regression\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        channel.save()
+    
+    channel.save()
+
+    return channel.COMPLETE
+
+# These Series objects are used to save various statistics
+# during the training.
+def create_series(num_hidden_layers, nom_serie):
+
+    # Replace series we don't want to save with DummySeries, e.g.
+    # series['training_error'] = DummySeries()
+
+    series = {}
+
+    basedir = os.getcwd()
+
+    h5f = tables.openFile(os.path.join(basedir, nom_serie), "w")
+
+    # reconstruction
+    reconstruction_base = \
+                ErrorSeries(error_name="reconstruction_error",
+                    table_name="reconstruction_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['reconstruction_error'] = \
+                AccumulatorSeriesWrapper(base_series=reconstruction_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # train
+    training_base = \
+                ErrorSeries(error_name="training_error",
+                    table_name="training_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['training_error'] = \
+                AccumulatorSeriesWrapper(base_series=training_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # valid and test are not accumulated/mean, saved directly
+    series['validation_error'] = \
+                ErrorSeries(error_name="validation_error",
+                    table_name="validation_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    series['test_error'] = \
+                ErrorSeries(error_name="test_error",
+                    table_name="test_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    param_names = []
+    for i in range(num_hidden_layers):
+        param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i]
+    param_names += ['logreg_layer_W', 'logreg_layer_b']
+
+    # comment out series we don't want to save
+    series['params'] = SharedParamsStatisticsWrapper(
+                        new_group_name="params",
+                        base_group="/",
+                        arrays_names=param_names,
+                        hdf5_file=h5f,
+                        index_names=('epoch',))
+
+    return series
+
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
+def jobman_insert_nist():
+    jobs = produit_cartesien_jobs(JOB_VALS)
+
+    db = jobman.sql.db(JOBDB)
+    for job in jobs:
+        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
+        jobman.sql.insert_dict(job, db)
+
+    print "inserted"
+
+if __name__ == '__main__':
+
+    args = sys.argv[1:]
+
+    #if len(args) > 0 and args[0] == 'load_nist':
+    #    test_load_nist()
+
+    if len(args) > 0 and args[0] == 'jobman_insert':
+        jobman_insert_nist()
+
+    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+        jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
+
+    else:
+        print "Bad arguments"
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_guillaume/nist_sda_retrieve.py	Mon May 03 18:38:58 2010 -0400
@@ -0,0 +1,272 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import ift6266
+import pylearn
+
+import numpy 
+import theano
+import time
+
+import pylearn.version
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+import copy
+import sys
+import os
+import os.path
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_cartesien_jobs
+from copy import copy
+
+from sgd_optimization import SdaSgdOptimizer
+
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
+
+from ift6266 import datasets
+from config2 import *
+
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
+'''
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
+    channel.save()
+
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+        
+    if state.has_key('decrease_lr'):
+        decrease_lr = state['decrease_lr']
+    else :
+        decrease_lr = 0
+ 
+    n_ins = 32*32
+    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+     
+    examples_per_epoch = NIST_ALL_TRAIN_SIZE
+    #To be sure variables will not be only in the if statement
+    PATH = ''
+    nom_reptrain = ''
+    nom_serie = ""
+    if state['pretrain_choice'] == 0:
+        PATH=PATH_NIST
+        nom_pretrain='NIST'
+        nom_serie="series_NIST.h5"
+    elif state['pretrain_choice'] == 1:
+        PATH=PATH_P07
+        nom_pretrain='P07'
+        nom_serie="series_P07.h5"
+
+    series = create_series(state.num_hidden_layers,nom_serie)
+
+    print "Creating optimizer with state, ", state
+
+    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), 
+                                    hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    examples_per_epoch=examples_per_epoch, \
+                                    series=series,
+                                    max_minibatches=rtt)	
+
+    parameters=[]
+    #Number of files of P07 used for pretraining
+    nb_file=0
+##    if state['pretrain_choice'] == 0:
+##        print('\n\tpretraining with NIST\n')
+##        optimizer.pretrain(datasets.nist_all()) 
+##    elif state['pretrain_choice'] == 1:
+##        #To know how many file will be used during pretraining
+##        nb_file = state['pretraining_epochs_per_layer'] 
+##        state['pretraining_epochs_per_layer'] = 1 #Only 1 time over the dataset
+##        if nb_file >=100:
+##            sys.exit("The code does not support this much pretraining epoch (99 max with P07).\n"+
+##            "You have to correct the code (and be patient, P07 is huge !!)\n"+
+##             "or reduce the number of pretraining epoch to run the code (better idea).\n")
+##        print('\n\tpretraining with P07')
+##        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file)) 
+    
+    print ('Retrieve pre-train done earlier ( '+nom_pretrain+' )')
+    
+
+        
+    sys.stdout.flush()
+    channel.save()
+    
+    #Set some of the parameters used for the finetuning
+    if state.has_key('finetune_set'):
+        finetune_choice=state['finetune_set']
+    else:
+        finetune_choice=FINETUNE_SET
+    
+    if state.has_key('max_finetuning_epochs'):
+        max_finetune_epoch_NIST=state['max_finetuning_epochs']
+    else:
+        max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS
+    
+    if state.has_key('max_finetuning_epochs_P07'):
+        max_finetune_epoch_P07=state['max_finetuning_epochs_P07']
+    else:
+        max_finetune_epoch_P07=max_finetune_epoch_NIST
+    
+    #Decide how the finetune is done
+    
+    if finetune_choice == 0:
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 1:
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 2:
+        print('\n\n\tfinetune with P07 followed by NIST\n\n')
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr)
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 3:
+        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\
+        All hidden units output are input of the logistic regression\n\n')
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        
+        
+    if finetune_choice==-1:
+        print('\nSERIE OF 4 DIFFERENT FINETUNINGS')
+        print('\n\n\tfinetune with NIST\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_finetune_P07.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\
+        All hidden units output are input of the logistic regression\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        channel.save()
+    
+    channel.save()
+
+    return channel.COMPLETE
+
+# These Series objects are used to save various statistics
+# during the training.
+def create_series(num_hidden_layers, nom_serie):
+
+    # Replace series we don't want to save with DummySeries, e.g.
+    # series['training_error'] = DummySeries()
+
+    series = {}
+
+    basedir = os.getcwd()
+
+    h5f = tables.openFile(os.path.join(basedir, nom_serie), "w")
+
+    # reconstruction
+    reconstruction_base = \
+                ErrorSeries(error_name="reconstruction_error",
+                    table_name="reconstruction_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['reconstruction_error'] = \
+                AccumulatorSeriesWrapper(base_series=reconstruction_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # train
+    training_base = \
+                ErrorSeries(error_name="training_error",
+                    table_name="training_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['training_error'] = \
+                AccumulatorSeriesWrapper(base_series=training_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # valid and test are not accumulated/mean, saved directly
+    series['validation_error'] = \
+                ErrorSeries(error_name="validation_error",
+                    table_name="validation_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    series['test_error'] = \
+                ErrorSeries(error_name="test_error",
+                    table_name="test_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    param_names = []
+    for i in range(num_hidden_layers):
+        param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i]
+    param_names += ['logreg_layer_W', 'logreg_layer_b']
+
+    # comment out series we don't want to save
+    series['params'] = SharedParamsStatisticsWrapper(
+                        new_group_name="params",
+                        base_group="/",
+                        arrays_names=param_names,
+                        hdf5_file=h5f,
+                        index_names=('epoch',))
+
+    return series
+
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
+def jobman_insert_nist():
+    jobs = produit_cartesien_jobs(JOB_VALS)
+
+    db = jobman.sql.db(JOBDB)
+    for job in jobs:
+        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
+        jobman.sql.insert_dict(job, db)
+
+    print "inserted"
+
+if __name__ == '__main__':
+
+    args = sys.argv[1:]
+
+    #if len(args) > 0 and args[0] == 'load_nist':
+    #    test_load_nist()
+
+    if len(args) > 0 and args[0] == 'jobman_insert':
+        jobman_insert_nist()
+
+    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+        jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
+
+    else:
+        print "Bad arguments"
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_guillaume/sgd_optimization.py	Mon May 03 18:38:58 2010 -0400
@@ -0,0 +1,395 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# coding: utf-8
+
+# Generic SdA optimization loop, adapted from the deeplearning.net tutorial
+
+import numpy 
+import theano
+import time
+import datetime
+import theano.tensor as T
+import sys
+#import pickle
+import cPickle
+
+from jobman import DD
+import jobman, jobman.sql
+from copy import copy
+
+from stacked_dae import SdA
+
+from ift6266.utils.seriestables import *
+
+#For test purpose only
+buffersize=1000
+
+default_series = { \
+        'reconstruction_error' : DummySeries(),
+        'training_error' : DummySeries(),
+        'validation_error' : DummySeries(),
+        'test_error' : DummySeries(),
+        'params' : DummySeries()
+        }
+
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
+
+class SdaSgdOptimizer:
+    def __init__(self, dataset_name, dataset, hyperparameters, n_ins, n_outs,
+                    examples_per_epoch, series=default_series, max_minibatches=None):
+	self.dataset_name = dataset_name
+        self.dataset = dataset
+        self.hp = hyperparameters
+        self.n_ins = n_ins
+        self.n_outs = n_outs
+        self.parameters_pre=[]
+   
+	if (self.dataset_name == "upper"):
+	  self.class_offset = 10
+	elif (self.dataset_name == "lower"):
+	  self.class_offset = 36
+	else:
+	  self.class_offset = 0
+
+
+        self.max_minibatches = max_minibatches
+        print "SdaSgdOptimizer, max_minibatches =", max_minibatches
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
+        self.series = series
+
+        self.rng = numpy.random.RandomState(1234)
+
+        self.init_classifier()
+
+        sys.stdout.flush()
+
+    def init_classifier(self):
+        print "Constructing classifier"
+
+        # we don't want to save arrays in DD objects, so
+        # we recreate those arrays here
+        nhl = self.hp.num_hidden_layers
+        layers_sizes = [self.hp.hidden_layers_sizes] * nhl
+        corruption_levels = [self.hp.corruption_levels] * nhl
+
+        # construct the stacked denoising autoencoder class
+        self.classifier = SdA( \
+                          batch_size = self.hp.minibatch_size, \
+                          n_ins= self.n_ins, \
+                          hidden_layers_sizes = layers_sizes, \
+                          n_outs = self.n_outs, \
+                          corruption_levels = corruption_levels,\
+                          rng = self.rng,\
+                          pretrain_lr = self.hp.pretraining_lr, \
+                          finetune_lr = self.hp.finetuning_lr)
+
+        #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
+
+        sys.stdout.flush()
+
+    def train(self):
+        self.pretrain(self.dataset)
+        self.finetune(self.dataset)
+
+    def pretrain(self,dataset,decrease=0):
+        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
+        sys.stdout.flush()
+        
+        un_fichier=int(819200.0/self.hp.minibatch_size) #Number of batches in a P07 file
+
+        start_time = time.clock()  
+        
+        ########  This is hardcoaded. THe 0.95 parameter is hardcoaded and can be changed at will  ###
+        #Set the decreasing rate of the learning rate. We want the final learning rate to
+        #be 5% of the original learning rate. The decreasing factor is linear
+        decreasing = (decrease*self.hp.pretraining_lr)/float(self.hp.pretraining_epochs_per_layer*800000/self.hp.minibatch_size)
+        
+        ## Pre-train layer-wise 
+        for i in xrange(self.classifier.n_layers):
+            # go through pretraining epochs 
+            
+            #To reset the learning rate to his original value
+            learning_rate=self.hp.pretraining_lr
+            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
+                # go through the training set
+                batch_index=0
+                count=0
+                num_files=0
+                for x,y in dataset.train(self.hp.minibatch_size):
+		    y = y - self.class_offset
+                    c = self.classifier.pretrain_functions[i](x,learning_rate)
+                    count +=1
+
+                    self.series["reconstruction_error"].append((epoch, batch_index), c)
+                    batch_index+=1
+
+                    #If we need to decrease the learning rate for the pretrain
+                    if decrease != 0:
+                        learning_rate -= decreasing
+
+                    # useful when doing tests
+                    if self.max_minibatches and batch_index >= self.max_minibatches:
+                        break
+                    
+                    #When we pass through the data only once (the case with P07)
+                    #There is approximately 800*1024=819200 examples per file (1k per example and files are 800M)
+                    if self.hp.pretraining_epochs_per_layer == 1 and count%un_fichier == 0:
+                        print 'Pre-training layer %i, epoch %d, cost '%(i,num_files),c
+                        num_files+=1
+                        sys.stdout.flush()
+                        self.series['params'].append((num_files,), self.classifier.all_params)
+                
+                #When NIST is used
+                if self.hp.pretraining_epochs_per_layer > 1:        
+                    print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
+                    sys.stdout.flush()
+
+                    self.series['params'].append((epoch,), self.classifier.all_params)
+     
+        end_time = time.clock()
+
+        print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
+        self.hp.update({'pretraining_time': end_time-start_time})
+        
+        sys.stdout.flush()
+        
+        #To be able to load them later for tests on finetune
+        self.parameters_pre=[copy(x.value) for x in self.classifier.params]
+        f = open('params_pretrain.txt', 'w')
+        cPickle.dump(self.parameters_pre,f,protocol=-1)
+        f.close()
+
+
+    def finetune(self,dataset,dataset_test,num_finetune,ind_test,special=0,decrease=0):
+        
+        if special != 0 and special != 1:
+            sys.exit('Bad value for variable special. Must be in {0,1}')
+        print "STARTING FINETUNING, time = ", datetime.datetime.now()
+
+        minibatch_size = self.hp.minibatch_size
+        if ind_test == 0 or ind_test == 20:
+            nom_test = "NIST"
+            nom_train="P07"
+        else:
+            nom_test = "P07"
+            nom_train = "NIST"
+
+
+        # create a function to compute the mistakes that are made by the model
+        # on the validation set, or testing set
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #         givens = {
+        #           self.classifier.x: ensemble_x,
+        #           self.classifier.y: ensemble_y]})
+
+        validate_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #        givens = {
+        #           self.classifier.x: ,
+        #           self.classifier.y: ]})
+
+
+        # early-stopping parameters
+        patience              = 10000 # look as this many examples regardless
+        patience_increase     = 2.    # wait this much longer when a new best is 
+                                      # found
+        improvement_threshold = 0.995 # a relative improvement of this much is 
+                                      # considered significant
+        validation_frequency  = min(self.mb_per_epoch, patience/2)
+                                      # go through this many 
+                                      # minibatche before checking the network 
+                                      # on the validation set; in this case we 
+                                      # check every epoch 
+        if self.max_minibatches and validation_frequency > self.max_minibatches:
+            validation_frequency = self.max_minibatches / 2
+
+        best_params          = None
+        best_validation_loss = float('inf')
+        test_score           = 0.
+        start_time = time.clock()
+
+        done_looping = False
+        epoch = 0
+
+        total_mb_index = 0
+        minibatch_index = 0
+        parameters_finetune=[]
+        
+        if ind_test == 21:
+            learning_rate = self.hp.finetuning_lr / 10.0
+        else:
+            learning_rate = self.hp.finetuning_lr  #The initial finetune lr
+
+
+        while (epoch < num_finetune) and (not done_looping):
+            epoch = epoch + 1
+
+            for x,y in dataset.train(minibatch_size,bufsize=buffersize):
+                minibatch_index += 1
+                
+                y = y - self.class_offset
+
+                if special == 0:
+                    cost_ij = self.classifier.finetune(x,y,learning_rate)
+                elif special == 1:
+                    cost_ij = self.classifier.finetune2(x,y)
+                total_mb_index += 1
+
+                self.series["training_error"].append((epoch, minibatch_index), cost_ij)
+
+                if (total_mb_index+1) % validation_frequency == 0: 
+                    #minibatch_index += 1
+                    #The validation set is always NIST (we want the model to be good on NIST)
+                    if ind_test == 0 | ind_test == 20:
+                        iter=dataset_test.valid(minibatch_size,bufsize=buffersize)
+                    else:
+                        iter = dataset.valid(minibatch_size,bufsize=buffersize)
+                    if self.max_minibatches:
+                        iter = itermax(iter, self.max_minibatches)
+                    validation_losses = [validate_model(x,y - self.class_offset) for x,y in iter]
+                    this_validation_loss = numpy.mean(validation_losses)
+
+                    self.series["validation_error"].\
+                        append((epoch, minibatch_index), this_validation_loss*100.)
+
+                    print('epoch %i, minibatch %i, validation error on NIST : %f %%' % \
+                           (epoch, minibatch_index+1, \
+                            this_validation_loss*100.))
+
+
+                    # if we got the best validation score until now
+                    if this_validation_loss < best_validation_loss:
+
+                        #improve patience if loss improvement is good enough
+                        if this_validation_loss < best_validation_loss *  \
+                               improvement_threshold :
+                            patience = max(patience, total_mb_index * patience_increase)
+
+                        # save best validation score, iteration number and parameters
+                        best_validation_loss = this_validation_loss
+                        best_iter = total_mb_index
+                        parameters_finetune=[copy(x.value) for x in self.classifier.params]
+
+                        # test it on the test set
+                        iter = dataset.test(minibatch_size,bufsize=buffersize)
+                        if self.max_minibatches:
+                            iter = itermax(iter, self.max_minibatches)
+                        test_losses = [test_model(x,y - self.class_offset) for x,y in iter]
+                        test_score = numpy.mean(test_losses)
+                        
+                        #test it on the second test set
+                        iter2 = dataset_test.test(minibatch_size,bufsize=buffersize)
+                        if self.max_minibatches:
+                            iter2 = itermax(iter2, self.max_minibatches)
+                        test_losses2 = [test_model(x,y - self.class_offset) for x,y in iter2]
+                        test_score2 = numpy.mean(test_losses2)
+
+                        self.series["test_error"].\
+                            append((epoch, minibatch_index), test_score*100.)
+
+                        print(('     epoch %i, minibatch %i, test error on dataset %s  (train data) of best '
+                              'model %f %%') % 
+                                     (epoch, minibatch_index+1,nom_train,
+                                      test_score*100.))
+                                    
+                        print(('     epoch %i, minibatch %i, test error on dataset %s of best '
+                              'model %f %%') % 
+                                     (epoch, minibatch_index+1,nom_test,
+                                      test_score2*100.))
+                    
+                    if patience <= total_mb_index:
+                        done_looping = True
+                        break   #to exit the FOR loop
+                    
+                    sys.stdout.flush()
+
+                # useful when doing tests
+                if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                    break
+            
+            if decrease == 1:
+                if (ind_test == 21 & epoch % 100 == 0) | ind_test == 20 | (ind_test == 1 & epoch % 100 == 0) :
+                    learning_rate /= 2 #divide the learning rate by 2 for each new epoch of P07 (or 100 of NIST)
+            
+            self.series['params'].append((epoch,), self.classifier.all_params)
+
+            if done_looping == True:    #To exit completly the fine-tuning
+                break   #to exit the WHILE loop
+
+        end_time = time.clock()
+        self.hp.update({'finetuning_time':end_time-start_time,\
+                    'best_validation_error':best_validation_loss,\
+                    'test_score':test_score,
+                    'num_finetuning_epochs':epoch})
+
+        print(('\nOptimization complete with best validation score of %f %%,'
+               'with test performance %f %% on dataset %s ') %  
+                     (best_validation_loss * 100., test_score*100.,nom_train))
+        print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.))
+        
+        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+        
+        sys.stdout.flush()
+        
+        #Save a copy of the parameters in a file to be able to get them in the future
+        
+        if special == 1:    #To keep a track of the value of the parameters
+            f = open('params_finetune_stanford.txt', 'w')
+            cPickle.dump(parameters_finetune,f,protocol=-1)
+            f.close()
+        
+        elif ind_test == 0 | ind_test == 20:    #To keep a track of the value of the parameters
+            f = open('params_finetune_P07.txt', 'w')
+            cPickle.dump(parameters_finetune,f,protocol=-1)
+            f.close()
+               
+
+        elif ind_test== 1:    #For the run with 2 finetunes. It will be faster.
+            f = open('params_finetune_NIST.txt', 'w')
+            cPickle.dump(parameters_finetune,f,protocol=-1)
+            f.close()
+        
+        elif ind_test== 21:    #To keep a track of the value of the parameters
+            f = open('params_finetune_P07_then_NIST.txt', 'w')
+            cPickle.dump(parameters_finetune,f,protocol=-1)
+            f.close()
+        
+
+    #Set parameters like they where right after pre-train or finetune
+    def reload_parameters(self,which):
+        
+        #self.parameters_pre=pickle.load('params_pretrain.txt')
+        f = open(which)
+        self.parameters_pre=cPickle.load(f)
+        f.close()
+        for idx,x in enumerate(self.parameters_pre):
+            if x.dtype=='float64':
+                self.classifier.params[idx].value=theano._asarray(copy(x),dtype=theano.config.floatX)
+            else:
+                self.classifier.params[idx].value=copy(x)
+
+    def training_error(self,dataset):
+        # create a function to compute the mistakes that are made by the model
+        # on the validation set, or testing set
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+                
+        iter2 = dataset.train(self.hp.minibatch_size,bufsize=buffersize)
+        train_losses2 = [test_model(x,y - self.class_offset) for x,y in iter2]
+        train_score2 = numpy.mean(train_losses2)
+        print "Training error is: " + str(train_score2)
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_guillaume/stacked_dae.py	Mon May 03 18:38:58 2010 -0400
@@ -0,0 +1,377 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import numpy 
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+from utils import update_locals
+
+# taken from LeDeepNet/daa.py
+# has a special case when taking log(0) (defined =0)
+# modified to not take the mean anymore
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
+
+class LogisticRegression(object):
+    def __init__(self, input, n_in, n_out):
+        # initialize with 0 the weights W as a matrix of shape (n_in, n_out) 
+        self.W = theano.shared( value=numpy.zeros((n_in,n_out),
+                                            dtype = theano.config.floatX) )
+        # initialize the baises b as a vector of n_out 0s
+        self.b = theano.shared( value=numpy.zeros((n_out,), 
+                                            dtype = theano.config.floatX) )
+        # compute vector of class-membership. This is a sigmoid instead of
+        #a softmax to be able later to classify as nothing
+        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) #row-wise
+##        self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b)
+        
+        # compute prediction as class whose probability is maximal in 
+        # symbolic form
+        self.y_pred=T.argmax(self.p_y_given_x, axis=1)
+
+        # list of parameters for this layer
+        self.params = [self.W, self.b]
+        
+
+    def negative_log_likelihood(self, y):
+        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+##        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]+T.sum(T.log(1-self.p_y_given_x), axis=1)-T.log(1-self.p_y_given_x)[T.arange(y.shape[0]),y])
+    
+    
+##    def kullback_leibler(self,y):
+##        return -T.mean(T.log(1/float(self.p_y_given_x))[T.arange(y.shape[0]),y])
+
+
+    def errors(self, y):
+        # check if y has same dimension of y_pred 
+        if y.ndim != self.y_pred.ndim:
+            raise TypeError('y should have the same shape as self.y_pred', 
+                ('y', target.type, 'y_pred', self.y_pred.type))
+
+        # check if y is of the correct datatype        
+        if y.dtype.startswith('int'):
+            # the T.neq operator returns a vector of 0s and 1s, where 1
+            # represents a mistake in prediction
+            return T.mean(T.neq(self.y_pred, y))
+        else:
+            raise NotImplementedError()
+
+
+class SigmoidalLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+        self.input = input
+
+        W_values = numpy.asarray( rng.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+
+        self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
+        self.params = [self.W, self.b]
+    
+
+class TanhLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+        self.input = input
+
+        W_values = numpy.asarray( rng.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+
+        self.output = (T.tanh(T.dot(input, self.W) + self.b) + 1.0)/2.0
+        # ( *+ 1) /2  is because tanh goes from -1 to 1 and sigmoid goes from 0 to 1
+        # I want to use tanh, but the image has to stay the same. The correction is necessary.
+        self.params = [self.W, self.b]
+
+
+class dA(object):
+  def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\
+               input = None, shared_W = None, shared_b = None):
+    self.n_visible = n_visible
+    self.n_hidden  = n_hidden
+    
+    # create a Theano random generator that gives symbolic random values
+    theano_rng = RandomStreams()
+    
+    if shared_W != None and shared_b != None : 
+        self.W = shared_W
+        self.b = shared_b
+    else:
+        # initial values for weights and biases
+        # note : W' was written as `W_prime` and b' as `b_prime`
+
+        # W is initialized with `initial_W` which is uniformely sampled
+        # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible)
+        # the output of uniform if converted using asarray to dtype 
+        # theano.config.floatX so that the code is runable on GPU
+        initial_W = numpy.asarray( numpy.random.uniform( \
+              low = -numpy.sqrt(6./(n_hidden+n_visible)), \
+              high = numpy.sqrt(6./(n_hidden+n_visible)), \
+              size = (n_visible, n_hidden)), dtype = theano.config.floatX)
+        initial_b       = numpy.zeros(n_hidden, dtype = theano.config.floatX)
+    
+    
+        # theano shared variables for weights and biases
+        self.W       = theano.shared(value = initial_W,       name = "W")
+        self.b       = theano.shared(value = initial_b,       name = "b")
+    
+ 
+    initial_b_prime= numpy.zeros(n_visible)
+    # tied weights, therefore W_prime is W transpose
+    self.W_prime = self.W.T 
+    self.b_prime = theano.shared(value = initial_b_prime, name = "b'")
+
+    # if no input is given, generate a variable representing the input
+    if input == None : 
+        # we use a matrix because we expect a minibatch of several examples,
+        # each example being a row
+        self.x = T.matrix(name = 'input') 
+    else:
+        self.x = input
+    # Equation (1)
+    # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs
+    # note : first argument of theano.rng.binomial is the shape(size) of 
+    #        random numbers that it should produce
+    #        second argument is the number of trials 
+    #        third argument is the probability of success of any trial
+    #
+    #        this will produce an array of 0s and 1s where 1 has a 
+    #        probability of 1 - ``corruption_level`` and 0 with
+    #        ``corruption_level``
+    self.tilde_x  = theano_rng.binomial( self.x.shape,  1,  1 - corruption_level, dtype=theano.config.floatX) * self.x
+    # Equation (2)
+    # note  : y is stored as an attribute of the class so that it can be 
+    #         used later when stacking dAs. 
+    
+##    self.y   = T.nnet.sigmoid(T.dot(self.tilde_x, self.W      ) + self.b)
+##        
+##    # Equation (3)
+##    #self.z   = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+##    # Equation (4)
+##    # note : we sum over the size of a datapoint; if we are using minibatches,
+##    #        L will  be a vector, with one entry per example in minibatch
+##    #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) 
+##    #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+##
+##    # bypassing z to avoid running to log(0)
+##    z_a = T.dot(self.y, self.W_prime) + self.b_prime
+##    log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a))
+##    # log(1-sigmoid(z_a))
+##    log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a))
+##    self.L = -T.sum( self.x * (log_sigmoid) \
+##                    + (1.0-self.x) * (log_1_sigmoid), axis=1 )
+
+    # I added this epsilon to avoid getting log(0) and 1/0 in grad
+    # This means conceptually that there'd be no probability of 0, but that
+    # doesn't seem to me as important (maybe I'm wrong?).
+    #eps = 0.00000001
+    #eps_1 = 1-eps
+    #self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+    #                + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
+    # note : L is now a vector, where each element is the cross-entropy cost 
+    #        of the reconstruction of the corresponding example of the 
+    #        minibatch. We need to compute the average of all these to get 
+    #        the cost of the minibatch
+    
+    #Or use a Tanh everything is always between 0 and 1, the range is 
+    #changed so it remain the same as when sigmoid is used
+    self.y   = (T.tanh(T.dot(self.tilde_x, self.W ) + self.b)+1.0)/2.0
+    
+    self.z =  (T.tanh(T.dot(self.y, self.W_prime) + self.b_prime)+1.0) / 2.0
+    #To ensure to do not have a log(0) operation
+    if self.z <= 0:
+        self.z = 0.000001
+    if self.z >= 1:
+        self.z = 0.999999
+        
+    self.L = - T.sum( self.x*T.log(self.z) + (1.0-self.x)*T.log(1.0-self.z), axis=1 )
+    
+    self.cost = T.mean(self.L)
+
+    self.params = [ self.W, self.b, self.b_prime ]
+
+
+class SdA(object):
+    def __init__(self, batch_size, n_ins, 
+                 hidden_layers_sizes, n_outs, 
+                 corruption_levels, rng, pretrain_lr, finetune_lr):
+        # Just to make sure those are not modified somewhere else afterwards
+        hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes)
+        corruption_levels = copy.deepcopy(corruption_levels)
+
+        update_locals(self, locals())      
+ 
+        self.layers             = []
+        self.pretrain_functions = []
+        self.params             = []
+        # MODIF: added this so we also get the b_primes
+        # (not used for finetuning... still using ".params")
+        self.all_params         = []
+        self.n_layers           = len(hidden_layers_sizes)
+        self.logistic_params    = []
+
+        print "Creating SdA with params:"
+        print "batch_size", batch_size
+        print "hidden_layers_sizes", hidden_layers_sizes
+        print "corruption_levels", corruption_levels
+        print "n_ins", n_ins
+        print "n_outs", n_outs
+        print "pretrain_lr", pretrain_lr
+        print "finetune_lr", finetune_lr
+        print "----"
+
+        if len(hidden_layers_sizes) < 1 :
+            raiseException (' You must have at least one hidden layer ')
+
+
+        # allocate symbolic variables for the data
+        #index   = T.lscalar()    # index to a [mini]batch 
+        self.x  = T.matrix('x')  # the data is presented as rasterized images
+        self.y  = T.ivector('y') # the labels are presented as 1D vector of 
+                                 # [int] labels
+        self.finetune_lr = T.fscalar('finetune_lr') #To get a dynamic finetune learning rate
+        self.pretrain_lr = T.fscalar('pretrain_lr') #To get a dynamic pretrain learning rate
+
+        for i in xrange( self.n_layers ):
+            # construct the sigmoidal layer
+
+            # the size of the input is either the number of hidden units of 
+            # the layer below or the input size if we are on the first layer
+            if i == 0 :
+                input_size = n_ins
+            else:
+                input_size = hidden_layers_sizes[i-1]
+
+            # the input to this layer is either the activation of the hidden
+            # layer below or the input of the SdA if you are on the first
+            # layer
+            if i == 0 : 
+                layer_input = self.x
+            else:
+                layer_input = self.layers[-1].output
+            #We have to choose between sigmoidal layer or tanh layer !
+
+##            layer = SigmoidalLayer(rng, layer_input, input_size, 
+##                                   hidden_layers_sizes[i] )
+                                
+            layer = TanhLayer(rng, layer_input, input_size, 
+                                   hidden_layers_sizes[i] )
+            # add the layer to the 
+            self.layers += [layer]
+            self.params += layer.params
+        
+            # Construct a denoising autoencoder that shared weights with this
+            # layer
+            dA_layer = dA(input_size, hidden_layers_sizes[i], \
+                          corruption_level = corruption_levels[0],\
+                          input = layer_input, \
+                          shared_W = layer.W, shared_b = layer.b)
+
+            self.all_params += dA_layer.params
+        
+            # Construct a function that trains this dA
+            # compute gradients of layer parameters
+            gparams = T.grad(dA_layer.cost, dA_layer.params)
+            # compute the list of updates
+            updates = {}
+            for param, gparam in zip(dA_layer.params, gparams):
+                updates[param] = param - gparam * self.pretrain_lr
+            
+            # create a function that trains the dA
+            update_fn = theano.function([self.x, self.pretrain_lr], dA_layer.cost, \
+                  updates = updates)#,
+            #     givens = { 
+            #         self.x : ensemble})
+            # collect this function into a list
+            #update_fn = theano.function([index], dA_layer.cost, \
+            #      updates = updates,
+            #      givens = { 
+            #         self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider})
+            # collect this function into a list
+            self.pretrain_functions += [update_fn]
+
+        
+        # We now need to add a logistic layer on top of the SDA
+        self.logLayer = LogisticRegression(\
+                         input = self.layers[-1].output,\
+                         n_in = hidden_layers_sizes[-1], n_out = n_outs)
+
+        self.params += self.logLayer.params
+        self.all_params += self.logLayer.params
+        # construct a function that implements one step of finetunining
+
+        # compute the cost, defined as the negative log likelihood 
+        cost = self.logLayer.negative_log_likelihood(self.y)
+        # compute the gradients with respect to the model parameters
+        gparams = T.grad(cost, self.params)
+        # compute list of updates
+        updates = {}
+        for param,gparam in zip(self.params, gparams):
+            updates[param] = param - gparam*self.finetune_lr
+            
+        self.finetune = theano.function([self.x,self.y,self.finetune_lr], cost, 
+                updates = updates)#,
+
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+
+        self.errors = self.logLayer.errors(self.y)
+        
+        
+        #STRUCTURE FOR THE FINETUNING OF THE LOGISTIC REGRESSION ON THE TOP WITH
+        #ALL HIDDEN LAYERS AS INPUT
+        
+        all_h=[]
+        for i in xrange(self.n_layers):
+            all_h.append(self.layers[i].output)
+        self.all_hidden=T.concatenate(all_h,axis=1)
+
+
+        self.logLayer2 = LogisticRegression(\
+                         input = self.all_hidden,\
+                         n_in = sum(hidden_layers_sizes), n_out = n_outs)
+                         #n_in=hidden_layers_sizes[0],n_out=n_outs)
+
+        #self.logistic_params+= self.logLayer2.params
+        # construct a function that implements one step of finetunining
+        
+        self.logistic_params+=self.logLayer2.params
+        # compute the cost, defined as the negative log likelihood 
+        cost2 = self.logLayer2.negative_log_likelihood(self.y)
+        # compute the gradients with respect to the model parameters
+        gparams2 = T.grad(cost2, self.logistic_params)
+
+        # compute list of updates
+        updates2 = {}
+        for param,gparam in zip(self.logistic_params, gparams2):
+            updates2[param] = param - gparam*finetune_lr
+   
+        self.finetune2 = theano.function([self.x,self.y], cost2, 
+                updates = updates2)
+
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+
+        self.errors2 = self.logLayer2.errors(self.y)
+        
+
+if __name__ == '__main__':
+    import sys
+    args = sys.argv[1:]
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_guillaume/train_error.py	Mon May 03 18:38:58 2010 -0400
@@ -0,0 +1,118 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import ift6266
+import pylearn
+
+import numpy 
+import theano
+import time
+
+import pylearn.version
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+import copy
+import sys
+import os
+import os.path
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_cartesien_jobs
+from copy import copy
+
+from sgd_optimization import SdaSgdOptimizer
+
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
+
+from ift6266 import datasets
+from config import *
+
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
+'''
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
+    channel.save()
+
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+ 
+    n_ins = 32*32
+    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+     
+    examples_per_epoch = NIST_ALL_TRAIN_SIZE
+
+    PATH = ''
+    maximum_exemples=int(500000) #Maximum number of exemples seen
+
+
+
+    print "Creating optimizer with state, ", state
+
+    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), 
+                                    hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    examples_per_epoch=examples_per_epoch, \
+                                    max_minibatches=rtt)	
+
+
+    
+    
+
+    if os.path.exists(PATH+'params_finetune_NIST.txt'):
+        print ('\n finetune = NIST ')
+        optimizer.reload_parameters(PATH+'params_finetune_NIST.txt')
+        print "For" + str(maximum_exemples) + "over the NIST training set: "
+        optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples))
+        
+    
+    if os.path.exists(PATH+'params_finetune_P07.txt'):
+        print ('\n finetune = P07 ')
+        optimizer.reload_parameters(PATH+'params_finetune_P07.txt')
+        print "For" + str(maximum_exemples) + "over the P07 training set: "
+        optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples))
+
+    
+    if os.path.exists(PATH+'params_finetune_NIST_then_P07.txt'):
+        print ('\n finetune = NIST then P07')
+        optimizer.reload_parameters(PATH+'params_finetune_NIST_then_P07.txt')
+        print "For" + str(maximum_exemples) + "over the NIST training set: "
+        optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples))
+        print "For" + str(maximum_exemples) + "over the P07 training set: "
+        optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples))
+    
+    if os.path.exists(PATH+'params_finetune_P07_then_NIST.txt'):
+        print ('\n finetune = P07 then NIST')
+        optimizer.reload_parameters(PATH+'params_finetune_P07_then_NIST.txt')
+        print "For" + str(maximum_exemples) + "over the P07 training set: "
+        optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples))
+        print "For" + str(maximum_exemples) + "over the NIST training set: "
+        optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples))
+    
+    channel.save()
+
+    return channel.COMPLETE
+
+
+
+if __name__ == '__main__':
+
+
+    chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+    jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_guillaume/utils.py	Mon May 03 18:38:58 2010 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/python
+# coding: utf-8
+
+from __future__ import with_statement
+
+from jobman import DD
+
+# from pylearn codebase
+# useful in __init__(param1, param2, etc.) to save
+# values in self.param1, self.param2... just call
+# update_locals(self, locals())
+def update_locals(obj, dct):
+    if 'self' in dct:
+        del dct['self']
+    obj.__dict__.update(dct)
+
+# from a dictionary of possible values for hyperparameters, e.g.
+# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]}
+# create a list of other dictionaries representing all the possible
+# combinations, thus in this example creating:
+# [{'learning_rate': 0.1, 'num_layers': 1}, ...]
+# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2))
+def produit_cartesien_jobs(val_dict):
+    job_list = [DD()]
+    all_keys = val_dict.keys()
+
+    for key in all_keys:
+        possible_values = val_dict[key]
+        new_job_list = []
+        for val in possible_values:
+            for job in job_list:
+                to_insert = job.copy()
+                to_insert.update({key: val})
+                new_job_list.append(to_insert)
+        job_list = new_job_list
+
+    return job_list
+
+def test_produit_cartesien_jobs():
+    vals = {'a': [1,2], 'b': [3,4,5]}
+    print produit_cartesien_jobs(vals)
+
+
+# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
+"""Simple module for getting amount of memory used by a specified user's
+processes on a UNIX system.
+It uses UNIX ps utility to get the memory usage for a specified username and
+pipe it to awk for summing up per application memory usage and return the total.
+Python's Popen() from subprocess module is used for spawning ps and awk.
+
+"""
+
+import subprocess
+
+class MemoryMonitor(object):
+
+    def __init__(self, username):
+        """Create new MemoryMonitor instance."""
+        self.username = username
+
+    def usage(self):
+        """Return int containing memory used by user's processes."""
+        self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username,
+                                        shell=True,
+                                        stdout=subprocess.PIPE,
+                                        )
+        self.stdout_list = self.process.communicate()[0].split('\n')
+        return int(self.stdout_list[0])
+
--- a/deep/stacked_dae/v_sylvain/nist_apriori_error.py	Mon May 03 18:38:27 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/nist_apriori_error.py	Mon May 03 18:38:58 2010 -0400
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 __docformat__ = 'restructedtext en'
 
 import pdb
@@ -65,6 +66,8 @@
     
     total_error_count=0
     total_exemple_count=0
+    total_error_count_wap=0
+
     if part == 0:
         iter = dataset.train(1)
     if part == 1:
@@ -111,8 +114,8 @@
 
         #get grouped based error
         #with a priori
-        if(y>9 and y<35):
-            predicted_class=numpy.argmax(out[0,10:35])+10
+        if(y>9 and y<36):
+            predicted_class=numpy.argmax(out[0,10:36])+10
             if(predicted_class!=y):
                 total_error_count+=1
                 
@@ -120,14 +123,20 @@
             predicted_class=numpy.argmax(out[0,0:10])
             if(predicted_class!=y):
                 total_error_count+=1
-        if(y>34):
-            predicted_class=numpy.argmax(out[0,35:])+35
+        if(y>35):
+            predicted_class=numpy.argmax(out[0,36:])+36
             if(predicted_class!=y):
                 total_error_count+=1
-                
+	#without a priori
+	predicted_class=numpy.argmax(out)
+	if(predicted_class!=y):
+	  total_error_count_wap+=1
+
     print '\t total exemples count: '+str(total_exemple_count)
     print '\t total error count: '+str(total_error_count)
     print '\t percentage of error: '+str(total_error_count*100.0/total_exemple_count*1.0)+' %'
+    print '\t total error count without a priori: '+str(total_error_count_wap)
+    print '\t percentage of error without a priori: '+str(total_error_count_wap*100.0/total_exemple_count*1.0)+' %'
     
 
 def sigmoid(value):
--- a/writeup/ml.bib	Mon May 03 18:38:27 2010 -0400
+++ b/writeup/ml.bib	Mon May 03 18:38:58 2010 -0400
@@ -1,6 +1,7 @@
 %%WARNING: READ THE README FILE BEFORE ANY MODIFICATION!!!
 
 
+
 %%submitted papers
 %%%
 
@@ -37,6 +38,13 @@
 %%accepted or published papers
 %%%
 
+@Article{Grother,
+  author = "Grother Patrick J.",
+  title = "NIST special database. Handprinted forms and characters database",
+  publisher = "National institute of standards and technology",
+  year = "1995"
+}
+
 @InCollection{Trentin+al-2002,
   author =       "E. Trentin and F. Brugnara and Y. Bengio and C. Furlanello and R.  De Mori",
   editor =       "R. Daniloff",
@@ -25719,3 +25727,27 @@
 }
 
 
+@inproceedings{SnowEtAl2008,
+    author = {Snow, R. and O'Connor, B. and Jurafsky, D. and Ng, A.},
+    booktitle = {Proc. Empirical Methods in NLP},
+    pages = {254--263},
+    title = {Cheap and Fast -- But is it Good? Evaluating Non-Expert Annotations for Natural Language Tasks},
+    year = {2008}
+}
+
+
+@inproceedings{SorokinAndForsyth2008,
+    author = {Sorokin, A. and Forsyth, D.},
+    booktitle = {CVPR Workshops},
+    pages = {1--8},
+    title = {Utility data annotation with Amazon Mechanical Turk},
+    year = {2008}
+}
+
+@inproceedings{ whitehill09,
+ title = {Whose Vote Should Count More: Optimal Integration of Labels from Labelers of Unknown Expertise},
+ author = {J. Whitehill and P. Ruvolo and T. Wu and J. Bergsma and J. Movellan},
+ booktitle = {NIPS 22},
+ pages = {2035--2043},
+ year = 2009
+}
--- a/writeup/techreport.tex	Mon May 03 18:38:27 2010 -0400
+++ b/writeup/techreport.tex	Mon May 03 18:38:58 2010 -0400
@@ -31,7 +31,10 @@
 We find that the SDA outperforms its
 shallow counterpart, an ordinary Multi-Layer Perceptron,
 and that it is better able to take advantage of the additional
-generated data.
+generated data, as well as better able to take advantage of
+training from more classes than those of interest in the end.
+In fact, we find that the SDA reaches human performance as
+estimated by the Amazon Mechanical Turk on the NIST test characters.
 \end{abstract}
 
 \section{Introduction}
@@ -263,8 +266,25 @@
 
 \begin{itemize}
 \item {\bf NIST}
+The NIST Special Database 19 (NIST19) is a very widely used dataset for training and testing OCR systems. 
+The dataset is composed with over 800 000 digits and characters (upper and lower cases), with hand checked classifications,
+extracted from handwritten sample forms of 3600 writers. The characters are labelled by one of the 62 classes 
+corresponding to "0"-"9","A"-"Z" and "a"-"z". The dataset contains 8 series of different complexity. 
+The fourth series, $hsf_4$, experimentally recognized to be the most difficult one for classification task is recommended 
+by NIST as testing set and is used in our work for that purpose.
+The performances reported by previous work on that dataset mostly use only the digits.
+Here we use the whole classes both in the training and testing phase.   
+
+
 \item {\bf Fonts}
 \item {\bf Captchas}
+The Captcha data source is an adaptation of the \emph{pycaptcha} library (a python based captcha generator library) for 
+generating characters of the same format as the NIST dataset. The core of this data source is composed with a random character
+generator and various kinds of tranformations similar to those described in the previous sections. 
+In order to increase the variability of the data generated, different fonts are used for generating the characters. 
+Transformations (slant, distorsions, rotation, translation) are applied to each randomly generated character with a complexity
+depending on the value of the complexity parameter provided by the user of the data source. Two levels of complexity are 
+allowed and can be controlled via an easy to use facade class.    
 \item {\bf OCR data}
 \end{itemize}
 
@@ -323,21 +343,45 @@
 
 \section{Experimental Results}
 
-\subsection{SDA vs MLP}
+\subsection{SDA vs MLP vs Humans}
 
+We compare here the best MLP (according to validation set error) that we found against
+the best SDA (again according to validation set error), along with a precise estimate
+of human performance obtained via Amazon's Mechanical Turk (AMT)
+service\footnote{http://mturk.com}. AMT users are paid small amounts
+of money to perform tasks for which human intelligence is required.
+Mechanical Turk has been used extensively in natural language
+processing \cite{SnowEtAl2008} and vision
+\cite{SorokinAndForsyth2008,whitehill09}. AMT users where presented
+with 10 character images and asked to type 10 corresponding ascii
+characters. Hence they were forced to make a hard choice among the
+62 character classes. Three users classified each image, allowing
+to estimate inter-human variability (shown as +/- in parenthesis below).
+
+\begin{table}
+\caption{Overall comparison of error rates on 62 character classes (10 digits +
+26 lower + 26 upper), except for last columns -- digits only, between deep architecture with pre-training
+(SDA=Stacked Denoising Autoencoder) and ordinary shallow architecture 
+(MLP=Multi-Layer Perceptron). }
+\label{tab:sda-vs-mlp-vs-humans}
 \begin{center}
-\begin{tabular}{lcc}
-      & train w/   & train w/    \\
-      & NIST       & P07 + NIST  \\ \hline 
-SDA   &            &             \\ \hline 
-MLP   &            &             \\ \hline 
+\begin{tabular}{|l|r|r|r|r|} \hline
+      & NIST test & NISTP test & P07 test  & NIST test digits   \\ \hline
+Humans&            &           &   & \\ \hline 
+SDA   &            &           &  &\\ \hline 
+MLP   &            &           &  & \\ \hline 
 \end{tabular}
 \end{center}
+\end{table}
 
 \subsection{Perturbed Training Data More Helpful for SDAE}
 
 \subsection{Training with More Classes than Necessary}
 
+As previously seen, the SDA is better able to benefit from the transformations applied to the data than the MLP. We are now training SDAs and MLPs on single classes from NIST (respectively digits, lower case characters and upper case characters), to compare the test results with those from models trained on the entire NIST database (per-class test error, with an a priori on the desired class). The goal is to find out if training the model with more classes than necessary reduces the test error on a single class, as opposed to training it only with the desired class. We use a single hidden layer MLP with 1000 hidden units, and a SDA with 3 hidden layers (1000 hidden units per layer), pre-trained and fine-tuned on NIST.
+
+Our results show that the MLP only benefits from a full NIST training on digits, and the test error is only 5\% smaller than a digits-specialized MLP. On the other hand, the SDA always gives better results when it is trained with the entire NIST database, compared to its specialized counterparts (with upper case character, the test errors is 12\% smaller, 27\% smaller on digits, and 15\% smaller on lower case characters).
+
 \section{Conclusions}
 
 \bibliography{strings,ml,aigaion,specials}