# HG changeset patch
# User Arnaud Bergeron <abergeron@gmail.com>
# Date 1272229923 14400
# Node ID 0b7e64e8e93fbd2768c3d76e06a829652c771d3e
# Parent  01445a75c702dd5b56fe4f6e777ce98c53e1d811# Parent  e36ccffb38700c0ba2fcb26255693648083a7f5b
branch merge

diff -r 01445a75c702 -r 0b7e64e8e93f baseline/mlp/mlp_nist.py
--- a/baseline/mlp/mlp_nist.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/baseline/mlp/mlp_nist.py	Sun Apr 25 17:12:03 2010 -0400
@@ -23,6 +23,7 @@
 """
 __docformat__ = 'restructedtext en'
 
+import sys
 import pdb
 import numpy
 import pylab
@@ -163,6 +164,75 @@
         else:
             raise NotImplementedError()
 
+def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz',
+                  data_set=0):
+    
+
+
+    # allocate symbolic variables for the data
+    x = T.fmatrix()  # the data is presented as rasterized images
+    y = T.lvector()  # the labels are presented as 1D vector of 
+                          # [long int] labels
+
+    # load the data set and create an mlp based on the dimensions of the model
+    model=numpy.load(model_name)
+    W1=model['W1']
+    W2=model['W2']
+    b1=model['b1']
+    b2=model['b2']
+    nb_hidden=b1.shape[0]
+    input_dim=W1.shape[0]
+    nb_targets=b2.shape[0]
+    learning_rate=0.1
+
+
+    if data_set==0:
+        dataset=datasets.nist_all()
+    elif data_set==1:
+        dataset=datasets.nist_P07()
+
+
+    classifier = MLP( input=x,\
+                        n_in=input_dim,\
+                        n_hidden=nb_hidden,\
+                        n_out=nb_targets,
+                        learning_rate=learning_rate)
+
+
+    #overwrite weights with weigths from model
+    classifier.W1.value=W1
+    classifier.W2.value=W2
+    classifier.b1.value=b1
+    classifier.b2.value=b2
+
+
+    cost = classifier.negative_log_likelihood(y) \
+         + 0.0 * classifier.L1 \
+         + 0.0 * classifier.L2_sqr 
+
+    # compiling a theano function that computes the mistakes that are made by 
+    # the model on a minibatch
+    test_model = theano.function([x,y], classifier.errors(y))
+
+
+
+    #get the test error
+    #use a batch size of 1 so we can get the sub-class error
+    #without messing with matrices (will be upgraded later)
+    test_score=0
+    temp=0
+    for xt,yt in dataset.test(20):
+        test_score += test_model(xt,yt)
+        temp = temp+1
+    test_score /= temp
+
+
+    return test_score*100
+    
+
+
+
+
 
 def mlp_full_nist(      verbose = 1,\
                         adaptive_lr = 0,\
@@ -174,15 +244,19 @@
                         batch_size=20,\
                         nb_hidden = 30,\
                         nb_targets = 62,
-			tau=1e6,\
-			lr_t2_factor=0.5):
+                        tau=1e6,\
+                        lr_t2_factor=0.5,\
+                        init_model=0,\
+                        channel=0):
    
     
+    if channel!=0:
+        channel.save()
     configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
     
     #save initial learning rate if classical adaptive lr is used
     initial_lr=learning_rate
-    max_div_count=3
+    max_div_count=1000
     
     
     total_validation_error_list = []
@@ -195,6 +269,8 @@
     	dataset=datasets.nist_all()
     elif data_set==1:
         dataset=datasets.nist_P07()
+    elif data_set==2:
+        dataset=datasets.PNIST07()
     
     
     
@@ -215,6 +291,14 @@
                         learning_rate=learning_rate)
                         
                         
+    # check if we want to initialise the weights with a previously calculated model
+    # dimensions must be consistent between old model and current configuration!!!!!! (nb_hidden and nb_targets)
+    if init_model!=0:
+        old_model=numpy.load(init_model)
+        classifier.W1.value=old_model['W1']
+        classifier.W2.value=old_model['W2']
+        classifier.b1.value=old_model['b1']
+        classifier.b2.value=old_model['b2']
    
 
     # the cost we minimize during training is the negative log likelihood of 
@@ -289,8 +373,9 @@
     
     
     
-    if verbose == 1:
-        print 'starting training'
+    
+    print 'starting training'
+    sys.stdout.flush()
     while(minibatch_index*batch_size<nb_max_exemples):
         
         for x, y in dataset.train(batch_size):
@@ -303,10 +388,12 @@
             #train model
             cost_ij = train_model(x,y)
     
-            if (minibatch_index+1) % validation_frequency == 0: 
+            if (minibatch_index) % validation_frequency == 0: 
                 #save the current learning rate
                 learning_rate_list.append(classifier.lr.value)
                 divergence_flag_list.append(divergence_flag)
+
+                
                 
                 # compute the validation error
                 this_validation_loss = 0.
@@ -319,10 +406,15 @@
                 this_validation_loss /= temp
                 #save the validation loss
                 total_validation_error_list.append(this_validation_loss)
-                if verbose == 1:
-                    print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % 
-                                (epoch, minibatch_index+1,classifier.lr.value,
-                                this_validation_loss*100.))
+                
+		print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % 
+			(epoch, minibatch_index+1,classifier.lr.value,
+			this_validation_loss*100.))
+		sys.stdout.flush()
+				
+		#save temp results to check during training
+                numpy.savez('temp_results.npy',config=configuration,total_validation_error_list=total_validation_error_list,\
+                learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list)
     
                 # if we got the best validation score until now
                 if this_validation_loss < best_validation_loss:
@@ -344,11 +436,12 @@
                         test_score += test_model(xt,yt)
                         temp = temp+1
                     test_score /= temp
-                    if verbose == 1:
-                        print(('epoch %i, minibatch %i, test error of best '
-                            'model %f %%') % 
-                                    (epoch, minibatch_index+1,
-                                    test_score*100.))
+                    
+		    print(('epoch %i, minibatch %i, test error of best '
+			'model %f %%') % 
+				(epoch, minibatch_index+1,
+				test_score*100.))
+                    sys.stdout.flush()
                                     
                 # if the validation error is going up, we are overfitting (or oscillating)
                 # check if we are allowed to continue and if we will adjust the learning rate
@@ -374,12 +467,13 @@
                         test_score += test_model(xt,yt)
                         temp=temp+1
                     test_score /= temp
-                    if verbose == 1:
-                        print ' validation error is going up, possibly stopping soon'
-                        print(('     epoch %i, minibatch %i, test error of best '
-                            'model %f %%') % 
-                                    (epoch, minibatch_index+1,
-                                    test_score*100.))
+                    
+                    print ' validation error is going up, possibly stopping soon'
+                    print(('     epoch %i, minibatch %i, test error of best '
+                        'model %f %%') % 
+                                (epoch, minibatch_index+1,
+                                test_score*100.))
+                    sys.stdout.flush()
                                     
                     
     
@@ -393,6 +487,9 @@
             #force one epoch at least
             if epoch>0 and minibatch_index*batch_size>nb_max_exemples:
                 break
+
+
+                       
     
     
             time_n= time_n + batch_size
@@ -401,12 +498,13 @@
         # we have finished looping through the training set
         epoch = epoch+1
     end_time = time.clock()
-    if verbose == 1:
-        print(('Optimization complete. Best validation score of %f %% '
-            'obtained at iteration %i, with test performance %f %%') %  
-                    (best_validation_loss * 100., best_iter, test_score*100.))
-        print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
-        print minibatch_index
+   
+    print(('Optimization complete. Best validation score of %f %% '
+        'obtained at iteration %i, with test performance %f %%') %  
+                (best_validation_loss * 100., best_iter, test_score*100.))
+    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
+    print minibatch_index
+    sys.stdout.flush()
         
     #save the model and the weights
     numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value)
@@ -427,7 +525,8 @@
 										tau=state.tau,\
 										verbose = state.verbose,\
 										lr_t2_factor=state.lr_t2_factor,
-                                                                                data_set=state.data_set)
+                                        data_set=state.data_set,
+                                        channel=channel)
     state.train_error=train_error
     state.validation_error=validation_error
     state.test_error=test_error
diff -r 01445a75c702 -r 0b7e64e8e93f baseline/mlp/ratio_classes/mlp_nist_ratio.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/mlp/ratio_classes/mlp_nist_ratio.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,438 @@
+# -*- coding: utf-8 -*-
+"""
+This tutorial introduces the multilayer perceptron using Theano.  
+
+ A multilayer perceptron is a logistic regressor where
+instead of feeding the input to the logistic regression you insert a
+intermidiate layer, called the hidden layer, that has a nonlinear 
+activation function (usually tanh or sigmoid) . One can use many such 
+hidden layers making the architecture deep. The tutorial will also tackle 
+the problem of MNIST digit classification.
+
+.. math::
+
+    f(x) = G( b^{(2)} + W^{(2)}( s( b^{(1)} + W^{(1)} x))),
+
+References:
+
+    - textbooks: "Pattern Recognition and Machine Learning" - 
+                 Christopher M. Bishop, section 5
+
+TODO: recommended preprocessing, lr ranges, regularization ranges (explain 
+      to do lr first, then add regularization)
+
+"""
+__docformat__ = 'restructedtext en'
+
+import ift6266
+from scripts import setup_batches
+import pdb
+import numpy
+
+import theano
+import theano.tensor as T
+import time 
+import theano.tensor.nnet
+import pylearn
+import theano,pylearn.version
+from pylearn.io import filetensor as ft
+
+data_path = '/data/lisa/data/nist/by_class/'
+
+class MLP(object):
+    """Multi-Layer Perceptron Class
+
+    A multilayer perceptron is a feedforward artificial neural network model 
+    that has one layer or more of hidden units and nonlinear activations. 
+    Intermidiate layers usually have as activation function thanh or the 
+    sigmoid function  while the top layer is a softamx layer. 
+    """
+
+
+
+    def __init__(self, input, n_in, n_hidden, n_out,learning_rate):
+        """Initialize the parameters for the multilayer perceptron
+
+        :param input: symbolic variable that describes the input of the 
+        architecture (one minibatch)
+
+        :param n_in: number of input units, the dimension of the space in 
+        which the datapoints lie
+
+        :param n_hidden: number of hidden units 
+
+        :param n_out: number of output units, the dimension of the space in 
+        which the labels lie
+
+        """
+
+        # initialize the parameters theta = (W1,b1,W2,b2) ; note that this 
+        # example contains only one hidden layer, but one can have as many 
+        # layers as he/she wishes, making the network deeper. The only 
+        # problem making the network deep this way is during learning, 
+        # backpropagation being unable to move the network from the starting
+        # point towards; this is where pre-training helps, giving a good 
+        # starting point for backpropagation, but more about this in the 
+        # other tutorials
+        
+        # `W1` is initialized with `W1_values` which is uniformely sampled
+        # from -6./sqrt(n_in+n_hidden) and 6./sqrt(n_in+n_hidden)
+        # the output of uniform if converted using asarray to dtype 
+        # theano.config.floatX so that the code is runable on GPU
+        W1_values = numpy.asarray( numpy.random.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_hidden)), \
+              high = numpy.sqrt(6./(n_in+n_hidden)), \
+              size = (n_in, n_hidden)), dtype = theano.config.floatX)
+        # `W2` is initialized with `W2_values` which is uniformely sampled 
+        # from -6./sqrt(n_hidden+n_out) and 6./sqrt(n_hidden+n_out)
+        # the output of uniform if converted using asarray to dtype 
+        # theano.config.floatX so that the code is runable on GPU
+        W2_values = numpy.asarray( numpy.random.uniform( 
+              low = -numpy.sqrt(6./(n_hidden+n_out)), \
+              high= numpy.sqrt(6./(n_hidden+n_out)),\
+              size= (n_hidden, n_out)), dtype = theano.config.floatX)
+
+        self.W1 = theano.shared( value = W1_values )
+        self.b1 = theano.shared( value = numpy.zeros((n_hidden,), 
+                                                dtype= theano.config.floatX))
+        self.W2 = theano.shared( value = W2_values )
+        self.b2 = theano.shared( value = numpy.zeros((n_out,), 
+                                                dtype= theano.config.floatX))
+
+        #include the learning rate in the classifer so
+        #we can modify it on the fly when we want
+        lr_value=learning_rate
+        self.lr=theano.shared(value=lr_value)
+        # symbolic expression computing the values of the hidden layer
+        self.hidden = T.tanh(T.dot(input, self.W1)+ self.b1)
+        
+        
+
+        # symbolic expression computing the values of the top layer 
+        self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2)
+
+        # compute prediction as class whose probability is maximal in 
+        # symbolic form
+        self.y_pred = T.argmax( self.p_y_given_x, axis =1)
+        self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1)
+        
+        
+        
+        
+        # L1 norm ; one regularization option is to enforce L1 norm to 
+        # be small 
+        self.L1     = abs(self.W1).sum() + abs(self.W2).sum()
+
+        # square of L2 norm ; one regularization option is to enforce 
+        # square of L2 norm to be small
+        self.L2_sqr = (self.W1**2).sum() + (self.W2**2).sum()
+
+
+
+    def negative_log_likelihood(self, y):
+        """Return the mean of the negative log-likelihood of the prediction
+        of this model under a given target distribution.
+
+        .. math::
+
+            \frac{1}{|\mathcal{D}|}\mathcal{L} (\theta=\{W,b\}, \mathcal{D}) = 
+            \frac{1}{|\mathcal{D}|}\sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
+                \ell (\theta=\{W,b\}, \mathcal{D}) 
+
+
+        :param y: corresponds to a vector that gives for each example the
+        :correct label
+        """
+        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+
+
+
+
+    def errors(self, y):
+        """Return a float representing the number of errors in the minibatch 
+        over the total number of examples of the minibatch 
+        """
+
+        # check if y has same dimension of y_pred 
+        if y.ndim != self.y_pred.ndim:
+            raise TypeError('y should have the same shape as self.y_pred', 
+                ('y', target.type, 'y_pred', self.y_pred.type))
+        # check if y is of the correct datatype        
+        if y.dtype.startswith('int'):
+            # the T.neq operator returns a vector of 0s and 1s, where 1
+            # represents a mistake in prediction
+            return T.mean(T.neq(self.y_pred, y))
+        else:
+            raise NotImplementedError()
+
+
+def mlp_full_nist(      verbose = False,\
+                        adaptive_lr = 1,\
+                        train_data = 'all/all_train_data.ft',\
+                        train_labels = 'all/all_train_labels.ft',\
+                        test_data = 'all/all_test_data.ft',\
+                        test_labels = 'all/all_test_labels.ft',\
+                        learning_rate=0.5,\
+                        L1_reg = 0.00,\
+                        L2_reg = 0.0001,\
+                        nb_max_exemples=1000000,\
+                        batch_size=20,\
+                        nb_hidden = 500,\
+                        nb_targets = 62,\
+			tau=1e6,\
+			main_class="d",\
+			start_ratio=1,\
+			end_ratio=1):
+   
+    
+    configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
+    
+    #save initial learning rate if classical adaptive lr is used
+    initial_lr=learning_rate
+    
+    total_validation_error_list = []
+    total_train_error_list = []
+    learning_rate_list=[]
+    best_training_error=float('inf');
+
+    # set up batches
+    batches = setup_batches.Batches()
+    batches.set_batches(main_class, start_ratio,end_ratio,batch_size,verbose)
+
+    train_batches = batches.get_train_batches()
+    test_batches = batches.get_test_batches()
+    validation_batches = batches.get_validation_batches()
+
+    ishape     = (32,32) # this is the size of NIST images
+
+    # allocate symbolic variables for the data
+    x = T.fmatrix()  # the data is presented as rasterized images
+    y = T.lvector()  # the labels are presented as 1D vector of 
+                          # [long int] labels
+
+    if verbose==True:
+        print 'finished parsing the data'
+    # construct the logistic regression class
+    classifier = MLP( input=x.reshape((batch_size,32*32)),\
+                        n_in=32*32,\
+                        n_hidden=nb_hidden,\
+                        n_out=nb_targets,
+                        learning_rate=learning_rate)
+                        
+                        
+   
+
+    # the cost we minimize during training is the negative log likelihood of 
+    # the model plus the regularization terms (L1 and L2); cost is expressed
+    # here symbolically
+    cost = classifier.negative_log_likelihood(y) \
+         + L1_reg * classifier.L1 \
+         + L2_reg * classifier.L2_sqr 
+
+    # compiling a theano function that computes the mistakes that are made by 
+    # the model on a minibatch
+    test_model = theano.function([x,y], classifier.errors(y))
+
+    # compute the gradient of cost with respect to theta = (W1, b1, W2, b2) 
+    g_W1 = T.grad(cost, classifier.W1)
+    g_b1 = T.grad(cost, classifier.b1)
+    g_W2 = T.grad(cost, classifier.W2)
+    g_b2 = T.grad(cost, classifier.b2)
+
+    # specify how to update the parameters of the model as a dictionary
+    updates = \
+        { classifier.W1: classifier.W1 - classifier.lr*g_W1 \
+        , classifier.b1: classifier.b1 - classifier.lr*g_b1 \
+        , classifier.W2: classifier.W2 - classifier.lr*g_W2 \
+        , classifier.b2: classifier.b2 - classifier.lr*g_b2 }
+
+    # compiling a theano function `train_model` that returns the cost, but in 
+    # the same time updates the parameter of the model based on the rules 
+    # defined in `updates`
+    train_model = theano.function([x, y], cost, updates = updates )
+    n_minibatches        = len(train_batches)
+
+   
+   
+    
+   
+   
+   #conditions for stopping the adaptation:
+   #1) we have reached  nb_max_exemples (this is rounded up to be a multiple of the train size)
+   #2) validation error is going up twice in a row(probable overfitting)
+   
+   # This means we no longer stop on slow convergence as low learning rates stopped
+   # too fast. 
+   
+   # no longer relevant
+    patience              =nb_max_exemples/batch_size
+    patience_increase     = 2     # wait this much longer when a new best is 
+                                  # found
+    improvement_threshold = 0.995 # a relative improvement of this much is 
+                                  # considered significant
+    validation_frequency = n_minibatches/4
+   
+     
+
+   
+    best_params          = None
+    best_validation_loss = float('inf')
+    best_iter            = 0
+    test_score           = 0.
+    start_time = time.clock()
+    n_iter = nb_max_exemples/batch_size  # nb of max times we are allowed to run through all exemples
+    n_iter = n_iter/n_minibatches + 1 #round up
+    n_iter=max(1,n_iter) # run at least once on short debug call
+    time_n=0 #in unit of exemples
+    
+    
+   
+    if verbose == True:
+        print 'looping at most %d times through the data set' %n_iter
+    for iter in xrange(n_iter* n_minibatches):
+
+        # get epoch and minibatch index
+        epoch           = iter / n_minibatches
+        minibatch_index =  iter % n_minibatches
+        
+	
+	if adaptive_lr==2:
+	    classifier.lr.value = tau*initial_lr/(tau+time_n)
+        
+        # get the minibatches corresponding to `iter` modulo
+        # `len(train_batches)`
+        x,y = train_batches[ minibatch_index ]
+        # convert to float
+        x_float = x/255.0
+        cost_ij = train_model(x_float,y)
+
+        if (iter+1) % validation_frequency == 0: 
+            # compute zero-one loss on validation set 
+            
+            this_validation_loss = 0.
+            for x,y in validation_batches:
+                # sum up the errors for each minibatch
+                x_float = x/255.0
+                this_validation_loss += test_model(x_float,y)
+            # get the average by dividing with the number of minibatches
+            this_validation_loss /= len(validation_batches)
+            #save the validation loss
+            total_validation_error_list.append(this_validation_loss)
+            
+            #get the training error rate
+            this_train_loss=0
+            for x,y in train_batches:
+                # sum up the errors for each minibatch
+                x_float = x/255.0
+                this_train_loss += test_model(x_float,y)
+            # get the average by dividing with the number of minibatches
+            this_train_loss /= len(train_batches)
+            #save the validation loss
+            total_train_error_list.append(this_train_loss)
+            if(this_train_loss<best_training_error):
+                best_training_error=this_train_loss
+                
+            if verbose == True:
+                print('epoch %i, minibatch %i/%i, validation error %f, training error %f %%' % \
+                    (epoch, minibatch_index+1, n_minibatches, \
+                        this_validation_loss*100.,this_train_loss*100))
+		print 'learning rate = %f' %classifier.lr.value
+		print 'time  = %i' %time_n
+                        
+                        
+            #save the learning rate
+            learning_rate_list.append(classifier.lr.value)
+
+
+            # if we got the best validation score until now
+            if this_validation_loss < best_validation_loss:
+                # save best validation score and iteration number
+                best_validation_loss = this_validation_loss
+                best_iter = iter
+                # reset patience if we are going down again
+                # so we continue exploring
+                patience=nb_max_exemples/batch_size
+                # test it on the test set
+                test_score = 0.
+                for x,y in test_batches:
+                    x_float=x/255.0
+                    test_score += test_model(x_float,y)
+                test_score /= len(test_batches)
+                if verbose == True:
+                    print(('     epoch %i, minibatch %i/%i, test error of best '
+                        'model %f %%') % 
+                                (epoch, minibatch_index+1, n_minibatches,
+                                test_score*100.))
+                                
+            # if the validation error is going up, we are overfitting (or oscillating)
+            # stop converging but run at least to next validation
+            # to check overfitting or ocsillation
+            # the saved weights of the model will be a bit off in that case
+            elif this_validation_loss >= best_validation_loss:
+                #calculate the test error at this point and exit
+                # test it on the test set
+                # however, if adaptive_lr is true, try reducing the lr to
+                # get us out of an oscilliation
+                if adaptive_lr==1:
+                    classifier.lr.value=classifier.lr.value/2.0
+
+                test_score = 0.
+                #cap the patience so we are allowed one more validation error
+                #calculation before aborting
+                patience = iter+validation_frequency+1
+                for x,y in test_batches:
+                    x_float=x/255.0
+                    test_score += test_model(x_float,y)
+                test_score /= len(test_batches)
+                if verbose == True:
+                    print ' validation error is going up, possibly stopping soon'
+                    print(('     epoch %i, minibatch %i/%i, test error of best '
+                        'model %f %%') % 
+                                (epoch, minibatch_index+1, n_minibatches,
+                                test_score*100.))
+                                
+                
+
+
+        if iter>patience:
+            print 'we have diverged'
+            break
+
+
+    	time_n= time_n + batch_size
+    end_time = time.clock()
+    if verbose == True:
+        print(('Optimization complete. Best validation score of %f %% '
+            'obtained at iteration %i, with test performance %f %%') %  
+                    (best_validation_loss * 100., best_iter, test_score*100.))
+        print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
+        print iter
+        
+    #save the model and the weights
+    numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value)
+    numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\
+    learning_rate_list=learning_rate_list)
+    
+    return (best_training_error*100.0,best_validation_loss * 100.,test_score*100.,best_iter*batch_size,(end_time-start_time)/60)
+
+
+if __name__ == '__main__':
+    mlp_full_nist(True)
+
+def jobman_mlp_full_nist(state,channel):
+    (train_error,validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\
+                                                                nb_max_exemples=state.nb_max_exemples,\
+                                                                nb_hidden=state.nb_hidden,\
+                                                                adaptive_lr=state.adaptive_lr,\
+								tau=state.tau,\
+								main_class=state.main_class,\
+								start_ratio=state.start_ratio,\
+								end_ratio=state.end_ratio)
+    state.train_error=train_error
+    state.validation_error=validation_error
+    state.test_error=test_error
+    state.nb_exemples=nb_exemples
+    state.time=time
+    return channel.COMPLETE
+                                                                
+                                                                
\ No newline at end of file
diff -r 01445a75c702 -r 0b7e64e8e93f datasets/defs.py
--- a/datasets/defs.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/datasets/defs.py	Sun Apr 25 17:12:03 2010 -0400
@@ -1,5 +1,5 @@
 __all__ = ['nist_digits', 'nist_lower', 'nist_upper', 'nist_all', 'ocr', 
-           'nist_P07', 'mnist']
+           'nist_P07', 'PNIST07', 'mnist']
 
 from ftfile import FTDataSet
 from gzpklfile import GzpklDataSet
@@ -52,6 +52,15 @@
                      valid_data = [os.path.join(DATA_PATH,'data/P07_valid_data.ft')],
                      valid_lbl = [os.path.join(DATA_PATH,'data/P07_valid_labels.ft')],
                      indtype=theano.config.floatX, inscale=255., maxsize=maxsize)
+		     
+#Added PNIST07
+PNIST07 = lambda maxsize=None, min_file=0, max_file=100: FTDataSet(train_data = [os.path.join(DATA_PATH,'data/PNIST07_train'+str(i)+'_data.ft') for i in range(min_file, max_file)],
+                     train_lbl = [os.path.join(DATA_PATH,'data/PNIST07_train'+str(i)+'_labels.ft') for i in range(min_file, max_file)],
+                     test_data = [os.path.join(DATA_PATH,'data/PNIST07_test_data.ft')],
+                     test_lbl = [os.path.join(DATA_PATH,'data/PNIST07_test_labels.ft')],
+                     valid_data = [os.path.join(DATA_PATH,'data/PNIST07_valid_data.ft')],
+                     valid_lbl = [os.path.join(DATA_PATH,'data/PNIST07_valid_labels.ft')],
+                     indtype=theano.config.floatX, inscale=255., maxsize=maxsize)
 
 mnist = lambda maxsize=None: GzpklDataSet(os.path.join(DATA_PATH,'mnist.pkl.gz'),
                                           maxsize=maxsize)
diff -r 01445a75c702 -r 0b7e64e8e93f deep/convolutional_dae/salah_exp/config.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/convolutional_dae/salah_exp/config.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,177 @@
+'''
+These are parameters used by nist_sda.py. They'll end up as globals in there.
+
+Rename this file to config.py and configure as needed.
+DON'T add the renamed file to the repository, as others might use it
+without realizing it, with dire consequences.
+'''
+
+# Set this to True when you want to run cluster tests, ie. you want
+# to run on the cluster, many jobs, but want to reduce the training
+# set size and the number of epochs, so you know everything runs
+# fine on the cluster.
+# Set this PRIOR to inserting your test jobs in the DB.
+TEST_CONFIG = False
+
+NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
+NIST_ALL_TRAIN_SIZE = 649081
+# valid et test =82587 82587 
+
+# change "sandbox" when you're ready
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/rifaisal_csda'
+EXPERIMENT_PATH = "ift6266.deep.convolutional_dae.salah_exp.nist_csda.jobman_entrypoint"
+
+##Pour lancer des travaux sur le cluster: (il faut etre ou se trouve les fichiers)
+##python nist_sda.py jobman_insert
+##dbidispatch --condor --repeat_jobs=2 jobman sql 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_finetuningSDA0' .  #C'est le path dans config.py
+
+# reduce training set to that many examples
+REDUCE_TRAIN_TO = None
+# that's a max, it usually doesn't get to that point
+MAX_FINETUNING_EPOCHS = 1000
+# number of minibatches before taking means for valid error etc.
+REDUCE_EVERY = 100
+#Set the finetune dataset
+FINETUNE_SET=1
+#Set the pretrain dataset used. 0: NIST, 1:P07
+PRETRAIN_CHOICE=1
+
+
+if TEST_CONFIG:
+    REDUCE_TRAIN_TO = 1000
+    MAX_FINETUNING_EPOCHS = 2
+    REDUCE_EVERY = 10
+
+
+# This is to configure insertion of jobs on the cluster.
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
+
+
+JOB_VALS = {'pretraining_lr': [0.01],#, 0.001],#, 0.0001],
+        'pretraining_epochs_per_layer': [10],
+        'kernels' : [[[52,5,5], [32,3,3]], [[52,7,7], [52,3,3]]],
+        'mlp_size' : [[1000],[500]],
+        'imgshp' : [[32,32]],
+        'max_pool_layers' : [[[2,2],[2,2]]],
+        'corruption_levels': [[0.2,0.1]],
+        'minibatch_size': [100],
+        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
+        'max_finetuning_epochs_P07':[1000],
+        'finetuning_lr':[0.1,0.01], #0.001 was very bad, so we leave it out
+        'num_hidden_layers':[2],
+        'finetune_set':[1],
+        'pretrain_choice':[1]
+        }
+
+DEFAULT_HP_NIST = {'pretraining_lr': 0.01,
+        'pretraining_epochs_per_layer': 1,
+        'kernels' : [[4,5,5], [2,3,3]],
+        'mlp_size' : [10],
+        'imgshp' : [32,32],
+        'max_pool_layers' : [[2,2],[2,2]],
+        'corruption_levels': [0.1,0.2],
+        'minibatch_size': 20,
+        'max_finetuning_epochs':MAX_FINETUNING_EPOCHS,
+        'max_finetuning_epochs_P07':1000,
+        'finetuning_lr':0.1, #0.001 was very bad, so we leave it out
+        'num_hidden_layers':2,
+        'finetune_set':1,
+        'pretrain_choice':1,
+        #'reduce_train_to':1000,
+        }
+
+                    
+                    
+##[pannetis@ceylon test]$ python nist_sda.py test_jobman_entrypoint
+##WARNING: untracked file /u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/TMP_DBI/configobj.py
+##WARNING: untracked file /u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/TMP_DBI/utils.py
+##WARNING: untracked file /u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/config.py
+##WARNING: untracked file /u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/config2.py
+##Creating optimizer with state,  DD{'reduce_train_to': 11000, 'pretraining_epochs_per_layer': 2, 'hidden_layers_sizes': 300, 'num_hidden_layers': 2, 'corruption_levels': 0.20000000000000001, 'finetuning_lr': 0.10000000000000001, 'pretrain_choice': 0, 'max_finetuning_epochs': 2, 'version_pylearn': '08b37147dec1', 'finetune_set': -1, 'pretraining_lr': 0.10000000000000001, 'version_ift6266': 'a6b6b1140de9', 'version_theano': 'fb6c3a06cb65', 'minibatch_size': 20}
+##SdaSgdOptimizer, max_minibatches = 11000
+##C##n_outs 62
+##pretrain_lr 0.1
+##finetune_lr 0.1
+##----
+##
+##pretraining with NIST
+##
+##STARTING PRETRAINING, time =  2010-03-29 15:07:43.945981
+##Pre-training layer 0, epoch 0, cost  113.562562494
+##Pre-training layer 0, epoch 1, cost  113.410032944
+##Pre-training layer 1, epoch 0, cost  98.4539954687
+##Pre-training layer 1, epoch 1, cost  97.8658966686
+##Pretraining took 9.011333 minutes
+##
+##SERIE OF 3 DIFFERENT FINETUNINGS
+##
+##
+##finetune with NIST
+##
+##
+##STARTING FINETUNING, time =  2010-03-29 15:16:46.512235
+##epoch 1, minibatch 4999, validation error on P07 : 29.511250 %
+##     epoch 1, minibatch 4999, test error on dataset NIST  (train data) of best model 40.408509 %
+##     epoch 1, minibatch 4999, test error on dataset P07 of best model 96.700000 %
+##epoch 1, minibatch 9999, validation error on P07 : 25.560000 %
+##     epoch 1, minibatch 9999, test error on dataset NIST  (train data) of best model 34.778969 %
+##     epoch 1, minibatch 9999, test error on dataset P07 of best model 97.037500 %
+##
+##Optimization complete with best validation score of 25.560000 %,with test performance 34.778969 % on dataset NIST 
+##The test score on the P07 dataset is 97.037500
+##The finetuning ran for 3.281833 minutes
+##
+##
+##finetune with P07
+##
+##
+##STARTING FINETUNING, time =  2010-03-29 15:20:06.346009
+##epoch 1, minibatch 4999, validation error on NIST : 65.226250 %
+##     epoch 1, minibatch 4999, test error on dataset P07  (train data) of best model 84.465000 %
+##     epoch 1, minibatch 4999, test error on dataset NIST of best model 65.965237 %
+##epoch 1, minibatch 9999, validation error on NIST : 58.745000 %
+##     epoch 1, minibatch 9999, test error on dataset P07  (train data) of best model 80.405000 %
+##     epoch 1, minibatch 9999, test error on dataset NIST of best model 61.341923 %
+##
+##Optimization complete with best validation score of 58.745000 %,with test performance 80.405000 % on dataset P07 
+##The test score on the NIST dataset is 61.341923
+##The finetuning ran for 3.299500 minutes
+##
+##
+##finetune with NIST (done earlier) followed by P07 (written here)
+##
+##
+##STARTING FINETUNING, time =  2010-03-29 15:23:27.947374
+##epoch 1, minibatch 4999, validation error on NIST : 83.975000 %
+##     epoch 1, minibatch 4999, test error on dataset P07  (train data) of best model 83.872500 %
+##     epoch 1, minibatch 4999, test error on dataset NIST of best model 43.170010 %
+##epoch 1, minibatch 9999, validation error on NIST : 79.775000 %
+##     epoch 1, minibatch 9999, test error on dataset P07  (train data) of best model 80.971250 %
+##     epoch 1, minibatch 9999, test error on dataset NIST of best model 49.017468 %
+##
+##Optimization complete with best validation score of 79.775000 %,with test performance 80.971250 % on dataset P07 
+##The test score on the NIST dataset is 49.017468
+##The finetuning ran for 2.851500 minutes
+##
+##
+##finetune with NIST only on the logistic regression on top.
+##        All hidden units output are input of the logistic regression
+##
+##
+##STARTING FINETUNING, time =  2010-03-29 15:26:21.430557
+##epoch 1, minibatch 4999, validation error on P07 : 95.223750 %
+##     epoch 1, minibatch 4999, test error on dataset NIST  (train data) of best model 93.268765 %
+##     epoch 1, minibatch 4999, test error on dataset P07 of best model 96.535000 %
+##epoch 1, minibatch 9999, validation error on P07 : 95.223750 %
+##
+##Optimization complete with best validation score of 95.223750 %,with test performance 93.268765 % on dataset NIST 
+##The test score on the P07 dataset is 96.535000
+##The finetuning ran for 2.013167 minutes
+##Closing remaining open files: /u/pannetis/IFT6266/test/series.h5... done
+##[pannetis@ceylon test]$ 
+
+
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/convolutional_dae/salah_exp/nist_csda.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/convolutional_dae/salah_exp/nist_csda.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,261 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import ift6266
+import pylearn
+
+import numpy 
+import theano
+import time
+
+import pylearn.version
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+import copy
+import sys
+import os
+import os.path
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_cartesien_jobs
+from copy import copy
+
+from sgd_optimization_new import CSdASgdOptimizer
+
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
+
+from ift6266 import datasets
+from config import *
+
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
+'''
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
+    channel.save()
+
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    #REDUCE_TRAIN_TO = 40000
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+        
+    if state.has_key('decrease_lr'):
+        decrease_lr = state['decrease_lr']
+    else :
+        decrease_lr = 0
+ 
+    n_ins = 32*32
+    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+     
+    examples_per_epoch = 100000#NIST_ALL_TRAIN_SIZE
+    
+    #To be sure variables will not be only in the if statement
+    PATH = ''
+    nom_reptrain = ''
+    nom_serie = ""
+    if state['pretrain_choice'] == 0:
+        nom_serie="series_NIST.h5"
+    elif state['pretrain_choice'] == 1:
+        nom_serie="series_P07.h5"
+
+    series = create_series(state.num_hidden_layers,nom_serie)
+
+
+    print "Creating optimizer with state, ", state
+
+    optimizer = CSdASgdOptimizer(dataset=datasets.nist_all(), 
+                                    hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    examples_per_epoch=examples_per_epoch, \
+                                    series=series,
+                                    max_minibatches=rtt)
+
+    parameters=[]
+    #Number of files of P07 used for pretraining
+    nb_file=0
+    if state['pretrain_choice'] == 0:
+        print('\n\tpretraining with NIST\n')
+        optimizer.pretrain(datasets.nist_all()) 
+    elif state['pretrain_choice'] == 1:
+        #To know how many file will be used during pretraining
+        nb_file = int(state['pretraining_epochs_per_layer']) 
+        state['pretraining_epochs_per_layer'] = 1 #Only 1 time over the dataset
+        if nb_file >=100:
+            sys.exit("The code does not support this much pretraining epoch (99 max with P07).\n"+
+            "You have to correct the code (and be patient, P07 is huge !!)\n"+
+             "or reduce the number of pretraining epoch to run the code (better idea).\n")
+        print('\n\tpretraining with P07')
+        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file)) 
+    channel.save()
+    
+    #Set some of the parameters used for the finetuning
+    if state.has_key('finetune_set'):
+        finetune_choice=state['finetune_set']
+    else:
+        finetune_choice=FINETUNE_SET
+    
+    if state.has_key('max_finetuning_epochs'):
+        max_finetune_epoch_NIST=state['max_finetuning_epochs']
+    else:
+        max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS
+    
+    if state.has_key('max_finetuning_epochs_P07'):
+        max_finetune_epoch_P07=state['max_finetuning_epochs_P07']
+    else:
+        max_finetune_epoch_P07=max_finetune_epoch_NIST
+    
+    #Decide how the finetune is done
+    
+    if finetune_choice == 0:
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 1:
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 2:
+        print('\n\n\tfinetune with P07 followed by NIST\n\n')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr)
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 3:
+        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\
+        All hidden units output are input of the logistic regression\n\n')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        
+        
+    if finetune_choice==-1:
+        print('\nSERIE OF 4 DIFFERENT FINETUNINGS')
+        print('\n\n\tfinetune with NIST\n\n')
+        sys.stdout.flush()
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_finetune_P07.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\
+        All hidden units output are input of the logistic regression\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        channel.save()
+    
+    channel.save()
+
+    return channel.COMPLETE
+
+# These Series objects are used to save various statistics
+# during the training.
+def create_series(num_hidden_layers, nom_serie):
+
+    # Replace series we don't want to save with DummySeries, e.g.
+    # series['training_error'] = DummySeries()
+
+    series = {}
+
+    basedir = os.getcwd()
+
+    h5f = tables.openFile(os.path.join(basedir, nom_serie), "w")
+
+    #REDUCE_EVERY=10
+    # reconstruction
+    reconstruction_base = \
+                ErrorSeries(error_name="reconstruction_error",
+                    table_name="reconstruction_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['reconstruction_error'] = \
+                AccumulatorSeriesWrapper(base_series=reconstruction_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # train
+    training_base = \
+                ErrorSeries(error_name="training_error",
+                    table_name="training_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['training_error'] = \
+                AccumulatorSeriesWrapper(base_series=training_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # valid and test are not accumulated/mean, saved directly
+    series['validation_error'] = \
+                ErrorSeries(error_name="validation_error",
+                    table_name="validation_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    series['test_error'] = \
+                ErrorSeries(error_name="test_error",
+                    table_name="test_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    param_names = []
+    for i in range(num_hidden_layers):
+        param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i]
+    param_names += ['logreg_layer_W', 'logreg_layer_b']
+
+    # comment out series we don't want to save
+    series['params'] = SharedParamsStatisticsWrapper(
+                        new_group_name="params",
+                        base_group="/",
+                        arrays_names=param_names,
+                        hdf5_file=h5f,
+                        index_names=('epoch',))
+
+    return series
+
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
+def jobman_insert_nist():
+    jobs = produit_cartesien_jobs(JOB_VALS)
+
+    db = jobman.sql.db(JOBDB)
+    for job in jobs:
+        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
+        jobman.sql.insert_dict(job, db)
+
+    print "inserted"
+
+if __name__ == '__main__':
+
+    args = sys.argv[1:]
+
+    #if len(args) > 0 and args[0] == 'load_nist':
+    #    test_load_nist()
+
+    if len(args) > 0 and args[0] == 'jobman_insert':
+        jobman_insert_nist()
+
+    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+        jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
+
+    else:
+        print "Bad arguments"
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/convolutional_dae/salah_exp/sgd_optimization_new.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/convolutional_dae/salah_exp/sgd_optimization_new.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,394 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import numpy
+import theano
+import time
+import datetime
+import theano.tensor as T
+import sys
+import pickle
+
+from jobman import DD
+import jobman, jobman.sql
+from copy import copy
+
+from stacked_convolutional_dae_uit import CSdA
+
+from ift6266.utils.seriestables import *
+
+buffersize=1000
+
+default_series = { \
+        'reconstruction_error' : DummySeries(),
+        'training_error' : DummySeries(),
+        'validation_error' : DummySeries(),
+        'test_error' : DummySeries(),
+        'params' : DummySeries()
+        }
+
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
+def get_conv_shape(kernels,imgshp,batch_size,max_pool_layers):
+    # Returns the dimension at the output of the convoluational net
+    # and a list of Image and kernel shape for every
+    # Convolutional layer
+    conv_layers=[]
+    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\
+                   [ batch_size , 1, imgshp[0], imgshp[1] ],
+                    max_pool_layers[0] ]
+    conv_layers.append(init_layer)
+
+    conv_n_out = int((32-kernels[0][2]+1)/max_pool_layers[0][0])
+
+    for i in range(1,len(kernels)):
+        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\
+                  [ batch_size, kernels[i-1][0],conv_n_out,conv_n_out ],
+                   max_pool_layers[i] ]
+        conv_layers.append(layer)
+        conv_n_out = int( (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0])
+    conv_n_out=kernels[-1][0]*conv_n_out**2
+    return conv_n_out,conv_layers
+
+
+
+
+
+class CSdASgdOptimizer:
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
+                    examples_per_epoch, series=default_series, max_minibatches=None):
+        self.dataset = dataset
+        self.hp = hyperparameters
+        self.n_ins = n_ins
+        self.n_outs = n_outs
+        self.parameters_pre=[]
+
+        self.max_minibatches = max_minibatches
+        print "CSdASgdOptimizer, max_minibatches =", max_minibatches
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
+        self.series = series
+
+        self.rng = numpy.random.RandomState(1234)
+        self.init_classifier()
+
+        sys.stdout.flush()
+
+    def init_classifier(self):
+        print "Constructing classifier"
+
+        n_ins,convlayers = get_conv_shape(self.hp.kernels,self.hp.imgshp,self.hp.minibatch_size,self.hp.max_pool_layers)
+
+        self.classifier = CSdA(n_ins_mlp = n_ins,
+                               batch_size = self.hp.minibatch_size,
+                               conv_hidden_layers_sizes = convlayers,
+                               mlp_hidden_layers_sizes = self.hp.mlp_size, 
+                               corruption_levels = self.hp.corruption_levels,
+                               rng = self.rng, 
+                               n_out = self.n_outs,
+                               pretrain_lr = self.hp.pretraining_lr, 
+                               finetune_lr = self.hp.finetuning_lr)
+
+
+
+        #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
+
+        sys.stdout.flush()
+
+    def train(self):
+        self.pretrain(self.dataset)
+        self.finetune(self.dataset)
+
+    def pretrain(self,dataset):
+        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
+        sys.stdout.flush()
+
+        un_fichier=int(819200.0/self.hp.minibatch_size) #Number of batches in a P07 file
+
+        start_time = time.clock()
+        ## Pre-train layer-wise
+        for i in xrange(self.classifier.n_layers):
+            # go through pretraining epochs
+            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
+                # go through the training set
+                batch_index=0
+                count=0
+                num_files=0
+                for x,y in dataset.train(self.hp.minibatch_size):
+                    if x.shape[0] != self.hp.minibatch_size:
+                        continue
+                    c = self.classifier.pretrain_functions[i](x)
+                    count +=1
+
+                    self.series["reconstruction_error"].append((epoch, batch_index), c)
+                    batch_index+=1
+
+                    #if batch_index % 100 == 0:
+                    #    print "100 batches"
+
+                    # useful when doing tests
+                    if self.max_minibatches and batch_index >= self.max_minibatches:
+                        break
+
+                    #When we pass through the data only once (the case with P07)
+                    #There is approximately 800*1024=819200 examples per file (1k per example and files are 800M)
+                    if self.hp.pretraining_epochs_per_layer == 1 and count%un_fichier == 0:
+                        print 'Pre-training layer %i, epoch %d, cost '%(i,num_files),c
+                        num_files+=1
+                        sys.stdout.flush()
+                        self.series['params'].append((num_files,), self.classifier.all_params)
+
+                #When NIST is used
+                if self.hp.pretraining_epochs_per_layer > 1:
+                    print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
+                    sys.stdout.flush()
+
+                    self.series['params'].append((epoch,), self.classifier.all_params)
+        end_time = time.clock()
+
+        print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
+        self.hp.update({'pretraining_time': end_time-start_time})
+
+        sys.stdout.flush()
+
+        #To be able to load them later for tests on finetune
+        self.parameters_pre=[copy(x.value) for x in self.classifier.params]
+        f = open('params_pretrain.txt', 'w')
+        pickle.dump(self.parameters_pre,f)
+        f.close()
+    def finetune(self,dataset,dataset_test,num_finetune,ind_test,special=0,decrease=0):
+
+        if special != 0 and special != 1:
+            sys.exit('Bad value for variable special. Must be in {0,1}')
+        print "STARTING FINETUNING, time = ", datetime.datetime.now()
+
+        minibatch_size = self.hp.minibatch_size
+        if ind_test == 0 or ind_test == 20:
+            nom_test = "NIST"
+            nom_train="P07"
+        else:
+            nom_test = "P07"
+            nom_train = "NIST"
+
+
+        # create a function to compute the mistakes that are made by the model
+        # on the validation set, or testing set
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #         givens = {
+        #           self.classifier.x: ensemble_x,
+        #           self.classifier.y: ensemble_y]})
+
+        validate_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #        givens = {
+        #           self.classifier.x: ,
+        #           self.classifier.y: ]})
+        # early-stopping parameters
+        patience              = 10000 # look as this many examples regardless
+        patience_increase     = 2.    # wait this much longer when a new best is
+                                      # found
+        improvement_threshold = 0.995 # a relative improvement of this much is
+                                      # considered significant
+        validation_frequency  = min(self.mb_per_epoch, patience/2)
+                                      # go through this many
+                                      # minibatche before checking the network
+                                      # on the validation set; in this case we
+                                      # check every epoch
+        if self.max_minibatches and validation_frequency > self.max_minibatches:
+            validation_frequency = self.max_minibatches / 2
+        best_params          = None
+        best_validation_loss = float('inf')
+        test_score           = 0.
+        start_time = time.clock()
+
+        done_looping = False
+        epoch = 0
+
+        total_mb_index = 0
+        minibatch_index = 0
+        parameters_finetune=[]
+        learning_rate = self.hp.finetuning_lr
+
+
+        while (epoch < num_finetune) and (not done_looping):
+            epoch = epoch + 1
+
+            for x,y in dataset.train(minibatch_size,bufsize=buffersize):
+
+                minibatch_index += 1
+
+                if x.shape[0] != self.hp.minibatch_size:
+                    print 'bim'
+                    continue
+
+                cost_ij = self.classifier.finetune(x,y)#,learning_rate)
+                total_mb_index += 1
+
+                self.series["training_error"].append((epoch, minibatch_index), cost_ij)
+
+                if (total_mb_index+1) % validation_frequency == 0:
+                    #minibatch_index += 1
+                    #The validation set is always NIST (we want the model to be good on NIST)
+
+                    iter=dataset_test.valid(minibatch_size,bufsize=buffersize)
+ 
+
+                    if self.max_minibatches:
+                        iter = itermax(iter, self.max_minibatches)
+
+                    validation_losses = []
+
+                    for x,y in iter:
+                        if x.shape[0] != self.hp.minibatch_size:
+                            print 'bim'
+                            continue
+                        validation_losses.append(validate_model(x,y))
+
+                    this_validation_loss = numpy.mean(validation_losses)
+
+                    self.series["validation_error"].\
+                        append((epoch, minibatch_index), this_validation_loss*100.)
+
+                    print('epoch %i, minibatch %i, validation error on NIST : %f %%' % \
+                           (epoch, minibatch_index+1, \
+                            this_validation_loss*100.))
+
+
+                    # if we got the best validation score until now
+                    if this_validation_loss < best_validation_loss:
+
+                        #improve patience if loss improvement is good enough
+                        if this_validation_loss < best_validation_loss *  \
+                               improvement_threshold :
+                            patience = max(patience, total_mb_index * patience_increase)
+
+                        # save best validation score, iteration number and parameters
+                        best_validation_loss = this_validation_loss
+                        best_iter = total_mb_index
+                        parameters_finetune=[copy(x.value) for x in self.classifier.params]
+
+                        # test it on the test set
+                        iter = dataset.test(minibatch_size,bufsize=buffersize)
+                        if self.max_minibatches:
+                            iter = itermax(iter, self.max_minibatches)
+                        test_losses = []
+                        test_losses2 = []
+                        for x,y in iter:
+                            if x.shape[0] != self.hp.minibatch_size:
+                                print 'bim'
+                                continue
+                            test_losses.append(test_model(x,y))
+
+                        test_score = numpy.mean(test_losses)
+
+                        #test it on the second test set
+                        iter2 = dataset_test.test(minibatch_size,bufsize=buffersize)
+                        if self.max_minibatches:
+                            iter2 = itermax(iter2, self.max_minibatches)
+                        for x,y in iter2:
+                            if x.shape[0] != self.hp.minibatch_size:
+                                continue
+                            test_losses2.append(test_model(x,y))
+
+                        test_score2 = numpy.mean(test_losses2)
+
+                        self.series["test_error"].\
+                            append((epoch, minibatch_index), test_score*100.)
+
+                        print(('     epoch %i, minibatch %i, test error on dataset %s  (train data) of best '
+                              'model %f %%') %
+                                     (epoch, minibatch_index+1,nom_train,
+                                      test_score*100.))
+
+                        print(('     epoch %i, minibatch %i, test error on dataset %s of best '
+                              'model %f %%') %
+                                     (epoch, minibatch_index+1,nom_test,
+                                      test_score2*100.))
+
+                    if patience <= total_mb_index:
+                        done_looping = True
+                        break   #to exit the FOR loop
+
+                    sys.stdout.flush()
+
+                # useful when doing tests
+                if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                    break
+
+            if decrease == 1:
+                learning_rate /= 2 #divide the learning rate by 2 for each new epoch
+
+            self.series['params'].append((epoch,), self.classifier.all_params)
+
+            if done_looping == True:    #To exit completly the fine-tuning
+                break   #to exit the WHILE loop
+
+        end_time = time.clock()
+        self.hp.update({'finetuning_time':end_time-start_time,\
+                    'best_validation_error':best_validation_loss,\
+                    'test_score':test_score,
+                    'num_finetuning_epochs':epoch})
+
+        print(('\nOptimization complete with best validation score of %f %%,'
+               'with test performance %f %% on dataset %s ') %
+                     (best_validation_loss * 100., test_score*100.,nom_train))
+        print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.))
+
+        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+
+        sys.stdout.flush()
+
+        #Save a copy of the parameters in a file to be able to get them in the future
+
+        if special == 1:    #To keep a track of the value of the parameters
+            f = open('params_finetune_stanford.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+        elif ind_test == 0 | ind_test == 20:    #To keep a track of the value of the parameters
+            f = open('params_finetune_P07.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+
+        elif ind_test== 1:    #For the run with 2 finetunes. It will be faster.
+            f = open('params_finetune_NIST.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+        elif ind_test== 21:    #To keep a track of the value of the parameters
+            f = open('params_finetune_P07_then_NIST.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+    #Set parameters like they where right after pre-train or finetune
+    def reload_parameters(self,which):
+
+        #self.parameters_pre=pickle.load('params_pretrain.txt')
+        f = open(which)
+        self.parameters_pre=pickle.load(f)
+        f.close()
+        for idx,x in enumerate(self.parameters_pre):
+            if x.dtype=='float64':
+                self.classifier.params[idx].value=theano._asarray(copy(x),dtype=theano.config.floatX)
+            else:
+                self.classifier.params[idx].value=copy(x)
+
+    def training_error(self,dataset):
+        # create a function to compute the mistakes that are made by the model
+        # on the validation set, or testing set
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+
+        iter2 = dataset.train(self.hp.minibatch_size,bufsize=buffersize)
+        train_losses2 = [test_model(x,y) for x,y in iter2]
+        train_score2 = numpy.mean(train_losses2)
+        print "Training error is: " + str(train_score2)
diff -r 01445a75c702 -r 0b7e64e8e93f deep/convolutional_dae/salah_exp/stacked_convolutional_dae_uit.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/convolutional_dae/salah_exp/stacked_convolutional_dae_uit.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,243 @@
+import numpy
+import theano
+import time
+import sys
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import theano.sandbox.softsign
+import copy
+from theano.tensor.signal import downsample
+from theano.tensor.nnet import conv 
+
+sys.path.append('../../')
+#import ift6266.datasets
+import ift6266.datasets
+from ift6266.baseline.log_reg.log_reg import LogisticRegression
+
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
+
+
+
+class SigmoidalLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+
+        self.input = input
+
+        W_values = numpy.asarray( rng.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+ 
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+ 
+        self.output = T.tanh(T.dot(input, self.W) + self.b)
+        self.params = [self.W, self.b]
+ 
+class dA_conv(object):
+ 
+  def __init__(self, input, filter_shape, corruption_level = 0.1, 
+               shared_W = None, shared_b = None, image_shape = None, num = 0,batch_size=20):
+
+    theano_rng = RandomStreams()
+    
+    fan_in = numpy.prod(filter_shape[1:])
+    fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
+
+    center = theano.shared(value = 1, name="center")
+    scale = theano.shared(value = 2, name="scale")
+
+    if shared_W != None and shared_b != None :
+        self.W = shared_W
+        self.b = shared_b
+    else:
+        initial_W = numpy.asarray( numpy.random.uniform(
+              low = -numpy.sqrt(6./(fan_in+fan_out)),
+              high = numpy.sqrt(6./(fan_in+fan_out)),
+              size = filter_shape), dtype = theano.config.floatX)
+        initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
+        self.W = theano.shared(value = initial_W, name = "W")
+        self.b = theano.shared(value = initial_b, name = "b")
+    
+ 
+    initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX)
+
+    self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
+ 
+    self.x = input
+
+    self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x
+
+    conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
+                            image_shape=image_shape,
+                            unroll_kern=4,unroll_batch=4, 
+                            border_mode='valid')
+
+    
+    self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
+
+    
+    da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
+                       filter_shape[3] ]
+    da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, 
+                       image_shape[3]-filter_shape[3]+1 ]
+    #import pdb; pdb.set_trace()
+    initial_W_prime =  numpy.asarray( numpy.random.uniform( \
+              low = -numpy.sqrt(6./(fan_in+fan_out)), \
+              high = numpy.sqrt(6./(fan_in+fan_out)), \
+              size = da_filter_shape), dtype = theano.config.floatX)
+    self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
+
+    conv2_out = conv.conv2d(self.y, self.W_prime,
+                            filter_shape = da_filter_shape,\
+                            image_shape = da_image_shape, \
+                            unroll_kern=4,unroll_batch=4, \
+                            border_mode='full')
+
+    self.z =  (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
+    
+    if num != 0 :
+        scaled_x = (self.x + center) / scale
+    else: 
+        scaled_x = self.x
+    self.L = - T.sum( scaled_x*T.log(self.z) + (1-scaled_x)*T.log(1-self.z), axis=1 )
+
+    self.cost = T.mean(self.L)
+
+    self.params = [ self.W, self.b, self.b_prime ] 
+
+class LeNetConvPoolLayer(object):
+
+    def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)):
+        self.input = input
+  
+        W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
+        self.W = theano.shared(value=W_values)
+ 
+        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
+        self.b = theano.shared(value=b_values)
+ 
+        conv_out = conv.conv2d(input, self.W,
+                filter_shape=filter_shape, image_shape=image_shape,
+                               unroll_kern=4,unroll_batch=4)
+ 
+
+        fan_in = numpy.prod(filter_shape[1:])
+        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
+
+        W_bound = numpy.sqrt(6./(fan_in + fan_out))
+        self.W.value = numpy.asarray(
+                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
+                dtype = theano.config.floatX)
+  
+
+        pooled_out = downsample.max_pool2D(conv_out, poolsize, ignore_border=True)
+ 
+        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
+        self.params = [self.W, self.b]
+ 
+
+class CSdA():
+    def __init__(self, n_ins_mlp,batch_size, conv_hidden_layers_sizes,
+                 mlp_hidden_layers_sizes, corruption_levels, rng, n_out, 
+                 pretrain_lr, finetune_lr):
+
+        # Just to make sure those are not modified somewhere else afterwards
+        hidden_layers_sizes = copy.deepcopy(mlp_hidden_layers_sizes)
+        corruption_levels = copy.deepcopy(corruption_levels)
+
+        #update_locals(self, locals())
+
+
+        
+        self.layers = []
+        self.pretrain_functions = []
+        self.params = []
+        self.n_layers = len(conv_hidden_layers_sizes)
+        self.mlp_n_layers = len(mlp_hidden_layers_sizes)
+        
+        self.x = T.matrix('x') # the data is presented as rasterized images
+        self.y = T.ivector('y') # the labels are presented as 1D vector of
+        
+        for i in xrange( self.n_layers ):
+            filter_shape=conv_hidden_layers_sizes[i][0]
+            image_shape=conv_hidden_layers_sizes[i][1]
+            max_poolsize=conv_hidden_layers_sizes[i][2]
+                
+            if i == 0 :
+                layer_input=self.x.reshape((batch_size, 1, 32, 32))
+            else:
+                layer_input=self.layers[-1].output
+            
+            layer = LeNetConvPoolLayer(rng, input=layer_input,
+                                       image_shape=image_shape,
+                                       filter_shape=filter_shape,
+                                       poolsize=max_poolsize)
+            print 'Convolutional layer', str(i+1), 'created'
+            
+            self.layers += [layer]
+            self.params += layer.params
+
+            da_layer = dA_conv(corruption_level = corruption_levels[0],
+                               input = layer_input,
+                               shared_W = layer.W, shared_b = layer.b,
+                               filter_shape=filter_shape,
+                               image_shape = image_shape, num=i , batch_size=batch_size)
+            
+            gparams = T.grad(da_layer.cost, da_layer.params)
+            
+            updates = {}
+            for param, gparam in zip(da_layer.params, gparams):
+                updates[param] = param - gparam * pretrain_lr
+            
+            update_fn = theano.function([self.x], da_layer.cost, updates = updates)
+            
+            self.pretrain_functions += [update_fn]
+            
+        for i in xrange( self.mlp_n_layers ): 
+            if i == 0 :
+                input_size = n_ins_mlp
+            else:
+                input_size = mlp_hidden_layers_sizes[i-1]
+            
+            if i == 0 :
+                if len( self.layers ) == 0 :
+                    layer_input=self.x
+                else :
+                    layer_input = self.layers[-1].output.flatten(2)
+            else:
+                layer_input = self.layers[-1].output
+            
+            layer = SigmoidalLayer(rng, layer_input, input_size,
+                                        mlp_hidden_layers_sizes[i] )
+            
+            self.layers += [layer]
+            self.params += layer.params
+            
+            print 'MLP layer', str(i+1), 'created'
+            
+        self.logLayer = LogisticRegression(input=self.layers[-1].output, \
+                                                     n_in=mlp_hidden_layers_sizes[-1], n_out=n_out)
+
+
+        self.params += self.logLayer.params
+        self.all_params = self.params
+        cost = self.logLayer.negative_log_likelihood(self.y)
+        
+        gparams = T.grad(cost, self.params)
+
+        updates = {}
+        for param,gparam in zip(self.params, gparams):
+            updates[param] = param - gparam*finetune_lr
+        
+        self.finetune = theano.function([self.x, self.y], cost, updates = updates)
+        
+        self.errors = self.logLayer.errors(self.y)
+
+
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/convolutional_dae/salah_exp/utils.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/convolutional_dae/salah_exp/utils.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/python
+# coding: utf-8
+
+from __future__ import with_statement
+
+from jobman import DD
+
+# from pylearn codebase
+# useful in __init__(param1, param2, etc.) to save
+# values in self.param1, self.param2... just call
+# update_locals(self, locals())
+def update_locals(obj, dct):
+    if 'self' in dct:
+        del dct['self']
+    obj.__dict__.update(dct)
+
+# from a dictionary of possible values for hyperparameters, e.g.
+# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]}
+# create a list of other dictionaries representing all the possible
+# combinations, thus in this example creating:
+# [{'learning_rate': 0.1, 'num_layers': 1}, ...]
+# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2))
+def produit_cartesien_jobs(val_dict):
+    job_list = [DD()]
+    all_keys = val_dict.keys()
+
+    for key in all_keys:
+        possible_values = val_dict[key]
+        new_job_list = []
+        for val in possible_values:
+            for job in job_list:
+                to_insert = job.copy()
+                to_insert.update({key: val})
+                new_job_list.append(to_insert)
+        job_list = new_job_list
+
+    return job_list
+
+def test_produit_cartesien_jobs():
+    vals = {'a': [1,2], 'b': [3,4,5]}
+    print produit_cartesien_jobs(vals)
+
+
+# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
+"""Simple module for getting amount of memory used by a specified user's
+processes on a UNIX system.
+It uses UNIX ps utility to get the memory usage for a specified username and
+pipe it to awk for summing up per application memory usage and return the total.
+Python's Popen() from subprocess module is used for spawning ps and awk.
+
+"""
+
+import subprocess
+
+class MemoryMonitor(object):
+
+    def __init__(self, username):
+        """Create new MemoryMonitor instance."""
+        self.username = username
+
+    def usage(self):
+        """Return int containing memory used by user's processes."""
+        self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username,
+                                        shell=True,
+                                        stdout=subprocess.PIPE,
+                                        )
+        self.stdout_list = self.process.communicate()[0].split('\n')
+        return int(self.stdout_list[0])
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/crbm/__init__.py
diff -r 01445a75c702 -r 0b7e64e8e93f deep/crbm/crbm.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/crbm/crbm.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,374 @@
+import sys
+import os, os.path
+
+import numpy
+
+import theano
+
+USING_GPU = "gpu" in theano.config.device
+
+import theano.tensor as T
+from theano.tensor.nnet import conv, sigmoid
+
+if not USING_GPU:
+    from theano.tensor.shared_randomstreams import RandomStreams
+else:
+    from theano.sandbox.rng_mrg import MRG_RandomStreams
+
+_PRINT_GRAPHS = True
+
+def _init_conv_biases(num_filters, varname, rng=numpy.random):
+    b_shp = (num_filters,)
+    b = theano.shared( numpy.asarray(
+                rng.uniform(low=-.5, high=.5, size=b_shp),
+                dtype=theano.config.floatX), name=varname)
+    return b
+
+def _init_conv_weights(conv_params, varname, rng=numpy.random):
+    cp = conv_params
+
+    # initialize shared variable for weights.
+    w_shp = conv_params.as_conv2d_shape_tuple()
+    w_bound =  numpy.sqrt(cp.num_input_planes * \
+                    cp.height_filters * cp.width_filters)
+    W = theano.shared( numpy.asarray(
+                rng.uniform(
+                    low=-1.0 / w_bound,
+                    high=1.0 / w_bound,
+                    size=w_shp),
+                dtype=theano.config.floatX), name=varname)
+
+    return W
+
+# Shape of W for conv2d
+class ConvolutionParams:
+    def __init__(self, num_filters, num_input_planes, height_filters, width_filters):
+        self.num_filters = num_filters
+        self.num_input_planes = num_input_planes
+        self.height_filters = height_filters
+        self.width_filters = width_filters
+
+    def as_conv2d_shape_tuple(self):
+        cp = self
+        return (cp.num_filters, cp.num_input_planes,
+                    cp.height_filters, cp.width_filters)
+
+class CRBM:
+    def __init__(self, minibatch_size, image_size, conv_params,
+                 learning_rate, sparsity_lambda, sparsity_p):
+        '''
+        Parameters
+        ----------
+        image_size
+            height, width
+        '''
+        self.minibatch_size = minibatch_size
+        self.image_size = image_size
+        self.conv_params = conv_params
+
+        '''
+        Dimensions:
+        0- minibatch
+        1- plane/color
+        2- y (rows)
+        3- x (cols)
+        '''
+        self.x = T.tensor4('x')
+        self.h = T.tensor4('h')
+
+        self.lr = theano.shared(numpy.asarray(learning_rate,
+                                    dtype=theano.config.floatX))
+        self.sparsity_lambda = \
+                theano.shared( \
+                    numpy.asarray( \
+                        sparsity_lambda, 
+                        dtype=theano.config.floatX))
+        self.sparsity_p = \
+                theano.shared( \
+                    numpy.asarray(sparsity_p, \
+                            dtype=theano.config.floatX))
+
+        self.numpy_rng = numpy.random.RandomState(1234)
+
+        if not USING_GPU:
+            self.theano_rng = RandomStreams(self.numpy_rng.randint(2**30))
+        else:
+            self.theano_rng = MRG_RandomStreams(234, use_cuda=True)
+
+        self._init_params()
+        self._init_functions()
+
+    def _get_visibles_shape(self):
+        imsz = self.image_size
+        return (self.minibatch_size,
+                    self.conv_params.num_input_planes,
+                    imsz[0], imsz[1])
+
+    def _get_hiddens_shape(self):
+        cp = self.conv_params
+        imsz = self.image_size
+        wf, hf = cp.height_filters, cp.width_filters
+        return (self.minibatch_size, cp.num_filters, 
+                    imsz[0] - hf + 1, imsz[1] - wf + 1)
+
+    def _init_params(self):
+        cp = self.conv_params
+
+        self.W = _init_conv_weights(cp, 'W')
+        self.b_h = _init_conv_biases(cp.num_filters, 'b_h')
+        '''
+        Lee09 mentions "all visible units share a single bias c"
+        but for upper layers it's pretty clear we need one
+        per plane, by symmetry
+        '''
+        self.b_x = _init_conv_biases(cp.num_input_planes, 'b_x')
+
+        self.params = [self.W, self.b_h, self.b_x]
+
+        # flip filters horizontally and vertically
+        W_flipped = self.W[:, :, ::-1, ::-1]
+        # also have to invert the filters/num_planes
+        self.W_tilde = W_flipped.dimshuffle(1,0,2,3)
+
+    '''
+    I_up and I_down come from the symbol used in the 
+    Lee 2009 CRBM paper
+    '''
+    def _I_up(self, visibles_mb):
+        '''
+        output of conv is features maps of size
+                image_size - filter_size + 1
+        The dimshuffle serves to broadcast b_h so that it 
+        corresponds to output planes
+        '''
+        fshp = self.conv_params.as_conv2d_shape_tuple()
+        return conv.conv2d(visibles_mb, self.W,
+                    filter_shape=fshp) + \
+                    self.b_h.dimshuffle('x',0,'x','x')
+
+    def _I_down(self, hiddens_mb):
+        '''
+        notice border_mode='full'... we want to get
+        back the original size
+        so we get feature_map_size + filter_size - 1
+        The dimshuffle serves to broadcast b_x so that
+        it corresponds to output planes
+        '''
+        fshp = list(self.conv_params.as_conv2d_shape_tuple())
+        # num_filters and num_planes swapped
+        fshp[0], fshp[1] = fshp[1], fshp[0]
+        return conv.conv2d(hiddens_mb, self.W_tilde, 
+                    border_mode='full',filter_shape=tuple(fshp)) + \
+                self.b_x.dimshuffle('x',0,'x','x')
+
+    def _mean_free_energy(self, visibles_mb):
+        '''
+        visibles_mb is mb_size x num_planes x h x w
+
+        we want to match the summed input planes
+            (second dimension, first is mb index)
+        to respective bias terms for the visibles
+        The dimshuffle isn't really necessary, 
+            but I put it there for clarity.
+        '''
+        vbias_term = \
+            self.b_x.dimshuffle('x',0) * \
+            T.sum(visibles_mb,axis=[2,3])
+        # now sum over term per planes, get one free energy 
+        # contribution per element of minibatch
+        vbias_term = - T.sum(vbias_term, axis=1)
+
+        '''
+        Here it's a bit more complex, a few points:
+        - The usual free energy, in the fully connected case,
+            is a sum over all hiddens.
+          We do the same thing here, but each unit has limited
+            connectivity and there's weight reuse.
+          Therefore we only need to first do the convolutions
+            (with I_up) which gives us
+          what would normally be the Wx+b_h for each hidden.
+            Once we have this,
+          we take the log(1+exp(sum for this hidden)) elemwise 
+            for each hidden,
+          then we sum for all hiddens in one example of the minibatch.
+          
+        - Notice that we reuse the same b_h everywhere instead of 
+            using one b per hidden,
+          so the broadcasting for b_h done in I_up is all right.
+        
+        That sum is over all hiddens, so all filters
+             (planes of hiddens), x, and y.
+        In the end we get one free energy contribution per
+            example of the minibatch.
+        '''
+        softplused = T.log(1.0+T.exp(self._I_up(visibles_mb)))
+        # h_sz = self._get_hiddens_shape()
+        # this simplifies the sum
+        # num_hiddens = h_sz[1] * h_sz[2] * h_sz[3]
+        # reshaped = T.reshape(softplused, 
+        #       (self.minibatch_size, num_hiddens))
+
+        # this is because the 0,1,1,1 sum pattern is not 
+        # implemented on gpu, but the 1,0,1,1 pattern is
+        dimshuffled = softplused.dimshuffle(1,0,2,3)
+        xh_and_hbias_term = - T.sum(dimshuffled, axis=[0,2,3])
+
+        '''
+        both bias_term and vbias_term end up with one
+        contributor to free energy per minibatch
+        so we mean over minibatches
+        '''
+        return T.mean(vbias_term + xh_and_hbias_term)
+
+    def _init_functions(self):
+        # propup
+        # b_h is broadcasted keeping in mind we want it to
+        # correspond to each new plane (corresponding to filters)
+        I_up = self._I_up(self.x)
+        # expected values for the distributions for each hidden
+        E_h_given_x = sigmoid(I_up) 
+        # might be needed if we ever want a version where we
+        # take expectations instead of samples for CD learning
+        self.E_h_given_x_func = theano.function([self.x], E_h_given_x)
+
+        if _PRINT_GRAPHS:
+            print "----------------------\nE_h_given_x_func"
+            theano.printing.debugprint(self.E_h_given_x_func)
+
+        h_sample_given_x = \
+            self.theano_rng.binomial( \
+                            size = self._get_hiddens_shape(),
+                            n = 1, 
+                            p = E_h_given_x, 
+                            dtype = theano.config.floatX)
+
+        self.h_sample_given_x_func = \
+            theano.function([self.x],
+                    h_sample_given_x)
+
+        if _PRINT_GRAPHS:
+            print "----------------------\nh_sample_given_x_func"
+            theano.printing.debugprint(self.h_sample_given_x_func)
+
+        # propdown
+        I_down = self._I_down(self.h)
+        E_x_given_h = sigmoid(I_down)
+        self.E_x_given_h_func = theano.function([self.h], E_x_given_h)
+
+        if _PRINT_GRAPHS:
+            print "----------------------\nE_x_given_h_func"
+            theano.printing.debugprint(self.E_x_given_h_func)
+
+        x_sample_given_h = \
+            self.theano_rng.binomial( \
+                            size = self._get_visibles_shape(),
+                            n = 1, 
+                            p = E_x_given_h, 
+                            dtype = theano.config.floatX)
+
+        self.x_sample_given_h_func = \
+            theano.function([self.h], 
+                    x_sample_given_h)
+
+        if _PRINT_GRAPHS:
+            print "----------------------\nx_sample_given_h_func"
+            theano.printing.debugprint(self.x_sample_given_h_func)
+
+        ##############################################
+        # cd update done by grad of free energy
+        
+        x_tilde = T.tensor4('x_tilde') 
+        cd_update_cost = self._mean_free_energy(self.x) - \
+                            self._mean_free_energy(x_tilde)
+
+        cd_grad = T.grad(cd_update_cost, self.params)
+        # This is NLL minimization so we use a -
+        cd_updates = {self.W: self.W - self.lr * cd_grad[0],
+                    self.b_h: self.b_h - self.lr * cd_grad[1],
+                    self.b_x: self.b_x - self.lr * cd_grad[2]}
+
+        cd_returned = [cd_update_cost,
+                        cd_grad[0], cd_grad[1], cd_grad[2],
+                        self.lr * cd_grad[0],
+                        self.lr * cd_grad[1],
+                        self.lr * cd_grad[2]]
+        self.cd_return_desc = \
+            ['cd_update_cost',
+                'cd_grad_W', 'cd_grad_b_h', 'cd_grad_b_x',
+                'lr_times_cd_grad_W',
+                'lr_times_cd_grad_b_h',
+                'lr_times_cd_grad_b_x']
+    
+        self.cd_update_function = \
+                theano.function([self.x, x_tilde], 
+                    cd_returned, updates=cd_updates)
+
+        if _PRINT_GRAPHS:
+            print "----------------------\ncd_update_function"
+            theano.printing.debugprint(self.cd_update_function)
+
+        ##############
+        # sparsity update, based on grad for b_h only
+
+        '''
+        This mean returns an array of shape 
+            (num_hiddens_planes, feature_map_height, feature_map_width)
+        (so it's a mean over each unit's activation)
+        '''
+        mean_expected_activation = T.mean(E_h_given_x, axis=0)
+        # sparsity_p is broadcasted everywhere
+        sparsity_update_cost = \
+                T.sqr(self.sparsity_p - mean_expected_activation)
+        sparsity_update_cost = \
+            T.sum(T.sum(T.sum( \
+                sparsity_update_cost, axis=2), axis=1), axis=0)
+        sparsity_grad = T.grad(sparsity_update_cost, [self.W, self.b_h])
+
+        sparsity_returned = \
+            [sparsity_update_cost,
+             sparsity_grad[0], sparsity_grad[1],
+             self.sparsity_lambda * self.lr * sparsity_grad[0],
+             self.sparsity_lambda * self.lr * sparsity_grad[1]]
+        self.sparsity_return_desc = \
+            ['sparsity_update_cost',
+                'sparsity_grad_W',
+                'sparsity_grad_b_h',
+                'lambda_lr_times_sparsity_grad_W',
+                'lambda_lr_times_sparsity_grad_b_h']
+
+        # gradient _descent_ so we use a -
+        sparsity_update = \
+            {self.b_h: self.b_h - \
+                self.sparsity_lambda * self.lr * sparsity_grad[1],
+            self.W: self.W - \
+                self.sparsity_lambda * self.lr * sparsity_grad[0]}
+        self.sparsity_update_function = \
+            theano.function([self.x], 
+                sparsity_returned, updates=sparsity_update)
+
+        if _PRINT_GRAPHS:
+            print "----------------------\nsparsity_update_function"
+            theano.printing.debugprint(self.sparsity_update_function)
+
+    def CD_step(self, x):
+        h1 = self.h_sample_given_x_func(x)
+        x2 = self.x_sample_given_h_func(h1)
+        return self.cd_update_function(x, x2)
+
+    def sparsity_step(self, x):
+        return self.sparsity_update_function(x)
+
+    # these two also operate on minibatches
+
+    def random_gibbs_samples(self, num_updown_steps):
+        start_x = self.numpy_rng.rand(*self._get_visibles_shape())
+        return self.gibbs_samples_from(start_x, num_updown_steps)
+
+    def gibbs_samples_from(self, start_x, num_updown_steps):
+        x_sample = start_x
+        for i in xrange(num_updown_steps):
+            h_sample = self.h_sample_given_x_func(x_sample)
+            x_sample = self.x_sample_given_h_func(h_sample)
+        return x_sample
+
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/crbm/mnist_config.py.example
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/crbm/mnist_config.py.example	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,111 @@
+# ----------------------------------------------------------------------------
+# BEGIN EXPERIMENT ISOLATION CODE
+
+# Path to pass to jobman sqlschedule. IMPORTANT TO CHANGE TO REFLECT YOUR CLONE.
+# Make sure this is accessible from the default $PYTHONPATH (in your .bashrc)
+# (and make sure every subdirectory has its __init__.py file)
+EXPERIMENT_PATH = "ift6266_mnistcrbm_exp1.ift6266.deep.crbm.mnist_crbm.jobman_entrypoint"
+
+def isolate_experiment():
+    '''
+    This makes sure we use the codebase clone created for this experiment.
+    I.e. if you want to make modifications to the codebase but don't want your
+    running experiment code to be impacted by those changes, first copy the
+    codebase somewhere, and configure this section. It will make sure we import
+    from the right place.
+
+    MUST BE DONE BEFORE IMPORTING ANYTHING ELSE
+    (Leave this comment there so others will understand what's going on)
+    '''
+
+    # Place where you copied modules that should be frozen for this experiment
+    codebase_clone_path = "/u/savardf/ift6266/experiment_clones/ift6266_mnistcrbm_exp1"
+
+    # Places where there might be conflicting modules from your $PYTHONPATH
+    remove_these_from_pythonpath = ["/u/savardf/ift6266/dev_code"]
+
+    import sys
+    sys.path[0:0] = [codebase_clone_path]
+
+    # remove paths we specifically don't want in $PYTHONPATH
+    for bad_path in remove_these_from_pythonpath:
+        sys.path[:] = [el for el in sys.path if not el in (bad_path, bad_path+"/")]
+
+    # Make the imports
+    import ift6266
+
+    # Just making sure we're importing from the right place
+    modules_to_check = [ift6266]
+    for module in modules_to_check:
+        if not codebase_clone_path in module.__path__[0]:
+            raise RuntimeError("Module loaded from incorrect path "+module.__path__[0])
+
+# END EXPERIMENT ISOLATION CODE
+# ----------------------------------------------------------------------------
+
+from jobman import DD
+
+'''
+These are parameters used by mnist_crbm.py. They'll end up as globals in there.
+
+Rename this file to config.py and configure as needed.
+DON'T add the renamed file to the repository, as others might use it
+without realizing it, with dire consequences.
+'''
+
+# change "sandbox" when you're ready
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/yourtablenamehere'
+
+# Set this to True when you want to run cluster tests, ie. you want
+# to run on the cluster, many jobs, but want to reduce the training
+# set size and the number of epochs, so you know everything runs
+# fine on the cluster.
+# Set this PRIOR to inserting your test jobs in the DB.
+TEST_CONFIG = False
+
+# save params at training end
+SAVE_PARAMS = True
+
+IMAGE_OUTPUT_DIR = 'img/'
+
+# number of minibatches before taking means for valid error etc.
+REDUCE_EVERY = 100
+
+# print series to stdout too (otherwise just produce the HDF5 file)
+SERIES_STDOUT_TOO = False
+
+VISUALIZE_EVERY = 20000
+GIBBS_STEPS_IN_VIZ_CHAIN = 1000
+
+if TEST_CONFIG:
+    REDUCE_EVERY = 10
+    VISUALIZE_EVERY = 20
+
+# This is to configure insertion of jobs on the cluster.
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
+JOB_VALS = {'learning_rate': [1.0, 0.1, 0.01],
+        'sparsity_lambda': [3.0,0.5],
+        'sparsity_p': [0.3,0.05],
+        'num_filters': [40,15],
+        'filter_size': [12,7],
+        'minibatch_size': [20],
+        'num_epochs': [20]}
+
+# Just useful for tests... minimal number of epochs
+# Useful when launching a single local job
+DEFAULT_STATE = DD({'learning_rate': 0.1,
+        'sparsity_lambda': 1.0,
+        'sparsity_p': 0.05,
+        'num_filters': 40,
+        'filter_size': 12,
+        'minibatch_size': 10,
+        'num_epochs': 20})
+
+# To reinsert duplicate of jobs that crashed
+REINSERT_COLS = ['learning_rate','sparsity_lambda','sparsity_p','num_filters','filter_size','minibatch_size','dupe']
+#REINSERT_JOB_VALS = [\
+#            [,2],]
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/crbm/mnist_crbm.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/crbm/mnist_crbm.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,234 @@
+#!/usr/bin/python
+
+import sys
+import os, os.path
+
+# do this before importing custom modules
+from mnist_config import *
+
+if not (len(sys.argv) > 1 and sys.argv[1] in \
+            ('test_jobman_entrypoint', 'run_local')):
+    # in those cases don't use isolated code, use dev code
+    print "Running experiment isolation code"
+    isolate_experiment()
+
+import numpy as N
+
+import theano
+import theano.tensor as T
+
+from crbm import CRBM, ConvolutionParams
+
+import pylearn, pylearn.version
+from pylearn.datasets import MNIST
+from pylearn.io.image_tiling import tile_raster_images
+
+import Image
+
+from pylearn.io.seriestables import *
+import tables
+
+import ift6266
+
+import utils
+
+def setup_workdir():
+    if not os.path.exists(IMAGE_OUTPUT_DIR):
+        os.mkdir(IMAGE_OUTPUT_DIR)
+        if not os.path.exists(IMAGE_OUTPUT_DIR):
+            print "For some reason mkdir(IMAGE_OUTPUT_DIR) failed!"
+            sys.exit(1)
+        print "Created image output dir"
+    elif os.path.isfile(IMAGE_OUTPUT_DIR):
+        print "IMAGE_OUTPUT_DIR is not a directory!"
+        sys.exit(1)
+
+#def filename_from_time(suffix):
+#    import datetime
+#    return str(datetime.datetime.now()) + suffix + ".png"
+
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    channel.save()
+
+    setup_workdir()
+
+    crbm = MnistCrbm(state)
+    crbm.train()
+
+    return channel.COMPLETE
+
+class MnistCrbm(object):
+    def __init__(self, state):
+        self.state = state
+
+        if TEST_CONFIG:
+            self.mnist = MNIST.first_1k()
+            print "Test config, so loaded MNIST first 1000"
+        else:
+            self.mnist = MNIST.full()#first_10k()
+            print "Loaded MNIST full"
+
+        self.cp = ConvolutionParams( \
+                    num_filters=state.num_filters,
+                    num_input_planes=1,
+                    height_filters=state.filter_size,
+                    width_filters=state.filter_size)
+
+        self.image_size = (28,28)
+
+        self.minibatch_size = state.minibatch_size
+
+        self.lr = state.learning_rate
+        self.sparsity_lambda = state.sparsity_lambda
+        # about 1/num_filters, so only one filter active at a time
+        # 40 * 0.05 = ~2 filters active for any given pixel
+        self.sparsity_p = state.sparsity_p
+
+        self.crbm = CRBM( \
+                    minibatch_size=self.minibatch_size,
+                    image_size=self.image_size,
+                    conv_params=self.cp,
+                    learning_rate=self.lr,
+                    sparsity_lambda=self.sparsity_lambda,
+                    sparsity_p=self.sparsity_p)
+        
+        self.num_epochs = state.num_epochs
+
+        self.init_series()
+ 
+    def init_series(self):
+        series = {}
+
+        basedir = os.getcwd()
+
+        h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")
+
+        cd_series_names = self.crbm.cd_return_desc
+        series['cd'] = \
+            utils.get_accumulator_series_array( \
+                h5f, 'cd', cd_series_names,
+                REDUCE_EVERY,
+                stdout_too=SERIES_STDOUT_TOO)
+
+        sparsity_series_names = self.crbm.sparsity_return_desc
+        series['sparsity'] = \
+            utils.get_accumulator_series_array( \
+                h5f, 'sparsity', sparsity_series_names,
+                REDUCE_EVERY,
+                stdout_too=SERIES_STDOUT_TOO)
+
+        # so first we create the names for each table, based on 
+        # position of each param in the array
+        params_stdout = []
+        if SERIES_STDOUT_TOO:
+            params_stdout = [StdoutAppendTarget()]
+        series['params'] = SharedParamsStatisticsWrapper(
+                            new_group_name="params",
+                            base_group="/",
+                            arrays_names=['W','b_h','b_x'],
+                            hdf5_file=h5f,
+                            index_names=('epoch','minibatch'),
+                            other_targets=params_stdout)
+
+        self.series = series
+
+    def train(self):
+        num_minibatches = len(self.mnist.train.x) / self.minibatch_size
+
+        for epoch in xrange(self.num_epochs):
+            for mb_index in xrange(num_minibatches):
+                mb_x = self.mnist.train.x \
+                         [mb_index : mb_index+self.minibatch_size]
+                mb_x = mb_x.reshape((self.minibatch_size, 1, 28, 28))
+
+                #E_h = crbm.E_h_given_x_func(mb_x)
+                #print "Shape of E_h", E_h.shape
+
+                cd_return = self.crbm.CD_step(mb_x)
+                sp_return = self.crbm.sparsity_step(mb_x)
+
+                self.series['cd'].append( \
+                        (epoch, mb_index), cd_return)
+                self.series['sparsity'].append( \
+                        (epoch, mb_index), sp_return)
+
+                total_idx = epoch*num_minibatches + mb_index
+
+                if (total_idx+1) % REDUCE_EVERY == 0:
+                    self.series['params'].append( \
+                        (epoch, mb_index), self.crbm.params)
+
+                if total_idx % VISUALIZE_EVERY == 0:
+                    self.visualize_gibbs_result(\
+                        mb_x, GIBBS_STEPS_IN_VIZ_CHAIN,
+                        "gibbs_chain_"+str(epoch)+"_"+str(mb_index))
+                    self.visualize_gibbs_result(mb_x, 1,
+                        "gibbs_1_"+str(epoch)+"_"+str(mb_index))
+                    self.visualize_filters(
+                        "filters_"+str(epoch)+"_"+str(mb_index))
+            if TEST_CONFIG:
+                # do a single epoch for cluster tests config
+                break
+
+        if SAVE_PARAMS:
+            utils.save_params(self.crbm.params, "params.pkl")
+    
+    def visualize_gibbs_result(self, start_x, gibbs_steps, filename):
+        # Run minibatch_size chains for gibbs_steps
+        x_samples = None
+        if not start_x is None:
+            x_samples = self.crbm.gibbs_samples_from(start_x, gibbs_steps)
+        else:
+            x_samples = self.crbm.random_gibbs_samples(gibbs_steps)
+        x_samples = x_samples.reshape((self.minibatch_size, 28*28))
+ 
+        tile = tile_raster_images(x_samples, self.image_size,
+                    (1, self.minibatch_size), output_pixel_vals=True)
+
+        filepath = os.path.join(IMAGE_OUTPUT_DIR, filename+".png")
+        img = Image.fromarray(tile)
+        img.save(filepath)
+
+        print "Result of running Gibbs", \
+                gibbs_steps, "times outputed to", filepath
+
+    def visualize_filters(self, filename):
+        cp = self.cp
+
+        # filter size
+        fsz = (cp.height_filters, cp.width_filters)
+        tile_shape = (cp.num_filters, cp.num_input_planes)
+
+        filters_flattened = self.crbm.W.value.reshape(
+                                (tile_shape[0]*tile_shape[1],
+                                fsz[0]*fsz[1]))
+
+        tile = tile_raster_images(filters_flattened, fsz, 
+                                    tile_shape, output_pixel_vals=True)
+
+        filepath = os.path.join(IMAGE_OUTPUT_DIR, filename+".png")
+        img = Image.fromarray(tile)
+        img.save(filepath)
+
+        print "Filters (as images) outputed to", filepath
+
+
+
+if __name__ == '__main__':
+    args = sys.argv[1:]
+
+    if len(args) == 0:
+        print "Bad usage"
+    elif args[0] == 'jobman_insert':
+        utils.jobman_insert_job_vals(JOBDB, EXPERIMENT_PATH, JOB_VALS)
+    elif args[0] == 'test_jobman_entrypoint':
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+        jobman_entrypoint(DEFAULT_STATE, chanmock)
+    elif args[0] == 'run_default':
+        setup_workdir()
+        mc = MnistCrbm(DEFAULT_STATE)
+        mc.train()
+
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/crbm/utils.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/crbm/utils.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,118 @@
+#!/usr/bin/python
+# coding: utf-8
+
+from __future__ import with_statement
+
+import jobman
+from jobman import DD
+
+from pylearn.io.seriestables import *
+import tables
+
+
+
+# from pylearn codebase
+# useful in __init__(param1, param2, etc.) to save
+# values in self.param1, self.param2... just call
+# update_locals(self, locals())
+def update_locals(obj, dct):
+    if 'self' in dct:
+        del dct['self']
+    obj.__dict__.update(dct)
+
+# from a dictionary of possible values for hyperparameters, e.g.
+# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]}
+# create a list of other dictionaries representing all the possible
+# combinations, thus in this example creating:
+# [{'learning_rate': 0.1, 'num_layers': 1}, ...]
+# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2))
+def produit_cartesien_jobs(val_dict):
+    job_list = [DD()]
+    all_keys = val_dict.keys()
+
+    for key in all_keys:
+        possible_values = val_dict[key]
+        new_job_list = []
+        for val in possible_values:
+            for job in job_list:
+                to_insert = job.copy()
+                to_insert.update({key: val})
+                new_job_list.append(to_insert)
+        job_list = new_job_list
+
+    return job_list
+
+def jobs_from_reinsert_list(cols, job_vals):
+    job_list = []
+    for vals in job_vals:
+        job = DD()
+        for i, col in enumerate(cols):
+            job[col] = vals[i]
+        job_list.append(job)
+
+    return job_list
+
+def save_params(all_params, filename):
+    import pickle
+    with open(filename, 'wb') as f:
+        values = [p.value for p in all_params]
+
+        # -1 for HIGHEST_PROTOCOL
+        pickle.dump(values, f, -1)
+
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
+def jobman_insert_job_vals(job_db, experiment_path, job_vals):
+    jobs = produit_cartesien_jobs(job_vals)
+
+    db = jobman.sql.db(job_db)
+    for job in jobs:
+        job.update({jobman.sql.EXPERIMENT: experiment_path})
+        jobman.sql.insert_dict(job, db)
+
+def jobman_insert_specific_jobs(job_db, experiment_path,
+                        insert_cols, insert_vals):
+    jobs = jobs_from_reinsert_list(insert_cols, insert_vals)
+
+    db = jobman.sql.db(job_db)
+    for job in jobs:
+        job.update({jobman.sql.EXPERIMENT: experiment_path})
+        jobman.sql.insert_dict(job, db)
+
+# Just a shortcut for a common case where we need a few
+# related Error (float) series
+def get_accumulator_series_array( \
+                hdf5_file, group_name, series_names, 
+                reduce_every,
+                index_names=('epoch','minibatch'),
+                stdout_too=True,
+                skip_hdf5_append=False):
+    all_series = []
+
+    new_group = hdf5_file.createGroup('/', group_name)
+
+    other_targets = []
+    if stdout_too:
+        other_targets = [StdoutAppendTarget()]
+
+    for sn in series_names:
+        series_base = \
+            ErrorSeries(error_name=sn,
+                table_name=sn,
+                hdf5_file=hdf5_file,
+                hdf5_group=new_group._v_pathname,
+                index_names=index_names,
+                other_targets=other_targets,
+                skip_hdf5_append=skip_hdf5_append)
+
+        all_series.append( \
+            AccumulatorSeriesWrapper( \
+                    base_series=series_base,
+                    reduce_every=reduce_every))
+
+    ret_wrapper = SeriesArrayWrapper(all_series)
+
+    return ret_wrapper
+
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/rbm/mnistrbm.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/rbm/mnistrbm.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,215 @@
+import sys
+import os, os.path
+
+import numpy as N
+
+import theano
+import theano.tensor as T
+
+from crbm import CRBM, ConvolutionParams
+
+from pylearn.datasets import MNIST
+from pylearn.io.image_tiling import tile_raster_images
+
+import Image
+
+from pylearn.io.seriestables import *
+import tables
+
+IMAGE_OUTPUT_DIR = 'img/'
+
+REDUCE_EVERY = 100
+
+def filename_from_time(suffix):
+    import datetime
+    return str(datetime.datetime.now()) + suffix + ".png"
+
+# Just a shortcut for a common case where we need a few
+# related Error (float) series
+
+def get_accumulator_series_array( \
+                hdf5_file, group_name, series_names, 
+                reduce_every,
+                index_names=('epoch','minibatch'),
+                stdout_too=True,
+                skip_hdf5_append=False):
+    all_series = []
+
+    hdf5_file.createGroup('/', group_name)
+
+    other_targets = []
+    if stdout_too:
+        other_targets = [StdoutAppendTarget()]
+
+    for sn in series_names:
+        series_base = \
+            ErrorSeries(error_name=sn,
+                table_name=sn,
+                hdf5_file=hdf5_file,
+                hdf5_group='/'+group_name,
+                index_names=index_names,
+                other_targets=other_targets,
+                skip_hdf5_append=skip_hdf5_append)
+
+        all_series.append( \
+            AccumulatorSeriesWrapper( \
+                    base_series=series_base,
+                    reduce_every=reduce_every))
+
+    ret_wrapper = SeriesArrayWrapper(all_series)
+
+    return ret_wrapper
+
+class ExperienceRbm(object):
+    def __init__(self):
+        self.mnist = MNIST.full()#first_10k()
+
+        
+        datasets = load_data(dataset)
+
+        train_set_x, train_set_y = datasets[0]
+        test_set_x , test_set_y  = datasets[2]
+
+
+        batch_size = 100    # size of the minibatch
+
+        # compute number of minibatches for training, validation and testing
+        n_train_batches = train_set_x.value.shape[0] / batch_size
+
+        # allocate symbolic variables for the data
+        index = T.lscalar()    # index to a [mini]batch 
+        x     = T.matrix('x')  # the data is presented as rasterized images
+
+        rng        = numpy.random.RandomState(123)
+        theano_rng = RandomStreams( rng.randint(2**30))
+
+        # initialize storage fot the persistent chain (state = hidden layer of chain)
+        persistent_chain = theano.shared(numpy.zeros((batch_size, 500)))
+
+        # construct the RBM class
+        self.rbm = RBM( input = x, n_visible=28*28, \
+                   n_hidden = 500,numpy_rng = rng, theano_rng = theano_rng)
+
+        # get the cost and the gradient corresponding to one step of CD
+        
+
+        self.init_series()
+ 
+    def init_series(self):
+
+        series = {}
+
+        basedir = os.getcwd()
+
+        h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")
+
+        cd_series_names = self.rbm.cd_return_desc
+        series['cd'] = \
+            get_accumulator_series_array( \
+                h5f, 'cd', cd_series_names,
+                REDUCE_EVERY,
+                stdout_too=True)
+
+       
+
+        # so first we create the names for each table, based on 
+        # position of each param in the array
+        params_stdout = StdoutAppendTarget("\n------\nParams")
+        series['params'] = SharedParamsStatisticsWrapper(
+                            new_group_name="params",
+                            base_group="/",
+                            arrays_names=['W','b_h','b_x'],
+                            hdf5_file=h5f,
+                            index_names=('epoch','minibatch'),
+                            other_targets=[params_stdout])
+
+        self.series = series
+
+    def train(self, persistent, learning_rate):
+
+        training_epochs = 15
+
+        #get the cost and the gradient corresponding to one step of CD
+        if persistant:
+            persistent_chain = theano.shared(numpy.zeros((batch_size, self.rbm.n_hidden)))
+            cost, updates = self.rbm.cd(lr=learning_rate, persistent=persistent_chain)
+
+        else:
+            cost, updates = self.rbm.cd(lr=learning_rate)
+    
+        dirname = 'lr=%.5f'%self.rbm.learning_rate
+        os.makedirs(dirname)
+        os.chdir(dirname)
+
+        # the purpose of train_rbm is solely to update the RBM parameters
+        train_rbm = theano.function([index], cost,
+               updates = updates, 
+               givens = { x: train_set_x[index*batch_size:(index+1)*batch_size]})
+
+        plotting_time = 0.
+        start_time = time.clock()  
+
+
+        # go through training epochs 
+        for epoch in xrange(training_epochs):
+
+            # go through the training set
+            mean_cost = []
+            for batch_index in xrange(n_train_batches):
+               mean_cost += [train_rbm(batch_index)]
+
+    
+        pretraining_time = (end_time - start_time)
+            
+
+       
+    
+    def sample_from_rbm(self, gibbs_steps, test_set_x):
+
+        # find out the number of test samples  
+        number_of_test_samples = test_set_x.value.shape[0]
+
+        # pick random test examples, with which to initialize the persistent chain
+        test_idx = rng.randint(number_of_test_samples-20)
+        persistent_vis_chain = theano.shared(test_set_x.value[test_idx:test_idx+20])
+
+        # define one step of Gibbs sampling (mf = mean-field)
+        [hid_mf, hid_sample, vis_mf, vis_sample] =  self.rbm.gibbs_vhv(persistent_vis_chain)
+
+        # the sample at the end of the channel is returned by ``gibbs_1`` as 
+        # its second output; note that this is computed as a binomial draw, 
+        # therefore it is formed of ints (0 and 1) and therefore needs to 
+        # be converted to the same dtype as ``persistent_vis_chain``
+        vis_sample = T.cast(vis_sample, dtype=theano.config.floatX)
+
+        # construct the function that implements our persistent chain 
+        # we generate the "mean field" activations for plotting and the actual samples for
+        # reinitializing the state of our persistent chain
+        sample_fn = theano.function([], [vis_mf, vis_sample],
+                          updates = { persistent_vis_chain:vis_sample})
+
+        # sample the RBM, plotting every `plot_every`-th sample; do this 
+        # until you plot at least `n_samples`
+        n_samples = 10
+        plot_every = 1000
+
+        for idx in xrange(n_samples):
+
+            # do `plot_every` intermediate samplings of which we do not care
+            for jdx in  xrange(plot_every):
+                vis_mf, vis_sample = sample_fn()
+
+            # construct image
+            image = PIL.Image.fromarray(tile_raster_images( 
+                                             X          = vis_mf,
+                                             img_shape  = (28,28),
+                                             tile_shape = (10,10),
+                                             tile_spacing = (1,1) ) )
+            
+            image.save('sample_%i_step_%i.png'%(idx,idx*jdx))    
+
+
+if __name__ == '__main__':
+    mc = ExperienceRbm()
+    mc.train()
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/rbm/rbm.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/rbm/rbm.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,473 @@
+"""This tutorial introduces restricted boltzmann machines (RBM) using Theano.
+
+Boltzmann Machines (BMs) are a particular form of energy-based model which
+contain hidden variables. Restricted Boltzmann Machines further restrict BMs 
+to those without visible-visible and hidden-hidden connections. 
+"""
+
+import numpy, time, cPickle, gzip, PIL.Image
+
+import theano
+import theano.tensor as T
+import os
+import pdb
+import numpy
+import pylab
+import time 
+import theano.tensor.nnet
+import pylearn
+#import ift6266
+import theano,pylearn.version #,ift6266
+from pylearn.io import filetensor as ft
+#from ift6266 import datasets
+
+from jobman.tools import DD, flatten
+from jobman import sql
+
+from theano.tensor.shared_randomstreams import RandomStreams
+
+from utils import tile_raster_images
+from logistic_sgd import load_data
+
+class RBM(object):
+    """Restricted Boltzmann Machine (RBM)  """
+    def __init__(self, input=None, n_visible=32*32, n_hidden=500, \
+        W = None, hbias = None, vbias = None, numpy_rng = None, 
+        theano_rng = None):
+        """ 
+        RBM constructor. Defines the parameters of the model along with
+        basic operations for inferring hidden from visible (and vice-versa), 
+        as well as for performing CD updates.
+
+        :param input: None for standalone RBMs or symbolic variable if RBM is
+        part of a larger graph.
+
+        :param n_visible: number of visible units
+
+        :param n_hidden: number of hidden units
+
+        :param W: None for standalone RBMs or symbolic variable pointing to a
+        shared weight matrix in case RBM is part of a DBN network; in a DBN,
+        the weights are shared between RBMs and layers of a MLP
+
+        :param hbias: None for standalone RBMs or symbolic variable pointing 
+        to a shared hidden units bias vector in case RBM is part of a 
+        different network
+
+        :param vbias: None for standalone RBMs or a symbolic variable 
+        pointing to a shared visible units bias
+        """
+
+        self.n_visible = n_visible
+        self.n_hidden  = n_hidden
+
+
+        if W is None : 
+           # W is initialized with `initial_W` which is uniformely sampled
+           # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible)
+           # the output of uniform if converted using asarray to dtype 
+           # theano.config.floatX so that the code is runable on GPU
+           initial_W = numpy.asarray( numpy.random.uniform( 
+                     low = -numpy.sqrt(6./(n_hidden+n_visible)), 
+                     high = numpy.sqrt(6./(n_hidden+n_visible)), 
+                     size = (n_visible, n_hidden)), 
+                     dtype = theano.config.floatX)
+           # theano shared variables for weights and biases
+           W = theano.shared(value = initial_W, name = 'W')
+
+        if hbias is None :
+           # create shared variable for hidden units bias
+           hbias = theano.shared(value = numpy.zeros(n_hidden, 
+                               dtype = theano.config.floatX), name='hbias')
+
+        if vbias is None :
+            # create shared variable for visible units bias
+            vbias = theano.shared(value =numpy.zeros(n_visible, 
+                                dtype = theano.config.floatX),name='vbias')
+
+        if numpy_rng is None:    
+            # create a number generator 
+            numpy_rng = numpy.random.RandomState(1234)
+
+        if theano_rng is None : 
+            theano_rng = RandomStreams(numpy_rng.randint(2**30))
+
+
+        # initialize input layer for standalone RBM or layer0 of DBN
+        self.input = input if input else T.dmatrix('input')
+
+        self.W          = W
+        self.hbias      = hbias
+        self.vbias      = vbias
+        self.theano_rng = theano_rng
+        
+        # **** WARNING: It is not a good idea to put things in this list 
+        # other than shared variables created in this function.
+        self.params     = [self.W, self.hbias, self.vbias]
+        self.batch_size = self.input.shape[0]
+
+    def free_energy(self, v_sample):
+        ''' Function to compute the free energy '''
+        wx_b = T.dot(v_sample, self.W) + self.hbias
+        vbias_term = T.sum(T.dot(v_sample, self.vbias))
+        hidden_term = T.sum(T.log(1+T.exp(wx_b)))
+        return -hidden_term - vbias_term
+
+    def sample_h_given_v(self, v0_sample):
+        ''' This function infers state of hidden units given visible units '''
+        # compute the activation of the hidden units given a sample of the visibles
+        h1_mean = T.nnet.sigmoid(T.dot(v0_sample, self.W) + self.hbias)
+        # get a sample of the hiddens given their activation
+        h1_sample = self.theano_rng.binomial(size = h1_mean.shape, n = 1, prob = h1_mean)
+        return [h1_mean, h1_sample]
+
+    def sample_v_given_h(self, h0_sample):
+        ''' This function infers state of visible units given hidden units '''
+        # compute the activation of the visible given the hidden sample
+        v1_mean = T.nnet.sigmoid(T.dot(h0_sample, self.W.T) + self.vbias)
+        # get a sample of the visible given their activation
+        v1_sample = self.theano_rng.binomial(size = v1_mean.shape,n = 1,prob = v1_mean)
+        return [v1_mean, v1_sample]
+
+    def gibbs_hvh(self, h0_sample):
+        ''' This function implements one step of Gibbs sampling, 
+            starting from the hidden state'''
+        v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
+        h1_mean, h1_sample = self.sample_h_given_v(v1_sample)
+        return [v1_mean, v1_sample, h1_mean, h1_sample]
+ 
+    def gibbs_vhv(self, v0_sample):
+        ''' This function implements one step of Gibbs sampling, 
+            starting from the visible state'''
+        h1_mean, h1_sample = self.sample_h_given_v(v0_sample)
+        v1_mean, v1_sample = self.sample_v_given_h(h1_sample)
+        return [h1_mean, h1_sample, v1_mean, v1_sample]
+ 
+    def cd(self, lr = 0.1, persistent=None, k=1):
+        """ 
+        This functions implements one step of CD-1 or PCD-1
+
+        :param lr: learning rate used to train the RBM 
+        :param persistent: None for CD. For PCD, shared variable containing old state
+        of Gibbs chain. This must be a shared variable of size (batch size, number of
+        hidden units).
+
+        Returns the updates dictionary. The dictionary contains the update rules for weights
+        and biases but also an update of the shared variable used to store the persistent
+        chain, if one is used.
+        """
+
+        # compute positive phase
+        ph_mean, ph_sample = self.sample_h_given_v(self.input)
+
+        # decide how to initialize persistent chain:
+        # for CD, we use the newly generate hidden sample
+        # for PCD, we initialize from the old state of the chain
+        if persistent is None:
+            chain_start = ph_sample
+        else:
+            chain_start = persistent
+
+        # perform actual negative phase (the CD-1)
+        [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(chain_start)
+
+        #perform CD-k
+        if k-1>0:
+            for i in range(k-1):
+                [nv_mean, nv_sample, nh_mean, nh_sample] = self.gibbs_hvh(nh_sample)
+
+                
+
+        # determine gradients on RBM parameters
+        g_vbias = T.sum( self.input - nv_mean, axis = 0)/self.batch_size
+        g_hbias = T.sum( ph_mean    - nh_mean, axis = 0)/self.batch_size
+        g_W = T.dot(ph_mean.T, self.input   )/ self.batch_size - \
+              T.dot(nh_mean.T, nv_mean      )/ self.batch_size
+
+        gparams = [g_W.T, g_hbias, g_vbias]
+
+        # constructs the update dictionary
+        updates = {}
+        for gparam, param in zip(gparams, self.params):
+           updates[param] = param + gparam * lr
+
+        if persistent:
+            # Note that this works only if persistent is a shared variable
+            updates[persistent] = T.cast(nh_sample, dtype=theano.config.floatX)
+            # pseudo-likelihood is a better proxy for PCD
+            cost = self.get_pseudo_likelihood_cost(updates)
+        else:
+            # reconstruction cross-entropy is a better proxy for CD
+            cost = self.get_reconstruction_cost(updates, nv_mean)
+
+        return cost, updates
+
+    def get_pseudo_likelihood_cost(self, updates):
+        """Stochastic approximation to the pseudo-likelihood"""
+
+        # index of bit i in expression p(x_i | x_{\i})
+        bit_i_idx = theano.shared(value=0, name = 'bit_i_idx')
+
+        # binarize the input image by rounding to nearest integer
+        xi = T.iround(self.input)
+
+        # calculate free energy for the given bit configuration
+        fe_xi = self.free_energy(xi)
+
+        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
+        # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx]
+        # NB: slice(start,stop,step) is the python object used for
+        # slicing, e.g. to index matrix x as follows: x[start:stop:step]
+        xi_flip = T.setsubtensor(xi, 1-xi[:, bit_i_idx], 
+                                 idx_list=(slice(None,None,None),bit_i_idx))
+
+        # calculate free energy with bit flipped
+        fe_xi_flip = self.free_energy(xi_flip)
+
+        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) 
+        cost = self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi))
+
+        # increment bit_i_idx % number as part of updates
+        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible
+
+        return cost
+
+    def get_reconstruction_cost(self, updates, nv_mean):
+        """Approximation to the reconstruction error"""
+
+        cross_entropy = T.mean(
+                T.sum(self.input*T.log(nv_mean) + 
+                (1 - self.input)*T.log(1-nv_mean), axis = 1))
+
+        return cross_entropy
+
+
+
+def test_rbm(b_size = 20, nhidden = 1000, kk = 1, persistance = 0):
+    """
+    Demonstrate ***
+
+    This is demonstrated on MNIST.
+
+    :param learning_rate: learning rate used for training the RBM 
+
+    :param training_epochs: number of epochs used for training
+
+    :param dataset: path the the pickled dataset
+
+    """
+
+    learning_rate=0.1
+
+#    if data_set==0:
+#   	datasets=datasets.nist_all()
+#    elif data_set==1:
+#        datasets=datasets.nist_P07()
+#    elif data_set==2:
+#        datasets=datasets.PNIST07()
+
+
+    data_path = '/data/lisa/data/nist/by_class/'
+    f = open(data_path+'all/all_train_data.ft')
+    g = open(data_path+'all/all_train_labels.ft')
+    h = open(data_path+'all/all_test_data.ft')
+    i = open(data_path+'all/all_test_labels.ft')
+    
+    train_set_x_uint8 = theano.shared(ft.read(f))
+    test_set_x_uint8 = theano.shared(ft.read(h))
+
+
+    train_set_x = T.cast(train_set_x_uint8/255.,theano.config.floatX)
+    train_set_y = ft.read(g)
+    test_set_x = T.cast(test_set_x_uint8/255.,theano.config.floatX)
+    test_set_y = ft.read(i)
+    
+    f.close()
+    g.close()
+    i.close()
+    h.close()
+
+    #t = len(train_set_x)
+    
+    # revoir la recuperation des donnees
+##    dataset = load_data(dataset)
+##
+##    train_set_x, train_set_y = datasets[0]
+##    test_set_x , test_set_y  = datasets[2]
+    training_epochs = 1 # a determiner
+
+    batch_size = b_size    # size of the minibatch
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x_uint8.value.shape[0] / batch_size
+
+    # allocate symbolic variables for the data
+    index = T.lscalar()    # index to a [mini]batch 
+    x     = T.matrix('x')  # the data is presented as rasterized images
+
+    rng        = numpy.random.RandomState(123)
+    theano_rng = RandomStreams( rng.randint(2**30))
+
+    
+    # construct the RBM class
+    rbm = RBM( input = x, n_visible=32*32, \
+               n_hidden = nhidden, numpy_rng = rng, theano_rng = theano_rng)
+
+    
+    # initialize storage fot the persistent chain (state = hidden layer of chain)
+    if persistance == 1:
+        persistent_chain = theano.shared(numpy.zeros((batch_size, 500)))
+        # get the cost and the gradient corresponding to one step of CD
+        cost, updates = rbm.cd(lr=learning_rate, persistent=persistent_chain, k= kk)
+        
+    else:
+        # get the cost and the gradient corresponding to one step of CD
+        #persistance_chain = None
+        cost, updates = rbm.cd(lr=learning_rate, persistent=None, k= kk)
+        
+    #################################
+    #     Training the RBM          #
+    #################################
+    #os.chdir('~')
+    dirname = str(persistance) + '_' + str(nhidden) + '_' + str(b_size) + '_'+ str(kk)
+    os.makedirs(dirname)
+    os.chdir(dirname)
+    print 'yes'
+    # it is ok for a theano function to have no output
+    # the purpose of train_rbm is solely to update the RBM parameters
+    print type(batch_size)
+    print index.dtype
+    train_rbm = theano.function([index], cost,
+           updates = updates, 
+           givens = { x: train_set_x[index*batch_size:(index+1)*batch_size]})
+
+    print 'yep'
+    plotting_time = 0.0
+    start_time = time.clock()  
+    bufsize = 1000
+
+    # go through training epochs
+    costs = []
+    for epoch in xrange(training_epochs):
+
+        # go through the training set
+        mean_cost = []
+        for batch_index in xrange(n_train_batches):
+            mean_cost += [train_rbm(batch_index)]
+#        for mini_x, mini_y in datasets.train(b_size):
+#           mean_cost += [train_rbm(mini_x)]
+##           learning_rate = learning_rate - 0.0001
+##           learning_rate = learning_rate/(tau+( epoch*batch_index*batch_size))
+
+        #learning_rate = learning_rate/10
+
+        costs.append(numpy.mean(mean_cost))
+
+        # Plot filters after each training epoch
+        plotting_start = time.clock()
+        # Construct image from the weight matrix 
+        image = PIL.Image.fromarray(tile_raster_images( X = rbm.W.value.T,
+                 img_shape = (32,32),tile_shape = (10,10), 
+                 tile_spacing=(1,1)))
+        image.save('filters_at_epoch_%i.png'%epoch)
+        plotting_stop = time.clock()
+        plotting_time += (plotting_stop - plotting_start)
+
+    end_time = time.clock()
+
+    pretraining_time = (end_time - start_time) - plotting_time
+   
+    
+  
+    #################################
+    #     Sampling from the RBM     #
+    #################################
+
+    # find out the number of test samples  
+    #number_of_test_samples = 100
+    number_of_test_samples = test_set_x.value.shape[0]
+
+    #test_set_x, test_y  = datasets.test(100*b_size)
+    # pick random test examples, with which to initialize the persistent chain
+    test_idx = rng.randint(number_of_test_samples - b_size)
+    persistent_vis_chain = theano.shared(test_set_x.value[test_idx:test_idx+b_size])
+
+    # define one step of Gibbs sampling (mf = mean-field)
+    [hid_mf, hid_sample, vis_mf, vis_sample] =  rbm.gibbs_vhv(persistent_vis_chain)
+
+    # the sample at the end of the channel is returned by ``gibbs_1`` as 
+    # its second output; note that this is computed as a binomial draw, 
+    # therefore it is formed of ints (0 and 1) and therefore needs to 
+    # be converted to the same dtype as ``persistent_vis_chain``
+    vis_sample = T.cast(vis_sample, dtype=theano.config.floatX)
+
+    # construct the function that implements our persistent chain 
+    # we generate the "mean field" activations for plotting and the actual samples for
+    # reinitializing the state of our persistent chain
+    sample_fn = theano.function([], [vis_mf, vis_sample],
+                      updates = { persistent_vis_chain:vis_sample})
+
+    # sample the RBM, plotting every `plot_every`-th sample; do this 
+    # until you plot at least `n_samples`
+    n_samples = 10
+    # run minibatch size chains for gibbs samples (number of negative particles)
+    plot_every = b_size
+
+    for idx in xrange(n_samples):
+
+        # do `plot_every` intermediate samplings of which we do not care
+        for jdx in  xrange(plot_every):
+            vis_mf, vis_sample = sample_fn()
+
+        # construct image
+        image = PIL.Image.fromarray(tile_raster_images( 
+                                         X          = vis_mf,
+                                         img_shape  = (32,32),
+                                         tile_shape = (10,10),
+                                         tile_spacing = (1,1) ) )
+        #print ' ... plotting sample ', idx
+        image.save('sample_%i_step_%i.png'%(idx,idx*jdx))
+
+    #save the model
+    model = [rbm.W, rbm.vbias, rbm.hbias]
+    f = fopen('params.txt', 'w')
+    cPickle.dump(model, f, protocol = -1)
+    f.close()
+    #os.chdir('./..')
+    return numpy.mean(costs), pretraining_time*36
+
+
+def experiment(state, channel):
+
+    (mean_cost, time_execution) = test_rbm(b_size = state.b_size,\
+                                           nhidden = state.ndidden,\
+                                           kk = state.kk,\
+                                           persistance = state.persistance,\
+                                           )
+
+    state.mean_costs = mean_costs
+    state.time_execution = time_execution
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    return channel.COMPLETE
+
+if __name__ == '__main__':
+    
+    TABLE_NAME='RBM_tapha'
+
+    # DB path...
+    test_rbm()
+    #db = sql.db('postgres://ift6266h10:f0572cd63b@gershwin/ift6266h10_db/'+ TABLE_NAME)
+
+    #state = DD()
+    #for b_size in 50, 75, 100:
+    #    state.b_size = b_size
+    #    for nhidden in 1000,1250,1500:
+    #        state.nhidden = nhidden
+    #        for kk in 1,2,3,4:
+    #            state.kk = kk
+    #            for persistance in 0,1:
+    #                state.persistance = persistance
+    #                sql.insert_job(rbm.experiment, flatten(state), db)
+
+    
+    #db.createView(TABLE_NAME + 'view')
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_sylvain/nist_sda.py
--- a/deep/stacked_dae/v_sylvain/nist_sda.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/nist_sda.py	Sun Apr 25 17:12:03 2010 -0400
@@ -55,6 +55,11 @@
         decrease_lr = state['decrease_lr']
     else :
         decrease_lr = 0
+        
+    if state.has_key('decrease_lr_pretrain'):
+        dec=state['decrease_lr_pretrain']
+    else :
+        dec=0
  
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
@@ -87,7 +92,7 @@
     nb_file=0
     if state['pretrain_choice'] == 0:
         print('\n\tpretraining with NIST\n')
-        optimizer.pretrain(datasets.nist_all()) 
+        optimizer.pretrain(datasets.nist_all(), decrease = dec) 
     elif state['pretrain_choice'] == 1:
         #To know how many file will be used during pretraining
         nb_file = int(state['pretraining_epochs_per_layer']) 
@@ -97,7 +102,7 @@
             "You have to correct the code (and be patient, P07 is huge !!)\n"+
              "or reduce the number of pretraining epoch to run the code (better idea).\n")
         print('\n\tpretraining with P07')
-        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file)) 
+        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file),decrease = dec) 
     channel.save()
     
     #Set some of the parameters used for the finetuning
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_sylvain/sgd_optimization.py
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	Sun Apr 25 17:12:03 2010 -0400
@@ -9,7 +9,8 @@
 import datetime
 import theano.tensor as T
 import sys
-import pickle
+#import pickle
+import cPickle
 
 from jobman import DD
 import jobman, jobman.sql
@@ -87,30 +88,40 @@
         self.pretrain(self.dataset)
         self.finetune(self.dataset)
 
-    def pretrain(self,dataset):
+    def pretrain(self,dataset,decrease=0):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
         
         un_fichier=int(819200.0/self.hp.minibatch_size) #Number of batches in a P07 file
 
         start_time = time.clock()  
+        
+        ########  This is hardcoaded. THe 0.95 parameter is hardcoaded and can be changed at will  ###
+        #Set the decreasing rate of the learning rate. We want the final learning rate to
+        #be 5% of the original learning rate. The decreasing factor is linear
+        decreasing = (decrease*self.hp.pretraining_lr)/float(self.hp.pretraining_epochs_per_layer*800000/self.hp.minibatch_size)
+        
         ## Pre-train layer-wise 
         for i in xrange(self.classifier.n_layers):
             # go through pretraining epochs 
+            
+            #To reset the learning rate to his original value
+            learning_rate=self.hp.pretraining_lr
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
                 batch_index=0
                 count=0
                 num_files=0
                 for x,y in dataset.train(self.hp.minibatch_size):
-                    c = self.classifier.pretrain_functions[i](x)
+                    c = self.classifier.pretrain_functions[i](x,learning_rate)
                     count +=1
 
                     self.series["reconstruction_error"].append((epoch, batch_index), c)
                     batch_index+=1
 
-                    #if batch_index % 100 == 0:
-                    #    print "100 batches"
+                    #If we need to decrease the learning rate for the pretrain
+                    if decrease != 0:
+                        learning_rate -= decreasing
 
                     # useful when doing tests
                     if self.max_minibatches and batch_index >= self.max_minibatches:
@@ -141,7 +152,7 @@
         #To be able to load them later for tests on finetune
         self.parameters_pre=[copy(x.value) for x in self.classifier.params]
         f = open('params_pretrain.txt', 'w')
-        pickle.dump(self.parameters_pre,f)
+        cPickle.dump(self.parameters_pre,f,protocol=-1)
         f.close()
 
 
@@ -295,7 +306,8 @@
                     break
             
             if decrease == 1:
-                learning_rate /= 2 #divide the learning rate by 2 for each new epoch
+                if (ind_test == 21 & epoch % 100 == 0) | ind_test == 20:
+                    learning_rate /= 2 #divide the learning rate by 2 for each new epoch of P07 (or 100 of NIST)
             
             self.series['params'].append((epoch,), self.classifier.all_params)
 
@@ -321,23 +333,23 @@
         
         if special == 1:    #To keep a track of the value of the parameters
             f = open('params_finetune_stanford.txt', 'w')
-            pickle.dump(parameters_finetune,f)
+            cPickle.dump(parameters_finetune,f,protocol=-1)
             f.close()
         
         elif ind_test == 0 | ind_test == 20:    #To keep a track of the value of the parameters
             f = open('params_finetune_P07.txt', 'w')
-            pickle.dump(parameters_finetune,f)
+            cPickle.dump(parameters_finetune,f,protocol=-1)
             f.close()
                
 
         elif ind_test== 1:    #For the run with 2 finetunes. It will be faster.
             f = open('params_finetune_NIST.txt', 'w')
-            pickle.dump(parameters_finetune,f)
+            cPickle.dump(parameters_finetune,f,protocol=-1)
             f.close()
         
         elif ind_test== 21:    #To keep a track of the value of the parameters
             f = open('params_finetune_P07_then_NIST.txt', 'w')
-            pickle.dump(parameters_finetune,f)
+            cPickle.dump(parameters_finetune,f,protocol=-1)
             f.close()
         
 
@@ -346,7 +358,7 @@
         
         #self.parameters_pre=pickle.load('params_pretrain.txt')
         f = open(which)
-        self.parameters_pre=pickle.load(f)
+        self.parameters_pre=cPickle.load(f)
         f.close()
         for idx,x in enumerate(self.parameters_pre):
             if x.dtype=='float64':
@@ -365,6 +377,147 @@
         train_losses2 = [test_model(x,y) for x,y in iter2]
         train_score2 = numpy.mean(train_losses2)
         print "Training error is: " + str(train_score2)
+    
+    #To see the prediction of the model, the real answer and the image to judge    
+    def see_error(self, dataset):
+        import pylab
+        #The function to know the prediction
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.logLayer.y_pred)
+        user = []
+        nb_total = 0     #total number of exemples seen
+        nb_error = 0   #total number of errors
+        for x,y in dataset.test(1):
+            nb_total += 1
+            pred = self.translate(test_model(x,y))
+            rep =  self.translate(y)
+            error = pred != rep
+            print 'prediction: ' + str(pred) +'\t answer: ' + str(rep) + '\t right: ' + str(not(error))
+            pylab.imshow(x.reshape((32,32)))
+            pylab.draw()
+            if error:
+                nb_error += 1
+                user.append(int(raw_input("1 = The error is normal, 0 = The error is not normal : ")))
+                print '\t\t character is hard to distinguish: ' + str(user[-1])
+            else:
+                time.sleep(3)
+        print '\n Over the '+str(nb_total)+' exemples, there is '+str(nb_error)+' errors. \nThe percentage of errors is'+ str(float(nb_error)/float(nb_total))
+        print 'The percentage of errors done by the model that an human will also do: ' + str(numpy.mean(user))
+        
+            
+            
+            
+    #To translate the numeric prediction in character if necessary     
+    def translate(self,y):
+        
+        if y <= 9:
+            return y[0]
+        elif y == 10:
+            return 'A'
+        elif y == 11:
+            return 'B'
+        elif y == 12:
+            return 'C'
+        elif y == 13:
+            return 'D'
+        elif y == 14:
+            return 'E'
+        elif y == 15:
+            return 'F'
+        elif y == 16:
+            return 'G'
+        elif y == 17:
+            return 'H'
+        elif y == 18:
+            return 'I'
+        elif y == 19:
+            return 'J'
+        elif y == 20:
+            return 'K'
+        elif y == 21:
+            return 'L'
+        elif y == 22:
+            return 'M'
+        elif y == 23:
+            return 'N'
+        elif y == 24:
+            return 'O'
+        elif y == 25:
+            return 'P'
+        elif y == 26:
+            return 'Q'
+        elif y == 27:
+            return 'R'
+        elif y == 28:
+            return 'S'
+        elif y == 29:
+            return 'T'
+        elif y == 30:
+            return 'U'
+        elif y == 31:
+            return 'V'
+        elif y == 32:
+            return 'W'
+        elif y == 33:
+            return 'X'
+        elif y == 34:
+            return 'Y'
+        elif y == 35:
+            return 'Z'
+            
+        elif y == 36:
+            return 'a'
+        elif y == 37:
+            return 'b'
+        elif y == 38:
+            return 'c'
+        elif y == 39:
+            return 'd'
+        elif y == 40:
+            return 'e'
+        elif y == 41:
+            return 'f'
+        elif y == 42:
+            return 'g'
+        elif y == 43:
+            return 'h'
+        elif y == 44:
+            return 'i'
+        elif y == 45:
+            return 'j'
+        elif y == 46:
+            return 'k'
+        elif y == 47:
+            return 'l'
+        elif y == 48:
+            return 'm'
+        elif y == 49:
+            return 'n'
+        elif y == 50:
+            return 'o'
+        elif y == 51:
+            return 'p'
+        elif y == 52:
+            return 'q'
+        elif y == 53:
+            return 'r'
+        elif y == 54:
+            return 's'
+        elif y == 55:
+            return 't'
+        elif y == 56:
+            return 'u'
+        elif y == 57:
+            return 'v'
+        elif y == 58:
+            return 'w'
+        elif y == 59:
+            return 'x'
+        elif y == 60:
+            return 'y'
+        elif y == 61:
+            return 'z'    
 
 
 
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_sylvain/stacked_dae.py
--- a/deep/stacked_dae/v_sylvain/stacked_dae.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/stacked_dae.py	Sun Apr 25 17:12:03 2010 -0400
@@ -27,8 +27,10 @@
         # initialize the baises b as a vector of n_out 0s
         self.b = theano.shared( value=numpy.zeros((n_out,), 
                                             dtype = theano.config.floatX) )
-        # compute vector of class-membership probabilities in symbolic form
-        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        # compute vector of class-membership. This is a sigmoid instead of
+        #a softmax to be able later to classify as nothing
+##        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) #row-wise
+        self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b)
         
         # compute prediction as class whose probability is maximal in 
         # symbolic form
@@ -39,7 +41,13 @@
         
 
     def negative_log_likelihood(self, y):
-       return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+##        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]+T.sum(T.log(1-self.p_y_given_x), axis=1)-T.log(1-self.p_y_given_x)[T.arange(y.shape[0]),y])
+    
+    
+##    def kullback_leibler(self,y):
+##        return -T.mean(T.log(1/float(self.p_y_given_x))[T.arange(y.shape[0]),y])
+
 
     def errors(self, y):
         # check if y has same dimension of y_pred 
@@ -71,7 +79,25 @@
 
         self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
         self.params = [self.W, self.b]
+    
 
+class TanhLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+        self.input = input
+
+        W_values = numpy.asarray( rng.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+
+        self.output = (T.tanh(T.dot(input, self.W) + self.b) + 1.0)/2.0
+        # ( *+ 1) /2  is because tanh goes from -1 to 1 and sigmoid goes from 0 to 1
+        # I want to use tanh, but the image has to stay the same. The correction is necessary.
+        self.params = [self.W, self.b]
 
 
 class dA(object):
@@ -132,22 +158,24 @@
     # Equation (2)
     # note  : y is stored as an attribute of the class so that it can be 
     #         used later when stacking dAs. 
-    self.y   = T.nnet.sigmoid(T.dot(self.tilde_x, self.W      ) + self.b)
-    # Equation (3)
-    #self.z   = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
-    # Equation (4)
-    # note : we sum over the size of a datapoint; if we are using minibatches,
-    #        L will  be a vector, with one entry per example in minibatch
-    #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) 
-    #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
-
-    # bypassing z to avoid running to log(0)
-    z_a = T.dot(self.y, self.W_prime) + self.b_prime
-    log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a))
-    # log(1-sigmoid(z_a))
-    log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a))
-    self.L = -T.sum( self.x * (log_sigmoid) \
-                    + (1.0-self.x) * (log_1_sigmoid), axis=1 )
+    
+##    self.y   = T.nnet.sigmoid(T.dot(self.tilde_x, self.W      ) + self.b)
+##        
+##    # Equation (3)
+##    #self.z   = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+##    # Equation (4)
+##    # note : we sum over the size of a datapoint; if we are using minibatches,
+##    #        L will  be a vector, with one entry per example in minibatch
+##    #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) 
+##    #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+##
+##    # bypassing z to avoid running to log(0)
+##    z_a = T.dot(self.y, self.W_prime) + self.b_prime
+##    log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a))
+##    # log(1-sigmoid(z_a))
+##    log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a))
+##    self.L = -T.sum( self.x * (log_sigmoid) \
+##                    + (1.0-self.x) * (log_1_sigmoid), axis=1 )
 
     # I added this epsilon to avoid getting log(0) and 1/0 in grad
     # This means conceptually that there'd be no probability of 0, but that
@@ -160,6 +188,20 @@
     #        of the reconstruction of the corresponding example of the 
     #        minibatch. We need to compute the average of all these to get 
     #        the cost of the minibatch
+    
+    #Or use a Tanh everything is always between 0 and 1, the range is 
+    #changed so it remain the same as when sigmoid is used
+    self.y   = (T.tanh(T.dot(self.tilde_x, self.W ) + self.b)+1.0)/2.0
+    
+    self.z =  (T.tanh(T.dot(self.y, self.W_prime) + self.b_prime)+1.0) / 2.0
+    #To ensure to do not have a log(0) operation
+    if self.z <= 0:
+        self.z = 0.000001
+    if self.z >= 1:
+        self.z = 0.999999
+        
+    self.L = - T.sum( self.x*T.log(self.z) + (1.0-self.x)*T.log(1.0-self.z), axis=1 )
+    
     self.cost = T.mean(self.L)
 
     self.params = [ self.W, self.b, self.b_prime ]
@@ -204,6 +246,7 @@
         self.y  = T.ivector('y') # the labels are presented as 1D vector of 
                                  # [int] labels
         self.finetune_lr = T.fscalar('finetune_lr') #To get a dynamic finetune learning rate
+        self.pretrain_lr = T.fscalar('pretrain_lr') #To get a dynamic pretrain learning rate
 
         for i in xrange( self.n_layers ):
             # construct the sigmoidal layer
@@ -222,8 +265,12 @@
                 layer_input = self.x
             else:
                 layer_input = self.layers[-1].output
+            #We have to choose between sigmoidal layer or tanh layer !
 
-            layer = SigmoidalLayer(rng, layer_input, input_size, 
+##            layer = SigmoidalLayer(rng, layer_input, input_size, 
+##                                   hidden_layers_sizes[i] )
+                                
+            layer = TanhLayer(rng, layer_input, input_size, 
                                    hidden_layers_sizes[i] )
             # add the layer to the 
             self.layers += [layer]
@@ -244,10 +291,10 @@
             # compute the list of updates
             updates = {}
             for param, gparam in zip(dA_layer.params, gparams):
-                updates[param] = param - gparam * pretrain_lr
+                updates[param] = param - gparam * self.pretrain_lr
             
             # create a function that trains the dA
-            update_fn = theano.function([self.x], dA_layer.cost, \
+            update_fn = theano.function([self.x, self.pretrain_lr], dA_layer.cost, \
                   updates = updates)#,
             #     givens = { 
             #         self.x : ensemble})
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_sylvain/voir_erreurs.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_sylvain/voir_erreurs.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,118 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import ift6266
+import pylearn
+
+import numpy 
+import theano
+import time
+
+import pylearn.version
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+import copy
+import sys
+import os
+import os.path
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_cartesien_jobs
+from copy import copy
+
+from sgd_optimization import SdaSgdOptimizer
+
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
+
+from ift6266 import datasets
+from config import *
+
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
+'''
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
+    channel.save()
+
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+ 
+    n_ins = 32*32
+    n_outs = 62 # 10 digits + 26*2 (lower, capitals)
+     
+    examples_per_epoch = NIST_ALL_TRAIN_SIZE
+
+    PATH = PATH_P07
+    maximum_exemples=int(100) #Maximum number of exemples seen
+
+
+
+    print "Creating optimizer with state, ", state
+
+    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), 
+                                    hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    examples_per_epoch=examples_per_epoch, \
+                                    max_minibatches=rtt)	
+
+
+    
+    
+    print 'The model is created'
+    if os.path.exists(PATH+'params_finetune_NIST.txt'):
+        print ('\n finetune = NIST ')
+        optimizer.reload_parameters(PATH+'params_finetune_NIST.txt')
+        print "For" + str(maximum_exemples) + "over the NIST test set: "
+        optimizer.see_error(datasets.nist_all(maxsize=maximum_exemples))
+        
+    
+    if os.path.exists(PATH+'params_finetune_P07.txt'):
+        print ('\n finetune = P07 ')
+        optimizer.reload_parameters(PATH+'params_finetune_P07.txt')
+        print "For" + str(maximum_exemples) + "over the P07 test set: "
+        optimizer.see_error(datasets.nist_P07(maxsize=maximum_exemples))
+
+    
+    if os.path.exists(PATH+'params_finetune_NIST_then_P07.txt'):
+        print ('\n finetune = NIST then P07')
+        optimizer.reload_parameters(PATH+'params_finetune_NIST_then_P07.txt')
+        print "For" + str(maximum_exemples) + "over the NIST test set: "
+        optimizer.see_error(datasets.nist_all(maxsize=maximum_exemples))
+        print "For" + str(maximum_exemples) + "over the P07 test set: "
+        optimizer.see_error(datasets.nist_P07(maxsize=maximum_exemples))
+    
+    if os.path.exists(PATH+'params_finetune_P07_then_NIST.txt'):
+        print ('\n finetune = P07 then NIST')
+        optimizer.reload_parameters(PATH+'params_finetune_P07_then_NIST.txt')
+        print "For" + str(maximum_exemples) + "over the P07 test set: "
+        optimizer.see_error(datasets.nist_P07(maxsize=maximum_exemples))
+        print "For" + str(maximum_exemples) + "over the NIST test set: "
+        optimizer.see_error(datasets.nist_all(maxsize=maximum_exemples))
+    
+    channel.save()
+
+    return channel.COMPLETE
+
+
+
+if __name__ == '__main__':
+
+
+    chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+    jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
+
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_youssouf/__init__.py
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_youssouf/config.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_youssouf/config.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,98 @@
+'''
+These are parameters used by nist_sda_retrieve.py. They'll end up as globals in there.
+
+Rename this file to config.py and configure as needed.
+DON'T add the renamed file to the repository, as others might use it
+without realizing it, with dire consequences.
+'''
+
+# Set this to True when you want to run cluster tests, ie. you want
+# to run on the cluster, many jobs, but want to reduce the training
+# set size and the number of epochs, so you know everything runs
+# fine on the cluster.
+# Set this PRIOR to inserting your test jobs in the DB.
+TEST_CONFIG = False
+
+NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
+NIST_ALL_TRAIN_SIZE = 649081
+# valid et test =82587 82587 
+
+#Path of two pre-train done earlier
+PATH_NIST = '/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/NIST_big'
+PATH_P07 = '/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/P07_big/'
+
+'''
+# change "sandbox" when you're ready
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_SDA_retrieve'
+EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v_sylvain.nist_sda_retrieve.jobman_entrypoint"
+'''
+
+##Pour lancer des travaux sur le cluster: (il faut etre ou se trouve les fichiers)
+##python nist_sda_retrieve.py jobman_insert
+##dbidispatch --condor --repeat_jobs=2 jobman sql 'postgres://ift6266h10@gershwin/ift6266h10_db/pannetis_finetuningSDA0' .  #C'est le path dans config.py
+
+##Pour lancer sur GPU sur boltzmann (changer device=gpuX pour X le bon assigne)
+##THEANO_FLAGS=floatX=float32,device=gpu2 python nist_sda_retrieve.py test_jobman_entrypoint
+
+
+# reduce training set to that many examples
+REDUCE_TRAIN_TO = None
+# that's a max, it usually doesn't get to that point
+MAX_FINETUNING_EPOCHS = 1000
+# number of minibatches before taking means for valid error etc.
+REDUCE_EVERY = 100
+#Set the finetune dataset
+FINETUNE_SET=0
+#Set the pretrain dataset used. 0: NIST, 1:P07
+PRETRAIN_CHOICE=0
+
+
+if TEST_CONFIG:
+    REDUCE_TRAIN_TO = 1000
+    MAX_FINETUNING_EPOCHS = 2
+    REDUCE_EVERY = 10
+	
+# select detection or classification
+DETECTION_MODE = 0
+# consider maj and minuscule as the same
+REDUCE_LABEL = 1
+
+
+# This is to configure insertion of jobs on the cluster.
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
+JOB_VALS = {'pretraining_lr': [0.1],#, 0.001],#, 0.0001],
+        'pretraining_epochs_per_layer': [10],
+        'hidden_layers_sizes': [800],
+        'corruption_levels': [0.2],
+        'minibatch_size': [100],
+        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
+        'max_finetuning_epochs_P07':[1],
+        'finetuning_lr':[0.01], #0.001 was very bad, so we leave it out
+        'num_hidden_layers':[4],
+        'finetune_set':[-1],
+        'pretrain_choice':[0,1]
+        }
+
+# Just useful for tests... minimal number of epochs
+# (This is used when running a single job, locally, when
+# calling ./nist_sda.py test_jobman_entrypoint
+DEFAULT_HP_NIST = {'finetuning_lr':0.05,
+                       'pretraining_lr':0.01,
+                       'pretraining_epochs_per_layer':15,
+                       'max_finetuning_epochs':MAX_FINETUNING_EPOCHS,
+                       #'max_finetuning_epochs':1,
+                       'max_finetuning_epochs_P07':7,
+                       'hidden_layers_sizes':1500,
+                       'corruption_levels':0.2,
+                       'minibatch_size':100,
+                       #'reduce_train_to':2000,
+		       'decrease_lr':1,
+                       'num_hidden_layers':4,
+                       'finetune_set':2,
+                       'pretrain_choice':1,
+					   'detection_mode':DETECTION_MODE,
+					   'reduce_label':REDUCE_LABEL}
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_youssouf/nist_sda.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_youssouf/nist_sda.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,266 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import ift6266
+import pylearn
+
+import numpy 
+import theano
+import time
+
+import pylearn.version
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+import copy
+import sys
+import os
+import os.path
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_cartesien_jobs
+from copy import copy
+
+from sgd_optimization import SdaSgdOptimizer
+
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
+
+from ift6266 import datasets
+from config import *
+
+
+
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
+'''
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
+    channel.save()
+
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+        
+    if state.has_key('decrease_lr'):
+        decrease_lr = state['decrease_lr']
+    else :
+        decrease_lr = 0
+ 
+    n_ins = 32*32
+    n_outs = 36 # 10 digits, 26 characters (merged lower and capitals)
+     
+    examples_per_epoch = NIST_ALL_TRAIN_SIZE
+    
+    #To be sure variables will not be only in the if statement
+    PATH = ''
+    nom_reptrain = ''
+    nom_serie = ""
+    if state['pretrain_choice'] == 0:
+        nom_serie="series_NIST.h5"
+    elif state['pretrain_choice'] == 1:
+        nom_serie="series_P07.h5"
+
+    series = create_series(state.num_hidden_layers,nom_serie)
+
+
+    print "Creating optimizer with state, ", state
+
+    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), 
+                                    hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    examples_per_epoch=examples_per_epoch, \
+                                    series=series,
+                                    max_minibatches=rtt)
+
+    parameters=[]
+    #Number of files of P07 used for pretraining
+    nb_file=0
+    if state['pretrain_choice'] == 0:
+        print('\n\tpretraining with NIST\n')
+        optimizer.pretrain(datasets.nist_all()) 
+    elif state['pretrain_choice'] == 1:
+        #To know how many file will be used during pretraining
+        nb_file = int(state['pretraining_epochs_per_layer']) 
+        state['pretraining_epochs_per_layer'] = 1 #Only 1 time over the dataset
+        if nb_file >=100:
+            sys.exit("The code does not support this much pretraining epoch (99 max with P07).\n"+
+            "You have to correct the code (and be patient, P07 is huge !!)\n"+
+             "or reduce the number of pretraining epoch to run the code (better idea).\n")
+        print('\n\tpretraining with P07')
+        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file)) 
+    channel.save()
+    
+    #Set some of the parameters used for the finetuning
+    if state.has_key('finetune_set'):
+        finetune_choice=state['finetune_set']
+    else:
+        finetune_choice=FINETUNE_SET
+    
+    if state.has_key('max_finetuning_epochs'):
+        max_finetune_epoch_NIST=state['max_finetuning_epochs']
+    else:
+        max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS
+    
+    if state.has_key('max_finetuning_epochs_P07'):
+        max_finetune_epoch_P07=state['max_finetuning_epochs_P07']
+    else:
+        max_finetune_epoch_P07=max_finetune_epoch_NIST
+    
+    #Decide how the finetune is done
+    
+    if finetune_choice == 0:
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 1:
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 2:
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr)
+        #optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 3:
+        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\
+        All hidden units output are input of the logistic regression\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        
+        
+    if finetune_choice==-1:
+        print('\nSERIE OF 4 DIFFERENT FINETUNINGS')
+        print('\n\n\tfinetune with NIST\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_finetune_P07.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\
+        All hidden units output are input of the logistic regression\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        channel.save()
+    
+    channel.save()
+
+    return channel.COMPLETE
+
+# These Series objects are used to save various statistics
+# during the training.
+def create_series(num_hidden_layers, nom_serie):
+
+    # Replace series we don't want to save with DummySeries, e.g.
+    # series['training_error'] = DummySeries()
+
+    series = {}
+
+    basedir = os.getcwd()
+
+    h5f = tables.openFile(os.path.join(basedir, nom_serie), "w")
+
+    # reconstruction
+    reconstruction_base = \
+                ErrorSeries(error_name="reconstruction_error",
+                    table_name="reconstruction_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['reconstruction_error'] = \
+                AccumulatorSeriesWrapper(base_series=reconstruction_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # train
+    training_base = \
+                ErrorSeries(error_name="training_error",
+                    table_name="training_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['training_error'] = \
+                AccumulatorSeriesWrapper(base_series=training_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # valid and test are not accumulated/mean, saved directly
+    series['validation_error'] = \
+                ErrorSeries(error_name="validation_error",
+                    table_name="validation_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    series['test_error'] = \
+                ErrorSeries(error_name="test_error",
+                    table_name="test_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    param_names = []
+    for i in range(num_hidden_layers):
+        param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i]
+    param_names += ['logreg_layer_W', 'logreg_layer_b']
+
+    # comment out series we don't want to save
+    series['params'] = SharedParamsStatisticsWrapper(
+                        new_group_name="params",
+                        base_group="/",
+                        arrays_names=param_names,
+                        hdf5_file=h5f,
+                        index_names=('epoch',))
+
+    return series
+
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
+def jobman_insert_nist():
+    jobs = produit_cartesien_jobs(JOB_VALS)
+
+    db = jobman.sql.db(JOBDB)
+    for job in jobs:
+        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
+        jobman.sql.insert_dict(job, db)
+
+    print "inserted"
+
+if __name__ == '__main__':
+
+    args = sys.argv[1:]
+
+    #if len(args) > 0 and args[0] == 'load_nist':
+    #    test_load_nist()
+
+    if len(args) > 0 and args[0] == 'jobman_insert':
+        jobman_insert_nist()
+
+    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+        jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
+
+    else:
+        print "Bad arguments"
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_youssouf/nist_sda_retrieve.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_youssouf/nist_sda_retrieve.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,272 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import ift6266
+import pylearn
+
+import numpy 
+import theano
+import time
+
+import pylearn.version
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+import copy
+import sys
+import os
+import os.path
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_cartesien_jobs
+from copy import copy
+
+from sgd_optimization import SdaSgdOptimizer
+
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
+
+from ift6266 import datasets
+from config2 import *
+
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
+'''
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
+    channel.save()
+
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+        
+    if state.has_key('decrease_lr'):
+        decrease_lr = state['decrease_lr']
+    else :
+        decrease_lr = 0
+ 
+    n_ins = 32*32
+    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+     
+    examples_per_epoch = NIST_ALL_TRAIN_SIZE
+    #To be sure variables will not be only in the if statement
+    PATH = ''
+    nom_reptrain = ''
+    nom_serie = ""
+    if state['pretrain_choice'] == 0:
+        PATH=PATH_NIST
+        nom_pretrain='NIST'
+        nom_serie="series_NIST.h5"
+    elif state['pretrain_choice'] == 1:
+        PATH=PATH_P07
+        nom_pretrain='P07'
+        nom_serie="series_P07.h5"
+
+    series = create_series(state.num_hidden_layers,nom_serie)
+
+    print "Creating optimizer with state, ", state
+
+    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), 
+                                    hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    examples_per_epoch=examples_per_epoch, \
+                                    series=series,
+                                    max_minibatches=rtt)	
+
+    parameters=[]
+    #Number of files of P07 used for pretraining
+    nb_file=0
+##    if state['pretrain_choice'] == 0:
+##        print('\n\tpretraining with NIST\n')
+##        optimizer.pretrain(datasets.nist_all()) 
+##    elif state['pretrain_choice'] == 1:
+##        #To know how many file will be used during pretraining
+##        nb_file = state['pretraining_epochs_per_layer'] 
+##        state['pretraining_epochs_per_layer'] = 1 #Only 1 time over the dataset
+##        if nb_file >=100:
+##            sys.exit("The code does not support this much pretraining epoch (99 max with P07).\n"+
+##            "You have to correct the code (and be patient, P07 is huge !!)\n"+
+##             "or reduce the number of pretraining epoch to run the code (better idea).\n")
+##        print('\n\tpretraining with P07')
+##        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file)) 
+    
+    print ('Retrieve pre-train done earlier ( '+nom_pretrain+' )')
+    
+
+        
+    sys.stdout.flush()
+    channel.save()
+    
+    #Set some of the parameters used for the finetuning
+    if state.has_key('finetune_set'):
+        finetune_choice=state['finetune_set']
+    else:
+        finetune_choice=FINETUNE_SET
+    
+    if state.has_key('max_finetuning_epochs'):
+        max_finetune_epoch_NIST=state['max_finetuning_epochs']
+    else:
+        max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS
+    
+    if state.has_key('max_finetuning_epochs_P07'):
+        max_finetune_epoch_P07=state['max_finetuning_epochs_P07']
+    else:
+        max_finetune_epoch_P07=max_finetune_epoch_NIST
+    
+    #Decide how the finetune is done
+    
+    if finetune_choice == 0:
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 1:
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 2:
+        print('\n\n\tfinetune with P07 followed by NIST\n\n')
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20,decrease=decrease_lr)
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+    if finetune_choice == 3:
+        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\
+        All hidden units output are input of the logistic regression\n\n')
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        
+        
+    if finetune_choice==-1:
+        print('\nSERIE OF 4 DIFFERENT FINETUNINGS')
+        print('\n\n\tfinetune with NIST\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters('params_finetune_P07.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=21,decrease=decrease_lr)
+        channel.save()
+        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\
+        All hidden units output are input of the logistic regression\n\n')
+        sys.stdout.flush()
+        optimizer.reload_parameters(PATH+'params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
+        channel.save()
+    
+    channel.save()
+
+    return channel.COMPLETE
+
+# These Series objects are used to save various statistics
+# during the training.
+def create_series(num_hidden_layers, nom_serie):
+
+    # Replace series we don't want to save with DummySeries, e.g.
+    # series['training_error'] = DummySeries()
+
+    series = {}
+
+    basedir = os.getcwd()
+
+    h5f = tables.openFile(os.path.join(basedir, nom_serie), "w")
+
+    # reconstruction
+    reconstruction_base = \
+                ErrorSeries(error_name="reconstruction_error",
+                    table_name="reconstruction_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['reconstruction_error'] = \
+                AccumulatorSeriesWrapper(base_series=reconstruction_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # train
+    training_base = \
+                ErrorSeries(error_name="training_error",
+                    table_name="training_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'),
+                    title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['training_error'] = \
+                AccumulatorSeriesWrapper(base_series=training_base,
+                    reduce_every=REDUCE_EVERY)
+
+    # valid and test are not accumulated/mean, saved directly
+    series['validation_error'] = \
+                ErrorSeries(error_name="validation_error",
+                    table_name="validation_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    series['test_error'] = \
+                ErrorSeries(error_name="test_error",
+                    table_name="test_error",
+                    hdf5_file=h5f,
+                    index_names=('epoch','minibatch'))
+
+    param_names = []
+    for i in range(num_hidden_layers):
+        param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i]
+    param_names += ['logreg_layer_W', 'logreg_layer_b']
+
+    # comment out series we don't want to save
+    series['params'] = SharedParamsStatisticsWrapper(
+                        new_group_name="params",
+                        base_group="/",
+                        arrays_names=param_names,
+                        hdf5_file=h5f,
+                        index_names=('epoch',))
+
+    return series
+
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
+def jobman_insert_nist():
+    jobs = produit_cartesien_jobs(JOB_VALS)
+
+    db = jobman.sql.db(JOBDB)
+    for job in jobs:
+        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
+        jobman.sql.insert_dict(job, db)
+
+    print "inserted"
+
+if __name__ == '__main__':
+
+    args = sys.argv[1:]
+
+    #if len(args) > 0 and args[0] == 'load_nist':
+    #    test_load_nist()
+
+    if len(args) > 0 and args[0] == 'jobman_insert':
+        jobman_insert_nist()
+
+    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+        jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
+
+    else:
+        print "Bad arguments"
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_youssouf/sgd_optimization.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_youssouf/sgd_optimization.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,376 @@
+#!/usr/bin/python
+# coding: utf-8
+
+# Generic SdA optimization loop, adapted from the deeplearning.net tutorial
+
+import numpy 
+import theano
+import time
+import datetime
+import theano.tensor as T
+import sys
+import pickle
+
+from jobman import DD
+import jobman, jobman.sql
+from copy import copy
+
+from stacked_dae import SdA
+
+from ift6266.utils.seriestables import *
+
+#For test purpose only
+buffersize=1000
+
+default_series = { \
+        'reconstruction_error' : DummySeries(),
+        'training_error' : DummySeries(),
+        'validation_error' : DummySeries(),
+        'test_error' : DummySeries(),
+        'params' : DummySeries()
+        }
+
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
+
+class SdaSgdOptimizer:
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
+                    examples_per_epoch, series=default_series, max_minibatches=None):
+        self.dataset = dataset
+        self.hp = hyperparameters
+        self.n_ins = n_ins
+        self.n_outs = n_outs
+        self.parameters_pre=[]
+   
+        self.max_minibatches = max_minibatches
+        print "SdaSgdOptimizer, max_minibatches =", max_minibatches
+        print "Reduce Label: ", self.hp.reduce_label
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
+        self.series = series
+
+        self.rng = numpy.random.RandomState(1234)
+
+        self.init_classifier()
+
+        sys.stdout.flush()
+
+    def init_classifier(self):
+        print "Constructing classifier"
+
+        # we don't want to save arrays in DD objects, so
+        # we recreate those arrays here
+        nhl = self.hp.num_hidden_layers
+        layers_sizes = [self.hp.hidden_layers_sizes] * nhl
+        corruption_levels = [self.hp.corruption_levels] * nhl
+
+        # construct the stacked denoising autoencoder class
+        self.classifier = SdA( \
+                          batch_size = self.hp.minibatch_size, \
+                          n_ins= self.n_ins, \
+                          hidden_layers_sizes = layers_sizes, \
+                          n_outs = self.n_outs, \
+                          corruption_levels = corruption_levels,\
+                          rng = self.rng,\
+                          pretrain_lr = self.hp.pretraining_lr, \
+                          finetune_lr = self.hp.finetuning_lr, \
+                          detection_mode = self.hp.detection_mode, \
+                          )
+
+        #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
+
+        sys.stdout.flush()
+
+    def train(self):
+        self.pretrain(self.dataset)
+        self.finetune(self.dataset)
+
+    def pretrain(self,dataset):
+        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
+        sys.stdout.flush()
+        
+        un_fichier=int(819200.0/self.hp.minibatch_size) #Number of batches in a P07 file
+
+        start_time = time.clock()  
+        ## Pre-train layer-wise 
+        for i in xrange(self.classifier.n_layers):
+            # go through pretraining epochs 
+            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
+                # go through the training set
+                batch_index=0
+                count=0
+                num_files=0
+                for x,y in dataset.train(self.hp.minibatch_size):
+                    c = self.classifier.pretrain_functions[i](x)
+                    count +=1
+
+                    self.series["reconstruction_error"].append((epoch, batch_index), c)
+                    batch_index+=1
+
+                    #if batch_index % 100 == 0:
+                    #    print "100 batches"
+
+                    # useful when doing tests
+                    if self.max_minibatches and batch_index >= self.max_minibatches:
+                        break
+                    
+                    #When we pass through the data only once (the case with P07)
+                    #There is approximately 800*1024=819200 examples per file (1k per example and files are 800M)
+                    if self.hp.pretraining_epochs_per_layer == 1 and count%un_fichier == 0:
+                        print 'Pre-training layer %i, epoch %d, cost '%(i,num_files),c
+                        num_files+=1
+                        sys.stdout.flush()
+                        self.series['params'].append((num_files,), self.classifier.all_params)
+                
+                #When NIST is used
+                if self.hp.pretraining_epochs_per_layer > 1:        
+                    print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
+                    sys.stdout.flush()
+
+                    self.series['params'].append((epoch,), self.classifier.all_params)
+     
+        end_time = time.clock()
+
+        print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
+        self.hp.update({'pretraining_time': end_time-start_time})
+        
+        sys.stdout.flush()
+        
+        #To be able to load them later for tests on finetune
+        self.parameters_pre=[copy(x.value) for x in self.classifier.params]
+        f = open('params_pretrain.txt', 'w')
+        pickle.dump(self.parameters_pre,f)
+        f.close()
+
+
+    def finetune(self,dataset,dataset_test,num_finetune,ind_test,special=0,decrease=0):
+        
+        if special != 0 and special != 1:
+            sys.exit('Bad value for variable special. Must be in {0,1}')
+        print "STARTING FINETUNING, time = ", datetime.datetime.now()
+
+        minibatch_size = self.hp.minibatch_size
+        if ind_test == 0 or ind_test == 20:
+            nom_test = "NIST"
+            nom_train="P07"
+        else:
+            nom_test = "P07"
+            nom_train = "NIST"
+
+
+        # create a function to compute the mistakes that are made by the model
+        # on the validation set, or testing set
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #         givens = {
+        #           self.classifier.x: ensemble_x,
+        #           self.classifier.y: ensemble_y]})
+
+        validate_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #        givens = {
+        #           self.classifier.x: ,
+        #           self.classifier.y: ]})
+
+
+        # early-stopping parameters
+        patience              = 10000 # look as this many examples regardless
+        patience_increase     = 2.    # wait this much longer when a new best is 
+                                      # found
+        improvement_threshold = 0.995 # a relative improvement of this much is 
+                                      # considered significant
+        validation_frequency  = min(self.mb_per_epoch, patience/2)
+                                      # go through this many 
+                                      # minibatche before checking the network 
+                                      # on the validation set; in this case we 
+                                      # check every epoch 
+        if self.max_minibatches and validation_frequency > self.max_minibatches:
+            validation_frequency = self.max_minibatches / 2
+
+        best_params          = None
+        best_validation_loss = float('inf')
+        test_score           = 0.
+        start_time = time.clock()
+
+        done_looping = False
+        epoch = 0
+
+        total_mb_index = 0
+        minibatch_index = 0
+        parameters_finetune=[]
+        
+        if ind_test == 21:
+            learning_rate = self.hp.finetuning_lr / 10.0
+        else:
+            learning_rate = self.hp.finetuning_lr  #The initial finetune lr
+
+
+        while (epoch < num_finetune) and (not done_looping):
+            epoch = epoch + 1
+
+            for x,y in dataset.train(minibatch_size,bufsize=buffersize):
+                minibatch_index += 1
+
+                if self.hp.reduce_label:
+                    y[y > 35] = y[y > 35]-26	
+                
+                if special == 0:
+                    cost_ij = self.classifier.finetune(x,y,learning_rate)
+                elif special == 1:
+                    cost_ij = self.classifier.finetune2(x,y)
+                total_mb_index += 1
+
+                self.series["training_error"].append((epoch, minibatch_index), cost_ij)
+
+                if (total_mb_index+1) % validation_frequency == 0: 
+                    #minibatch_index += 1
+                    #The validation set is always NIST (we want the model to be good on NIST)
+                    if ind_test == 0 | ind_test == 20:
+                        iter=dataset_test.valid(minibatch_size,bufsize=buffersize)
+                    else:
+                        iter = dataset.valid(minibatch_size,bufsize=buffersize)
+                    if self.max_minibatches:
+                        iter = itermax(iter, self.max_minibatches)
+                    validation_losses = [validate_model(x,y) for x,y in iter]
+                    this_validation_loss = numpy.mean(validation_losses)
+
+                    self.series["validation_error"].\
+                        append((epoch, minibatch_index), this_validation_loss*100.)
+
+                    print('epoch %i, minibatch %i, validation error on NIST : %f %%' % \
+                           (epoch, minibatch_index+1, \
+                            this_validation_loss*100.))
+
+
+                    # if we got the best validation score until now
+                    if this_validation_loss < best_validation_loss:
+
+                        #improve patience if loss improvement is good enough
+                        if this_validation_loss < best_validation_loss *  \
+                               improvement_threshold :
+                            patience = max(patience, total_mb_index * patience_increase)
+
+                        # save best validation score, iteration number and parameters
+                        best_validation_loss = this_validation_loss
+                        best_iter = total_mb_index
+                        parameters_finetune=[copy(x.value) for x in self.classifier.params]
+
+                        # test it on the test set
+                        iter = dataset.test(minibatch_size,bufsize=buffersize)
+                        if self.max_minibatches:
+                            iter = itermax(iter, self.max_minibatches)
+                        test_losses = [test_model(x,y) for x,y in iter]
+                        test_score = numpy.mean(test_losses)
+                        
+                        #test it on the second test set
+                        iter2 = dataset_test.test(minibatch_size,bufsize=buffersize)
+                        if self.max_minibatches:
+                            iter2 = itermax(iter2, self.max_minibatches)
+                        test_losses2 = [test_model(x,y) for x,y in iter2]
+                        test_score2 = numpy.mean(test_losses2)
+
+                        self.series["test_error"].\
+                            append((epoch, minibatch_index), test_score*100.)
+
+                        print(('     epoch %i, minibatch %i, test error on dataset %s  (train data) of best '
+                              'model %f %%') % 
+                                     (epoch, minibatch_index+1,nom_train,
+                                      test_score*100.))
+                                    
+                        print(('     epoch %i, minibatch %i, test error on dataset %s of best '
+                              'model %f %%') % 
+                                     (epoch, minibatch_index+1,nom_test,
+                                      test_score2*100.))
+                    
+                    if patience <= total_mb_index:
+                        done_looping = True
+                        break   #to exit the FOR loop
+                    
+                    sys.stdout.flush()
+
+                # useful when doing tests
+                if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                    break
+            
+            if decrease == 1:
+                learning_rate /= 2 #divide the learning rate by 2 for each new epoch
+            
+            self.series['params'].append((epoch,), self.classifier.all_params)
+
+            if done_looping == True:    #To exit completly the fine-tuning
+                break   #to exit the WHILE loop
+
+        end_time = time.clock()
+        self.hp.update({'finetuning_time':end_time-start_time,\
+                    'best_validation_error':best_validation_loss,\
+                    'test_score':test_score,
+                    'num_finetuning_epochs':epoch})
+
+        print(('\nOptimization complete with best validation score of %f %%,'
+               'with test performance %f %% on dataset %s ') %  
+                     (best_validation_loss * 100., test_score*100.,nom_train))
+        print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.))
+        
+        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+        
+        sys.stdout.flush()
+        
+        #Save a copy of the parameters in a file to be able to get them in the future
+        
+        if special == 1:    #To keep a track of the value of the parameters
+            f = open('params_finetune_stanford.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+        
+        elif ind_test == 0 | ind_test == 20:    #To keep a track of the value of the parameters
+            f = open('params_finetune_P07.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+               
+
+        elif ind_test== 1:    #For the run with 2 finetunes. It will be faster.
+            f = open('params_finetune_NIST.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+        
+        elif ind_test== 21:    #To keep a track of the value of the parameters
+            f = open('params_finetune_P07_then_NIST.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+        
+
+    #Set parameters like they where right after pre-train or finetune
+    def reload_parameters(self,which):
+        
+        #self.parameters_pre=pickle.load('params_pretrain.txt')
+        f = open(which)
+        self.parameters_pre=pickle.load(f)
+        f.close()
+        for idx,x in enumerate(self.parameters_pre):
+            if x.dtype=='float64':
+                self.classifier.params[idx].value=theano._asarray(copy(x),dtype=theano.config.floatX)
+            else:
+                self.classifier.params[idx].value=copy(x)
+
+    def training_error(self,dataset):
+        # create a function to compute the mistakes that are made by the model
+        # on the validation set, or testing set
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+                
+        iter2 = dataset.train(self.hp.minibatch_size,bufsize=buffersize)
+        train_losses2 = [test_model(x,y) for x,y in iter2]
+        train_score2 = numpy.mean(train_losses2)
+        print "Training error is: " + str(train_score2)
+
+
+
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_youssouf/stacked_dae.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_youssouf/stacked_dae.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,353 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import numpy 
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+from utils import update_locals
+
+# taken from LeDeepNet/daa.py
+# has a special case when taking log(0) (defined =0)
+# modified to not take the mean anymore
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
+
+class LogisticRegression(object):
+    def __init__(self, input, n_in, n_out, detection_mode):
+        # initialize with 0 the weights W as a matrix of shape (n_in, n_out) 
+        self.W = theano.shared( value=numpy.zeros((n_in,n_out),
+                                            dtype = theano.config.floatX) )
+        # initialize the baises b as a vector of n_out 0s
+        self.b = theano.shared( value=numpy.zeros((n_out,), 
+                                            dtype = theano.config.floatX) )
+        # compute vector of class-membership probabilities in symbolic form
+        if detection_mode:
+            self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b)
+        else:
+            self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        
+        # compute prediction as class whose probability is maximal in 
+        # symbolic form
+        self.y_pred=T.argmax(self.p_y_given_x, axis=1)
+
+        # list of parameters for this layer
+        self.params = [self.W, self.b]
+        
+
+    def negative_log_likelihood(self, y):
+       return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+	   
+    def cross_entropy(self, y):
+        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]+T.sum(T.log(1-self.p_y_given_x), axis=1)-T.log(1-self.p_y_given_x)[T.arange(y.shape[0]),y])
+
+    def errors(self, y):
+        # check if y has same dimension of y_pred 
+        if y.ndim != self.y_pred.ndim:
+            raise TypeError('y should have the same shape as self.y_pred', 
+                ('y', target.type, 'y_pred', self.y_pred.type))
+
+        # check if y is of the correct datatype        
+        if y.dtype.startswith('int'):
+            # the T.neq operator returns a vector of 0s and 1s, where 1
+            # represents a mistake in prediction
+            return T.mean(T.neq(self.y_pred, y))
+        else:
+            raise NotImplementedError()
+
+
+class SigmoidalLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+        self.input = input
+
+        W_values = numpy.asarray( rng.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+
+        self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
+        self.params = [self.W, self.b]
+
+
+
+class dA(object):
+  def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\
+               input = None, shared_W = None, shared_b = None):
+    self.n_visible = n_visible
+    self.n_hidden  = n_hidden
+    
+    # create a Theano random generator that gives symbolic random values
+    theano_rng = RandomStreams()
+    
+    if shared_W != None and shared_b != None : 
+        self.W = shared_W
+        self.b = shared_b
+    else:
+        # initial values for weights and biases
+        # note : W' was written as `W_prime` and b' as `b_prime`
+
+        # W is initialized with `initial_W` which is uniformely sampled
+        # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible)
+        # the output of uniform if converted using asarray to dtype 
+        # theano.config.floatX so that the code is runable on GPU
+        initial_W = numpy.asarray( numpy.random.uniform( \
+              low = -numpy.sqrt(6./(n_hidden+n_visible)), \
+              high = numpy.sqrt(6./(n_hidden+n_visible)), \
+              size = (n_visible, n_hidden)), dtype = theano.config.floatX)
+        initial_b       = numpy.zeros(n_hidden, dtype = theano.config.floatX)
+    
+    
+        # theano shared variables for weights and biases
+        self.W       = theano.shared(value = initial_W,       name = "W")
+        self.b       = theano.shared(value = initial_b,       name = "b")
+    
+ 
+    initial_b_prime= numpy.zeros(n_visible)
+    # tied weights, therefore W_prime is W transpose
+    self.W_prime = self.W.T 
+    self.b_prime = theano.shared(value = initial_b_prime, name = "b'")
+
+    # if no input is given, generate a variable representing the input
+    if input == None : 
+        # we use a matrix because we expect a minibatch of several examples,
+        # each example being a row
+        self.x = T.matrix(name = 'input') 
+    else:
+        self.x = input
+    # Equation (1)
+    # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs
+    # note : first argument of theano.rng.binomial is the shape(size) of 
+    #        random numbers that it should produce
+    #        second argument is the number of trials 
+    #        third argument is the probability of success of any trial
+    #
+    #        this will produce an array of 0s and 1s where 1 has a 
+    #        probability of 1 - ``corruption_level`` and 0 with
+    #        ``corruption_level``
+    self.tilde_x  = theano_rng.binomial( self.x.shape,  1,  1 - corruption_level, dtype=theano.config.floatX) * self.x
+    # Equation (2)
+    # note  : y is stored as an attribute of the class so that it can be 
+    #         used later when stacking dAs. 
+    self.y   = T.nnet.sigmoid(T.dot(self.tilde_x, self.W      ) + self.b)
+    # Equation (3)
+    #self.z   = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+    # Equation (4)
+    # note : we sum over the size of a datapoint; if we are using minibatches,
+    #        L will  be a vector, with one entry per example in minibatch
+    #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) 
+    #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+
+    # bypassing z to avoid running to log(0)
+    z_a = T.dot(self.y, self.W_prime) + self.b_prime
+    log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a))
+    # log(1-sigmoid(z_a))
+    log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a))
+    self.L = -T.sum( self.x * (log_sigmoid) \
+                    + (1.0-self.x) * (log_1_sigmoid), axis=1 )
+
+    # I added this epsilon to avoid getting log(0) and 1/0 in grad
+    # This means conceptually that there'd be no probability of 0, but that
+    # doesn't seem to me as important (maybe I'm wrong?).
+    #eps = 0.00000001
+    #eps_1 = 1-eps
+    #self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+    #                + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
+    # note : L is now a vector, where each element is the cross-entropy cost 
+    #        of the reconstruction of the corresponding example of the 
+    #        minibatch. We need to compute the average of all these to get 
+    #        the cost of the minibatch
+    self.cost = T.mean(self.L)
+
+    self.params = [ self.W, self.b, self.b_prime ]
+
+
+class SdA(object):
+    def __init__(self, batch_size, n_ins, 
+                 hidden_layers_sizes, n_outs, 
+                 corruption_levels, rng, pretrain_lr, finetune_lr, detection_mode):
+        # Just to make sure those are not modified somewhere else afterwards
+        hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes)
+        corruption_levels = copy.deepcopy(corruption_levels)
+
+        update_locals(self, locals())      
+ 
+        self.layers             = []
+        self.pretrain_functions = []
+        self.params             = []
+        # MODIF: added this so we also get the b_primes
+        # (not used for finetuning... still using ".params")
+        self.all_params         = []
+        self.n_layers           = len(hidden_layers_sizes)
+        self.logistic_params    = []
+
+        print "Creating SdA with params:"
+        print "batch_size", batch_size
+        print "hidden_layers_sizes", hidden_layers_sizes
+        print "corruption_levels", corruption_levels
+        print "n_ins", n_ins
+        print "n_outs", n_outs
+        print "pretrain_lr", pretrain_lr
+        print "finetune_lr", finetune_lr
+        print "detection_mode", detection_mode
+        print "----"
+
+        if len(hidden_layers_sizes) < 1 :
+            raiseException (' You must have at least one hidden layer ')
+
+
+        # allocate symbolic variables for the data
+        #index   = T.lscalar()    # index to a [mini]batch 
+        self.x  = T.matrix('x')  # the data is presented as rasterized images
+        self.y  = T.ivector('y') # the labels are presented as 1D vector of 
+                                 # [int] labels
+        self.finetune_lr = T.fscalar('finetune_lr') #To get a dynamic finetune learning rate
+
+        for i in xrange( self.n_layers ):
+            # construct the sigmoidal layer
+
+            # the size of the input is either the number of hidden units of 
+            # the layer below or the input size if we are on the first layer
+            if i == 0 :
+                input_size = n_ins
+            else:
+                input_size = hidden_layers_sizes[i-1]
+
+            # the input to this layer is either the activation of the hidden
+            # layer below or the input of the SdA if you are on the first
+            # layer
+            if i == 0 : 
+                layer_input = self.x
+            else:
+                layer_input = self.layers[-1].output
+
+            layer = SigmoidalLayer(rng, layer_input, input_size, 
+                                   hidden_layers_sizes[i] )
+            # add the layer to the 
+            self.layers += [layer]
+            self.params += layer.params
+        
+            # Construct a denoising autoencoder that shared weights with this
+            # layer
+            dA_layer = dA(input_size, hidden_layers_sizes[i], \
+                          corruption_level = corruption_levels[0],\
+                          input = layer_input, \
+                          shared_W = layer.W, shared_b = layer.b)
+
+            self.all_params += dA_layer.params
+        
+            # Construct a function that trains this dA
+            # compute gradients of layer parameters
+            gparams = T.grad(dA_layer.cost, dA_layer.params)
+            # compute the list of updates
+            updates = {}
+            for param, gparam in zip(dA_layer.params, gparams):
+                updates[param] = param - gparam * pretrain_lr
+            
+            # create a function that trains the dA
+            update_fn = theano.function([self.x], dA_layer.cost, \
+                  updates = updates)#,
+            #     givens = { 
+            #         self.x : ensemble})
+            # collect this function into a list
+            #update_fn = theano.function([index], dA_layer.cost, \
+            #      updates = updates,
+            #      givens = { 
+            #         self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider})
+            # collect this function into a list
+            self.pretrain_functions += [update_fn]
+
+        
+        # We now need to add a logistic layer on top of the SDA
+        self.logLayer = LogisticRegression(\
+                         input = self.layers[-1].output,\
+                         n_in = hidden_layers_sizes[-1], n_out = n_outs, detection_mode = detection_mode)
+
+        self.params += self.logLayer.params
+        self.all_params += self.logLayer.params
+        # construct a function that implements one step of finetunining
+
+        
+        if detection_mode:
+            # compute the cost, defined as the negative log likelihood 
+            cost = self.logLayer.cross_entropy(self.y)
+            # compute the gradients with respect to the logistic regression parameters
+            gparams = T.grad(cost, self.logLayer.params)
+            # compute list of updates
+            updates = {}
+            for param,gparam in zip(self.logLayer.params, gparams):
+                updates[param] = param - gparam*finetune_lr
+         
+        else:
+            # compute the cost, defined as the negative log likelihood 
+            cost = self.logLayer.negative_log_likelihood(self.y)
+            # compute the gradients with respect to the model parameters
+            gparams = T.grad(cost, self.params)
+            # compute list of updates
+            updates = {}
+            for param,gparam in zip(self.params, gparams):
+                updates[param] = param - gparam*self.finetune_lr
+            
+        self.finetune = theano.function([self.x,self.y,self.finetune_lr], cost, 
+                updates = updates)#,
+                
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+
+        self.errors = self.logLayer.errors(self.y)
+        
+        
+        #STRUCTURE FOR THE FINETUNING OF THE LOGISTIC REGRESSION ON THE TOP WITH
+        #ALL HIDDEN LAYERS AS INPUT
+        '''
+        
+        all_h=[]
+        for i in xrange(self.n_layers):
+            all_h.append(self.layers[i].output)
+        self.all_hidden=T.concatenate(all_h,axis=1)
+
+
+        self.logLayer2 = LogisticRegression(\
+                         input = self.all_hidden,\
+                         n_in = sum(hidden_layers_sizes), n_out = n_outs)
+                         #n_in=hidden_layers_sizes[0],n_out=n_outs)
+
+        #self.logistic_params+= self.logLayer2.params
+        # construct a function that implements one step of finetunining
+        
+        self.logistic_params+=self.logLayer2.params
+        # compute the cost, defined as the negative log likelihood
+        if DETECTION_MODE:
+            cost2 = self.logLayer2.cross_entropy(self.y)
+        else:
+			cost2 = self.logLayer2.negative_log_likelihood(self.y)
+        # compute the gradients with respect to the model parameters
+        gparams2 = T.grad(cost2, self.logistic_params)
+
+        # compute list of updates
+        updates2 = {}
+        for param,gparam in zip(self.logistic_params, gparams2):
+            updates2[param] = param - gparam*finetune_lr
+   
+        self.finetune2 = theano.function([self.x,self.y], cost2, 
+                updates = updates2)
+
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+
+        self.errors2 = self.logLayer2.errors(self.y)
+        '''
+
+if __name__ == '__main__':
+    import sys
+    args = sys.argv[1:]
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_youssouf/train_error.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_youssouf/train_error.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,118 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import ift6266
+import pylearn
+
+import numpy 
+import theano
+import time
+
+import pylearn.version
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+import copy
+import sys
+import os
+import os.path
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_cartesien_jobs
+from copy import copy
+
+from sgd_optimization import SdaSgdOptimizer
+
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
+
+from ift6266 import datasets
+from config import *
+
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
+'''
+def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
+    channel.save()
+
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+ 
+    n_ins = 32*32
+    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+     
+    examples_per_epoch = NIST_ALL_TRAIN_SIZE
+
+    PATH = ''
+    maximum_exemples=int(500000) #Maximum number of exemples seen
+
+
+
+    print "Creating optimizer with state, ", state
+
+    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(), 
+                                    hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    examples_per_epoch=examples_per_epoch, \
+                                    max_minibatches=rtt)	
+
+
+    
+    
+
+    if os.path.exists(PATH+'params_finetune_NIST.txt'):
+        print ('\n finetune = NIST ')
+        optimizer.reload_parameters(PATH+'params_finetune_NIST.txt')
+        print "For" + str(maximum_exemples) + "over the NIST training set: "
+        optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples))
+        
+    
+    if os.path.exists(PATH+'params_finetune_P07.txt'):
+        print ('\n finetune = P07 ')
+        optimizer.reload_parameters(PATH+'params_finetune_P07.txt')
+        print "For" + str(maximum_exemples) + "over the P07 training set: "
+        optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples))
+
+    
+    if os.path.exists(PATH+'params_finetune_NIST_then_P07.txt'):
+        print ('\n finetune = NIST then P07')
+        optimizer.reload_parameters(PATH+'params_finetune_NIST_then_P07.txt')
+        print "For" + str(maximum_exemples) + "over the NIST training set: "
+        optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples))
+        print "For" + str(maximum_exemples) + "over the P07 training set: "
+        optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples))
+    
+    if os.path.exists(PATH+'params_finetune_P07_then_NIST.txt'):
+        print ('\n finetune = P07 then NIST')
+        optimizer.reload_parameters(PATH+'params_finetune_P07_then_NIST.txt')
+        print "For" + str(maximum_exemples) + "over the P07 training set: "
+        optimizer.training_error(datasets.nist_P07(maxsize=maximum_exemples))
+        print "For" + str(maximum_exemples) + "over the NIST training set: "
+        optimizer.training_error(datasets.nist_all(maxsize=maximum_exemples))
+    
+    channel.save()
+
+    return channel.COMPLETE
+
+
+
+if __name__ == '__main__':
+
+
+    chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
+    jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
+
+
diff -r 01445a75c702 -r 0b7e64e8e93f deep/stacked_dae/v_youssouf/utils.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_youssouf/utils.py	Sun Apr 25 17:12:03 2010 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/python
+# coding: utf-8
+
+from __future__ import with_statement
+
+from jobman import DD
+
+# from pylearn codebase
+# useful in __init__(param1, param2, etc.) to save
+# values in self.param1, self.param2... just call
+# update_locals(self, locals())
+def update_locals(obj, dct):
+    if 'self' in dct:
+        del dct['self']
+    obj.__dict__.update(dct)
+
+# from a dictionary of possible values for hyperparameters, e.g.
+# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]}
+# create a list of other dictionaries representing all the possible
+# combinations, thus in this example creating:
+# [{'learning_rate': 0.1, 'num_layers': 1}, ...]
+# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2))
+def produit_cartesien_jobs(val_dict):
+    job_list = [DD()]
+    all_keys = val_dict.keys()
+
+    for key in all_keys:
+        possible_values = val_dict[key]
+        new_job_list = []
+        for val in possible_values:
+            for job in job_list:
+                to_insert = job.copy()
+                to_insert.update({key: val})
+                new_job_list.append(to_insert)
+        job_list = new_job_list
+
+    return job_list
+
+def test_produit_cartesien_jobs():
+    vals = {'a': [1,2], 'b': [3,4,5]}
+    print produit_cartesien_jobs(vals)
+
+
+# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
+"""Simple module for getting amount of memory used by a specified user's
+processes on a UNIX system.
+It uses UNIX ps utility to get the memory usage for a specified username and
+pipe it to awk for summing up per application memory usage and return the total.
+Python's Popen() from subprocess module is used for spawning ps and awk.
+
+"""
+
+import subprocess
+
+class MemoryMonitor(object):
+
+    def __init__(self, username):
+        """Create new MemoryMonitor instance."""
+        self.username = username
+
+    def usage(self):
+        """Return int containing memory used by user's processes."""
+        self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username,
+                                        shell=True,
+                                        stdout=subprocess.PIPE,
+                                        )
+        self.stdout_list = self.process.communicate()[0].split('\n')
+        return int(self.stdout_list[0])
+
diff -r 01445a75c702 -r 0b7e64e8e93f scripts/launch_generate100.py
--- a/scripts/launch_generate100.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/scripts/launch_generate100.py	Sun Apr 25 17:12:03 2010 -0400
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
 
 import os
-dir1 = "/data/lisa/data/ift6266h10/"
+dir1 = "/data/lisa8/data/ift6266h10/"
+dir2="/data/lisa/data/ift6266h10/"
 
-mach = ["maggie16.iro.umontreal.ca,zappa8@iro.umontreal.ca"]
+mach = "maggie16.iro.umontreal.ca,zappa8.iro.umontreal.ca,maggie15.iro.umontreal.ca,brams04.iro.umontreal.ca"
 
 #test and valid sets
 for i,s in enumerate(['valid','test']):
@@ -16,11 +17,18 @@
     os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i))
 
 #PNIST07
-for i in range(100):
-    os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/PNIST07_train%d_data.ft -p %sdata/PNIST07_train%d_params -x %sdata/PNIST07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d -t %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i,1))
+for i in xrange(10,100):
+   os.system("dbidispatch --condor --mem=3900 --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/PNIST07_train%d_data.ft -p %sdata/PNIST07_train%d_params -x %sdata/PNIST07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d -t 1" % (mach, dir1, i, dir1, i, dir1, i, dir2, dir2, dir2, dir2, 100+i))
 
 
 
-#P07
-#for i in [90,94]:#[2,10,13,15,20,49,68,82,86,90,94]:
-   #os.system("dbidispatch --condor --mem=3900 --os=fc4,fc7,fc9 --machine=maggie16.iro.umontreal.ca --machine=maggie15.iro.umontreal.ca --machine=zappa8@iro.umontreal.ca ./run_pipeline.sh -o %sdata2/P07_train%d_data.ft -p %sdata2/P07_train%d_params -x %sdata2/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1,100+i))
+#PNIST_full_noise
+for i in range(100):
+   os.system("dbidispatch --condor --mem=3900 --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/Pin07_train%d_data.ft -p %sdata/Pin07_train%d_params -x %sdata/Pin07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir2, dir2, dir2, dir2,100+i))
+
+
+
+#P07_safe
+for i in xrange(89,100,1):
+   os.system("dbidispatch --condor --mem=3900 --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P07safe_train%d_data.ft -p %sdata/P07safe_train%d_params -x %sdata/P07safe_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.0 -b 0.0 -g 0.0 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir2, dir2, dir2,dir2,100+i))
+
diff -r 01445a75c702 -r 0b7e64e8e93f scripts/setup_batches.py
--- a/scripts/setup_batches.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/scripts/setup_batches.py	Sun Apr 25 17:12:03 2010 -0400
@@ -15,8 +15,14 @@
 
     lower_train_data = 'lower/lower_train_data.ft'
     lower_train_labels = 'lower/lower_train_labels.ft'
+    lower_test_data = 'lower/lower_test_data.ft'
+    lower_test_labels = 'lower/lower_test_labels.ft'
+
     upper_train_data = 'upper/upper_train_data.ft'
     upper_train_labels = 'upper/upper_train_labels.ft'
+    upper_test_data = 'upper/upper_test_data.ft'
+    upper_test_labels = 'upper/upper_test_labels.ft'
+
     test_data = 'all/all_test_data.ft'
     test_labels = 'all/all_test_labels.ft'
 
@@ -29,11 +35,16 @@
 
     f_lower_train_data = open(data_path + lower_train_data)
     f_lower_train_labels = open(data_path + lower_train_labels)
+    f_lower_test_data = open(data_path + lower_test_data)
+    f_lower_test_labels = open(data_path + lower_test_labels)
+
     f_upper_train_data = open(data_path + upper_train_data)
     f_upper_train_labels = open(data_path + upper_train_labels)
+    f_upper_test_data = open(data_path + upper_test_data)
+    f_upper_test_labels = open(data_path + upper_test_labels)
 
-    f_test_data = open(data_path + test_data)
-    f_test_labels = open(data_path + test_labels)
+    #f_test_data = open(data_path + test_data)
+    #f_test_labels = open(data_path + test_labels)
 
     self.raw_digits_train_data = ft.read(f_digits_train_data)
     self.raw_digits_train_labels = ft.read(f_digits_train_labels)
@@ -42,11 +53,16 @@
 
     self.raw_lower_train_data = ft.read(f_lower_train_data)
     self.raw_lower_train_labels = ft.read(f_lower_train_labels)
+    self.raw_lower_test_data = ft.read(f_lower_test_data)
+    self.raw_lower_test_labels = ft.read(f_lower_test_labels)
+
     self.raw_upper_train_data = ft.read(f_upper_train_data)
     self.raw_upper_train_labels = ft.read(f_upper_train_labels)
+    self.raw_upper_test_data = ft.read(f_upper_test_data)
+    self.raw_upper_test_labels = ft.read(f_upper_test_labels)
 
-    self.raw_test_data = ft.read(f_test_data)
-    self.raw_test_labels = ft.read(f_test_labels)
+    #self.raw_test_data = ft.read(f_test_data)
+    #self.raw_test_labels = ft.read(f_test_labels)
 
     f_digits_train_data.close()
     f_digits_train_labels.close()
@@ -55,41 +71,73 @@
 
     f_lower_train_data.close()
     f_lower_train_labels.close()
+    f_lower_test_data.close()
+    f_lower_test_labels.close()
+
     f_upper_train_data.close()
     f_upper_train_labels.close()
+    f_upper_test_data.close()
+    f_upper_test_labels.close()
 
-    f_test_data.close()
-    f_test_labels.close()
+    #f_test_data.close()
+    #f_test_labels.close()
 
     print 'Data opened'
 
-  def set_batches(self, start_ratio = -1, end_ratio = -1, batch_size = 20, verbose = False):
+  def set_batches(self, main_class = "d", start_ratio = -1, end_ratio = -1, batch_size = 20, verbose = False):
     self.batch_size = batch_size
 
     digits_train_size = len(self.raw_digits_train_labels)
     digits_test_size = len(self.raw_digits_test_labels)
 
     lower_train_size = len(self.raw_lower_train_labels)
+
     upper_train_size = len(self.raw_upper_train_labels)
+    upper_test_size = len(self.raw_upper_test_labels)
 
     if verbose == True:
       print 'digits_train_size = %d' %digits_train_size
       print 'digits_test_size = %d' %digits_test_size
       print 'lower_train_size = %d' %lower_train_size
       print 'upper_train_size = %d' %upper_train_size
+      print 'upper_test_size = %d' %upper_test_size
 
-    # define main and other datasets
-    raw_main_train_data = self.raw_digits_train_data
-    raw_other_train_data1 = self.raw_lower_train_labels
-    raw_other_train_data2 = self.raw_upper_train_labels
-    raw_test_data = self.raw_digits_test_data
-    #raw_test_data = self.raw_test_data
+    if main_class == "u":
+	# define main and other datasets
+	raw_main_train_data = self.raw_upper_train_data
+	raw_other_train_data1 = self.raw_lower_train_labels
+	raw_other_train_data2 = self.raw_digits_train_labels
+	raw_test_data = self.raw_upper_test_data
+
+	raw_main_train_labels = self.raw_upper_train_labels
+	raw_other_train_labels1 = self.raw_lower_train_labels
+	raw_other_train_labels2 = self.raw_digits_train_labels
+	raw_test_labels = self.raw_upper_test_labels
 
-    raw_main_train_labels = self.raw_digits_train_labels
-    raw_other_train_labels1 = self.raw_lower_train_labels
-    raw_other_train_labels2 = self.raw_upper_train_labels
-    raw_test_labels = self.raw_digits_test_labels
-    #raw_test_labels = self.raw_test_labels
+    elif main_class == "l":
+	# define main and other datasets
+	raw_main_train_data = self.raw_lower_train_data
+	raw_other_train_data1 = self.raw_upper_train_labels
+	raw_other_train_data2 = self.raw_digits_train_labels
+	raw_test_data = self.raw_lower_test_data
+
+	raw_main_train_labels = self.raw_lower_train_labels
+	raw_other_train_labels1 = self.raw_upper_train_labels
+	raw_other_train_labels2 = self.raw_digits_train_labels
+	raw_test_labels = self.raw_lower_test_labels
+
+    else:
+	main_class = "d"
+	# define main and other datasets
+	raw_main_train_data = self.raw_digits_train_data
+	raw_other_train_data1 = self.raw_lower_train_labels
+	raw_other_train_data2 = self.raw_upper_train_labels
+	raw_test_data = self.raw_digits_test_data
+
+	raw_main_train_labels = self.raw_digits_train_labels
+	raw_other_train_labels1 = self.raw_lower_train_labels
+	raw_other_train_labels2 = self.raw_upper_train_labels
+	raw_test_labels = self.raw_digits_test_labels
 
     main_train_size = len(raw_main_train_labels)
     other_train_size1 = len(raw_other_train_labels1)
@@ -113,6 +161,7 @@
       self.end_ratio = end_ratio
 
     if verbose == True:
+      print 'main class : %s' %main_class
       print 'start_ratio = %f' %self.start_ratio
       print 'end_ratio = %f' %self.end_ratio