# HG changeset patch # User Xavier Glorot # Date 1268756050 14400 # Node ID 2024368a8d3d0ab3e7b359b0a3bd7ed74e40ebee # Parent 0de14b2034c60f89d0635b4b803072aba30db291# Parent 3c54cb3713ef1b1ab708d2dcb28f86ade57949e3 merge diff -r 0de14b2034c6 -r 2024368a8d3d baseline/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d baseline/conv_mlp/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d baseline/conv_mlp/convolutional_mlp.py --- a/baseline/conv_mlp/convolutional_mlp.py Tue Mar 16 12:13:49 2010 -0400 +++ b/baseline/conv_mlp/convolutional_mlp.py Tue Mar 16 12:14:10 2010 -0400 @@ -26,7 +26,8 @@ import theano.sandbox.softsign import pylearn.datasets.MNIST from pylearn.io import filetensor as ft -from theano.sandbox import conv, downsample +from theano.tensor.signal import downsample +from theano.tensor.nnet import conv class LeNetConvPoolLayer(object): diff -r 0de14b2034c6 -r 2024368a8d3d baseline/deep_mlp/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d baseline/log_reg/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d baseline/log_reg/log_reg.py --- a/baseline/log_reg/log_reg.py Tue Mar 16 12:13:49 2010 -0400 +++ b/baseline/log_reg/log_reg.py Tue Mar 16 12:14:10 2010 -0400 @@ -35,11 +35,11 @@ """ __docformat__ = 'restructedtext en' -import numpy, time, cPickle, gzip +import numpy, time import theano import theano.tensor as T - +from ift6266 import datasets class LogisticRegression(object): """Multi-class Logistic Regression Class @@ -112,6 +112,8 @@ # i.e., the mean log-likelihood across the minibatch. return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] ) + def MSE(self, y): + return -T.mean(abs((self.p_t_given_x)[T.arange(y.shape[0]), y]-y)**2) def errors( self, y ): """Return a float representing the number of errors in the minibatch @@ -135,109 +137,15 @@ else: raise NotImplementedError() -def shared_dataset( data_xy ): - """ Function that loads the dataset into shared variables - - The reason we store our dataset in shared variables is to allow - Theano to copy it into the GPU memory (when code is run on GPU). - Since copying data into the GPU is slow, copying a minibatch everytime - is needed (the default behaviour if the data is not in a shared - variable) would lead to a large decrease in performance. - """ - data_x, data_y = data_xy - shared_x = theano.shared( numpy.asarray( data_x, dtype = theano.config.floatX ) ) - shared_y = theano.shared( numpy.asarray( data_y, dtype = theano.config.floatX ) ) - # When storing data on the GPU it has to be stored as floats - # therefore we will store the labels as ``floatX`` as well - # (``shared_y`` does exactly that). But during our computations - # we need them as ints (we use labels as index, and if they are - # floats it doesn't make sense) therefore instead of returning - # ``shared_y`` we will have to cast it to int. This little hack - # lets ous get around this issue - return shared_x, T.cast( shared_y, 'int32' ) - -def load_data_pkl_gz( dataset ): - ''' Loads the dataset - - :type dataset: string - :param dataset: the path to the dataset (here MNIST) - ''' - - #-------------------------------------------------------------------------------------------------------------------- - # Load Data - #-------------------------------------------------------------------------------------------------------------------- - - - print '... 
loading data' - - # Load the dataset - f = gzip.open(dataset,'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - - test_set_x, test_set_y = shared_dataset( test_set ) - valid_set_x, valid_set_y = shared_dataset( valid_set ) - train_set_x, train_set_y = shared_dataset( train_set ) - - rval = [ ( train_set_x, train_set_y ), ( valid_set_x,valid_set_y ), ( test_set_x, test_set_y ) ] - return rval - -##def load_data_ft( verbose = False,\ -## data_path = '/data/lisa/data/nist/by_class/'\ -## train_data = 'all/all_train_data.ft',\ -## train_labels = 'all/all_train_labels.ft',\ -## test_data = 'all/all_test_data.ft',\ -## test_labels = 'all/all_test_labels.ft'): -## -## train_data_file = open(data_path + train_data) -## train_labels_file = open(data_path + train_labels) -## test_labels_file = open(data_path + test_data) -## test_data_file = open(data_path + test_labels) -## -## raw_train_data = ft.read( train_data_file) -## raw_train_labels = ft.read(train_labels_file) -## raw_test_data = ft.read( test_labels_file) -## raw_test_labels = ft.read( test_data_file) -## -## f.close() -## g.close() -## i.close() -## h.close() -## -## -## test_set_x, test_set_y = shared_dataset(test_set) -## valid_set_x, valid_set_y = shared_dataset(valid_set) -## train_set_x, train_set_y = shared_dataset(train_set) -## -## rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] -## return rval -## #create a validation set the same size as the test size -## #use the end of the training array for this purpose -## #discard the last remaining so we get a %batch_size number -## test_size=len(raw_test_labels) -## test_size = int(test_size/batch_size) -## test_size*=batch_size -## train_size = len(raw_train_data) -## train_size = int(train_size/batch_size) -## train_size*=batch_size -## validation_size =test_size -## offset = train_size-test_size -## if verbose == True: -## print 'train size = %d' %train_size -## print 'test size = %d' %test_size -## print 'valid size = %d' %validation_size -## print 'offset = %d' %offset -## -## - #-------------------------------------------------------------------------------------------------------------------- # MAIN #-------------------------------------------------------------------------------------------------------------------- def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \ - dataset_name = 'mnist.pkl.gz', image_size = 28 * 28, nb_class = 10, \ + dataset=datasets.nist_digits, image_size = 32 * 32, nb_class = 10, \ patience = 5000, patience_increase = 2, improvement_threshold = 0.995): + #28 * 28 = 784 """ Demonstrate stochastic gradient descent optimization of a log-linear model @@ -254,9 +162,8 @@ :type batch_size: int :param batch_size: size of the minibatch - :type dataset_name: string - :param dataset: the path of the MNIST dataset file from - http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz + :type dataset: dataset + :param dataset: a dataset instance from ift6266.datasets :type image_size: int :param image_size: size of the input image in pixels (width * height) @@ -275,17 +182,6 @@ """ - datasets = load_data_pkl_gz( dataset_name ) - - train_set_x, train_set_y = datasets[0] - valid_set_x, valid_set_y = datasets[1] - test_set_x , test_set_y = datasets[2] - - # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.value.shape[0] / batch_size - n_valid_batches = valid_set_x.value.shape[0] / batch_size - n_test_batches = 
test_set_x.value.shape[0] / batch_size - #-------------------------------------------------------------------------------------------------------------------- # Build actual model #-------------------------------------------------------------------------------------------------------------------- @@ -308,17 +204,11 @@ # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch - test_model = theano.function( inputs = [ index ], - outputs = classifier.errors( y ), - givens = { - x:test_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y:test_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + test_model = theano.function( inputs = [ x, y ], + outputs = classifier.errors( y )) - validate_model = theano.function( inputs = [ index ], - outputs = classifier.errors( y ), - givens = { - x:valid_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y:valid_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + validate_model = theano.function( inputs = [ x, y ], + outputs = classifier.errors( y )) # compute the gradient of cost with respect to theta = ( W, b ) g_W = T.grad( cost = cost, wrt = classifier.W ) @@ -331,12 +221,9 @@ # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` - train_model = theano.function( inputs = [ index ], + train_model = theano.function( inputs = [ x, y ], outputs = cost, - updates = updates, - givens = { - x: train_set_x[ index * batch_size: ( index + 1 ) * batch_size ], - y: train_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } ) + updates = updates) #-------------------------------------------------------------------------------------------------------------------- # Train model @@ -349,38 +236,38 @@ # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min( n_train_batches, patience * 0.5 ) + validation_frequency = patience * 0.5 # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch - best_params = None + best_params = None best_validation_loss = float('inf') - test_score = 0. - start_time = time.clock() + test_score = 0. + start_time = time.clock() done_looping = False - n_epochs = nb_max_examples / train_set_x.value.shape[0] - epoch = 0 + n_iters = nb_max_examples / batch_size + epoch = 0 + iter = 0 - while ( epoch < n_epochs ) and ( not done_looping ): + while ( iter < n_iters ) and ( not done_looping ): epoch = epoch + 1 - for minibatch_index in xrange( n_train_batches ): + for x, y in dataset.train(batch_size): - minibatch_avg_cost = train_model( minibatch_index ) + minibatch_avg_cost = train_model( x, y ) # iteration number - iter = epoch * n_train_batches + minibatch_index + iter += 1 - if ( iter + 1 ) % validation_frequency == 0: + if iter % validation_frequency == 0: # compute zero-one loss on validation set - validation_losses = [ validate_model( i ) for i in xrange( n_valid_batches ) ] + validation_losses = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ] this_validation_loss = numpy.mean( validation_losses ) - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - ( epoch, minibatch_index + 1,n_train_batches, \ - this_validation_loss*100. ) ) + print('epoch %i, iter %i, validation error %f %%' % \ + ( epoch, iter, this_validation_loss*100. 
) ) # if we got the best validation score until now @@ -393,12 +280,12 @@ best_validation_loss = this_validation_loss # test it on the test set - test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] test_score = numpy.mean(test_losses) - print((' epoch %i, minibatch %i/%i, test error of best ' + print((' epoch %i, iter %i, test error of best ' 'model %f %%') % \ - (epoch, minibatch_index+1, n_train_batches,test_score*100.)) + (epoch, iter, test_score*100.)) if patience <= iter : done_looping = True @@ -410,20 +297,25 @@ ( best_validation_loss * 100., test_score * 100.)) print ('The code ran for %f minutes' % ((end_time-start_time) / 60.)) - ###### return validation_error, test_error, nb_exemples, time + return best_validation_loss, test_score, iter*batch_size, (end_time-start_time) / 60. if __name__ == '__main__': log_reg() def jobman_log_reg(state, channel): - (validation_error, test_error, nb_exemples, time) = log_reg( learning_rate = state.learning_rate,\ - nb_max_examples = state.nb_max_examples,\ - batch_size = state.batch_size,\ - dataset_name = state.dataset_name, \ + print state + (validation_error, test_error, nb_exemples, time) = log_reg( learning_rate = state.learning_rate, \ + nb_max_examples = state.nb_max_examples, \ + batch_size = state.batch_size,\ image_size = state.image_size, \ - nb_class = state.nb_class ) - + nb_class = state.nb_class, \ + patience = state.patience, \ + patience_increase = state.patience_increase, \ + improvement_threshold = state.improvement_threshold ) + + + print state state.validation_error = validation_error state.test_error = test_error state.nb_exemples = nb_exemples diff -r 0de14b2034c6 -r 2024368a8d3d baseline/mlp/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d baseline/mlp/mlp_get_error_from_model.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/baseline/mlp/mlp_get_error_from_model.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,151 @@ +__docformat__ = 'restructedtext en' + +import pdb +import numpy as np +import pylab +import time +import pylearn +from pylearn.io import filetensor as ft + +data_path = '/data/lisa/data/nist/by_class/' +test_data = 'all/all_train_data.ft' +test_labels = 'all/all_train_labels.ft' + +def read_test_data(mlp_model): + + + #read the data + h = open(data_path+test_data) + i= open(data_path+test_labels) + raw_test_data = ft.read(h) + raw_test_labels = ft.read(i) + i.close() + h.close() + + #read the model chosen + a=np.load(mlp_model) + W1=a['W1'] + W2=a['W2'] + b1=a['b1'] + b2=a['b2'] + + return (W1,b1,W2,b2,raw_test_data,raw_test_labels) + + + + +def get_total_test_error(everything): + + W1=everything[0] + b1=everything[1] + W2=everything[2] + b2=everything[3] + test_data=everything[4] + test_labels=everything[5] + total_error_count=0 + total_exemple_count=0 + + nb_error_count=0 + nb_exemple_count=0 + + char_error_count=0 + char_exemple_count=0 + + min_error_count=0 + min_exemple_count=0 + + maj_error_count=0 + maj_exemple_count=0 + + for i in range(test_labels.size): + total_exemple_count = total_exemple_count +1 + #get activation for layer 1 + a0=np.dot(np.transpose(W1),np.transpose(test_data[i]/255.0)) + b1 + #add non linear function to layer 1 activation + a0_out=np.tanh(a0) + + #get activation for output layer + a1= np.dot(np.transpose(W2),a0_out) + b2 + #add non linear function for output activation (softmax) + a1_exp = np.exp(a1) + sum_a1=np.sum(a1_exp) + a1_out=a1_exp/sum_a1 + + predicted_class=np.argmax(a1_out) + 
wanted_class=test_labels[i] + + if(predicted_class!=wanted_class): + total_error_count = total_error_count +1 + + #get grouped based error + #with a priori +# if(wanted_class>9 and wanted_class<35): +# min_exemple_count=min_exemple_count+1 +# predicted_class=np.argmax(a1_out[10:35])+10 +# if(predicted_class!=wanted_class): +# min_error_count=min_error_count+1 +# if(wanted_class<10): +# nb_exemple_count=nb_exemple_count+1 +# predicted_class=np.argmax(a1_out[0:10]) +# if(predicted_class!=wanted_class): +# nb_error_count=nb_error_count+1 +# if(wanted_class>34): +# maj_exemple_count=maj_exemple_count+1 +# predicted_class=np.argmax(a1_out[35:])+35 +# if(predicted_class!=wanted_class): +# maj_error_count=maj_error_count+1 +# +# if(wanted_class>9): +# char_exemple_count=char_exemple_count+1 +# predicted_class=np.argmax(a1_out[10:])+10 +# if(predicted_class!=wanted_class): +# char_error_count=char_error_count+1 + + + + #get grouped based error + #with no a priori + if(wanted_class>9 and wanted_class<35): + min_exemple_count=min_exemple_count+1 + predicted_class=np.argmax(a1_out) + if(predicted_class!=wanted_class): + min_error_count=min_error_count+1 + if(wanted_class<10): + nb_exemple_count=nb_exemple_count+1 + predicted_class=np.argmax(a1_out) + if(predicted_class!=wanted_class): + nb_error_count=nb_error_count+1 + if(wanted_class>34): + maj_exemple_count=maj_exemple_count+1 + predicted_class=np.argmax(a1_out) + if(predicted_class!=wanted_class): + maj_error_count=maj_error_count+1 + + if(wanted_class>9): + char_exemple_count=char_exemple_count+1 + predicted_class=np.argmax(a1_out) + if(predicted_class!=wanted_class): + char_error_count=char_error_count+1 + + + #convert to float + return ( total_exemple_count,nb_exemple_count,char_exemple_count,min_exemple_count,maj_exemple_count,\ + total_error_count,nb_error_count,char_error_count,min_error_count,maj_error_count,\ + total_error_count*100.0/total_exemple_count*1.0,\ + nb_error_count*100.0/nb_exemple_count*1.0,\ + char_error_count*100.0/char_exemple_count*1.0,\ + min_error_count*100.0/min_exemple_count*1.0,\ + maj_error_count*100.0/maj_exemple_count*1.0) + + + + + + + + + + + + + \ No newline at end of file diff -r 0de14b2034c6 -r 2024368a8d3d baseline/mlp/mlp_nist.py --- a/baseline/mlp/mlp_nist.py Tue Mar 16 12:13:49 2010 -0400 +++ b/baseline/mlp/mlp_nist.py Tue Mar 16 12:14:10 2010 -0400 @@ -31,6 +31,7 @@ import time import theano.tensor.nnet import pylearn +import theano,pylearn.version from pylearn.io import filetensor as ft data_path = '/data/lisa/data/nist/by_class/' @@ -174,17 +175,22 @@ nb_max_exemples=1000000,\ batch_size=20,\ nb_hidden = 500,\ - nb_targets = 62): + nb_targets = 62, + tau=1e6): configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] + #save initial learning rate if classical adaptive lr is used + initial_lr=learning_rate + total_validation_error_list = [] total_train_error_list = [] learning_rate_list=[] best_training_error=float('inf'); + f = open(data_path+train_data) g= open(data_path+train_labels) @@ -315,6 +321,8 @@ n_iter = nb_max_exemples/batch_size # nb of max times we are allowed to run through all exemples n_iter = n_iter/n_minibatches + 1 #round up n_iter=max(1,n_iter) # run at least once on short debug call + time_n=0 #in unit of exemples + if verbose == True: @@ -325,6 +333,9 @@ epoch = iter / n_minibatches minibatch_index = iter % n_minibatches + + if adaptive_lr==2: + classifier.lr.value = tau*initial_lr/(tau+time_n) # get the minibatches corresponding to `iter` modulo @@ -364,6 +375,8 @@ 
print('epoch %i, minibatch %i/%i, validation error %f, training error %f %%' % \ (epoch, minibatch_index+1, n_minibatches, \ this_validation_loss*100.,this_train_loss*100)) + print 'learning rate = %f' %classifier.lr.value + print 'time = %i' %time_n #save the learning rate @@ -425,6 +438,7 @@ break + time_n= time_n + batch_size end_time = time.clock() if verbose == True: print(('Optimization complete. Best validation score of %f %% ' @@ -448,7 +462,8 @@ (train_error,validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\ nb_max_exemples=state.nb_max_exemples,\ nb_hidden=state.nb_hidden,\ - adaptive_lr=state.adaptive_lr) + adaptive_lr=state.adaptive_lr,\ + tau=state.tau) state.train_error=train_error state.validation_error=validation_error state.test_error=test_error diff -r 0de14b2034c6 -r 2024368a8d3d datasets/defs.py --- a/datasets/defs.py Tue Mar 16 12:13:49 2010 -0400 +++ b/datasets/defs.py Tue Mar 16 12:14:10 2010 -0400 @@ -1,38 +1,54 @@ -__all__ = ['nist_digits', 'nist_lower', 'nist_upper', 'nist_all', 'ocr'] +__all__ = ['nist_digits', 'nist_lower', 'nist_upper', 'nist_all', 'ocr', + 'nist_P07', 'mnist'] from ftfile import FTDataSet +from gzpklfile import GzpklDataSet import theano - -NIST_PATH = '/data/lisa/data/nist/by_class/' -DATA_PATH = '/data/lisa/data/ift6266h10/' +import os -nist_digits = FTDataSet(train_data = [NIST_PATH+'digits/digits_train_data.ft'], - train_lbl = [NIST_PATH+'digits/digits_train_labels.ft'], - test_data = [NIST_PATH+'digits/digits_test_data.ft'], - test_lbl = [NIST_PATH+'digits/digits_test_labels.ft'], +# if the environmental variables exist, get the path from them, +# otherwise fall back on the default +NIST_PATH = os.getenv('NIST_PATH','/data/lisa/data/nist/by_class/') +DATA_PATH = os.getenv('DATA_PATH','/data/lisa/data/ift6266h10/') + +nist_digits = FTDataSet(train_data = [os.path.join(NIST_PATH,'digits/digits_train_data.ft')], + train_lbl = [os.path.join(NIST_PATH,'digits/digits_train_labels.ft')], + test_data = [os.path.join(NIST_PATH,'digits/digits_test_data.ft')], + test_lbl = [os.path.join(NIST_PATH,'digits/digits_test_labels.ft')], indtype=theano.config.floatX, inscale=255.) -nist_lower = FTDataSet(train_data = [NIST_PATH+'lower/lower_train_data.ft'], - train_lbl = [NIST_PATH+'lower/lower_train_labels.ft'], - test_data = [NIST_PATH+'lower/lower_test_data.ft'], - test_lbl = [NIST_PATH+'lower/lower_test_labels.ft'], +nist_lower = FTDataSet(train_data = [os.path.join(NIST_PATH,'lower/lower_train_data.ft')], + train_lbl = [os.path.join(NIST_PATH,'lower/lower_train_labels.ft')], + test_data = [os.path.join(NIST_PATH,'lower/lower_test_data.ft')], + test_lbl = [os.path.join(NIST_PATH,'lower/lower_test_labels.ft')], indtype=theano.config.floatX, inscale=255.) -nist_upper = FTDataSet(train_data = [NIST_PATH+'upper/upper_train_data.ft'], - train_lbl = [NIST_PATH+'upper/upper_train_labels.ft'], - test_data = [NIST_PATH+'upper/upper_test_data.ft'], - test_lbl = [NIST_PATH+'upper/upper_test_labels.ft'], +nist_upper = FTDataSet(train_data = [os.path.join(NIST_PATH,'upper/upper_train_data.ft')], + train_lbl = [os.path.join(NIST_PATH,'upper/upper_train_labels.ft')], + test_data = [os.path.join(NIST_PATH,'upper/upper_test_data.ft')], + test_lbl = [os.path.join(NIST_PATH,'upper/upper_test_labels.ft')], indtype=theano.config.floatX, inscale=255.) 
-nist_all = FTDataSet(train_data = [DATA_PATH+'train_data.ft'], - train_lbl = [DATA_PATH+'train_labels.ft'], - test_data = [DATA_PATH+'test_data.ft'], - test_lbl = [DATA_PATH+'test_labels.ft'], - valid_data = [DATA_PATH+'valid_data.ft'], - valid_lbl = [DATA_PATH+'valid_labels.ft'], +nist_all = FTDataSet(train_data = [os.path.join(DATA_PATH,'train_data.ft')], + train_lbl = [os.path.join(DATA_PATH,'train_labels.ft')], + test_data = [os.path.join(DATA_PATH,'test_data.ft')], + test_lbl = [os.path.join(DATA_PATH,'test_labels.ft')], + valid_data = [os.path.join(DATA_PATH,'valid_data.ft')], + valid_lbl = [os.path.join(DATA_PATH,'valid_labels.ft')], indtype=theano.config.floatX, inscale=255.) -ocr = FTDataSet(train_data = [DATA_PATH+'ocr_train_data.ft'], - train_lbl = [DATA_PATH+'ocr_train_labels.ft'], - test_data = [DATA_PATH+'ocr_test_data.ft'], - test_lbl = [DATA_PATH+'ocr_test_labels.ft'], - valid_data = [DATA_PATH+'ocr_valid_data.ft'], - valid_lbl = [DATA_PATH+'ocr_valid_labels.ft']) +ocr = FTDataSet(train_data = [os.path.join(DATA_PATH,'ocr_train_data.ft')], + train_lbl = [os.path.join(DATA_PATH,'ocr_train_labels.ft')], + test_data = [os.path.join(DATA_PATH,'ocr_test_data.ft')], + test_lbl = [os.path.join(DATA_PATH,'ocr_test_labels.ft')], + valid_data = [os.path.join(DATA_PATH,'ocr_valid_data.ft')], + valid_lbl = [os.path.join(DATA_PATH,'ocr_valid_labels.ft')], + indtype=theano.config.floatX, inscale=255.) + +nist_P07 = FTDataSet(train_data = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_data.ft') for i in range(100)], + train_lbl = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_labels.ft') for i in range(100)], + test_data = [os.path.join(DATA_PATH,'data/P07_test_data.ft')], + test_lbl = [os.path.join(DATA_PATH,'data/P07_test_labels.ft')], + valid_data = [os.path.join(DATA_PATH,'data/P07_valid_data.ft')], + valid_lbl = [os.path.join(DATA_PATH,'data/P07_valid_labels.ft')], + indtype=theano.config.floatX, inscale=255.) 
+ +mnist = GzpklDataSet(os.path.join(DATA_PATH,'mnist.pkl.gz')) diff -r 0de14b2034c6 -r 2024368a8d3d datasets/ftfile.py --- a/datasets/ftfile.py Tue Mar 16 12:13:49 2010 -0400 +++ b/datasets/ftfile.py Tue Mar 16 12:14:10 2010 -0400 @@ -193,12 +193,19 @@ if valid_data is None: total_valid_size = sum(FTFile(td).size for td in test_data) valid_size = total_valid_size/len(train_data) - self._train = FTData(train_data, train_lbl, size=-valid_size) - self._valid = FTData(train_data, train_lbl, skip=-valid_size) + self._train = FTData(train_data, train_lbl, size=-valid_size, + inscale=inscale, outscale=outscale, indtype=indtype, + outdtype=outdtype) + self._valid = FTData(train_data, train_lbl, skip=-valid_size, + inscale=inscale, outscale=outscale, indtype=indtype, + outdtype=outdtype) else: - self._train = FTData(train_data, train_lbl) - self._valid = FTData(valid_data, valid_lbl) - self._test = FTData(test_data, test_lbl) + self._train = FTData(train_data, train_lbl,inscale=inscale, + outscale=outscale, indtype=indtype, outdtype=outdtype) + self._valid = FTData(valid_data, valid_lbl,inscale=inscale, + outscale=outscale, indtype=indtype, outdtype=outdtype) + self._test = FTData(test_data, test_lbl,inscale=inscale, + outscale=outscale, indtype=indtype, outdtype=outdtype) def _return_it(self, batchsize, bufsize, ftdata): return izip(DataIterator(ftdata.open_inputs(), batchsize, bufsize), diff -r 0de14b2034c6 -r 2024368a8d3d datasets/gzpklfile.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datasets/gzpklfile.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,39 @@ +import gzip +try: + import cPickle as pickle +except ImportError: + import pickle + +from dataset import DataSet +from dsetiter import DataIterator +from itertools import izip + +class ArrayFile(object): + def __init__(self, ary): + self.ary = ary + self.pos = 0 + + def read(self, num): + res = self.ary[self.pos:self.pos+num] + self.pos += num + return res + +class GzpklDataSet(DataSet): + def __init__(self, fname): + self._fname = fname + self._train = 0 + self._valid = 1 + self._test = 2 + + def _load(self): + f = gzip.open(self._fname, 'rb') + try: + self.datas = pickle.load(f) + finally: + f.close() + + def _return_it(self, batchsz, bufsz, id): + if not hasattr(self, 'datas'): + self._load() + return izip(DataIterator([ArrayFile(self.datas[id][0])], batchsz, bufsz), + DataIterator([ArrayFile(self.datas[id][1])], batchsz, bufsz)) diff -r 0de14b2034c6 -r 2024368a8d3d deep/autoencoder/DA_training.py --- a/deep/autoencoder/DA_training.py Tue Mar 16 12:13:49 2010 -0400 +++ b/deep/autoencoder/DA_training.py Tue Mar 16 12:14:10 2010 -0400 @@ -93,7 +93,12 @@ theano_rng = RandomStreams() # create a numpy random generator numpy_rng = numpy.random.RandomState() - + + # print the parameter of the DA + if True : + print 'input size = %d' %n_visible + print 'hidden size = %d' %n_hidden + print 'complexity = %2.2f' %complexity # initial values for weights and biases # note : W' was written as `W_prime` and b' as `b_prime` @@ -250,7 +255,7 @@ # construct the denoising autoencoder class n_ins = 32*32 - encoder = dA(n_ins, n_code_layer, input = x.reshape((batch_size,n_ins))) + encoder = dA(n_ins, n_code_layer, complexity, input = x.reshape((batch_size,n_ins))) # Train autoencoder @@ -363,7 +368,7 @@ test_score)) if patience <= iter : - print('iter (%i) is superior than patience(%i). break', iter, patience) + print('iter (%i) is superior than patience(%i). 
break', (iter, patience)) break @@ -451,7 +456,7 @@ # construct the denoising autoencoder class n_ins = 28*28 - encoder = dA(n_ins, n_code_layer, input = x.reshape((batch_size,n_ins))) + encoder = dA(n_ins, n_code_layer, complexity, input = x.reshape((batch_size,n_ins))) # Train autoencoder diff -r 0de14b2034c6 -r 2024368a8d3d deep/autoencoder/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d deep/convolutional_dae/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d deep/convolutional_dae/stacked_convolutional_dae.py --- a/deep/convolutional_dae/stacked_convolutional_dae.py Tue Mar 16 12:13:49 2010 -0400 +++ b/deep/convolutional_dae/stacked_convolutional_dae.py Tue Mar 16 12:14:10 2010 -0400 @@ -7,44 +7,10 @@ from theano.tensor.signal import downsample from theano.tensor.nnet import conv -import gzip -import cPickle - - -class LogisticRegression(object): - - def __init__(self, input, n_in, n_out): - - self.W = theano.shared( value=numpy.zeros((n_in,n_out), - dtype = theano.config.floatX) ) - - self.b = theano.shared( value=numpy.zeros((n_out,), - dtype = theano.config.floatX) ) - - self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) - - self.y_pred=T.argmax(self.p_y_given_x, axis=1) - - self.params = [self.W, self.b] - - def negative_log_likelihood(self, y): - return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) - - def MSE(self, y): - return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]),y]-y)**2) +from ift6266 import datasets - def errors(self, y): - if y.ndim != self.y_pred.ndim: - raise TypeError('y should have the same shape as self.y_pred', - ('y', target.type, 'y_pred', self.y_pred.type)) - - - if y.dtype.startswith('int'): - return T.mean(T.neq(self.y_pred, y)) - else: - raise NotImplementedError() - +from ift6266.baseline.log_reg.log_reg import LogisticRegression class SigmoidalLayer(object): def __init__(self, rng, input, n_in, n_out): @@ -65,8 +31,9 @@ class dA_conv(object): - def __init__(self, corruption_level = 0.1, input = None, shared_W = None,\ - shared_b = None, filter_shape = None, image_shape = None, poolsize = (2,2)): + def __init__(self, input, filter_shape, corruption_level = 0.1, + shared_W = None, shared_b = None, image_shape = None, + poolsize = (2,2)): theano_rng = RandomStreams() @@ -80,18 +47,16 @@ self.W = shared_W self.b = shared_b else: - initial_W = numpy.asarray( numpy.random.uniform( \ - low = -numpy.sqrt(6./(fan_in+fan_out)), \ - high = numpy.sqrt(6./(fan_in+fan_out)), \ + initial_W = numpy.asarray( numpy.random.uniform( + low = -numpy.sqrt(6./(fan_in+fan_out)), + high = numpy.sqrt(6./(fan_in+fan_out)), size = filter_shape), dtype = theano.config.floatX) - initial_b = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX) - - + initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) self.W = theano.shared(value = initial_W, name = "W") self.b = theano.shared(value = initial_b, name = "b") - initial_b_prime= numpy.zeros((filter_shape[1],)) + initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX) self.W_prime=T.dtensor4('W_prime') @@ -99,11 +64,10 @@ self.x = input - self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x + self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x - conv1_out = conv.conv2d(self.tilde_x, self.W, \ - filter_shape=filter_shape, \ - image_shape=image_shape, border_mode='valid') + conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, + image_shape=image_shape, 
border_mode='valid') self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) @@ -111,19 +75,15 @@ da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ filter_shape[3] ] - da_image_shape = [ image_shape[0],filter_shape[0],image_shape[2]-filter_shape[2]+1, \ - image_shape[3]-filter_shape[3]+1 ] initial_W_prime = numpy.asarray( numpy.random.uniform( \ low = -numpy.sqrt(6./(fan_in+fan_out)), \ high = numpy.sqrt(6./(fan_in+fan_out)), \ size = da_filter_shape), dtype = theano.config.floatX) self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") - #import pdb;pdb.set_trace() - - conv2_out = conv.conv2d(self.y, self.W_prime, \ - filter_shape = da_filter_shape, image_shape = da_image_shape ,\ - border_mode='full') + conv2_out = conv.conv2d(self.y, self.W_prime, + filter_shape = da_filter_shape, + border_mode='full') self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale @@ -134,19 +94,16 @@ self.cost = T.mean(self.L) self.params = [ self.W, self.b, self.b_prime ] - - class LeNetConvPoolLayer(object): - def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)): - assert image_shape[1]==filter_shape[1] + def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)): self.input = input W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX) - self.W = theano.shared(value = W_values) + self.W = theano.shared(value=W_values) - b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX) - self.b = theano.shared(value= b_values) + b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) + self.b = theano.shared(value=b_values) conv_out = conv.conv2d(input, self.W, filter_shape=filter_shape, image_shape=image_shape) @@ -168,67 +125,60 @@ class SdA(): - def __init__(self, input, n_ins_conv, n_ins_mlp, train_set_x, train_set_y, batch_size, \ - conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, \ - rng, n_out, pretrain_lr, finetune_lr): - + def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes, + mlp_hidden_layers_sizes, corruption_levels, rng, n_out, + pretrain_lr, finetune_lr): + self.layers = [] self.pretrain_functions = [] self.params = [] self.conv_n_layers = len(conv_hidden_layers_sizes) self.mlp_n_layers = len(mlp_hidden_layers_sizes) - - index = T.lscalar() # index to a [mini]batch - self.x = T.dmatrix('x') # the data is presented as rasterized images + + self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of - - for i in xrange( self.conv_n_layers ): - filter_shape=conv_hidden_layers_sizes[i][0] image_shape=conv_hidden_layers_sizes[i][1] max_poolsize=conv_hidden_layers_sizes[i][2] if i == 0 : - layer_input=self.x.reshape((batch_size,1,28,28)) + layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32)) else: layer_input=self.layers[-1].output - - layer = LeNetConvPoolLayer(rng, input=layer_input, \ - image_shape=image_shape, \ - filter_shape=filter_shape,poolsize=max_poolsize) - print 'Convolutional layer '+str(i+1)+' created' - + + layer = LeNetConvPoolLayer(rng, input=layer_input, + image_shape=image_shape, + filter_shape=filter_shape, + poolsize=max_poolsize) + print 'Convolutional layer', str(i+1), 'created' + self.layers += [layer] self.params += layer.params - - da_layer = dA_conv(corruption_level = corruption_levels[0],\ - input = layer_input, \ - shared_W = layer.W, shared_b = layer.b,\ - filter_shape = filter_shape , image_shape = image_shape ) 
- - + + da_layer = dA_conv(corruption_level = corruption_levels[0], + input = layer_input, + shared_W = layer.W, shared_b = layer.b, + filter_shape = filter_shape, + image_shape = image_shape ) + gparams = T.grad(da_layer.cost, da_layer.params) - + updates = {} for param, gparam in zip(da_layer.params, gparams): - updates[param] = param - gparam * pretrain_lr - - - update_fn = theano.function([index], da_layer.cost, \ - updates = updates, - givens = { - self.x : train_set_x[index*batch_size:(index+1)*batch_size]} ) - + updates[param] = param - gparam * pretrain_lr + + update_fn = theano.function([self.x], da_layer.cost, updates = updates) + self.pretrain_functions += [update_fn] - + for i in xrange( self.mlp_n_layers ): if i == 0 : input_size = n_ins_mlp else: input_size = mlp_hidden_layers_sizes[i-1] - + if i == 0 : if len( self.layers ) == 0 : layer_input=self.x @@ -236,72 +186,43 @@ layer_input = self.layers[-1].output.flatten(2) else: layer_input = self.layers[-1].output - + layer = SigmoidalLayer(rng, layer_input, input_size, mlp_hidden_layers_sizes[i] ) - + self.layers += [layer] self.params += layer.params - - print 'MLP layer '+str(i+1)+' created' + print 'MLP layer', str(i+1), 'created' self.logLayer = LogisticRegression(input=self.layers[-1].output, \ n_in=mlp_hidden_layers_sizes[-1], n_out=n_out) self.params += self.logLayer.params - + cost = self.logLayer.negative_log_likelihood(self.y) + + gparams = T.grad(cost, self.params) - gparams = T.grad(cost, self.params) updates = {} - for param,gparam in zip(self.params, gparams): updates[param] = param - gparam*finetune_lr - - self.finetune = theano.function([index], cost, - updates = updates, - givens = { - self.x : train_set_x[index*batch_size:(index+1)*batch_size], - self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) - + + self.finetune = theano.function([self.x, self.y], cost, updates = updates) + + self.errors = self.logLayer.errors(self.y) - self.errors = self.logLayer.errors(self.y) - - - def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \ pretrain_lr = 0.01, training_epochs = 1000, \ - dataset='mnist.pkl.gz'): - - f = gzip.open(dataset,'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - - - def shared_dataset(data_xy): - data_x, data_y = data_xy - shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) - shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) - return shared_x, T.cast(shared_y, 'int32') - - - test_set_x, test_set_y = shared_dataset(test_set) - valid_set_x, valid_set_y = shared_dataset(valid_set) - train_set_x, train_set_y = shared_dataset(train_set) - + dataset=datasets.nist_digits): + batch_size = 500 # size of the minibatch - - n_train_batches = train_set_x.value.shape[0] / batch_size - n_valid_batches = valid_set_x.value.shape[0] / batch_size - n_test_batches = test_set_x.value.shape[0] / batch_size - # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1d vector of - # [int] labels - layer0_input = x.reshape((batch_size,1,28,28)) + # [int] labels + layer0_input = x.reshape((x.shape[0],1,32,32)) # Setup the convolutional layers with their DAs(add as many as you want) @@ -310,45 +231,34 @@ ker1=2 ker2=2 conv_layers=[] - conv_layers.append([[ker1,1,5,5], [batch_size,1,28,28], [2,2] ]) - conv_layers.append([[ker2,ker1,5,5], [batch_size,ker1,12,12], [2,2] ]) + 
conv_layers.append([[ker1,1,5,5], None, [2,2] ]) + conv_layers.append([[ker2,ker1,5,5], None, [2,2] ]) # Setup the MLP layers of the network mlp_layers=[500] - network = SdA(input = layer0_input, n_ins_conv = 28*28, n_ins_mlp = ker2*4*4, \ - train_set_x = train_set_x, train_set_y = train_set_y, batch_size = batch_size, - conv_hidden_layers_sizes = conv_layers, \ - mlp_hidden_layers_sizes = mlp_layers, \ - corruption_levels = corruption_levels , n_out = 10, \ - rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate ) + network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4, + conv_hidden_layers_sizes = conv_layers, + mlp_hidden_layers_sizes = mlp_layers, + corruption_levels = corruption_levels , n_out = 10, + rng = rng , pretrain_lr = pretrain_lr , + finetune_lr = learning_rate ) - test_model = theano.function([index], network.errors, - givens = { - network.x: test_set_x[index*batch_size:(index+1)*batch_size], - network.y: test_set_y[index*batch_size:(index+1)*batch_size]}) + test_model = theano.function([network.x, network.y], network.errors) - validate_model = theano.function([index], network.errors, - givens = { - network.x: valid_set_x[index*batch_size:(index+1)*batch_size], - network.y: valid_set_y[index*batch_size:(index+1)*batch_size]}) - - - start_time = time.clock() for i in xrange(len(network.layers)-len(mlp_layers)): for epoch in xrange(pretraining_epochs): - for batch_index in xrange(n_train_batches): - c = network.pretrain_functions[i](batch_index) - print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch),c + for x, y in dataset.train(batch_size): + c = network.pretrain_functions[i](x) + print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c patience = 10000 # look as this many examples regardless patience_increase = 2. 
# WAIT THIS MUCH LONGER WHEN A NEW BEST IS # FOUND improvement_threshold = 0.995 # a relative improvement of this much is - validation_frequency = min(n_train_batches, patience/2) - + validation_frequency = patience/2 best_params = None best_validation_loss = float('inf') @@ -357,23 +267,21 @@ done_looping = False epoch = 0 - + iter = 0 + while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for x, y in dataset.train(batch_size): - cost_ij = network.finetune(minibatch_index) - iter = epoch * n_train_batches + minibatch_index - - if (iter+1) % validation_frequency == 0: + cost_ij = network.finetune(x, y) + iter += 1 + + if iter % validation_frequency == 0: + validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)] + this_validation_loss = numpy.mean(validation_losses) + print('epoch %i, iter %i, validation error %f %%' % \ + (epoch, iter, this_validation_loss*100.)) - validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] - this_validation_loss = numpy.mean(validation_losses) - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - (epoch, minibatch_index+1, n_train_batches, \ - this_validation_loss*100.)) - - # if we got the best validation score until now if this_validation_loss < best_validation_loss: @@ -381,35 +289,28 @@ if this_validation_loss < best_validation_loss * \ improvement_threshold : patience = max(patience, iter * patience_increase) - + # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter - + # test it on the test set - test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] test_score = numpy.mean(test_losses) - print((' epoch %i, minibatch %i/%i, test error of best ' + print((' epoch %i, iter %i, test error of best ' 'model %f %%') % - (epoch, minibatch_index+1, n_train_batches, - test_score*100.)) - - + (epoch, iter, test_score*100.)) + if patience <= iter : - done_looping = True - break - + done_looping = True + break + end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score*100.)) print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) - - - - - if __name__ == '__main__': sgd_optimization_mnist() diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/nist_sda.py --- a/deep/stacked_dae/nist_sda.py Tue Mar 16 12:13:49 2010 -0400 +++ b/deep/stacked_dae/nist_sda.py Tue Mar 16 12:14:10 2010 -0400 @@ -21,28 +21,35 @@ import jobman, jobman.sql from pylearn.io import filetensor -from utils import produit_croise_jobs +from utils import produit_cartesien_jobs from sgd_optimization import SdaSgdOptimizer from ift6266.utils.scalar_series import * +############################################################################## +# GLOBALS + TEST_CONFIG = False NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' - -JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/fsavard_sda2' +JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4' +EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint" REDUCE_TRAIN_TO = None MAX_FINETUNING_EPOCHS = 1000 -REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc. +# number of minibatches before taking means for valid error etc. 
+REDUCE_EVERY = 1000 + if TEST_CONFIG: REDUCE_TRAIN_TO = 1000 MAX_FINETUNING_EPOCHS = 2 REDUCE_EVERY = 10 -EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint" - +# Possible values the hyperparameters can take. These are then +# combined with produit_cartesien_jobs so we get a list of all +# possible combinations, each one resulting in a job inserted +# in the jobman DB. JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001], 'pretraining_epochs_per_layer': [10,20], 'hidden_layers_sizes': [300,800], @@ -57,13 +64,19 @@ 'pretraining_lr':0.1, 'pretraining_epochs_per_layer':20, 'max_finetuning_epochs':2, - 'hidden_layers_sizes':300, + 'hidden_layers_sizes':800, 'corruption_levels':0.2, 'minibatch_size':20, #'reduce_train_to':300, 'num_hidden_layers':2}) +''' +Function called by jobman upon launching each job +Its path is the one given when inserting jobs: +ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint +''' def jobman_entrypoint(state, channel): + # record mercurial versions of each package pylearn.version.record_versions(state,[theano,ift6266,pylearn]) channel.save() @@ -71,10 +84,12 @@ print "Will load NIST" - nist = NIST(20) + nist = NIST(minibatch_size=20) print "NIST loaded" + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. rtt = None if state.has_key('reduce_train_to'): rtt = state['reduce_train_to'] @@ -82,7 +97,7 @@ rtt = REDUCE_TRAIN_TO if rtt: - print "Reducing training set to "+str( rtt)+ " examples" + print "Reducing training set to "+str(rtt)+ " examples" nist.reduce_train_set(rtt) train,valid,test = nist.get_tvt() @@ -91,14 +106,9 @@ n_ins = 32*32 n_outs = 62 # 10 digits, 26*2 (lower, capitals) - hls = state.hidden_layers_sizes - cl = state.corruption_levels - nhl = state.num_hidden_layers - state.hidden_layers_sizes = [hls] * nhl - state.corruption_levels = [cl] * nhl - - # b,b',W for each hidden layer + b,W of last layer (logreg) - numparams = nhl * 3 + 2 + # b,b',W for each hidden layer + # + b,W of last layer (logreg) + numparams = state.num_hidden_layers * 3 + 2 series_mux = None series_mux = create_series(workingdir, numparams) @@ -114,11 +124,10 @@ optimizer.finetune() channel.save() - pylearn.version.record_versions(state,[theano,ift6266,pylearn]) - channel.save() - return channel.COMPLETE +# These Series objects are used to save various statistics +# during the training. def create_series(basedir, numparams): mux = SeriesMultiplexer() @@ -140,8 +149,11 @@ return mux +# Perform insertion into the Postgre DB based on combination +# of hyperparameter values above +# (see comment for produit_cartesien_jobs() to know how it works) def jobman_insert_nist(): - jobs = produit_croise_jobs(JOB_VALS) + jobs = produit_cartesien_jobs(JOB_VALS) db = jobman.sql.db(JOBDB) for job in jobs: @@ -227,35 +239,6 @@ raw_input("Press any key") -# hp for hyperparameters -def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'): - global DEFAULT_HP_NIST - hp = hp and hp or DEFAULT_HP_NIST - - print "Will load NIST" - - import time - t1 = time.time() - nist = NIST(20, reduce_train_to=100) - t2 = time.time() - - print "NIST loaded. 
time delta = ", t2-t1 - - train,valid,test = nist.get_tvt() - dataset = (train,valid,test) - - print train[0][15] - print type(train[0][1]) - - - print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0]) - - n_ins = 32*32 - n_outs = 62 # 10 digits, 26*2 (lower, capitals) - - optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0) - optimizer.train() - if __name__ == '__main__': import sys @@ -269,13 +252,9 @@ jobman_insert_nist() elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': - def f(): - pass - chanmock = DD({'COMPLETE':0,'save':f}) + chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) jobman_entrypoint(DEFAULT_HP_NIST, chanmock) - elif len(args) > 0 and args[0] == 'estimate': - estimate_total_time() else: - sgd_optimization_nist() + print "Bad arguments" diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/sgd_optimization.py --- a/deep/stacked_dae/sgd_optimization.py Tue Mar 16 12:13:49 2010 -0400 +++ b/deep/stacked_dae/sgd_optimization.py Tue Mar 16 12:14:10 2010 -0400 @@ -60,25 +60,34 @@ # compute number of minibatches for training, validation and testing self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size - self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size + # remove last batch in case it's incomplete + self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 def init_classifier(self): print "Constructing classifier" + # we don't want to save arrays in DD objects, so + # we recreate those arrays here + nhl = self.hp.num_hidden_layers + layers_sizes = [self.hp.hidden_layers_sizes] * nhl + corruption_levels = [self.hp.corruption_levels] * nhl + # construct the stacked denoising autoencoder class self.classifier = SdA( \ train_set_x= self.train_set_x, \ train_set_y = self.train_set_y,\ batch_size = self.hp.minibatch_size, \ n_ins= self.n_ins, \ - hidden_layers_sizes = self.hp.hidden_layers_sizes, \ + hidden_layers_sizes = layers_sizes, \ n_outs = self.n_outs, \ - corruption_levels = self.hp.corruption_levels,\ + corruption_levels = corruption_levels,\ rng = self.rng,\ pretrain_lr = self.hp.pretraining_lr, \ finetune_lr = self.hp.finetuning_lr,\ input_divider = self.input_divider ) + #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") + sys.stdout.flush() def train(self): @@ -89,6 +98,9 @@ print "STARTING PRETRAINING, time = ", datetime.datetime.now() sys.stdout.flush() + #time_acc_func = 0.0 + #time_acc_total = 0.0 + start_time = time.clock() ## Pre-train layer-wise for i in xrange(self.classifier.n_layers): @@ -96,7 +108,14 @@ for epoch in xrange(self.hp.pretraining_epochs_per_layer): # go through the training set for batch_index in xrange(self.n_train_batches): + #t1 = time.clock() c = self.classifier.pretrain_functions[i](batch_index) + #t2 = time.clock() + + #time_acc_func += t2 - t1 + + #if batch_index % 500 == 0: + # print "acc / total", time_acc_func / (t2 - start_time), time_acc_func self.series_mux.append("reconstruction_error", c) diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/stacked_dae.py --- a/deep/stacked_dae/stacked_dae.py Tue Mar 16 12:13:49 2010 -0400 +++ b/deep/stacked_dae/stacked_dae.py Tue Mar 16 12:14:10 2010 -0400 @@ -10,6 +10,15 @@ from utils import update_locals +# taken from LeDeepNet/daa.py +# has a special case when taking log(0) (defined =0) +# modified to not take the mean anymore +from 
theano.tensor.xlogx import xlogx, xlogy0 +# it's target*log(output) +def binary_cross_entropy(target, output, sum_axis=1): + XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output)) + return -T.sum(XE, axis=sum_axis) + class LogisticRegression(object): def __init__(self, input, n_in, n_out): # initialize with 0 the weights W as a matrix of shape (n_in, n_out) @@ -128,7 +137,21 @@ # Equation (4) # note : we sum over the size of a datapoint; if we are using minibatches, # L will be a vector, with one entry per example in minibatch - self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) + #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) + #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) + + # bypassing z to avoid running to log(0) + #self.z_a = T.dot(self.y, self.W_prime) + self.b_prime) + #self.L = -T.sum( self.x * (T.log(1)-T.log(1+T.exp(-self.z_a))) \ + # + (1.0-self.x) * (T.log(1)-T.log(1+T.exp(-self.z_a))), axis=1 ) + + # I added this epsilon to avoid getting log(0) and 1/0 in grad + # This means conceptually that there'd be no probability of 0, but that + # doesn't seem to me as important (maybe I'm wrong?). + eps = 0.00000001 + eps_1 = 1-eps + self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \ + + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) # note : L is now a vector, where each element is the cross-entropy cost # of the reconstruction of the corresponding example of the # minibatch. We need to compute the average of all these to get @@ -156,6 +179,17 @@ self.all_params = [] self.n_layers = len(hidden_layers_sizes) + print "Creating SdA with params:" + print "batch_size", batch_size + print "hidden_layers_sizes", hidden_layers_sizes + print "corruption_levels", corruption_levels + print "n_ins", n_ins + print "n_outs", n_outs + print "pretrain_lr", pretrain_lr + print "finetune_lr", finetune_lr + print "input_divider", input_divider + print "----" + self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) if len(hidden_layers_sizes) < 1 : diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/utils.py --- a/deep/stacked_dae/utils.py Tue Mar 16 12:13:49 2010 -0400 +++ b/deep/stacked_dae/utils.py Tue Mar 16 12:14:10 2010 -0400 @@ -6,12 +6,21 @@ from jobman import DD # from pylearn codebase +# useful in __init__(param1, param2, etc.) to save +# values in self.param1, self.param2... just call +# update_locals(self, locals()) def update_locals(obj, dct): if 'self' in dct: del dct['self'] obj.__dict__.update(dct) -def produit_croise_jobs(val_dict): +# from a dictionary of possible values for hyperparameters, e.g. +# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]} +# create a list of other dictionaries representing all the possible +# combinations, thus in this example creating: +# [{'learning_rate': 0.1, 'num_layers': 1}, ...] 
+# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2)) +def produit_cartesien_jobs(val_dict): job_list = [DD()] all_keys = val_dict.keys() @@ -27,9 +36,9 @@ return job_list -def test_produit_croise_jobs(): +def test_produit_cartesien_jobs(): vals = {'a': [1,2], 'b': [3,4,5]} - print produit_croise_jobs(vals) + print produit_cartesien_jobs(vals) # taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v2/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v2/config.py.example --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v2/config.py.example Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,64 @@ +''' +These are parameters used by nist_sda.py. They'll end up as globals in there. + +Rename this file to config.py and configure as needed. +DON'T add the renamed file to the repository, as others might use it +without realizing it, with dire consequences. +''' + +# Set this to True when you want to run cluster tests, ie. you want +# to run on the cluster, many jobs, but want to reduce the training +# set size and the number of epochs, so you know everything runs +# fine on the cluster. +# Set this PRIOR to inserting your test jobs in the DB. +TEST_CONFIG = False + +NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' +NIST_ALL_TRAIN_SIZE = 649081 +# valid et test =82587 82587 + +# change "sandbox" when you're ready +JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/yourtablenamehere' +EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v2.nist_sda.jobman_entrypoint" + +# reduce training set to that many examples +REDUCE_TRAIN_TO = None +# that's a max, it usually doesn't get to that point +MAX_FINETUNING_EPOCHS = 1000 +# number of minibatches before taking means for valid error etc. +REDUCE_EVERY = 100 + +if TEST_CONFIG: + REDUCE_TRAIN_TO = 1000 + MAX_FINETUNING_EPOCHS = 2 + REDUCE_EVERY = 10 + + +# This is to configure insertion of jobs on the cluster. +# Possible values the hyperparameters can take. These are then +# combined with produit_cartesien_jobs so we get a list of all +# possible combinations, each one resulting in a job inserted +# in the jobman DB. +JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001], + 'pretraining_epochs_per_layer': [10,20], + 'hidden_layers_sizes': [300,800], + 'corruption_levels': [0.1,0.2,0.3], + 'minibatch_size': [20], + 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS], + 'finetuning_lr':[0.1, 0.01], #0.001 was very bad, so we leave it out + 'num_hidden_layers':[2,3]} + +# Just useful for tests... 
minimal number of epochs +# (This is used when running a single job, locally, when +# calling ./nist_sda.py test_jobman_entrypoint +DEFAULT_HP_NIST = DD({'finetuning_lr':0.1, + 'pretraining_lr':0.1, + 'pretraining_epochs_per_layer':2, + 'max_finetuning_epochs':2, + 'hidden_layers_sizes':800, + 'corruption_levels':0.2, + 'minibatch_size':20, + 'reduce_train_to':10000, + 'num_hidden_layers':1}) + + diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v2/nist_sda.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v2/nist_sda.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,169 @@ +#!/usr/bin/python +# coding: utf-8 + +import ift6266 +import pylearn + +import numpy +import theano +import time + +import pylearn.version +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +import copy +import sys +import os +import os.path + +from jobman import DD +import jobman, jobman.sql +from pylearn.io import filetensor + +from utils import produit_cartesien_jobs + +from sgd_optimization import SdaSgdOptimizer + +#from ift6266.utils.scalar_series import * +from ift6266.utils.seriestables import * +import tables + +from ift6266 import datasets +from config import * + +''' +Function called by jobman upon launching each job +Its path is the one given when inserting jobs: see EXPERIMENT_PATH +''' +def jobman_entrypoint(state, channel): + # record mercurial versions of each package + pylearn.version.record_versions(state,[theano,ift6266,pylearn]) + # TODO: remove this, bad for number of simultaneous requests on DB + channel.save() + + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. + rtt = None + if state.has_key('reduce_train_to'): + rtt = state['reduce_train_to'] + elif REDUCE_TRAIN_TO: + rtt = REDUCE_TRAIN_TO + + n_ins = 32*32 + n_outs = 62 # 10 digits, 26*2 (lower, capitals) + + examples_per_epoch = NIST_ALL_TRAIN_SIZE + + series = create_series(state.num_hidden_layers) + + print "Creating optimizer with state, ", state + + optimizer = SdaSgdOptimizer(dataset=datasets.nist_all, + hyperparameters=state, \ + n_ins=n_ins, n_outs=n_outs,\ + examples_per_epoch=examples_per_epoch, \ + series=series, + max_minibatches=rtt) + + optimizer.pretrain(datasets.nist_all) + channel.save() + + optimizer.finetune(datasets.nist_all) + channel.save() + + return channel.COMPLETE + +# These Series objects are used to save various statistics +# during the training. +def create_series(num_hidden_layers): + + # Replace series we don't want to save with DummySeries, e.g. 
+ # series['training_error'] = DummySeries() + + series = {} + + basedir = os.getcwd() + + h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w") + + # reconstruction + reconstruction_base = \ + ErrorSeries(error_name="reconstruction_error", + table_name="reconstruction_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['reconstruction_error'] = \ + AccumulatorSeriesWrapper(base_series=reconstruction_base, + reduce_every=REDUCE_EVERY) + + # train + training_base = \ + ErrorSeries(error_name="training_error", + table_name="training_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['training_error'] = \ + AccumulatorSeriesWrapper(base_series=training_base, + reduce_every=REDUCE_EVERY) + + # valid and test are not accumulated/mean, saved directly + series['validation_error'] = \ + ErrorSeries(error_name="validation_error", + table_name="validation_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + series['test_error'] = \ + ErrorSeries(error_name="test_error", + table_name="test_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + param_names = [] + for i in range(num_hidden_layers): + param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i] + param_names += ['logreg_layer_W', 'logreg_layer_b'] + + # comment out series we don't want to save + series['params'] = SharedParamsStatisticsWrapper( + new_group_name="params", + base_group="/", + arrays_names=param_names, + hdf5_file=h5f, + index_names=('epoch',)) + + return series + +# Perform insertion into the Postgre DB based on combination +# of hyperparameter values above +# (see comment for produit_cartesien_jobs() to know how it works) +def jobman_insert_nist(): + jobs = produit_cartesien_jobs(JOB_VALS) + + db = jobman.sql.db(JOBDB) + for job in jobs: + job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) + jobman.sql.insert_dict(job, db) + + print "inserted" + +if __name__ == '__main__': + + args = sys.argv[1:] + + #if len(args) > 0 and args[0] == 'load_nist': + # test_load_nist() + + if len(args) > 0 and args[0] == 'jobman_insert': + jobman_insert_nist() + + elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': + chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) + jobman_entrypoint(DEFAULT_HP_NIST, chanmock) + + else: + print "Bad arguments" + diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v2/sgd_optimization.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v2/sgd_optimization.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,243 @@ +#!/usr/bin/python +# coding: utf-8 + +# Generic SdA optimization loop, adapted from the deeplearning.net tutorial + +import numpy +import theano +import time +import datetime +import theano.tensor as T +import sys + +from jobman import DD +import jobman, jobman.sql + +from stacked_dae import SdA + +from ift6266.utils.seriestables import * + +default_series = { \ + 'reconstruction_error' : DummySeries(), + 'training_error' : DummySeries(), + 'validation_error' : DummySeries(), + 'test_error' : DummySeries(), + 'params' : DummySeries() + } + +def itermax(iter, max): + for i,it in enumerate(iter): + if i >= max: + break + yield it + +class SdaSgdOptimizer: + def __init__(self, dataset, hyperparameters, n_ins, n_outs, + examples_per_epoch, series=default_series, max_minibatches=None): + self.dataset = dataset + self.hp = hyperparameters + self.n_ins = 
n_ins + self.n_outs = n_outs + + self.max_minibatches = max_minibatches + print "SdaSgdOptimizer, max_minibatches =", max_minibatches + + self.ex_per_epoch = examples_per_epoch + self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size + + self.series = series + + self.rng = numpy.random.RandomState(1234) + + self.init_classifier() + + sys.stdout.flush() + + def init_classifier(self): + print "Constructing classifier" + + # we don't want to save arrays in DD objects, so + # we recreate those arrays here + nhl = self.hp.num_hidden_layers + layers_sizes = [self.hp.hidden_layers_sizes] * nhl + corruption_levels = [self.hp.corruption_levels] * nhl + + # construct the stacked denoising autoencoder class + self.classifier = SdA( \ + batch_size = self.hp.minibatch_size, \ + n_ins= self.n_ins, \ + hidden_layers_sizes = layers_sizes, \ + n_outs = self.n_outs, \ + corruption_levels = corruption_levels,\ + rng = self.rng,\ + pretrain_lr = self.hp.pretraining_lr, \ + finetune_lr = self.hp.finetuning_lr) + + #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") + + sys.stdout.flush() + + def train(self): + self.pretrain(self.dataset) + self.finetune(self.dataset) + + def pretrain(self,dataset): + print "STARTING PRETRAINING, time = ", datetime.datetime.now() + sys.stdout.flush() + + start_time = time.clock() + ## Pre-train layer-wise + for i in xrange(self.classifier.n_layers): + # go through pretraining epochs + for epoch in xrange(self.hp.pretraining_epochs_per_layer): + # go through the training set + batch_index=0 + for x,y in dataset.train(self.hp.minibatch_size): + c = self.classifier.pretrain_functions[i](x) + + self.series["reconstruction_error"].append((epoch, batch_index), c) + batch_index+=1 + + #if batch_index % 100 == 0: + # print "100 batches" + + # useful when doing tests + if self.max_minibatches and batch_index >= self.max_minibatches: + break + + print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c + sys.stdout.flush() + + self.series['params'].append((epoch,), self.classifier.all_params) + + end_time = time.clock() + + print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) + self.hp.update({'pretraining_time': end_time-start_time}) + + sys.stdout.flush() + + def finetune(self,dataset): + print "STARTING FINETUNING, time = ", datetime.datetime.now() + + minibatch_size = self.hp.minibatch_size + + # create a function to compute the mistakes that are made by the model + # on the validation set, or testing set + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: ensemble_x, + # self.classifier.y: ensemble_y]}) + + validate_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: , + # self.classifier.y: ]}) + + + # early-stopping parameters + patience = 10000 # look as this many examples regardless + patience_increase = 2. # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(self.mb_per_epoch, patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + if self.max_minibatches and validation_frequency > self.max_minibatches: + validation_frequency = self.max_minibatches / 2 + + best_params = None + best_validation_loss = float('inf') + test_score = 0. 
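As an aside, the patience bookkeeping set up above can be summarized in isolation. The constants mirror the ones just defined; the helper name and the example minibatch index are hypothetical, sketch only:

    patience = 10000                # look at this many minibatches regardless
    patience_increase = 2.          # extension factor when a new best is found
    improvement_threshold = 0.995   # relative improvement considered significant

    def update_patience(patience, best_loss, new_loss, minibatch_index):
        # only a significantly better validation loss extends patience
        if new_loss < best_loss * improvement_threshold:
            patience = max(patience, minibatch_index * patience_increase)
        return patience

    # e.g. a new best found at minibatch 7000 gives max(10000, 7000 * 2.) = 14000.,
    # and the loop below stops as soon as patience <= total_mb_index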
+ start_time = time.clock() + + done_looping = False + epoch = 0 + + total_mb_index = 0 + + while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): + epoch = epoch + 1 + minibatch_index = -1 + for x,y in dataset.train(minibatch_size): + minibatch_index += 1 + cost_ij = self.classifier.finetune(x,y) + total_mb_index += 1 + + self.series["training_error"].append((epoch, minibatch_index), cost_ij) + + if (total_mb_index+1) % validation_frequency == 0: + + iter = dataset.valid(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + validation_losses = [validate_model(x,y) for x,y in iter] + this_validation_loss = numpy.mean(validation_losses) + + self.series["validation_error"].\ + append((epoch, minibatch_index), this_validation_loss*100.) + + print('epoch %i, minibatch %i/%i, validation error %f %%' % \ + (epoch, minibatch_index+1, self.mb_per_epoch, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, total_mb_index * patience_increase) + + # save best validation score and iteration number + best_validation_loss = this_validation_loss + best_iter = total_mb_index + + # test it on the test set + iter = dataset.test(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + test_losses = [test_model(x,y) for x,y in iter] + test_score = numpy.mean(test_losses) + + self.series["test_error"].\ + append((epoch, minibatch_index), test_score*100.) + + print((' epoch %i, minibatch %i/%i, test error of best ' + 'model %f %%') % + (epoch, minibatch_index+1, self.mb_per_epoch, + test_score*100.)) + + sys.stdout.flush() + + # useful when doing tests + if self.max_minibatches and minibatch_index >= self.max_minibatches: + break + + self.series['params'].append((epoch,), self.classifier.all_params) + + if patience <= total_mb_index: + done_looping = True + break + + end_time = time.clock() + self.hp.update({'finetuning_time':end_time-start_time,\ + 'best_validation_error':best_validation_loss,\ + 'test_score':test_score, + 'num_finetuning_epochs':epoch}) + + print(('Optimization complete with best validation score of %f %%,' + 'with test performance %f %%') % + (best_validation_loss * 100., test_score*100.)) + print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) + + + diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v2/stacked_dae.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v2/stacked_dae.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,292 @@ +#!/usr/bin/python +# coding: utf-8 + +import numpy +import theano +import time +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams +import copy + +from utils import update_locals + +# taken from LeDeepNet/daa.py +# has a special case when taking log(0) (defined =0) +# modified to not take the mean anymore +from theano.tensor.xlogx import xlogx, xlogy0 +# it's target*log(output) +def binary_cross_entropy(target, output, sum_axis=1): + XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output)) + return -T.sum(XE, axis=sum_axis) + +class LogisticRegression(object): + def __init__(self, input, n_in, n_out): + # initialize with 0 the weights W as a matrix of shape (n_in, n_out) + self.W = theano.shared( value=numpy.zeros((n_in,n_out), + dtype = 
theano.config.floatX) ) + # initialize the baises b as a vector of n_out 0s + self.b = theano.shared( value=numpy.zeros((n_out,), + dtype = theano.config.floatX) ) + # compute vector of class-membership probabilities in symbolic form + self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) + + # compute prediction as class whose probability is maximal in + # symbolic form + self.y_pred=T.argmax(self.p_y_given_x, axis=1) + + # list of parameters for this layer + self.params = [self.W, self.b] + + def negative_log_likelihood(self, y): + return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) + + def errors(self, y): + # check if y has same dimension of y_pred + if y.ndim != self.y_pred.ndim: + raise TypeError('y should have the same shape as self.y_pred', + ('y', target.type, 'y_pred', self.y_pred.type)) + + # check if y is of the correct datatype + if y.dtype.startswith('int'): + # the T.neq operator returns a vector of 0s and 1s, where 1 + # represents a mistake in prediction + return T.mean(T.neq(self.y_pred, y)) + else: + raise NotImplementedError() + + +class SigmoidalLayer(object): + def __init__(self, rng, input, n_in, n_out): + self.input = input + + W_values = numpy.asarray( rng.uniform( \ + low = -numpy.sqrt(6./(n_in+n_out)), \ + high = numpy.sqrt(6./(n_in+n_out)), \ + size = (n_in, n_out)), dtype = theano.config.floatX) + self.W = theano.shared(value = W_values) + + b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) + self.b = theano.shared(value= b_values) + + self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b) + self.params = [self.W, self.b] + + + +class dA(object): + def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\ + input = None, shared_W = None, shared_b = None): + self.n_visible = n_visible + self.n_hidden = n_hidden + + # create a Theano random generator that gives symbolic random values + theano_rng = RandomStreams() + + if shared_W != None and shared_b != None : + self.W = shared_W + self.b = shared_b + else: + # initial values for weights and biases + # note : W' was written as `W_prime` and b' as `b_prime` + + # W is initialized with `initial_W` which is uniformely sampled + # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible) + # the output of uniform if converted using asarray to dtype + # theano.config.floatX so that the code is runable on GPU + initial_W = numpy.asarray( numpy.random.uniform( \ + low = -numpy.sqrt(6./(n_hidden+n_visible)), \ + high = numpy.sqrt(6./(n_hidden+n_visible)), \ + size = (n_visible, n_hidden)), dtype = theano.config.floatX) + initial_b = numpy.zeros(n_hidden, dtype = theano.config.floatX) + + + # theano shared variables for weights and biases + self.W = theano.shared(value = initial_W, name = "W") + self.b = theano.shared(value = initial_b, name = "b") + + + initial_b_prime= numpy.zeros(n_visible) + # tied weights, therefore W_prime is W transpose + self.W_prime = self.W.T + self.b_prime = theano.shared(value = initial_b_prime, name = "b'") + + # if no input is given, generate a variable representing the input + if input == None : + # we use a matrix because we expect a minibatch of several examples, + # each example being a row + self.x = T.dmatrix(name = 'input') + else: + self.x = input + # Equation (1) + # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs + # note : first argument of theano.rng.binomial is the shape(size) of + # random numbers that it should produce + # second argument is the number of trials + # third 
argument is the probability of success of any trial + # + # this will produce an array of 0s and 1s where 1 has a + # probability of 1 - ``corruption_level`` and 0 with + # ``corruption_level`` + self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x + # Equation (2) + # note : y is stored as an attribute of the class so that it can be + # used later when stacking dAs. + self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) + # Equation (3) + #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) + # Equation (4) + # note : we sum over the size of a datapoint; if we are using minibatches, + # L will be a vector, with one entry per example in minibatch + #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) + #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) + + # bypassing z to avoid running to log(0) + z_a = T.dot(self.y, self.W_prime) + self.b_prime + log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a)) + # log(1-sigmoid(z_a)) + log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a)) + self.L = -T.sum( self.x * (log_sigmoid) \ + + (1.0-self.x) * (log_1_sigmoid), axis=1 ) + + # I added this epsilon to avoid getting log(0) and 1/0 in grad + # This means conceptually that there'd be no probability of 0, but that + # doesn't seem to me as important (maybe I'm wrong?). + #eps = 0.00000001 + #eps_1 = 1-eps + #self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \ + # + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) + # note : L is now a vector, where each element is the cross-entropy cost + # of the reconstruction of the corresponding example of the + # minibatch. We need to compute the average of all these to get + # the cost of the minibatch + self.cost = T.mean(self.L) + + self.params = [ self.W, self.b, self.b_prime ] + + +class SdA(object): + def __init__(self, batch_size, n_ins, + hidden_layers_sizes, n_outs, + corruption_levels, rng, pretrain_lr, finetune_lr): + # Just to make sure those are not modified somewhere else afterwards + hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes) + corruption_levels = copy.deepcopy(corruption_levels) + + update_locals(self, locals()) + + self.layers = [] + self.pretrain_functions = [] + self.params = [] + # MODIF: added this so we also get the b_primes + # (not used for finetuning... 
still using ".params") + self.all_params = [] + self.n_layers = len(hidden_layers_sizes) + + print "Creating SdA with params:" + print "batch_size", batch_size + print "hidden_layers_sizes", hidden_layers_sizes + print "corruption_levels", corruption_levels + print "n_ins", n_ins + print "n_outs", n_outs + print "pretrain_lr", pretrain_lr + print "finetune_lr", finetune_lr + print "----" + + if len(hidden_layers_sizes) < 1 : + raiseException (' You must have at least one hidden layer ') + + + # allocate symbolic variables for the data + #index = T.lscalar() # index to a [mini]batch + self.x = T.matrix('x') # the data is presented as rasterized images + self.y = T.ivector('y') # the labels are presented as 1D vector of + # [int] labels + + for i in xrange( self.n_layers ): + # construct the sigmoidal layer + + # the size of the input is either the number of hidden units of + # the layer below or the input size if we are on the first layer + if i == 0 : + input_size = n_ins + else: + input_size = hidden_layers_sizes[i-1] + + # the input to this layer is either the activation of the hidden + # layer below or the input of the SdA if you are on the first + # layer + if i == 0 : + layer_input = self.x + else: + layer_input = self.layers[-1].output + + layer = SigmoidalLayer(rng, layer_input, input_size, + hidden_layers_sizes[i] ) + # add the layer to the + self.layers += [layer] + self.params += layer.params + + # Construct a denoising autoencoder that shared weights with this + # layer + dA_layer = dA(input_size, hidden_layers_sizes[i], \ + corruption_level = corruption_levels[0],\ + input = layer_input, \ + shared_W = layer.W, shared_b = layer.b) + + self.all_params += dA_layer.params + + # Construct a function that trains this dA + # compute gradients of layer parameters + gparams = T.grad(dA_layer.cost, dA_layer.params) + # compute the list of updates + updates = {} + for param, gparam in zip(dA_layer.params, gparams): + updates[param] = param - gparam * pretrain_lr + + # create a function that trains the dA + update_fn = theano.function([self.x], dA_layer.cost, \ + updates = updates)#, + # givens = { + # self.x : ensemble}) + # collect this function into a list + #update_fn = theano.function([index], dA_layer.cost, \ + # updates = updates, + # givens = { + # self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider}) + # collect this function into a list + self.pretrain_functions += [update_fn] + + + # We now need to add a logistic layer on top of the MLP + self.logLayer = LogisticRegression(\ + input = self.layers[-1].output,\ + n_in = hidden_layers_sizes[-1], n_out = n_outs) + + self.params += self.logLayer.params + self.all_params += self.logLayer.params + # construct a function that implements one step of finetunining + + # compute the cost, defined as the negative log likelihood + cost = self.logLayer.negative_log_likelihood(self.y) + # compute the gradients with respect to the model parameters + gparams = T.grad(cost, self.params) + # compute list of updates + updates = {} + for param,gparam in zip(self.params, gparams): + updates[param] = param - gparam*finetune_lr + + self.finetune = theano.function([self.x,self.y], cost, + updates = updates)#, + # givens = { + # self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, + # self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) + + # symbolic variable that points to the number of errors made on the + # minibatch given by self.x and self.y + + self.errors = 
self.logLayer.errors(self.y) + +if __name__ == '__main__': + import sys + args = sys.argv[1:] + diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v2/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v2/utils.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,69 @@ +#!/usr/bin/python +# coding: utf-8 + +from __future__ import with_statement + +from jobman import DD + +# from pylearn codebase +# useful in __init__(param1, param2, etc.) to save +# values in self.param1, self.param2... just call +# update_locals(self, locals()) +def update_locals(obj, dct): + if 'self' in dct: + del dct['self'] + obj.__dict__.update(dct) + +# from a dictionary of possible values for hyperparameters, e.g. +# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]} +# create a list of other dictionaries representing all the possible +# combinations, thus in this example creating: +# [{'learning_rate': 0.1, 'num_layers': 1}, ...] +# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2)) +def produit_cartesien_jobs(val_dict): + job_list = [DD()] + all_keys = val_dict.keys() + + for key in all_keys: + possible_values = val_dict[key] + new_job_list = [] + for val in possible_values: + for job in job_list: + to_insert = job.copy() + to_insert.update({key: val}) + new_job_list.append(to_insert) + job_list = new_job_list + + return job_list + +def test_produit_cartesien_jobs(): + vals = {'a': [1,2], 'b': [3,4,5]} + print produit_cartesien_jobs(vals) + + +# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python +"""Simple module for getting amount of memory used by a specified user's +processes on a UNIX system. +It uses UNIX ps utility to get the memory usage for a specified username and +pipe it to awk for summing up per application memory usage and return the total. +Python's Popen() from subprocess module is used for spawning ps and awk. 
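A minimal usage sketch for the MemoryMonitor class defined below (the username is hypothetical; usage() returns the summed RSS reported by ps, which is typically in kilobytes):

    monitor = MemoryMonitor('someuser')
    print monitor.usage()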
+ +""" + +import subprocess + +class MemoryMonitor(object): + + def __init__(self, username): + """Create new MemoryMonitor instance.""" + self.username = username + + def usage(self): + """Return int containing memory used by user's processes.""" + self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username, + shell=True, + stdout=subprocess.PIPE, + ) + self.stdout_list = self.process.communicate()[0].split('\n') + return int(self.stdout_list[0]) + diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v_sylvain/__init__.py diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v_sylvain/nist_sda.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_sylvain/nist_sda.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,305 @@ +#!/usr/bin/python +# coding: utf-8 + +import ift6266 +import pylearn + +import numpy +import theano +import time + +import pylearn.version +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +import copy +import sys +import os +import os.path + +from jobman import DD +import jobman, jobman.sql +from pylearn.io import filetensor + +from ift6266 import datasets + +from utils import produit_cartesien_jobs + +from sgd_optimization import SdaSgdOptimizer + +#from ift6266.utils.scalar_series import * +from ift6266.utils.seriestables import * +import tables + +############################################################################## +# GLOBALS + +TEST_CONFIG = False + +#NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' +JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/sylvainpl_sda_vsylvain' +EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v_sylvain.nist_sda.jobman_entrypoint" + +REDUCE_TRAIN_TO = None +MAX_FINETUNING_EPOCHS = 1000 +# number of minibatches before taking means for valid error etc. +REDUCE_EVERY = 100 + +if TEST_CONFIG: + REDUCE_TRAIN_TO = 1000 + MAX_FINETUNING_EPOCHS = 2 + REDUCE_EVERY = 10 + MINIBATCH_SIZE=20 + +# Possible values the hyperparameters can take. These are then +# combined with produit_cartesien_jobs so we get a list of all +# possible combinations, each one resulting in a job inserted +# in the jobman DB. +JOB_VALS = {'pretraining_lr': [0.1],#, 0.01],#, 0.001],#, 0.0001], + 'pretraining_epochs_per_layer': [10], + 'hidden_layers_sizes': [500], + 'corruption_levels': [0.1], + 'minibatch_size': [20], + 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS], + 'finetuning_lr':[0.1], #0.001 was very bad, so we leave it out + 'num_hidden_layers':[1,1]} + +# Just useful for tests... minimal number of epochs +DEFAULT_HP_NIST = DD({'finetuning_lr':0.1, + 'pretraining_lr':0.1, + 'pretraining_epochs_per_layer':2, + 'max_finetuning_epochs':2, + 'hidden_layers_sizes':500, + 'corruption_levels':0.2, + 'minibatch_size':20, + 'reduce_train_to':10000, + 'num_hidden_layers':1}) + +''' +Function called by jobman upon launching each job +Its path is the one given when inserting jobs: see EXPERIMENT_PATH +''' +def jobman_entrypoint(state, channel): + # record mercurial versions of each package + pylearn.version.record_versions(state,[theano,ift6266,pylearn]) + # TODO: remove this, bad for number of simultaneous requests on DB + channel.save() + + workingdir = os.getcwd() + + ########### Il faudrait arranger ici pour train plus petit + +## print "Will load NIST" +## +## nist = NIST(minibatch_size=20) +## +## print "NIST loaded" +## + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. 
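    # For example (hypothetical values): with reduce_train_to = 1000 and
    # minibatch_size = 20, rtt below becomes 1000/20 = 50, i.e. the cap is
    # expressed as a number of minibatches rather than a number of examples.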
+ rtt = None + if state.has_key('reduce_train_to'): + rtt = int(state['reduce_train_to']/state['minibatch_size']) + elif REDUCE_TRAIN_TO: + rtt = int(REDUCE_TRAIN_TO/MINIBATCH_SIZE) + + if rtt: + print "Reducing training set to "+str(rtt*state['minibatch_size'])+ " examples" + else: + rtt=float('inf') #No reduction +## nist.reduce_train_set(rtt) +## +## train,valid,test = nist.get_tvt() +## dataset = (train,valid,test) + + n_ins = 32*32 + n_outs = 62 # 10 digits, 26*2 (lower, capitals) + + series = create_series(state.num_hidden_layers) + + print "Creating optimizer with state, ", state + + optimizer = SdaSgdOptimizer(dataset=datasets.nist_all, hyperparameters=state, \ + n_ins=n_ins, n_outs=n_outs,\ + series=series) + + optimizer.pretrain(datasets.nist_all,rtt) + channel.save() + + optimizer.finetune(datasets.nist_all,rtt) + channel.save() + + return channel.COMPLETE + +# These Series objects are used to save various statistics +# during the training. +def create_series(num_hidden_layers): + + # Replace series we don't want to save with DummySeries, e.g. + # series['training_error'] = DummySeries() + + series = {} + + basedir = os.getcwd() + + h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w") + + # reconstruction + reconstruction_base = \ + ErrorSeries(error_name="reconstruction_error", + table_name="reconstruction_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['reconstruction_error'] = \ + AccumulatorSeriesWrapper(base_series=reconstruction_base, + reduce_every=REDUCE_EVERY) + + # train + training_base = \ + ErrorSeries(error_name="training_error", + table_name="training_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)") + series['training_error'] = \ + AccumulatorSeriesWrapper(base_series=training_base, + reduce_every=REDUCE_EVERY) + + # valid and test are not accumulated/mean, saved directly + series['validation_error'] = \ + ErrorSeries(error_name="validation_error", + table_name="validation_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + series['test_error'] = \ + ErrorSeries(error_name="test_error", + table_name="test_error", + hdf5_file=h5f, + index_names=('epoch','minibatch')) + + param_names = [] + for i in range(num_hidden_layers): + param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i] + param_names += ['logreg_layer_W', 'logreg_layer_b'] + + # comment out series we don't want to save + series['params'] = SharedParamsStatisticsWrapper( + new_group_name="params", + base_group="/", + arrays_names=param_names, + hdf5_file=h5f, + index_names=('epoch',)) + + return series + +# Perform insertion into the Postgre DB based on combination +# of hyperparameter values above +# (see comment for produit_cartesien_jobs() to know how it works) +def jobman_insert_nist(): + jobs = produit_cartesien_jobs(JOB_VALS) + + db = jobman.sql.db(JOBDB) + for job in jobs: + job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) + jobman.sql.insert_dict(job, db) + + print "inserted" + +class NIST: + def __init__(self, minibatch_size, basepath=None, reduce_train_to=None): + global NIST_ALL_LOCATION + + self.minibatch_size = minibatch_size + self.basepath = basepath and basepath or NIST_ALL_LOCATION + + self.set_filenames() + + # arrays of 2 elements: .x, .y + self.train = [None, None] + self.test = [None, None] + + self.load_train_test() + + self.valid = [[], []] + 
self.split_train_valid() + if reduce_train_to: + self.reduce_train_set(reduce_train_to) + + def get_tvt(self): + return self.train, self.valid, self.test + + def set_filenames(self): + self.train_files = ['all_train_data.ft', + 'all_train_labels.ft'] + + self.test_files = ['all_test_data.ft', + 'all_test_labels.ft'] + + def load_train_test(self): + self.load_data_labels(self.train_files, self.train) + self.load_data_labels(self.test_files, self.test) + + def load_data_labels(self, filenames, pair): + for i, fn in enumerate(filenames): + f = open(os.path.join(self.basepath, fn)) + pair[i] = filetensor.read(f) + f.close() + + def reduce_train_set(self, max): + self.train[0] = self.train[0][:max] + self.train[1] = self.train[1][:max] + + if max < len(self.test[0]): + for ar in (self.test, self.valid): + ar[0] = ar[0][:max] + ar[1] = ar[1][:max] + + def split_train_valid(self): + test_len = len(self.test[0]) + + new_train_x = self.train[0][:-test_len] + new_train_y = self.train[1][:-test_len] + + self.valid[0] = self.train[0][-test_len:] + self.valid[1] = self.train[1][-test_len:] + + self.train[0] = new_train_x + self.train[1] = new_train_y + +def test_load_nist(): + print "Will load NIST" + + import time + t1 = time.time() + nist = NIST(20) + t2 = time.time() + + print "NIST loaded. time delta = ", t2-t1 + + tr,v,te = nist.get_tvt() + + print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0]) + + raw_input("Press any key") + +if __name__ == '__main__': + + import sys + + args = sys.argv[1:] + + if len(args) > 0 and args[0] == 'load_nist': + test_load_nist() + + elif len(args) > 0 and args[0] == 'jobman_insert': + jobman_insert_nist() + + elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': + chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) + jobman_entrypoint(DEFAULT_HP_NIST, chanmock) + + else: + print "Bad arguments" + diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v_sylvain/sgd_optimization.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,274 @@ +#!/usr/bin/python +# coding: utf-8 + +# Generic SdA optimization loop, adapted from the deeplearning.net tutorial + +import numpy +import theano +import time +import datetime +import theano.tensor as T +import sys + +from jobman import DD +import jobman, jobman.sql + +from stacked_dae import SdA + +from ift6266.utils.seriestables import * + +##def shared_dataset(data_xy): +## data_x, data_y = data_xy +## if theano.config.device.startswith("gpu"): +## print "TRANSFERING DATASETS (via shared()) TO GPU" +## shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) +## shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) +## shared_y = T.cast(shared_y, 'int32') +## else: +## print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES" +## shared_x = theano.shared(data_x) +## shared_y = theano.shared(data_y) +## return shared_x, shared_y + + ######Les shared seront remplacees utilisant "given" dans les enonces de fonction plus loin +def shared_dataset(batch_size, n_in): + + shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX)) + shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX)) + return shared_x, shared_y + +default_series = { \ + 'reconstruction_error' : DummySeries(), + 'training_error' : DummySeries(), + 'validation_error' : DummySeries(), + 'test_error' : DummySeries(), + 'params' : DummySeries() + } + +class 
SdaSgdOptimizer: + def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series): + self.dataset = dataset + self.hp = hyperparameters + self.n_ins = n_ins + self.n_outs = n_outs + self.input_divider = input_divider + + self.series = series + + self.rng = numpy.random.RandomState(1234) + + self.init_datasets() + self.init_classifier() + + sys.stdout.flush() + + def init_datasets(self): + print "init_datasets" + sys.stdout.flush() + + #train_set, valid_set, test_set = self.dataset + self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) + self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) + self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) + + # compute number of minibatches for training, validation and testing + self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size + self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size + # remove last batch in case it's incomplete + self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 + + def init_classifier(self): + print "Constructing classifier" + + # we don't want to save arrays in DD objects, so + # we recreate those arrays here + nhl = self.hp.num_hidden_layers + layers_sizes = [self.hp.hidden_layers_sizes] * nhl + corruption_levels = [self.hp.corruption_levels] * nhl + + # construct the stacked denoising autoencoder class + self.classifier = SdA( \ + train_set_x= self.train_set_x, \ + train_set_y = self.train_set_y,\ + batch_size = self.hp.minibatch_size, \ + n_ins= self.n_ins, \ + hidden_layers_sizes = layers_sizes, \ + n_outs = self.n_outs, \ + corruption_levels = corruption_levels,\ + rng = self.rng,\ + pretrain_lr = self.hp.pretraining_lr, \ + finetune_lr = self.hp.finetuning_lr,\ + input_divider = self.input_divider ) + + #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") + + sys.stdout.flush() + + def train(self): + self.pretrain(self.dataset) + self.finetune(self.dataset) + + def pretrain(self,dataset,reduce): + print "STARTING PRETRAINING, time = ", datetime.datetime.now() + sys.stdout.flush() + + start_time = time.clock() + ## Pre-train layer-wise + for i in xrange(self.classifier.n_layers): + # go through pretraining epochs + for epoch in xrange(self.hp.pretraining_epochs_per_layer): + # go through the training set + batch_index=int(0) + for x,y in dataset.train(self.hp.minibatch_size): + batch_index+=1 + if batch_index > reduce: #If maximum number of mini-batch is used + break + c = self.classifier.pretrain_functions[i](x) + + + self.series["reconstruction_error"].append((epoch, batch_index), c) + + print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c + sys.stdout.flush() + + self.series['params'].append((epoch,), self.classifier.all_params) + + end_time = time.clock() + + print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) + self.hp.update({'pretraining_time': end_time-start_time}) + + sys.stdout.flush() + + def finetune(self,dataset,reduce): + print "STARTING FINETUNING, time = ", datetime.datetime.now() + + #index = T.lscalar() # index to a [mini]batch + minibatch_size = self.hp.minibatch_size + ensemble_x = T.matrix('ensemble_x') + ensemble_y = T.ivector('ensemble_y') + + # create a function to compute the mistakes that are made by the model + # on the validation set, or testing set + shared_divider = theano.shared(numpy.asarray(self.input_divider, 
dtype=theano.config.floatX)) + test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, + givens = { + #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, + #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]}) + self.classifier.x: ensemble_x, + self.classifier.y: ensemble_y}) + + validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, + givens = { + #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, + #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]}) + self.classifier.x: ensemble_x, + self.classifier.y: ensemble_y}) + + + # early-stopping parameters + patience = 10000 # look as this many examples regardless + patience_increase = 2. # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(self.n_train_batches, patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + + best_params = None + best_validation_loss = float('inf') + test_score = 0. + start_time = time.clock() + + done_looping = False + epoch = 0 + + while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): + epoch = epoch + 1 + minibatch_index=int(0) + for x,y in dataset.train(minibatch_size): + minibatch_index +=1 + + if minibatch_index > reduce: #If maximum number of mini-batchs is used + break + + cost_ij = self.classifier.finetune(x,y) + iter = epoch * self.n_train_batches + minibatch_index + + self.series["training_error"].append((epoch, minibatch_index), cost_ij) + + if (iter+1) % validation_frequency == 0: + + #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)] + test_index=int(0) + validation_losses=[] + for x,y in dataset.valid(minibatch_size): + test_index+=1 + if test_index > reduce: + break + validation_losses.append(validate_model(x,y)) + this_validation_loss = numpy.mean(validation_losses) + + self.series["validation_error"].\ + append((epoch, minibatch_index), this_validation_loss*100.) + + print('epoch %i, minibatch %i, validation error %f %%' % \ + (epoch, minibatch_index, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, iter * patience_increase) + + # save best validation score and iteration number + best_validation_loss = this_validation_loss + best_iter = iter + + # test it on the test set + #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)] + test_losses=[] + i=0 + for x,y in dataset.test(minibatch_size): + i+=1 + if i > reduce: + break + test_losses.append(test_model(x,y)) + test_score = numpy.mean(test_losses) + + self.series["test_error"].\ + append((epoch, minibatch_index), test_score*100.) 
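    # The counter-and-break pattern used above for the validation and test
    # loops simply caps how many minibatches are consulted; the same idea as
    # a standalone sketch, assuming any iterable of (x, y) minibatches:
    #
    #     import itertools
    #     def capped(minibatch_iter, max_minibatches):
    #         # yield at most max_minibatches minibatches
    #         return itertools.islice(minibatch_iter, max_minibatches)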
+ + print((' epoch %i, minibatch %i, test error of best ' + 'model %f %%') % + (epoch, minibatch_index, + test_score*100.)) + + sys.stdout.flush() + + self.series['params'].append((epoch,), self.classifier.all_params) + + if patience <= iter : + done_looping = True + break + + end_time = time.clock() + self.hp.update({'finetuning_time':end_time-start_time,\ + 'best_validation_error':best_validation_loss,\ + 'test_score':test_score, + 'num_finetuning_epochs':epoch}) + + print(('Optimization complete with best validation score of %f %%,' + 'with test performance %f %%') % + (best_validation_loss * 100., test_score*100.)) + print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) + + + diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v_sylvain/stacked_dae.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_sylvain/stacked_dae.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,295 @@ +#!/usr/bin/python +# coding: utf-8 + +import numpy +import theano +import time +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams +import copy + +from utils import update_locals + +# taken from LeDeepNet/daa.py +# has a special case when taking log(0) (defined =0) +# modified to not take the mean anymore +from theano.tensor.xlogx import xlogx, xlogy0 +# it's target*log(output) +def binary_cross_entropy(target, output, sum_axis=1): + XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output)) + return -T.sum(XE, axis=sum_axis) + +class LogisticRegression(object): + def __init__(self, input, n_in, n_out): + # initialize with 0 the weights W as a matrix of shape (n_in, n_out) + self.W = theano.shared( value=numpy.zeros((n_in,n_out), + dtype = theano.config.floatX) ) + # initialize the baises b as a vector of n_out 0s + self.b = theano.shared( value=numpy.zeros((n_out,), + dtype = theano.config.floatX) ) + # compute vector of class-membership probabilities in symbolic form + self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) + + # compute prediction as class whose probability is maximal in + # symbolic form + self.y_pred=T.argmax(self.p_y_given_x, axis=1) + + # list of parameters for this layer + self.params = [self.W, self.b] + + def negative_log_likelihood(self, y): + return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) + + def errors(self, y): + # check if y has same dimension of y_pred + if y.ndim != self.y_pred.ndim: + raise TypeError('y should have the same shape as self.y_pred', + ('y', target.type, 'y_pred', self.y_pred.type)) + + # check if y is of the correct datatype + if y.dtype.startswith('int'): + # the T.neq operator returns a vector of 0s and 1s, where 1 + # represents a mistake in prediction + return T.mean(T.neq(self.y_pred, y)) + else: + raise NotImplementedError() + + +class SigmoidalLayer(object): + def __init__(self, rng, input, n_in, n_out): + self.input = input + + W_values = numpy.asarray( rng.uniform( \ + low = -numpy.sqrt(6./(n_in+n_out)), \ + high = numpy.sqrt(6./(n_in+n_out)), \ + size = (n_in, n_out)), dtype = theano.config.floatX) + self.W = theano.shared(value = W_values) + + b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) + self.b = theano.shared(value= b_values) + + self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b) + self.params = [self.W, self.b] + + + +class dA(object): + def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\ + input = None, shared_W = None, shared_b = None): + self.n_visible = n_visible + self.n_hidden = n_hidden + + # 
create a Theano random generator that gives symbolic random values + theano_rng = RandomStreams() + + if shared_W != None and shared_b != None : + self.W = shared_W + self.b = shared_b + else: + # initial values for weights and biases + # note : W' was written as `W_prime` and b' as `b_prime` + + # W is initialized with `initial_W` which is uniformely sampled + # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible) + # the output of uniform if converted using asarray to dtype + # theano.config.floatX so that the code is runable on GPU + initial_W = numpy.asarray( numpy.random.uniform( \ + low = -numpy.sqrt(6./(n_hidden+n_visible)), \ + high = numpy.sqrt(6./(n_hidden+n_visible)), \ + size = (n_visible, n_hidden)), dtype = theano.config.floatX) + initial_b = numpy.zeros(n_hidden, dtype = theano.config.floatX) + + + # theano shared variables for weights and biases + self.W = theano.shared(value = initial_W, name = "W") + self.b = theano.shared(value = initial_b, name = "b") + + + initial_b_prime= numpy.zeros(n_visible) + # tied weights, therefore W_prime is W transpose + self.W_prime = self.W.T + self.b_prime = theano.shared(value = initial_b_prime, name = "b'") + + # if no input is given, generate a variable representing the input + if input == None : + # we use a matrix because we expect a minibatch of several examples, + # each example being a row + self.x = T.dmatrix(name = 'input') + else: + self.x = input + # Equation (1) + # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs + # note : first argument of theano.rng.binomial is the shape(size) of + # random numbers that it should produce + # second argument is the number of trials + # third argument is the probability of success of any trial + # + # this will produce an array of 0s and 1s where 1 has a + # probability of 1 - ``corruption_level`` and 0 with + # ``corruption_level`` + self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x + # Equation (2) + # note : y is stored as an attribute of the class so that it can be + # used later when stacking dAs. + self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) + # Equation (3) + #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) + # Equation (4) + # note : we sum over the size of a datapoint; if we are using minibatches, + # L will be a vector, with one entry per example in minibatch + #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) + #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) + + # bypassing z to avoid running to log(0) + z_a = T.dot(self.y, self.W_prime) + self.b_prime + log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a)) + # log(1-sigmoid(z_a)) + log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a)) + self.L = -T.sum( self.x * (log_sigmoid) \ + + (1.0-self.x) * (log_1_sigmoid), axis=1 ) + + # I added this epsilon to avoid getting log(0) and 1/0 in grad + # This means conceptually that there'd be no probability of 0, but that + # doesn't seem to me as important (maybe I'm wrong?). + #eps = 0.00000001 + #eps_1 = 1-eps + #self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \ + # + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) + # note : L is now a vector, where each element is the cross-entropy cost + # of the reconstruction of the corresponding example of the + # minibatch. 
We need to compute the average of all these to get + # the cost of the minibatch + self.cost = T.mean(self.L) + + self.params = [ self.W, self.b, self.b_prime ] + + +class SdA(object): + def __init__(self, train_set_x, train_set_y, batch_size, n_ins, + hidden_layers_sizes, n_outs, + corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0): + # Just to make sure those are not modified somewhere else afterwards + hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes) + corruption_levels = copy.deepcopy(corruption_levels) + + update_locals(self, locals()) + + self.layers = [] + self.pretrain_functions = [] + self.params = [] + # MODIF: added this so we also get the b_primes + # (not used for finetuning... still using ".params") + self.all_params = [] + self.n_layers = len(hidden_layers_sizes) + + print "Creating SdA with params:" + print "batch_size", batch_size + print "hidden_layers_sizes", hidden_layers_sizes + print "corruption_levels", corruption_levels + print "n_ins", n_ins + print "n_outs", n_outs + print "pretrain_lr", pretrain_lr + print "finetune_lr", finetune_lr + print "input_divider", input_divider + print "----" + + #self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) + + if len(hidden_layers_sizes) < 1 : + raiseException (' You must have at least one hidden layer ') + + + # allocate symbolic variables for the data + ##index = T.lscalar() # index to a [mini]batch + self.x = T.matrix('x') # the data is presented as rasterized images + self.y = T.ivector('y') # the labels are presented as 1D vector of + # [int] labels + ensemble = T.matrix('ensemble') + ensemble_x = T.matrix('ensemble_x') + ensemble_y = T.ivector('ensemble_y') + + for i in xrange( self.n_layers ): + # construct the sigmoidal layer + + # the size of the input is either the number of hidden units of + # the layer below or the input size if we are on the first layer + if i == 0 : + input_size = n_ins + else: + input_size = hidden_layers_sizes[i-1] + + # the input to this layer is either the activation of the hidden + # layer below or the input of the SdA if you are on the first + # layer + if i == 0 : + layer_input = self.x + else: + layer_input = self.layers[-1].output + + layer = SigmoidalLayer(rng, layer_input, input_size, + hidden_layers_sizes[i] ) + # add the layer to the + self.layers += [layer] + self.params += layer.params + + # Construct a denoising autoencoder that shared weights with this + # layer + dA_layer = dA(input_size, hidden_layers_sizes[i], \ + corruption_level = corruption_levels[0],\ + input = layer_input, \ + shared_W = layer.W, shared_b = layer.b) + + self.all_params += dA_layer.params + + # Construct a function that trains this dA + # compute gradients of layer parameters + gparams = T.grad(dA_layer.cost, dA_layer.params) + # compute the list of updates + updates = {} + for param, gparam in zip(dA_layer.params, gparams): + updates[param] = param - gparam * pretrain_lr + + # create a function that trains the dA + update_fn = theano.function([ensemble], dA_layer.cost, \ + updates = updates, + givens = { + self.x : ensemble}) + # collect this function into a list + self.pretrain_functions += [update_fn] + + + # We now need to add a logistic layer on top of the MLP + self.logLayer = LogisticRegression(\ + input = self.layers[-1].output,\ + n_in = hidden_layers_sizes[-1], n_out = n_outs) + + self.params += self.logLayer.params + self.all_params += self.logLayer.params + # construct a function that implements one step of finetunining + + # 
compute the cost, defined as the negative log likelihood + cost = self.logLayer.negative_log_likelihood(self.y) + # compute the gradients with respect to the model parameters + gparams = T.grad(cost, self.params) + # compute list of updates + updates = {} + for param,gparam in zip(self.params, gparams): + updates[param] = param - gparam*finetune_lr + + self.finetune = theano.function([ensemble_x,ensemble_y], cost, + updates = updates, + givens = { + #self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, + #self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) + self.x : ensemble_x, + self.y : ensemble_y} ) + + # symbolic variable that points to the number of errors made on the + # minibatch given by self.x and self.y + + self.errors = self.logLayer.errors(self.y) + +if __name__ == '__main__': + import sys + args = sys.argv[1:] + diff -r 0de14b2034c6 -r 2024368a8d3d deep/stacked_dae/v_sylvain/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/stacked_dae/v_sylvain/utils.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,69 @@ +#!/usr/bin/python +# coding: utf-8 + +from __future__ import with_statement + +from jobman import DD + +# from pylearn codebase +# useful in __init__(param1, param2, etc.) to save +# values in self.param1, self.param2... just call +# update_locals(self, locals()) +def update_locals(obj, dct): + if 'self' in dct: + del dct['self'] + obj.__dict__.update(dct) + +# from a dictionary of possible values for hyperparameters, e.g. +# hp_values = {'learning_rate':[0.1, 0.01], 'num_layers': [1,2]} +# create a list of other dictionaries representing all the possible +# combinations, thus in this example creating: +# [{'learning_rate': 0.1, 'num_layers': 1}, ...] +# (similarly for combinations (0.1, 2), (0.01, 1), (0.01, 2)) +def produit_cartesien_jobs(val_dict): + job_list = [DD()] + all_keys = val_dict.keys() + + for key in all_keys: + possible_values = val_dict[key] + new_job_list = [] + for val in possible_values: + for job in job_list: + to_insert = job.copy() + to_insert.update({key: val}) + new_job_list.append(to_insert) + job_list = new_job_list + + return job_list + +def test_produit_cartesien_jobs(): + vals = {'a': [1,2], 'b': [3,4,5]} + print produit_cartesien_jobs(vals) + + +# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python +"""Simple module for getting amount of memory used by a specified user's +processes on a UNIX system. +It uses UNIX ps utility to get the memory usage for a specified username and +pipe it to awk for summing up per application memory usage and return the total. +Python's Popen() from subprocess module is used for spawning ps and awk. 
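As an aside, produit_cartesien_jobs() defined earlier in this module is essentially a Cartesian product over the value lists, returned as DD objects; a rough plain-dict equivalent, for illustration only:

    import itertools

    def cartesian_dicts(val_dict):
        # same combinations as produit_cartesien_jobs(), but as plain dicts
        keys = sorted(val_dict)
        return [dict(zip(keys, combo))
                for combo in itertools.product(*[val_dict[k] for k in keys])]

    # cartesian_dicts({'a': [1, 2], 'b': [3, 4, 5]}) yields 6 dicts,
    # the first being {'a': 1, 'b': 3}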
+ +""" + +import subprocess + +class MemoryMonitor(object): + + def __init__(self, username): + """Create new MemoryMonitor instance.""" + self.username = username + + def usage(self): + """Return int containing memory used by user's processes.""" + self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username, + shell=True, + stdout=subprocess.PIPE, + ) + self.stdout_list = self.process.communicate()[0].split('\n') + return int(self.stdout_list[0]) + diff -r 0de14b2034c6 -r 2024368a8d3d scripts/launch_generate100.py --- a/scripts/launch_generate100.py Tue Mar 16 12:13:49 2010 -0400 +++ b/scripts/launch_generate100.py Tue Mar 16 12:14:10 2010 -0400 @@ -3,10 +3,12 @@ import os dir1 = "/data/lisa/data/ift6266h10/" +mach = "brams0c.iro.umontreal.ca,brams02.iro.umontreal.ca,brams03.iro.umontreal.ca,maggie22.iro.umontreal.ca" + for i,s in enumerate(['valid','test']): for j,c in enumerate([0.3,0.5,0.7,1]): l = str(c).replace('.','') - os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) + os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (mach, dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) for i in range(100): - os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i)) + os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i)) diff -r 0de14b2034c6 -r 2024368a8d3d test.py --- a/test.py Tue Mar 16 12:13:49 2010 -0400 +++ b/test.py Tue Mar 16 12:14:10 2010 -0400 @@ -1,8 +1,7 @@ import doctest, sys, pkgutil -def runTests(options = doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1): +def runTests(): import ift6266 - predefs = ift6266.__dict__ for (_, name, ispkg) in pkgutil.walk_packages(ift6266.__path__, ift6266.__name__+'.'): if not ispkg: if name.startswith('ift6266.scripts.') or \ @@ -11,9 +10,21 @@ 'ift6266.data_generation.transformations.testmod', 'ift6266.data_generation.transformations.gimp_script']: continue - print "Testing:", name - __import__(name) - doctest.testmod(sys.modules[name], extraglobs=predefs, optionflags=options) + test(name) + +def test(name): + import ift6266 + predefs = ift6266.__dict__ + options = doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1 + print "Testing:", name + __import__(name) + doctest.testmod(sys.modules[name], extraglobs=predefs, optionflags=options) 
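One note on the option flags used in test() above: doctest option flags are integer bit masks meant to be combined with the bitwise OR operator, while an expression of the form a or b evaluates to its first truthy operand, so doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1 leaves only ELLIPSIS in effect. The combination presumably intended is:

    options = doctest.ELLIPSIS | doctest.DONT_ACCEPT_TRUE_FOR_1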
if __name__ == '__main__': - runTests() + if len(sys.argv) > 1: + for mod in sys.argv[1:]: + if mod.endswith('.py'): + mod = mod[:-3] + test(mod) + else: + runTests() diff -r 0de14b2034c6 -r 2024368a8d3d utils/seriestables/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/seriestables/__init__.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,2 @@ +from series import ErrorSeries, BasicStatisticsSeries, AccumulatorSeriesWrapper, SeriesArrayWrapper, SharedParamsStatisticsWrapper, DummySeries + diff -r 0de14b2034c6 -r 2024368a8d3d utils/seriestables/series.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/seriestables/series.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,605 @@ +import tables + +import numpy +import time + +############################################################################## +# Utility functions to create IsDescription objects (pytables data types) + +''' +The way these "IsDescription constructor" work is simple: write the +code as if it were in a file, then exec()ute it, leaving us with +a local-scoped LocalDescription which may be used to call createTable. + +It's a small hack, but it's necessary as the names of the columns +are retrieved based on the variable name, which we can't programmatically set +otherwise. +''' + +def _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock, pos=0): + toexec = "" + + if store_timestamp: + toexec += "\ttimestamp = tables.Time32Col(pos="+str(pos)+")\n" + pos += 1 + + if store_cpuclock: + toexec += "\tcpuclock = tables.Float64Col(pos="+str(pos)+")\n" + pos += 1 + + return toexec, pos + +def _get_description_n_ints(int_names, int_width=64, pos=0): + """ + Begins construction of a class inheriting from IsDescription + to construct an HDF5 table with index columns named with int_names. + + See Series().__init__ to see how those are used. + """ + int_constructor = "tables.Int64Col" + if int_width == 32: + int_constructor = "tables.Int32Col" + elif not int_width in (32, 64): + raise "int_width must be left unspecified, or should equal 32 or 64" + + toexec = "" + + for n in int_names: + toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n" + pos += 1 + + return toexec, pos + +def _get_description_with_n_ints_n_floats(int_names, float_names, + int_width=64, float_width=32, + store_timestamp=True, store_cpuclock=True): + """ + Constructs a class to be used when constructing a table with PyTables. + + This is useful to construct a series with an index with multiple levels. + E.g. if you want to index your "validation error" with "epoch" first, then + "minibatch_index" second, you'd use two "int_names". + + Parameters + ---------- + int_names : tuple of str + Names of the int (e.g. index) columns + float_names : tuple of str + Names of the float (e.g. error) columns + int_width : {'32', '64'} + Type of ints. + float_width : {'32', '64'} + Type of floats. 
+ store_timestamp : bool + See __init__ of Series + store_cpuclock : bool + See __init__ of Series + + Returns + ------- + A class object, to pass to createTable() + """ + + toexec = "class LocalDescription(tables.IsDescription):\n" + + toexec_, pos = _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock) + toexec += toexec_ + + toexec_, pos = _get_description_n_ints(int_names, int_width=int_width, pos=pos) + toexec += toexec_ + + float_constructor = "tables.Float32Col" + if float_width == 64: + float_constructor = "tables.Float64Col" + elif not float_width in (32, 64): + raise "float_width must be left unspecified, or should equal 32 or 64" + + for n in float_names: + toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos) + ")\n" + pos += 1 + + exec(toexec) + + return LocalDescription + +############################################################################## +# Series classes + +# Shortcut to allow passing a single int as index, instead of a tuple +def _index_to_tuple(index): + if type(index) == tuple: + return index + + if type(index) == list: + index = tuple(index) + return index + + try: + if index % 1 > 0.001 and index % 1 < 0.999: + raise + idx = long(index) + return (idx,) + except: + raise TypeError("index must be a tuple of integers, or at least a single integer") + +class Series(): + """ + Base Series class, with minimal arguments and type checks. + + Yet cannot be used by itself (it's append() method raises an error) + """ + + def __init__(self, table_name, hdf5_file, index_names=('epoch',), + title="", hdf5_group='/', + store_timestamp=True, store_cpuclock=True): + """Basic arguments each Series must get. + + Parameters + ---------- + table_name : str + Name of the table to create under group "hd5_group" (other + parameter). No spaces, ie. follow variable naming restrictions. + hdf5_file : open HDF5 file + File opened with openFile() in PyTables (ie. return value of + openFile). + index_names : tuple of str + Columns to use as index for elements in the series, other + example would be ('epoch', 'minibatch'). This would then allow + you to call append(index, element) with index made of two ints, + one for epoch index, one for minibatch index in epoch. + title : str + Title to attach to this table as metadata. Can contain spaces + and be longer then the table_name. + hdf5_group : str + Path of the group (kind of a file) in the HDF5 file under which + to create the table. + store_timestamp : bool + Whether to create a column for timestamps and store them with + each record. + store_cpuclock : bool + Whether to create a column for cpu clock and store it with + each record. + """ + + ######################################### + # checks + + if type(table_name) != str: + raise TypeError("table_name must be a string") + if table_name == "": + raise ValueError("table_name must not be empty") + + if not isinstance(hdf5_file, tables.file.File): + raise TypeError("hdf5_file must be an open HDF5 file (use tables.openFile)") + #if not ('w' in hdf5_file.mode or 'a' in hdf5_file.mode): + # raise ValueError("hdf5_file must be opened in write or append mode") + + if type(index_names) != tuple: + raise TypeError("index_names must be a tuple of strings." + \ + "If you have only one element in the tuple, don't forget " +\ + "to add a comma, e.g. 
('epoch',).") + for name in index_names: + if type(name) != str: + raise TypeError("index_names must only contain strings, but also"+\ + "contains a "+str(type(name))+".") + + if type(title) != str: + raise TypeError("title must be a string, even if empty") + + if type(hdf5_group) != str: + raise TypeError("hdf5_group must be a string") + + if type(store_timestamp) != bool: + raise TypeError("store_timestamp must be a bool") + + if type(store_cpuclock) != bool: + raise TypeError("store_timestamp must be a bool") + + ######################################### + + self.table_name = table_name + self.hdf5_file = hdf5_file + self.index_names = index_names + self.title = title + self.hdf5_group = hdf5_group + + self.store_timestamp = store_timestamp + self.store_cpuclock = store_cpuclock + + def append(self, index, element): + raise NotImplementedError + + def _timestamp_cpuclock(self, newrow): + if self.store_timestamp: + newrow["timestamp"] = time.time() + + if self.store_cpuclock: + newrow["cpuclock"] = time.clock() + +class DummySeries(): + """ + To put in a series dictionary instead of a real series, to do nothing + when we don't want a given series to be saved. + + E.g. if we'd normally have a "training_error" series in a dictionary + of series, the training loop would have something like this somewhere: + + series["training_error"].append((15,), 20.0) + + but if we don't want to save the training errors this time, we simply + do + + series["training_error"] = DummySeries() + """ + def append(self, index, element): + pass + +class ErrorSeries(Series): + """ + Most basic Series: saves a single float (called an Error as this is + the most common use case I foresee) along with an index (epoch, for + example) and timestamp/cpu.clock for each of these floats. + """ + + def __init__(self, error_name, table_name, + hdf5_file, index_names=('epoch',), + title="", hdf5_group='/', + store_timestamp=True, store_cpuclock=True): + """ + For most parameters, see Series.__init__ + + Parameters + ---------- + error_name : str + In the HDF5 table, column name for the error float itself. + """ + + # most type/value checks are performed in Series.__init__ + Series.__init__(self, table_name, hdf5_file, index_names, title, + store_timestamp=store_timestamp, + store_cpuclock=store_cpuclock) + + if type(error_name) != str: + raise TypeError("error_name must be a string") + if error_name == "": + raise ValueError("error_name must not be empty") + + self.error_name = error_name + + self._create_table() + + def _create_table(self): + table_description = _get_description_with_n_ints_n_floats( \ + self.index_names, (self.error_name,), + store_timestamp=self.store_timestamp, + store_cpuclock=self.store_cpuclock) + + self._table = self.hdf5_file.createTable(self.hdf5_group, + self.table_name, + table_description, + title=self.title) + + + def append(self, index, error): + """ + Parameters + ---------- + index : tuple of int + Following index_names passed to __init__, e.g. (12, 15) if + index_names were ('epoch', 'minibatch_size'). + A single int (not tuple) is acceptable if index_names has a single + element. + An array will be casted to a tuple, as a convenience. + + error : float + Next error in the series. + """ + index = _index_to_tuple(index) + + if len(index) != len(self.index_names): + raise ValueError("index provided does not have the right length (expected " \ + + str(len(self.index_names)) + " got " + str(len(index))) + + # other checks are implicit when calling newrow[..] 
=, + # which should throw an error if not of the right type + + newrow = self._table.row + + # Columns for index in table are based on index_names + for col_name, value in zip(self.index_names, index): + newrow[col_name] = value + newrow[self.error_name] = error + + # adds timestamp and cpuclock to newrow if necessary + self._timestamp_cpuclock(newrow) + + newrow.append() + + self.hdf5_file.flush() + +# Does not inherit from Series because it does not itself need to +# access the hdf5_file and does not need a series_name (provided +# by the base_series.) +class AccumulatorSeriesWrapper(): + ''' + Wraps a Series by accumulating objects passed its Accumulator.append() + method and "reducing" (e.g. calling numpy.mean(list)) once in a while, + every "reduce_every" calls in fact. + ''' + + def __init__(self, base_series, reduce_every, reduce_function=numpy.mean): + """ + Parameters + ---------- + base_series : Series + This object must have an append(index, value) function. + + reduce_every : int + Apply the reduction function (e.g. mean()) every time we get this + number of elements. E.g. if this is 100, then every 100 numbers + passed to append(), we'll take the mean and call append(this_mean) + on the BaseSeries. + + reduce_function : function + Must take as input an array of "elements", as passed to (this + accumulator's) append(). Basic case would be to take an array of + floats and sum them into one float, for example. + """ + self.base_series = base_series + self.reduce_function = reduce_function + self.reduce_every = reduce_every + + self._buffer = [] + + + def append(self, index, element): + """ + Parameters + ---------- + index : tuple of int + The index used is the one of the last element reduced. E.g. if + you accumulate over the first 1000 minibatches, the index + passed to the base_series.append() function will be 1000. + A single int (not tuple) is acceptable if index_names has a single + element. + An array will be casted to a tuple, as a convenience. + + element : float + Element that will be accumulated. + """ + self._buffer.append(element) + + if len(self._buffer) == self.reduce_every: + reduced = self.reduce_function(self._buffer) + self.base_series.append(index, reduced) + self._buffer = [] + + # The >= case should never happen, except if lists + # were appended by accessing _buffer externally (when it's + # intended to be private), which should be a red flag. + assert len(self._buffer) < self.reduce_every + +# Outside of class to fix an issue with exec in Python 2.6. +# My sorries to the god of pretty code. +def _BasicStatisticsSeries_construct_table_toexec(index_names, store_timestamp, store_cpuclock): + toexec = "class LocalDescription(tables.IsDescription):\n" + + toexec_, pos = _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock) + toexec += toexec_ + + toexec_, pos = _get_description_n_ints(index_names, pos=pos) + toexec += toexec_ + + toexec += "\tmean = tables.Float32Col(pos=" + str(pos) + ")\n" + toexec += "\tmin = tables.Float32Col(pos=" + str(pos+1) + ")\n" + toexec += "\tmax = tables.Float32Col(pos=" + str(pos+2) + ")\n" + toexec += "\tstd = tables.Float32Col(pos=" + str(pos+3) + ")\n" + + # This creates "LocalDescription", which we may then use + exec(toexec) + + return LocalDescription + +# Defaults functions for BasicStatsSeries. These can be replaced. 
+_basic_stats_functions = {'mean': lambda(x): numpy.mean(x), + 'min': lambda(x): numpy.min(x), + 'max': lambda(x): numpy.max(x), + 'std': lambda(x): numpy.std(x)} + +class BasicStatisticsSeries(Series): + + def __init__(self, table_name, hdf5_file, + stats_functions=_basic_stats_functions, + index_names=('epoch',), title="", hdf5_group='/', + store_timestamp=True, store_cpuclock=True): + """ + For most parameters, see Series.__init__ + + Parameters + ---------- + series_name : str + Not optional here. Will be prepended with "Basic statistics for " + + stats_functions : dict, optional + Dictionary with a function for each key "mean", "min", "max", + "std". The function must take whatever is passed to append(...) + and return a single number (float). + """ + + # Most type/value checks performed in Series.__init__ + Series.__init__(self, table_name, hdf5_file, index_names, title, + store_timestamp=store_timestamp, + store_cpuclock=store_cpuclock) + + if type(hdf5_group) != str: + raise TypeError("hdf5_group must be a string") + + if type(stats_functions) != dict: + # just a basic check. We'll suppose caller knows what he's doing. + raise TypeError("stats_functions must be a dict") + + self.hdf5_group = hdf5_group + + self.stats_functions = stats_functions + + self._create_table() + + def _create_table(self): + table_description = \ + _BasicStatisticsSeries_construct_table_toexec( \ + self.index_names, + self.store_timestamp, self.store_cpuclock) + + self._table = self.hdf5_file.createTable(self.hdf5_group, + self.table_name, table_description) + + def append(self, index, array): + """ + Parameters + ---------- + index : tuple of int + Following index_names passed to __init__, e.g. (12, 15) + if index_names were ('epoch', 'minibatch_size') + A single int (not tuple) is acceptable if index_names has a single + element. + An array will be casted to a tuple, as a convenience. + + array + Is of whatever type the stats_functions passed to + __init__ can take. Default is anything numpy.mean(), + min(), max(), std() can take. + """ + index = _index_to_tuple(index) + + if len(index) != len(self.index_names): + raise ValueError("index provided does not have the right length (expected " \ + + str(len(self.index_names)) + " got " + str(len(index))) + + newrow = self._table.row + + for col_name, value in zip(self.index_names, index): + newrow[col_name] = value + + newrow["mean"] = self.stats_functions['mean'](array) + newrow["min"] = self.stats_functions['min'](array) + newrow["max"] = self.stats_functions['max'](array) + newrow["std"] = self.stats_functions['std'](array) + + self._timestamp_cpuclock(newrow) + + newrow.append() + + self.hdf5_file.flush() + +class SeriesArrayWrapper(): + """ + Simply redistributes any number of elements to sub-series to respective + append()s. + + To use if you have many elements to append in similar series, e.g. if you + have an array containing [train_error, valid_error, test_error], and 3 + corresponding series, this allows you to simply pass this array of 3 + values to append() instead of passing each element to each individual + series in turn. + """ + + def __init__(self, base_series_list): + """ + Parameters + ---------- + base_series_list : array or tuple of Series + You must have previously created and configured each of those + series, then put them in an array. This array must follow the + same order as the array passed as ``elements`` parameter of + append(). 
+ """ + self.base_series_list = base_series_list + + def append(self, index, elements): + """ + Parameters + ---------- + index : tuple of int + See for example ErrorSeries.append() + + elements : array or tuple + Array or tuple of elements that will be passed down to + the base_series passed to __init__, in the same order. + """ + if len(elements) != len(self.base_series_list): + raise ValueError("not enough or too much elements provided (expected " \ + + str(len(self.base_series_list)) + " got " + str(len(elements))) + + for series, el in zip(self.base_series_list, elements): + series.append(index, el) + +class SharedParamsStatisticsWrapper(SeriesArrayWrapper): + ''' + Save mean, min/max, std of shared parameters place in an array. + + Here "shared" means "theano.shared", which means elements of the + array will have a .value to use for numpy.mean(), etc. + + This inherits from SeriesArrayWrapper, which provides the append() + method. + ''' + + def __init__(self, arrays_names, new_group_name, hdf5_file, + base_group='/', index_names=('epoch',), title="", + store_timestamp=True, store_cpuclock=True): + """ + For other parameters, see Series.__init__ + + Parameters + ---------- + array_names : array or tuple of str + Name of each array, in order of the array passed to append(). E.g. + ('layer1_b', 'layer1_W', 'layer2_b', 'layer2_W') + + new_group_name : str + Name of a new HDF5 group which will be created under base_group to + store the new series. + + base_group : str + Path of the group under which to create the new group which will + store the series. + + title : str + Here the title is attached to the new group, not a table. + + store_timestamp : bool + Here timestamp and cpuclock are stored in *each* table + + store_cpuclock : bool + Here timestamp and cpuclock are stored in *each* table + """ + + # most other checks done when calling BasicStatisticsSeries + if type(new_group_name) != str: + raise TypeError("new_group_name must be a string") + if new_group_name == "": + raise ValueError("new_group_name must not be empty") + + base_series_list = [] + + new_group = hdf5_file.createGroup(base_group, new_group_name, title=title) + + stats_functions = {'mean': lambda(x): numpy.mean(x.value), + 'min': lambda(x): numpy.min(x.value), + 'max': lambda(x): numpy.max(x.value), + 'std': lambda(x): numpy.std(x.value)} + + for name in arrays_names: + base_series_list.append( + BasicStatisticsSeries( + table_name=name, + hdf5_file=hdf5_file, + index_names=index_names, + stats_functions=stats_functions, + hdf5_group=new_group._v_pathname, + store_timestamp=store_timestamp, + store_cpuclock=store_cpuclock)) + + SeriesArrayWrapper.__init__(self, base_series_list) + + diff -r 0de14b2034c6 -r 2024368a8d3d utils/seriestables/test_series.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/seriestables/test_series.py Tue Mar 16 12:14:10 2010 -0400 @@ -0,0 +1,311 @@ +import tempfile + +import numpy +import numpy.random + +from jobman import DD + +import tables + +from series import * +import series + +################################################# +# Utils + +def compare_floats(f1,f2): + if f1-f2 < 1e-3: + return True + return False + +def compare_lists(it1, it2, floats=False): + if len(it1) != len(it2): + return False + + for el1, el2 in zip(it1, it2): + if floats: + if not compare_floats(el1,el2): + return False + elif el1 != el2: + return False + + return True + +################################################# +# Basic Series class tests + +def test_Series_types(): + pass + 
+################################################# +# ErrorSeries tests + +def test_ErrorSeries_common_case(h5f=None): + if not h5f: + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + validation_error = series.ErrorSeries(error_name="validation_error", table_name="validation_error", + hdf5_file=h5f, index_names=('epoch','minibatch'), + title="Validation error indexed by epoch and minibatch") + + # (1,1), (1,2) etc. are (epoch, minibatch) index + validation_error.append((1,1), 32.0) + validation_error.append((1,2), 30.0) + validation_error.append((2,1), 28.0) + validation_error.append((2,2), 26.0) + + h5f.close() + + h5f = tables.openFile(h5f_path, "r") + + table = h5f.getNode('/', 'validation_error') + + assert compare_lists(table.cols.epoch[:], [1,1,2,2]) + assert compare_lists(table.cols.minibatch[:], [1,2,1,2]) + assert compare_lists(table.cols.validation_error[:], [32.0, 30.0, 28.0, 26.0]) + +def test_ErrorSeries_no_index(h5f=None): + if not h5f: + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + validation_error = series.ErrorSeries(error_name="validation_error", + table_name="validation_error", + hdf5_file=h5f, + # empty tuple + index_names=tuple(), + title="Validation error with no index") + + # (1,1), (1,2) etc. are (epoch, minibatch) index + validation_error.append(tuple(), 32.0) + validation_error.append(tuple(), 30.0) + validation_error.append(tuple(), 28.0) + validation_error.append(tuple(), 26.0) + + h5f.close() + + h5f = tables.openFile(h5f_path, "r") + + table = h5f.getNode('/', 'validation_error') + + assert compare_lists(table.cols.validation_error[:], [32.0, 30.0, 28.0, 26.0]) + assert not ("epoch" in dir(table.cols)) + +def test_ErrorSeries_notimestamp(h5f=None): + if not h5f: + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + validation_error = series.ErrorSeries(error_name="validation_error", table_name="validation_error", + hdf5_file=h5f, index_names=('epoch','minibatch'), + title="Validation error indexed by epoch and minibatch", + store_timestamp=False) + + # (1,1), (1,2) etc. are (epoch, minibatch) index + validation_error.append((1,1), 32.0) + + h5f.close() + + h5f = tables.openFile(h5f_path, "r") + + table = h5f.getNode('/', 'validation_error') + + assert compare_lists(table.cols.epoch[:], [1]) + assert not ("timestamp" in dir(table.cols)) + assert "cpuclock" in dir(table.cols) + +def test_ErrorSeries_nocpuclock(h5f=None): + if not h5f: + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + validation_error = series.ErrorSeries(error_name="validation_error", table_name="validation_error", + hdf5_file=h5f, index_names=('epoch','minibatch'), + title="Validation error indexed by epoch and minibatch", + store_cpuclock=False) + + # (1,1), (1,2) etc. 
are (epoch, minibatch) index + validation_error.append((1,1), 32.0) + + h5f.close() + + h5f = tables.openFile(h5f_path, "r") + + table = h5f.getNode('/', 'validation_error') + + assert compare_lists(table.cols.epoch[:], [1]) + assert not ("cpuclock" in dir(table.cols)) + assert "timestamp" in dir(table.cols) + +def test_AccumulatorSeriesWrapper_common_case(h5f=None): + if not h5f: + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + validation_error = ErrorSeries(error_name="accumulated_validation_error", + table_name="accumulated_validation_error", + hdf5_file=h5f, + index_names=('epoch','minibatch'), + title="Validation error, summed every 3 minibatches, indexed by epoch and minibatch") + + accumulator = AccumulatorSeriesWrapper(base_series=validation_error, + reduce_every=3, reduce_function=numpy.sum) + + # (1,1), (1,2) etc. are (epoch, minibatch) index + accumulator.append((1,1), 32.0) + accumulator.append((1,2), 30.0) + accumulator.append((2,1), 28.0) + accumulator.append((2,2), 26.0) + accumulator.append((3,1), 24.0) + accumulator.append((3,2), 22.0) + + h5f.close() + + h5f = tables.openFile(h5f_path, "r") + + table = h5f.getNode('/', 'accumulated_validation_error') + + assert compare_lists(table.cols.epoch[:], [2,3]) + assert compare_lists(table.cols.minibatch[:], [1,2]) + assert compare_lists(table.cols.accumulated_validation_error[:], [90.0,72.0], floats=True) + +def test_BasicStatisticsSeries_common_case(h5f=None): + if not h5f: + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + stats_series = BasicStatisticsSeries(table_name="b_vector_statistics", + hdf5_file=h5f, index_names=('epoch','minibatch'), + title="Basic statistics for b vector indexed by epoch and minibatch") + + # (1,1), (1,2) etc. 
are (epoch, minibatch) index + stats_series.append((1,1), [0.15, 0.20, 0.30]) + stats_series.append((1,2), [-0.18, 0.30, 0.58]) + stats_series.append((2,1), [0.18, -0.38, -0.68]) + stats_series.append((2,2), [0.15, 0.02, 1.9]) + + h5f.close() + + h5f = tables.openFile(h5f_path, "r") + + table = h5f.getNode('/', 'b_vector_statistics') + + assert compare_lists(table.cols.epoch[:], [1,1,2,2]) + assert compare_lists(table.cols.minibatch[:], [1,2,1,2]) + assert compare_lists(table.cols.mean[:], [0.21666667, 0.23333333, -0.29333332, 0.69], floats=True) + assert compare_lists(table.cols.min[:], [0.15000001, -0.18000001, -0.68000001, 0.02], floats=True) + assert compare_lists(table.cols.max[:], [0.30, 0.58, 0.18, 1.9], floats=True) + assert compare_lists(table.cols.std[:], [0.06236095, 0.31382939, 0.35640177, 0.85724366], floats=True) + +def test_SharedParamsStatisticsWrapper_commoncase(h5f=None): + import numpy.random + + if not h5f: + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + stats = SharedParamsStatisticsWrapper(new_group_name="params", base_group="/", + arrays_names=('b1','b2','b3'), hdf5_file=h5f, + index_names=('epoch','minibatch')) + + b1 = DD({'value':numpy.random.rand(5)}) + b2 = DD({'value':numpy.random.rand(5)}) + b3 = DD({'value':numpy.random.rand(5)}) + stats.append((1,1), [b1,b2,b3]) + + h5f.close() + + h5f = tables.openFile(h5f_path, "r") + + b1_table = h5f.getNode('/params', 'b1') + b3_table = h5f.getNode('/params', 'b3') + + assert b1_table.cols.mean[0] - numpy.mean(b1.value) < 1e-3 + assert b3_table.cols.mean[0] - numpy.mean(b3.value) < 1e-3 + assert b1_table.cols.min[0] - numpy.min(b1.value) < 1e-3 + assert b3_table.cols.min[0] - numpy.min(b3.value) < 1e-3 + +def test_SharedParamsStatisticsWrapper_notimestamp(h5f=None): + import numpy.random + + if not h5f: + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + stats = SharedParamsStatisticsWrapper(new_group_name="params", base_group="/", + arrays_names=('b1','b2','b3'), hdf5_file=h5f, + index_names=('epoch','minibatch'), + store_timestamp=False) + + b1 = DD({'value':numpy.random.rand(5)}) + b2 = DD({'value':numpy.random.rand(5)}) + b3 = DD({'value':numpy.random.rand(5)}) + stats.append((1,1), [b1,b2,b3]) + + h5f.close() + + h5f = tables.openFile(h5f_path, "r") + + b1_table = h5f.getNode('/params', 'b1') + b3_table = h5f.getNode('/params', 'b3') + + assert b1_table.cols.mean[0] - numpy.mean(b1.value) < 1e-3 + assert b3_table.cols.mean[0] - numpy.mean(b3.value) < 1e-3 + assert b1_table.cols.min[0] - numpy.min(b1.value) < 1e-3 + assert b3_table.cols.min[0] - numpy.min(b3.value) < 1e-3 + + assert not ('timestamp' in dir(b1_table.cols)) + +def test_get_desc(): + h5f_path = tempfile.NamedTemporaryFile().name + h5f = tables.openFile(h5f_path, "w") + + desc = series._get_description_with_n_ints_n_floats(("col1","col2"), ("col3","col4")) + + mytable = h5f.createTable('/', 'mytable', desc) + + # just make sure the columns are there... otherwise this will throw an exception + mytable.cols.col1 + mytable.cols.col2 + mytable.cols.col3 + mytable.cols.col4 + + try: + # this should fail... 
LocalDescription must be local to get_desc_etc
+        test = LocalDescription
+        assert False
+    except NameError:
+        # LocalDescription must be undefined here; an AssertionError from the
+        # line above must propagate, so only NameError is caught
+        assert True
+
+    assert True
+
+def test_index_to_tuple_floaterror():
+    try:
+        series._index_to_tuple(5.1)
+        assert False
+    except TypeError:
+        assert True
+
+def test_index_to_tuple_arrayok():
+    tpl = series._index_to_tuple([1,2,3])
+    assert type(tpl) == tuple and tpl[1] == 2 and tpl[2] == 3
+
+def test_index_to_tuple_intbecomestuple():
+    tpl = series._index_to_tuple(32)
+
+    assert type(tpl) == tuple and tpl == (32,)
+
+def test_index_to_tuple_longbecomestuple():
+    tpl = series._index_to_tuple(928374928374928L)
+
+    assert type(tpl) == tuple and tpl == (928374928374928L,)
+
+if __name__ == '__main__':
+    import tempfile
+    test_get_desc()
+    test_ErrorSeries_common_case()
+    test_BasicStatisticsSeries_common_case()
+    test_AccumulatorSeriesWrapper_common_case()
+    test_SharedParamsStatisticsWrapper_commoncase()
+
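# Editor's sketch (illustrative only, not part of the changeset): recording
# per-epoch statistics of model parameters with SharedParamsStatisticsWrapper.
# The parameter names are invented; any objects exposing a .value ndarray
# (e.g. jobman's DD as in the tests above, or theano.shared variables of that
# era) will do, since the statistics are computed on element.value.
def _example_shared_params_monitoring(h5f, params):
    # params: dict mapping a name to a parameter object,
    # e.g. {'layer1_W': W, 'layer1_b': b}
    names = tuple(sorted(params.keys()))
    stats = SharedParamsStatisticsWrapper(arrays_names=names,
                                          new_group_name="params",
                                          hdf5_file=h5f,
                                          base_group='/',
                                          index_names=('epoch',))
    for epoch in range(10):
        # ... one training epoch would run here ...
        stats.append((epoch,), [params[n] for n in names])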