ift6266: changeset 159:e81241cfc2de (merge)
author   | Myriam Cote <cotemyri@iro.umontreal.ca>
date     | Thu, 25 Feb 2010 09:05:48 -0500
parents  | d1bb6e06497a (current diff) 221799d79188 (diff)
children | 68160fd149fe
diffstat | 15 files changed, 739 insertions(+), 100 deletions(-)
--- a/baseline_algorithms/mlp/mlp_nist.py Thu Feb 25 09:04:40 2010 -0500 +++ b/baseline_algorithms/mlp/mlp_nist.py Thu Feb 25 09:05:48 2010 -0500 @@ -30,6 +30,7 @@ import theano.tensor as T import time import theano.tensor.nnet +import pylearn from pylearn.io import filetensor as ft data_path = '/data/lisa/data/nist/by_class/' @@ -45,7 +46,7 @@ - def __init__(self, input, n_in, n_hidden, n_out): + def __init__(self, input, n_in, n_hidden, n_out,learning_rate): """Initialize the parameters for the multilayer perceptron :param input: symbolic variable that describes the input of the @@ -94,8 +95,14 @@ self.b2 = theano.shared( value = numpy.zeros((n_out,), dtype= theano.config.floatX)) + #include the learning rate in the classifer so + #we can modify it on the fly when we want + lr_value=learning_rate + self.lr=theano.shared(value=lr_value) # symbolic expression computing the values of the hidden layer self.hidden = T.tanh(T.dot(input, self.W1)+ self.b1) + + # symbolic expression computing the values of the top layer self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2) @@ -103,6 +110,10 @@ # compute prediction as class whose probability is maximal in # symbolic form self.y_pred = T.argmax( self.p_y_given_x, axis =1) + self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1) + + + # L1 norm ; one regularization option is to enforce L1 norm to # be small @@ -150,21 +161,9 @@ else: raise NotImplementedError() -#def jobman_mlp(state,channel): -# (validation_error,test_error,nb_exemples,time)=mlp_full_nist(state.learning_rate,\ - # state.n_iter,\ - # state.batch_size,\ - # state.nb_hidden_units) - # state.validation_error = validation_error - # state.test_error = test_error - # state.nb_exemples = nb_exemples - # state.time=time - # return channel.COMPLETE - - - def mlp_full_nist( verbose = False,\ + adaptive_lr = 0,\ train_data = 'all/all_train_data.ft',\ train_labels = 'all/all_train_labels.ft',\ test_data = 'all/all_test_data.ft',\ @@ -178,6 +177,14 @@ nb_targets = 62): + configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] + + total_validation_error_list = [] + total_train_error_list = [] + learning_rate_list=[] + best_training_error=float('inf'); + + f = open(data_path+train_data) g= open(data_path+train_labels) @@ -235,11 +242,17 @@ y = T.lvector() # the labels are presented as 1D vector of # [long int] labels + if verbose==True: + print 'finished parsing the data' # construct the logistic regression class classifier = MLP( input=x.reshape((batch_size,32*32)),\ n_in=32*32,\ n_hidden=nb_hidden,\ - n_out=nb_targets) + n_out=nb_targets, + learning_rate=learning_rate) + + + # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed @@ -260,10 +273,10 @@ # specify how to update the parameters of the model as a dictionary updates = \ - { classifier.W1: classifier.W1 - learning_rate*g_W1 \ - , classifier.b1: classifier.b1 - learning_rate*g_b1 \ - , classifier.W2: classifier.W2 - learning_rate*g_W2 \ - , classifier.b2: classifier.b2 - learning_rate*g_b2 } + { classifier.W1: classifier.W1 - classifier.lr*g_W1 \ + , classifier.b1: classifier.b1 - classifier.lr*g_b1 \ + , classifier.W2: classifier.W2 - classifier.lr*g_W2 \ + , classifier.b2: classifier.b2 - classifier.lr*g_b2 } # compiling a theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules @@ -273,13 +286,17 @@ + + #conditions for stopping the 
adaptation: #1) we have reached nb_max_exemples (this is rounded up to be a multiple of the train size) - #2) validation error is going up (probable overfitting) + #2) validation error is going up twice in a row(probable overfitting) # This means we no longer stop on slow convergence as low learning rates stopped # too fast. + + # no longer relevant patience =nb_max_exemples/batch_size patience_increase = 2 # wait this much longer when a new best is # found @@ -296,9 +313,9 @@ test_score = 0. start_time = time.clock() n_iter = nb_max_exemples/batch_size # nb of max times we are allowed to run through all exemples - n_iter = n_iter/n_minibatches + 1 + n_iter = n_iter/n_minibatches + 1 #round up n_iter=max(1,n_iter) # run at least once on short debug call - # have a maximum of `n_iter` iterations through the entire dataset + if verbose == True: print 'looping at most %d times through the data set' %n_iter @@ -307,7 +324,9 @@ # get epoch and minibatch index epoch = iter / n_minibatches minibatch_index = iter % n_minibatches - + + + # get the minibatches corresponding to `iter` modulo # `len(train_batches)` x,y = train_batches[ minibatch_index ] @@ -317,7 +336,7 @@ if (iter+1) % validation_frequency == 0: # compute zero-one loss on validation set - + this_validation_loss = 0. for x,y in validation_batches: # sum up the errors for each minibatch @@ -325,26 +344,40 @@ this_validation_loss += test_model(x_float,y) # get the average by dividing with the number of minibatches this_validation_loss /= len(validation_batches) + #save the validation loss + total_validation_error_list.append(this_validation_loss) + + #get the training error rate + this_train_loss=0 + for x,y in train_batches: + # sum up the errors for each minibatch + x_float = x/255.0 + this_train_loss += test_model(x_float,y) + # get the average by dividing with the number of minibatches + this_train_loss /= len(train_batches) + #save the validation loss + total_train_error_list.append(this_train_loss) + if(this_train_loss<best_training_error): + best_training_error=this_train_loss + if verbose == True: - print('epoch %i, minibatch %i/%i, validation error %f %%' % \ + print('epoch %i, minibatch %i/%i, validation error %f, training error %f %%' % \ (epoch, minibatch_index+1, n_minibatches, \ - this_validation_loss*100.)) + this_validation_loss*100.,this_train_loss*100)) + + + #save the learning rate + learning_rate_list.append(classifier.lr.value) # if we got the best validation score until now if this_validation_loss < best_validation_loss: - - #improve patience if loss improvement is good enough - if this_validation_loss < best_validation_loss * \ - improvement_threshold : - patience = max(patience, iter * patience_increase) - elif verbose == True: - print 'slow convergence stop' - # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter - + # reset patience if we are going down again + # so we continue exploring + patience=nb_max_exemples/batch_size # test it on the test set test_score = 0. 
for x,y in test_batches: @@ -357,33 +390,40 @@ (epoch, minibatch_index+1, n_minibatches, test_score*100.)) - #if the validation error is going up, we are overfitting - #stop converging - elif this_validation_loss > best_validation_loss: + # if the validation error is going up, we are overfitting (or oscillating) + # stop converging but run at least to next validation + # to check overfitting or ocsillation + # the saved weights of the model will be a bit off in that case + elif this_validation_loss >= best_validation_loss: #calculate the test error at this point and exit # test it on the test set - if verbose==True: - print ' We are diverging' - best_iter = iter + # however, if adaptive_lr is true, try reducing the lr to + # get us out of an oscilliation + if adaptive_lr==1: + classifier.lr.value=classifier.lr.value/2.0 + test_score = 0. + #cap the patience so we are allowed one more validation error + #calculation before aborting + patience = iter+validation_frequency+1 for x,y in test_batches: x_float=x/255.0 test_score += test_model(x_float,y) test_score /= len(test_batches) if verbose == True: - print ' validation error is going up, stopping now' + print ' validation error is going up, possibly stopping soon' print((' epoch %i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index+1, n_minibatches, test_score*100.)) - break + - - if patience <= iter : - break - + if iter>patience: + print 'we have diverged' + break + end_time = time.clock() if verbose == True: @@ -391,17 +431,25 @@ 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter, test_score*100.)) print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) - print iter - return (best_validation_loss * 100.,test_score*100.,best_iter*batch_size,(end_time-start_time)/60) + print iter + + #save the model and the weights + numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) + numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ + learning_rate_list=learning_rate_list) + + return (best_training_error*100.0,best_validation_loss * 100.,test_score*100.,best_iter*batch_size,(end_time-start_time)/60) if __name__ == '__main__': mlp_full_mnist() def jobman_mlp_full_nist(state,channel): - (validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\ + (train_error,validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\ nb_max_exemples=state.nb_max_exemples,\ - nb_hidden=state.nb_hidden) + nb_hidden=state.nb_hidden,\ + adaptive_lr=state.adaptive_lr) + state.train_error=train_error state.validation_error=validation_error state.test_error=test_error state.nb_exemples=nb_exemples
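The main change to `mlp_full_nist` above is the `adaptive_lr` option: the learning rate now lives in a shared variable (`classifier.lr`), so when the validation error stops improving it can be halved on the fly, and patience is capped to allow one more validation pass before aborting. Below is a minimal, framework-free sketch of that control logic; `train_step` and `validate` are hypothetical stand-ins, only the schedule mirrors the diff.

```python
# Sketch of the adaptive-learning-rate schedule used in mlp_full_nist
# (train_step/validate are illustrative callables, not from the repo).
def train_with_adaptive_lr(train_step, validate, lr=0.01, adaptive_lr=True,
                           max_iters=10000, validation_frequency=100):
    best_val = float('inf')
    patience = max_iters
    lr_history = []
    for it in range(max_iters):
        train_step(lr)                      # one minibatch update at the current rate
        if (it + 1) % validation_frequency == 0:
            val = validate()
            lr_history.append(lr)
            if val < best_val:
                best_val = val
                patience = max_iters        # still improving: reset patience
            else:
                if adaptive_lr:
                    lr = lr / 2.0           # halve the step to damp oscillation
                # allow one more validation pass before giving up
                patience = it + validation_frequency + 1
        if it > patience:
            break
    return best_val, lr_history
```

Keeping the rate in a Theano shared variable is what lets the already-compiled `train_model` pick up the halved value without rebuilding the update expressions.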
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conv_mlp/convolutional_mlp.conf Thu Feb 25 09:05:48 2010 -0500 @@ -0,0 +1,7 @@ +learning_rate=0.01 +n_iter=1 +batch_size=20 +n_kern0=20 +n_kern1=50 +filter_shape=5 +n_layer=3 \ No newline at end of file
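`convolutional_mlp.conf` is a flat `key=value` file of hyperparameters for the job launcher. As an illustration only (the project reads these values through jobman state, not through this function), such a file could be parsed like this, assuming every value is numeric:

```python
# Illustrative parser for a flat key=value config such as convolutional_mlp.conf.
def read_conf(path):
    params = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            key, value = line.split('=', 1)
            params[key.strip()] = float(value) if '.' in value else int(value)
    return params

# e.g. read_conf('convolutional_mlp.conf')['n_kern0'] -> 20
```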
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conv_mlp/convolutional_mlp.py Thu Feb 25 09:05:48 2010 -0500 @@ -0,0 +1,472 @@ +""" +This tutorial introduces the LeNet5 neural network architecture using Theano. LeNet5 is a +convolutional neural network, good for classifying images. This tutorial shows how to build the +architecture, and comes with all the hyper-parameters you need to reproduce the paper's MNIST +results. + +The best results are obtained after X iterations of the main program loop, which takes *** +minutes on my workstation (an Intel Core i7, circa July 2009), and *** minutes on my GPU (an +NVIDIA GTX 285 graphics processor). + +This implementation simplifies the model in the following ways: + + - LeNetConvPool doesn't implement location-specific gain and bias parameters + - LeNetConvPool doesn't implement pooling by average, it implements pooling by max. + - Digit classification is implemented with a logistic regression rather than an RBF network + - LeNet5 was not fully-connected convolutions at second layer + +References: + - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner: Gradient-Based Learning Applied to Document + Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998. + http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf +""" + +import numpy, theano, cPickle, gzip, time +import theano.tensor as T +import theano.sandbox.softsign +import pylearn.datasets.MNIST +from pylearn.io import filetensor as ft +from theano.sandbox import conv, downsample + +class LeNetConvPoolLayer(object): + + def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)): + """ + Allocate a LeNetConvPoolLayer with shared variable internal parameters. + :type rng: numpy.random.RandomState + :param rng: a random number generator used to initialize weights + :type input: theano.tensor.dtensor4 + :param input: symbolic image tensor, of shape image_shape + :type filter_shape: tuple or list of length 4 + :param filter_shape: (number of filters, num input feature maps, + filter height,filter width) + :type image_shape: tuple or list of length 4 + :param image_shape: (batch size, num input feature maps, + image height, image width) + :type poolsize: tuple or list of length 2 + :param poolsize: the downsampling (pooling) factor (#rows,#cols) + """ + assert image_shape[1]==filter_shape[1] + self.input = input + + # initialize weight values: the fan-in of each hidden neuron is + # restricted by the size of the receptive fields. + fan_in = numpy.prod(filter_shape[1:]) + W_values = numpy.asarray( rng.uniform( \ + low = -numpy.sqrt(3./fan_in), \ + high = numpy.sqrt(3./fan_in), \ + size = filter_shape), dtype = theano.config.floatX) + self.W = theano.shared(value = W_values) + + # the bias is a 1D tensor -- one bias per output feature map + b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX) + self.b = theano.shared(value= b_values) + + # convolve input feature maps with filters + conv_out = conv.conv2d(input, self.W, + filter_shape=filter_shape, image_shape=image_shape) + + # downsample each feature map individually, using maxpooling + pooled_out = downsample.max_pool2D(conv_out, poolsize, ignore_border=True) + + # add the bias term. Since the bias is a vector (1D array), we first + # reshape it to a tensor of shape (1,n_filters,1,1). 
Each bias will thus + # be broadcasted across mini-batches and feature map width & height + self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) + + # store parameters of this layer + self.params = [self.W, self.b] + + +class SigmoidalLayer(object): + def __init__(self, rng, input, n_in, n_out): + """ + Typical hidden layer of a MLP: units are fully-connected and have + sigmoidal activation function. Weight matrix W is of shape (n_in,n_out) + and the bias vector b is of shape (n_out,). + + Hidden unit activation is given by: sigmoid(dot(input,W) + b) + + :type rng: numpy.random.RandomState + :param rng: a random number generator used to initialize weights + :type input: theano.tensor.dmatrix + :param input: a symbolic tensor of shape (n_examples, n_in) + :type n_in: int + :param n_in: dimensionality of input + :type n_out: int + :param n_out: number of hidden units + """ + self.input = input + + W_values = numpy.asarray( rng.uniform( \ + low = -numpy.sqrt(6./(n_in+n_out)), \ + high = numpy.sqrt(6./(n_in+n_out)), \ + size = (n_in, n_out)), dtype = theano.config.floatX) + self.W = theano.shared(value = W_values) + + b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) + self.b = theano.shared(value= b_values) + + self.output = T.tanh(T.dot(input, self.W) + self.b) + self.params = [self.W, self.b] + + +class LogisticRegression(object): + """Multi-class Logistic Regression Class + + The logistic regression is fully described by a weight matrix :math:`W` + and bias vector :math:`b`. Classification is done by projecting data + points onto a set of hyperplanes, the distance to which is used to + determine a class membership probability. + """ + + def __init__(self, input, n_in, n_out): + """ Initialize the parameters of the logistic regression + :param input: symbolic variable that describes the input of the + architecture (one minibatch) + :type n_in: int + :param n_in: number of input units, the dimension of the space in + which the datapoints lie + :type n_out: int + :param n_out: number of output units, the dimension of the space in + which the labels lie + """ + + # initialize with 0 the weights W as a matrix of shape (n_in, n_out) + self.W = theano.shared( value=numpy.zeros((n_in,n_out), + dtype = theano.config.floatX) ) + # initialize the baises b as a vector of n_out 0s + self.b = theano.shared( value=numpy.zeros((n_out,), + dtype = theano.config.floatX) ) + # compute vector of class-membership probabilities in symbolic form + self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) + + # compute prediction as class whose probability is maximal in + # symbolic form + self.y_pred=T.argmax(self.p_y_given_x, axis=1) + + # list of parameters for this layer + self.params = [self.W, self.b] + + def negative_log_likelihood(self, y): + """Return the mean of the negative log-likelihood of the prediction + of this model under a given target distribution. 
+ :param y: corresponds to a vector that gives for each example the + correct label + Note: we use the mean instead of the sum so that + the learning rate is less dependent on the batch size + """ + return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) + + def errors(self, y): + """Return a float representing the number of errors in the minibatch + over the total number of examples of the minibatch ; zero one + loss over the size of the minibatch + """ + # check if y has same dimension of y_pred + if y.ndim != self.y_pred.ndim: + raise TypeError('y should have the same shape as self.y_pred', + ('y', target.type, 'y_pred', self.y_pred.type)) + + # check if y is of the correct datatype + if y.dtype.startswith('int'): + # the T.neq operator returns a vector of 0s and 1s, where 1 + # represents a mistake in prediction + return T.mean(T.neq(self.y_pred, y)) + else: + raise NotImplementedError() + + +def load_dataset(fname,batch=20): + + # repertoire qui contient les donnees NIST + # le repertoire suivant va fonctionner si vous etes connecte sur un ordinateur + # du reseau DIRO + datapath = '/data/lisa/data/nist/by_class/' + # le fichier .ft contient chiffres NIST dans un format efficace. Les chiffres + # sont stockes dans une matrice de NxD, ou N est le nombre d'images, est D est + # le nombre de pixels par image (32x32 = 1024). Chaque pixel de l'image est une + # valeur entre 0 et 255, correspondant a un niveau de gris. Les valeurs sont + # stockees comme des uint8, donc des bytes. + f = open(datapath+'digits/digits_train_data.ft') + # Verifier que vous avez assez de memoire pour loader les donnees au complet + # dans le memoire. Sinon, utilisez ft.arraylike, une classe construite + # specialement pour des fichiers qu'on ne souhaite pas loader dans RAM. + d = ft.read(f) + + # NB: N'oubliez pas de diviser les valeurs des pixels par 255. si jamais vous + # utilisez les donnees commes entrees dans un reseaux de neurones et que vous + # voulez des entres entre 0 et 1. 
+ # digits_train_data.ft contient les images, digits_train_labels.ft contient les + # etiquettes + f = open(datapath+'digits/digits_train_labels.ft') + labels = ft.read(f) + + + # Load the dataset + #f = gzip.open(fname,'rb') + #train_set, valid_set, test_set = cPickle.load(f) + #f.close() + + # make minibatches of size 20 + batch_size = batch # sized of the minibatch + + # Dealing with the training set + # get the list of training images (x) and their labels (y) + (train_set_x, train_set_y) = (d[:4000,:],labels[:4000]) + # initialize the list of training minibatches with empty list + train_batches = [] + for i in xrange(0, len(train_set_x), batch_size): + # add to the list of minibatches the minibatch starting at + # position i, ending at position i+batch_size + # a minibatch is a pair ; the first element of the pair is a list + # of datapoints, the second element is the list of corresponding + # labels + train_batches = train_batches + \ + [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])] + + #print train_batches[500] + + # Dealing with the validation set + (valid_set_x, valid_set_y) = (d[4000:5000,:],labels[4000:5000]) + # initialize the list of validation minibatches + valid_batches = [] + for i in xrange(0, len(valid_set_x), batch_size): + valid_batches = valid_batches + \ + [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])] + + # Dealing with the testing set + (test_set_x, test_set_y) = (d[5000:6000,:],labels[5000:6000]) + # initialize the list of testing minibatches + test_batches = [] + for i in xrange(0, len(test_set_x), batch_size): + test_batches = test_batches + \ + [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])] + + return train_batches, valid_batches, test_batches + + +def evaluate_lenet5(learning_rate=0.1, n_iter=1, batch_size=20, n_kern0=20,n_kern1=50,filter_shape=5,n_layer=3, dataset='mnist.pkl.gz'): + rng = numpy.random.RandomState(23455) + + print 'Before load dataset' + train_batches, valid_batches, test_batches = load_dataset(dataset,batch_size) + print 'After load dataset' + + ishape = (32,32) # this is the size of NIST images + n_kern2=80 + + # allocate symbolic variables for the data + x = T.matrix('x') # rasterized images + y = T.lvector() # the labels are presented as 1D vector of [long int] labels + + + ###################### + # BUILD ACTUAL MODEL # + ###################### + + # Reshape matrix of rasterized images of shape (batch_size,28*28) + # to a 4D tensor, compatible with our LeNetConvPoolLayer + layer0_input = x.reshape((batch_size,1,32,32)) + + # Construct the first convolutional pooling layer: + # filtering reduces the image size to (32-5+1,32-5+1)=(28,28) + # maxpooling reduces this further to (28/2,28/2) = (14,14) + # 4D output tensor is thus of shape (20,20,14,14) + layer0 = LeNetConvPoolLayer(rng, input=layer0_input, + image_shape=(batch_size,1,32,32), + filter_shape=(n_kern0,1,filter_shape,filter_shape), poolsize=(2,2)) + + if(n_layer>2): + + # Construct the second convolutional pooling layer + # filtering reduces the image size to (14-5+1,14-5+1)=(10,10) + # maxpooling reduces this further to (10/2,10/2) = (5,5) + # 4D output tensor is thus of shape (20,50,5,5) + fshape=(32-filter_shape+1)/2 + layer1 = LeNetConvPoolLayer(rng, input=layer0.output, + image_shape=(batch_size,n_kern0,fshape,fshape), + filter_shape=(n_kern1,n_kern0,filter_shape,filter_shape), poolsize=(2,2)) + + else: + + fshape=(32-filter_shape+1)/2 + layer1_input = layer0.output.flatten(2) + # construct a fully-connected sigmoidal layer + layer1 = 
SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape*fshape, n_out=500) + + layer2 = LogisticRegression(input=layer1.output, n_in=500, n_out=10) + cost = layer2.negative_log_likelihood(y) + test_model = theano.function([x,y], layer2.errors(y)) + params = layer2.params+ layer1.params + layer0.params + + + if(n_layer>3): + + fshape=(32-filter_shape+1)/2 + fshape2=(fshape-filter_shape+1)/2 + fshape3=(fshape2-filter_shape+1)/2 + layer2 = LeNetConvPoolLayer(rng, input=layer1.output, + image_shape=(batch_size,n_kern1,fshape2,fshape2), + filter_shape=(n_kern2,n_kern1,filter_shape,filter_shape), poolsize=(2,2)) + + layer3_input = layer2.output.flatten(2) + + layer3 = SigmoidalLayer(rng, input=layer3_input, + n_in=n_kern2*fshape3*fshape3, n_out=500) + + + layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10) + + cost = layer4.negative_log_likelihood(y) + + test_model = theano.function([x,y], layer4.errors(y)) + + params = layer4.params+ layer3.params+ layer2.params+ layer1.params + layer0.params + + + elif(n_layer>2): + + fshape=(32-filter_shape+1)/2 + fshape2=(fshape-filter_shape+1)/2 + + # the SigmoidalLayer being fully-connected, it operates on 2D matrices of + # shape (batch_size,num_pixels) (i.e matrix of rasterized images). + # This will generate a matrix of shape (20,32*4*4) = (20,512) + layer2_input = layer1.output.flatten(2) + + # construct a fully-connected sigmoidal layer + layer2 = SigmoidalLayer(rng, input=layer2_input, + n_in=n_kern1*fshape2*fshape2, n_out=500) + + + # classify the values of the fully-connected sigmoidal layer + layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) + + # the cost we minimize during training is the NLL of the model + cost = layer3.negative_log_likelihood(y) + + # create a function to compute the mistakes that are made by the model + test_model = theano.function([x,y], layer3.errors(y)) + + # create a list of all model parameters to be fit by gradient descent + params = layer3.params+ layer2.params+ layer1.params + layer0.params + + + + + + # create a list of gradients for all model parameters + grads = T.grad(cost, params) + + # train_model is a function that updates the model parameters by SGD + # Since this model has many parameters, it would be tedious to manually + # create an update rule for each model parameter. We thus create the updates + # dictionary by automatically looping over all (params[i],grads[i]) pairs. + updates = {} + for param_i, grad_i in zip(params, grads): + updates[param_i] = param_i - learning_rate * grad_i + train_model = theano.function([x, y], cost, updates=updates) + + + ############### + # TRAIN MODEL # + ############### + + n_minibatches = len(train_batches) + + # early-stopping parameters + patience = 10000 # look as this many examples regardless + patience_increase = 2 # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = n_minibatches # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + + best_params = None + best_validation_loss = float('inf') + best_iter = 0 + test_score = 0. 
+ start_time = time.clock() + + # have a maximum of `n_iter` iterations through the entire dataset + for iter in xrange(n_iter * n_minibatches): + + # get epoch and minibatch index + epoch = iter / n_minibatches + minibatch_index = iter % n_minibatches + + # get the minibatches corresponding to `iter` modulo + # `len(train_batches)` + x,y = train_batches[ minibatch_index ] + + if iter %100 == 0: + print 'training @ iter = ', iter + cost_ij = train_model(x,y) + + if (iter+1) % validation_frequency == 0: + + # compute zero-one loss on validation set + this_validation_loss = 0. + for x,y in valid_batches: + # sum up the errors for each minibatch + this_validation_loss += test_model(x,y) + + # get the average by dividing with the number of minibatches + this_validation_loss /= len(valid_batches) + print('epoch %i, minibatch %i/%i, validation error %f %%' % \ + (epoch, minibatch_index+1, n_minibatches, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, iter * patience_increase) + + # save best validation score and iteration number + best_validation_loss = this_validation_loss + best_iter = iter + + # test it on the test set + test_score = 0. + for x,y in test_batches: + test_score += test_model(x,y) + test_score /= len(test_batches) + print((' epoch %i, minibatch %i/%i, test error of best ' + 'model %f %%') % + (epoch, minibatch_index+1, n_minibatches, + test_score*100.)) + + if patience <= iter : + break + + end_time = time.clock() + print('Optimization complete.') + print('Best validation score of %f %% obtained at iteration %i,'\ + 'with test performance %f %%' % + (best_validation_loss * 100., best_iter, test_score*100.)) + print('The code ran for %f minutes' % ((end_time-start_time)/60.)) + + return (best_validation_loss * 100., test_score*100., (end_time-start_time)/60., best_iter) + +if __name__ == '__main__': + evaluate_lenet5() + +def experiment(state, channel): + print 'start experiment' + (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.filter_shape, state.n_layer) + print 'end experiment' + + state.best_validation_loss = best_validation_loss + state.test_score = test_score + state.minutes_trained = minutes_trained + state.iter = iter + + return channel.COMPLETE
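The configurable-depth model above keeps recomputing the spatial size of the feature maps (`fshape`, `fshape2`, `fshape3`) to size the next layer's input. The helper below (the name is mine, not the diff's) reproduces that arithmetic so the sizes quoted in the comments (32x32 down to 14x14, then 5x5) can be checked directly.

```python
# Sketch of the feature-map size bookkeeping (fshape, fshape2, fshape3):
# a "valid" convolution shrinks each side by filter_shape - 1, then the
# 2x2 max-pooling halves it (integer division, ignore_border).
def conv_pool_side(side, filter_shape, pool=2):
    return (side - filter_shape + 1) // pool

side = 32                                  # NIST images are 32x32
for stage in range(3):
    side = conv_pool_side(side, 5)
    print("after conv-pool stage %d: side = %d" % (stage + 1, side))
# filter_shape=5: 32 -> 14 -> 5 -> 0, matching the fshape* formulas above;
# a third 5x5 stage leaves no spatial extent, so the n_layer>3 branch only
# yields non-empty maps with a smaller filter_shape.
```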
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/creer_jeu_occlusion.py Thu Feb 25 09:05:48 2010 -0500 @@ -0,0 +1,42 @@ +#!/usr/bin/python +# coding: utf-8 + +''' +Sert a creer un petit jeu de donnees afin de pouvoir avoir des fragments +de lettres pour ajouter bruit d'occlusion + +Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010 + +''' + +from pylearn.io import filetensor as ft +import pylab +import random as r +from numpy import * + +nombre = 20000 #La grandeur de l'echantillon + +f = open('all_train_data.ft') #Le jeu de donnees est en local. +d = ft.read(f) +f.close() +print len(d) +random.seed(3525) + +echantillon=r.sample(xrange(len(d)),nombre) +nouveau=d[0:nombre] +for i in xrange(nombre): + nouveau[i]=d[echantillon[i]] + + +f2 = open('echantillon_occlusion.ft', 'w') +ft.write(f2,nouveau) +f2.close() + + +##Tester si ca a fonctionne +f3 = open('echantillon_occlusion.ft') + +d2=ft.read(f3) +pylab.imshow(d2[0].reshape((32,32))) +pylab.show() +f3.close() \ No newline at end of file
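`creer_jeu_occlusion.py` draws 20000 random examples from `all_train_data.ft` to build the occlusion-fragment set. A minimal sketch of the same sampling step is shown below, using a single explicit `RandomState` so the chosen indices depend only on the one seed; the `ft.read`/`ft.write` calls follow the script, everything else (function name, paths) is illustrative and assumes pylearn is installed.

```python
# Reproducible version of the sampling step (sketch, not the project script).
import numpy
from pylearn.io import filetensor as ft

def sample_subset(in_path, out_path, n=20000, seed=3525):
    with open(in_path) as f:
        d = ft.read(f)
    rng = numpy.random.RandomState(seed)
    idx = rng.permutation(len(d))[:n]      # n distinct indices, seed-determined
    with open(out_path, 'wb') as f2:
        ft.write(f2, d[idx])
    return idx
```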
--- a/scripts/fonts_test.py Thu Feb 25 09:04:40 2010 -0500 +++ b/scripts/fonts_test.py Thu Feb 25 09:05:48 2010 -0500 @@ -1,15 +1,19 @@ +#!/usr/bin/python -import random, os +import os import ImageFont, ImageDraw, Image dir1 = "/data/lisa/data/ift6266h10/allfonts/" +#dir1 = "/Tmp/allfonts/" -img = Image.new("L", (100,100)) +img = Image.new("L", (132,132)) draw = ImageDraw.Draw(img) +L = [chr(ord('0')+x) for x in range(10)] + [chr(ord('A')+x) for x in range(26)] + [chr(ord('a')+x) for x in range(26)] for f in os.listdir(dir1): try: font = ImageFont.truetype(dir1+f, 25) - textSize = font.getsize("kD9") + for l in L: + draw.text((60,60), l, font=font, fill="white") except: print dir1+f
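The updated `fonts_test.py` tries to render all 62 target characters (digits, uppercase, lowercase) with every font file and prints the ones that fail. The same check, wrapped as a reusable function, might look like the sketch below; the old-style PIL imports follow the script, the function name and return value are mine.

```python
# Sketch of the font sanity check: render every target character with each
# font and collect the files that raise (directory path is illustrative).
import os
import Image, ImageDraw, ImageFont   # old-style PIL imports, as in the script

CHARSET = [chr(ord('0') + i) for i in range(10)] + \
          [chr(ord('A') + i) for i in range(26)] + \
          [chr(ord('a') + i) for i in range(26)]

def broken_fonts(font_dir, size=25):
    bad = []
    for name in os.listdir(font_dir):
        path = os.path.join(font_dir, name)
        try:
            font = ImageFont.truetype(path, size)
            img = Image.new("L", (132, 132))
            draw = ImageDraw.Draw(img)
            for c in CHARSET:
                draw.text((60, 60), c, font=font, fill="white")
        except Exception:
            bad.append(path)
    return bad
```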
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/imgbg_test.py Thu Feb 25 09:05:48 2010 -0500 @@ -0,0 +1,15 @@ +#!/usr/bin/python + +import Image, cPickle + +f=open('/Tmp/image_net/filelist.pkl') +image_files = cPickle.load(f) +f.close() + +for i in range(len(image_files)): + filename = '/Tmp/image_net/' + image_files[i] + try: + image = Image.open(filename).convert('L') + except: + print filename +
--- a/scripts/launch_generate100.py Thu Feb 25 09:04:40 2010 -0500 +++ b/scripts/launch_generate100.py Thu Feb 25 09:05:48 2010 -0500 @@ -4,9 +4,9 @@ dir1 = "/data/lisa/data/ift6266h10/" for i,s in enumerate(['valid','test']): - for c in [0.3,0.5,0.7,1]: + for j,c in enumerate([0.3,0.5,0.7,1]): l = str(c).replace('.','') - os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d" % (dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i])) + os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) for i in range(100): - os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200" % (dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1)) + os.system("dbidispatch --condor --os=fc9 --machine=brams0c.iro.umontreal.ca ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i))
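The launcher now passes a distinct `-y` seed to every dbidispatch job: `200 + i*4 + j` for the valid/test sets (`i` indexes the set, `j` the complexity level) and `100 + i` for the 100 training chunks, so no two jobs share a seed. The sketch below isolates just that assignment as I read it from the command strings; the dictionary form is mine.

```python
# Seed assignment implied by the dbidispatch commands above:
# valid/test jobs get 200 + i*4 + j, training chunks get 100 + i.
def generation_seeds():
    seeds = {}
    for i, s in enumerate(['valid', 'test']):
        for j, c in enumerate([0.3, 0.5, 0.7, 1]):
            seeds[(s, c)] = 200 + i * 4 + j
    for i in range(100):
        seeds[('train', i)] = 100 + i
    return seeds

# e.g. generation_seeds()[('test', 0.7)] == 206
```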
--- a/transformations/BruitGauss.py Thu Feb 25 09:04:40 2010 -0500 +++ b/transformations/BruitGauss.py Thu Feb 25 09:05:48 2010 -0500 @@ -22,17 +22,23 @@ ''' import numpy -import random +#import random import scipy from scipy import ndimage class BruitGauss(): - def __init__(self,complexity=1): + def __init__(self,complexity=1,seed=6378): self.nb_chngmax =10 #Le nombre de pixels changes. Seulement pour fin de calcul self.grandeurmax = 20 self.sigmamax = 6.0 self.regenerate_parameters(complexity) + self.seed=seed + + #numpy.random.seed(self.seed) + + def get_seed(self): + return self.seed def get_settings_names(self): return ['nb_chng','sigma_gauss','grandeur']
--- a/transformations/DistorsionGauss.py Thu Feb 25 09:04:40 2010 -0500 +++ b/transformations/DistorsionGauss.py Thu Feb 25 09:05:48 2010 -0500 @@ -16,13 +16,21 @@ class DistorsionGauss(): - def __init__(self): + def __init__(self,seed=3459): self.ecart_type=0.1 #L'ecart type de la gaussienne self.effectuer=1 #1=on effectue et 0=rien faire + self.seed=seed + + #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence + #numpy.random.seed(self.seed) + #random.seed(self.seed) def get_settings_names(self): return ['effectuer'] + def get_seed(self): + return self.seed + def get_settings_names_determined_by_complexity(self,complexity): return ['ecart_type']
--- a/transformations/Occlusion.py Thu Feb 25 09:04:40 2010 -0500 +++ b/transformations/Occlusion.py Thu Feb 25 09:05:48 2010 -0500 @@ -24,7 +24,7 @@ class Occlusion(): - def __init__(self): + def __init__(self,seed=9854): #Ces 4 variables representent la taille du "crop" sur l'image2 #Ce "crop" est pris a partie de image1[15,15], le milieu de l'image1 self.haut=2 @@ -47,6 +47,9 @@ #Sert a dire si on fait quelque chose. 0=faire rien, 1 on fait quelque chose self.appliquer=1 + self.seed=seed + #numpy.random.seed(self.seed) + f3 = open('/data/lisa/data/ift6266h10/echantillon_occlusion.ft') #Doit etre sur le reseau DIRO. #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/echantillon_occlusion.ft') #Il faut arranger le path sinon @@ -59,6 +62,9 @@ def get_settings_names(self): return ['haut','bas','gauche','droite','x_arrivee','y_arrivee','endroit','rajout','appliquer'] + + def get_seed(self): + return self.seed def regenerate_parameters(self, complexity): self.haut=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2)))) @@ -144,6 +150,7 @@ img_trans=transfo.transform_image(img.reshape((32,32))) + print transfo.get_seed() pylab.imshow(img_trans.reshape((32,32))) pylab.show()
--- a/transformations/PermutPixel.py Thu Feb 25 09:04:40 2010 -0500 +++ b/transformations/PermutPixel.py Thu Feb 25 09:05:48 2010 -0500 @@ -18,10 +18,18 @@ class PermutPixel(): - def __init__(self): + def __init__(self,seed=7152): self.nombre=10 #Le nombre de pixels a permuter self.proportion=0.3 self.effectuer=1 #1=on effectue, 0=rien faire + self.seed=seed + + #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence + #numpy.random.seed(self.seed) + #random.seed(self.seed) + + def get_seed(self): + return self.seed def get_settings_names(self): return ['effectuer']
--- a/transformations/PoivreSel.py Thu Feb 25 09:04:40 2010 -0500 +++ b/transformations/PoivreSel.py Thu Feb 25 09:05:48 2010 -0500 @@ -22,11 +22,19 @@ class PoivreSel(): - def __init__(self): + def __init__(self,seed=9361): self.proportion_bruit=0.08 #Le pourcentage des pixels qui seront bruites self.nb_chng=10 #Le nombre de pixels changes. Seulement pour fin de calcul self.effectuer=1 #Vaut 1 si on effectue et 0 sinon. + self.seed=seed + #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence + #numpy.random.seed(self.seed) + #random.seed(self.seed) + + def get_seed(self): + return self.seed + def get_settings_names(self): return ['effectuer']
--- a/transformations/Rature.py Thu Feb 25 09:04:40 2010 -0500 +++ b/transformations/Rature.py Thu Feb 25 09:05:48 2010 -0500 @@ -23,7 +23,7 @@ class Rature(): - def __init__(self): + def __init__(self,seed=1256): self.angle=0 #Angle en degre de la rotation (entre 0 et 180) self.numero=0 #Le numero du 1 choisi dans la banque de 1 self.gauche=-1 #Le numero de la colonne la plus a gauche contenant le 1 @@ -41,6 +41,9 @@ self.nb_ratures=-1 #Le nombre de ratures appliques self.fini=0 #1=fini de mettre toutes les couches 0=pas fini self.complexity=0 #Pour garder en memoire la complexite si plusieurs couches sont necessaires + self.seed=seed + + #numpy.random.seed(self.seed) f3 = open('/data/lisa/data/ift6266h10/un_rature.ft') #Doit etre sur le reseau DIRO. #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/un_rature.ft') @@ -53,6 +56,9 @@ def get_settings_names(self): return ['angle','numero','faire','crop_haut','crop_gauche','largeur_bande','smooth','nb_ratures'] + + def get_seed(self): + return self.seed def regenerate_parameters(self, complexity,next_rature = False):
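The six transformation modules above (`BruitGauss`, `DistorsionGauss`, `Occlusion`, `PermutPixel`, `PoivreSel`, `Rature`) all receive the same change: the constructor takes a `seed`, stores it, and exposes it through `get_seed()`, while the actual `numpy.random.seed`/`random.seed` calls stay commented out because the pipeline seeds the global generators once per job (see the pipeline.py diff below). Reduced to its skeleton, the shared pattern looks like this; the class name is illustrative, not from the repo.

```python
# Skeleton of the seed bookkeeping added to each transformation class.
class TransformationBase(object):           # illustrative base class
    def __init__(self, complexity=1, seed=1234):
        self.seed = seed
        # numpy.random.seed(self.seed)      # intentionally left to the pipeline
        self.regenerate_parameters(complexity)

    def get_seed(self):
        return self.seed

    def regenerate_parameters(self, complexity):
        raise NotImplementedError
```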
--- a/transformations/affine_transform.py Thu Feb 25 09:04:40 2010 -0500 +++ b/transformations/affine_transform.py Thu Feb 25 09:05:48 2010 -0500 @@ -16,9 +16,8 @@ class AffineTransformation(): def __init__( self, complexity = .5): self.shape = (32,32) - self.rng = numpy.random.RandomState() self.complexity = complexity - params = self.rng.uniform(size=6) -.5 + params = numpy.random.uniform(size=6) -.5 self.a = 1. + params[0]*.6*complexity self.b = 0. + params[1]*.6*complexity self.c = params[2]*8.*complexity @@ -43,7 +42,7 @@ # the ranges are set manually as to look acceptable self.complexity = complexity - params = self.rng.uniform(size=6) -.5 + params = numpy.random.uniform(size=6) -.5 self.a = 1. + params[0]*.8*complexity self.b = 0. + params[1]*.8*complexity self.c = params[2]*9.*complexity
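`AffineTransformation` previously drew its parameters from a private, unseeded `numpy.random.RandomState()`, which a global `numpy.random.seed(...)` call cannot influence; the diff switches it to the module-level `numpy.random.uniform`, presumably so the job-level `-y` seed set in the pipeline also controls this transformation. A short sketch of the difference, with an arbitrary seed value:

```python
import numpy

# A private, unseeded RandomState is seeded independently of the global generator ...
numpy.random.seed(42)
rng = numpy.random.RandomState()        # not affected by the seed(42) above
a = rng.uniform(size=6) - .5            # differs from run to run

# ... whereas the module-level generator is controlled by it.
numpy.random.seed(42)
b = numpy.random.uniform(size=6) - .5   # identical on every run with seed 42
```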
--- a/transformations/pipeline.py Thu Feb 25 09:04:40 2010 -0500 +++ b/transformations/pipeline.py Thu Feb 25 09:05:48 2010 -0500 @@ -37,6 +37,45 @@ DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft' ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE'] +# PARSE COMMAND LINE ARGUMENTS +def get_argv(): + with open(ARGS_FILE) as f: + args = [l.rstrip() for l in f.readlines()] + return args + +def usage(): + print ''' +Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...] + -m, --max-complexity: max complexity to generate for an image + -z, --probability-zero: probability of using complexity=0 for an image + -o, --output-file: full path to file to use for output of images + -p, --params-output-file: path to file to output params to + -x, --labels-output-file: path to file to output labels to + -f, --data-file: path to filetensor (.ft) data file (NIST) + -l, --label-file: path to filetensor (.ft) labels file (NIST labels) + -c, --ocr-file: path to filetensor (.ft) data file (OCR) + -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels) + -a, --prob-font: probability of using a raw font image + -b, --prob-captcha: probability of using a captcha image + -g, --prob-ocr: probability of using an ocr image + -y, --seed: the job seed + ''' + +try: + opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", +"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="]) +except getopt.GetoptError, err: + # print help information and exit: + print str(err) # will print something like "option -a not recognized" + usage() + pdb.gimp_quit(0) + sys.exit(2) + +for o, a in opts: + if o in ('-y','--seed'): + random.seed(int(a)) + numpy.random.seed(int(a)) + if DEBUG_X: import pylab pylab.ion() @@ -55,7 +94,7 @@ from add_background_image import AddBackground from affine_transform import AffineTransformation from ttf2jpg import ttf2jpg -from pycaptcha.Facade import generateCaptcha +from Facade import generateCaptcha if DEBUG: from visualizer import Visualizer @@ -254,28 +293,6 @@ ############################################################################## # MAIN -def usage(): - print ''' -Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...] - -m, --max-complexity: max complexity to generate for an image - -z, --probability-zero: probability of using complexity=0 for an image - -o, --output-file: full path to file to use for output of images - -p, --params-output-file: path to file to output params to - -x, --labels-output-file: path to file to output labels to - -f, --data-file: path to filetensor (.ft) data file (NIST) - -l, --label-file: path to filetensor (.ft) labels file (NIST labels) - -c, --ocr-file: path to filetensor (.ft) data file (OCR) - -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels) - -a, --prob-font: probability of using a raw font image - -b, --prob-captcha: probability of using a captcha image - -g, --prob-ocr: probability of using an ocr image - ''' - -# See run_pipeline.py -def get_argv(): - with open(ARGS_FILE) as f: - args = [l.rstrip() for l in f.readlines()] - return args # Might be called locally or through dbidispatch. In all cases it should be # passed to the GIMP executable to be able to use GIMP filters. 
@@ -299,16 +316,6 @@ stop_after = None reload_mode = False - try: - opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", -"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr="]) - except getopt.GetoptError, err: - # print help information and exit: - print str(err) # will print something like "option -a not recognized" - usage() - pdb.gimp_quit(0) - sys.exit(2) - for o, a in opts: if o in ('-m', '--max-complexity'): max_complexity = float(a) @@ -340,6 +347,8 @@ prob_captcha = float(a) elif o in ('-g', "--prob-ocr"): prob_ocr = float(a) + elif o in ('-y', "--seed"): + pass else: assert False, "unhandled option"
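The argument parsing in `pipeline.py` is hoisted to module import time so that `-y/--seed` can seed both `random` and `numpy.random` before any transformation module draws numbers; the later option loop simply ignores `-y`. A condensed sketch of that "seed first, parse the rest later" idiom is given below; the option string is abbreviated to the seed flag only, and the argument list is a stand-in for the `PIPELINE_ARGS_TMPFILE` mechanism shown in the diff.

```python
# Condensed sketch of the early-seeding idiom used in pipeline.py.
import getopt, random, numpy

def seed_from_args(argv):
    opts, _ = getopt.getopt(argv, "y:", ["seed="])
    for o, a in opts:
        if o in ('-y', '--seed'):
            random.seed(int(a))
            numpy.random.seed(int(a))

seed_from_args(['-y', '123'])   # later random / numpy.random draws are reproducible
```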