# HG changeset patch # User Yoshua Bengio # Date 1300368124 14400 # Node ID ca20f94448dc6b902719af103760cd91dac1ea51 # Parent 249a180795e39674b88f46aea878a55d0d49f8ca# Parent 75dbbe40957887a9793e145cd3ade55b9425ae61 merge diff -r 249a180795e3 -r ca20f94448dc data_generation/mnist_resized/rescale_mnist.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_generation/mnist_resized/rescale_mnist.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,46 @@ +import numpy,cPickle,gzip,Image,pdb,sys + + +def zeropad(vect,img_size=(28,28),out_size=(32,32)): + delta = (numpy.abs(img_size[0]-out_size[0])/2,numpy.abs(img_size[1]-out_size[1])/2) + newvect = numpy.zeros(out_size) + newvect[delta[0]:-delta[0],delta[1]:-delta[1]] = vect.reshape(img_size) + return newvect.flatten() + +def rescale(vect,img_size=(28,28),out_size=(32,32), filter=Image.NEAREST): + im = Image.fromarray(numpy.asarray(vect.reshape(img_size)*255.,dtype='uint8')) + return (numpy.asarray(im.resize(out_size,filter),dtype='float32')/255.).flatten() + + +#pdb.set_trace() +def rescale_mnist(newsize=(32,32),output_file='mnist_rescaled_32_32.pkl',mnist=cPickle.load(gzip.open('mnist.pkl.gz'))): + newmnist = [] + for set in mnist: + newset=numpy.zeros((len(set[0]),newsize[0]*newsize[1])) + for i in xrange(len(set[0])): + print i, + sys.stdout.flush() + newset[i] = rescale(set[0][i]) + newmnist.append((newset,set[1])) + cPickle.dump(newmnist,open(output_file,'w'),protocol=-1) + print 'Done rescaling' + + +def zeropad_mnist(newsize=(32,32),output_file='mnist_zeropadded_32_32.pkl',mnist=cPickle.load(gzip.open('mnist.pkl.gz'))): + newmnist = [] + for set in mnist: + newset=numpy.zeros((len(set[0]),newsize[0]*newsize[1])) + for i in xrange(len(set[0])): + print i, + sys.stdout.flush() + newset[i] = zeropad(set[0][i]) + newmnist.append((newset,set[1])) + cPickle.dump(newmnist,open(output_file,'w'),protocol=-1) + print 'Done padding' + +if __name__ =='__main__': + print 'Creating resized datasets' + mnist_ds = cPickle.load(gzip.open('mnist.pkl.gz')) + #zeropad_mnist(mnist=mnist_ds) + rescale_mnist(mnist=mnist_ds) + print 'Finished.' diff -r 249a180795e3 -r ca20f94448dc data_generation/pipeline/filter_nist.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_generation/pipeline/filter_nist.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,62 @@ +import numpy +from pylearn.io import filetensor as ft +from ift6266 import datasets +from ift6266.datasets.ftfile import FTDataSet + +dataset_str = 'P07_' # NISTP # 'P07safe_' + +#base_path = '/data/lisatmp/ift6266h10/data/'+dataset_str +#base_output_path = '/data/lisatmp/ift6266h10/data/transformed_digits/'+dataset_str+'train' + +base_path = '/data/lisa/data/ift6266h10/data/'+dataset_str +base_output_path = '/data/lisatmp/ift6266h10/data/transformed_digits/'+dataset_str+'train' + +for fileno in range(100): + print "Processing file no ", fileno + + output_data_file = base_output_path+str(fileno)+'_data.ft' + output_labels_file = base_output_path+str(fileno)+'_labels.ft' + + print "Reading from ",base_path+'train'+str(fileno)+'_data.ft' + + dataset = lambda maxsize=None, min_file=0, max_file=100: \ + FTDataSet(train_data = [base_path+'train'+str(fileno)+'_data.ft'], + train_lbl = [base_path+'train'+str(fileno)+'_labels.ft'], + test_data = [base_path+'_test_data.ft'], + test_lbl = [base_path+'_test_labels.ft'], + valid_data = [base_path+'_valid_data.ft'], + valid_lbl = [base_path+'_valid_labels.ft']) + # no conversion or scaling... 
keep data as is + #indtype=theano.config.floatX, inscale=255., maxsize=maxsize) + + ds = dataset() + + all_x = [] + all_y = [] + + all_count = 0 + + for mb_x,mb_y in ds.train(1): + if mb_y[0] <= 9: + all_x.append(mb_x[0]) + all_y.append(mb_y[0]) + + if (all_count+1) % 100000 == 0: + print "Done next 100k" + + all_count += 1 + + # data is stored as uint8 on 0-255 + merged_x = numpy.asarray(all_x, dtype=numpy.uint8) + merged_y = numpy.asarray(all_y, dtype=numpy.int32) + + print "Kept", len(all_x), "(shape ", merged_x.shape, ") examples from", all_count + + f = open(output_data_file, 'wb') + ft.write(f, merged_x) + f.close() + + f = open(output_labels_file, 'wb') + ft.write(f, merged_y) + f.close() + diff -r 249a180795e3 -r ca20f94448dc data_generation/pipeline/visualize_filtered.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_generation/pipeline/visualize_filtered.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,43 @@ +import numpy +import pylab +from pylearn.io import filetensor as ft +from ift6266 import datasets +from ift6266.datasets.ftfile import FTDataSet + +import time +import matplotlib.cm as cm + + +dataset_str = 'P07safe_' #'PNIST07_' # NISTP + +base_path = '/data/lisatmp/ift6266h10/data/'+dataset_str +base_output_path = '/data/lisatmp/ift6266h10/data/transformed_digits/'+dataset_str+'train' + +fileno = 15 + +output_data_file = base_output_path+str(fileno)+'_data.ft' +output_labels_file = base_output_path+str(fileno)+'_labels.ft' + +dataset_obj = lambda maxsize=None, min_file=0, max_file=100: \ + FTDataSet(train_data = [output_data_file], + train_lbl = [output_labels_file], + test_data = [base_path+'_test_data.ft'], + test_lbl = [base_path+'_test_labels.ft'], + valid_data = [base_path+'_valid_data.ft'], + valid_lbl = [base_path+'_valid_labels.ft']) + # no conversion or scaling... keep data as is + #indtype=theano.config.floatX, inscale=255., maxsize=maxsize) + +dataset = dataset_obj() +train_ds = dataset.train(1) + +for i in range(2983): + if i < 2900: + continue + ex = train_ds.next() + pylab.ion() + pylab.clf() + pylab.imshow(ex[0].reshape(32,32),cmap=cm.gray) + pylab.draw() + time.sleep(0.5) + diff -r 249a180795e3 -r ca20f94448dc deep/deep_mlp/job.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/deep_mlp/job.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,335 @@ +#!/usr/bin/env python +# coding: utf-8 + +''' +Launching + +jobman sqlschedules postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/mlp_dumi mlp_jobman.experiment mlp_jobman.conf +'n_hidden={{500,1000,2000}}' +'n_hidden_layers={{2,3}}' +'train_on={{NIST,NISTP,P07}}' +'train_subset={{DIGITS_ONLY,ALL}}' +'learning_rate_log10={{-1.,-2.,-3.}}' + +in mlp_jobman.conf: +rng_seed=1234 +L1_reg=0.0 +L2_reg=0.0 +n_epochs=10 +minibatch_size=20 +''' + +import os, sys, copy, operator, time +import theano +import theano.tensor as T +import numpy +from mlp import MLP +from ift6266 import datasets +from pylearn.io.seriestables import * +import tables +from jobman.tools import DD + +N_INPUTS = 32*32 +REDUCE_EVERY = 250 + +TEST_RUN = False + +TEST_HP = DD({'n_hidden':200, + 'n_hidden_layers': 2, + 'train_on':'NIST', + 'train_subset':'ALL', + 'learning_rate_log10':-2, + 'rng_seed':1234, + 'L1_reg':0.0, + 'L2_reg':0.0, + 'n_epochs':2, + 'minibatch_size':20}) + +########################################### +# digits datasets +# nist_digits is already in NIST_PATH and in ift6266.datasets +# NOTE: for these datasets the test and valid sets are wrong +# (don't correspond to the training set... 
they're just placeholders)
+
+from ift6266.datasets.defs import NIST_PATH, DATA_PATH
+# FTDataSet is used below but was not imported; bring it in explicitly
+from ift6266.datasets.ftfile import FTDataSet
+
+TRANSFORMED_DIGITS_PATH = '/data/lisatmp/ift6266h10/data/transformed_digits'
+
+P07_digits = FTDataSet(\
+                train_data = [os.path.join(TRANSFORMED_DIGITS_PATH,\
+                                'data/P07_train'+str(i)+'_data.ft')\
+                                for i in range(0, 100)],
+                train_lbl = [os.path.join(TRANSFORMED_DIGITS_PATH,\
+                                'data/P07_train'+str(i)+'_labels.ft')\
+                                for i in range(0,100)],
+                test_data = [os.path.join(DATA_PATH,'data/P07_test_data.ft')],
+                test_lbl = [os.path.join(DATA_PATH,'data/P07_test_labels.ft')],
+                valid_data = [os.path.join(DATA_PATH,'data/P07_valid_data.ft')],
+                valid_lbl = [os.path.join(DATA_PATH,'data/P07_valid_labels.ft')],
+                indtype=theano.config.floatX, inscale=255., maxsize=None)
+
+# Added PNIST
+PNIST07_digits = FTDataSet(train_data = [os.path.join(TRANSFORMED_DIGITS_PATH,\
+                                'PNIST07_train'+str(i)+'_data.ft')\
+                                for i in range(0,100)],
+                train_lbl = [os.path.join(TRANSFORMED_DIGITS_PATH,\
+                                'PNIST07_train'+str(i)+'_labels.ft')\
+                                for i in range(0,100)],
+                test_data = [os.path.join(DATA_PATH,'data/PNIST07_test_data.ft')],
+                test_lbl = [os.path.join(DATA_PATH,'data/PNIST07_test_labels.ft')],
+                valid_data = [os.path.join(DATA_PATH,'data/PNIST07_valid_data.ft')],
+                valid_lbl = [os.path.join(DATA_PATH,'data/PNIST07_valid_labels.ft')],
+                indtype=theano.config.floatX, inscale=255., maxsize=None)
+
+
+# building valid/test datasets
+# - we want a dataset_obj for each of the 3 datasets
+# - so we just build FTDataset(train=anything, test=..., valid=PNIST, etc.)
+# - in the array we want pointers to either the test or the valid function,
+#   so NOT dataset_obj, but dataset_obj.train (without the parentheses)
+def build_test_valid_sets():
+    nist_ds = datasets.nist_all()
+    pnist_ds = datasets.PNIST07()
+    p07_ds = datasets.nist_P07()
+
+    test_valid_fns = [nist_ds.test, nist_ds.valid,
+                      pnist_ds.test, pnist_ds.valid,
+                      p07_ds.test, p07_ds.valid]
+
+    test_valid_names = ["nist_all__test", "nist_all__valid",
+                        "NISTP__test", "NISTP__valid",
+                        "P07__test", "P07__valid"]
+
+    return test_valid_fns, test_valid_names
+
+def add_error_series(series, error_name, hdf5_file,
+                     index_names=('minibatch_idx',), use_accumulator=False,
+                     reduce_every=250):
+    # train
+    series_base = ErrorSeries(error_name=error_name,
+                              table_name=error_name,
+                              hdf5_file=hdf5_file,
+                              index_names=index_names)
+
+    if use_accumulator:
+        series[error_name] = \
+            AccumulatorSeriesWrapper(base_series=series_base,
+                                     reduce_every=reduce_every)
+    else:
+        series[error_name] = series_base
+
+TEST_VALID_FNS,TEST_VALID_NAMES = None, None
+def compute_and_save_errors(state, mlp, series, hdf5_file, minibatch_idx):
+    global TEST_VALID_FNS,TEST_VALID_NAMES
+
+    TEST_VALID_FNS,TEST_VALID_NAMES = build_test_valid_sets()
+
+    # if the training is on digits only, then there'll be a 100%
+    # error on the non-digit classes in the valid/test set... just ignore them
+
+    test_fn = theano.function([mlp.input], mlp.logRegressionLayer.y_pred)
+
+    test_batch_size = 100
+    for test_ds_fn,test_ds_name in zip(TEST_VALID_FNS,TEST_VALID_NAMES):
+        # reset error counts for every test/valid set
+        # note: float
+        total_errors = total_digit_errors = \
+            total_uppercase_errors = total_lowercase_errors = 0.
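+        # The masks computed below assume the 62-class NIST label layout:
+        # labels < 10 are digits, labels >= 36 feed the "upper" counters,
+        # and the remaining labels feed the "lower" counters; error rates
+        # are then reported per group as "all", "digits", "lower", "upper".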
+
+        total_all = total_lowercase = total_uppercase = total_digit = 0
+
+        for mb_x,mb_y in test_ds_fn(test_batch_size):
+            digit_mask = mb_y < 10
+            uppercase_mask = mb_y >= 36
+            lowercase_mask = numpy.ones((len(mb_x),)) \
+                                - digit_mask - uppercase_mask
+
+            total_all += len(mb_x)
+            total_digit += sum(digit_mask)
+            total_uppercase += sum(uppercase_mask)
+            total_lowercase += sum(lowercase_mask)
+
+            predictions = test_fn(mb_x)
+
+            all_errors = (mb_y != predictions)
+            total_errors += sum(all_errors)
+
+            if len(all_errors) != len(digit_mask):
+                print "size all", all_errors.shape, " digit", digit_mask.shape
+
+            total_digit_errors += sum(numpy.multiply(all_errors, digit_mask))
+            total_uppercase_errors += sum(numpy.multiply(all_errors, uppercase_mask))
+            total_lowercase_errors += sum(numpy.multiply(all_errors, lowercase_mask))
+
+        four_errors = [float(total_errors) / total_all,
+                       float(total_digit_errors) / total_digit,
+                       float(total_lowercase_errors) / total_lowercase,
+                       float(total_uppercase_errors) / total_uppercase]
+
+        four_errors_names = ["all", "digits", "lower", "upper"]
+
+        # record stats per set
+        print "Errors on", test_ds_name, ",".join(four_errors_names),\
+            ":", ",".join([str(e) for e in four_errors])
+
+        # now in the state
+        for err, errname in zip(four_errors, four_errors_names):
+            error_full_name = 'error__'+test_ds_name+'_'+errname
+            min_name = 'min_'+error_full_name
+            minpos_name = 'minpos_'+error_full_name
+
+            if state.has_key(min_name):
+                if state[min_name] > err:
+                    state[min_name] = err
+                    state[minpos_name] = minibatch_idx
+            else:
+                # also create the series
+                add_error_series(series, error_full_name, hdf5_file,
+                                 index_names=('minibatch_idx',))
+                state[min_name] = err
+                state[minpos_name] = minibatch_idx
+
+            series[error_full_name].append((minibatch_idx,), err)
+
+def jobman_entrypoint(state, channel):
+    global TEST_RUN
+    minibatch_size = state.minibatch_size
+
+    print_every = 100000
+    COMPUTE_ERROR_EVERY = 10**7 / minibatch_size # compute error every 10 million examples
+    if TEST_RUN:
+        print_every = 100
+        COMPUTE_ERROR_EVERY = 1000 / minibatch_size
+
+    print "entrypoint, state is"
+    print state
+
+    ######################
+    # select dataset and dataset subset, plus adjust epoch num to make number
+    # of examples seen independent of dataset
+    # e.g. for the DIGITS_ONLY case the number of epochs must be changed,
+    # and for plain NIST (no transformations) it must be multiplied by 100
+    # from the start, since we do not have the transformed variations
+
+    # compute this in terms of the P07 dataset size (=80M)
+    MINIBATCHES_TO_SEE = state.n_epochs * 8 * (10**6) / minibatch_size
+
+    if state.train_on == 'NIST' and state.train_subset == 'ALL':
+        dataset_obj = datasets.nist_all()
+    elif state.train_on == 'NIST' and state.train_subset == 'DIGITS_ONLY':
+        dataset_obj = datasets.nist_digits()
+    elif state.train_on == 'NISTP' and state.train_subset == 'ALL':
+        dataset_obj = datasets.PNIST07()
+    elif state.train_on == 'NISTP' and state.train_subset == 'DIGITS_ONLY':
+        dataset_obj = PNIST07_digits
+    elif state.train_on == 'P07' and state.train_subset == 'ALL':
+        dataset_obj = datasets.nist_P07()
+    elif state.train_on == 'P07' and state.train_subset == 'DIGITS_ONLY':
+        dataset_obj = P07_digits
+
+    dataset = dataset_obj
+
+    if state.train_subset == 'ALL':
+        n_classes = 62
+    elif state.train_subset == 'DIGITS_ONLY':
+        n_classes = 10
+    else:
+        raise NotImplementedError()
+
+    ###############################
+    # construct model
+
+    print "constructing model..."
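+    # What follows builds the Theano graph: symbolic minibatch inputs x
+    # (rasterized 32x32 images, N_INPUTS = 32*32) and integer labels y,
+    # an MLP with state.n_hidden_layers tanh hidden layers of
+    # state.n_hidden units each, and the regularized cost
+    #     cost = mean NLL + L1_reg * L1 + L2_reg * L2_sqr
+    # minimized by plain SGD updates
+    #     param <- param - 10**learning_rate_log10 * grad(cost, param).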
+    x = T.matrix('x')
+    y = T.ivector('y')
+
+    rng = numpy.random.RandomState(state.rng_seed)
+
+    # construct the MLP class
+    model = MLP(rng = rng, input=x, n_in=N_INPUTS,
+                n_hidden_layers = state.n_hidden_layers,
+                n_hidden = state.n_hidden, n_out=n_classes)
+
+    # cost and training fn
+    cost = T.mean(model.negative_log_likelihood(y)) \
+                + state.L1_reg * model.L1 \
+                + state.L2_reg * model.L2_sqr
+
+    print "L1, L2: ", state.L1_reg, state.L2_reg
+
+    gradient_nll_wrt_params = []
+    for param in model.params:
+        gparam = T.grad(cost, param)
+        gradient_nll_wrt_params.append(gparam)
+
+    learning_rate = 10**float(state.learning_rate_log10)
+    print "Learning rate", learning_rate
+
+    train_updates = {}
+    for param, gparam in zip(model.params, gradient_nll_wrt_params):
+        train_updates[param] = param - learning_rate * gparam
+
+    train_fn = theano.function([x,y], cost, updates=train_updates)
+
+    #######################
+    # create series
+    basedir = os.getcwd()
+
+    h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")
+
+    series = {}
+    add_error_series(series, "training_error", h5f,
+                     index_names=('minibatch_idx',), use_accumulator=True,
+                     reduce_every=REDUCE_EVERY)
+
+    ##########################
+    # training loop
+
+    start_time = time.clock()
+
+    print "begin training..."
+    print "will train for", MINIBATCHES_TO_SEE, "minibatches"
+
+    mb_idx = 0
+    last_costs = []
+
+    # train until MINIBATCHES_TO_SEE minibatches have been processed,
+    # iterating over the training stream; cap the run early in TEST_RUN mode
+    while mb_idx*minibatch_size < MINIBATCHES_TO_SEE*minibatch_size:
+        for mb_x, mb_y in dataset.train(minibatch_size):
+            if TEST_RUN and mb_idx > 1000:
+                break
+
+            last_cost = train_fn(mb_x, mb_y)
+            series["training_error"].append((mb_idx,), last_cost)
+
+            last_costs.append(last_cost)
+            if (len(last_costs)+1) % print_every == 0:
+                print "Mean over last", print_every, "minibatches: ", numpy.mean(last_costs)
+                last_costs = []
+
+            if (mb_idx+1) % COMPUTE_ERROR_EVERY == 0:
+                # compute errors
+                print "computing errors on all datasets..."
+                print "Time since training began: ", (time.clock()-start_time)/60., "minutes"
+                compute_and_save_errors(state, model, series, h5f, mb_idx)
+
+            channel.save()
+
+            sys.stdout.flush()
+
+            mb_idx += 1
+
+    end_time = time.clock()
+
+    print "-"*80
+    print "Finished.
Training took", (end_time-start_time)/60., "minutes" + print state + +def run_test(): + global TEST_RUN + from fsml.job_management import mock_channel + TEST_RUN = True + jobman_entrypoint(TEST_HP, mock_channel) + +if __name__ == '__main__': + run_test() + diff -r 249a180795e3 -r ca20f94448dc deep/deep_mlp/logistic_sgd.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/deep_mlp/logistic_sgd.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,223 @@ +import numpy, time, cPickle, gzip, sys, os + +import theano +import theano.tensor as T + +class LogisticRegression(object): + def __init__(self, input, n_in, n_out): + self.W = theano.shared(value=numpy.zeros((n_in,n_out), + dtype = theano.config.floatX), + name='W') + self.b = theano.shared(value=numpy.zeros((n_out,), + dtype = theano.config.floatX), + name='b') + + self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) + + self.y_pred=T.argmax(self.p_y_given_x, axis=1) + + self.params = [self.W, self.b] + + def negative_log_likelihood(self, y): + return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) + + + def errors(self, y): + if y.ndim != self.y_pred.ndim: + raise TypeError('y should have the same shape as self.y_pred', + ('y', target.type, 'y_pred', self.y_pred.type)) + + if y.dtype.startswith('int'): + return T.mean(T.neq(self.y_pred, y)) + else: + raise NotImplementedError() + + +def load_data(dataset): + ''' Loads the dataset + + :type dataset: string + :param dataset: the path to the dataset (here MNIST) + ''' + + ############# + # LOAD DATA # + ############# + print '... loading data' + + # Load the dataset + f = gzip.open(dataset,'rb') + train_set, valid_set, test_set = cPickle.load(f) + f.close() + + + def shared_dataset(data_xy): + """ Function that loads the dataset into shared variables + + The reason we store our dataset in shared variables is to allow + Theano to copy it into the GPU memory (when code is run on GPU). + Since copying data into the GPU is slow, copying a minibatch everytime + is needed (the default behaviour if the data is not in a shared + variable) would lead to a large decrease in performance. + """ + data_x, data_y = data_xy + shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) + shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) + # When storing data on the GPU it has to be stored as floats + # therefore we will store the labels as ``floatX`` as well + # (``shared_y`` does exactly that). But during our computations + # we need them as ints (we use labels as index, and if they are + # floats it doesn't make sense) therefore instead of returning + # ``shared_y`` we will have to cast it to int. 
This little hack + # lets ous get around this issue + return shared_x, T.cast(shared_y, 'int32') + + test_set_x, test_set_y = shared_dataset(test_set) + valid_set_x, valid_set_y = shared_dataset(valid_set) + train_set_x, train_set_y = shared_dataset(train_set) + + rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] + return rval + + + + +def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='../data/mnist.pkl.gz', + batch_size = 600): + datasets = load_data(dataset) + + train_set_x, train_set_y = datasets[0] + valid_set_x, valid_set_y = datasets[1] + test_set_x , test_set_y = datasets[2] + + # compute number of minibatches for training, validation and testing + n_train_batches = train_set_x.value.shape[0] / batch_size + n_valid_batches = valid_set_x.value.shape[0] / batch_size + n_test_batches = test_set_x.value.shape[0] / batch_size + + + ###################### + # BUILD ACTUAL MODEL # + ###################### + print '... building the model' + + + # allocate symbolic variables for the data + index = T.lscalar() # index to a [mini]batch + x = T.matrix('x') # the data is presented as rasterized images + y = T.ivector('y') # the labels are presented as 1D vector of + # [int] labels + + # construct the logistic regression class + # Each MNIST image has size 28*28 + classifier = LogisticRegression( input=x, n_in=28*28, n_out=10) + + # the cost we minimize during training is the negative log likelihood of + # the model in symbolic format + cost = classifier.negative_log_likelihood(y) + + # compiling a Theano function that computes the mistakes that are made by + # the model on a minibatch + test_model = theano.function(inputs = [index], + outputs = classifier.errors(y), + givens={ + x:test_set_x[index*batch_size:(index+1)*batch_size], + y:test_set_y[index*batch_size:(index+1)*batch_size]}) + + validate_model = theano.function( inputs = [index], + outputs = classifier.errors(y), + givens={ + x:valid_set_x[index*batch_size:(index+1)*batch_size], + y:valid_set_y[index*batch_size:(index+1)*batch_size]}) + + # compute the gradient of cost with respect to theta = (W,b) + g_W = T.grad(cost = cost, wrt = classifier.W) + g_b = T.grad(cost = cost, wrt = classifier.b) + + # specify how to update the parameters of the model as a dictionary + updates ={classifier.W: classifier.W - learning_rate*g_W,\ + classifier.b: classifier.b - learning_rate*g_b} + + # compiling a Theano function `train_model` that returns the cost, but in + # the same time updates the parameter of the model based on the rules + # defined in `updates` + train_model = theano.function(inputs = [index], + outputs = cost, + updates = updates, + givens={ + x:train_set_x[index*batch_size:(index+1)*batch_size], + y:train_set_y[index*batch_size:(index+1)*batch_size]}) + + ############### + # TRAIN MODEL # + ############### + print '... training the model' + # early-stopping parameters + patience = 5000 # look as this many examples regardless + patience_increase = 2 # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(n_train_batches, patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + + best_params = None + best_validation_loss = float('inf') + test_score = 0. 
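+    # Early stopping, as used below: training ends either after n_epochs or
+    # once `patience` (counted in minibatch iterations) runs out.  Whenever
+    # the validation error drops below best_validation_loss *
+    # improvement_threshold, patience is extended to at least
+    # iter * patience_increase and the model is re-evaluated on the test set.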
+ start_time = time.clock() + + done_looping = False + epoch = 0 + while (epoch < n_epochs) and (not done_looping): + epoch = epoch + 1 + for minibatch_index in xrange(n_train_batches): + + minibatch_avg_cost = train_model(minibatch_index) + # iteration number + iter = epoch * n_train_batches + minibatch_index + + if (iter+1) % validation_frequency == 0: + # compute zero-one loss on validation set + validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] + this_validation_loss = numpy.mean(validation_losses) + + print('epoch %i, minibatch %i/%i, validation error %f %%' % \ + (epoch, minibatch_index+1,n_train_batches, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, iter * patience_increase) + + best_validation_loss = this_validation_loss + # test it on the test set + + test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_score = numpy.mean(test_losses) + + print((' epoch %i, minibatch %i/%i, test error of best ' + 'model %f %%') % \ + (epoch, minibatch_index+1, n_train_batches,test_score*100.)) + + if patience <= iter : + done_looping = True + break + + end_time = time.clock() + print(('Optimization complete with best validation score of %f %%,' + 'with test performance %f %%') % + (best_validation_loss * 100., test_score*100.)) + print 'The code run for %d epochs, with %f epochs/sec'%(epoch,1.*epoch/(end_time-start_time)) + print >> sys.stderr, ('The code for file '+os.path.split(__file__)[1]+' ran for %.1fs' % ((end_time-start_time))) + +if __name__ == '__main__': + sgd_optimization_mnist() + + diff -r 249a180795e3 -r ca20f94448dc deep/deep_mlp/mlp.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/deep_mlp/mlp.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,210 @@ +__docformat__ = 'restructedtext en' + +import numpy, time, cPickle, gzip, sys, os + +import theano +import theano.tensor as T + +from logistic_sgd import LogisticRegression, load_data + +class HiddenLayer(object): + def __init__(self, rng, input, n_in, n_out, activation = T.tanh): + print "Creating HiddenLayer with params" + print locals() + + self.input = input + + W_values = numpy.asarray( rng.uniform( + low = - numpy.sqrt(6./(n_in+n_out)), + high = numpy.sqrt(6./(n_in+n_out)), + size = (n_in, n_out)), dtype = theano.config.floatX) + if activation == theano.tensor.nnet.sigmoid: + W_values *= 4 + + self.W = theano.shared(value = W_values, name ='W') + + b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) + self.b = theano.shared(value= b_values, name ='b') + + self.output = activation(T.dot(input, self.W) + self.b) + + self.params = [self.W, self.b] + + +class MLP(object): + def __init__(self, rng, input, n_in, n_hidden_layers, n_hidden, n_out): + print "Creating MLP with params" + print locals() + + self.input = input + + self.hiddenLayers = [] + + last_input = input + last_n_out = n_in + for i in range(n_hidden_layers): + self.hiddenLayers.append(\ + HiddenLayer(rng = rng, input = last_input, + n_in = last_n_out, + n_out = n_hidden, + activation = T.tanh)) + last_input = self.hiddenLayers[-1].output + last_n_out = n_hidden + + self.logRegressionLayer = LogisticRegression( + input = self.hiddenLayers[-1].output, + n_in = n_hidden, + n_out = n_out) + + self.L1 = abs(self.logRegressionLayer.W).sum() + for h in self.hiddenLayers: + 
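+            # accumulate each hidden layer's weights into the L1 penalty
+            # (the logistic regression layer's weights were added above)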
self.L1 += abs(h.W).sum() + + self.L2_sqr = (self.logRegressionLayer.W**2).sum() + for h in self.hiddenLayers: + self.L2_sqr += (h.W**2).sum() + + self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood + + self.errors = self.logRegressionLayer.errors + + self.params = [] + for hl in self.hiddenLayers: + self.params += hl.params + self.params += self.logRegressionLayer.params + + +def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000, + dataset = '../data/mnist.pkl.gz', batch_size = 20): + datasets = load_data(dataset) + + train_set_x, train_set_y = datasets[0] + valid_set_x, valid_set_y = datasets[1] + test_set_x , test_set_y = datasets[2] + + n_train_batches = train_set_x.value.shape[0] / batch_size + n_valid_batches = valid_set_x.value.shape[0] / batch_size + n_test_batches = test_set_x.value.shape[0] / batch_size + + ###################### + # BUILD ACTUAL MODEL # + ###################### + print '... building the model' + + # allocate symbolic variables for the data + index = T.lscalar() # index to a [mini]batch + x = T.matrix('x') # the data is presented as rasterized images + y = T.ivector('y') # the labels are presented as 1D vector of + # [int] labels + + rng = numpy.random.RandomState(1234) + + # construct the MLP class + classifier = MLP( rng = rng, input=x, n_in=28*28, n_hidden = 500, n_out=10) + + # the cost we minimize during training is the negative log likelihood of + # the model plus the regularization terms (L1 and L2); cost is expressed + # here symbolically + cost = classifier.negative_log_likelihood(y) \ + + L1_reg * classifier.L1 \ + + L2_reg * classifier.L2_sqr + + # compiling a Theano function that computes the mistakes that are made + # by the model on a minibatch + test_model = theano.function(inputs = [index], + outputs = classifier.errors(y), + givens={ + x:test_set_x[index*batch_size:(index+1)*batch_size], + y:test_set_y[index*batch_size:(index+1)*batch_size]}) + + validate_model = theano.function(inputs = [index], + outputs = classifier.errors(y), + givens={ + x:valid_set_x[index*batch_size:(index+1)*batch_size], + y:valid_set_y[index*batch_size:(index+1)*batch_size]}) + + # compute the gradient of cost with respect to theta (sotred in params) + # the resulting gradients will be stored in a list gparams + gparams = [] + for param in classifier.params: + gparam = T.grad(cost, param) + gparams.append(gparam) + + + # specify how to update the parameters of the model as a dictionary + updates = {} + # given two list the zip A = [ a1,a2,a3,a4] and B = [b1,b2,b3,b4] of + # same length, zip generates a list C of same size, where each element + # is a pair formed from the two lists : + # C = [ (a1,b1), (a2,b2), (a3,b3) , (a4,b4) ] + for param, gparam in zip(classifier.params, gparams): + updates[param] = param - learning_rate*gparam + + # compiling a Theano function `train_model` that returns the cost, but + # in the same time updates the parameter of the model based on the rules + # defined in `updates` + train_model =theano.function( inputs = [index], outputs = cost, + updates = updates, + givens={ + x:train_set_x[index*batch_size:(index+1)*batch_size], + y:train_set_y[index*batch_size:(index+1)*batch_size]}) + + ############### + # TRAIN MODEL # + ############### + print '... 
training' + + # early-stopping parameters + patience = 10000 # look as this many examples regardless + patience_increase = 2 # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(n_train_batches,patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + + + best_params = None + best_validation_loss = float('inf') + best_iter = 0 + test_score = 0. + start_time = time.clock() + + epoch = 0 + done_looping = False + + while (epoch < n_epochs) and (not done_looping): + epoch = epoch + 1 + for minibatch_index in xrange(n_train_batches): + + minibatch_avg_cost = train_model(minibatch_index) + # iteration number + iter = epoch * n_train_batches + minibatch_index + + if (iter+1) % validation_frequency == 0: + # compute zero-one loss on validation set + validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] + this_validation_loss = numpy.mean(validation_losses) + + print('epoch %i, minibatch %i/%i, validation error %f %%' % \ + (epoch, minibatch_index+1,n_train_batches, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, iter * patience_increase) + + best_validation_loss = this_validation_loss + # test it on the test set + + test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_score = numpy.mean(test_losses) + +