changeset 201:25444fc301e0

Branch merge
author Arnaud Bergeron <abergeron@gmail.com>
date Wed, 03 Mar 2010 16:46:16 -0500
parents 3f2cc90ad51c (diff) e656edaedb48 (current diff)
children 6ea5dcf0541e
files
diffstat 4 files changed, 141 insertions(+), 342 deletions(-) [+]
line wrap: on
line diff
--- a/baseline/conv_mlp/convolutional_mlp.py	Wed Mar 03 12:51:40 2010 -0500
+++ b/baseline/conv_mlp/convolutional_mlp.py	Wed Mar 03 16:46:16 2010 -0500
@@ -26,7 +26,8 @@
 import theano.sandbox.softsign
 import pylearn.datasets.MNIST
 from pylearn.io import filetensor as ft
-from theano.sandbox import conv, downsample
+from theano.tensor.signal import downsample
+from theano.tensor.nnet import conv
 
 class LeNetConvPoolLayer(object):
 
--- a/baseline/log_reg/log_reg.py	Wed Mar 03 12:51:40 2010 -0500
+++ b/baseline/log_reg/log_reg.py	Wed Mar 03 16:46:16 2010 -0500
@@ -35,11 +35,11 @@
 """
 __docformat__ = 'restructedtext en'
 
-import numpy, time, cPickle, gzip
+import numpy, time
 
 import theano
 import theano.tensor as T
-
+from ift6266 import datasets
 
 class LogisticRegression(object):
     """Multi-class Logistic Regression Class
@@ -112,6 +112,8 @@
         # i.e., the mean log-likelihood across the minibatch.
         return -T.mean( T.log( self.p_y_given_x )[ T.arange( y.shape[0] ), y ] )
 
+    def MSE(self, y):
+        return -T.mean(abs((self.p_t_given_x)[T.arange(y.shape[0]), y]-y)**2)
 
     def errors( self, y ):
         """Return a float representing the number of errors in the minibatch 
@@ -135,107 +137,12 @@
         else:
             raise NotImplementedError()
         
-def shared_dataset( data_xy ):
-        """ Function that loads the dataset into shared variables
-        
-        The reason we store our dataset in shared variables is to allow 
-        Theano to copy it into the GPU memory (when code is run on GPU). 
-        Since copying data into the GPU is slow, copying a minibatch everytime
-        is needed (the default behaviour if the data is not in a shared 
-        variable) would lead to a large decrease in performance.
-        """
-        data_x, data_y = data_xy
-        shared_x = theano.shared( numpy.asarray( data_x, dtype = theano.config.floatX ) )
-        shared_y = theano.shared( numpy.asarray( data_y, dtype = theano.config.floatX ) )
-        # When storing data on the GPU it has to be stored as floats
-        # therefore we will store the labels as ``floatX`` as well
-        # (``shared_y`` does exactly that). But during our computations
-        # we need them as ints (we use labels as index, and if they are 
-        # floats it doesn't make sense) therefore instead of returning 
-        # ``shared_y`` we will have to cast it to int. This little hack
-        # lets ous get around this issue
-        return shared_x, T.cast( shared_y, 'int32' )
-
-def load_data_pkl_gz( dataset ):
-    ''' Loads the dataset
-
-    :type dataset: string
-    :param dataset: the path to the dataset (here MNIST)
-    '''
-
-    #--------------------------------------------------------------------------------------------------------------------
-    # Load Data
-    #--------------------------------------------------------------------------------------------------------------------
-
-
-    print '... loading data'
-
-    # Load the dataset 
-    f = gzip.open(dataset,'rb')
-    train_set, valid_set, test_set = cPickle.load(f)
-    f.close()
-    
-    test_set_x,  test_set_y  = shared_dataset( test_set )
-    valid_set_x, valid_set_y = shared_dataset( valid_set )
-    train_set_x, train_set_y = shared_dataset( train_set )
-
-    rval = [ ( train_set_x, train_set_y ), ( valid_set_x,valid_set_y ), ( test_set_x, test_set_y ) ]
-    return rval
-
-##def load_data_ft(      verbose = False,\
-##                                    data_path = '/data/lisa/data/nist/by_class/'\
-##                                    train_data = 'all/all_train_data.ft',\
-##                                    train_labels = 'all/all_train_labels.ft',\
-##                                    test_data = 'all/all_test_data.ft',\
-##                                    test_labels = 'all/all_test_labels.ft'):
-##   
-##    train_data_file = open(data_path + train_data)
-##    train_labels_file = open(data_path + train_labels)
-##    test_labels_file = open(data_path + test_data)
-##    test_data_file = open(data_path + test_labels)
-##    
-##    raw_train_data = ft.read( train_data_file)
-##    raw_train_labels = ft.read(train_labels_file)
-##    raw_test_data = ft.read( test_labels_file)
-##    raw_test_labels = ft.read( test_data_file)
-##    
-##    f.close()
-##    g.close()
-##    i.close()
-##    h.close()
-##    
-##    
-##    test_set_x,  test_set_y  = shared_dataset(test_set)
-##    valid_set_x, valid_set_y = shared_dataset(valid_set)
-##    train_set_x, train_set_y = shared_dataset(train_set)
-##
-##    rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)]
-##    return rval
-##    #create a validation set the same size as the test size
-##    #use the end of the training array for this purpose
-##    #discard the last remaining so we get a %batch_size number
-##    test_size=len(raw_test_labels)
-##    test_size = int(test_size/batch_size)
-##    test_size*=batch_size
-##    train_size = len(raw_train_data)
-##    train_size = int(train_size/batch_size)
-##    train_size*=batch_size
-##    validation_size =test_size 
-##    offset = train_size-test_size
-##    if verbose == True:
-##        print 'train size = %d' %train_size
-##        print 'test size = %d' %test_size
-##        print 'valid size = %d' %validation_size
-##        print 'offset = %d' %offset
-##    
-##    
-
 #--------------------------------------------------------------------------------------------------------------------
 # MAIN
 #--------------------------------------------------------------------------------------------------------------------
 
 def log_reg( learning_rate = 0.13, nb_max_examples =1000000, batch_size = 50, \
-                    dataset_name = 'mnist.pkl.gz', image_size = 28 * 28, nb_class = 10,  \
+                    dataset=datasets.nist_digits, image_size = 32 * 32, nb_class = 10,  \
                     patience = 5000, patience_increase = 2, improvement_threshold = 0.995):
     
     """
@@ -254,9 +161,8 @@
     :type batch_size: int  
     :param batch_size:  size of the minibatch
 
-    :type dataset_name: string
-    :param dataset: the path of the MNIST dataset file from 
-                         http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
+    :type dataset: dataset
+    :param dataset: a dataset instance from ift6266.datasets
                         
     :type image_size: int
     :param image_size: size of the input image in pixels (width * height)
@@ -275,17 +181,6 @@
 
 
     """
-    datasets = load_data_pkl_gz( dataset_name )
-
-    train_set_x, train_set_y = datasets[0]
-    valid_set_x, valid_set_y = datasets[1]
-    test_set_x , test_set_y   = datasets[2]
-
-    # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.value.shape[0] / batch_size
-    n_valid_batches = valid_set_x.value.shape[0] / batch_size
-    n_test_batches  = test_set_x.value.shape[0]  / batch_size
-
     #--------------------------------------------------------------------------------------------------------------------
     # Build actual model
     #--------------------------------------------------------------------------------------------------------------------
@@ -308,17 +203,11 @@
 
     # compiling a Theano function that computes the mistakes that are made by 
     # the model on a minibatch
-    test_model = theano.function( inputs = [ index ], 
-            outputs = classifier.errors( y ),
-            givens = {
-                x:test_set_x[ index * batch_size: ( index + 1 ) * batch_size ],
-                y:test_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } )
+    test_model = theano.function( inputs = [ x, y ], 
+            outputs = classifier.errors( y ))
 
-    validate_model = theano.function( inputs = [ index ], 
-            outputs = classifier.errors( y ),
-            givens = {
-                x:valid_set_x[ index * batch_size: ( index + 1 ) * batch_size ],
-                y:valid_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } )
+    validate_model = theano.function( inputs = [ x, y ], 
+            outputs = classifier.errors( y ))
 
     # compute the gradient of cost with respect to theta = ( W, b ) 
     g_W = T.grad( cost = cost, wrt = classifier.W )
@@ -331,12 +220,9 @@
     # compiling a Theano function `train_model` that returns the cost, but in 
     # the same time updates the parameter of the model based on the rules 
     # defined in `updates`
-    train_model = theano.function( inputs = [ index ], 
+    train_model = theano.function( inputs = [ x, y ], 
             outputs = cost, 
-            updates = updates,
-            givens = {
-                x: train_set_x[ index * batch_size: ( index + 1 ) * batch_size ],
-                y: train_set_y[ index * batch_size: ( index + 1 ) * batch_size ] } )
+            updates = updates)
 
     #--------------------------------------------------------------------------------------------------------------------
     # Train model
@@ -349,38 +235,38 @@
                                   # found
     improvement_threshold = 0.995 # a relative improvement of this much is 
                                   # considered significant
-    validation_frequency  = min( n_train_batches, patience * 0.5 )  
+    validation_frequency  = patience * 0.5
                                   # go through this many 
                                   # minibatche before checking the network 
                                   # on the validation set; in this case we 
                                   # check every epoch 
 
-    best_params             = None
+    best_params          = None
     best_validation_loss = float('inf')
-    test_score                 = 0.
-    start_time                  = time.clock()
+    test_score           = 0.
+    start_time           = time.clock()
 
     done_looping = False 
-    n_epochs       = nb_max_examples / train_set_x.value.shape[0]
-    epoch             = 0  
+    n_iters      = nb_max_examples / batch_size
+    epoch        = 0
+    iter        = 0
     
-    while ( epoch < n_epochs ) and ( not done_looping ):
+    while ( iter < n_iters ) and ( not done_looping ):
         
       epoch = epoch + 1
-      for minibatch_index in xrange( n_train_batches ):
+      for x, y in dataset.train(batch_size):
 
-        minibatch_avg_cost = train_model( minibatch_index )
+        minibatch_avg_cost = train_model( x, y )
         # iteration number
-        iter = epoch * n_train_batches + minibatch_index
+        iter += 1
 
-        if ( iter + 1 ) % validation_frequency == 0: 
+        if iter % validation_frequency == 0: 
             # compute zero-one loss on validation set 
-            validation_losses     = [ validate_model( i ) for i in xrange( n_valid_batches ) ]
+            validation_losses     = [ validate_model( xv, yv ) for xv, yv in dataset.valid(batch_size) ]
             this_validation_loss = numpy.mean( validation_losses )
 
-            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                 ( epoch, minibatch_index + 1,n_train_batches, \
-                  this_validation_loss*100. ) )
+            print('epoch %i, iter %i, validation error %f %%' % \
+                 ( epoch, iter, this_validation_loss*100. ) )
 
 
             # if we got the best validation score until now
@@ -393,12 +279,12 @@
                 best_validation_loss = this_validation_loss
                 # test it on the test set
 
-                test_losses = [test_model(i) for i in xrange(n_test_batches)]
+                test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)]
                 test_score  = numpy.mean(test_losses)
 
-                print(('     epoch %i, minibatch %i/%i, test error of best ' 
+                print(('     epoch %i, iter %i, test error of best ' 
                        'model %f %%') % \
-                  (epoch, minibatch_index+1, n_train_batches,test_score*100.))
+                  (epoch, iter, test_score*100.))
 
         if patience <= iter :
                 done_looping = True
--- a/deep/convolutional_dae/stacked_convolutional_dae.py	Wed Mar 03 12:51:40 2010 -0500
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py	Wed Mar 03 16:46:16 2010 -0500
@@ -7,44 +7,10 @@
 
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv 
-import gzip
-import cPickle
- 
- 
-class LogisticRegression(object):
- 
-    def __init__(self, input, n_in, n_out):
- 
-        self.W = theano.shared( value=numpy.zeros((n_in,n_out),
-                                            dtype = theano.config.floatX) )
-
-        self.b = theano.shared( value=numpy.zeros((n_out,),
-                                            dtype = theano.config.floatX) )
-
-        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
-        
 
-        self.y_pred=T.argmax(self.p_y_given_x, axis=1)
- 
-        self.params = [self.W, self.b]
- 
-    def negative_log_likelihood(self, y):
-        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
- 
-    def MSE(self, y):
-        return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]),y]-y)**2)
+from ift6266 import datasets
 
-    def errors(self, y):
-        if y.ndim != self.y_pred.ndim:
-            raise TypeError('y should have the same shape as self.y_pred',
-                ('y', target.type, 'y_pred', self.y_pred.type))
- 
-
-        if y.dtype.startswith('int'):
-            return T.mean(T.neq(self.y_pred, y))
-        else:
-            raise NotImplementedError()
- 
+from ift6266.baseline.log_reg.log_reg import LogisticRegression
  
 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):
@@ -65,8 +31,9 @@
  
 class dA_conv(object):
  
-  def __init__(self, corruption_level = 0.1, input = None, shared_W = None,\
-                   shared_b = None, filter_shape = None, image_shape = None, poolsize = (2,2)):
+  def __init__(self, input, filter_shape, corruption_level = 0.1, 
+               shared_W = None, shared_b = None, image_shape = None, 
+               poolsize = (2,2)):
 
     theano_rng = RandomStreams()
     
@@ -80,13 +47,11 @@
         self.W = shared_W
         self.b = shared_b
     else:
-        initial_W = numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(6./(fan_in+fan_out)), \
-              high = numpy.sqrt(6./(fan_in+fan_out)), \
+        initial_W = numpy.asarray( numpy.random.uniform(
+              low = -numpy.sqrt(6./(fan_in+fan_out)),
+              high = numpy.sqrt(6./(fan_in+fan_out)),
               size = filter_shape), dtype = theano.config.floatX)
-        initial_b = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
-    
-    
+        initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
         self.W = theano.shared(value = initial_W, name = "W")
         self.b = theano.shared(value = initial_b, name = "b")
     
@@ -101,9 +66,8 @@
 
     self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
 
-    conv1_out = conv.conv2d(self.tilde_x, self.W, \
-                             filter_shape=filter_shape, \
-                                image_shape=image_shape, border_mode='valid')
+    conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
+                            image_shape=image_shape, border_mode='valid')
 
     
     self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
@@ -111,19 +75,15 @@
     
     da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
                        filter_shape[3] ]
-    da_image_shape = [ image_shape[0],filter_shape[0],image_shape[2]-filter_shape[2]+1, \
-                         image_shape[3]-filter_shape[3]+1 ]
     initial_W_prime =  numpy.asarray( numpy.random.uniform( \
               low = -numpy.sqrt(6./(fan_in+fan_out)), \
               high = numpy.sqrt(6./(fan_in+fan_out)), \
               size = da_filter_shape), dtype = theano.config.floatX)
     self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
 
-    #import pdb;pdb.set_trace()
-
-    conv2_out = conv.conv2d(self.y, self.W_prime, \
-                               filter_shape = da_filter_shape, image_shape = da_image_shape ,\
-                                border_mode='full')
+    conv2_out = conv.conv2d(self.y, self.W_prime,
+                            filter_shape = da_filter_shape,
+                            border_mode='full')
 
     self.z =  (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
 
@@ -134,19 +94,16 @@
     self.cost = T.mean(self.L)
 
     self.params = [ self.W, self.b, self.b_prime ] 
- 
- 
 
 class LeNetConvPoolLayer(object):
-    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
-        assert image_shape[1]==filter_shape[1]
+    def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)):
         self.input = input
   
         W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
-        self.W = theano.shared(value = W_values)
+        self.W = theano.shared(value=W_values)
  
-        b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
-        self.b = theano.shared(value= b_values)
+        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
+        self.b = theano.shared(value=b_values)
  
         conv_out = conv.conv2d(input, self.W,
                 filter_shape=filter_shape, image_shape=image_shape)
@@ -168,67 +125,60 @@
  
 
 class SdA():
-    def __init__(self, input, n_ins_conv, n_ins_mlp, train_set_x, train_set_y, batch_size, \
-                     conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, \
-                     rng, n_out, pretrain_lr, finetune_lr):
-
+    def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes,
+                 mlp_hidden_layers_sizes, corruption_levels, rng, n_out, 
+                 pretrain_lr, finetune_lr):
+        
         self.layers = []
         self.pretrain_functions = []
         self.params = []
         self.conv_n_layers = len(conv_hidden_layers_sizes)
         self.mlp_n_layers = len(mlp_hidden_layers_sizes)
-         
-        index = T.lscalar() # index to a [mini]batch
+        
         self.x = T.dmatrix('x') # the data is presented as rasterized images
         self.y = T.ivector('y') # the labels are presented as 1D vector of
         
- 
-        
         for i in xrange( self.conv_n_layers ):
-
             filter_shape=conv_hidden_layers_sizes[i][0]
             image_shape=conv_hidden_layers_sizes[i][1]
             max_poolsize=conv_hidden_layers_sizes[i][2]
                 
             if i == 0 :
-                layer_input=self.x.reshape((batch_size,1,28,28))
+                layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32))
             else:
                 layer_input=self.layers[-1].output
-
-            layer = LeNetConvPoolLayer(rng, input=layer_input, \
-                                image_shape=image_shape, \
-                                filter_shape=filter_shape,poolsize=max_poolsize)
-            print 'Convolutional layer '+str(i+1)+' created'
-                
+            
+            layer = LeNetConvPoolLayer(rng, input=layer_input,
+                                       image_shape=image_shape,
+                                       filter_shape=filter_shape,
+                                       poolsize=max_poolsize)
+            print 'Convolutional layer', str(i+1), 'created'
+            
             self.layers += [layer]
             self.params += layer.params
-                
-            da_layer = dA_conv(corruption_level = corruption_levels[0],\
-                                  input = layer_input, \
-                                  shared_W = layer.W, shared_b = layer.b,\
-                                  filter_shape = filter_shape , image_shape = image_shape )
-                
-                
+            
+            da_layer = dA_conv(corruption_level = corruption_levels[0],
+                               input = layer_input,
+                               shared_W = layer.W, shared_b = layer.b,
+                               filter_shape = filter_shape,
+                               image_shape = image_shape )
+            
             gparams = T.grad(da_layer.cost, da_layer.params)
-                
+            
             updates = {}
             for param, gparam in zip(da_layer.params, gparams):
-                    updates[param] = param - gparam * pretrain_lr
-                    
-                
-            update_fn = theano.function([index], da_layer.cost, \
-                                        updates = updates,
-                                        givens = {
-                    self.x : train_set_x[index*batch_size:(index+1)*batch_size]} )
-             
+                updates[param] = param - gparam * pretrain_lr
+            
+            update_fn = theano.function([self.x], da_layer.cost, updates = updates)
+            
             self.pretrain_functions += [update_fn]
-
+            
         for i in xrange( self.mlp_n_layers ): 
             if i == 0 :
                 input_size = n_ins_mlp
             else:
                 input_size = mlp_hidden_layers_sizes[i-1]
-
+            
             if i == 0 :
                 if len( self.layers ) == 0 :
                     layer_input=self.x
@@ -236,72 +186,43 @@
                     layer_input = self.layers[-1].output.flatten(2)
             else:
                 layer_input = self.layers[-1].output
-     
+            
             layer = SigmoidalLayer(rng, layer_input, input_size,
                                         mlp_hidden_layers_sizes[i] )
-              
+            
             self.layers += [layer]
             self.params += layer.params
             
-
-            print 'MLP layer '+str(i+1)+' created'
+            print 'MLP layer', str(i+1), 'created'
             
         self.logLayer = LogisticRegression(input=self.layers[-1].output, \
                                                      n_in=mlp_hidden_layers_sizes[-1], n_out=n_out)
         self.params += self.logLayer.params
-
+        
         cost = self.logLayer.negative_log_likelihood(self.y)
+        
+        gparams = T.grad(cost, self.params)
 
-        gparams = T.grad(cost, self.params)
         updates = {}
-
         for param,gparam in zip(self.params, gparams):
             updates[param] = param - gparam*finetune_lr
-            
-        self.finetune = theano.function([index], cost,
-                updates = updates,
-                givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size],
-                  self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
- 
+        
+        self.finetune = theano.function([self.x, self.y], cost, updates = updates)
+        
+        self.errors = self.logLayer.errors(self.y)
 
-        self.errors = self.logLayer.errors(self.y)
- 
- 
- 
 def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
                             pretrain_lr = 0.01, training_epochs = 1000, \
-                            dataset='mnist.pkl.gz'):
-
-    f = gzip.open(dataset,'rb')
-    train_set, valid_set, test_set = cPickle.load(f)
-    f.close()
- 
- 
-    def shared_dataset(data_xy):
-        data_x, data_y = data_xy
-        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-        return shared_x, T.cast(shared_y, 'int32')
- 
-
-    test_set_x, test_set_y = shared_dataset(test_set)
-    valid_set_x, valid_set_y = shared_dataset(valid_set)
-    train_set_x, train_set_y = shared_dataset(train_set)
- 
+                            dataset=datasets.nist_digits):
+    
     batch_size = 500 # size of the minibatch
  
-
-    n_train_batches = train_set_x.value.shape[0] / batch_size
-    n_valid_batches = valid_set_x.value.shape[0] / batch_size
-    n_test_batches = test_set_x.value.shape[0] / batch_size
- 
     # allocate symbolic variables for the data
     index = T.lscalar() # index to a [mini]batch
     x = T.matrix('x') # the data is presented as rasterized images
     y = T.ivector('y') # the labels are presented as 1d vector of
-                           # [int] labels
-    layer0_input = x.reshape((batch_size,1,28,28))
+    # [int] labels
+    layer0_input = x.reshape((x.shape[0],1,32,32))
     
 
     # Setup the convolutional layers with their DAs(add as many as you want)
@@ -310,45 +231,34 @@
     ker1=2
     ker2=2
     conv_layers=[]
-    conv_layers.append([[ker1,1,5,5], [batch_size,1,28,28], [2,2] ])
-    conv_layers.append([[ker2,ker1,5,5], [batch_size,ker1,12,12], [2,2] ])
+    conv_layers.append([[ker1,1,5,5], None, [2,2] ])
+    conv_layers.append([[ker2,ker1,5,5], None, [2,2] ])
 
     # Setup the MLP layers of the network
     mlp_layers=[500]
   
-    network = SdA(input = layer0_input, n_ins_conv = 28*28, n_ins_mlp = ker2*4*4, \
-                      train_set_x = train_set_x, train_set_y = train_set_y, batch_size = batch_size,
-                      conv_hidden_layers_sizes = conv_layers,  \
-                      mlp_hidden_layers_sizes = mlp_layers, \
-                      corruption_levels = corruption_levels , n_out = 10, \
-                      rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate )
+    network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4,
+                  conv_hidden_layers_sizes = conv_layers,
+                  mlp_hidden_layers_sizes = mlp_layers,
+                  corruption_levels = corruption_levels , n_out = 10,
+                  rng = rng , pretrain_lr = pretrain_lr ,
+                  finetune_lr = learning_rate )
 
-    test_model = theano.function([index], network.errors,
-             givens = {
-                network.x: test_set_x[index*batch_size:(index+1)*batch_size],
-                network.y: test_set_y[index*batch_size:(index+1)*batch_size]})
+    test_model = theano.function([network.x, network.y], network.errors)
  
-    validate_model = theano.function([index], network.errors,
-           givens = {
-                network.x: valid_set_x[index*batch_size:(index+1)*batch_size],
-                network.y: valid_set_y[index*batch_size:(index+1)*batch_size]})
-
-
-
     start_time = time.clock()
     for i in xrange(len(network.layers)-len(mlp_layers)):
         for epoch in xrange(pretraining_epochs):
-            for batch_index in xrange(n_train_batches):
-                c = network.pretrain_functions[i](batch_index)
-            print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch),c
+            for x, y in dataset.train(batch_size):
+                c = network.pretrain_functions[i](x)
+            print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c
 
     patience = 10000 # look as this many examples regardless
     patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS
                                   # FOUND
     improvement_threshold = 0.995 # a relative improvement of this much is
 
-    validation_frequency = min(n_train_batches, patience/2)
- 
+    validation_frequency = patience/2
  
     best_params = None
     best_validation_loss = float('inf')
@@ -357,23 +267,21 @@
  
     done_looping = False
     epoch = 0
- 
+    iter = 0
+
     while (epoch < training_epochs) and (not done_looping):
       epoch = epoch + 1
-      for minibatch_index in xrange(n_train_batches):
+      for x, y in dataset.train(batch_size):
  
-        cost_ij = network.finetune(minibatch_index)
-        iter = epoch * n_train_batches + minibatch_index
- 
-        if (iter+1) % validation_frequency == 0:
+        cost_ij = network.finetune(x, y)
+        iter += 1
+        
+        if iter % validation_frequency == 0:
+            validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)]
+            this_validation_loss = numpy.mean(validation_losses)
+            print('epoch %i, iter %i, validation error %f %%' % \
+                   (epoch, iter, this_validation_loss*100.))
             
-            validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
-            this_validation_loss = numpy.mean(validation_losses)
-            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                   (epoch, minibatch_index+1, n_train_batches, \
-                    this_validation_loss*100.))
- 
- 
             # if we got the best validation score until now
             if this_validation_loss < best_validation_loss:
  
@@ -381,35 +289,28 @@
                 if this_validation_loss < best_validation_loss * \
                        improvement_threshold :
                     patience = max(patience, iter * patience_increase)
- 
+                
                 # save best validation score and iteration number
                 best_validation_loss = this_validation_loss
                 best_iter = iter
- 
+                
                 # test it on the test set
-                test_losses = [test_model(i) for i in xrange(n_test_batches)]
+                test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)]
                 test_score = numpy.mean(test_losses)
-                print((' epoch %i, minibatch %i/%i, test error of best '
+                print((' epoch %i, iter %i, test error of best '
                       'model %f %%') %
-                             (epoch, minibatch_index+1, n_train_batches,
-                              test_score*100.))
- 
- 
+                             (epoch, iter, test_score*100.))
+                
         if patience <= iter :
-                done_looping = True
-                break
- 
+            done_looping = True
+            break
+    
     end_time = time.clock()
     print(('Optimization complete with best validation score of %f %%,'
            'with test performance %f %%') %
                  (best_validation_loss * 100., test_score*100.))
     print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
  
- 
- 
- 
- 
- 
 if __name__ == '__main__':
     sgd_optimization_mnist()
  
--- a/test.py	Wed Mar 03 12:51:40 2010 -0500
+++ b/test.py	Wed Mar 03 16:46:16 2010 -0500
@@ -1,6 +1,6 @@
 import doctest, sys, pkgutil
 
-def runTests(options = doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1):
+def runTests():
     import ift6266
     predefs = ift6266.__dict__
     for (_, name, ispkg) in pkgutil.walk_packages(ift6266.__path__, ift6266.__name__+'.'):
@@ -11,9 +11,20 @@
                         'ift6266.data_generation.transformations.testmod',
                         'ift6266.data_generation.transformations.gimp_script']:
                 continue
-            print "Testing:", name
-            __import__(name)
-            doctest.testmod(sys.modules[name], extraglobs=predefs, optionflags=options)
+
+def test(name):
+    import ift6266
+    predefs = ift6266.__dict__
+    options = doctest.ELLIPSIS or doctest.DONT_ACCEPT_TRUE_FOR_1
+    print "Testing:", name
+    __import__(name)
+    doctest.testmod(sys.modules[name], extraglobs=predefs, optionflags=options)
 
 if __name__ == '__main__':
-    runTests()
+    if len(sys.argv) > 1:
+        for mod in sys.argv[1:]:
+            if mod.endswith('.py'):
+                mod = mod[:-3]
+            test(mod)
+    else:
+        runTests()