changeset 1493:b1af99fd7bf6

Merged
author Olivier Delalleau <delallea@iro>
date Tue, 16 Aug 2011 15:44:15 -0400
parents e7c4d031d333 (current diff) 8be8cdde97ee (diff)
children 625fe86e3d5e
files
diffstat 12 files changed, 118 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Tue Aug 16 15:44:01 2011 -0400
+++ b/.hgignore	Tue Aug 16 15:44:15 2011 -0400
@@ -4,4 +4,4 @@
 *.pyc
 *.orig
 core.*
-html
\ No newline at end of file
+html
--- a/pylearn/dataset_ops/protocol.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/dataset_ops/protocol.py	Tue Aug 16 15:44:15 2011 -0400
@@ -3,6 +3,7 @@
 """
 
 __docformat__  = "restructuredtext_en"
+import numpy
 import theano
 
 class Dataset(theano.Op):
@@ -119,6 +120,6 @@
         except:
             x = self.x_ = self.fn(*self.fn_args)
         if idx.ndim == 0:
-            z[0] = x[int(idx)]
+            z[0] = numpy.asarray(x[int(idx)]) # asarray is important for memmaps
         else:
-            z[0] = x[idx]
+            z[0] = numpy.asarray(x[idx]) # asarray is important for memmaps
--- a/pylearn/datasets/MNIST.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/MNIST.py	Tue Aug 16 15:44:15 2011 -0400
@@ -6,8 +6,8 @@
 import numpy
 
 from pylearn.io.pmat import PMat
-from pylearn.datasets.config import data_root # config
 from pylearn.datasets.dataset import Dataset
+import config
 
 def head(n=10, path=None):
     """Load the first MNIST examples.
@@ -18,7 +18,9 @@
 
     """
     if path is None:
-      path = os.path.join(data_root(), 'mnist','mnist_all.pmat')
+        # dataset lookup through $PYLEARN_DATA_ROOT
+        _path = os.path.join('mnist', 'mnist_all.pmat')
+        path = config.get_filepath_in_roots(_path)
 
     dat = PMat(fname=path)
 
--- a/pylearn/datasets/caltech.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/caltech.py	Tue Aug 16 15:44:15 2011 -0400
@@ -6,15 +6,15 @@
 import numpy
 
 from pylearn.io.pmat import PMat
-from pylearn.datasets.config import data_root # config
 from pylearn.datasets.dataset import Dataset
+import config
 
 def caltech_silhouette():
 
     rval = Dataset()
 
-    
-    path = os.path.join(data_root(), 'caltech_silhouettes')
+    # dataset lookup through $PYLEARN_DATA_ROOT
+    path = config.get_filepath_in_roots('caltech_silhouettes')
 
     rval.train = Dataset.Obj(x=numpy.load(os.path.join(path,'train_data.npy')),
                              y=numpy.load(os.path.join(path,'train_labels.npy')))
--- a/pylearn/datasets/icml07.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/icml07.py	Tue Aug 16 15:44:15 2011 -0400
@@ -3,7 +3,32 @@
 import os, sys
 import numpy
 
+from config import get_filepath_in_roots
 from pylearn.io.amat import AMat
+from pylearn.datasets.config import data_root # config
+from pylearn.datasets.dataset import Dataset
+
+class MNIST_rotated_background(object):
+
+    def __init__(self, n_train=10000, n_valid=2000, n_test=50000):
+
+        basedir = os.path.join(data_root(), 'icml07data', 'npy')
+
+        x_all = numpy.load(os.path.join(basedir, 'mnist_rotated_background_images_inputs.npy'))
+        y_all = numpy.load(os.path.join(basedir, 'mnist_rotated_background_images_labels.npy'))
+
+        vstart = n_train
+        tstart = n_train + n_valid
+
+        self.train = Dataset.Obj(x=x_all[:n_train], y=y_all[:n_train])
+        self.valid = Dataset.Obj(x=x_all[vstart:vstart+n_valid],
+                                 y=y_all[vstart:vstart+n_valid])
+        self.test  = Dataset.Obj(x=x_all[tstart:tstart+n_test],
+                                 y=y_all[tstart:tstart+n_test])
+
+        self.n_classes = 10
+        self.img_shape = (28,28)
+
 
 class DatasetLoader(object):
     """
@@ -68,7 +93,11 @@
         assert numpy.all(labels < self.n_classes)
         return inputs, labels
 
-def icml07_loaders(new_version=True, rootdir='.'):
+def icml07_loaders(new_version=True, rootdir=None):
+    if rootdir is None:
+        rootdir = get_filepath_in_roots('icml07data_twiki')
+    if rootdir is None:
+        raise IOError('dataset not found (no icml07data_twiki folder in PYLEARN_DATA_ROOT or DBPATH environment variables).')
     rval = dict(
         mnist_basic=DatasetLoader(
             http_source='http://www.iro.umontreal.ca/~lisa/icml2007data/mnist.zip',
--- a/pylearn/datasets/nade.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/nade.py	Tue Aug 16 15:44:15 2011 -0400
@@ -2,8 +2,8 @@
 import numpy
 
 from pylearn.io.pmat import PMat
-from pylearn.datasets.config import data_root # config
 from pylearn.datasets.dataset import Dataset
+import config
 
 def load_dataset(name=None):
     """
@@ -26,8 +26,10 @@
     assert name in ['adult','binarized_mnist', 'mnist', 'connect4','dna',
                     'mushrooms','nips','ocr_letters','rcv1','web']
     rval = Dataset()
-    
-    path = os.path.join(data_root(), 'larocheh', name)
+
+    # dataset lookup through $PYLEARN_DATA_ROOT
+    _path = os.path.join('larocheh', name)
+    path = config.get_filepath_in_roots(_path)
 
     # load training set
     x=numpy.load(os.path.join(path,'train_data.npy'))
--- a/pylearn/datasets/utlc.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/utlc.py	Tue Aug 16 15:44:15 2011 -0400
@@ -110,7 +110,7 @@
         else:
             raise Exception("This dataset don't have its normalization defined")
     if transfer:
-        transfer = load_filetensor(os.path.join(config.data_root(),"UTLC","filetensor",name+"_transfer.ft"))
+        transfer = load_ndarray_transfer(name)
         return train, valid, test, transfer
     else:
         return train, valid, test
@@ -179,6 +179,17 @@
     else:
         return train, valid, test
 
+def load_ndarray_transfer(name):
+    """
+    Load the transfer labels for the training set of data set `name`.
+
+    It will be returned in ndarray format.
+    """
+    assert name in ['avicenna','harry','rita','sylvester','terry','ule']
+    transfer = load_filetensor(os.path.join(config.data_root(), 'UTLC',
+        'filetensor', name+'_transfer.ft'))
+    return transfer
+
 def load_ndarray_label(name):
     """ Load the train,valid,test data for the dataset `name`
         and return it in ndarray format.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/formulas/nnet.py	Tue Aug 16 15:44:15 2011 -0400
@@ -0,0 +1,44 @@
+import theano
+from theano import tensor
+from theano.sandbox import neighbours
+
+import tags
+
+
+@tags.tags('nnet', 'max pooling', 'inverse')
+def inverse_max_pooling(max_pool_out,output_shape,pooling_shape=(2,2),
+                        ignore_borders = True):
+    """
+    Return a symbolic variable representing the inverse of a max pooling
+    on a given tensor.
+
+    Parameters
+    ----------
+    max_pool_out : 4D tensor
+        A Theano variable representing the output of a max pooling
+    output_shape : 4D shape
+        The shape of the input before pooling
+    pooling_shape : 2D shape
+        The shape of the pooling windows
+    ignore_borders : boolean
+        Will pad borders with zeros if true
+    
+    Returns
+    -------
+    ret : 4D tensor
+        A Theano variable with same shape as output_shape
+    """
+    # flatten the input and repeat it 
+    repeated_input = [max_pool_out.flatten()]*(pooling_shape[0]*pooling_shape[1])
+
+    # concatenate the repeated vectors into
+    # a 2D matrix in the format neibs2images wants
+    stacked_conv_neibs = tensor.stack(*repeated_input).T
+    
+    # then get back a stretched version of the stacked neighbours
+    stretch_unpooling_out = \
+        neighbours.neibs2images(stacked_conv_neibs,
+                                pooling_shape,
+                                output_shape,
+                                'ignore_borders' if ignore_borders else 'valid')
+    return stretch_unpooling_out
--- a/pylearn/formulas/noise.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/formulas/noise.py	Tue Aug 16 15:44:15 2011 -0400
@@ -63,7 +63,7 @@
     """
     assert inp.dtype in ['float32','float64']
     return theano_rng.binomial( size = inp.shape, n = 1, p =  1 - noise_lvl[0], dtype=inp.dtype) * inp \
-                        + (inp==0) * theano_rng.binomial( size = inp.shape, n = 1, p =  noise_lvl[1], dtype=inp.dtype)
+                        + (theano.tensor.eq(inp,0)) * theano_rng.binomial( size = inp.shape, n = 1, p =  noise_lvl[1], dtype=inp.dtype)
 
 @tags.tags('noise','gauss','gaussian')
 def gaussian_noise(theano_rng,inp,noise_lvl):
--- a/pylearn/gd/sgd.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/gd/sgd.py	Tue Aug 16 15:44:15 2011 -0400
@@ -1,6 +1,6 @@
 """A stochastic gradient descent minimizer.
 """
-
+import numpy
 import theano
 
 def sgd_updates(params, grads, stepsizes):
@@ -35,11 +35,11 @@
         momentum = [momentum for p in params]
     if len(params) != len(grads):
         raise ValueError('params and grads have different lens')
-    headings = [theano.shared(p.get_value(borrow=False)*0) for p in params]
+    headings = [theano.shared(numpy.zeros_like(p.get_value(borrow=True))) for p in params]
     updates = []
     for s, p, gp, m, h in zip(stepsizes, params, grads, momentum, headings):
         updates.append((p, p + s * h))
-        updates.append((h, m*h - (1-m)*gp))
+        updates.append((h, m*h - (1.0-m)*gp))
     return updates
 
 
--- a/pylearn/io/image_tiling.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/io/image_tiling.py	Tue Aug 16 15:44:15 2011 -0400
@@ -100,6 +100,10 @@
         H, W = img_shape
         Hs, Ws = tile_spacing
 
+        out_scaling = 1
+        if output_pixel_vals and str(X.dtype).startswith('float'):
+            out_scaling = 255
+
         out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
         for tile_row in xrange(tile_shape[0]):
             for tile_col in xrange(tile_shape[1]):
@@ -121,7 +125,7 @@
                         tile_row * (H+Hs):tile_row*(H+Hs)+H,
                         tile_col * (W+Ws):tile_col*(W+Ws)+W
                         ] \
-                        = this_img * (255 if output_pixel_vals else 1)
+                        = this_img * out_scaling
         return out_array
 
 
--- a/pylearn/misc/do_nightly_build	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/misc/do_nightly_build	Tue Aug 16 15:44:15 2011 -0400
@@ -9,7 +9,12 @@
 FLAGS=warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug=False,warn.sum_sum_bug=False,warn.sum_div_dimshuffle_bug=False,compiledir=${COMPILEDIR}
 export PYTHONPATH=${ROOT_CWD}/Theano:${ROOT_CWD}/Pylearn:$PYTHONPATH
 
-cd ${ROOT_CWD}/
+cd ${ROOT_CWD}/Theano
+hg summary
+cd ../Pylearn
+hg summary
+cd ..
+
 echo "executing nosetests with mode=FAST_COMPILE"
 #THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} Pylearn
 echo "executing nosetests with mode=FAST_RUN"
@@ -20,5 +25,5 @@
 #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long.
 seed=$RANDOM
 echo "executing nosetests with mode=DEBUG_MODE with seed of the day $seed"
-THEANO_FLAGS=${FLAGS},unittests.rseed=$seed,mode=DEBUG_MODE,DebugMode.check_strides=0,DebugMode.patience=3 ${NOSETESTS} Pylearn
+THEANO_FLAGS=${FLAGS},unittests.rseed=$seed,mode=DEBUG_MODE,DebugMode.check_strides=0,DebugMode.patience=3,DebugMode.check_preallocated_output= ${NOSETESTS} Pylearn