# HG changeset patch
# User Olivier Delalleau
# Date 1313523855 14400
# Node ID b1af99fd7bf667878b496a8e2f22545bf16868dd
# Parent  e7c4d031d333b4d89b3aeddfef80de68410371be
# Parent  8be8cdde97ee4199ae119ec8bfa2f984ccfbf0b8
Merged

diff -r e7c4d031d333 -r b1af99fd7bf6 .hgignore
--- a/.hgignore	Tue Aug 16 15:44:01 2011 -0400
+++ b/.hgignore	Tue Aug 16 15:44:15 2011 -0400
@@ -4,4 +4,4 @@
 *.pyc
 *.orig
 core.*
-html
\ No newline at end of file
+html

diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/dataset_ops/protocol.py
--- a/pylearn/dataset_ops/protocol.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/dataset_ops/protocol.py	Tue Aug 16 15:44:15 2011 -0400
@@ -3,6 +3,7 @@
 """
 __docformat__ = "restructuredtext_en"
 
+import numpy
 import theano
 
 class Dataset(theano.Op):
@@ -119,6 +120,6 @@
         except:
             x = self.x_ = self.fn(*self.fn_args)
         if idx.ndim == 0:
-            z[0] = x[int(idx)]
+            z[0] = numpy.asarray(x[int(idx)])  # asarray is important for memmaps
         else:
-            z[0] = x[idx]
+            z[0] = numpy.asarray(x[idx])  # asarray is important for memmaps

diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/datasets/MNIST.py
--- a/pylearn/datasets/MNIST.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/MNIST.py	Tue Aug 16 15:44:15 2011 -0400
@@ -6,8 +6,8 @@
 import numpy
 
 from pylearn.io.pmat import PMat
-from pylearn.datasets.config import data_root # config
 from pylearn.datasets.dataset import Dataset
+import config
 
 def head(n=10, path=None):
     """Load the first MNIST examples.
@@ -18,7 +18,9 @@
     """
     if path is None:
-        path = os.path.join(data_root(), 'mnist','mnist_all.pmat')
+        # dataset lookup through $PYLEARN_DATA_ROOT
+        _path = os.path.join('mnist', 'mnist_all.pmat')
+        path = config.get_filepath_in_roots(_path)
 
     dat = PMat(fname=path)

diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/datasets/caltech.py
--- a/pylearn/datasets/caltech.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/caltech.py	Tue Aug 16 15:44:15 2011 -0400
@@ -6,15 +6,15 @@
 import numpy
 
 from pylearn.io.pmat import PMat
-from pylearn.datasets.config import data_root # config
 from pylearn.datasets.dataset import Dataset
+import config
 
 def caltech_silhouette():
 
     rval = Dataset()
-
-    path = os.path.join(data_root(), 'caltech_silhouettes')
+    # dataset lookup through $PYLEARN_DATA_ROOT
+    path = config.get_filepath_in_roots('caltech_silhouettes')
 
     rval.train = Dataset.Obj(x=numpy.load(os.path.join(path,'train_data.npy')),
                              y=numpy.load(os.path.join(path,'train_labels.npy')))
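
The recurring change in the hunks above replaces hard-coded data_root() prefixes with config.get_filepath_in_roots, which resolves a relative dataset path against the data roots advertised through environment variables such as $PYLEARN_DATA_ROOT. As a rough sketch of the behaviour these call sites rely on (a hypothetical stand-in, not the actual pylearn/datasets/config.py implementation):

    import os

    def get_filepath_in_roots(relative_path):
        # Hypothetical lookup: try each root listed in the environment and
        # return the first existing match, or None when nothing is found.
        roots = []
        for var in ('PYLEARN_DATA_ROOT', 'DBPATH'):
            value = os.environ.get(var)
            if value:
                roots.extend(value.split(os.pathsep))
        for root in roots:
            candidate = os.path.join(root, relative_path)
            if os.path.exists(candidate):
                return candidate
        return None

Under this reading, a call site such as MNIST.head passes a relative path like os.path.join('mnist', 'mnist_all.pmat') and gets back an absolute path, or None when the dataset is not installed.
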
diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/datasets/icml07.py
--- a/pylearn/datasets/icml07.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/icml07.py	Tue Aug 16 15:44:15 2011 -0400
@@ -3,7 +3,32 @@
 import os, sys
 import numpy
 
+from config import get_filepath_in_roots
 from pylearn.io.amat import AMat
+from pylearn.datasets.config import data_root # config
+from pylearn.datasets.dataset import Dataset
+
+class MNIST_rotated_background(object):
+
+    def __init__(self, n_train=10000, n_valid=2000, n_test=50000):
+
+        basedir = os.path.join(data_root(), 'icml07data', 'npy')
+
+        x_all = numpy.load(os.path.join(basedir, 'mnist_rotated_background_images_inputs.npy'))
+        y_all = numpy.load(os.path.join(basedir, 'mnist_rotated_background_images_labels.npy'))
+
+        vstart = n_train
+        tstart = n_train + n_valid
+
+        self.train = Dataset.Obj(x=x_all[:n_train], y=y_all[:n_train])
+        self.valid = Dataset.Obj(x=x_all[vstart:vstart+n_valid],
+                                 y=y_all[vstart:vstart+n_valid])
+        self.test = Dataset.Obj(x=x_all[tstart:tstart+n_test],
+                                y=y_all[tstart:tstart+n_test])
+
+        self.n_classes = 10
+        self.img_shape = (28,28)
+
 class DatasetLoader(object):
     """
@@ -68,7 +93,11 @@
         assert numpy.all(labels < self.n_classes)
         return inputs, labels
 
-def icml07_loaders(new_version=True, rootdir='.'):
+def icml07_loaders(new_version=True, rootdir=None):
+    if rootdir is None:
+        rootdir = get_filepath_in_roots('icml07data_twiki')
+    if rootdir is None:
+        raise IOError('dataset not found (no icml07data_twiki folder in the PYLEARN_DATA_ROOT or DBPATH environment variables)')
     rval = dict(
         mnist_basic=DatasetLoader(
             http_source='http://www.iro.umontreal.ca/~lisa/icml2007data/mnist.zip',

diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/datasets/nade.py
--- a/pylearn/datasets/nade.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/nade.py	Tue Aug 16 15:44:15 2011 -0400
@@ -2,8 +2,8 @@
 import numpy
 
 from pylearn.io.pmat import PMat
-from pylearn.datasets.config import data_root # config
 from pylearn.datasets.dataset import Dataset
+import config
 
 def load_dataset(name=None):
     """
@@ -26,8 +26,10 @@
     assert name in ['adult','binarized_mnist', 'mnist', 'connect4','dna',
                     'mushrooms','nips','ocr_letters','rcv1','web']
     rval = Dataset()
-
-    path = os.path.join(data_root(), 'larocheh', name)
+
+    # dataset lookup through $PYLEARN_DATA_ROOT
+    _path = os.path.join('larocheh', name)
+    path = config.get_filepath_in_roots(_path)
 
     # load training set
     x=numpy.load(os.path.join(path,'train_data.npy'))

diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/datasets/utlc.py
--- a/pylearn/datasets/utlc.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/datasets/utlc.py	Tue Aug 16 15:44:15 2011 -0400
@@ -110,7 +110,7 @@
     else:
         raise Exception("This dataset don't have its normalization defined")
     if transfer:
-        transfer = load_filetensor(os.path.join(config.data_root(),"UTLC","filetensor",name+"_transfer.ft"))
+        transfer = load_ndarray_transfer(name)
        return train, valid, test, transfer
     else:
         return train, valid, test
@@ -179,6 +179,17 @@
     else:
         return train, valid, test
 
+def load_ndarray_transfer(name):
+    """
+    Load the transfer labels for the training set of data set `name`.
+
+    It will be returned in ndarray format.
+    """
+    assert name in ['avicenna','harry','rita','sylvester','terry','ule']
+    transfer = load_filetensor(os.path.join(config.data_root(), 'UTLC',
+                                            'filetensor', name+'_transfer.ft'))
+    return transfer
+
 def load_ndarray_label(name):
     """
     Load the train,valid,test data for the dataset `name` and return it in ndarray format.
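
The new MNIST_rotated_background class above slices one pair of arrays into contiguous train/valid/test blocks. A toy NumPy illustration of that indexing, with made-up sizes (6/2/2 instead of 10000/2000/50000):

    import numpy

    n_train, n_valid, n_test = 6, 2, 2
    x_all = numpy.arange(10)

    vstart = n_train            # validation block starts after the training block
    tstart = n_train + n_valid  # test block starts after the validation block

    train = x_all[:n_train]
    valid = x_all[vstart:vstart + n_valid]
    test = x_all[tstart:tstart + n_test]
    assert len(train) + len(valid) + len(test) == len(x_all)
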
diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/formulas/nnet.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/formulas/nnet.py	Tue Aug 16 15:44:15 2011 -0400
@@ -0,0 +1,44 @@
+import theano
+from theano import tensor
+from theano.sandbox import neighbours
+
+import tags
+
+
+@tags.tags('nnet', 'max pooling', 'inverse')
+def inverse_max_pooling(max_pool_out, output_shape, pooling_shape=(2,2),
+                        ignore_borders=True):
+    """
+    Return a symbolic variable representing the inverse of a max pooling
+    on a given tensor.
+
+    Parameters
+    ----------
+    max_pool_out : 4D tensor
+        A Theano variable representing the output of a max pooling
+    output_shape : 4D shape
+        The shape of the input before pooling
+    pooling_shape : 2D shape
+        The shape of the pooling windows
+    ignore_borders : boolean
+        Will pad borders with zeros if true
+
+    Returns
+    -------
+    ret : 4D tensor
+        A Theano variable with same shape as output_shape
+    """
+    # flatten the input and repeat it
+    repeated_input = [max_pool_out.flatten()]*(pooling_shape[0]*pooling_shape[1])
+
+    # concatenate the repeated vectors into
+    # a 2D matrix in the format neibs2images wants
+    stacked_conv_neibs = tensor.stack(*repeated_input).T
+
+    # then get back a stretched version of the stacked neighbours
+    stretch_unpooling_out = \
+            neighbours.neibs2images(stacked_conv_neibs,
+                                    pooling_shape,
+                                    output_shape,
+                                    'ignore_borders' if ignore_borders else 'valid')
+    return stretch_unpooling_out

diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/formulas/noise.py
--- a/pylearn/formulas/noise.py	Tue Aug 16 15:44:01 2011 -0400
+++ b/pylearn/formulas/noise.py	Tue Aug 16 15:44:15 2011 -0400
@@ -63,7 +63,7 @@
     """
     assert inp.dtype in ['float32','float64']
     return theano_rng.binomial( size = inp.shape, n = 1, p = 1 - noise_lvl[0], dtype=inp.dtype) * inp \
-            + (inp==0) * theano_rng.binomial( size = inp.shape, n = 1, p = noise_lvl[1], dtype=inp.dtype)
+            + (theano.tensor.eq(inp,0)) * theano_rng.binomial( size = inp.shape, n = 1, p = noise_lvl[1], dtype=inp.dtype)
 
 @tags.tags('noise','gauss','gaussian')
 def gaussian_noise(theano_rng,inp,noise_lvl):
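
The one-line noise.py change swaps Python's == for theano.tensor.eq: applied to a symbolic Theano variable, inp == 0 does not build an elementwise comparison graph, whereas tensor.eq(inp, 0) produces the intended 0/1 mask of entries equal to zero. A minimal sketch of the mask this builds:

    import theano
    import theano.tensor as T

    inp = T.matrix('inp')
    mask = T.eq(inp, 0)  # symbolic elementwise "is zero" mask
    f = theano.function([inp], mask)
    # f([[0, 1], [2, 0]]) evaluates to [[1, 0], [0, 1]]
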
""" - +import numpy import theano def sgd_updates(params, grads, stepsizes): @@ -35,11 +35,11 @@ momentum = [momentum for p in params] if len(params) != len(grads): raise ValueError('params and grads have different lens') - headings = [theano.shared(p.get_value(borrow=False)*0) for p in params] + headings = [theano.shared(numpy.zeros_like(p.get_value(borrow=True))) for p in params] updates = [] for s, p, gp, m, h in zip(stepsizes, params, grads, momentum, headings): updates.append((p, p + s * h)) - updates.append((h, m*h - (1-m)*gp)) + updates.append((h, m*h - (1.0-m)*gp)) return updates diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/io/image_tiling.py --- a/pylearn/io/image_tiling.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/io/image_tiling.py Tue Aug 16 15:44:15 2011 -0400 @@ -100,6 +100,10 @@ H, W = img_shape Hs, Ws = tile_spacing + out_scaling = 1 + if output_pixel_vals and str(X.dtype).startswith('float'): + out_scaling = 255 + out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype) for tile_row in xrange(tile_shape[0]): for tile_col in xrange(tile_shape[1]): @@ -121,7 +125,7 @@ tile_row * (H+Hs):tile_row*(H+Hs)+H, tile_col * (W+Ws):tile_col*(W+Ws)+W ] \ - = this_img * (255 if output_pixel_vals else 1) + = this_img * out_scaling return out_array diff -r e7c4d031d333 -r b1af99fd7bf6 pylearn/misc/do_nightly_build --- a/pylearn/misc/do_nightly_build Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/misc/do_nightly_build Tue Aug 16 15:44:15 2011 -0400 @@ -9,7 +9,12 @@ FLAGS=warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug=False,warn.sum_sum_bug=False,warn.sum_div_dimshuffle_bug=False,compiledir=${COMPILEDIR} export PYTHONPATH=${ROOT_CWD}/Theano:${ROOT_CWD}/Pylearn:$PYTHONPATH -cd ${ROOT_CWD}/ +cd ${ROOT_CWD}/Theano +hg summary +cd ../Pylearn +hg summary +cd .. + echo "executing nosetests with mode=FAST_COMPILE" #THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} Pylearn echo "executing nosetests with mode=FAST_RUN" @@ -20,5 +25,5 @@ #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long. seed=$RANDOM echo "executing nosetests with mode=DEBUG_MODE with seed of the day $seed" -THEANO_FLAGS=${FLAGS},unittests.rseed=$seed,mode=DEBUG_MODE,DebugMode.check_strides=0,DebugMode.patience=3 ${NOSETESTS} Pylearn +THEANO_FLAGS=${FLAGS},unittests.rseed=$seed,mode=DEBUG_MODE,DebugMode.check_strides=0,DebugMode.patience=3,DebugMode.check_preallocated_output= ${NOSETESTS} Pylearn