Mercurial > pylearn
changeset 1493:b1af99fd7bf6
Merged
author | Olivier Delalleau <delallea@iro> |
---|---|
date | Tue, 16 Aug 2011 15:44:15 -0400 |
parents | e7c4d031d333 (current diff) 8be8cdde97ee (diff) |
children | 625fe86e3d5e |
files | |
diffstat | 12 files changed, 118 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgignore Tue Aug 16 15:44:01 2011 -0400
+++ b/.hgignore Tue Aug 16 15:44:15 2011 -0400
@@ -4,4 +4,4 @@
 *.pyc
 *.orig
 core.*
-html
\ No newline at end of file
+html
--- a/pylearn/dataset_ops/protocol.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/dataset_ops/protocol.py Tue Aug 16 15:44:15 2011 -0400 @@ -3,6 +3,7 @@ """ __docformat__ = "restructuredtext_en" +import numpy import theano class Dataset(theano.Op): @@ -119,6 +120,6 @@ except: x = self.x_ = self.fn(*self.fn_args) if idx.ndim == 0: - z[0] = x[int(idx)] + z[0] = numpy.asarray(x[int(idx)]) # asarray is important for memmaps else: - z[0] = x[idx] + z[0] = numpy.asarray(x[idx]) # asarray is important for memmaps
--- a/pylearn/datasets/MNIST.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/datasets/MNIST.py Tue Aug 16 15:44:15 2011 -0400 @@ -6,8 +6,8 @@ import numpy from pylearn.io.pmat import PMat -from pylearn.datasets.config import data_root # config from pylearn.datasets.dataset import Dataset +import config def head(n=10, path=None): """Load the first MNIST examples. @@ -18,7 +18,9 @@ """ if path is None: - path = os.path.join(data_root(), 'mnist','mnist_all.pmat') + # dataset lookup through $PYLEARN_DATA_ROOT + _path = os.path.join('mnist', 'mnist_all.pmat') + path = config.get_filepath_in_roots(_path) dat = PMat(fname=path)
--- a/pylearn/datasets/caltech.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/datasets/caltech.py Tue Aug 16 15:44:15 2011 -0400 @@ -6,15 +6,15 @@ import numpy from pylearn.io.pmat import PMat -from pylearn.datasets.config import data_root # config from pylearn.datasets.dataset import Dataset +import config def caltech_silhouette(): rval = Dataset() - - path = os.path.join(data_root(), 'caltech_silhouettes') + # dataset lookup through $PYLEARN_DATA_ROOT + path = config.get_filepath_in_roots('caltech_silhouettes') rval.train = Dataset.Obj(x=numpy.load(os.path.join(path,'train_data.npy')), y=numpy.load(os.path.join(path,'train_labels.npy')))
--- a/pylearn/datasets/icml07.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/datasets/icml07.py Tue Aug 16 15:44:15 2011 -0400 @@ -3,7 +3,32 @@ import os, sys import numpy +from config import get_filepath_in_roots from pylearn.io.amat import AMat +from pylearn.datasets.config import data_root # config +from pylearn.datasets.dataset import Dataset + +class MNIST_rotated_background(object): + + def __init__(self, n_train=10000, n_valid=2000, n_test=50000): + + basedir = os.path.join(data_root(), 'icml07data', 'npy') + + x_all = numpy.load(os.path.join(basedir, 'mnist_rotated_background_images_inputs.npy')) + y_all = numpy.load(os.path.join(basedir, 'mnist_rotated_background_images_labels.npy')) + + vstart = n_train + tstart = n_train + n_valid + + self.train = Dataset.Obj(x=x_all[:n_train], y=y_all[:n_train]) + self.valid = Dataset.Obj(x=x_all[vstart:vstart+n_valid], + y=y_all[vstart:vstart+n_valid]) + self.test = Dataset.Obj(x=x_all[tstart:tstart+n_test], + y=y_all[tstart:tstart+n_test]) + + self.n_classes = 10 + self.img_shape = (28,28) + class DatasetLoader(object): """ @@ -68,7 +93,11 @@ assert numpy.all(labels < self.n_classes) return inputs, labels -def icml07_loaders(new_version=True, rootdir='.'): +def icml07_loaders(new_version=True, rootdir=None): + if rootdir is None: + rootdir = get_filepath_in_roots('icml07data_twiki') + if rootdir is None: + raise IOError('dataset not found (no icml07data_twiki folder in PYLEARN_DATA_ROOT or DBPATH environment variable.') rval = dict( mnist_basic=DatasetLoader( http_source='http://www.iro.umontreal.ca/~lisa/icml2007data/mnist.zip',
--- a/pylearn/datasets/nade.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/datasets/nade.py Tue Aug 16 15:44:15 2011 -0400 @@ -2,8 +2,8 @@ import numpy from pylearn.io.pmat import PMat -from pylearn.datasets.config import data_root # config from pylearn.datasets.dataset import Dataset +import config def load_dataset(name=None): """ @@ -26,8 +26,10 @@ assert name in ['adult','binarized_mnist', 'mnist', 'connect4','dna', 'mushrooms','nips','ocr_letters','rcv1','web'] rval = Dataset() - - path = os.path.join(data_root(), 'larocheh', name) + + # dataset lookup through $PYLEARN_DATA_ROOT + _path = os.path.join('larocheh', name) + path = config.get_filepath_in_roots(_path) # load training set x=numpy.load(os.path.join(path,'train_data.npy'))
--- a/pylearn/datasets/utlc.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/datasets/utlc.py Tue Aug 16 15:44:15 2011 -0400 @@ -110,7 +110,7 @@ else: raise Exception("This dataset don't have its normalization defined") if transfer: - transfer = load_filetensor(os.path.join(config.data_root(),"UTLC","filetensor",name+"_transfer.ft")) + transfer = load_ndarray_transfer(name) return train, valid, test, transfer else: return train, valid, test @@ -179,6 +179,17 @@ else: return train, valid, test +def load_ndarray_transfer(name): + """ + Load the transfer labels for the training set of data set `name`. + + It will be returned in ndarray format. + """ + assert name in ['avicenna','harry','rita','sylvester','terry','ule'] + transfer = load_filetensor(os.path.join(config.data_root(), 'UTLC', + 'filetensor', name+'_transfer.ft')) + return transfer + def load_ndarray_label(name): """ Load the train,valid,test data for the dataset `name` and return it in ndarray format.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/formulas/nnet.py Tue Aug 16 15:44:15 2011 -0400 @@ -0,0 +1,44 @@ +import theano +from theano import tensor +from theano.sandbox import neighbours + +import tags + + +@tags.tags('nnet', 'max pooling', 'inverse') +def inverse_max_pooling(max_pool_out,output_shape,pooling_shape=(2,2), + ignore_borders = True): + """ + Return a symbolic variable representing the inverse of a max pooling + on a given tensor. + + Parameters + ---------- + max_pool_out : 4D tensor + A Theano variable representing the output of a max pooling + output_shape : 4D shape + The shape of the input before pooling + pooling_shape : 2D shape + The shape of the pooling windows + ignore_borders : boolean + Will pad borders with zeros if true + + Returns + ------- + ret : 4D tensor + A Theano variable with same shape as output_shape + """ + # flatten the input and repeat it + repeated_input = [max_pool_out.flatten()]*(pooling_shape[0]*pooling_shape[1]) + + # concatenate the repeated vectors into + # a 2D matrix in the format neibs2images wants + stacked_conv_neibs = tensor.stack(*repeated_input).T + + # then get back a stretched version of the stacked neighbours + stretch_unpooling_out = \ + neighbours.neibs2images(stacked_conv_neibs, + pooling_shape, + output_shape, + 'ignore_borders' if ignore_borders else 'valid') + return stretch_unpooling_out
--- a/pylearn/formulas/noise.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/formulas/noise.py Tue Aug 16 15:44:15 2011 -0400 @@ -63,7 +63,7 @@ """ assert inp.dtype in ['float32','float64'] return theano_rng.binomial( size = inp.shape, n = 1, p = 1 - noise_lvl[0], dtype=inp.dtype) * inp \ - + (inp==0) * theano_rng.binomial( size = inp.shape, n = 1, p = noise_lvl[1], dtype=inp.dtype) + + (theano.tensor.eq(inp,0)) * theano_rng.binomial( size = inp.shape, n = 1, p = noise_lvl[1], dtype=inp.dtype) @tags.tags('noise','gauss','gaussian') def gaussian_noise(theano_rng,inp,noise_lvl):
--- a/pylearn/gd/sgd.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/gd/sgd.py Tue Aug 16 15:44:15 2011 -0400 @@ -1,6 +1,6 @@ """A stochastic gradient descent minimizer. """ - +import numpy import theano def sgd_updates(params, grads, stepsizes): @@ -35,11 +35,11 @@ momentum = [momentum for p in params] if len(params) != len(grads): raise ValueError('params and grads have different lens') - headings = [theano.shared(p.get_value(borrow=False)*0) for p in params] + headings = [theano.shared(numpy.zeros_like(p.get_value(borrow=True))) for p in params] updates = [] for s, p, gp, m, h in zip(stepsizes, params, grads, momentum, headings): updates.append((p, p + s * h)) - updates.append((h, m*h - (1-m)*gp)) + updates.append((h, m*h - (1.0-m)*gp)) return updates
--- a/pylearn/io/image_tiling.py Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/io/image_tiling.py Tue Aug 16 15:44:15 2011 -0400 @@ -100,6 +100,10 @@ H, W = img_shape Hs, Ws = tile_spacing + out_scaling = 1 + if output_pixel_vals and str(X.dtype).startswith('float'): + out_scaling = 255 + out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype) for tile_row in xrange(tile_shape[0]): for tile_col in xrange(tile_shape[1]): @@ -121,7 +125,7 @@ tile_row * (H+Hs):tile_row*(H+Hs)+H, tile_col * (W+Ws):tile_col*(W+Ws)+W ] \ - = this_img * (255 if output_pixel_vals else 1) + = this_img * out_scaling return out_array
--- a/pylearn/misc/do_nightly_build Tue Aug 16 15:44:01 2011 -0400 +++ b/pylearn/misc/do_nightly_build Tue Aug 16 15:44:15 2011 -0400 @@ -9,7 +9,12 @@ FLAGS=warn.argmax_pushdown_bug=False,warn.gpusum_01_011_0111_bug=False,warn.sum_sum_bug=False,warn.sum_div_dimshuffle_bug=False,compiledir=${COMPILEDIR} export PYTHONPATH=${ROOT_CWD}/Theano:${ROOT_CWD}/Pylearn:$PYTHONPATH -cd ${ROOT_CWD}/ +cd ${ROOT_CWD}/Theano +hg summary +cd ../Pylearn +hg summary +cd .. + echo "executing nosetests with mode=FAST_COMPILE" #THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS} Pylearn echo "executing nosetests with mode=FAST_RUN" @@ -20,5 +25,5 @@ #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long. seed=$RANDOM echo "executing nosetests with mode=DEBUG_MODE with seed of the day $seed" -THEANO_FLAGS=${FLAGS},unittests.rseed=$seed,mode=DEBUG_MODE,DebugMode.check_strides=0,DebugMode.patience=3 ${NOSETESTS} Pylearn +THEANO_FLAGS=${FLAGS},unittests.rseed=$seed,mode=DEBUG_MODE,DebugMode.check_strides=0,DebugMode.patience=3,DebugMode.check_preallocated_output= ${NOSETESTS} Pylearn