# HG changeset patch # User James Bergstra # Date 1285283572 14400 # Node ID 8905186b176c647afec762af41f05cc08c2e2da9 # Parent 976539956475baf5c2c5b33bbfb8997f602fe5cb test_mcRBM - added code to iterate over tinyimages diff -r 976539956475 -r 8905186b176c pylearn/algorithms/tests/test_mcRBM.py --- a/pylearn/algorithms/tests/test_mcRBM.py Thu Sep 23 19:12:08 2010 -0400 +++ b/pylearn/algorithms/tests/test_mcRBM.py Thu Sep 23 19:12:52 2010 -0400 @@ -1,119 +1,9 @@ from pylearn.algorithms.mcRBM import * import pylearn.datasets.cifar10 +import pylearn.dataset_ops.tinyimages import pylearn.dataset_ops.cifar10 -def _mar_train_patches(dtype): - R,C=16,16 - train_data = pylearn.dataset_ops.cifar10.train_data_labels(dtype)[0][:40000] - #train_data shape is (40000, 3072) - train_data = train_data.reshape((40000,3,32,32)).transpose([0,2,3,1]) - patches = train_data[:, :R, :C, :].reshape((40000, 3*R*C)) - patches -= patches.mean(axis=0) - wpatches = numpy.dot(patches, d['pcatransf'].T) - return wpatches - -def mar_centered(s_idx, split, dtype='float64', rasterized=False, color='grey'): - """ - Returns a pair (img, label) of theano expressions for cifar-10 samples - - :param s_idx: the indexes - - :param split: - - :param dtype: - - :param rasterized: return examples as vectors (True) or 28x28 matrices (False) - - :param color: control how to deal with the color in the images' - - grey greyscale (with luminance weighting) - - rgb add a trailing dimension of length 3 with rgb colour channels - - """ - - split_options = {'train':(train_data, train_labels), - 'valid': (valid_data, valid_labels), - 'test': (test_data, test_labels)} - - if split not in split_options: - raise ValueError('invalid split option', (split, split_options.keys())) - - color_options = ('grey', 'rgb') - if color not in color_options: - raise ValueError('invalid color option', (color, color_options)) - - x_fn, y_fn = split_options[split] - - x_op = TensorFnDataset(dtype, (False,), (x_fn, (dtype,)), (3072,)) - y_op = TensorFnDataset('int32', (), y_fn) - - x = x_op(s_idx) - y = y_op(s_idx) - - # Y = 0.3R + 0.59G + 0.11B from - # http://gimp-savvy.com/BOOK/index.html?node54.html - rgb_dtype = 'float32' - if dtype == 'float64': - rgb_dtype = dtype - r = numpy.asarray(.3, dtype=rgb_dtype) - g = numpy.asarray(.59, dtype=rgb_dtype) - b = numpy.asarray(.11, dtype=rgb_dtype) - - if x.ndim == 1: - if rasterized: - if color=='grey': - x = r * x[:1024] + g * x[1024:2048] + b * x[2048:] - if dtype=='uint8': - x = theano.tensor.cast(x, 'uint8') - elif color=='rgb': - # the strides aren't what you'd expect between channels, - # but theano is all about weird strides - x = x.reshape((3,32*32)).T - else: - raise NotImplemented('color', color) - else: - if color=='grey': - x = r * x[:1024] + g * x[1024:2048] + b * x[2048:] - if dtype=='uint8': - x = theano.tensor.cast(x, 'uint8') - x = x.reshape((32,32)) - elif color=='rgb': - # the strides aren't what you'd expect between channels, - # but theano is all about weird strides - x = x.reshape((3,32,32)).dimshuffle(1, 2, 0) - else: - raise NotImplemented('color', color) - elif x.ndim == 2: - N = x.shape[0] # symbolic - if rasterized: - if color=='grey': - x = r * x[:,:1024] + g * x[:,1024:2048] + b * x[:,2048:] - if dtype=='uint8': - x = theano.tensor.cast(x, 'uint8') - elif color=='rgb': - # the strides aren't what you'd expect between channels, - # but theano is all about weird strides - x = x.reshape((N, 3,32*32)).dimshuffle(0, 2, 1) - else: - raise NotImplemented('color', color) - else: - if color=='grey': - x = r * x[:,:1024] + g * x[:,1024:2048] + b * x[:,2048:] - if dtype=='uint8': - x = theano.tensor.cast(x, 'uint8') - x.reshape((N, 32, 32)) - elif color=='rgb': - # the strides aren't what you'd expect between channels, - # but theano is all about weird strides - x = x.reshape((N,3,32,32)).dimshuffle(0, 2, 3, 1) - else: - raise NotImplemented('color', color) - else: - raise ValueError('x has too many dimensions', x.ndim) - - return x, y - - def _default_rbm_alloc(n_I, n_K=256, n_J=100): return mcRBM.alloc(n_I, n_K, n_J) @@ -141,6 +31,11 @@ n_vis=96 # pca components epoch_size=batchsize*500 n_patches=epoch_size*20 + elif dataset=='tinyimages_patches': + R,C=8,8 + n_vis=81 + epoch_size=batchsize*500 + n_patches=epoch_size*20 else: R,C= 16,16 # the size of image patches n_vis=R*C @@ -161,6 +56,8 @@ X), img_shape=(R,C)) image_tiling.save_tiled_raster_images(_img, fname) + elif dataset == 'tinyimages_patches': + tile = pylearn.dataset_ops.tinyimages.save_filters else: def tile(X, fname): _img = image_tiling.tile_raster_images(X, @@ -169,13 +66,16 @@ image_tiling.save_tiled_raster_images(_img, fname) batch_idx = TT.iscalar() + batch_range =batch_idx * batchsize + np.arange(batchsize) if dataset == 'MAR': - train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_idx * batchsize + np.arange(batchsize)) + train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range) elif dataset == 'cifar10patches8x8': - train_batch = pylearn.dataset_ops.cifar10.cifar10_patches(batch_idx * batchsize + - np.arange(batchsize), 'train', n_patches=n_patches, patch_size=(R,C), + train_batch = pylearn.dataset_ops.cifar10.cifar10_patches( + batch_range, 'train', n_patches=n_patches, patch_size=(R,C), pca_components=n_vis) + elif dataset == 'tinyimages_patches': + train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range) else: train_batch = pylearn.dataset_ops.image_patches.image_patches( s_idx = (batch_idx * batchsize + np.arange(batchsize)), @@ -319,13 +219,14 @@ ) if 1: + # pretraining settings rbm,smplr = test_reproduce_ranzato_hinton_2010( as_unittest=False, n_train_iters=60000, rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81), trainer_alloc=mcRBMTrainer.alloc_for_P, lr_per_example=0.05, - dataset='cifar10patches8x8', + dataset='tinyimages_patches', l1_penalty=1e-3, l1_penalty_start=30000, #l1_penalty_start=350, #DEBUG