Mercurial > pylearn
diff pylearn/dataset_ops/image_patches.py @ 998:8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
modified mcRBM to use it.
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Tue, 24 Aug 2010 16:51:53 -0400 |
parents | 507159eea97e |
children | 976539956475 |
line wrap: on
line diff
```diff
--- a/pylearn/dataset_ops/image_patches.py	Tue Aug 24 16:00:08 2010 -0400
+++ b/pylearn/dataset_ops/image_patches.py	Tue Aug 24 16:51:53 2010 -0400
@@ -2,12 +2,16 @@
 import theano
 
 from pylearn.datasets.image_patches import (
+        data_root,
         olshausen_field_1996_whitened_images,
         extract_random_patches)
 
 from .protocol import TensorFnDataset # protocol.py __init__.py
 from .memo import memo
 
+import scipy.io
+from pylearn.io import image_tiling
+
 @memo
 def get_dataset(N,R,C,dtype,center,unitvar):
     seed=98234
@@ -48,3 +52,60 @@
 
     return x
 
+
+
+@memo
+def ranzato_hinton_2010(path=None):
+    if path is None:
+        path = os.path.join(data_root(), 'image_patches', 'mcRBM',
+                'training_colorpatches_16x16_demo.mat')
+    dct = scipy.io.loadmat(path)
+    return dct
+def ranzato_hinton_2010_whitened_patches(path=None):
+    """Return the pca of the data, which is 10240 x 105
+    """
+    dct = ranzato_hinton_2010(path)
+    return dct['whitendata'].astype('float32')
+
+def undo_pca_filters_of_ranzato_hinton_2010(X, path=None):
+    """Return tuple (R,G,B,None) of matrices for matrix `X` of filters (one per row)
+
+    Return value can be passed to `image_tiling.tile_raster_images`.
+    """
+    dct = ranzato_hinton_2010(path)
+    X = numpy.dot(X, dct['invpcatransf'].T)
+    return (X[:,:256], X[:,256:512], X[:,512:], None)
+
+def save_filters_of_ranzato_hinton_2010(X, fname, min_dynamic_range=1e-3, data_path=None):
+    _img = image_tiling.tile_raster_images(
+            undo_pca_filters_of_ranzato_hinton_2010(X, path=data_path),
+            img_shape=(16,16),
+            min_dynamic_range=min_dynamic_range)
+    image_tiling.save_tiled_raster_images(_img, fname)
+
+def ranzato_hinton_2010_op(s_idx,
+        split='train',
+        dtype=theano.config.floatX, rasterized=True,
+        center=True,
+        unitvar=True):
+    N = 10240
+
+    if split != 'train':
+        raise NotImplementedError('train/test/valid splits for randomly sampled image patches?')
+
+    if not rasterized:
+        # the data is provided as PCA-sphered, so rasterizing does not make sense
+        # TODO: add a param to enable/disable 'PCA', and if disabled, then consider
+        # rasterizing or not
+        raise NotImplementedError('only pca data is provided')
+
+    if dtype != 'float32':
+        raise NotImplementedError('dtype not float32')
+
+    op = TensorFnDataset(dtype,
+            bcast=(False,),
+            fn=ranzato_hinton_2010_whitened_patches,
+            single_shape=(105,))
+    x = op(s_idx%N)
+    return x
+
```