annotate pylearn/dataset_ops/image_patches.py @ 1510:07b48bd449cd

Make a dataset ops use the new path system.
author Frederic Bastien <nouiz@nouiz.org>
date Mon, 12 Sep 2011 11:47:00 -0400
parents 976539956475
children 9ffe5d6faee3
rev   line source
963
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 import os, numpy
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 import theano
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 from pylearn.datasets.image_patches import (
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5 olshausen_field_1996_whitened_images,
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 extract_random_patches)
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8 from .protocol import TensorFnDataset # protocol.py __init__.py
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
9 from .memo import memo
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
10
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
11 import scipy.io
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
12 from pylearn.io import image_tiling
1510
07b48bd449cd Make a dataset ops use the new path system.
Frederic Bastien <nouiz@nouiz.org>
parents: 1285
diff changeset
13 from pylearn.datasets.config import get_filepath_in_roots
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
14
963
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
@memo
def get_dataset(N, R, C, dtype, center, unitvar):
    """Return a rasterized matrix of random patches from the Olshausen & Field images.

    :param N: number of patches to extract
    :param R: number of rows in each patch
    :param C: number of columns in each patch
    :param dtype: dtype the patches are cast to before normalization
    :param center: if true, subtract the per-column mean (computed over the N patches)
    :param unitvar: if true, divide each column by its standard deviation
        (floored at 1e-8 so constant columns do not divide by zero)

    :returns: array of shape (N, R*C), one rasterized patch per row

    The random state is seeded with a fixed constant, so the sampling is
    presumably reproducible across calls (depends on `extract_random_patches`).
    """
    seed = 98234
    rng = numpy.random.RandomState(seed)
    img_stack = olshausen_field_1996_whitened_images()
    patch_stack = extract_random_patches(img_stack, N, R, C, rng)
    rval = patch_stack.astype(dtype).reshape((N, (R * C)))

    if center:
        rval -= rval.mean(axis=0)
    if unitvar:
        # Element-wise floor of the std.  `numpy.max(a, 1e-8)` would treat
        # 1e-8 as the `axis` argument and raise instead of clipping.
        rval /= numpy.maximum(rval.std(axis=0), 1e-8)

    return rval
963
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
29
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def image_patches(s_idx, dims,
        split='train', dtype=theano.config.floatX, rasterized=False,
        center=True,
        unitvar=True,
        fn=get_dataset):
    """Return the output of a TensorFnDataset op that indexes random image patches.

    :param s_idx: index (or indices) into the patch set; it is wrapped modulo N
        (presumably a symbolic integer scalar or vector -- confirm against
        TensorFnDataset)
    :param dims: triple (N, R, C): number of patches, patch rows, patch columns
    :param split: only 'train' is supported
    :param dtype: dtype handed to TensorFnDataset and to `fn`
    :param rasterized: must be true; only rasterized (R*C-long) patches are
        supported.  NOTE: the default value is False, so callers have to pass
        rasterized=True explicitly.
    :param center: forwarded to `fn`
    :param unitvar: forwarded to `fn`
    :param fn: callable invoked as fn(N, R, C, dtype, center, unitvar) to build the data

    :raises NotImplementedError: if split is not 'train' or rasterized is false
    :raises AssertionError: if the op output has a rank other than 1 or 2
    """
    N, R, C = dims

    if split != 'train':
        raise NotImplementedError('train/test/valid splits for randomly sampled image patches?')

    if not rasterized:
        # Whoever implements this should reshape the op output to (R, C)
        # [or (batch, R, C)], not to a fixed 20x20 shape.
        raise NotImplementedError()

    op = TensorFnDataset(dtype, bcast=(False,),
            fn=(fn, (N, R, C, dtype, center, unitvar)),
            single_shape=(R * C,))
    x = op(s_idx % N)

    # Explicit raise rather than `assert False` so the check survives `python -O`.
    if x.ndim not in (1, 2):
        raise AssertionError('what happened?')

    return x
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
55
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
56
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
57
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
@memo
def ranzato_hinton_2010(path=None):
    """Load the RanzatoHinton2010 colour-patch .mat file and return its contents.

    :param path: location of the .mat file; when None, the file
        image_patches/mcRBM/training_colorpatches_16x16_demo.mat is looked up
        through `get_filepath_in_roots`.

    :returns: whatever `scipy.io.loadmat` returns for that file
    """
    if path is None:
        relative_name = os.path.join(
            'image_patches', 'mcRBM', 'training_colorpatches_16x16_demo.mat')
        path = get_filepath_in_roots(relative_name)
    return scipy.io.loadmat(path)
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
def ranzato_hinton_2010_whitened_patches(path=None):
    """Return the PCA-whitened patches (a 10240 x 105 matrix) as float32.

    :param path: forwarded to `ranzato_hinton_2010`; None selects its default file.
    """
    whitened = ranzato_hinton_2010(path)['whitendata']
    return whitened.astype('float32')
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
70
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
def undo_pca_filters_of_ranzato_hinton_2010(X, path=None):
    """Map filters from PCA space back to pixel space, split by colour channel.

    :param X: matrix of filters, one per row, expressed in the PCA basis
    :param path: forwarded to `ranzato_hinton_2010`; None selects its default file

    :returns: tuple (R, G, B, None) of matrices: columns [0, 256), [256, 512)
        and [512, end) of the back-projected filters.  The tuple can be passed
        to `image_tiling.tile_raster_images`.
    """
    inverse_transform = ranzato_hinton_2010(path)['invpcatransf']
    pixels = numpy.dot(X, inverse_transform.T)
    red = pixels[:, :256]
    green = pixels[:, 256:512]
    blue = pixels[:, 512:]
    return (red, green, blue, None)
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
79
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
def save_filters_of_ranzato_hinton_2010(X, fname, min_dynamic_range=1e-3, data_path=None):
    """Tile the filters in `X` as 16x16 images and save the result to `fname`.

    :param X: matrix of filters in PCA space, one per row
    :param fname: destination handed to `image_tiling.save_tiled_raster_images`
    :param min_dynamic_range: forwarded to `image_tiling.tile_raster_images`
    :param data_path: location of the dataset .mat file (None selects the default)
    """
    channels = undo_pca_filters_of_ranzato_hinton_2010(X, path=data_path)
    tiled = image_tiling.tile_raster_images(
        channels,
        img_shape=(16, 16),
        min_dynamic_range=min_dynamic_range)
    image_tiling.save_tiled_raster_images(tiled, fname)
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
86
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
def ranzato_hinton_2010_op(s_idx,
        split='train',
        dtype=theano.config.floatX, rasterized=True,
        center=True,
        unitvar=True,
        fn=ranzato_hinton_2010_whitened_patches):
    """Return the output of a TensorFnDataset op over the RanzatoHinton2010 patches.

    :param s_idx: index (or indices) into the dataset; wrapped modulo 10240
    :param split: only 'train' is supported
    :param dtype: must compare equal to 'float32'
    :param rasterized: must be true; the data is only available PCA-sphered
    :param center: accepted but not used by this function
    :param unitvar: accepted but not used by this function
    :param fn: callable handed to TensorFnDataset to produce the data

    :raises NotImplementedError: for any unsupported split, rasterized or dtype value
    """
    n_examples = 10240

    if split != 'train':
        raise NotImplementedError('train/test/valid splits for randomly sampled image patches?')

    if not rasterized:
        # The data is provided PCA-sphered, so an un-rasterized (image-shaped)
        # view does not make sense.
        # TODO: add a param to enable/disable 'PCA', and if disabled, then
        # consider rasterizing or not.
        raise NotImplementedError('only pca data is provided')

    if dtype != 'float32':
        raise NotImplementedError('dtype not float32')

    dataset_op = TensorFnDataset(
        dtype,
        bcast=(False,),
        fn=fn,
        single_shape=(105,))
    return dataset_op(s_idx % n_examples)
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
113