annotate pylearn/dataset_ops/image_patches.py @ 1522:5972fab3cfd2

make ranzato_hinton_2010_op work with float64 for a DLT on mcrbm test.
author Frederic Bastien <nouiz@nouiz.org>
date Wed, 31 Oct 2012 16:19:51 -0400
parents 6397233f3ccd
children
rev   line source
1521
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
1 import os
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
2 import numpy
963
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 import theano
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5 from pylearn.datasets.image_patches import (
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 olshausen_field_1996_whitened_images,
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7 extract_random_patches)
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8
1521
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
9 from .protocol import TensorFnDataset # protocol.py __init__.py
963
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
10 from .memo import memo
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
11
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
12 import scipy.io
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
13 from pylearn.io import image_tiling
1510
07b48bd449cd Make a dataset ops use the new path system.
Frederic Bastien <nouiz@nouiz.org>
parents: 1285
diff changeset
14 from pylearn.datasets.config import get_filepath_in_roots
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
15
1521
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
16
963
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
@memo
def get_dataset(N, R, C, dtype, center, unitvar):
    """Return an (N, R * C) matrix of patches sampled from the Olshausen &
    Field (1996) whitened images.

    The random generator is created from a fixed seed on every call, so the
    sampling is meant to be reproducible for identical arguments.

    :param N: number of patches requested (number of rows of the result)
    :param R: first patch dimension passed to `extract_random_patches`
        (presumably patch height -- confirm against that helper)
    :param C: second patch dimension passed to `extract_random_patches`
        (presumably patch width -- confirm against that helper)
    :param dtype: dtype the patch stack is cast to before post-processing
    :param center: if true, subtract the per-column mean in place
    :param unitvar: if true, divide each column in place by its standard
        deviation, floored at 1e-8 to avoid dividing by zero

    :returns: array of shape (N, R * C)
    """
    seed = 98234
    rng = numpy.random.RandomState(seed)
    img_stack = olshausen_field_1996_whitened_images()
    patch_stack = extract_random_patches(img_stack, N, R, C, rng)
    rval = patch_stack.astype(dtype).reshape((N, R * C))

    if center:
        rval -= rval.mean(axis=0)
    if unitvar:
        # Elementwise floor of the per-column std.  `numpy.max` would
        # interpret 1e-8 as the `axis` argument and raise, so the
        # elementwise `numpy.maximum` is required here.
        rval /= numpy.maximum(rval.std(axis=0), 1e-8)

    return rval
963
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
31
1521
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
32
963
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def image_patches(s_idx, dims,
                  split='train', dtype=theano.config.floatX, rasterized=False,
                  center=True,
                  unitvar=True,
                  fn=get_dataset):
    """Return the output of a `TensorFnDataset` op indexing random image
    patches.

    :param s_idx: index (or indices) of the requested patches; it is taken
        modulo N before being handed to the op
    :param dims: triple (N, R, C); each example has R * C entries
    :param split: only 'train' is supported
    :param dtype: dtype given to the op and forwarded to `fn`
    :param rasterized: must be true; the non-rasterized layout is not
        implemented
    :param center: forwarded to `fn`
    :param unitvar: forwarded to `fn`
    :param fn: callable invoked by the op with
        (N, R, C, dtype, center, unitvar) to produce the data

    :raises NotImplementedError: if `split` is not 'train' or `rasterized`
        is false
    :raises AssertionError: if the op output is neither 1- nor 2-dimensional
    """
    N, R, C = dims

    if split != 'train':
        raise NotImplementedError(
            'train/test/valid splits for randomly sampled image patches?')

    if not rasterized:
        # Reshaping the flat patches back to images has never been enabled;
        # refuse up front rather than returning a wrongly-shaped result.
        raise NotImplementedError('only rasterized patches are supported')

    op = TensorFnDataset(dtype, bcast=(False, ),
                         fn=(fn, (N, R, C, dtype, center, unitvar)),
                         single_shape=(R * C, ))
    x = op(s_idx % N)

    # Explicit raise instead of `assert False` so the sanity check is kept
    # when Python runs with -O.
    if x.ndim not in (1, 2):
        raise AssertionError('what happened?')

    return x
06f21a964bd8 datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
60
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
61
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
@memo
def ranzato_hinton_2010(path=None):
    """Load a .mat file with `scipy.io.loadmat` and return the result.

    :param path: file to load; when None the file
        image_patches/mcRBM/training_colorpatches_16x16_demo.mat is located
        through `get_filepath_in_roots`

    :returns: whatever `scipy.io.loadmat` returns for that file
    """
    if path is not None:
        return scipy.io.loadmat(path)

    relative_name = os.path.join('image_patches', 'mcRBM',
                                 'training_colorpatches_16x16_demo.mat')
    return scipy.io.loadmat(get_filepath_in_roots(relative_name))
1521
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
69
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
70
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
def ranzato_hinton_2010_whitened_patches(path=None):
    """Return the pca of the data, which is 10240 x 105, as float32.

    :param path: forwarded to `ranzato_hinton_2010`
    """
    whitened = ranzato_hinton_2010(path)['whitendata']
    return whitened.astype('float32')
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
76
1522
5972fab3cfd2 make ranzato_hinton_2010_op work with float64 for a DLT on mcrbm test.
Frederic Bastien <nouiz@nouiz.org>
parents: 1521
diff changeset
def ranzato_hinton_2010_whitened_patches_f64(path=None):
    """Return the pca of the data, which is 10240 x 105, as float64.

    :param path: forwarded to `ranzato_hinton_2010`
    """
    whitened = ranzato_hinton_2010(path)['whitendata']
    return whitened.astype('float64')
1521
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
82
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
def undo_pca_filters_of_ranzato_hinton_2010(X, path=None):
    """Return tuple (R,G,B,None) of matrices for matrix `X` of filters (one
    per row).

    `X` is multiplied by the transpose of the 'invpcatransf' entry of the
    loaded data, and the product is split column-wise into [0, 256),
    [256, 512) and [512, end).

    Return value can be passed to `image_tiling.tile_raster_images`.

    :param X: matrix of filters, one per row
    :param path: forwarded to `ranzato_hinton_2010`
    """
    inverse_pca = ranzato_hinton_2010(path)['invpcatransf']
    unprojected = numpy.dot(X, inverse_pca.T)

    red = unprojected[:, :256]
    green = unprojected[:, 256:512]
    blue = unprojected[:, 512:]
    return (red, green, blue, None)
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
91
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
def save_filters_of_ranzato_hinton_2010(X, fname, min_dynamic_range=1e-3, data_path=None):
    """Tile the filters in `X` as 16x16 images and save them to `fname`.

    :param X: matrix of filters, one per row, as accepted by
        `undo_pca_filters_of_ranzato_hinton_2010`
    :param fname: destination handed to
        `image_tiling.save_tiled_raster_images`
    :param min_dynamic_range: forwarded to `image_tiling.tile_raster_images`
    :param data_path: forwarded as `path` to
        `undo_pca_filters_of_ranzato_hinton_2010`
    """
    channels = undo_pca_filters_of_ranzato_hinton_2010(X, path=data_path)
    tiled = image_tiling.tile_raster_images(
        channels,
        img_shape=(16, 16),
        min_dynamic_range=min_dynamic_range)
    image_tiling.save_tiled_raster_images(tiled, fname)
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
98
1521
6397233f3ccd autopep8
Frederic Bastien <nouiz@nouiz.org>
parents: 1511
diff changeset
99
998
8ba8b08e0442 added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 971
diff changeset
def ranzato_hinton_2010_op(s_idx,
                           split='train',
                           dtype=theano.config.floatX, rasterized=True,
                           center=True,
                           unitvar=True,
                           fn=ranzato_hinton_2010_whitened_patches):
    """Return the output of a `TensorFnDataset` op indexing the
    Ranzato & Hinton (2010) whitened patches.

    :param s_idx: index (or indices) of the requested examples; it is taken
        modulo 10240 before being handed to the op
    :param split: only 'train' is supported
    :param dtype: 'float32', or 'float64' when `fn` is left at its default
        (the float64 loader is then substituted)
    :param rasterized: must be true
    :param center: accepted but not used by this function
    :param unitvar: accepted but not used by this function
    :param fn: callable given to the op to produce the data; each example
        has 105 entries

    :raises NotImplementedError: for any split other than 'train', when
        `rasterized` is false, or for an unsupported dtype
    """
    n_examples = 10240

    if split != 'train':
        raise NotImplementedError(
            'train/test/valid splits for randomly sampled image patches?')

    if not rasterized:
        # the data is provided as PCA-sphered, so rasterizing does not make
        # sense
        # TODO: add a param to enable/disable 'PCA', and if disabled, then
        # consider rasterizing or not
        raise NotImplementedError('only pca data is provided')

    swap_in_f64_loader = (dtype == "float64"
                          and fn is ranzato_hinton_2010_whitened_patches)
    if swap_in_f64_loader:
        # The default loader casts to float32; use its float64 twin instead.
        fn = ranzato_hinton_2010_whitened_patches_f64
    elif dtype != 'float32':
        raise NotImplementedError('dtype not float32')

    dataset_op = TensorFnDataset(dtype,
                                 bcast=(False,),
                                 fn=fn,
                                 single_shape=(105,))
    return dataset_op(s_idx % n_examples)