Mercurial > pylearn
annotate pylearn/dataset_ops/image_patches.py @ 1521:6397233f3ccd
autopep8
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Wed, 31 Oct 2012 16:12:57 -0400 |
parents | 9ffe5d6faee3 |
children | 5972fab3cfd2 |
rev | line source |
---|---|
1521 | 1 import os |
2 import numpy | |
963
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 import theano |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 from pylearn.datasets.image_patches import ( |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
6 olshausen_field_1996_whitened_images, |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 extract_random_patches) |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
8 |
1521 | 9 from .protocol import TensorFnDataset # protocol.py __init__.py |
963
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
10 from .memo import memo |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
11 |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
12 import scipy.io |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
13 from pylearn.io import image_tiling |
1510
07b48bd449cd
Make a dataset ops use the new path system.
Frederic Bastien <nouiz@nouiz.org>
parents:
1285
diff
changeset
|
14 from pylearn.datasets.config import get_filepath_in_roots |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
15 |
1521 | 16 |
963
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
@memo
def get_dataset(N, R, C, dtype, center, unitvar):
    """Return a matrix of random patches from the Olshausen & Field images.

    The patches are drawn by `extract_random_patches` using a RandomState
    built from a fixed seed, then cast to `dtype` and flattened so that each
    row is one patch.

    :param N: number of patches to extract (rows of the result)
    :param R: forwarded to `extract_random_patches`; with `C` it fixes the
        row length `R * C` (presumably the patch height -- confirm against
        `extract_random_patches`)
    :param C: see `R` (presumably the patch width)
    :param dtype: dtype the patch stack is cast to
    :param center: if true, subtract the per-column mean (taken over rows)
    :param unitvar: if true, divide every column by its standard deviation,
        floored at 1e-8
    :returns: array of shape `(N, R * C)`
    """
    seed = 98234
    rng = numpy.random.RandomState(seed)
    img_stack = olshausen_field_1996_whitened_images()
    patch_stack = extract_random_patches(img_stack, N, R, C, rng)
    rval = patch_stack.astype(dtype).reshape((N, (R * C)))

    if center:
        rval -= rval.mean(axis=0)
    if unitvar:
        # numpy.maximum is the element-wise maximum; numpy.max would treat
        # the second argument as an axis and fail.  The floor keeps constant
        # columns (std == 0) from causing a division by zero.
        rval /= numpy.maximum(rval.std(axis=0), 1e-8)

    return rval
963
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
31 |
1521 | 32 |
963
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def image_patches(s_idx, dims,
                  split='train', dtype=theano.config.floatX, rasterized=False,
                  center=True,
                  unitvar=True,
                  fn=get_dataset):
    """Return a symbolic variable selecting image patches by index.

    A `TensorFnDataset` op is built around `fn` (called with the arguments
    `(N, R, C, dtype, center, unitvar)`) and applied to `s_idx % N`.

    :param s_idx: index (or indices) of the patches to fetch; taken modulo N
    :param dims: triple `(N, R, C)`; each patch is stored as a row of
        length `R * C`
    :param split: only 'train' is supported
    :param dtype: dtype handed to `TensorFnDataset` and to `fn`
    :param rasterized: must currently be true; see Raises
    :param center: forwarded to `fn`
    :param unitvar: forwarded to `fn`
    :param fn: callable producing the dataset
    :returns: the output of the dataset op
    :raises NotImplementedError: if `split` is not 'train', or if
        `rasterized` is false (note that false is the default)
    :raises AssertionError: if the op output is neither 1- nor 2-dimensional
    """
    N, R, C = dims

    if split != 'train':
        raise NotImplementedError(
            'train/test/valid splits for randomly sampled image patches?')

    if not rasterized:
        raise NotImplementedError()

    op = TensorFnDataset(dtype, bcast=(False, ),
                         fn=(fn, (N, R, C, dtype, center, unitvar)),
                         single_shape=(R * C, ))
    x = op(s_idx % N)

    # NOTE: the `not rasterized` branches below cannot run while the guard
    # above rejects rasterized=False.  They reshape to the (R, C) taken from
    # `dims` rather than a fixed 20x20, so they are right for any patch size
    # once that guard is lifted.
    if x.ndim == 1:
        if not rasterized:
            x = x.reshape((R, C))
    elif x.ndim == 2:
        if not rasterized:
            x = x.reshape((x.shape[0], R, C))
    else:
        # Explicit raise instead of `assert False` so the check still fires
        # when Python runs with -O.
        raise AssertionError('what happened?')

    return x
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
60 |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
61 |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
@memo
def ranzato_hinton_2010(path=None):
    """Load the Ranzato & Hinton 2010 colour-patch .mat file.

    :param path: location of the .mat file.  When None, the file
        'image_patches/mcRBM/training_colorpatches_16x16_demo.mat' is looked
        up through `get_filepath_in_roots`.
    :returns: whatever `scipy.io.loadmat` returns for that file
    """
    mat_path = path
    if mat_path is None:
        relative_name = os.path.join('image_patches', 'mcRBM',
                                     'training_colorpatches_16x16_demo.mat')
        mat_path = get_filepath_in_roots(relative_name)
    return scipy.io.loadmat(mat_path)
1521 | 69 |
70 | |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
def ranzato_hinton_2010_whitened_patches(path=None):
    """Return the 'whitendata' entry of the dataset file as float32.

    This is the pca of the data, which is 10240 x 105.

    :param path: forwarded to `ranzato_hinton_2010`
    """
    whitened = ranzato_hinton_2010(path)['whitendata']
    return whitened.astype('float32')
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
76 |
1521 | 77 |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
def undo_pca_filters_of_ranzato_hinton_2010(X, path=None):
    """Map PCA-space filters back and split them into colour channels.

    `X` holds one filter per row.  It is multiplied by the transpose of the
    dataset's 'invpcatransf' matrix, and the result is cut column-wise into
    three pieces: columns 0-255, 256-511 and 512 onwards.

    :param X: matrix of filters, one per row
    :param path: forwarded to `ranzato_hinton_2010`
    :returns: tuple (R, G, B, None) of matrices, suitable for passing to
        `image_tiling.tile_raster_images`
    """
    inverse_pca = ranzato_hinton_2010(path)['invpcatransf']
    unprojected = numpy.dot(X, inverse_pca.T)
    red = unprojected[:, :256]
    green = unprojected[:, 256:512]
    blue = unprojected[:, 512:]
    return (red, green, blue, None)
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
86 |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
def save_filters_of_ranzato_hinton_2010(X, fname, min_dynamic_range=1e-3, data_path=None):
    """Tile the filters in `X` as 16x16 images and save them to `fname`.

    :param X: matrix of PCA-space filters, one per row
    :param fname: destination handed to `image_tiling.save_tiled_raster_images`
    :param min_dynamic_range: forwarded to `image_tiling.tile_raster_images`
    :param data_path: location of the dataset file, forwarded as `path` to
        `undo_pca_filters_of_ranzato_hinton_2010`
    """
    channels = undo_pca_filters_of_ranzato_hinton_2010(X, path=data_path)
    tiled = image_tiling.tile_raster_images(
        channels,
        img_shape=(16, 16),
        min_dynamic_range=min_dynamic_range)
    image_tiling.save_tiled_raster_images(tiled, fname)
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
93 |
1521 | 94 |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
def ranzato_hinton_2010_op(s_idx,
                           split='train',
                           dtype=theano.config.floatX, rasterized=True,
                           center=True,
                           unitvar=True,
                           fn=ranzato_hinton_2010_whitened_patches):
    """Return a symbolic variable selecting rows of the whitened patches.

    A `TensorFnDataset` op with rows of length 105 is built around `fn` and
    applied to `s_idx % 10240`.

    :param s_idx: index (or indices) of the examples; taken modulo 10240
    :param split: only 'train' is supported
    :param dtype: must compare equal to 'float32'
    :param rasterized: must be true
    :param center: accepted but not used by this function
    :param unitvar: accepted but not used by this function
    :param fn: callable producing the dataset
    :raises NotImplementedError: for any other split, for rasterized=False,
        or for a dtype other than 'float32'
    """
    if split != 'train':
        raise NotImplementedError(
            'train/test/valid splits for randomly sampled image patches?')

    if not rasterized:
        # the data is provided as PCA-sphered, so rasterizing does not make sense
        # TODO: add a param to enable/disable 'PCA', and if disabled, then consider
        # rasterizing or not
        raise NotImplementedError('only pca data is provided')

    if dtype != 'float32':
        raise NotImplementedError('dtype not float32')

    n_examples = 10240
    dataset_op = TensorFnDataset(dtype,
                                 bcast=(False,),
                                 fn=fn,
                                 single_shape=(105,))
    return dataset_op(s_idx % n_examples)