Mercurial > pylearn
annotate pylearn/dataset_ops/image_patches.py @ 1510:07b48bd449cd
Make a dataset ops use the new path system.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Mon, 12 Sep 2011 11:47:00 -0400 |
parents | 976539956475 |
children | 9ffe5d6faee3 |
rev | line source |
---|---|
963
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 import os, numpy |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 import theano |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 from pylearn.datasets.image_patches import ( |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 olshausen_field_1996_whitened_images, |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
6 extract_random_patches) |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
8 from .protocol import TensorFnDataset # protocol.py __init__.py |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
9 from .memo import memo |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
10 |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
11 import scipy.io |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
12 from pylearn.io import image_tiling |
1510
07b48bd449cd
Make a dataset ops use the new path system.
Frederic Bastien <nouiz@nouiz.org>
parents:
1285
diff
changeset
|
13 from pylearn.datasets.config import get_filepath_in_roots |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
14 |
963
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
@memo
def get_dataset(N,R,C,dtype,center,unitvar):
    """Return `N` random `R` x `C` patches as a matrix with one rasterized patch per row.

    Patches are extracted by `extract_random_patches` from the image stack
    returned by `olshausen_field_1996_whitened_images`, using a RandomState
    built from a fixed seed.

    :param N: number of patches to extract
    :param R: number of rows in each patch
    :param C: number of columns in each patch
    :param dtype: dtype the extracted patches are cast to
    :param center: if true, subtract from each column its mean over the N rows
    :param unitvar: if true, divide each column by its standard deviation
        over the N rows (floored at 1e-8 so constant columns do not divide
        by zero)

    :returns: array of shape (N, R*C)
    """
    # Fixed seed: presumably so repeated calls draw the same patches
    # (depends on extract_random_patches using only `rng` -- TODO confirm).
    seed=98234
    rng = numpy.random.RandomState(seed)
    img_stack = olshausen_field_1996_whitened_images()
    patch_stack = extract_random_patches(img_stack, N,R,C,rng)
    rval = patch_stack.astype(dtype).reshape((N,(R*C)))

    if center:
        rval -= rval.mean(axis=0)
    if unitvar:
        # numpy.maximum is the elementwise maximum.  numpy.max(a, 1e-8) would
        # interpret 1e-8 as the `axis` argument instead of a lower bound.
        rval /= numpy.maximum(rval.std(axis=0), 1e-8)

    return rval
963
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
29 |
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def image_patches(s_idx, dims,
        split='train', dtype=theano.config.floatX, rasterized=False,
        center=True,
        unitvar=True,
        fn=get_dataset):
    """Return the result of applying a `TensorFnDataset` op to `s_idx`, modulo N.

    NOTE: `rasterized` defaults to False, but only `rasterized=True` is
    supported, so callers must pass `rasterized=True` explicitly.

    :param s_idx: index (or indices) of the patches to fetch; taken modulo N
    :param dims: triple (N, R, C): number of patches, patch rows, patch columns
    :param split: only 'train' is supported
    :param dtype: dtype handed to the op and to `fn`
    :param rasterized: must be true; each patch is a vector of length R*C
    :param center: forwarded to `fn`
    :param unitvar: forwarded to `fn`
    :param fn: function called with (N, R, C, dtype, center, unitvar) that
        provides the data (presumably invoked lazily by the op -- see
        `TensorFnDataset`)

    :raises NotImplementedError: if `split` is not 'train' or `rasterized` is false
    :raises AssertionError: if the op output has neither 1 nor 2 dimensions
    """
    N,R,C=dims

    if split != 'train':
        raise NotImplementedError('train/test/valid splits for randomly sampled image patches?')

    if not rasterized:
        # Reshaping the rows back into (R, C) images is not supported yet.
        raise NotImplementedError('only rasterized=True is supported')

    op = TensorFnDataset(dtype, bcast=(False,), fn=(fn, (N,R,C,dtype,center,unitvar)), single_shape=(R*C,))
    x = op(s_idx%N)

    # Explicit raise rather than `assert`, so the check survives `python -O`.
    if x.ndim not in (1, 2):
        raise AssertionError('what happened?')

    return x
06f21a964bd8
datasets - added olshausen_field data loaders, and an image_patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
55 |
998
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
56 |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
57 |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
@memo
def ranzato_hinton_2010(path=None):
    """Return the dictionary that `scipy.io.loadmat` reads from the patches file.

    :param path: location of the .mat file.  When None, the file
        image_patches/mcRBM/training_colorpatches_16x16_demo.mat is looked up
        with `get_filepath_in_roots`.
    """
    if path is not None:
        return scipy.io.loadmat(path)

    relative_name = os.path.join('image_patches', 'mcRBM',
            'training_colorpatches_16x16_demo.mat')
    return scipy.io.loadmat(get_filepath_in_roots(relative_name))
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
def ranzato_hinton_2010_whitened_patches(path=None):
    """Return the 'whitendata' entry of the dataset file, cast to float32.

    This is the pca of the data, which is 10240 x 105.

    :param path: forwarded to `ranzato_hinton_2010`
    """
    whitened = ranzato_hinton_2010(path)['whitendata']
    return whitened.astype('float32')
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
70 |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
def undo_pca_filters_of_ranzato_hinton_2010(X, path=None):
    """Return tuple (R,G,B,None) of matrices for matrix `X` of filters (one per row)

    `X` is multiplied by the transpose of the 'invpcatransf' entry of the
    dataset file, and the product is split by columns: the first 256, the
    next 256, and the remainder.

    Return value can be passed to `image_tiling.tile_raster_images`.

    :param X: matrix of filters, one per row
    :param path: forwarded to `ranzato_hinton_2010`
    """
    inverse_pca = ranzato_hinton_2010(path)['invpcatransf']
    unprojected = numpy.dot(X, inverse_pca.T)

    red = unprojected[:,:256]
    green = unprojected[:,256:512]
    blue = unprojected[:,512:]
    return (red, green, blue, None)
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
79 |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
def save_filters_of_ranzato_hinton_2010(X, fname, min_dynamic_range=1e-3, data_path=None):
    """Tile the filters in `X` as 16x16 images and save the tiling to `fname`.

    :param X: matrix of filters (one per row), passed through
        `undo_pca_filters_of_ranzato_hinton_2010` before tiling
    :param fname: destination handed to `image_tiling.save_tiled_raster_images`
    :param min_dynamic_range: forwarded to `image_tiling.tile_raster_images`
    :param data_path: location of the dataset file (None for the default)
    """
    channels = undo_pca_filters_of_ranzato_hinton_2010(X, path=data_path)
    tiled = image_tiling.tile_raster_images(
            channels,
            img_shape=(16,16),
            min_dynamic_range=min_dynamic_range)
    image_tiling.save_tiled_raster_images(tiled, fname)
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
86 |
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
def ranzato_hinton_2010_op(s_idx,
        split='train',
        dtype=theano.config.floatX, rasterized=True,
        center=True,
        unitvar=True,
        fn=ranzato_hinton_2010_whitened_patches):
    """Return the result of applying a `TensorFnDataset` op to `s_idx`, modulo 10240.

    The op is built with `fn` as its data source and a per-example shape of
    (105,).

    :param s_idx: index (or indices) of the examples to fetch; taken modulo 10240
    :param split: only 'train' is supported
    :param dtype: only 'float32' is supported
    :param rasterized: must be true
    :param center: accepted but not used by this function
    :param unitvar: accepted but not used by this function
    :param fn: function handed to the op as its data source

    :raises NotImplementedError: for any unsupported `split`, `rasterized`
        or `dtype` value
    """
    if split != 'train':
        raise NotImplementedError('train/test/valid splits for randomly sampled image patches?')

    # the data is provided as PCA-sphered, so rasterizing does not make sense
    # TODO: add a param to enable/disable 'PCA', and if disabled, then consider
    #       rasterizing or not
    if not rasterized:
        raise NotImplementedError('only pca data is provided')

    if dtype != 'float32':
        raise NotImplementedError('dtype not float32')

    n_examples = 10240
    dataset_op = TensorFnDataset(dtype,
            bcast=(False,),
            fn=fn,
            single_shape=(105,))
    return dataset_op(s_idx % n_examples)
8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
971
diff
changeset
|
113 |