Mercurial > pylearn
annotate pylearn/dataset_ops/tinyimages.py @ 1420:7374d676c9b0
dataset_ops/tinyimages - added a tinyimages_op that gives access to the full
dataset, not just patches.
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Fri, 04 Feb 2011 16:06:00 -0500 |
parents | cc3e3e596500 |
children | 93e5ce7ccd6d |
rev | line source |
---|---|
1321
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
1 """I'm not sure where to put this code. |
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
2 |
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
3 THIS IS NOT POLISHED LIBRARY CODE YET. |
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
4 |
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
5 """ |
1285 | 6 |
7 __authors__ = "James Bergstra" | |
8 __copyright__ = "(c) 2010, Universite de Montreal" | |
9 __license__ = "3-clause BSD License" | |
10 __contact__ = "bergstrj@iro.umontreal.ca" | |
11 | |
12 | |
13 import cPickle, logging, sys | |
14 import numpy | |
15 from pylearn.datasets import tinyimages, image_patches | |
16 import pylearn.preprocessing.pca | |
17 import theano | |
18 from pylearn.io import image_tiling | |
19 | |
20 from .protocol import TensorFnDataset # protocol.py __init__.py | |
21 from .memo import memo | |
22 | |
23 | |
24 # | |
25 # This part of the file (until main()) is for generating a dataset of image patches from the | |
26 # tinyimages dataset. These patches are used in the pretraining stage of the mcRBM training | |
27 # algorithm. | |
28 # | |
29 # Since the 'dataset' is properly seen as a cached-to-disk preprocessing derived from raw | |
30 # material in tinyimages, it is not a real dataset (with a standard disk location in the | |
31 # PYLEARN_DATA_ROOT root). | |
32 # | |
33 # Hopefully the upcoming pylearn library proposal will have a policy on how/where this sort of | |
34 # pre-processed data should be stored. For now it is stored in the current working directory. | |
35 # | |
36 | |
1420
7374d676c9b0
dataset_ops/tinyimages - added a tinyimages_op that gives access to the full
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1352
diff
changeset
|
def tinyimages_op(s_idx):
    """Build the symbolic expression tiny_images[s_idx] over the full dataset.

    The underlying op is a TensorFnDataset that obtains its data from
    tinyimages.get_memmapped_file and is declared with dtype uint8 and a
    per-example shape of (32, 32, 3).

    - scalar s_idx: the result is a tensor3 of shape 32,32,3 (uint8).
    - vector s_idx of len N: the result is a tensor4 of shape N,32,32,3
      (uint8).
    """
    image_shape = (32, 32, 3)
    # one non-broadcastable flag per image dimension
    not_broadcastable = (False,) * len(image_shape)
    full_dataset = TensorFnDataset(
        'uint8',
        bcast=not_broadcastable,
        fn=tinyimages.get_memmapped_file,
        single_shape=image_shape)
    return full_dataset(s_idx)
7374d676c9b0
dataset_ops/tinyimages - added a tinyimages_op that gives access to the full
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1352
diff
changeset
|
50 |
7374d676c9b0
dataset_ops/tinyimages - added a tinyimages_op that gives access to the full
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1352
diff
changeset
|
51 |
# Names of the on-disk caches produced by main().  They are plain relative
# paths, so they are resolved against the current working directory.
_raw_patch_file = 'tinydataset_raw.npy'       # raw patches extracted from the images
_pca_file = 'tinydataset_pca.pkl'             # pickled dict: mean, eig_vals, eig_vecs
_whitened_file = 'tinydataset_whitened.npy'   # PCA-whitened patches
55 | |
def normalize_channels(X, max_scale=5):
    """Map images from (0,1) to all reals so that each channel of each image has zero mean,
    [maximum] unit variance.

    Channels will not be scaled by more than max_scale, so the output variance might be smaller
    than 1.

    :param X: 4-d float array indexed as (image, row, column, channel) with all values in
        [0, 1].  Any number of channels is accepted.  X itself is never modified.
    :param max_scale: upper bound on the factor by which a centred channel is multiplied;
        equivalently, standard deviations below 1/max_scale are treated as 1/max_scale.

    :returns: a new array with the same shape and dtype as X.

    :raises TypeError: if X does not have a floating-point dtype.
    :raises ValueError: if X is not 4-dimensional or has values outside [0, 1].
    """
    # Validate before copying so that bad input does not cost a full copy of the data.
    if not str(X.dtype).startswith('float'):
        raise TypeError('X must have a floating-point dtype, got %s' % X.dtype)
    n_imgs, n_rows, n_cols, n_channels = X.shape
    if X.min() < 0:
        raise ValueError('min out of bounds')
    if X.max() > 1:
        raise ValueError('max out of bounds')

    # Dividing by at least this much caps the amplification at max_scale.
    min_std = 1.0 / max_scale

    rval = X.copy()
    for channel in range(n_channels):
        # one row per image, holding every pixel of this channel
        flat = rval[:, :, :, channel].reshape((n_imgs, n_rows * n_cols))
        flat -= flat.mean(axis=1).reshape((n_imgs, 1))
        flat /= numpy.maximum(flat.std(axis=1).reshape((n_imgs, 1)), min_std)
        rval[:, :, :, channel] = flat.reshape((n_imgs, n_rows, n_cols))
    return rval
84 | |
1352
cc3e3e596500
dataset_ops/tinyimages - added an img_shape optional flag to save_filters fns
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1333
diff
changeset
|
def save_filters_orig(X, fname, min_dynamic_range=1e-8, data_path=None, img_shape=(8,8),
        tile_shape=None):
    """
    Save filters X (encoded as whitened images) in the original image space.

    X is passed through the inverse of the PCA whitening stored in the cached
    PCA dictionary (see load_pca_dct), tiled into a single raster image and
    written to `fname`.

    NOTE(review): `min_dynamic_range` and `data_path` are accepted but never
    read; the tiling always uses min_dynamic_range=1e-6.
    """
    pca_dct = load_pca_dct()
    eigs = (pca_dct['eig_vals'], pca_dct['eig_vecs'])

    unwhitened = pylearn.preprocessing.pca.pca_whiten_inverse(eigs, X)
    tiled = image_tiling.tile_raster_images(
        unwhitened,
        img_shape=img_shape,
        min_dynamic_range=1e-6,
        tile_shape=tile_shape)
    image_tiling.save_tiled_raster_images(tiled, fname)
99 | |
1321
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
def extract_patches(n_imgs=1000*100, n_patches_per_image=10, patch_shape=(8,8), rng=None):
    """
    Extract `n_patches_per_image` random patches from each of the first
    `n_imgs` images yielded by tinyimages.image_generator().

    :param n_imgs: number of images to draw patches from; must be smaller than
        tinyimages.n_images.
    :param n_patches_per_image: number of patches taken from every image.
    :param patch_shape: (rows, cols) of each patch.
    :param rng: numpy RandomState used to position the patches.  When None, a
        fresh RandomState(234) is created for this call, so repeated calls
        with the default return the same patches.  (The generator used to be
        a single default-argument instance shared by every call.)

    :returns: uint8 array of shape
        (n_imgs * n_patches_per_image, rows, cols, 3); the patches of image i
        occupy rows [i*n_patches_per_image, (i+1)*n_patches_per_image).
    """
    if rng is None:
        rng = numpy.random.RandomState(234)
    R, C = patch_shape

    # check the request before allocating a potentially very large array
    assert n_imgs < tinyimages.n_images, 'n_imgs must be < tinyimages.n_images'

    dataset = numpy.empty((n_imgs * n_patches_per_image, R, C, 3), dtype='uint8')

    image_stream = tinyimages.image_generator()

    for i in range(n_imgs):
        img = next(image_stream)
        patches = image_patches.extract_random_patches(
                img.reshape((1, 32, 32, 3)),
                n_patches_per_image,
                R, C,
                rng)
        start = i * n_patches_per_image
        dataset[start:start + n_patches_per_image] = patches
    return dataset
1285 | 124 |
1321
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
def compute_pca_dct(X, use_only=100000, max_components=128, max_energy_fraction=.99):
    """
    Compute the PCA of a stack of image patches.

    X is indexed with four axes (the last three are flattened into one feature
    vector per example).  The data is converted to float32, the mean of every
    example is removed, then the mean over examples is removed, and the result
    is handed to pylearn.preprocessing.pca.pca_from_examples.

    Returns a dict with keys 'mean' (the mean over examples that was
    subtracted), 'eig_vecs' and 'eig_vals'.

    NOTE(review): `use_only` is currently not read; every row of X is used.
    """
    n_examples = X.shape[0]
    n_features = X.shape[1] * X.shape[2] * X.shape[3]

    # rasterize the images and switch to floats
    flat = X.reshape((n_examples, n_features)).astype('float32')

    # remove each image's own mean (ignoring channels) #TODO: IS THIS GOOD IDEA?
    image_means = flat.mean(axis=1).reshape((n_examples, 1))
    flat = flat - image_means

    # remove the global mean; pca_from_examples is told the data is centered
    data_mean = flat.mean(axis=0)
    centered = flat - data_mean

    eigs, _ = pylearn.preprocessing.pca.pca_from_examples(
            centered, max_components, max_energy_fraction, x_centered=True)
    eig_vals, eig_vecs = eigs

    print("Keeping %i principle components" % len(eig_vals))

    return {
        'mean': data_mean,
        'eig_vecs': eig_vecs,
        'eig_vals': eig_vals,
    }
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
154 |
ebcb76b38817
tinyimages - added main script to whiten patches
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1289
diff
changeset
|
def whiten_patches(raw_patches, pca_dct):
    """
    Whiten `raw_patches` with the PCA described by `pca_dct`.

    `pca_dct` must provide the keys 'mean', 'eig_vals' and 'eig_vecs' (the
    dict returned by compute_pca_dct has them).  The patches are processed in
    batches of 100: each batch is converted to float32, rasterized, has each
    patch's own mean removed, then pca_dct['mean'] removed, and is finally
    passed to pylearn.preprocessing.pca.pca_whiten.

    Returns a float32 array with one row per patch and len(eig_vals) columns.
    """
    eig_vals = pca_dct['eig_vals']
    n_patches = raw_patches.shape[0]

    whitened = numpy.empty((n_patches, len(eig_vals)), dtype='float32')

    print('%s %s' % ('allocated output of size', whitened.shape))

    batchsize = 100
    for start in range(0, n_patches, batchsize):
        stop = start + batchsize
        batch = numpy.asarray(raw_patches[start:stop], dtype='float32')
        # rasterize
        n_features = batch.shape[1] * batch.shape[2] * batch.shape[3]
        batch = batch.reshape((batch.shape[0], n_features))
        # remove image mean
        batch = batch - batch.mean(axis=1).reshape((batch.shape[0], 1))
        # remove pixel means (in place, so the batch keeps its float32 dtype)
        batch -= pca_dct['mean']
        whitened[start:stop] = pylearn.preprocessing.pca.pca_whiten(
                (pca_dct['eig_vals'], pca_dct['eig_vecs']), batch)
    return whitened
1285 | 178 |
1352
cc3e3e596500
dataset_ops/tinyimages - added an img_shape optional flag to save_filters fns
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1333
diff
changeset
|
def _cache_file_readable(path):
    """Return True if `path` can be opened for reading, False otherwise."""
    try:
        open(path).close()
    except IOError:
        return False
    return True


def main(n_imgs=1000, n_patches_per_image=10, max_components=128, seed=234, patch_shape=(8,8)):
    """
    Build (or reload) the whitened-patch dataset and its PCA.

    Three stages are run in order, each cached in a file in the current
    working directory: raw patches (_raw_patch_file), the PCA dictionary
    (_pca_file) and the whitened patches (_whitened_file).  A stage whose
    cache file can be opened is loaded from disk instead of being recomputed;
    in that case the arguments relevant to that stage are not consulted.

    :param n_imgs: number of images to extract patches from.
    :param n_patches_per_image: patches extracted per image.
    :param max_components: upper bound on the number of PCA components.
    :param seed: seed of the RandomState used for extraction and shuffling.
    :param patch_shape: (rows, cols) of the patches.

    :returns: the pair (whitened_patches, pca_dct).
    """
    if 0: #do this to render the dataset to the screen
        sys.exit(glviewer())

    rng = numpy.random.RandomState(seed)

    # -- stage 1: raw patches
    if _cache_file_readable(_raw_patch_file):
        print('loading raw patches from %s' % _raw_patch_file)
        raw_patches = numpy.load(_raw_patch_file, mmap_mode='r')
    else:
        print('extracting raw patches')
        raw_patches = extract_patches(rng=rng, n_imgs=n_imgs,
                n_patches_per_image=n_patches_per_image, patch_shape=patch_shape)
        rng.shuffle(raw_patches)
        print('saving raw patches to %s' % _raw_patch_file)
        # the with-block guarantees the data is flushed and the handle closed
        with open(_raw_patch_file, 'wb') as f:
            numpy.save(f, raw_patches)

    # -- stage 2: PCA
    if _cache_file_readable(_pca_file):
        print('loading pca from %s' % _pca_file)
        # binary mode matches the 'wb' mode the file is written with below
        with open(_pca_file, 'rb') as f:
            pca_dct = cPickle.load(f)
    else:
        print('computing pca')
        pca_dct = compute_pca_dct(raw_patches, max_components=max_components)
        print('saving pca to %s' % _pca_file)
        with open(_pca_file, 'wb') as f:
            cPickle.dump(pca_dct, f)

    # -- stage 3: whitened patches
    if _cache_file_readable(_whitened_file):
        print('loading whitened patches from %s' % _whitened_file)
        whitened_patches = numpy.load(_whitened_file, mmap_mode='r')
    else:
        print('computing whitened data')
        whitened_patches = whiten_patches(raw_patches, pca_dct)
        print('saving %s' % _whitened_file)
        numpy.save(_whitened_file, whitened_patches)

    return whitened_patches, pca_dct
1285 | 233 |
234 # | |
235 # This part of the file defines an op-constructor that uses the pre-processed cache / dataset generated | |
236 # | |
237 | |
@memo
def load_whitened(path=_whitened_file):
    """
    Return the whitened patches stored at `path`, opened as a read-only
    memory map.

    If loading fails for any reason, a hint about generating the file is
    written to stderr and the original exception is re-raised.

    Replacement for dataset_ops.image_patches.ranzato_hinton_2010_op
    """
    try:
        patches = numpy.load(path, mmap_mode='r')
    except:
        # deliberately catch-all: the exception is always re-raised below
        hint = "Maybe you need to run 'python pylearn.dataset_ops.tinyimages'?"
        sys.stderr.write(hint + '\n')
        raise
    return patches
248 | |
@memo
def load_pca_dct(path=_pca_file):
    """
    Return the object unpickled from `path`.

    By default this is the PCA cache file, whose content is used elsewhere in
    this module as a dict with keys 'mean', 'eig_vals' and 'eig_vecs'.

    The file is opened in binary mode and closed as soon as it has been read.
    """
    # NOTE: unpickling can execute arbitrary code -- only use trusted files.
    with open(path, 'rb') as f:
        return cPickle.load(f)
252 | |
def tinydataset_op(s_idx,
        split='train',
        fn=load_whitened):
    """
    Return the symbolic expression for the whitened patches at index `s_idx`.

    `fn` is called once here to learn the dataset's (n_examples, n_dim)
    shape, and is also given to the TensorFnDataset op as its data source.
    The index is taken modulo n_examples.

    Only split='train' is supported; any other value raises
    NotImplementedError.
    """
    data_shape = fn().shape
    n_examples, n_dim = data_shape

    if split != 'train':
        raise NotImplementedError('train/test/valid splits for randomly sampled image patches?')

    patch_op = TensorFnDataset('float32', bcast=(False,), fn=fn, single_shape=(n_dim,))
    wrapped_idx = s_idx % n_examples
    return patch_op(wrapped_idx)
265 | |
266 | |
1352
cc3e3e596500
dataset_ops/tinyimages - added an img_shape optional flag to save_filters fns
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1333
diff
changeset
|
def save_filters(X, fname, tile_shape=None, img_shape=(8,8)):
    """
    Save filters X (expressed in the whitened space) as a tiled colour image.

    X is mapped back through the inverse PCA whitening and the stored mean is
    added.  Columns 0::3, 1::3 and 2::3 of the result are passed to
    tile_raster_images as three separate channels (the fourth entry is None),
    and the tiled image is written to `fname`.
    """
    pca_dct = load_pca_dct()
    eigs = (pca_dct['eig_vals'], pca_dct['eig_vecs'])
    mean = pca_dct['mean']

    rasterized = pylearn.preprocessing.pca.pca_whiten_inverse(eigs, X) + mean
    # the three channels are interleaved along the feature axis
    channels = tuple(rasterized[:, offset::3] for offset in (0, 1, 2))

    tiled = image_tiling.tile_raster_images(
        channels + (None,),
        img_shape=img_shape,
        min_dynamic_range=1e-6,
        tile_shape=tile_shape)
    image_tiling.save_tiled_raster_images(tiled, fname)
278 | |
def glviewer(split='train'):
    """
    Browse the cached patches in a GlViewer window, 5000 patches per page.

    The shapes of the raw and of the whitened cache files are printed first.
    The local flag `view_raw` selects what is displayed: the raw patches
    (True, the setting in the code) or the whitened patches mapped back to
    image space (False).

    NOTE(review): `split` is not used.
    """
    from glviewer import GlViewer
    page_size = 5000

    shape_probe = numpy.load(_raw_patch_file, mmap_mode='r')
    print('%s %s' % ('RAW', shape_probe.shape))
    shape_probe = numpy.load(_whitened_file, mmap_mode='r')
    print('%s %s' % ('WHI', shape_probe.shape))

    view_raw = True  # developer switch: False shows the whitened data instead

    if view_raw:
        # check the raw data
        data = numpy.load(_raw_patch_file, mmap_mode='r')
        n_patches, n_rows, n_cols, n_channels = (
            data.shape[0], data.shape[1], data.shape[2], data.shape[3])
        data = data.reshape((n_patches, n_rows * n_cols, n_channels))

        def f(i):
            lo = i * page_size
            hi = lo + page_size
            page = data[lo:hi]
            return image_tiling.tile_raster_images(
                    (page[:, :, 0], page[:, :, 1], page[:, :, 2], None),
                    img_shape=(8,8))
    else:
        # check the whitened data
        pca_dct = load_pca_dct()
        eigs = (pca_dct['eig_vals'], pca_dct['eig_vecs'])
        mean = pca_dct['mean']
        data = numpy.load(_whitened_file, mmap_mode='r')

        def f(i):
            lo = i * page_size
            hi = lo + page_size
            page = data[lo:hi]
            print('%s %s %s' % ('j', lo, hi))
            rasterized = pylearn.preprocessing.pca.pca_whiten_inverse(eigs, page) + mean
            return image_tiling.tile_raster_images(
                    (rasterized[:, ::3], rasterized[:, 1::3], rasterized[:, 2::3], None),
                    img_shape=(8,8),
                    min_dynamic_range=1e-6)

    GlViewer(f).main()
314 | |
315 | |
316 | |
if __name__=='__main__':
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    # main() returns a (whitened_patches, pca_dct) tuple.  Handing that to
    # sys.exit() would print it to stderr and exit with status 1, so the
    # result is dropped and the script ends with the normal status 0.
    main()
320 | |
321 |