Mercurial > pylearn
comparison pylearn/algorithms/tests/test_mcRBM.py @ 1524:9d21919e2332
autopep8
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Fri, 02 Nov 2012 13:02:18 -0400 |
parents | 7f166d01bf8e |
children | 9c24a2bdbe90 |
comparison
equal
deleted
inserted
replaced
1523:0e0f9e8275a9 | 1524:9d21919e2332 |
---|---|
13 | 13 |
14 | 14 |
15 def _default_rbm_alloc(n_I, n_K=256, n_J=100): | 15 def _default_rbm_alloc(n_I, n_K=256, n_J=100): |
16 return mcRBM.alloc(n_I, n_K, n_J) | 16 return mcRBM.alloc(n_I, n_K, n_J) |
17 | 17 |
18 | |
18 def _default_trainer_alloc(rbm, train_batch, batchsize, initial_lr_per_example, | 19 def _default_trainer_alloc(rbm, train_batch, batchsize, initial_lr_per_example, |
19 l1_penalty, l1_penalty_start, persistent_chains): | 20 l1_penalty, l1_penalty_start, persistent_chains): |
20 return mcRBMTrainer.alloc(rbm, train_batch, batchsize, l1_penalty=l1_penalty, | 21 return mcRBMTrainer.alloc(rbm, train_batch, batchsize, |
21 l1_penalty_start=l1_penalty_start,persistent_chains=persistent_chains) | 22 l1_penalty=l1_penalty, |
22 | 23 l1_penalty_start=l1_penalty_start, |
23 | 24 persistent_chains=persistent_chains) |
24 def test_reproduce_ranzato_hinton_2010(dataset='MAR', as_unittest=True, n_train_iters=5000, | 25 |
26 | |
27 def test_reproduce_ranzato_hinton_2010( | |
28 dataset='MAR', as_unittest=True, n_train_iters=5000, | |
25 rbm_alloc=_default_rbm_alloc, trainer_alloc=_default_trainer_alloc, | 29 rbm_alloc=_default_rbm_alloc, trainer_alloc=_default_trainer_alloc, |
26 lr_per_example=.075, | 30 lr_per_example=.075, |
27 l1_penalty=1e-3, | 31 l1_penalty=1e-3, |
28 l1_penalty_start=1000, | 32 l1_penalty_start=1000, |
29 persistent_chains=True, | 33 persistent_chains=True, |
30 ): | 34 ): |
31 | 35 |
32 batchsize = 128 | 36 batchsize = 128 |
33 | 37 |
34 if dataset == 'MAR': | 38 if dataset == 'MAR': |
35 n_vis=105 | 39 n_vis = 105 |
36 n_patches=10240 | 40 n_patches = 10240 |
37 epoch_size=n_patches | 41 epoch_size = n_patches |
38 elif dataset=='cifar10patches8x8': | 42 elif dataset == 'cifar10patches8x8': |
39 R,C= 8,8 # the size of image patches | 43 R, C = 8, 8 # the size of image patches |
40 n_vis=96 # pca components | 44 n_vis = 96 # pca components |
41 epoch_size=batchsize*500 | 45 epoch_size = batchsize * 500 |
42 n_patches=epoch_size*20 | 46 n_patches = epoch_size * 20 |
43 elif dataset=='tinyimages_patches': | 47 elif dataset == 'tinyimages_patches': |
44 R,C=8,8 | 48 R, C = 8, 8 |
45 n_vis=81 | 49 n_vis = 81 |
46 epoch_size=batchsize*500 | 50 epoch_size = batchsize * 500 |
47 n_patches=epoch_size*20 | 51 n_patches = epoch_size * 20 |
48 else: | 52 else: |
49 R,C= 16,16 # the size of image patches | 53 R, C = 16, 16 # the size of image patches |
50 n_vis=R*C | 54 n_vis = R * C |
51 n_patches=100000 | 55 n_patches = 100000 |
52 epoch_size=n_patches | 56 epoch_size = n_patches |
53 | 57 |
54 def l2(X): | 58 def l2(X): |
55 return numpy.sqrt((X**2).sum()) | 59 return numpy.sqrt((X ** 2).sum()) |
56 | 60 |
57 if dataset == 'MAR': | 61 if dataset == 'MAR': |
58 tile = pylearn.dataset_ops.image_patches.save_filters_of_ranzato_hinton_2010 | 62 tile = pylearn.dataset_ops.image_patches.save_filters_of_ranzato_hinton_2010 |
59 elif dataset == 'cifar10patches8x8': | 63 elif dataset == 'cifar10patches8x8': |
60 def tile(X, fname): | 64 def tile(X, fname): |
61 _img = pylearn.datasets.cifar10.tile_rasterized_examples( | 65 _img = pylearn.datasets.cifar10.tile_rasterized_examples( |
62 pylearn.preprocessing.pca.pca_whiten_inverse( | 66 pylearn.preprocessing.pca.pca_whiten_inverse( |
63 pylearn.dataset_ops.cifar10.random_cifar_patches_pca( | 67 pylearn.dataset_ops.cifar10.random_cifar_patches_pca( |
64 n_vis, None, 'float32', n_patches, R, C,), | 68 n_vis, None, 'float32', n_patches, R, C,), |
65 X), | 69 X), |
66 img_shape=(R,C)) | 70 img_shape=(R, C)) |
67 image_tiling.save_tiled_raster_images(_img, fname) | 71 image_tiling.save_tiled_raster_images(_img, fname) |
68 elif dataset == 'tinyimages_patches': | 72 elif dataset == 'tinyimages_patches': |
69 tile = pylearn.dataset_ops.tinyimages.save_filters | 73 tile = pylearn.dataset_ops.tinyimages.save_filters |
70 else: | 74 else: |
71 def tile(X, fname): | 75 def tile(X, fname): |
72 _img = image_tiling.tile_raster_images(X, | 76 _img = image_tiling.tile_raster_images(X, |
73 img_shape=(R,C), | 77 img_shape=(R, C), |
74 min_dynamic_range=1e-2) | 78 min_dynamic_range=1e-2) |
75 image_tiling.save_tiled_raster_images(_img, fname) | 79 image_tiling.save_tiled_raster_images(_img, fname) |
76 | 80 |
77 batch_idx = tensor.iscalar() | 81 batch_idx = tensor.iscalar() |
78 batch_range =batch_idx * batchsize + numpy.arange(batchsize) | 82 batch_range = batch_idx * batchsize + numpy.arange(batchsize) |
79 | 83 |
80 if dataset == 'MAR': | 84 if dataset == 'MAR': |
81 train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range) | 85 train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range) |
82 elif dataset == 'cifar10patches8x8': | 86 elif dataset == 'cifar10patches8x8': |
83 train_batch = pylearn.dataset_ops.cifar10.cifar10_patches( | 87 train_batch = pylearn.dataset_ops.cifar10.cifar10_patches( |
84 batch_range, 'train', n_patches=n_patches, patch_size=(R,C), | 88 batch_range, 'train', n_patches=n_patches, patch_size=(R, C), |
85 pca_components=n_vis) | 89 pca_components=n_vis) |
86 elif dataset == 'tinyimages_patches': | 90 elif dataset == 'tinyimages_patches': |
87 train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range) | 91 train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range) |
88 else: | 92 else: |
89 train_batch = pylearn.dataset_ops.image_patches.image_patches( | 93 train_batch = pylearn.dataset_ops.image_patches.image_patches( |
90 s_idx = (batch_idx * batchsize + numpy.arange(batchsize)), | 94 s_idx=(batch_idx * batchsize + numpy.arange(batchsize)), |
91 dims = (n_patches,R,C), | 95 dims=(n_patches, R, C), |
92 center=True, | 96 center=True, |
93 unitvar=True, | 97 unitvar=True, |
94 dtype=theano.config.floatX, | 98 dtype=theano.config.floatX, |
95 rasterized=True) | 99 rasterized=True) |
96 | 100 |
103 batchsize, | 107 batchsize, |
104 initial_lr_per_example=lr_per_example, | 108 initial_lr_per_example=lr_per_example, |
105 l1_penalty=l1_penalty, | 109 l1_penalty=l1_penalty, |
106 l1_penalty_start=l1_penalty_start, | 110 l1_penalty_start=l1_penalty_start, |
107 persistent_chains=persistent_chains) | 111 persistent_chains=persistent_chains) |
108 rbm=trainer.rbm | 112 rbm = trainer.rbm |
109 | 113 |
110 if persistent_chains: | 114 if persistent_chains: |
111 grads = trainer.contrastive_grads() | 115 grads = trainer.contrastive_grads() |
112 learn_fn = theano.function([batch_idx], | 116 learn_fn = theano.function([batch_idx], |
113 outputs=[grads[0].norm(2), grads[0].norm(2), grads[1].norm(2)], | 117 outputs=[grads[0].norm(2), grads[0].norm(2), grads[1].norm(2)], |
114 updates=trainer.cd_updates()) | 118 updates=trainer.cd_updates()) |
115 else: | 119 else: |
116 learn_fn = theano.function([batch_idx], outputs=[], updates=trainer.cd_updates()) | 120 learn_fn = theano.function([batch_idx], outputs=[], |
121 updates=trainer.cd_updates()) | |
117 | 122 |
118 if persistent_chains: | 123 if persistent_chains: |
119 smplr = trainer.sampler | 124 smplr = trainer.sampler |
120 else: | 125 else: |
121 smplr = trainer._last_cd1_sampler | 126 smplr = trainer._last_cd1_sampler |
122 | 127 |
123 if dataset == 'cifar10patches8x8': | 128 if dataset == 'cifar10patches8x8': |
124 cPickle.dump( | 129 cPickle.dump( |
125 pylearn.dataset_ops.cifar10.random_cifar_patches_pca( | 130 pylearn.dataset_ops.cifar10.random_cifar_patches_pca( |
126 n_vis, None, 'float32', n_patches, R, C,), | 131 n_vis, None, 'float32', n_patches, R, C,), |
127 open('test_mcRBM.pca.pkl','w')) | 132 open('test_mcRBM.pca.pkl', 'w')) |
128 | 133 |
129 print "Learning..." | 134 print "Learning..." |
130 last_epoch = -1 | 135 last_epoch = -1 |
131 for jj in xrange(n_train_iters): | 136 for jj in xrange(n_train_iters): |
132 epoch = jj*batchsize / epoch_size | 137 epoch = jj * batchsize / epoch_size |
133 | 138 |
134 print_jj = epoch != last_epoch | 139 print_jj = epoch != last_epoch |
135 last_epoch = epoch | 140 last_epoch = epoch |
136 | 141 |
137 if as_unittest and epoch == 5: | 142 if as_unittest and epoch == 5: |
138 U = rbm.U.get_value(borrow=True) | 143 U = rbm.U.get_value(borrow=True) |
139 W = rbm.W.get_value(borrow=True) | 144 W = rbm.W.get_value(borrow=True) |
140 def allclose(a,b): | 145 |
141 return numpy.allclose(a,b,rtol=1.01,atol=1e-3) | 146 def allclose(a, b): |
147 return numpy.allclose(a, b, rtol=1.01, atol=1e-3) | |
142 print "" | 148 print "" |
143 print "--------------" | 149 print "--------------" |
144 print "assert allclose(l2(U), %f)"%l2(U) | 150 print "assert allclose(l2(U), %f)" % l2(U) |
145 print "assert allclose(l2(W), %f)"%l2(W) | 151 print "assert allclose(l2(W), %f)" % l2(W) |
146 print "assert allclose(U.min(), %f)"%U.min() | 152 print "assert allclose(U.min(), %f)" % U.min() |
147 print "assert allclose(U.max(), %f)"%U.max() | 153 print "assert allclose(U.max(), %f)" % U.max() |
148 print "assert allclose(W.min(),%f)"%W.min() | 154 print "assert allclose(W.min(),%f)" % W.min() |
149 print "assert allclose(W.max(), %f)"%W.max() | 155 print "assert allclose(W.max(), %f)" % W.max() |
150 print "--------------" | 156 print "--------------" |
151 | 157 |
152 assert allclose(l2(U), 21.351664) | 158 assert allclose(l2(U), 21.351664) |
153 assert allclose(l2(W), 6.275828) | 159 assert allclose(l2(W), 6.275828) |
154 assert allclose(U.min(), -1.176703) | 160 assert allclose(U.min(), -1.176703) |
155 assert allclose(U.max(), 0.859802) | 161 assert allclose(U.max(), 0.859802) |
156 assert allclose(W.min(),-0.223128) | 162 assert allclose(W.min(), -0.223128) |
157 assert allclose(W.max(), 0.227558 ) | 163 assert allclose(W.max(), 0.227558) |
158 | 164 |
159 break | 165 break |
160 | 166 |
161 if print_jj: | 167 if print_jj: |
162 if not as_unittest: | 168 if not as_unittest: |
163 tile(imgs_fn(jj), "imgs_%06i.png"%jj) | 169 tile(imgs_fn(jj), "imgs_%06i.png" % jj) |
164 if persistent_chains: | 170 if persistent_chains: |
165 tile(smplr.positions.value, "sample_%06i.png"%jj) | 171 tile(smplr.positions.value, "sample_%06i.png" % jj) |
166 tile(rbm.U.value.T, "U_%06i.png"%jj) | 172 tile(rbm.U.value.T, "U_%06i.png" % jj) |
167 tile(rbm.W.value.T, "W_%06i.png"%jj) | 173 tile(rbm.W.value.T, "W_%06i.png" % jj) |
168 | 174 |
169 print 'saving samples', jj, 'epoch', jj/(epoch_size/batchsize) | 175 print 'saving samples', jj, 'epoch', jj / (epoch_size / batchsize) |
170 | 176 |
171 print 'l2(U)', l2(rbm.U.get_value(borrow=True)), | 177 print 'l2(U)', l2(rbm.U.get_value(borrow=True)), |
172 print 'l2(W)', l2(rbm.W.get_value(borrow=True)), | 178 print 'l2(W)', l2(rbm.W.get_value(borrow=True)), |
173 print 'l1_penalty', | 179 print 'l1_penalty', |
174 try: | 180 try: |
182 print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(), | 188 print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(), |
183 print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max() | 189 print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max() |
184 | 190 |
185 if persistent_chains: | 191 if persistent_chains: |
186 print 'parts min', smplr.positions.get_value(borrow=True).min(), | 192 print 'parts min', smplr.positions.get_value(borrow=True).min(), |
187 print 'max',smplr.positions.get_value(borrow=True).max(), | 193 print 'max', smplr.positions.get_value(borrow=True).max(), |
188 print 'HMC step', smplr.stepsize.get_value(borrow=True), | 194 print 'HMC step', smplr.stepsize.get_value(borrow=True), |
189 print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True) | 195 print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True) |
190 | |
191 | 196 |
192 l2_of_Ugrad = learn_fn(jj) | 197 l2_of_Ugrad = learn_fn(jj) |
193 | 198 |
194 if persistent_chains and print_jj: | 199 if persistent_chains and print_jj: |
195 print 'l2(U_grad)', float(l2_of_Ugrad[0]), | 200 print 'l2(U_grad)', float(l2_of_Ugrad[0]), |
203 | 208 |
204 if not as_unittest: | 209 if not as_unittest: |
205 if jj % 2000 == 0: | 210 if jj % 2000 == 0: |
206 print '' | 211 print '' |
207 print 'Saving rbm...' | 212 print 'Saving rbm...' |
208 cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl'%jj, 'w'), -1) | 213 cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl' % jj, 'w'), -1) |
209 if persistent_chains: | 214 if persistent_chains: |
210 print 'Saving sampler...' | 215 print 'Saving sampler...' |
211 cPickle.dump(smplr, open('mcRBM.smplr.%06i.pkl'%jj, 'w'), -1) | 216 cPickle.dump(smplr, open( |
212 | 217 'mcRBM.smplr.%06i.pkl' % jj, 'w'), -1) |
213 | 218 |
214 if not as_unittest: | 219 if not as_unittest: |
215 return rbm, smplr | 220 return rbm, smplr |
216 | 221 |
217 | 222 |
218 def run_classif_experiment(checkpoint): | 223 def run_classif_experiment(checkpoint): |
219 | 224 |
220 R,C=8,8 | 225 R, C = 8, 8 |
221 n_vis=74 | 226 n_vis = 74 |
222 # PRETRAIN | 227 # PRETRAIN |
223 # | 228 # |
224 # extract 1 million 8x8 patches from TinyImages | 229 # extract 1 million 8x8 patches from TinyImages |
225 # pre-process them the right way | 230 # pre-process them the right way |
226 # find 74 dims of PCA | 231 # find 74 dims of PCA |
227 # filter patches through PCA | 232 # filter patches through PCA |
228 whitened_patches, pca_dct = pylearn.dataset_ops.tinyimages.main(n_imgs=100000, | 233 whitened_patches, pca_dct = pylearn.dataset_ops.tinyimages.main( |
229 max_components=n_vis, seed=234) | 234 n_imgs=100000, max_components=n_vis, seed=234) |
230 # | 235 # |
231 # Set up mcRBM Trainer | 236 # Set up mcRBM Trainer |
232 # Initialize P using topological 3x3 overlapping patches thing | 237 # Initialize P using topological 3x3 overlapping patches thing |
233 # start learning P matrix after 2 passes through dataset | 238 # start learning P matrix after 2 passes through dataset |
234 # | 239 # |
235 rbm_filename = 'mcRBM.rbm.%06i.pkl'%46000 | 240 rbm_filename = 'mcRBM.rbm.%06i.pkl' % 46000 |
236 try: | 241 try: |
237 open(rbm_filename).close() | 242 open(rbm_filename).close() |
238 load_mcrbm = True | 243 load_mcrbm = True |
239 except: | 244 except: |
240 load_mcrbm = False | 245 load_mcrbm = False |
243 print 'loading mcRBM from file', rbm_filename | 248 print 'loading mcRBM from file', rbm_filename |
244 rbm = cPickle.load(open(rbm_filename)) | 249 rbm = cPickle.load(open(rbm_filename)) |
245 | 250 |
246 else: | 251 else: |
247 print "Training mcRBM" | 252 print "Training mcRBM" |
248 batchsize=128 | 253 batchsize = 128 |
249 epoch_size=len(whitened_patches) | 254 epoch_size = len(whitened_patches) |
250 tile = pylearn.dataset_ops.tinyimages.save_filters | 255 tile = pylearn.dataset_ops.tinyimages.save_filters |
251 train_batch = theano.tensor.matrix() | 256 train_batch = theano.tensor.matrix() |
252 trainer = mcRBMTrainer.alloc_for_P( | 257 trainer = mcRBMTrainer.alloc_for_P( |
253 rbm=mcRBM_withP.alloc_topo_P(n_I=n_vis, n_J=81), | 258 rbm=mcRBM_withP.alloc_topo_P(n_I=n_vis, n_J=81), |
254 visible_batch=train_batch, | 259 visible_batch=train_batch, |
255 batchsize=batchsize, | 260 batchsize=batchsize, |
256 initial_lr_per_example=0.05, | 261 initial_lr_per_example=0.05, |
257 l1_penalty=1e-3, | 262 l1_penalty=1e-3, |
258 l1_penalty_start=sys.maxint, | 263 l1_penalty_start=sys.maxint, |
259 p_training_start=2*epoch_size//batchsize, | 264 p_training_start=2 * epoch_size // batchsize, |
260 persistent_chains=False) | 265 persistent_chains=False) |
261 rbm=trainer.rbm | 266 rbm = trainer.rbm |
262 learn_fn = theano.function([train_batch], outputs=[], updates=trainer.cd_updates()) | 267 learn_fn = theano.function([train_batch], outputs=[], |
268 updates=trainer.cd_updates()) | |
263 smplr = trainer._last_cd1_sampler | 269 smplr = trainer._last_cd1_sampler |
264 | 270 |
265 ii = 0 | 271 ii = 0 |
266 for i_epoch in range(6): | 272 for i_epoch in range(6): |
267 for i_batch in xrange(epoch_size // batchsize): | 273 for i_batch in xrange(epoch_size // batchsize): |
268 batch_vals = whitened_patches[i_batch*batchsize:(i_batch+1)*batchsize] | 274 batch_vals = whitened_patches[i_batch * |
275 batchsize:(i_batch + 1) * batchsize] | |
269 learn_fn(batch_vals) | 276 learn_fn(batch_vals) |
270 | 277 |
271 if (ii % 1000) == 0: | 278 if (ii % 1000) == 0: |
272 #tile(imgs_fn(ii), "imgs_%06i.png"%ii) | 279 #tile(imgs_fn(ii), "imgs_%06i.png"%ii) |
273 tile(rbm.U.value.T, "U_%06i.png"%ii) | 280 tile(rbm.U.value.T, "U_%06i.png" % ii) |
274 tile(rbm.W.value.T, "W_%06i.png"%ii) | 281 tile(rbm.W.value.T, "W_%06i.png" % ii) |
275 | 282 |
276 print 'saving samples', ii, 'epoch', i_epoch, i_batch | 283 print 'saving samples', ii, 'epoch', i_epoch, i_batch |
277 | 284 |
278 print 'l2(U)', l2(rbm.U.get_value(borrow=True)), | 285 print 'l2(U)', l2(rbm.U.get_value(borrow=True)), |
279 print 'l2(W)', l2(rbm.W.get_value(borrow=True)), | 286 print 'l2(W)', l2(rbm.W.get_value(borrow=True)), |
281 try: | 288 try: |
282 print trainer.effective_l1_penalty.get_value(borrow=True) | 289 print trainer.effective_l1_penalty.get_value(borrow=True) |
283 except: | 290 except: |
284 print trainer.effective_l1_penalty | 291 print trainer.effective_l1_penalty |
285 | 292 |
286 print 'U min max', rbm.U.get_value(borrow=True).min(), rbm.U.get_value(borrow=True).max(), | 293 print 'U min max', rbm.U.get_value( |
287 print 'W min max', rbm.W.get_value(borrow=True).min(), rbm.W.get_value(borrow=True).max(), | 294 borrow=True).min(), rbm.U.get_value(borrow=True).max(), |
288 print 'a min max', rbm.a.get_value(borrow=True).min(), rbm.a.get_value(borrow=True).max(), | 295 print 'W min max', rbm.W.get_value( |
289 print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(), | 296 borrow=True).min(), rbm.W.get_value(borrow=True).max(), |
290 print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max() | 297 print 'a min max', rbm.a.get_value( |
298 borrow=True).min(), rbm.a.get_value(borrow=True).max(), | |
299 print 'b min max', rbm.b.get_value( | |
300 borrow=True).min(), rbm.b.get_value(borrow=True).max(), | |
301 print 'c min max', rbm.c.get_value( | |
302 borrow=True).min(), rbm.c.get_value(borrow=True).max() | |
291 | 303 |
292 print 'HMC step', smplr.stepsize.get_value(borrow=True), | 304 print 'HMC step', smplr.stepsize.get_value(borrow=True), |
293 print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True) | 305 print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True) |
294 print 'P min max', rbm.P.get_value(borrow=True).min(), rbm.P.get_value(borrow=True).max(), | 306 print 'P min max', rbm.P.get_value( |
307 borrow=True).min(), rbm.P.get_value(borrow=True).max(), | |
295 print 'P_lr', trainer.p_lr.get_value(borrow=True) | 308 print 'P_lr', trainer.p_lr.get_value(borrow=True) |
296 print '' | 309 print '' |
297 print 'Saving rbm...' | 310 print 'Saving rbm...' |
298 cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl'%ii, 'w'), -1) | 311 cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl' % ii, 'w'), -1) |
299 | 312 |
300 ii += 1 | 313 ii += 1 |
301 | |
302 | 314 |
303 # extract convolutional features from the CIFAR10 data | 315 # extract convolutional features from the CIFAR10 data |
304 feat_filename = 'mcrbm_features.npy' | 316 feat_filename = 'mcrbm_features.npy' |
305 feat_filename = 'cifar10.features.46000.npy' | 317 feat_filename = 'cifar10.features.46000.npy' |
306 try: | 318 try: |
311 | 323 |
312 if load_features: | 324 if load_features: |
313 print 'Loading features from', feat_filename | 325 print 'Loading features from', feat_filename |
314 all_features = numpy.load(feat_filename, mmap_mode='r') | 326 all_features = numpy.load(feat_filename, mmap_mode='r') |
315 else: | 327 else: |
316 batchsize=100 | 328 batchsize = 100 |
317 feat_idx = tensor.lscalar() | 329 feat_idx = tensor.lscalar() |
318 feat_idx_range = feat_idx * batchsize + tensor.arange(batchsize) | 330 feat_idx_range = feat_idx * batchsize + tensor.arange(batchsize) |
319 train_batch_x, train_batch_y = pylearn.dataset_ops.cifar10.cifar10( | 331 train_batch_x, train_batch_y = pylearn.dataset_ops.cifar10.cifar10( |
320 feat_idx_range, | 332 feat_idx_range, |
321 split='all', | 333 split='all', |
322 dtype='uint8', | 334 dtype='uint8', |
323 rasterized=False, | 335 rasterized=False, |
324 color='rgb') | 336 color='rgb') |
325 | 337 |
326 WINDOW_SIZE=8 | 338 WINDOW_SIZE = 8 |
327 WINDOW_STRIDE=4 | 339 WINDOW_STRIDE = 4 |
328 | 340 |
329 # put these into shared vars because support for big matrix constants is bad, | 341 # put these into shared vars because support for big matrix |
330 # (comparing them is slow) | 342 # constants is bad, (comparing them is slow) |
331 pca_eigvecs = theano.shared(pca_dct['eig_vecs'].astype('float32')) | 343 pca_eigvecs = theano.shared(pca_dct['eig_vecs'].astype('float32')) |
332 pca_eigvals = theano.shared(pca_dct['eig_vals'].astype('float32')) | 344 pca_eigvals = theano.shared(pca_dct['eig_vals'].astype('float32')) |
333 pca_mean = theano.shared(pca_dct['mean'].astype('float32')) | 345 pca_mean = theano.shared(pca_dct['mean'].astype('float32')) |
334 | 346 |
335 def theano_pca_whiten(X): | 347 def theano_pca_whiten(X): |
336 #copying preprepcessing.pca.pca_whiten | 348 #copying preprepcessing.pca.pca_whiten |
337 return tensor.true_div( | 349 return tensor.true_div( |
338 tensor.dot(X-pca_mean, pca_eigvecs), | 350 tensor.dot(X - pca_mean, pca_eigvecs), |
339 tensor.sqrt(pca_eigvals)+1e-8) | 351 tensor.sqrt(pca_eigvals) + 1e-8) |
340 | 352 |
341 h_list = [] | 353 h_list = [] |
342 g_list = [] | 354 g_list = [] |
343 for r_offset in range(0, 32-WINDOW_SIZE+1, WINDOW_STRIDE): | 355 for r_offset in range(0, 32 - WINDOW_SIZE + 1, WINDOW_STRIDE): |
344 for c_offset in range(0, 32-WINDOW_SIZE+1, WINDOW_STRIDE): | 356 for c_offset in range(0, 32 - WINDOW_SIZE + 1, WINDOW_STRIDE): |
345 window = train_batch_x[:, r_offset:r_offset+WINDOW_SIZE, | 357 window = train_batch_x[:, r_offset:r_offset + WINDOW_SIZE, |
346 c_offset:c_offset+WINDOW_SIZE, :] | 358 c_offset:c_offset + WINDOW_SIZE, :] |
347 assert window.dtype=='uint8' | 359 assert window.dtype == 'uint8' |
348 | 360 |
349 #rasterize the patches | 361 #rasterize the patches |
350 raster_window = tensor.flatten(tensor.cast(window, 'float32'),2) | 362 raster_window = tensor.flatten(tensor.cast( |
363 window, 'float32'), 2) | |
351 | 364 |
352 #subtract off the mean of each image | 365 #subtract off the mean of each image |
353 raster_window = raster_window - raster_window.mean(axis=1).reshape((batchsize,1)) | 366 raster_window = raster_window - raster_window.mean(axis=1).reshape((batchsize,1)) |
354 | 367 |
355 h,g = rbm.expected_h_g_given_v(theano_pca_whiten(raster_window)) | 368 h, g = rbm.expected_h_g_given_v( |
369 theano_pca_whiten(raster_window)) | |
356 | 370 |
357 h_list.append(h) | 371 h_list.append(h) |
358 g_list.append(g) | 372 g_list.append(g) |
359 | 373 |
360 hg = tensor.concatenate(h_list + g_list, axis=1) | 374 hg = tensor.concatenate(h_list + g_list, axis=1) |
361 | 375 |
362 feat_fn = theano.function([feat_idx], hg) | 376 feat_fn = theano.function([feat_idx], hg) |
363 features = numpy.empty((60000, 11025), dtype='float32') | 377 features = numpy.empty((60000, 11025), dtype='float32') |
364 for i in xrange(60000//batchsize): | 378 for i in xrange(60000 // batchsize): |
365 if i % 100 == 0: | 379 if i % 100 == 0: |
366 print("feature batch %i"%i) | 380 print("feature batch %i" % i) |
367 features[i*batchsize:(i+1)*batchsize] = feat_fn(i) | 381 features[i * batchsize:(i + 1) * batchsize] = feat_fn(i) |
368 | 382 |
369 print("saving features to %s"%feat_filename) | 383 print("saving features to %s" % feat_filename) |
370 numpy.save(feat_filename, features) | 384 numpy.save(feat_filename, features) |
371 all_features = features | 385 all_features = features |
372 del features | 386 del features |
373 | 387 |
374 | |
375 # CLASSIFY FEATURES | 388 # CLASSIFY FEATURES |
376 | |
377 if 0: | 389 if 0: |
378 # nothing to load | 390 # nothing to load |
379 pass | 391 pass |
380 else: | 392 else: |
381 batchsize=100 | 393 batchsize = 100 |
382 | 394 |
383 if feat_filename.startswith('cifar'): | 395 if feat_filename.startswith('cifar'): |
384 learnrate = 0.002 | 396 learnrate = 0.002 |
385 l1_regularization = 0.004 | 397 l1_regularization = 0.004 |
386 anneal_epoch=100 | 398 anneal_epoch = 100 |
387 n_epochs = 500 | 399 n_epochs = 500 |
388 else: | 400 else: |
389 learnrate = 0.005 | 401 learnrate = 0.005 |
390 l1_regularization = 0.004 | 402 l1_regularization = 0.004 |
391 n_epochs = 100 | 403 n_epochs = 100 |
392 anneal_epoch=20 | 404 anneal_epoch = 20 |
393 | 405 |
394 x_i = tensor.matrix() | 406 x_i = tensor.matrix() |
395 y_i = tensor.ivector() | 407 y_i = tensor.ivector() |
396 lr = tensor.scalar() | 408 lr = tensor.scalar() |
397 #l1_regularization = float(sys.argv[1]) #1.e-3 | 409 #l1_regularization = float(sys.argv[1]) #1.e-3 |
398 #l2_regularization = float(sys.argv[2]) #1.e-3*0 | 410 #l2_regularization = float(sys.argv[2]) #1.e-3*0 |
399 | 411 |
400 feature_logreg = LogisticRegression.new(x_i, | 412 feature_logreg = LogisticRegression.new(x_i, |
401 n_in = 11025, n_out=10, | 413 n_in=11025, n_out=10, |
402 dtype=x_i.dtype) | 414 dtype=x_i.dtype) |
403 | 415 |
404 # marc'aurelio does this... | 416 # marc'aurelio does this... |
405 feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,10)*.02 | 417 feature_logreg.w.value = numpy.random.RandomState(44).randn(11025, |
418 10) * .02 | |
406 | 419 |
407 traincost = feature_logreg.nll(y_i).sum() | 420 traincost = feature_logreg.nll(y_i).sum() |
408 traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization | 421 traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization |
409 #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization | 422 #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization |
410 train_logreg_fn = theano.function([x_i, y_i, lr], | 423 train_logreg_fn = theano.function([x_i, y_i, lr], |
411 [feature_logreg.nll(y_i).mean(), | 424 [feature_logreg.nll(y_i).mean(), |
412 feature_logreg.errors(y_i).mean()], | 425 feature_logreg.errors(y_i).mean()], |
413 updates=pylearn.gd.sgd.sgd_updates( | 426 updates=pylearn.gd.sgd.sgd_updates( |
414 params=feature_logreg.params, | 427 params=feature_logreg.params, |
415 grads=tensor.grad(traincost, feature_logreg.params), | 428 grads=tensor.grad(traincost, feature_logreg.params), |
416 stepsizes=[lr,lr/10.])) | 429 stepsizes=[lr, lr / 10.])) |
417 | 430 |
418 all_labels = pylearn.dataset_ops.cifar10.all_data_labels('uint8')[1] | 431 all_labels = pylearn.dataset_ops.cifar10.all_data_labels('uint8')[1] |
419 pylearn.dataset_ops.cifar10.all_data_labels.forget() # clear memo cache | 432 pylearn.dataset_ops.cifar10.all_data_labels.forget() |
420 assert len(all_labels)==60000 | 433 # clear memo cache |
434 assert len(all_labels) == 60000 | |
421 if 0: | 435 if 0: |
422 print "Using validation set" | 436 print "Using validation set" |
423 train_labels = all_labels[:40000] | 437 train_labels = all_labels[:40000] |
424 valid_labels = all_labels[40000:50000] | 438 valid_labels = all_labels[40000:50000] |
425 test_labels = all_labels[50000:60000] | 439 test_labels = all_labels[50000:60000] |
436 test_features = all_features[50000:60000] | 450 test_features = all_features[50000:60000] |
437 | 451 |
438 if 1: | 452 if 1: |
439 print "Computing mean and std.dev" | 453 print "Computing mean and std.dev" |
440 train_mean = train_features.mean(axis=0) | 454 train_mean = train_features.mean(axis=0) |
441 train_std = train_features.std(axis=0)+1e-4 | 455 train_std = train_features.std(axis=0) + 1e-4 |
442 preproc = lambda x: (x-train_mean)/(0.1+train_std) | 456 preproc = lambda x: (x - train_mean) / (0.1 + train_std) |
443 else: | 457 else: |
444 print "Not centering data" | 458 print "Not centering data" |
445 preproc = lambda x:x | 459 preproc = lambda x: x |
446 | 460 |
447 for epoch in xrange(n_epochs): | 461 for epoch in xrange(n_epochs): |
448 print 'epoch', epoch | 462 print 'epoch', epoch |
449 # validate | 463 # validate |
450 # Marc'Aurelio, you crazy!! | 464 # Marc'Aurelio, you crazy!! |
451 # the division by batchsize is done in the cost function | 465 # the division by batchsize is done in the cost function |
452 e_lr = learnrate / (batchsize*max(1.0, numpy.floor(max(1., epoch/float(anneal_epoch))-2))) | 466 e_lr = learnrate / (batchsize * max(1.0, numpy.floor( |
467 max(1., epoch / float(anneal_epoch)) - 2))) | |
453 | 468 |
454 if valid_features is not None: | 469 if valid_features is not None: |
455 l01s = [] | 470 l01s = [] |
456 nlls = [] | 471 nlls = [] |
457 for i in xrange(10000/batchsize): | 472 for i in xrange(10000 / batchsize): |
458 x_i = valid_features[i*batchsize:(i+1)*batchsize] | 473 x_i = valid_features[i * batchsize:(i + 1) * batchsize] |
459 y_i = valid_labels[i*batchsize:(i+1)*batchsize] | 474 y_i = valid_labels[i * batchsize:(i + 1) * batchsize] |
460 | 475 |
461 #lr=0.0 -> no learning, safe for validation set | 476 #lr=0.0 -> no learning, safe for validation set |
462 nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0) | 477 nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0) |
463 nlls.append(nll) | 478 nlls.append(nll) |
464 l01s.append(l01) | 479 l01s.append(l01) |
466 | 481 |
467 # test | 482 # test |
468 | 483 |
469 l01s = [] | 484 l01s = [] |
470 nlls = [] | 485 nlls = [] |
471 for i in xrange(len(test_features)//batchsize): | 486 for i in xrange(len(test_features) // batchsize): |
472 x_i = test_features[i*batchsize:(i+1)*batchsize] | 487 x_i = test_features[i * batchsize:(i + 1) * batchsize] |
473 y_i = test_labels[i*batchsize:(i+1)*batchsize] | 488 y_i = test_labels[i * batchsize:(i + 1) * batchsize] |
474 | 489 |
475 #lr=0.0 -> no learning, safe for validation set | 490 #lr=0.0 -> no learning, safe for validation set |
476 nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0) | 491 nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0) |
477 nlls.append(nll) | 492 nlls.append(nll) |
478 l01s.append(l01) | 493 l01s.append(l01) |
479 print 'test log_reg', numpy.mean(nlls), numpy.mean(l01s) | 494 print 'test log_reg', numpy.mean(nlls), numpy.mean(l01s) |
480 | 495 |
481 #train | 496 #train |
482 l01s = [] | 497 l01s = [] |
483 nlls = [] | 498 nlls = [] |
484 for i in xrange(len(train_features)//batchsize): | 499 for i in xrange(len(train_features) // batchsize): |
485 x_i = train_features[i*batchsize:(i+1)*batchsize] | 500 x_i = train_features[i * batchsize:(i + 1) * batchsize] |
486 y_i = train_labels[i*batchsize:(i+1)*batchsize] | 501 y_i = train_labels[i * batchsize:(i + 1) * batchsize] |
487 nll, l01 = train_logreg_fn(preproc(x_i), y_i, e_lr) | 502 nll, l01 = train_logreg_fn(preproc(x_i), y_i, e_lr) |
488 nlls.append(nll) | 503 nlls.append(nll) |
489 l01s.append(l01) | 504 l01s.append(l01) |
490 print 'train log_reg', numpy.mean(nlls), numpy.mean(l01s) | 505 print 'train log_reg', numpy.mean(nlls), numpy.mean(l01s) |
491 | |
492 | |
493 | 506 |
494 | 507 |
495 import pickle as cPickle | 508 import pickle as cPickle |
496 #import cPickle | 509 #import cPickle |
497 if __name__ == '__main__': | 510 if __name__ == '__main__': |
498 if 0: | 511 if 0: |
499 #learning 16 x 16 pinwheel filters from official cifar patches (MAR) | 512 #learning 16 x 16 pinwheel filters from official cifar patches (MAR) |
500 rbm,smplr = test_reproduce_ranzato_hinton_2010( | 513 rbm, smplr = test_reproduce_ranzato_hinton_2010( |
501 as_unittest=False, | 514 as_unittest=False, |
502 n_train_iters=5000, | 515 n_train_iters=5000, |
503 rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81), | 516 rbm_alloc=lambda n_I: mcRBM_withP.alloc_topo_P(n_I, n_J=81), |
504 trainer_alloc=mcRBMTrainer.alloc_for_P, | 517 trainer_alloc=mcRBMTrainer.alloc_for_P, |
505 dataset='MAR' | 518 dataset='MAR' |
506 ) | 519 ) |
507 | 520 |
508 if 0: | 521 if 0: |
509 # pretraining settings | 522 # pretraining settings |
510 rbm,smplr = test_reproduce_ranzato_hinton_2010( | 523 rbm, smplr = test_reproduce_ranzato_hinton_2010( |
511 as_unittest=False, | 524 as_unittest=False, |
512 n_train_iters=60000, | 525 n_train_iters=60000, |
513 rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81), | 526 rbm_alloc=lambda n_I: mcRBM_withP.alloc_topo_P(n_I, n_J=81), |
514 trainer_alloc=mcRBMTrainer.alloc_for_P, | 527 trainer_alloc=mcRBMTrainer.alloc_for_P, |
515 lr_per_example=0.05, | 528 lr_per_example=0.05, |
516 dataset='tinyimages_patches', | 529 dataset='tinyimages_patches', |
517 l1_penalty=1e-3, | 530 l1_penalty=1e-3, |
518 l1_penalty_start=30000, | 531 l1_penalty_start=30000, |