comparison pylearn/algorithms/tests/test_mcRBM.py @ 1524:9d21919e2332

autopep8
author Frederic Bastien <nouiz@nouiz.org>
date Fri, 02 Nov 2012 13:02:18 -0400
parents 7f166d01bf8e
children 9c24a2bdbe90
comparing 1523:0e0f9e8275a9 with 1524:9d21919e2332
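
The change below is purely mechanical PEP 8 reformatting; no behavior changes. For reference, a diff like this one can be regenerated with the autopep8 package (a sketch, assuming autopep8 is installed; the exact invocation used for this commit is not recorded):

    # hypothetical reproduction of this commit; autopep8.fix_code() returns
    # the PEP8-formatted source without changing semantics
    import autopep8
    path = 'pylearn/algorithms/tests/test_mcRBM.py'
    src = open(path).read()
    open(path, 'w').write(autopep8.fix_code(src))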
@@ -13 +13 @@
 
 
 def _default_rbm_alloc(n_I, n_K=256, n_J=100):
     return mcRBM.alloc(n_I, n_K, n_J)
 
+
 def _default_trainer_alloc(rbm, train_batch, batchsize, initial_lr_per_example,
                            l1_penalty, l1_penalty_start, persistent_chains):
-    return mcRBMTrainer.alloc(rbm, train_batch, batchsize, l1_penalty=l1_penalty,
-            l1_penalty_start=l1_penalty_start,persistent_chains=persistent_chains)
-
-
-def test_reproduce_ranzato_hinton_2010(dataset='MAR', as_unittest=True, n_train_iters=5000,
+    return mcRBMTrainer.alloc(rbm, train_batch, batchsize,
+                              l1_penalty=l1_penalty,
+                              l1_penalty_start=l1_penalty_start,
+                              persistent_chains=persistent_chains)
+
+
+def test_reproduce_ranzato_hinton_2010(
+        dataset='MAR', as_unittest=True, n_train_iters=5000,
         rbm_alloc=_default_rbm_alloc, trainer_alloc=_default_trainer_alloc,
         lr_per_example=.075,
         l1_penalty=1e-3,
         l1_penalty_start=1000,
         persistent_chains=True,
         ):
 
     batchsize = 128
 
     if dataset == 'MAR':
-        n_vis=105
-        n_patches=10240
-        epoch_size=n_patches
-    elif dataset=='cifar10patches8x8':
-        R,C= 8,8 # the size of image patches
-        n_vis=96 # pca components
-        epoch_size=batchsize*500
-        n_patches=epoch_size*20
-    elif dataset=='tinyimages_patches':
-        R,C=8,8
-        n_vis=81
-        epoch_size=batchsize*500
-        n_patches=epoch_size*20
+        n_vis = 105
+        n_patches = 10240
+        epoch_size = n_patches
+    elif dataset == 'cifar10patches8x8':
+        R, C = 8, 8  # the size of image patches
+        n_vis = 96  # pca components
+        epoch_size = batchsize * 500
+        n_patches = epoch_size * 20
+    elif dataset == 'tinyimages_patches':
+        R, C = 8, 8
+        n_vis = 81
+        epoch_size = batchsize * 500
+        n_patches = epoch_size * 20
     else:
-        R,C= 16,16 # the size of image patches
-        n_vis=R*C
-        n_patches=100000
-        epoch_size=n_patches
+        R, C = 16, 16  # the size of image patches
+        n_vis = R * C
+        n_patches = 100000
+        epoch_size = n_patches
 
     def l2(X):
-        return numpy.sqrt((X**2).sum())
+        return numpy.sqrt((X ** 2).sum())
 
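The l2 helper above is the Euclidean (Frobenius) norm of the array, flattened; a standalone check (not part of the diff):

    import numpy
    def l2(X):
        return numpy.sqrt((X ** 2).sum())
    assert l2(numpy.ones((3, 3))) == 3.0  # sqrt of nine ones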
     if dataset == 'MAR':
         tile = pylearn.dataset_ops.image_patches.save_filters_of_ranzato_hinton_2010
     elif dataset == 'cifar10patches8x8':
         def tile(X, fname):
             _img = pylearn.datasets.cifar10.tile_rasterized_examples(
                 pylearn.preprocessing.pca.pca_whiten_inverse(
                     pylearn.dataset_ops.cifar10.random_cifar_patches_pca(
                         n_vis, None, 'float32', n_patches, R, C,),
                     X),
-                img_shape=(R,C))
+                img_shape=(R, C))
             image_tiling.save_tiled_raster_images(_img, fname)
     elif dataset == 'tinyimages_patches':
         tile = pylearn.dataset_ops.tinyimages.save_filters
     else:
         def tile(X, fname):
             _img = image_tiling.tile_raster_images(X,
-                    img_shape=(R,C),
+                    img_shape=(R, C),
                     min_dynamic_range=1e-2)
             image_tiling.save_tiled_raster_images(_img, fname)
 
     batch_idx = tensor.iscalar()
-    batch_range =batch_idx * batchsize + numpy.arange(batchsize)
+    batch_range = batch_idx * batchsize + numpy.arange(batchsize)
 
     if dataset == 'MAR':
         train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range)
     elif dataset == 'cifar10patches8x8':
         train_batch = pylearn.dataset_ops.cifar10.cifar10_patches(
-            batch_range, 'train', n_patches=n_patches, patch_size=(R,C),
+            batch_range, 'train', n_patches=n_patches, patch_size=(R, C),
             pca_components=n_vis)
     elif dataset == 'tinyimages_patches':
         train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range)
     else:
         train_batch = pylearn.dataset_ops.image_patches.image_patches(
-            s_idx = (batch_idx * batchsize + numpy.arange(batchsize)),
-            dims = (n_patches,R,C),
+            s_idx=(batch_idx * batchsize + numpy.arange(batchsize)),
+            dims=(n_patches, R, C),
             center=True,
             unitvar=True,
             dtype=theano.config.floatX,
             rasterized=True)
 
@@ -103 +107 @@
             batchsize,
             initial_lr_per_example=lr_per_example,
             l1_penalty=l1_penalty,
             l1_penalty_start=l1_penalty_start,
             persistent_chains=persistent_chains)
-    rbm=trainer.rbm
+    rbm = trainer.rbm
 
     if persistent_chains:
         grads = trainer.contrastive_grads()
         learn_fn = theano.function([batch_idx],
                 outputs=[grads[0].norm(2), grads[0].norm(2), grads[1].norm(2)],
                 updates=trainer.cd_updates())
     else:
-        learn_fn = theano.function([batch_idx], outputs=[], updates=trainer.cd_updates())
+        learn_fn = theano.function([batch_idx], outputs=[],
+                                   updates=trainer.cd_updates())
 
     if persistent_chains:
         smplr = trainer.sampler
     else:
         smplr = trainer._last_cd1_sampler
 
     if dataset == 'cifar10patches8x8':
         cPickle.dump(
             pylearn.dataset_ops.cifar10.random_cifar_patches_pca(
                 n_vis, None, 'float32', n_patches, R, C,),
-            open('test_mcRBM.pca.pkl','w'))
+            open('test_mcRBM.pca.pkl', 'w'))
 
     print "Learning..."
     last_epoch = -1
     for jj in xrange(n_train_iters):
-        epoch = jj*batchsize / epoch_size
+        epoch = jj * batchsize / epoch_size
 
         print_jj = epoch != last_epoch
         last_epoch = epoch
 
         if as_unittest and epoch == 5:
             U = rbm.U.get_value(borrow=True)
             W = rbm.W.get_value(borrow=True)
-            def allclose(a,b):
-                return numpy.allclose(a,b,rtol=1.01,atol=1e-3)
+
+            def allclose(a, b):
+                return numpy.allclose(a, b, rtol=1.01, atol=1e-3)
             print ""
             print "--------------"
-            print "assert allclose(l2(U), %f)"%l2(U)
-            print "assert allclose(l2(W), %f)"%l2(W)
-            print "assert allclose(U.min(), %f)"%U.min()
-            print "assert allclose(U.max(), %f)"%U.max()
-            print "assert allclose(W.min(),%f)"%W.min()
-            print "assert allclose(W.max(), %f)"%W.max()
+            print "assert allclose(l2(U), %f)" % l2(U)
+            print "assert allclose(l2(W), %f)" % l2(W)
+            print "assert allclose(U.min(), %f)" % U.min()
+            print "assert allclose(U.max(), %f)" % U.max()
+            print "assert allclose(W.min(),%f)" % W.min()
+            print "assert allclose(W.max(), %f)" % W.max()
             print "--------------"
 
             assert allclose(l2(U), 21.351664)
             assert allclose(l2(W), 6.275828)
             assert allclose(U.min(), -1.176703)
             assert allclose(U.max(), 0.859802)
-            assert allclose(W.min(),-0.223128)
-            assert allclose(W.max(), 0.227558 )
+            assert allclose(W.min(), -0.223128)
+            assert allclose(W.max(), 0.227558)
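
Worth noticing while reading this hunk: rtol=1.01 makes these checks very loose, since numpy.allclose tests |a - b| <= atol + rtol * |b|, so a relative error of roughly 100% still passes. A standalone illustration (not part of the diff):

    import numpy
    # |2 - 1| = 1 <= 1e-3 + 1.01 * |1|, so this passes despite a 100% error
    assert numpy.allclose(2.0, 1.0, rtol=1.01, atol=1e-3)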
 
             break
 
         if print_jj:
             if not as_unittest:
-                tile(imgs_fn(jj), "imgs_%06i.png"%jj)
+                tile(imgs_fn(jj), "imgs_%06i.png" % jj)
             if persistent_chains:
-                tile(smplr.positions.value, "sample_%06i.png"%jj)
-            tile(rbm.U.value.T, "U_%06i.png"%jj)
-            tile(rbm.W.value.T, "W_%06i.png"%jj)
+                tile(smplr.positions.value, "sample_%06i.png" % jj)
+            tile(rbm.U.value.T, "U_%06i.png" % jj)
+            tile(rbm.W.value.T, "W_%06i.png" % jj)
 
-            print 'saving samples', jj, 'epoch', jj/(epoch_size/batchsize)
+            print 'saving samples', jj, 'epoch', jj / (epoch_size / batchsize)
 
             print 'l2(U)', l2(rbm.U.get_value(borrow=True)),
             print 'l2(W)', l2(rbm.W.get_value(borrow=True)),
             print 'l1_penalty',
             try:
@@ -182 +188 @@
             print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(),
             print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max()
 
             if persistent_chains:
                 print 'parts min', smplr.positions.get_value(borrow=True).min(),
-                print 'max',smplr.positions.get_value(borrow=True).max(),
+                print 'max', smplr.positions.get_value(borrow=True).max(),
                 print 'HMC step', smplr.stepsize.get_value(borrow=True),
                 print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True)
-
 
         l2_of_Ugrad = learn_fn(jj)
 
         if persistent_chains and print_jj:
             print 'l2(U_grad)', float(l2_of_Ugrad[0]),
@@ -203 +208 @@
 
         if not as_unittest:
             if jj % 2000 == 0:
                 print ''
                 print 'Saving rbm...'
-                cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl'%jj, 'w'), -1)
+                cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl' % jj, 'w'), -1)
                 if persistent_chains:
                     print 'Saving sampler...'
-                    cPickle.dump(smplr, open('mcRBM.smplr.%06i.pkl'%jj, 'w'), -1)
-
+                    cPickle.dump(smplr, open(
+                        'mcRBM.smplr.%06i.pkl' % jj, 'w'), -1)
 
     if not as_unittest:
         return rbm, smplr
 
 
 def run_classif_experiment(checkpoint):
 
-    R,C=8,8
-    n_vis=74
+    R, C = 8, 8
+    n_vis = 74
     # PRETRAIN
     #
     # extract 1 million 8x8 patches from TinyImages
     # pre-process them the right way
     # find 74 dims of PCA
     # filter patches through PCA
-    whitened_patches, pca_dct = pylearn.dataset_ops.tinyimages.main(n_imgs=100000,
-            max_components=n_vis, seed=234)
+    whitened_patches, pca_dct = pylearn.dataset_ops.tinyimages.main(
+        n_imgs=100000, max_components=n_vis, seed=234)
     #
     # Set up mcRBM Trainer
     # Initialize P using topological 3x3 overlapping patches thing
     # start learning P matrix after 2 passes through dataset
     #
-    rbm_filename = 'mcRBM.rbm.%06i.pkl'%46000
+    rbm_filename = 'mcRBM.rbm.%06i.pkl' % 46000
     try:
         open(rbm_filename).close()
         load_mcrbm = True
     except:
         load_mcrbm = False
@@ -243 +248 @@
         print 'loading mcRBM from file', rbm_filename
         rbm = cPickle.load(open(rbm_filename))
 
     else:
         print "Training mcRBM"
-        batchsize=128
-        epoch_size=len(whitened_patches)
+        batchsize = 128
+        epoch_size = len(whitened_patches)
         tile = pylearn.dataset_ops.tinyimages.save_filters
         train_batch = theano.tensor.matrix()
         trainer = mcRBMTrainer.alloc_for_P(
             rbm=mcRBM_withP.alloc_topo_P(n_I=n_vis, n_J=81),
             visible_batch=train_batch,
             batchsize=batchsize,
             initial_lr_per_example=0.05,
             l1_penalty=1e-3,
             l1_penalty_start=sys.maxint,
-            p_training_start=2*epoch_size//batchsize,
+            p_training_start=2 * epoch_size // batchsize,
             persistent_chains=False)
-        rbm=trainer.rbm
-        learn_fn = theano.function([train_batch], outputs=[], updates=trainer.cd_updates())
+        rbm = trainer.rbm
+        learn_fn = theano.function([train_batch], outputs=[],
+                                   updates=trainer.cd_updates())
         smplr = trainer._last_cd1_sampler
 
         ii = 0
         for i_epoch in range(6):
             for i_batch in xrange(epoch_size // batchsize):
-                batch_vals = whitened_patches[i_batch*batchsize:(i_batch+1)*batchsize]
+                batch_vals = whitened_patches[i_batch *
+                                              batchsize:(i_batch + 1) * batchsize]
                 learn_fn(batch_vals)
 
                 if (ii % 1000) == 0:
                     #tile(imgs_fn(ii), "imgs_%06i.png"%ii)
-                    tile(rbm.U.value.T, "U_%06i.png"%ii)
-                    tile(rbm.W.value.T, "W_%06i.png"%ii)
+                    tile(rbm.U.value.T, "U_%06i.png" % ii)
+                    tile(rbm.W.value.T, "W_%06i.png" % ii)
 
                     print 'saving samples', ii, 'epoch', i_epoch, i_batch
 
                     print 'l2(U)', l2(rbm.U.get_value(borrow=True)),
                     print 'l2(W)', l2(rbm.W.get_value(borrow=True)),
@@ -281 +288 @@
                     try:
                         print trainer.effective_l1_penalty.get_value(borrow=True)
                     except:
                         print trainer.effective_l1_penalty
 
-                    print 'U min max', rbm.U.get_value(borrow=True).min(), rbm.U.get_value(borrow=True).max(),
-                    print 'W min max', rbm.W.get_value(borrow=True).min(), rbm.W.get_value(borrow=True).max(),
-                    print 'a min max', rbm.a.get_value(borrow=True).min(), rbm.a.get_value(borrow=True).max(),
-                    print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(),
-                    print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max()
+                    print 'U min max', rbm.U.get_value(
+                        borrow=True).min(), rbm.U.get_value(borrow=True).max(),
+                    print 'W min max', rbm.W.get_value(
+                        borrow=True).min(), rbm.W.get_value(borrow=True).max(),
+                    print 'a min max', rbm.a.get_value(
+                        borrow=True).min(), rbm.a.get_value(borrow=True).max(),
+                    print 'b min max', rbm.b.get_value(
+                        borrow=True).min(), rbm.b.get_value(borrow=True).max(),
+                    print 'c min max', rbm.c.get_value(
+                        borrow=True).min(), rbm.c.get_value(borrow=True).max()
 
                     print 'HMC step', smplr.stepsize.get_value(borrow=True),
                     print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True)
-                    print 'P min max', rbm.P.get_value(borrow=True).min(), rbm.P.get_value(borrow=True).max(),
+                    print 'P min max', rbm.P.get_value(
+                        borrow=True).min(), rbm.P.get_value(borrow=True).max(),
                     print 'P_lr', trainer.p_lr.get_value(borrow=True)
                     print ''
                     print 'Saving rbm...'
-                    cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl'%ii, 'w'), -1)
+                    cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl' % ii, 'w'), -1)
 
                 ii += 1
-
 
     # extract convolutional features from the CIFAR10 data
     feat_filename = 'mcrbm_features.npy'
     feat_filename = 'cifar10.features.46000.npy'
     try:
@@ -311 +323 @@
 
     if load_features:
         print 'Loading features from', feat_filename
         all_features = numpy.load(feat_filename, mmap_mode='r')
     else:
-        batchsize=100
+        batchsize = 100
         feat_idx = tensor.lscalar()
         feat_idx_range = feat_idx * batchsize + tensor.arange(batchsize)
         train_batch_x, train_batch_y = pylearn.dataset_ops.cifar10.cifar10(
             feat_idx_range,
             split='all',
             dtype='uint8',
             rasterized=False,
             color='rgb')
 
-        WINDOW_SIZE=8
-        WINDOW_STRIDE=4
+        WINDOW_SIZE = 8
+        WINDOW_STRIDE = 4
 
-        # put these into shared vars because support for big matrix constants is bad,
-        # (comparing them is slow)
+        # put these into shared vars because support for big matrix
+        # constants is bad, (comparing them is slow)
         pca_eigvecs = theano.shared(pca_dct['eig_vecs'].astype('float32'))
         pca_eigvals = theano.shared(pca_dct['eig_vals'].astype('float32'))
         pca_mean = theano.shared(pca_dct['mean'].astype('float32'))
 
         def theano_pca_whiten(X):
             #copying preprepcessing.pca.pca_whiten
             return tensor.true_div(
-                tensor.dot(X-pca_mean, pca_eigvecs),
-                tensor.sqrt(pca_eigvals)+1e-8)
+                tensor.dot(X - pca_mean, pca_eigvecs),
+                tensor.sqrt(pca_eigvals) + 1e-8)
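
theano_pca_whiten above follows the standard PCA-whitening recipe: center, project onto the principal axes, divide each axis by its standard deviation. A NumPy sketch of the same transform (assuming eig_vecs stores one component per column, as the dot product implies):

    import numpy
    def pca_whiten(X, mean, eig_vecs, eig_vals):
        # 1e-8 guards against division by zero on near-null components
        return numpy.dot(X - mean, eig_vecs) / (numpy.sqrt(eig_vals) + 1e-8)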
 
         h_list = []
         g_list = []
-        for r_offset in range(0, 32-WINDOW_SIZE+1, WINDOW_STRIDE):
-            for c_offset in range(0, 32-WINDOW_SIZE+1, WINDOW_STRIDE):
-                window = train_batch_x[:, r_offset:r_offset+WINDOW_SIZE,
-                        c_offset:c_offset+WINDOW_SIZE, :]
-                assert window.dtype=='uint8'
+        for r_offset in range(0, 32 - WINDOW_SIZE + 1, WINDOW_STRIDE):
+            for c_offset in range(0, 32 - WINDOW_SIZE + 1, WINDOW_STRIDE):
+                window = train_batch_x[:, r_offset:r_offset + WINDOW_SIZE,
+                                       c_offset:c_offset + WINDOW_SIZE, :]
+                assert window.dtype == 'uint8'
 
                 #rasterize the patches
-                raster_window = tensor.flatten(tensor.cast(window, 'float32'),2)
+                raster_window = tensor.flatten(tensor.cast(
+                    window, 'float32'), 2)
 
                 #subtract off the mean of each image
                 raster_window = raster_window - raster_window.mean(axis=1).reshape((batchsize,1))
 
-                h,g = rbm.expected_h_g_given_v(theano_pca_whiten(raster_window))
+                h, g = rbm.expected_h_g_given_v(
+                    theano_pca_whiten(raster_window))
 
                 h_list.append(h)
                 g_list.append(g)
 
         hg = tensor.concatenate(h_list + g_list, axis=1)
 
         feat_fn = theano.function([feat_idx], hg)
         features = numpy.empty((60000, 11025), dtype='float32')
-        for i in xrange(60000//batchsize):
+        for i in xrange(60000 // batchsize):
             if i % 100 == 0:
-                print("feature batch %i"%i)
-            features[i*batchsize:(i+1)*batchsize] = feat_fn(i)
+                print("feature batch %i" % i)
+            features[i * batchsize:(i + 1) * batchsize] = feat_fn(i)
 
-        print("saving features to %s"%feat_filename)
+        print("saving features to %s" % feat_filename)
         numpy.save(feat_filename, features)
         all_features = features
         del features
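
The hard-coded feature width 11025 is consistent with the windowing above: range(0, 32 - 8 + 1, 4) yields 7 offsets, so each 32x32 image produces 7 * 7 = 49 windows, and 11025 / 49 = 225 concatenated h and g units per window (81 of them are g units, given n_J=81, which would leave 144 h units). A quick check:

    assert len(range(0, 32 - 8 + 1, 4)) ** 2 == 49  # 7x7 grid of windows
    assert 49 * 225 == 11025                        # 225 = h + g units per window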
 
-
     # CLASSIFY FEATURES
-
     if 0:
         # nothing to load
         pass
     else:
-        batchsize=100
+        batchsize = 100
 
         if feat_filename.startswith('cifar'):
             learnrate = 0.002
             l1_regularization = 0.004
-            anneal_epoch=100
+            anneal_epoch = 100
             n_epochs = 500
         else:
             learnrate = 0.005
             l1_regularization = 0.004
             n_epochs = 100
-            anneal_epoch=20
+            anneal_epoch = 20
 
         x_i = tensor.matrix()
         y_i = tensor.ivector()
         lr = tensor.scalar()
         #l1_regularization = float(sys.argv[1]) #1.e-3
         #l2_regularization = float(sys.argv[2]) #1.e-3*0
 
         feature_logreg = LogisticRegression.new(x_i,
-                n_in = 11025, n_out=10,
+                n_in=11025, n_out=10,
                 dtype=x_i.dtype)
 
         # marc'aurelio does this...
-        feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,10)*.02
+        feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,
+                                                                    10) * .02
 
         traincost = feature_logreg.nll(y_i).sum()
         traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization
         #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization
         train_logreg_fn = theano.function([x_i, y_i, lr],
                 [feature_logreg.nll(y_i).mean(),
                     feature_logreg.errors(y_i).mean()],
                 updates=pylearn.gd.sgd.sgd_updates(
                     params=feature_logreg.params,
                     grads=tensor.grad(traincost, feature_logreg.params),
-                    stepsizes=[lr,lr/10.]))
+                    stepsizes=[lr, lr / 10.]))
 
         all_labels = pylearn.dataset_ops.cifar10.all_data_labels('uint8')[1]
-        pylearn.dataset_ops.cifar10.all_data_labels.forget() # clear memo cache
-        assert len(all_labels)==60000
+        pylearn.dataset_ops.cifar10.all_data_labels.forget()
+        # clear memo cache
+        assert len(all_labels) == 60000
         if 0:
             print "Using validation set"
             train_labels = all_labels[:40000]
             valid_labels = all_labels[40000:50000]
             test_labels = all_labels[50000:60000]
@@ -436 +450 @@
             test_features = all_features[50000:60000]
 
         if 1:
             print "Computing mean and std.dev"
             train_mean = train_features.mean(axis=0)
-            train_std = train_features.std(axis=0)+1e-4
-            preproc = lambda x: (x-train_mean)/(0.1+train_std)
+            train_std = train_features.std(axis=0) + 1e-4
+            preproc = lambda x: (x - train_mean) / (0.1 + train_std)
         else:
             print "Not centering data"
-            preproc = lambda x:x
+            preproc = lambda x: x
 
         for epoch in xrange(n_epochs):
             print 'epoch', epoch
             # validate
             # Marc'Aurelio, you crazy!!
             # the division by batchsize is done in the cost function
-            e_lr = learnrate / (batchsize*max(1.0, numpy.floor(max(1., epoch/float(anneal_epoch))-2)))
+            e_lr = learnrate / (batchsize * max(1.0, numpy.floor(
+                max(1., epoch / float(anneal_epoch)) - 2)))
 
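The annealing expression above holds e_lr at learnrate / batchsize until epoch reaches 4 * anneal_epoch, then decays it stepwise. With the 'cifar' settings (learnrate=0.002, batchsize=100, anneal_epoch=100) that means 2e-05 for epochs 0-399 and 1e-05 for epochs 400-499; a standalone check of that reading:

    import numpy
    learnrate, batchsize, anneal_epoch = 0.002, 100, 100
    for epoch in (0, 399, 400, 499):
        e_lr = learnrate / (batchsize * max(1.0, numpy.floor(
            max(1., epoch / float(anneal_epoch)) - 2)))
        print epoch, e_lr  # -> 2e-05, 2e-05, 1e-05, 1e-05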
             if valid_features is not None:
                 l01s = []
                 nlls = []
-                for i in xrange(10000/batchsize):
-                    x_i = valid_features[i*batchsize:(i+1)*batchsize]
-                    y_i = valid_labels[i*batchsize:(i+1)*batchsize]
+                for i in xrange(10000 / batchsize):
+                    x_i = valid_features[i * batchsize:(i + 1) * batchsize]
+                    y_i = valid_labels[i * batchsize:(i + 1) * batchsize]
 
                     #lr=0.0 -> no learning, safe for validation set
                     nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
                     nlls.append(nll)
                     l01s.append(l01)
@@ -466 +481 @@
 
             # test
 
             l01s = []
             nlls = []
-            for i in xrange(len(test_features)//batchsize):
-                x_i = test_features[i*batchsize:(i+1)*batchsize]
-                y_i = test_labels[i*batchsize:(i+1)*batchsize]
+            for i in xrange(len(test_features) // batchsize):
+                x_i = test_features[i * batchsize:(i + 1) * batchsize]
+                y_i = test_labels[i * batchsize:(i + 1) * batchsize]
 
                 #lr=0.0 -> no learning, safe for validation set
                 nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
                 nlls.append(nll)
                 l01s.append(l01)
             print 'test log_reg', numpy.mean(nlls), numpy.mean(l01s)
 
             #train
             l01s = []
             nlls = []
-            for i in xrange(len(train_features)//batchsize):
-                x_i = train_features[i*batchsize:(i+1)*batchsize]
-                y_i = train_labels[i*batchsize:(i+1)*batchsize]
+            for i in xrange(len(train_features) // batchsize):
+                x_i = train_features[i * batchsize:(i + 1) * batchsize]
+                y_i = train_labels[i * batchsize:(i + 1) * batchsize]
                 nll, l01 = train_logreg_fn(preproc(x_i), y_i, e_lr)
                 nlls.append(nll)
                 l01s.append(l01)
             print 'train log_reg', numpy.mean(nlls), numpy.mean(l01s)
-
-
 
 
 import pickle as cPickle
 #import cPickle
 if __name__ == '__main__':
     if 0:
         #learning 16 x 16 pinwheel filters from official cifar patches (MAR)
-        rbm,smplr = test_reproduce_ranzato_hinton_2010(
+        rbm, smplr = test_reproduce_ranzato_hinton_2010(
             as_unittest=False,
             n_train_iters=5000,
-            rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81),
+            rbm_alloc=lambda n_I: mcRBM_withP.alloc_topo_P(n_I, n_J=81),
             trainer_alloc=mcRBMTrainer.alloc_for_P,
             dataset='MAR'
             )
 
     if 0:
         # pretraining settings
-        rbm,smplr = test_reproduce_ranzato_hinton_2010(
+        rbm, smplr = test_reproduce_ranzato_hinton_2010(
             as_unittest=False,
             n_train_iters=60000,
-            rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81),
+            rbm_alloc=lambda n_I: mcRBM_withP.alloc_topo_P(n_I, n_J=81),
             trainer_alloc=mcRBMTrainer.alloc_for_P,
             lr_per_example=0.05,
             dataset='tinyimages_patches',
             l1_penalty=1e-3,
             l1_penalty_start=30000,