Mercurial > pylearn
comparison pylearn/algorithms/tests/test_mcRBM.py @ 1507:2a6a6f16416c
fix import.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Mon, 12 Sep 2011 11:45:41 -0400 |
parents | 7c51c0355d86 |
children | b28e8730c948 |
comparison
legend: equal, deleted, inserted, replaced
1506:2f69c9932d9a | 1507:2a6a6f16416c |
---|---|
1 import sys | 1 import sys |
2 from pylearn.algorithms.mcRBM import * | 2 |
3 import pylearn.datasets.cifar10 | 3 import numpy |
4 import pylearn.dataset_ops.tinyimages | 4 import theano |
5 | 5 from theano import tensor |
6 | |
7 from pylearn.algorithms.mcRBM import mcRBM, mcRBMTrainer, mcRBM_withP, l2 | |
8 #import pylearn.datasets.cifar10 | |
6 import pylearn.dataset_ops.cifar10 | 9 import pylearn.dataset_ops.cifar10 |
7 from theano import tensor | |
8 from pylearn.shared.layers.logreg import LogisticRegression | 10 from pylearn.shared.layers.logreg import LogisticRegression |
11 from pylearn.io import image_tiling | |
12 import pylearn.dataset_ops.image_patches | |
9 | 13 |
10 | 14 |
11 def _default_rbm_alloc(n_I, n_K=256, n_J=100): | 15 def _default_rbm_alloc(n_I, n_K=256, n_J=100): |
12 return mcRBM.alloc(n_I, n_K, n_J) | 16 return mcRBM.alloc(n_I, n_K, n_J) |
13 | 17 |
67 _img = image_tiling.tile_raster_images(X, | 71 _img = image_tiling.tile_raster_images(X, |
68 img_shape=(R,C), | 72 img_shape=(R,C), |
69 min_dynamic_range=1e-2) | 73 min_dynamic_range=1e-2) |
70 image_tiling.save_tiled_raster_images(_img, fname) | 74 image_tiling.save_tiled_raster_images(_img, fname) |
71 | 75 |
72 batch_idx = TT.iscalar() | 76 batch_idx = tensor.iscalar() |
73 batch_range =batch_idx * batchsize + np.arange(batchsize) | 77 batch_range =batch_idx * batchsize + numpy.arange(batchsize) |
74 | 78 |
75 if dataset == 'MAR': | 79 if dataset == 'MAR': |
76 train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range) | 80 train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range) |
77 elif dataset == 'cifar10patches8x8': | 81 elif dataset == 'cifar10patches8x8': |
78 train_batch = pylearn.dataset_ops.cifar10.cifar10_patches( | 82 train_batch = pylearn.dataset_ops.cifar10.cifar10_patches( |
80 pca_components=n_vis) | 84 pca_components=n_vis) |
81 elif dataset == 'tinyimages_patches': | 85 elif dataset == 'tinyimages_patches': |
82 train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range) | 86 train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range) |
83 else: | 87 else: |
84 train_batch = pylearn.dataset_ops.image_patches.image_patches( | 88 train_batch = pylearn.dataset_ops.image_patches.image_patches( |
85 s_idx = (batch_idx * batchsize + np.arange(batchsize)), | 89 s_idx = (batch_idx * batchsize + numpy.arange(batchsize)), |
86 dims = (n_patches,R,C), | 90 dims = (n_patches,R,C), |
87 center=True, | 91 center=True, |
88 unitvar=True, | 92 unitvar=True, |
89 dtype=floatX, | 93 dtype=theano.config.floatX, |
90 rasterized=True) | 94 rasterized=True) |
91 | 95 |
92 if not as_unittest: | 96 if not as_unittest: |
93 imgs_fn = function([batch_idx], outputs=train_batch) | 97 imgs_fn = theano.function([batch_idx], outputs=train_batch) |
94 | 98 |
95 trainer = trainer_alloc( | 99 trainer = trainer_alloc( |
96 rbm_alloc(n_I=n_vis), | 100 rbm_alloc(n_I=n_vis), |
97 train_batch, | 101 train_batch, |
98 batchsize, | 102 batchsize, |
102 persistent_chains=persistent_chains) | 106 persistent_chains=persistent_chains) |
103 rbm=trainer.rbm | 107 rbm=trainer.rbm |
104 | 108 |
105 if persistent_chains: | 109 if persistent_chains: |
106 grads = trainer.contrastive_grads() | 110 grads = trainer.contrastive_grads() |
107 learn_fn = function([batch_idx], | 111 learn_fn = theano.function([batch_idx], |
108 outputs=[grads[0].norm(2), grads[0].norm(2), grads[1].norm(2)], | 112 outputs=[grads[0].norm(2), grads[0].norm(2), grads[1].norm(2)], |
109 updates=trainer.cd_updates()) | 113 updates=trainer.cd_updates()) |
110 else: | 114 else: |
111 learn_fn = function([batch_idx], outputs=[], updates=trainer.cd_updates()) | 115 learn_fn = theano.function([batch_idx], outputs=[], updates=trainer.cd_updates()) |
112 | 116 |
113 if persistent_chains: | 117 if persistent_chains: |
114 smplr = trainer.sampler | 118 smplr = trainer.sampler |
115 else: | 119 else: |
116 smplr = trainer._last_cd1_sampler | 120 smplr = trainer._last_cd1_sampler |
252 l1_penalty=1e-3, | 256 l1_penalty=1e-3, |
253 l1_penalty_start=sys.maxint, | 257 l1_penalty_start=sys.maxint, |
254 p_training_start=2*epoch_size//batchsize, | 258 p_training_start=2*epoch_size//batchsize, |
255 persistent_chains=False) | 259 persistent_chains=False) |
256 rbm=trainer.rbm | 260 rbm=trainer.rbm |
257 learn_fn = function([train_batch], outputs=[], updates=trainer.cd_updates()) | 261 learn_fn = theano.function([train_batch], outputs=[], updates=trainer.cd_updates()) |
258 smplr = trainer._last_cd1_sampler | 262 smplr = trainer._last_cd1_sampler |
259 | 263 |
260 ii = 0 | 264 ii = 0 |
261 for i_epoch in range(6): | 265 for i_epoch in range(6): |
262 for i_batch in xrange(epoch_size // batchsize): | 266 for i_batch in xrange(epoch_size // batchsize): |
321 WINDOW_SIZE=8 | 325 WINDOW_SIZE=8 |
322 WINDOW_STRIDE=4 | 326 WINDOW_STRIDE=4 |
323 | 327 |
324 # put these into shared vars because support for big matrix constants is bad, | 328 # put these into shared vars because support for big matrix constants is bad, |
325 # (comparing them is slow) | 329 # (comparing them is slow) |
326 pca_eigvecs = shared(pca_dct['eig_vecs'].astype('float32')) | 330 pca_eigvecs = theano.shared(pca_dct['eig_vecs'].astype('float32')) |
327 pca_eigvals = shared(pca_dct['eig_vals'].astype('float32')) | 331 pca_eigvals = theano.shared(pca_dct['eig_vals'].astype('float32')) |
328 pca_mean = shared(pca_dct['mean'].astype('float32')) | 332 pca_mean = theano.shared(pca_dct['mean'].astype('float32')) |
329 | 333 |
330 def theano_pca_whiten(X): | 334 def theano_pca_whiten(X): |
331 #copying preprepcessing.pca.pca_whiten | 335 #copying preprepcessing.pca.pca_whiten |
332 return tensor.true_div( | 336 return tensor.true_div( |
333 tensor.dot(X-pca_mean, pca_eigvecs), | 337 tensor.dot(X-pca_mean, pca_eigvecs), |
352 h_list.append(h) | 356 h_list.append(h) |
353 g_list.append(g) | 357 g_list.append(g) |
354 | 358 |
355 hg = tensor.concatenate(h_list + g_list, axis=1) | 359 hg = tensor.concatenate(h_list + g_list, axis=1) |
356 | 360 |
357 feat_fn = function([feat_idx], hg) | 361 feat_fn = theano.function([feat_idx], hg) |
358 features = numpy.empty((60000, 11025), dtype='float32') | 362 features = numpy.empty((60000, 11025), dtype='float32') |
359 for i in xrange(60000//batchsize): | 363 for i in xrange(60000//batchsize): |
360 if i % 100 == 0: | 364 if i % 100 == 0: |
361 print("feature batch %i"%i) | 365 print("feature batch %i"%i) |
362 features[i*batchsize:(i+1)*batchsize] = feat_fn(i) | 366 features[i*batchsize:(i+1)*batchsize] = feat_fn(i) |
400 feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,10)*.02 | 404 feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,10)*.02 |
401 | 405 |
402 traincost = feature_logreg.nll(y_i).sum() | 406 traincost = feature_logreg.nll(y_i).sum() |
403 traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization | 407 traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization |
404 #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization | 408 #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization |
405 train_logreg_fn = function([x_i, y_i, lr], | 409 train_logreg_fn = theano.function([x_i, y_i, lr], |
406 [feature_logreg.nll(y_i).mean(), | 410 [feature_logreg.nll(y_i).mean(), |
407 feature_logreg.errors(y_i).mean()], | 411 feature_logreg.errors(y_i).mean()], |
408 updates=pylearn.gd.sgd.sgd_updates( | 412 updates=pylearn.gd.sgd.sgd_updates( |
409 params=feature_logreg.params, | 413 params=feature_logreg.params, |
410 grads=tensor.grad(traincost, feature_logreg.params), | 414 grads=tensor.grad(traincost, feature_logreg.params), |