# HG changeset patch
# User Frederic Bastien
# Date 1351875738 14400
# Node ID 9d21919e2332aab103220bc25a4c85fbab9dcbd9
# Parent  0e0f9e8275a9739b0e6b7795282f8cb18ee4cfa9
autopep8

diff -r 0e0f9e8275a9 -r 9d21919e2332 pylearn/algorithms/tests/test_mcRBM.py
--- a/pylearn/algorithms/tests/test_mcRBM.py	Fri Nov 02 12:53:08 2012 -0400
+++ b/pylearn/algorithms/tests/test_mcRBM.py	Fri Nov 02 13:02:18 2012 -0400
@@ -15,13 +15,17 @@
 def _default_rbm_alloc(n_I, n_K=256, n_J=100):
     return mcRBM.alloc(n_I, n_K, n_J)
 
+
 def _default_trainer_alloc(rbm, train_batch, batchsize, initial_lr_per_example,
         l1_penalty, l1_penalty_start, persistent_chains):
-    return mcRBMTrainer.alloc(rbm, train_batch, batchsize, l1_penalty=l1_penalty,
-            l1_penalty_start=l1_penalty_start,persistent_chains=persistent_chains)
+    return mcRBMTrainer.alloc(rbm, train_batch, batchsize,
+                              l1_penalty=l1_penalty,
+                              l1_penalty_start=l1_penalty_start,
+                              persistent_chains=persistent_chains)
 
 
-def test_reproduce_ranzato_hinton_2010(dataset='MAR', as_unittest=True, n_train_iters=5000,
+def test_reproduce_ranzato_hinton_2010(
+        dataset='MAR', as_unittest=True, n_train_iters=5000,
         rbm_alloc=_default_rbm_alloc,
         trainer_alloc=_default_trainer_alloc,
         lr_per_example=.075,
         l1_penalty=1e-3,
@@ -32,27 +36,27 @@
     batchsize = 128
 
     if dataset == 'MAR':
-        n_vis=105
-        n_patches=10240
-        epoch_size=n_patches
-    elif dataset=='cifar10patches8x8':
-        R,C= 8,8 # the size of image patches
-        n_vis=96 # pca components
-        epoch_size=batchsize*500
-        n_patches=epoch_size*20
-    elif dataset=='tinyimages_patches':
-        R,C=8,8
-        n_vis=81
-        epoch_size=batchsize*500
-        n_patches=epoch_size*20
+        n_vis = 105
+        n_patches = 10240
+        epoch_size = n_patches
+    elif dataset == 'cifar10patches8x8':
+        R, C = 8, 8  # the size of image patches
+        n_vis = 96  # pca components
+        epoch_size = batchsize * 500
+        n_patches = epoch_size * 20
+    elif dataset == 'tinyimages_patches':
+        R, C = 8, 8
+        n_vis = 81
+        epoch_size = batchsize * 500
+        n_patches = epoch_size * 20
     else:
-        R,C= 16,16 # the size of image patches
-        n_vis=R*C
-        n_patches=100000
-        epoch_size=n_patches
+        R, C = 16, 16  # the size of image patches
+        n_vis = R * C
+        n_patches = 100000
+        epoch_size = n_patches
 
     def l2(X):
-        return numpy.sqrt((X**2).sum())
+        return numpy.sqrt((X ** 2).sum())
 
     if dataset == 'MAR':
         tile = pylearn.dataset_ops.image_patches.save_filters_of_ranzato_hinton_2010
@@ -63,32 +67,32 @@
                 pylearn.dataset_ops.cifar10.random_cifar_patches_pca(
                     n_vis, None, 'float32', n_patches, R, C,),
                 X),
-                img_shape=(R,C))
+                img_shape=(R, C))
             image_tiling.save_tiled_raster_images(_img, fname)
     elif dataset == 'tinyimages_patches':
         tile = pylearn.dataset_ops.tinyimages.save_filters
     else:
         def tile(X, fname):
             _img = image_tiling.tile_raster_images(X,
-                    img_shape=(R,C),
+                    img_shape=(R, C),
                     min_dynamic_range=1e-2)
             image_tiling.save_tiled_raster_images(_img, fname)
 
     batch_idx = tensor.iscalar()
-    batch_range =batch_idx * batchsize + numpy.arange(batchsize)
+    batch_range = batch_idx * batchsize + numpy.arange(batchsize)
 
     if dataset == 'MAR':
         train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range)
     elif dataset == 'cifar10patches8x8':
         train_batch = pylearn.dataset_ops.cifar10.cifar10_patches(
-                batch_range, 'train', n_patches=n_patches, patch_size=(R,C),
+            batch_range, 'train', n_patches=n_patches, patch_size=(R, C),
             pca_components=n_vis)
     elif dataset == 'tinyimages_patches':
         train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range)
     else:
         train_batch = pylearn.dataset_ops.image_patches.image_patches(
-                s_idx = (batch_idx * batchsize + numpy.arange(batchsize)),
-                dims = (n_patches,R,C),
+            s_idx=(batch_idx * batchsize + numpy.arange(batchsize)),
+            dims=(n_patches, R, C),
             center=True,
             unitvar=True,
             dtype=theano.config.floatX,
@@ -105,7 +109,7 @@
             l1_penalty=l1_penalty,
             l1_penalty_start=l1_penalty_start,
             persistent_chains=persistent_chains)
-    rbm=trainer.rbm
+    rbm = trainer.rbm
 
     if persistent_chains:
         grads = trainer.contrastive_grads()
@@ -113,7 +117,8 @@
                 outputs=[grads[0].norm(2), grads[0].norm(2), grads[1].norm(2)],
                 updates=trainer.cd_updates())
     else:
-        learn_fn = theano.function([batch_idx], outputs=[], updates=trainer.cd_updates())
+        learn_fn = theano.function([batch_idx], outputs=[],
+                                   updates=trainer.cd_updates())
 
     if persistent_chains:
         smplr = trainer.sampler
@@ -124,12 +129,12 @@
         cPickle.dump(
                 pylearn.dataset_ops.cifar10.random_cifar_patches_pca(
                     n_vis, None, 'float32', n_patches, R, C,),
-                open('test_mcRBM.pca.pkl','w'))
+                open('test_mcRBM.pca.pkl', 'w'))
 
     print "Learning..."
     last_epoch = -1
     for jj in xrange(n_train_iters):
-        epoch = jj*batchsize / epoch_size
+        epoch = jj * batchsize / epoch_size
 
         print_jj = epoch != last_epoch
         last_epoch = epoch
 
@@ -137,36 +142,37 @@
         if as_unittest and epoch == 5:
             U = rbm.U.get_value(borrow=True)
             W = rbm.W.get_value(borrow=True)
-            def allclose(a,b):
-                return numpy.allclose(a,b,rtol=1.01,atol=1e-3)
+
+            def allclose(a, b):
+                return numpy.allclose(a, b, rtol=1.01, atol=1e-3)
             print ""
             print "--------------"
-            print "assert allclose(l2(U), %f)"%l2(U)
-            print "assert allclose(l2(W), %f)"%l2(W)
-            print "assert allclose(U.min(), %f)"%U.min()
-            print "assert allclose(U.max(), %f)"%U.max()
-            print "assert allclose(W.min(),%f)"%W.min()
-            print "assert allclose(W.max(), %f)"%W.max()
+            print "assert allclose(l2(U), %f)" % l2(U)
+            print "assert allclose(l2(W), %f)" % l2(W)
+            print "assert allclose(U.min(), %f)" % U.min()
+            print "assert allclose(U.max(), %f)" % U.max()
+            print "assert allclose(W.min(),%f)" % W.min()
+            print "assert allclose(W.max(), %f)" % W.max()
             print "--------------"
 
             assert allclose(l2(U), 21.351664)
             assert allclose(l2(W), 6.275828)
             assert allclose(U.min(), -1.176703)
             assert allclose(U.max(), 0.859802)
-            assert allclose(W.min(),-0.223128)
-            assert allclose(W.max(), 0.227558 )
+            assert allclose(W.min(), -0.223128)
+            assert allclose(W.max(), 0.227558)
 
             break
 
         if print_jj:
             if not as_unittest:
-                tile(imgs_fn(jj), "imgs_%06i.png"%jj)
+                tile(imgs_fn(jj), "imgs_%06i.png" % jj)
             if persistent_chains:
-                tile(smplr.positions.value, "sample_%06i.png"%jj)
-            tile(rbm.U.value.T, "U_%06i.png"%jj)
-            tile(rbm.W.value.T, "W_%06i.png"%jj)
+                tile(smplr.positions.value, "sample_%06i.png" % jj)
+            tile(rbm.U.value.T, "U_%06i.png" % jj)
+            tile(rbm.W.value.T, "W_%06i.png" % jj)
 
-            print 'saving samples', jj, 'epoch', jj/(epoch_size/batchsize)
+            print 'saving samples', jj, 'epoch', jj / (epoch_size / batchsize)
 
             print 'l2(U)', l2(rbm.U.get_value(borrow=True)),
             print 'l2(W)', l2(rbm.W.get_value(borrow=True)),
@@ -184,11 +190,10 @@
 
             if persistent_chains:
                 print 'parts min', smplr.positions.get_value(borrow=True).min(),
-                print 'max',smplr.positions.get_value(borrow=True).max(),
+                print 'max', smplr.positions.get_value(borrow=True).max(),
                 print 'HMC step', smplr.stepsize.get_value(borrow=True),
                 print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True)
-
         l2_of_Ugrad = learn_fn(jj)
 
         if persistent_chains and print_jj:
@@ -205,11 +210,11 @@
         if jj % 2000 == 0:
             print ''
             print 'Saving rbm...'
-            cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl'%jj, 'w'), -1)
+            cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl' % jj, 'w'), -1)
 
             if persistent_chains:
                 print 'Saving sampler...'
-                cPickle.dump(smplr, open('mcRBM.smplr.%06i.pkl'%jj, 'w'), -1)
-
+                cPickle.dump(smplr, open(
+                    'mcRBM.smplr.%06i.pkl' % jj, 'w'), -1)
 
     if not as_unittest:
         return rbm, smplr
 
@@ -217,22 +222,22 @@
 
 
 def run_classif_experiment(checkpoint):
 
-    R,C=8,8
-    n_vis=74
+    R, C = 8, 8
+    n_vis = 74
     # PRETRAIN
     #
     # extract 1 million 8x8 patches from TinyImages
     # pre-process them the right way
     #   find 74 dims of PCA
     #   filter patches through PCA
-    whitened_patches, pca_dct = pylearn.dataset_ops.tinyimages.main(n_imgs=100000,
-            max_components=n_vis, seed=234)
+    whitened_patches, pca_dct = pylearn.dataset_ops.tinyimages.main(
+        n_imgs=100000, max_components=n_vis, seed=234)
     #
     # Set up mcRBM Trainer
     # Initialize P using topological 3x3 overlapping patches thing
     # start learning P matrix after 2 passes through dataset
     #
-    rbm_filename = 'mcRBM.rbm.%06i.pkl'%46000
+    rbm_filename = 'mcRBM.rbm.%06i.pkl' % 46000
     try:
         open(rbm_filename).close()
         load_mcrbm = True
@@ -245,8 +250,8 @@
     else:
         print "Training mcRBM"
-        batchsize=128
-        epoch_size=len(whitened_patches)
+        batchsize = 128
+        epoch_size = len(whitened_patches)
         tile = pylearn.dataset_ops.tinyimages.save_filters
         train_batch = theano.tensor.matrix()
         trainer = mcRBMTrainer.alloc_for_P(
@@ -256,22 +261,24 @@
                 initial_lr_per_example=0.05,
                 l1_penalty=1e-3,
                 l1_penalty_start=sys.maxint,
-                p_training_start=2*epoch_size//batchsize,
+                p_training_start=2 * epoch_size // batchsize,
                 persistent_chains=False)
-        rbm=trainer.rbm
-        learn_fn = theano.function([train_batch], outputs=[], updates=trainer.cd_updates())
+        rbm = trainer.rbm
+        learn_fn = theano.function([train_batch], outputs=[],
+                                   updates=trainer.cd_updates())
         smplr = trainer._last_cd1_sampler
 
         ii = 0
         for i_epoch in range(6):
             for i_batch in xrange(epoch_size // batchsize):
-                batch_vals = whitened_patches[i_batch*batchsize:(i_batch+1)*batchsize]
+                batch_vals = whitened_patches[i_batch *
+                                              batchsize:(i_batch + 1) * batchsize]
                 learn_fn(batch_vals)
 
                 if (ii % 1000) == 0:
                     #tile(imgs_fn(ii), "imgs_%06i.png"%ii)
-                    tile(rbm.U.value.T, "U_%06i.png"%ii)
-                    tile(rbm.W.value.T, "W_%06i.png"%ii)
+                    tile(rbm.U.value.T, "U_%06i.png" % ii)
+                    tile(rbm.W.value.T, "W_%06i.png" % ii)
 
                     print 'saving samples', ii, 'epoch', i_epoch, i_batch
 
@@ -283,23 +290,28 @@
                     except:
                         print trainer.effective_l1_penalty
 
-                    print 'U min max', rbm.U.get_value(borrow=True).min(), rbm.U.get_value(borrow=True).max(),
-                    print 'W min max', rbm.W.get_value(borrow=True).min(), rbm.W.get_value(borrow=True).max(),
-                    print 'a min max', rbm.a.get_value(borrow=True).min(), rbm.a.get_value(borrow=True).max(),
-                    print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(),
-                    print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max()
+                    print 'U min max', rbm.U.get_value(
+                        borrow=True).min(), rbm.U.get_value(borrow=True).max(),
+                    print 'W min max', rbm.W.get_value(
+                        borrow=True).min(), rbm.W.get_value(borrow=True).max(),
+                    print 'a min max', rbm.a.get_value(
+                        borrow=True).min(), rbm.a.get_value(borrow=True).max(),
+                    print 'b min max', rbm.b.get_value(
+                        borrow=True).min(), rbm.b.get_value(borrow=True).max(),
+                    print 'c min max', rbm.c.get_value(
+                        borrow=True).min(), rbm.c.get_value(borrow=True).max()
 
                     print 'HMC step', smplr.stepsize.get_value(borrow=True),
                     print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True)
-                    print 'P min max', rbm.P.get_value(borrow=True).min(), rbm.P.get_value(borrow=True).max(),
+                    print 'P min max', rbm.P.get_value(
+                        borrow=True).min(), rbm.P.get_value(borrow=True).max(),
                     print 'P_lr', trainer.p_lr.get_value(borrow=True)
 
                     print ''
                     print 'Saving rbm...'
-                    cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl'%ii, 'w'), -1)
+                    cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl' % ii, 'w'), -1)
 
                 ii += 1
 
-
     # extract convolutional features from the CIFAR10 data
     feat_filename = 'mcrbm_features.npy'
     feat_filename = 'cifar10.features.46000.npy'
@@ -313,7 +325,7 @@
         print 'Loading features from', feat_filename
         all_features = numpy.load(feat_filename, mmap_mode='r')
     else:
-        batchsize=100
+        batchsize = 100
         feat_idx = tensor.lscalar()
         feat_idx_range = feat_idx * batchsize + tensor.arange(batchsize)
         train_batch_x, train_batch_y = pylearn.dataset_ops.cifar10.cifar10(
@@ -323,36 +335,38 @@
                 rasterized=False,
                 color='rgb')
 
-        WINDOW_SIZE=8
-        WINDOW_STRIDE=4
+        WINDOW_SIZE = 8
+        WINDOW_STRIDE = 4
 
-        # put these into shared vars because support for big matrix constants is bad,
-        # (comparing them is slow)
+        # put these into shared vars because support for big matrix
+        # constants is bad, (comparing them is slow)
         pca_eigvecs = theano.shared(pca_dct['eig_vecs'].astype('float32'))
         pca_eigvals = theano.shared(pca_dct['eig_vals'].astype('float32'))
-        pca_mean    = theano.shared(pca_dct['mean'].astype('float32'))
+        pca_mean = theano.shared(pca_dct['mean'].astype('float32'))
 
         def theano_pca_whiten(X):
             #copying preprepcessing.pca.pca_whiten
             return tensor.true_div(
-                    tensor.dot(X-pca_mean, pca_eigvecs),
-                    tensor.sqrt(pca_eigvals)+1e-8)
+                tensor.dot(X - pca_mean, pca_eigvecs),
+                tensor.sqrt(pca_eigvals) + 1e-8)
 
         h_list = []
         g_list = []
-        for r_offset in range(0, 32-WINDOW_SIZE+1, WINDOW_STRIDE):
-            for c_offset in range(0, 32-WINDOW_SIZE+1, WINDOW_STRIDE):
-                window = train_batch_x[:, r_offset:r_offset+WINDOW_SIZE,
-                        c_offset:c_offset+WINDOW_SIZE, :]
-                assert window.dtype=='uint8'
+        for r_offset in range(0, 32 - WINDOW_SIZE + 1, WINDOW_STRIDE):
+            for c_offset in range(0, 32 - WINDOW_SIZE + 1, WINDOW_STRIDE):
+                window = train_batch_x[:, r_offset:r_offset + WINDOW_SIZE,
+                                       c_offset:c_offset + WINDOW_SIZE, :]
+                assert window.dtype == 'uint8'
 
                 #rasterize the patches
-                raster_window = tensor.flatten(tensor.cast(window, 'float32'),2)
+                raster_window = tensor.flatten(tensor.cast(
+                    window, 'float32'), 2)
 
                 #subtract off the mean of each image
                 raster_window = raster_window - raster_window.mean(axis=1).reshape((batchsize,1))
 
-                h,g = rbm.expected_h_g_given_v(theano_pca_whiten(raster_window))
+                h, g = rbm.expected_h_g_given_v(
+                    theano_pca_whiten(raster_window))
 
                 h_list.append(h)
                 g_list.append(g)
 
@@ -361,35 +375,33 @@
         feat_fn = theano.function([feat_idx], hg)
         features = numpy.empty((60000, 11025), dtype='float32')
-        for i in xrange(60000//batchsize):
+        for i in xrange(60000 // batchsize):
             if i % 100 == 0:
-                print("feature batch %i"%i)
-            features[i*batchsize:(i+1)*batchsize] = feat_fn(i)
+                print("feature batch %i" % i)
+            features[i * batchsize:(i + 1) * batchsize] = feat_fn(i)
 
-        print("saving features to %s"%feat_filename)
+        print("saving features to %s" % feat_filename)
         numpy.save(feat_filename, features)
         all_features = features
         del features
 
-
     # CLASSIFY FEATURES
 
     if 0:
         # nothing to load
         pass
     else:
-        batchsize=100
+        batchsize = 100
 
         if feat_filename.startswith('cifar'):
             learnrate = 0.002
             l1_regularization = 0.004
-            anneal_epoch=100
+            anneal_epoch = 100
             n_epochs = 500
         else:
             learnrate = 0.005
             l1_regularization = 0.004
             n_epochs = 100
-            anneal_epoch=20
+            anneal_epoch = 20

        x_i = tensor.matrix()
        y_i = tensor.ivector()

@@ -398,11 +410,12 @@

        #l2_regularization = float(sys.argv[2]) #1.e-3*0

        feature_logreg = LogisticRegression.new(x_i,
-                n_in = 11025, n_out=10,
+                n_in=11025, n_out=10,
                dtype=x_i.dtype)

        # marc'aurelio does this...
-        feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,10)*.02
+        feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,
+                                                                     10) * .02

        traincost = feature_logreg.nll(y_i).sum()
        traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization
@@ -413,11 +426,12 @@
                updates=pylearn.gd.sgd.sgd_updates(
                    params=feature_logreg.params,
                    grads=tensor.grad(traincost, feature_logreg.params),
-                    stepsizes=[lr,lr/10.]))
+                    stepsizes=[lr, lr / 10.]))

        all_labels = pylearn.dataset_ops.cifar10.all_data_labels('uint8')[1]
-        pylearn.dataset_ops.cifar10.all_data_labels.forget() # clear memo cache
-        assert len(all_labels)==60000
+        pylearn.dataset_ops.cifar10.all_data_labels.forget()
+        # clear memo cache
+        assert len(all_labels) == 60000
        if 0:
            print "Using validation set"
            train_labels = all_labels[:40000]
@@ -438,25 +452,26 @@
        if 1:
            print "Computing mean and std.dev"
            train_mean = train_features.mean(axis=0)
-            train_std = train_features.std(axis=0)+1e-4
-            preproc = lambda x: (x-train_mean)/(0.1+train_std)
+            train_std = train_features.std(axis=0) + 1e-4
+            preproc = lambda x: (x - train_mean) / (0.1 + train_std)
        else:
            print "Not centering data"
-            preproc = lambda x:x
+            preproc = lambda x: x

        for epoch in xrange(n_epochs):
            print 'epoch', epoch
            # validate
            # Marc'Aurelio, you crazy!!
            # the division by batchsize is done in the cost function
-            e_lr = learnrate / (batchsize*max(1.0, numpy.floor(max(1., epoch/float(anneal_epoch))-2)))
+            e_lr = learnrate / (batchsize * max(1.0, numpy.floor(
+                max(1., epoch / float(anneal_epoch)) - 2)))

            if valid_features is not None:
                l01s = []
                nlls = []
-                for i in xrange(10000/batchsize):
-                    x_i = valid_features[i*batchsize:(i+1)*batchsize]
-                    y_i = valid_labels[i*batchsize:(i+1)*batchsize]
+                for i in xrange(10000 / batchsize):
+                    x_i = valid_features[i * batchsize:(i + 1) * batchsize]
+                    y_i = valid_labels[i * batchsize:(i + 1) * batchsize]

                    #lr=0.0 -> no learning, safe for validation set
                    nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
@@ -468,9 +483,9 @@

            l01s = []
            nlls = []
-            for i in xrange(len(test_features)//batchsize):
-                x_i = test_features[i*batchsize:(i+1)*batchsize]
-                y_i = test_labels[i*batchsize:(i+1)*batchsize]
+            for i in xrange(len(test_features) // batchsize):
+                x_i = test_features[i * batchsize:(i + 1) * batchsize]
+                y_i = test_labels[i * batchsize:(i + 1) * batchsize]

                #lr=0.0 -> no learning, safe for validation set
                nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
@@ -481,36 +496,34 @@
            #train
            l01s = []
            nlls = []
-            for i in xrange(len(train_features)//batchsize):
-                x_i = train_features[i*batchsize:(i+1)*batchsize]
-                y_i = train_labels[i*batchsize:(i+1)*batchsize]
+            for i in xrange(len(train_features) // batchsize):
+                x_i = train_features[i * batchsize:(i + 1) * batchsize]
+                y_i = train_labels[i * batchsize:(i + 1) * batchsize]
                nll, l01 = train_logreg_fn(preproc(x_i), y_i, e_lr)
                nlls.append(nll)
                l01s.append(l01)
            print 'train log_reg', numpy.mean(nlls), numpy.mean(l01s)

-
-
 import pickle as cPickle
 #import cPickle


 if __name__ == '__main__':
    if 0:
        #learning 16 x 16 pinwheel filters from official cifar patches (MAR)
-        rbm,smplr = test_reproduce_ranzato_hinton_2010(
+        rbm, smplr = test_reproduce_ranzato_hinton_2010(
                as_unittest=False,
                n_train_iters=5000,
-                rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81),
+                rbm_alloc=lambda n_I: mcRBM_withP.alloc_topo_P(n_I, n_J=81),
                trainer_alloc=mcRBMTrainer.alloc_for_P,
                dataset='MAR'
                )

    if 0:
        # pretraining settings
-        rbm,smplr = test_reproduce_ranzato_hinton_2010(
+        rbm, smplr = test_reproduce_ranzato_hinton_2010(
                as_unittest=False,
                n_train_iters=60000,
-                rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81),
+                rbm_alloc=lambda n_I: mcRBM_withP.alloc_topo_P(n_I, n_J=81),
                trainer_alloc=mcRBMTrainer.alloc_for_P,
                lr_per_example=0.05,
                dataset='tinyimages_patches',