Mercurial > pylearn

import sys

import numpy
import theano
from theano import tensor

from pylearn.algorithms.mcRBM import mcRBM, mcRBMTrainer, mcRBM_withP, l2
#import pylearn.datasets.cifar10
import pylearn.dataset_ops.cifar10
from pylearn.shared.layers.logreg import LogisticRegression
from pylearn.io import image_tiling
import pylearn.dataset_ops.image_patches


def _default_rbm_alloc(n_I, n_K=256, n_J=100):
    return mcRBM.alloc(n_I, n_K, n_J)


def _default_trainer_alloc(rbm, train_batch, batchsize, initial_lr_per_example,
                           l1_penalty, l1_penalty_start, persistent_chains):
    return mcRBMTrainer.alloc(rbm, train_batch, batchsize,
                              l1_penalty=l1_penalty,
                              l1_penalty_start=l1_penalty_start,
                              persistent_chains=persistent_chains)


def test_reproduce_ranzato_hinton_2010(
        dataset='MAR', as_unittest=True, n_train_iters=5000,
        rbm_alloc=_default_rbm_alloc, trainer_alloc=_default_trainer_alloc,
        lr_per_example=.075,
        l1_penalty=1e-3,
        l1_penalty_start=1000,
        persistent_chains=True,
        ):

    batchsize = 128

    if dataset == 'MAR':
        n_vis = 105
        n_patches = 10240
        epoch_size = n_patches
    elif dataset == 'cifar10patches8x8':
        R, C = 8, 8  # the size of image patches
        n_vis = 96  # pca components
        epoch_size = batchsize * 500
        n_patches = epoch_size * 20
    elif dataset == 'tinyimages_patches':
        R, C = 8, 8
        n_vis = 81
        epoch_size = batchsize * 500
        n_patches = epoch_size * 20
    else:
        R, C = 16, 16  # the size of image patches
        n_vis = R * C
        n_patches = 100000
        epoch_size = n_patches

    def l2(X):
        return numpy.sqrt((X ** 2).sum())

    if dataset == 'MAR':
        tile = pylearn.dataset_ops.image_patches.save_filters_of_ranzato_hinton_2010
    elif dataset == 'cifar10patches8x8':
        def tile(X, fname):
            _img = pylearn.datasets.cifar10.tile_rasterized_examples(
                    pylearn.preprocessing.pca.pca_whiten_inverse(
                        pylearn.dataset_ops.cifar10.random_cifar_patches_pca(
                            n_vis, None, 'float32', n_patches, R, C,),
                        X),
                    img_shape=(R, C))
            image_tiling.save_tiled_raster_images(_img, fname)
    elif dataset == 'tinyimages_patches':
        tile = pylearn.dataset_ops.tinyimages.save_filters
    else:
        def tile(X, fname):
            _img = image_tiling.tile_raster_images(X,
                    img_shape=(R, C),
                    min_dynamic_range=1e-2)
            image_tiling.save_tiled_raster_images(_img, fname)

    batch_idx = tensor.iscalar()
    batch_range = batch_idx * batchsize + numpy.arange(batchsize)

    if dataset == 'MAR':
        train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range)
    elif dataset == 'cifar10patches8x8':
        train_batch = pylearn.dataset_ops.cifar10.cifar10_patches(
                batch_range, 'train', n_patches=n_patches, patch_size=(R, C),
                pca_components=n_vis)
    elif dataset == 'tinyimages_patches':
        train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range)
    else:
        train_batch = pylearn.dataset_ops.image_patches.image_patches(
                s_idx=(batch_idx * batchsize + numpy.arange(batchsize)),
                dims=(n_patches, R, C),
                center=True,
                unitvar=True,
                dtype=theano.config.floatX,
                rasterized=True)

    if not as_unittest:
        imgs_fn = theano.function([batch_idx], outputs=train_batch)

    trainer = trainer_alloc(
            rbm_alloc(n_I=n_vis),
            train_batch,
            batchsize,
            initial_lr_per_example=lr_per_example,
            l1_penalty=l1_penalty,
            l1_penalty_start=l1_penalty_start,
            persistent_chains=persistent_chains)
    rbm = trainer.rbm

    if persistent_chains:
        grads = trainer.contrastive_grads()
        learn_fn = theano.function([batch_idx],
                outputs=[grads[0].norm(2), grads[0].norm(2), grads[1].norm(2)],
                updates=trainer.cd_updates())
    else:
        learn_fn = theano.function([batch_idx], outputs=[],
             updates=trainer.cd_updates())

    if persistent_chains:
        smplr = trainer.sampler
    else:
        smplr = trainer._last_cd1_sampler

    if dataset == 'cifar10patches8x8':
        cPickle.dump(
                pylearn.dataset_ops.cifar10.random_cifar_patches_pca(
                    n_vis, None, 'float32', n_patches, R, C,),
                open('test_mcRBM.pca.pkl', 'w'))

    print "Learning..."
    last_epoch = -1
    for jj in xrange(n_train_iters):
        epoch = jj * batchsize / epoch_size

        print_jj = epoch != last_epoch
        last_epoch = epoch

        if as_unittest and epoch == 5:
            U = rbm.U.get_value(borrow=True)
            W = rbm.W.get_value(borrow=True)

            def allclose(a, b):
                return numpy.allclose(a, b, rtol=1.01, atol=1e-3)
            print ""
            print "--------------"
            print "assert allclose(l2(U), %f)" % l2(U)
            print "assert allclose(l2(W), %f)" % l2(W)
            print "assert allclose(U.min(), %f)" % U.min()
            print "assert allclose(U.max(), %f)" % U.max()
            print "assert allclose(W.min(),%f)" % W.min()
            print "assert allclose(W.max(), %f)" % W.max()
            print "--------------"

            assert allclose(l2(U), 21.351664)
            assert allclose(l2(W), 6.275828)
            assert allclose(U.min(), -1.176703)
            assert allclose(U.max(), 0.859802)
            assert allclose(W.min(), -0.223128)
            assert allclose(W.max(), 0.227558)

            break

        if print_jj:
            if not as_unittest:
                tile(imgs_fn(jj), "imgs_%06i.png" % jj)
                if persistent_chains:
                    tile(smplr.positions.value, "sample_%06i.png" % jj)
                tile(rbm.U.value.T, "U_%06i.png" % jj)
                tile(rbm.W.value.T, "W_%06i.png" % jj)

            print 'saving samples', jj, 'epoch', jj / (epoch_size / batchsize)

            print 'l2(U)', l2(rbm.U.get_value(borrow=True)),
            print 'l2(W)', l2(rbm.W.get_value(borrow=True)),
            print 'l1_penalty',
            try:
                print trainer.effective_l1_penalty.get_value(borrow=True)
            except:
                print trainer.effective_l1_penalty

            print 'U min max', rbm.U.get_value(borrow=True).min(), rbm.U.get_value(borrow=True).max(),
            print 'W min max', rbm.W.get_value(borrow=True).min(), rbm.W.get_value(borrow=True).max(),
            print 'a min max', rbm.a.get_value(borrow=True).min(), rbm.a.get_value(borrow=True).max(),
            print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(),
            print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max()

            if persistent_chains:
                print 'parts min', smplr.positions.get_value(borrow=True).min(),
                print 'max', smplr.positions.get_value(borrow=True).max(),
            print 'HMC step', smplr.stepsize.get_value(borrow=True),
            print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True)

        l2_of_Ugrad = learn_fn(jj)

        if persistent_chains and print_jj:
            print 'l2(U_grad)', float(l2_of_Ugrad[0]),
            print 'l2(U_inc)', float(l2_of_Ugrad[1]),
            print 'l2(W_inc)', float(l2_of_Ugrad[2]),
            #print 'FE+', float(l2_of_Ugrad[2]),
            #print 'FE+[0]', float(l2_of_Ugrad[3]),
            #print 'FE+[1]', float(l2_of_Ugrad[4]),
            #print 'FE+[2]', float(l2_of_Ugrad[5]),
            #print 'FE+[3]', float(l2_of_Ugrad[6])

        if not as_unittest:
            if jj % 2000 == 0:
                print ''
                print 'Saving rbm...'
                cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl' % jj, 'w'), -1)
                if persistent_chains:
                    print 'Saving sampler...'
                    cPickle.dump(smplr, open(
                        'mcRBM.smplr.%06i.pkl' % jj, 'w'), -1)

    if not as_unittest:
        return rbm, smplr


def run_classif_experiment(checkpoint):

    R, C = 8, 8
    n_vis = 74
    # PRETRAIN
    #
    # extract 1 million 8x8 patches from TinyImages
    # pre-process them the right way
    # find 74 dims of PCA
    # filter patches through PCA
    whitened_patches, pca_dct = pylearn.dataset_ops.tinyimages.main(
        n_imgs=100000, max_components=n_vis, seed=234)
    #
    # Set up mcRBM Trainer
    # Initialize P using topological 3x3 overlapping patches thing
    # start learning P matrix after 2 passes through dataset
    #
    rbm_filename = 'mcRBM.rbm.%06i.pkl' % 46000
    try:
        open(rbm_filename).close()
        load_mcrbm = True
    except:
        load_mcrbm = False

    if load_mcrbm:
        print 'loading mcRBM from file', rbm_filename
        rbm = cPickle.load(open(rbm_filename))

    else:
        print "Training mcRBM"
        batchsize = 128
        epoch_size = len(whitened_patches)
        tile = pylearn.dataset_ops.tinyimages.save_filters
        train_batch = theano.tensor.matrix()
        trainer = mcRBMTrainer.alloc_for_P(
                rbm=mcRBM_withP.alloc_topo_P(n_I=n_vis, n_J=81),
                visible_batch=train_batch,
                batchsize=batchsize,
                initial_lr_per_example=0.05,
                l1_penalty=1e-3,
                l1_penalty_start=sys.maxint,
                p_training_start=2 * epoch_size // batchsize,
                persistent_chains=False)
        rbm = trainer.rbm
        learn_fn = theano.function([train_batch], outputs=[],
             updates=trainer.cd_updates())
        smplr = trainer._last_cd1_sampler

        ii = 0
        for i_epoch in range(6):
            for i_batch in xrange(epoch_size // batchsize):
                batch_vals = whitened_patches[i_batch *
                    batchsize:(i_batch + 1) * batchsize]
                learn_fn(batch_vals)

                if (ii % 1000) == 0:
                    #tile(imgs_fn(ii), "imgs_%06i.png"%ii)
                    tile(rbm.U.value.T, "U_%06i.png" % ii)
                    tile(rbm.W.value.T, "W_%06i.png" % ii)

                    print 'saving samples', ii, 'epoch', i_epoch, i_batch

                    print 'l2(U)', l2(rbm.U.get_value(borrow=True)),
                    print 'l2(W)', l2(rbm.W.get_value(borrow=True)),
                    print 'l1_penalty',
                    try:
                        print trainer.effective_l1_penalty.get_value(borrow=True)
                    except:
                        print trainer.effective_l1_penalty

                    print 'U min max', rbm.U.get_value(
                        borrow=True).min(), rbm.U.get_value(borrow=True).max(),
                    print 'W min max', rbm.W.get_value(
                        borrow=True).min(), rbm.W.get_value(borrow=True).max(),
                    print 'a min max', rbm.a.get_value(
                        borrow=True).min(), rbm.a.get_value(borrow=True).max(),
                    print 'b min max', rbm.b.get_value(
                        borrow=True).min(), rbm.b.get_value(borrow=True).max(),
                    print 'c min max', rbm.c.get_value(
                        borrow=True).min(), rbm.c.get_value(borrow=True).max()

                    print 'HMC step', smplr.stepsize.get_value(borrow=True),
                    print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True)
                    print 'P min max', rbm.P.get_value(
                        borrow=True).min(), rbm.P.get_value(borrow=True).max(),
                    print 'P_lr', trainer.p_lr.get_value(borrow=True)
                    print ''
                    print 'Saving rbm...'
                    cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl' % ii, 'w'), -1)

                ii += 1

    # extract convolutional features from the CIFAR10 data
    feat_filename = 'mcrbm_features.npy'
    feat_filename = 'cifar10.features.46000.npy'
    try:
        open(feat_filename).close()
        load_features = True
    except:
        load_features = False

    if load_features:
        print 'Loading features from', feat_filename
        all_features = numpy.load(feat_filename, mmap_mode='r')
    else:
        batchsize = 100
        feat_idx = tensor.lscalar()
        feat_idx_range = feat_idx * batchsize + tensor.arange(batchsize)
        train_batch_x, train_batch_y = pylearn.dataset_ops.cifar10.cifar10(
                feat_idx_range,
                split='all',
                dtype='uint8',
                rasterized=False,
                color='rgb')

        WINDOW_SIZE = 8
        WINDOW_STRIDE = 4

        # put these into shared vars because support for big matrix
        # constants is bad, (comparing them is slow)
        pca_eigvecs = theano.shared(pca_dct['eig_vecs'].astype('float32'))
        pca_eigvals = theano.shared(pca_dct['eig_vals'].astype('float32'))
        pca_mean = theano.shared(pca_dct['mean'].astype('float32'))

        def theano_pca_whiten(X):
            #copying preprepcessing.pca.pca_whiten
            return tensor.true_div(
                tensor.dot(X - pca_mean, pca_eigvecs),
                tensor.sqrt(pca_eigvals) + 1e-8)

        h_list = []
        g_list = []
        for r_offset in range(0, 32 - WINDOW_SIZE + 1, WINDOW_STRIDE):
            for c_offset in range(0, 32 - WINDOW_SIZE + 1, WINDOW_STRIDE):
                window = train_batch_x[:, r_offset:r_offset + WINDOW_SIZE,
                        c_offset:c_offset + WINDOW_SIZE, :]
                assert window.dtype == 'uint8'

                #rasterize the patches
                raster_window = tensor.flatten(tensor.cast(
                    window, 'float32'), 2)

                #subtract off the mean of each image
                raster_window = raster_window - raster_window.mean(axis=1).reshape((batchsize,1))

                h, g = rbm.expected_h_g_given_v(
                    theano_pca_whiten(raster_window))

                h_list.append(h)
                g_list.append(g)

        hg = tensor.concatenate(h_list + g_list, axis=1)

        feat_fn = theano.function([feat_idx], hg)
        features = numpy.empty((60000, 11025), dtype='float32')
        for i in xrange(60000 // batchsize):
            if i % 100 == 0:
                print("feature batch %i" % i)
            features[i * batchsize:(i + 1) * batchsize] = feat_fn(i)

        print("saving features to %s" % feat_filename)
        numpy.save(feat_filename, features)
        all_features = features
        del features

    # CLASSIFY FEATURES
    if 0:
        # nothing to load
        pass
    else:
        batchsize = 100

        if feat_filename.startswith('cifar'):
            learnrate = 0.002
            l1_regularization = 0.004
            anneal_epoch = 100
            n_epochs = 500
        else:
            learnrate = 0.005
            l1_regularization = 0.004
            n_epochs = 100
            anneal_epoch = 20

        x_i = tensor.matrix()
        y_i = tensor.ivector()
        lr = tensor.scalar()
        #l1_regularization = float(sys.argv[1]) #1.e-3
        #l2_regularization = float(sys.argv[2]) #1.e-3*0

        feature_logreg = LogisticRegression.new(x_i,
                n_in=11025, n_out=10,
                dtype=x_i.dtype)

        # marc'aurelio does this...
        feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,
                                                                    10) * .02

        traincost = feature_logreg.nll(y_i).sum()
        traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization
        #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization
        train_logreg_fn = theano.function([x_i, y_i, lr],
                [feature_logreg.nll(y_i).mean(),
                    feature_logreg.errors(y_i).mean()],
                updates=pylearn.gd.sgd.sgd_updates(
                    params=feature_logreg.params,
                    grads=tensor.grad(traincost, feature_logreg.params),
                    stepsizes=[lr, lr / 10.]))

        all_labels = pylearn.dataset_ops.cifar10.all_data_labels('uint8')[1]
        pylearn.dataset_ops.cifar10.all_data_labels.forget()
              # clear memo cache
        assert len(all_labels) == 60000
        if 0:
            print "Using validation set"
            train_labels = all_labels[:40000]
            valid_labels = all_labels[40000:50000]
            test_labels = all_labels[50000:60000]
            train_features = all_features[:40000]
            valid_features = all_features[40000:50000]
            test_features = all_features[50000:60000]
        else:
            print "NOT USING validation set"
            train_labels = all_labels[:50000]
            valid_labels = None
            test_labels = all_labels[50000:60000]
            train_features = all_features[:50000]
            valid_features = None
            test_features = all_features[50000:60000]

        if 1:
            print "Computing mean and std.dev"
            train_mean = train_features.mean(axis=0)
            train_std = train_features.std(axis=0) + 1e-4
            preproc = lambda x: (x - train_mean) / (0.1 + train_std)
        else:
            print "Not centering data"
            preproc = lambda x: x

        for epoch in xrange(n_epochs):
            print 'epoch', epoch
            # validate
            # Marc'Aurelio, you crazy!!
            # the division by batchsize is done in the cost function
            e_lr = learnrate / (batchsize * max(1.0, numpy.floor(
                max(1., epoch / float(anneal_epoch)) - 2)))

            if valid_features is not None:
                l01s = []
                nlls = []
                for i in xrange(10000 / batchsize):
                    x_i = valid_features[i * batchsize:(i + 1) * batchsize]
                    y_i = valid_labels[i * batchsize:(i + 1) * batchsize]

                    #lr=0.0 -> no learning, safe for validation set
                    nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
                    nlls.append(nll)
                    l01s.append(l01)
                print 'validate log_reg', numpy.mean(nlls), numpy.mean(l01s)

            # test

            l01s = []
            nlls = []
            for i in xrange(len(test_features) // batchsize):
                x_i = test_features[i * batchsize:(i + 1) * batchsize]
                y_i = test_labels[i * batchsize:(i + 1) * batchsize]

                #lr=0.0 -> no learning, safe for validation set
                nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
                nlls.append(nll)
                l01s.append(l01)
            print 'test log_reg', numpy.mean(nlls), numpy.mean(l01s)

            #train
            l01s = []
            nlls = []
            for i in xrange(len(train_features) // batchsize):
                x_i = train_features[i * batchsize:(i + 1) * batchsize]
                y_i = train_labels[i * batchsize:(i + 1) * batchsize]
                nll, l01 = train_logreg_fn(preproc(x_i), y_i, e_lr)
                nlls.append(nll)
                l01s.append(l01)
            print 'train log_reg', numpy.mean(nlls), numpy.mean(l01s)


import pickle as cPickle
#import cPickle
if __name__ == '__main__':
    if 0:
        #learning 16 x 16 pinwheel filters from official cifar patches (MAR)
        rbm, smplr = test_reproduce_ranzato_hinton_2010(
                as_unittest=False,
                n_train_iters=5000,
                rbm_alloc=lambda n_I: mcRBM_withP.alloc_topo_P(n_I, n_J=81),
                trainer_alloc=mcRBMTrainer.alloc_for_P,
                dataset='MAR'
                )

    if 0:
        # pretraining settings
        rbm, smplr = test_reproduce_ranzato_hinton_2010(
                as_unittest=False,
                n_train_iters=60000,
                rbm_alloc=lambda n_I: mcRBM_withP.alloc_topo_P(n_I, n_J=81),
                trainer_alloc=mcRBMTrainer.alloc_for_P,
                lr_per_example=0.05,
                dataset='tinyimages_patches',
                l1_penalty=1e-3,
                l1_penalty_start=30000,
                #l1_penalty_start=350, #DEBUG
                persistent_chains=False
                )

    if 1:
        def checkpoint():
            return checkpoint
        run_classif_experiment(checkpoint=checkpoint)
author	Frederic Bastien <nouiz@nouiz.org>
date	Fri, 02 Nov 2012 13:02:18 -0400
parents	7f166d01bf8e
children	9c24a2bdbe90