view pylearn/algorithms/tests/test_mcRBM.py @ 1512:7f166d01bf8e

Remove deprecation warning.
author Frederic Bastien <nouiz@nouiz.org>
date Mon, 12 Sep 2011 11:59:55 -0400
parents b709f6b53b17
children 9d21919e2332

import sys
import pickle as cPickle
#import cPickle

import numpy
import theano
from theano import tensor

from pylearn.algorithms.mcRBM import mcRBM, mcRBMTrainer, mcRBM_withP, l2
#import pylearn.datasets.cifar10  # needed (with pylearn.preprocessing.pca) by the 'cifar10patches8x8' paths below
import pylearn.dataset_ops.cifar10
from pylearn.shared.layers.logreg import LogisticRegression
from pylearn.io import image_tiling
import pylearn.dataset_ops.image_patches
import pylearn.dataset_ops.tinyimages
import pylearn.gd.sgd


def _default_rbm_alloc(n_I, n_K=256, n_J=100):
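    # defaults follow Ranzato & Hinton (2010): presumably n_K covariance
    # hidden units and n_J mean hidden units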
    return mcRBM.alloc(n_I, n_K, n_J)

def _default_trainer_alloc(rbm, train_batch, batchsize, initial_lr_per_example,
                           l1_penalty, l1_penalty_start, persistent_chains):
    return mcRBMTrainer.alloc(rbm, train_batch, batchsize,
            initial_lr_per_example=initial_lr_per_example,
            l1_penalty=l1_penalty,
            l1_penalty_start=l1_penalty_start,
            persistent_chains=persistent_chains)


def test_reproduce_ranzato_hinton_2010(dataset='MAR', as_unittest=True, n_train_iters=5000,
        rbm_alloc=_default_rbm_alloc, trainer_alloc=_default_trainer_alloc,
        lr_per_example=.075,
        l1_penalty=1e-3,
        l1_penalty_start=1000,
        persistent_chains=True,
        ):

    batchsize = 128

    if dataset == 'MAR':
        n_vis = 105
        n_patches = 10240
        epoch_size = n_patches
    elif dataset == 'cifar10patches8x8':
        R, C = 8, 8  # the size of image patches
        n_vis = 96   # pca components
        epoch_size = batchsize * 500
        n_patches = epoch_size * 20
    elif dataset == 'tinyimages_patches':
        R, C = 8, 8
        n_vis = 81
        epoch_size = batchsize * 500
        n_patches = epoch_size * 20
    else:
        R, C = 16, 16  # the size of image patches
        n_vis = R * C
        n_patches = 100000
        epoch_size = n_patches

    def l2(X):
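        # Frobenius norm, e.g. l2(numpy.ones((3, 3))) == 3.0; shadows the l2
        # imported from pylearn.algorithms.mcRBM above, which presumably
        # computes the same quantity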
        return numpy.sqrt((X**2).sum())

    if dataset == 'MAR':
        tile = pylearn.dataset_ops.image_patches.save_filters_of_ranzato_hinton_2010
    elif dataset == 'cifar10patches8x8':
        def tile(X, fname):
            _img = pylearn.datasets.cifar10.tile_rasterized_examples(
                    pylearn.preprocessing.pca.pca_whiten_inverse(
                        pylearn.dataset_ops.cifar10.random_cifar_patches_pca(
                            n_vis, None, 'float32', n_patches, R, C,),
                        X),
                    img_shape=(R,C))
            image_tiling.save_tiled_raster_images(_img, fname)
    elif dataset == 'tinyimages_patches':
        tile = pylearn.dataset_ops.tinyimages.save_filters
    else:
        def tile(X, fname):
            _img = image_tiling.tile_raster_images(X,
                    img_shape=(R,C),
                    min_dynamic_range=1e-2)
            image_tiling.save_tiled_raster_images(_img, fname)

    batch_idx = tensor.iscalar()
    batch_range = batch_idx * batchsize + numpy.arange(batchsize)
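    # e.g. batch_idx == 2, batchsize == 128 -> indices 256, 257, ..., 383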

    if dataset == 'MAR':
        train_batch = pylearn.dataset_ops.image_patches.ranzato_hinton_2010_op(batch_range)
    elif dataset == 'cifar10patches8x8':
        train_batch = pylearn.dataset_ops.cifar10.cifar10_patches(
                batch_range, 'train', n_patches=n_patches, patch_size=(R,C),
                pca_components=n_vis)
    elif dataset == 'tinyimages_patches':
        train_batch = pylearn.dataset_ops.tinyimages.tinydataset_op(batch_range)
    else:
        train_batch = pylearn.dataset_ops.image_patches.image_patches(
                s_idx = (batch_idx * batchsize + numpy.arange(batchsize)),
                dims = (n_patches,R,C),
                center=True,
                unitvar=True,
                dtype=theano.config.floatX,
                rasterized=True)

    if not as_unittest:
        imgs_fn = theano.function([batch_idx], outputs=train_batch)

    trainer = trainer_alloc(
            rbm_alloc(n_I=n_vis),
            train_batch,
            batchsize,
            initial_lr_per_example=lr_per_example,
            l1_penalty=l1_penalty,
            l1_penalty_start=l1_penalty_start,
            persistent_chains=persistent_chains)
    rbm=trainer.rbm

    if persistent_chains:
        grads = trainer.contrastive_grads()
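        # monitor a few gradient norms while learning; note that outputs[0]
        # and outputs[1] below are both the norm of grads[0]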
        learn_fn = theano.function([batch_idx],
                outputs=[grads[0].norm(2), grads[0].norm(2), grads[1].norm(2)],
                updates=trainer.cd_updates())
    else:
        learn_fn = theano.function([batch_idx], outputs=[], updates=trainer.cd_updates())

    if persistent_chains:
        smplr = trainer.sampler
    else:
        smplr = trainer._last_cd1_sampler

    if dataset == 'cifar10patches8x8':
        cPickle.dump(
                pylearn.dataset_ops.cifar10.random_cifar_patches_pca(
                    n_vis, None, 'float32', n_patches, R, C,),
                open('test_mcRBM.pca.pkl', 'wb'))

    print "Learning..."
    last_epoch = -1
    for jj in xrange(n_train_iters):
        epoch = jj*batchsize / epoch_size

        print_jj = epoch != last_epoch
        last_epoch = epoch

        if as_unittest and epoch == 5:
            U = rbm.U.get_value(borrow=True)
            W = rbm.W.get_value(borrow=True)
            def allclose(a,b):
                return numpy.allclose(a,b,rtol=1.01,atol=1e-3)
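            # rtol=1.01 makes this a very loose (~100% relative) check; the
            # asserts below are coarse regression guards, not exact tests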
            print ""
            print "--------------"
            print "assert allclose(l2(U), %f)"%l2(U)
            print "assert allclose(l2(W), %f)"%l2(W)
            print "assert allclose(U.min(), %f)"%U.min()
            print "assert allclose(U.max(), %f)"%U.max()
            print "assert allclose(W.min(),%f)"%W.min()
            print "assert allclose(W.max(), %f)"%W.max()
            print "--------------"

            assert allclose(l2(U), 21.351664)
            assert allclose(l2(W), 6.275828)
            assert allclose(U.min(), -1.176703)
            assert allclose(U.max(), 0.859802)
            assert allclose(W.min(), -0.223128)
            assert allclose(W.max(), 0.227558)

            break

        if print_jj:
            if not as_unittest:
                tile(imgs_fn(jj), "imgs_%06i.png"%jj)
                if persistent_chains:
                    tile(smplr.positions.get_value(borrow=True), "sample_%06i.png"%jj)
                tile(rbm.U.get_value(borrow=True).T, "U_%06i.png"%jj)
                tile(rbm.W.get_value(borrow=True).T, "W_%06i.png"%jj)

            print 'saving samples', jj, 'epoch', jj/(epoch_size/batchsize)

            print 'l2(U)', l2(rbm.U.get_value(borrow=True)),
            print 'l2(W)', l2(rbm.W.get_value(borrow=True)),
            print 'l1_penalty',
            try:
                print trainer.effective_l1_penalty.get_value(borrow=True)
            except AttributeError:
                # effective_l1_penalty may be a plain float, not a shared variable
                print trainer.effective_l1_penalty

            print 'U min max', rbm.U.get_value(borrow=True).min(), rbm.U.get_value(borrow=True).max(),
            print 'W min max', rbm.W.get_value(borrow=True).min(), rbm.W.get_value(borrow=True).max(),
            print 'a min max', rbm.a.get_value(borrow=True).min(), rbm.a.get_value(borrow=True).max(),
            print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(),
            print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max()

            if persistent_chains:
                print 'parts min', smplr.positions.get_value(borrow=True).min(),
                print 'max', smplr.positions.get_value(borrow=True).max(),
            print 'HMC step', smplr.stepsize.get_value(borrow=True),
            print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True)
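            # the HMC sampler adapts its stepsize to drive avg_acceptance_rate
            # toward its target, so these two numbers should co-evolve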


        l2_of_Ugrad = learn_fn(jj)

        if persistent_chains and print_jj:
            print 'l2(U_grad)', float(l2_of_Ugrad[0]),
            print 'l2(U_inc)', float(l2_of_Ugrad[1]),
            print 'l2(W_inc)', float(l2_of_Ugrad[2]),
            #print 'FE+', float(l2_of_Ugrad[2]),
            #print 'FE+[0]', float(l2_of_Ugrad[3]),
            #print 'FE+[1]', float(l2_of_Ugrad[4]),
            #print 'FE+[2]', float(l2_of_Ugrad[5]),
            #print 'FE+[3]', float(l2_of_Ugrad[6])

        if not as_unittest:
            if jj % 2000 == 0:
                print ''
                print 'Saving rbm...'
                cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl'%jj, 'wb'), -1)
                if persistent_chains:
                    print 'Saving sampler...'
                    cPickle.dump(smplr, open('mcRBM.smplr.%06i.pkl'%jj, 'wb'), -1)


    if not as_unittest:
        return rbm, smplr


def run_classif_experiment(checkpoint):

    R,C=8,8
    n_vis=74
    # PRETRAIN
    #
    # extract 1 million 8x8 patches from TinyImages
    # pre-process them the right way
    # find 74 dims of PCA
    # filter patches through PCA
    whitened_patches, pca_dct = pylearn.dataset_ops.tinyimages.main(n_imgs=100000,
            max_components=n_vis, seed=234)
    #
    # Set up mcRBM Trainer
    # Initialize P using topological 3x3 overlapping patches thing
    # start learning P matrix after 2 passes through dataset
    #
    rbm_filename = 'mcRBM.rbm.%06i.pkl'%46000
    try:
        open(rbm_filename).close()
        load_mcrbm = True
    except IOError:
        load_mcrbm = False

    if load_mcrbm:
        print 'loading mcRBM from file', rbm_filename
        rbm = cPickle.load(open(rbm_filename, 'rb'))

    else:
        print "Training mcRBM"
        batchsize=128
        epoch_size=len(whitened_patches)
        tile = pylearn.dataset_ops.tinyimages.save_filters
        train_batch = theano.tensor.matrix()
        trainer = mcRBMTrainer.alloc_for_P(
                rbm=mcRBM_withP.alloc_topo_P(n_I=n_vis, n_J=81),
                visible_batch=train_batch,
                batchsize=batchsize,
                initial_lr_per_example=0.05,
                l1_penalty=1e-3,
                l1_penalty_start=sys.maxint,
                p_training_start=2*epoch_size//batchsize,
                persistent_chains=False)
        rbm=trainer.rbm
        learn_fn = theano.function([train_batch], outputs=[], updates=trainer.cd_updates())
        smplr = trainer._last_cd1_sampler

        ii = 0
        for i_epoch in range(6):
            for i_batch in xrange(epoch_size // batchsize):
                batch_vals = whitened_patches[i_batch*batchsize:(i_batch+1)*batchsize]
                learn_fn(batch_vals)

                if (ii % 1000) == 0:
                    #tile(imgs_fn(ii), "imgs_%06i.png"%ii)
                    tile(rbm.U.get_value(borrow=True).T, "U_%06i.png"%ii)
                    tile(rbm.W.get_value(borrow=True).T, "W_%06i.png"%ii)

                    print 'saving samples', ii, 'epoch', i_epoch, i_batch

                    print 'l2(U)', l2(rbm.U.get_value(borrow=True)),
                    print 'l2(W)', l2(rbm.W.get_value(borrow=True)),
                    print 'l1_penalty',
                    try:
                        print trainer.effective_l1_penalty.get_value(borrow=True)
                    except AttributeError:
                        # effective_l1_penalty may be a plain float, not a shared variable
                        print trainer.effective_l1_penalty

                    print 'U min max', rbm.U.get_value(borrow=True).min(), rbm.U.get_value(borrow=True).max(),
                    print 'W min max', rbm.W.get_value(borrow=True).min(), rbm.W.get_value(borrow=True).max(),
                    print 'a min max', rbm.a.get_value(borrow=True).min(), rbm.a.get_value(borrow=True).max(),
                    print 'b min max', rbm.b.get_value(borrow=True).min(), rbm.b.get_value(borrow=True).max(),
                    print 'c min max', rbm.c.get_value(borrow=True).min(), rbm.c.get_value(borrow=True).max()

                    print 'HMC step', smplr.stepsize.get_value(borrow=True),
                    print 'arate', smplr.avg_acceptance_rate.get_value(borrow=True)
                    print 'P min max', rbm.P.get_value(borrow=True).min(), rbm.P.get_value(borrow=True).max(),
                    print 'P_lr', trainer.p_lr.get_value(borrow=True)
                    print ''
                    print 'Saving rbm...'
                    cPickle.dump(rbm, open('mcRBM.rbm.%06i.pkl'%ii, 'wb'), -1)

                ii += 1


    # extract convolutional features from the CIFAR10 data
    #feat_filename = 'mcrbm_features.npy'  # (alternative output name, unused)
    feat_filename = 'cifar10.features.46000.npy'
    try:
        open(feat_filename).close()
        load_features = True
    except IOError:
        load_features = False

    if load_features:
        print 'Loading features from', feat_filename
        all_features = numpy.load(feat_filename, mmap_mode='r')
    else:
        batchsize=100
        feat_idx = tensor.lscalar()
        feat_idx_range = feat_idx * batchsize + tensor.arange(batchsize)
        train_batch_x, train_batch_y = pylearn.dataset_ops.cifar10.cifar10(
                feat_idx_range,
                split='all',
                dtype='uint8',
                rasterized=False,
                color='rgb')

        WINDOW_SIZE=8
        WINDOW_STRIDE=4

        # put these into shared vars because Theano's support for big matrix
        # constants is poor (comparing them during graph optimization is slow)
        pca_eigvecs = theano.shared(pca_dct['eig_vecs'].astype('float32'))
        pca_eigvals = theano.shared(pca_dct['eig_vals'].astype('float32'))
        pca_mean    = theano.shared(pca_dct['mean'].astype('float32'))

        def theano_pca_whiten(X):
            # mirrors pylearn.preprocessing.pca.pca_whiten
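            # i.e. whitened = ((X - mean) . eig_vecs) / (sqrt(eig_vals) + 1e-8):
            # project onto the PCA basis, then rescale to roughly unit variance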
            return tensor.true_div(
                tensor.dot(X-pca_mean, pca_eigvecs),
                tensor.sqrt(pca_eigvals)+1e-8)

        h_list = []
        g_list = []
        for r_offset in range(0, 32-WINDOW_SIZE+1, WINDOW_STRIDE):
            for c_offset in range(0, 32-WINDOW_SIZE+1, WINDOW_STRIDE):
                window = train_batch_x[:, r_offset:r_offset+WINDOW_SIZE,
                        c_offset:c_offset+WINDOW_SIZE, :]
                assert window.dtype=='uint8'

                #rasterize the patches
                raster_window = tensor.flatten(tensor.cast(window, 'float32'),2)

                #subtract off the mean of each image
                raster_window = raster_window - raster_window.mean(axis=1).reshape((batchsize,1))

                h,g = rbm.expected_h_g_given_v(theano_pca_whiten(raster_window))

                h_list.append(h)
                g_list.append(g)

        hg = tensor.concatenate(h_list + g_list, axis=1)
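        # 8x8 windows at stride 4 over 32x32 images -> a 7x7 grid of 49
        # locations; 49 * 225 = 11025 features per image, so each location
        # presumably contributes 225 values (dim(h) + dim(g))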

        feat_fn = theano.function([feat_idx], hg)
        features = numpy.empty((60000, 11025), dtype='float32')
        for i in xrange(60000//batchsize):
            if i % 100 == 0:
                print("feature batch %i"%i)
            features[i*batchsize:(i+1)*batchsize] = feat_fn(i)

        print("saving features to %s"%feat_filename)
        numpy.save(feat_filename, features)
        all_features = features
        del features


    # CLASSIFY FEATURES

    if 0:
        # nothing to load
        pass
    else:
        batchsize=100

        if feat_filename.startswith('cifar'):
            learnrate = 0.002
            l1_regularization = 0.004
            anneal_epoch=100
            n_epochs = 500
        else:
            learnrate = 0.005
            l1_regularization = 0.004
            n_epochs = 100
            anneal_epoch=20

        x_i = tensor.matrix()
        y_i = tensor.ivector()
        lr = tensor.scalar()
        #l1_regularization = float(sys.argv[1]) #1.e-3
        #l2_regularization = float(sys.argv[2]) #1.e-3*0

        feature_logreg = LogisticRegression.new(x_i,
                n_in = 11025, n_out=10,
                dtype=x_i.dtype)

        # marc'aurelio does this...
        feature_logreg.w.set_value(
                (numpy.random.RandomState(44).randn(11025, 10) * .02).astype(
                    feature_logreg.w.dtype))

        traincost = feature_logreg.nll(y_i).sum()
        traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization
        #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization
        train_logreg_fn = theano.function([x_i, y_i, lr],
                [feature_logreg.nll(y_i).mean(),
                    feature_logreg.errors(y_i).mean()],
                updates=pylearn.gd.sgd.sgd_updates(
                    params=feature_logreg.params,
                    grads=tensor.grad(traincost, feature_logreg.params),
                    stepsizes=[lr,lr/10.]))
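        # stepsizes pair positionally with params (presumably [w, b]), giving
        # the bias a 10x smaller step than the weights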

        all_labels = pylearn.dataset_ops.cifar10.all_data_labels('uint8')[1]
        pylearn.dataset_ops.cifar10.all_data_labels.forget() # clear memo cache
        assert len(all_labels)==60000
        if 0:
            print "Using validation set"
            train_labels = all_labels[:40000]
            valid_labels = all_labels[40000:50000]
            test_labels = all_labels[50000:60000]
            train_features = all_features[:40000]
            valid_features = all_features[40000:50000]
            test_features = all_features[50000:60000]
        else:
            print "NOT USING validation set"
            train_labels = all_labels[:50000]
            valid_labels = None
            test_labels = all_labels[50000:60000]
            train_features = all_features[:50000]
            valid_features = None
            test_features = all_features[50000:60000]

        if 1:
            print "Computing mean and std.dev"
            train_mean = train_features.mean(axis=0)
            train_std = train_features.std(axis=0)+1e-4
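            # the std.dev. is regularized twice (+1e-4 above, +0.1 below), so
            # near-constant features cannot blow up after normalization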
            preproc = lambda x: (x-train_mean)/(0.1+train_std)
        else:
            print "Not centering data"
            preproc = lambda x:x

        for epoch in xrange(n_epochs):
            print 'epoch', epoch
            # validate
            # Marc'Aurelio, you crazy!!
            # the division by batchsize is done in the cost function
            e_lr = learnrate / (batchsize*max(1.0, numpy.floor(max(1., epoch/float(anneal_epoch))-2)))
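            # e.g. learnrate=0.002, batchsize=100, anneal_epoch=100: e_lr stays
            # at 2e-5 until epoch 300, then decays as 1/floor(epoch/100 - 2);
            # the max() calls keep the divisor >= 1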

            if valid_features is not None:
                l01s = []
                nlls = []
                for i in xrange(len(valid_features)//batchsize):
                    x_i = valid_features[i*batchsize:(i+1)*batchsize]
                    y_i = valid_labels[i*batchsize:(i+1)*batchsize]

                    #lr=0.0 -> no learning, safe for validation set
                    nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
                    nlls.append(nll)
                    l01s.append(l01)
                print 'validate log_reg', numpy.mean(nlls), numpy.mean(l01s)

            # test

            l01s = []
            nlls = []
            for i in xrange(len(test_features)//batchsize):
                x_i = test_features[i*batchsize:(i+1)*batchsize]
                y_i = test_labels[i*batchsize:(i+1)*batchsize]

                #lr=0.0 -> no learning, safe for the test set
                nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
                nlls.append(nll)
                l01s.append(l01)
            print 'test log_reg', numpy.mean(nlls), numpy.mean(l01s)

            #train
            l01s = []
            nlls = []
            for i in xrange(len(train_features)//batchsize):
                x_i = train_features[i*batchsize:(i+1)*batchsize]
                y_i = train_labels[i*batchsize:(i+1)*batchsize]
                nll, l01 = train_logreg_fn(preproc(x_i), y_i, e_lr)
                nlls.append(nll)
                l01s.append(l01)
            print 'train log_reg', numpy.mean(nlls), numpy.mean(l01s)




if __name__ == '__main__':
    if 0:
        #learning 16 x 16 pinwheel filters from official cifar patches (MAR)
        rbm,smplr = test_reproduce_ranzato_hinton_2010(
                as_unittest=False,
                n_train_iters=5000,
                rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81),
                trainer_alloc=mcRBMTrainer.alloc_for_P,
                dataset='MAR'
                )

    if 0:
        # pretraining settings
        rbm,smplr = test_reproduce_ranzato_hinton_2010(
                as_unittest=False,
                n_train_iters=60000,
                rbm_alloc=lambda n_I : mcRBM_withP.alloc_topo_P(n_I, n_J=81),
                trainer_alloc=mcRBMTrainer.alloc_for_P,
                lr_per_example=0.05,
                dataset='tinyimages_patches',
                l1_penalty=1e-3,
                l1_penalty_start=30000,
                #l1_penalty_start=350, #DEBUG
                persistent_chains=False
                )

    if 1:
        # placeholder checkpoint callable; run_classif_experiment never uses it
        def checkpoint():
            return checkpoint
        run_classif_experiment(checkpoint=checkpoint)