changeset 1333:c7b2da4e2df6
modifs to test_mcRBM to reproduce mcRBM classif results
author      James Bergstra <bergstrj@iro.umontreal.ca>
date        Mon, 18 Oct 2010 14:58:39 -0400
parents     d6726417cf57
children    6fd2610c1706
files       pylearn/algorithms/tests/test_mcRBM.py
            pylearn/dataset_ops/tinyimages.py
diffstat    2 files changed, 67 insertions(+), 33 deletions(-)
--- a/pylearn/algorithms/tests/test_mcRBM.py	Sun Oct 10 14:51:02 2010 -0400
+++ b/pylearn/algorithms/tests/test_mcRBM.py	Mon Oct 18 14:58:39 2010 -0400
@@ -296,8 +296,8 @@
 
 
     # extract convolutional features from the CIFAR10 data
+    feat_filename = 'mcrbm_features.npy'
     feat_filename = 'cifar10.features.46000.npy'
-    feat_filename = 'mcrbm_features.npy'
     try:
         open(feat_filename).close()
         load_features = True
@@ -373,69 +373,102 @@
         # nothing to load
         pass
     else:
-        batchsize=10
+        batchsize=100
+
+        if feat_filename.startswith('cifar'):
+            learnrate = 0.002
+            l1_regularization = 0.004
+            anneal_epoch=100
+            n_epochs = 500
+        else:
+            learnrate = 0.005
+            l1_regularization = 0.004
+            n_epochs = 100
+            anneal_epoch=20
+
         x_i = tensor.matrix()
         y_i = tensor.ivector()
         lr = tensor.scalar()
 
-        l1_regularization = float(sys.argv[1]) #1.e-3
-        l2_regularization = float(sys.argv[2]) #1.e-3*0
+        #l1_regularization = float(sys.argv[1]) #1.e-3
+        #l2_regularization = float(sys.argv[2]) #1.e-3*0
 
         feature_logreg = LogisticRegression.new(x_i,
                 n_in = 11025, n_out=10, dtype=x_i.dtype)
 
+        # marc'aurelio does this...
+        feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,10)*.02
+
         traincost = feature_logreg.nll(y_i).sum()
         traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization
-        traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization
+        #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization
         train_logreg_fn = function([x_i, y_i, lr],
                 [feature_logreg.nll(y_i).mean(),
                     feature_logreg.errors(y_i).mean()],
                 updates=pylearn.gd.sgd.sgd_updates(
                     params=feature_logreg.params,
                     grads=tensor.grad(traincost, feature_logreg.params),
-                    stepsizes=[lr,lr]))
+                    stepsizes=[lr,lr/10.]))
 
         all_labels = pylearn.dataset_ops.cifar10.all_data_labels('uint8')[1]
         pylearn.dataset_ops.cifar10.all_data_labels.forget() # clear memo cache
         assert len(all_labels)==60000
 
-        train_labels = all_labels[:40000]
-        valid_labels = all_labels[40000:50000]
-        test_labels = all_labels[50000:60000]
-        train_features = all_features[:40000]
-        valid_features = all_features[40000:50000]
-        test_features = all_features[50000:60000]
+        if 0:
+            print "Using validation set"
+            train_labels = all_labels[:40000]
+            valid_labels = all_labels[40000:50000]
+            test_labels = all_labels[50000:60000]
+            train_features = all_features[:40000]
+            valid_features = all_features[40000:50000]
+            test_features = all_features[50000:60000]
+        else:
+            print "NOT USING validation set"
+            train_labels = all_labels[:50000]
+            valid_labels = None
+            test_labels = all_labels[50000:60000]
+            train_features = all_features[:50000]
+            valid_features = None
+            test_features = all_features[50000:60000]
 
-        print "Computing mean and std.dev"
-        train_mean = train_features.mean(axis=0)
-        train_std = train_features.std(axis=0)+1e-4
+        if 1:
+            print "Computing mean and std.dev"
+            train_mean = train_features.mean(axis=0)
+            train_std = train_features.std(axis=0)+1e-4
+            preproc = lambda x: (x-train_mean)/(0.1+train_std)
+        else:
+            print "Not centering data"
+            preproc = lambda x:x
 
-
-        for epoch in xrange(20):
+        for epoch in xrange(n_epochs):
             print 'epoch', epoch
             # validate
+            # Marc'Aurelio, you crazy!!
+            # the division by batchsize is done in the cost function
+            e_lr = learnrate / (batchsize*max(1.0, numpy.floor(max(1., epoch/float(anneal_epoch))-2)))
 
-            l01s = []
-            nlls = []
-            for i in xrange(10000/batchsize):
-                x_i = valid_features[i*batchsize:(i+1)*batchsize]
-                y_i = valid_labels[i*batchsize:(i+1)*batchsize]
+            if valid_features is not None:
+                l01s = []
+                nlls = []
+                for i in xrange(10000/batchsize):
+                    x_i = valid_features[i*batchsize:(i+1)*batchsize]
+                    y_i = valid_labels[i*batchsize:(i+1)*batchsize]
 
-                #lr=0.0 -> no learning, safe for validation set
-                nll, l01 = train_logreg_fn((x_i-train_mean)/train_std, y_i, 0.0)
-                nlls.append(nll)
-                l01s.append(l01)
-            print 'validate log_reg', numpy.mean(nlls), numpy.mean(l01s)
+                    #lr=0.0 -> no learning, safe for validation set
+                    nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
+                    nlls.append(nll)
+                    l01s.append(l01)
+                print 'validate log_reg', numpy.mean(nlls), numpy.mean(l01s)
 
             # test
             l01s = []
            nlls = []
-            for i in xrange(10000/batchsize):
+            for i in xrange(len(test_features)//batchsize):
                 x_i = test_features[i*batchsize:(i+1)*batchsize]
                 y_i = test_labels[i*batchsize:(i+1)*batchsize]
 
                 #lr=0.0 -> no learning, safe for validation set
-                nll, l01 = train_logreg_fn((x_i-train_mean)/train_std, y_i, 0.0)
+                nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0)
                 nlls.append(nll)
                 l01s.append(l01)
             print 'test log_reg', numpy.mean(nlls), numpy.mean(l01s)
@@ -443,10 +476,10 @@
             #train
             l01s = []
             nlls = []
-            for i in xrange(40000/batchsize):
+            for i in xrange(len(train_features)//batchsize):
                 x_i = train_features[i*batchsize:(i+1)*batchsize]
                 y_i = train_labels[i*batchsize:(i+1)*batchsize]
-                nll, l01 = train_logreg_fn((x_i-train_mean)/train_std, y_i, 0.00003)
+                nll, l01 = train_logreg_fn(preproc(x_i), y_i, e_lr)
                 nlls.append(nll)
                 l01s.append(l01)
             print 'train log_reg', numpy.mean(nlls), numpy.mean(l01s)
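For orientation (not part of the changeset): the new e_lr expression keeps the effective SGD step at learnrate/batchsize for most of the run and only shrinks it once epoch passes roughly 4*anneal_epoch. Below is a minimal standalone sketch of that schedule, using the non-CIFAR settings from the patch (learnrate=0.005, batchsize=100, anneal_epoch=20); the helper name effective_lr is ours, not from the file.

import numpy

def effective_lr(epoch, learnrate=0.005, batchsize=100, anneal_epoch=20):
    # Same expression as the patched training loop: the step stays at
    # learnrate/batchsize until epoch reaches about 4*anneal_epoch, after
    # which it is divided by the growing factor floor(epoch/anneal_epoch - 2).
    return learnrate / (batchsize * max(1.0,
        numpy.floor(max(1.0, epoch / float(anneal_epoch)) - 2)))

for epoch in (0, 20, 60, 80, 99):
    print('epoch %2d  e_lr %.3g' % (epoch, effective_lr(epoch)))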
--- a/pylearn/dataset_ops/tinyimages.py	Sun Oct 10 14:51:02 2010 -0400
+++ b/pylearn/dataset_ops/tinyimages.py	Mon Oct 18 14:58:39 2010 -0400
@@ -247,7 +247,7 @@
 
     return x
 
-def save_filters(X, fname):
+def save_filters(X, fname, tile_shape=None):
     dct = load_pca_dct()
     eigs = dct['eig_vals'], dct['eig_vecs']
     mean = dct['mean']
@@ -255,7 +255,8 @@
     _img = image_tiling.tile_raster_images(
             (rasterized[:,::3], rasterized[:,1::3], rasterized[:,2::3], None),
             img_shape=(8,8),
-            min_dynamic_range=1e-6)
+            min_dynamic_range=1e-6,
+            tile_shape=tile_shape)
     image_tiling.save_tiled_raster_images(_img, fname)
 
 def glviewer(split='train'):
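For orientation (not part of the changeset): the new tile_shape argument is only forwarded to image_tiling.tile_raster_images, so callers can force a specific filter grid instead of the default layout. A hypothetical usage sketch follows; the filter matrix W is a random placeholder whose width is merely inferred from the stored PCA, and tile_shape is assumed to mean the (rows, cols) of the tile grid as in pylearn.io.image_tiling.

import numpy
from pylearn.dataset_ops import tinyimages

# Placeholder "filters": random vectors in the whitened PCA space that
# save_filters() projects back to 8x8x3 pixel patches; the dimensionality
# is read from the stored PCA so the shapes at least line up.
pca = tinyimages.load_pca_dct()
W = numpy.random.RandomState(0).randn(256, len(pca['eig_vals']))

# Previous behaviour: tile_shape=None lets image_tiling pick the grid.
tinyimages.save_filters(W, 'filters_auto.png')

# New keyword: force an explicit 16 x 16 grid of filter tiles.
tinyimages.save_filters(W, 'filters_16x16.png', tile_shape=(16, 16))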