# HG changeset patch
# User Joseph Turian
# Date 1215496814 14400
# Node ID db28ff3fb8877b0288cb4adf7c053b78a0a29671
# Parent  edec18614a70a98bcfdd28c8c528a93ce78ae4e0
# Parent  74b402b5a81b91227e62c1f62994874ca3a75050
merge

diff -r 74b402b5a81b -r db28ff3fb887 nnet_ops.py
--- a/nnet_ops.py	Mon Jul 07 12:27:06 2008 -0400
+++ b/nnet_ops.py	Tue Jul 08 02:00:14 2008 -0400
@@ -380,3 +380,10 @@
     b = tensor.zeros_like(x[0,:])
     return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
 
+def binary_crossentropy(output, target):
+    """
+    Compute the crossentropy of binary output wrt binary target.
+    @note: We do not sum, crossentropy is computed by component.
+    @todo: Rewrite as a scalar, and then broadcast to tensor.
+    """
+    return -(target * tensor.log(output) + (1 - target) * tensor.log(1 - output))
diff -r 74b402b5a81b -r db28ff3fb887 sparse_random_autoassociator/globals.py
--- a/sparse_random_autoassociator/globals.py	Mon Jul 07 12:27:06 2008 -0400
+++ b/sparse_random_autoassociator/globals.py	Tue Jul 08 02:00:14 2008 -0400
@@ -2,11 +2,12 @@
 Global variables.
 """
 
-INPUT_DIMENSION = 20
-HIDDEN_DIMENSION = 5
+INPUT_DIMENSION = 1000
+HIDDEN_DIMENSION = 100
 LEARNING_RATE = 0.1
 LR = LEARNING_RATE
 SEED = 666
-ZERO_SAMPLE_SIZE = 5
-MARGIN = 0.1
+ZERO_SAMPLE_SIZE = 50
+#ZERO_SAMPLE_SIZE = 250
+MARGIN = 0.25
 #MARGIN = 0.0
diff -r 74b402b5a81b -r db28ff3fb887 sparse_random_autoassociator/graph.py
--- a/sparse_random_autoassociator/graph.py	Mon Jul 07 12:27:06 2008 -0400
+++ b/sparse_random_autoassociator/graph.py	Tue Jul 08 02:00:14 2008 -0400
@@ -6,7 +6,7 @@
 
 from globals import MARGIN
 
-from pylearn.nnet_ops import sigmoid, crossentropy_softmax_1hot
+from pylearn.nnet_ops import sigmoid, binary_crossentropy
 from theano import tensor as t
 from theano.tensor import dot
 xnonzero = t.dvector()
@@ -31,6 +31,8 @@
 #zeroloss = hingeloss(-t.max(-(ynonzero - xnonzero)) - yzero - MARGIN)
 loss = t.sum(nonzeroloss) + t.sum(zeroloss)
 
+#loss = t.sum(binary_crossentropy(ynonzero, xnonzero)) + t.sum(binary_crossentropy(yzero, t.constant(0)))
+
 (gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = t.grad(loss, [w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero])
 
 import theano.compile
diff -r 74b402b5a81b -r db28ff3fb887 sparse_random_autoassociator/main.py
--- a/sparse_random_autoassociator/main.py	Mon Jul 07 12:27:06 2008 -0400
+++ b/sparse_random_autoassociator/main.py	Tue Jul 08 02:00:14 2008 -0400
@@ -20,6 +20,8 @@
     xnonzero magnitude (this may be a limitation). Hence, all nonzeroes are
     equally important to exceed the maximum yzero.
 
+    (Alternately, there is a commented out binary xent loss.)
+
 LIMITATIONS:
     - Only does pure stochastic gradient (batchsize = 1).
     - Loss is irrespective of the xnonzero magnitude.
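
For reference, here is a minimal standalone sketch (not part of the changeset) of the per-component formula that the new binary_crossentropy in nnet_ops.py computes. It uses plain numpy rather than theano.tensor, and the name binary_crossentropy_np is made up for illustration only.

import numpy as np

def binary_crossentropy_np(output, target):
    # Element-wise -(t*log(o) + (1 - t)*log(1 - o)); no summation is performed,
    # matching the @note in the patch (cross-entropy is computed by component).
    return -(target * np.log(output) + (1 - target) * np.log(1 - output))

output = np.array([0.9, 0.2, 0.6])
target = np.array([1.0, 0.0, 1.0])
print(binary_crossentropy_np(output, target))  # one cross-entropy value per component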