# HG changeset patch
# User Joseph Turian
# Date 1215410086 14400
# Node ID a1bbcde6b45677872115c0e2e82681cd9ef9e7b0
# Parent 90a29489b5c87c87b61ccc2c7d14d80a5ddbb5c0
Moved sparse_random_autoassociator from my repository

diff -r 90a29489b5c8 -r a1bbcde6b456 sparse_random_autoassociator/globals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/globals.py Mon Jul 07 01:54:46 2008 -0400
@@ -0,0 +1,12 @@
+"""
+Global variables.
+"""
+
+INPUT_DIMENSION = 20
+HIDDEN_DIMENSION = 5
+LEARNING_RATE = 0.1
+LR = LEARNING_RATE
+SEED = 666
+ZERO_SAMPLE_SIZE = 5
+MARGIN = 0.1
+#MARGIN = 0.0
diff -r 90a29489b5c8 -r a1bbcde6b456 sparse_random_autoassociator/graph.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/graph.py Mon Jul 07 01:54:46 2008 -0400
@@ -0,0 +1,40 @@
+"""
+Theano graph for an autoassociator for sparse inputs, which will be trained
+using Ronan Collobert + Jason Weston's sampling trick (2008).
+@todo: Make nearly everything private.
+"""
+
+from globals import MARGIN
+
+from pylearn.nnet_ops import sigmoid, crossentropy_softmax_1hot
+from theano import tensor as t
+from theano.tensor import dot
+xnonzero = t.dvector()
+w1nonzero = t.dmatrix()
+b1 = t.dvector()
+w2nonzero = t.dmatrix()
+w2zero = t.dmatrix()
+b2nonzero = t.dvector()
+b2zero = t.dvector()
+h = sigmoid(dot(xnonzero, w1nonzero) + b1)
+ynonzero = sigmoid(dot(h, w2nonzero) + b2nonzero)
+yzero = sigmoid(dot(h, w2zero) + b2zero)
+
+# May want to weight loss wrt nonzero value? e.g. MARGIN violation for
+# 0.1 nonzero is not as bad as MARGIN violation for 0.2 nonzero.
+def hingeloss(MARGIN):
+    return -MARGIN * (MARGIN < 0)
+nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN)
+zeroloss = hingeloss(-t.max(-(ynonzero)) - yzero - MARGIN)
+# xnonzero sensitive loss:
+#nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN - xnonzero)
+#zeroloss = hingeloss(-t.max(-(ynonzero - xnonzero)) - yzero - MARGIN)
+loss = t.sum(nonzeroloss) + t.sum(zeroloss)
+
+(gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = t.grad(loss, [w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero])
+
+import theano.compile
+
+inputs = [xnonzero, w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero]
+outputs = [ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero]
+trainfn = theano.compile.function(inputs, outputs)
diff -r 90a29489b5c8 -r a1bbcde6b456 sparse_random_autoassociator/main.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/main.py Mon Jul 07 01:54:46 2008 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/python
+"""
+    An autoassociator for sparse inputs, using Ronan Collobert + Jason
+    Weston's sampling trick (2008).
+
+    The learned model is::
+        h = sigmoid(dot(x, w1) + b1)
+        y = sigmoid(dot(h, w2) + b2)
+
+    We assume that most of the inputs are zero, and hence that we can
+    separate x into xnonzero, x's nonzero components, and xzero,
+    a sample of the zeros. (We randomly choose, without replacement,
+    ZERO_SAMPLE_SIZE zero columns.)
+
+    The desideratum is that every nonzero entry is separated from every
+    zero entry by a margin of at least MARGIN.
+    For each ynonzero, we want it to exceed max(yzero) by at least MARGIN.
+    For each yzero, we want it to be exceeded by min(ynonzero) by at least MARGIN.
+    The loss is a hinge loss (linear). The loss is irrespective of the
+    xnonzero magnitude (this may be a limitation). Hence, all nonzeroes
+    are equally important to exceed the maximum yzero.
+
+    LIMITATIONS:
+       - Only does pure stochastic gradient (batchsize = 1).
+       - Loss is irrespective of the xnonzero magnitude.
+       - We will always use all nonzero entries, even if the training
+         instance is very non-sparse.
+
+    @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an
+    endless loop.
+"""
+
+
+import numpy, random
+import globals
+random.seed(globals.SEED)
+
+nonzero_instances = []
+nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1})
+nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8})
+nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5})
+
+import model
+model = model.Model()
+
+for i in xrange(100000):
+    # Select an instance
+    instance = nonzero_instances[i % len(nonzero_instances)]
+
+    # Get the nonzero indices
+    nonzero_indexes = instance.keys()
+    nonzero_indexes.sort()
+
+    # Get the zero indices
+    # @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an endless loop.
+    zero_indexes = []
+    while len(zero_indexes) < globals.ZERO_SAMPLE_SIZE:
+        idx = random.randint(0, globals.INPUT_DIMENSION - 1)
+        if idx in nonzero_indexes or idx in zero_indexes: continue
+        zero_indexes.append(idx)
+    zero_indexes.sort()
+
+    # SGD update over instance
+    model.update(instance, nonzero_indexes, zero_indexes)
diff -r 90a29489b5c8 -r a1bbcde6b456 sparse_random_autoassociator/model.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/model.py Mon Jul 07 01:54:46 2008 -0400
@@ -0,0 +1,37 @@
+"""
+The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason
+Weston's sampling trick (2008).
+"""
+
+from graph import trainfn
+import parameters
+import numpy
+from globals import LR
+
+class Model:
+    def __init__(self):
+        self.parameters = parameters.Parameters(randomly_initialize=True)
+
+    def update(self, instance, nonzero_indexes, zero_indexes):
+        xnonzero = numpy.asarray([instance[idx] for idx in nonzero_indexes])
+        print
+        print "xnonzero:", xnonzero
+
+        (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indexes, :], self.parameters.b1, self.parameters.w2[:, nonzero_indexes], self.parameters.w2[:, zero_indexes], self.parameters.b2[nonzero_indexes], self.parameters.b2[zero_indexes])
+        print "OLD ynonzero:", ynonzero
+        print "OLD yzero:", yzero
+        print "OLD total loss:", loss
+
+        # SGD update
+        self.parameters.w1[nonzero_indexes, :] -= LR * gw1nonzero
+        self.parameters.b1 -= LR * gb1
+        self.parameters.w2[:, nonzero_indexes] -= LR * gw2nonzero
+        self.parameters.w2[:, zero_indexes] -= LR * gw2zero
+        self.parameters.b2[nonzero_indexes] -= LR * gb2nonzero
+        self.parameters.b2[zero_indexes] -= LR * gb2zero
+
+        # Recompute the loss, to make sure it's decreasing
+        (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indexes, :], self.parameters.b1, self.parameters.w2[:, nonzero_indexes], self.parameters.w2[:, zero_indexes], self.parameters.b2[nonzero_indexes], self.parameters.b2[zero_indexes])
+        print "NEW ynonzero:", ynonzero
+        print "NEW yzero:", yzero
+        print "NEW total loss:", loss
diff -r 90a29489b5c8 -r a1bbcde6b456 sparse_random_autoassociator/parameters.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/parameters.py Mon Jul 07 01:54:46 2008 -0400
@@ -0,0 +1,28 @@
+"""
+Parameters (weights) used by the L{Model}.
+"""
+
+import numpy
+import globals
+
+class Parameters:
+    """
+    Parameters used by the L{Model}.
+    """
+    def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED):
+        """
+        Initialize L{Model} parameters.
+        @param randomly_initialize: If True, then randomly initialize
+        according to the given seed. If False, then just use zeroes.
+        """
+        if randomly_initialize:
+            numpy.random.seed(seed)
+            self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension
+            self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension
+            self.b1 = numpy.zeros(hidden_dimension)
+            self.b2 = numpy.zeros(input_dimension)
+        else:
+            self.w1 = numpy.zeros((input_dimension, hidden_dimension))
+            self.w2 = numpy.zeros((hidden_dimension, input_dimension))
+            self.b1 = numpy.zeros(hidden_dimension)
+            self.b2 = numpy.zeros(input_dimension)
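Note (not part of the patch): the margin objective built in graph.py can be sanity-checked numerically without compiling the Theano graph. Below is a minimal NumPy-only sketch of the same two hinge losses, written in Python 2 to match the rest of the code; the ynonzero/yzero arrays are made-up values for illustration, and hingeloss is rewritten here with numpy.maximum, which is equivalent to the patch's -z * (z < 0).

import numpy

MARGIN = 0.1  # same constant as in globals.py

def hingeloss(z):
    # Penalize only negative slack: max(0, -z), elementwise.
    return numpy.maximum(0.0, -z)

# Hypothetical reconstructed outputs for one instance: values at the
# nonzero input columns and at the sampled zero columns.
ynonzero = numpy.array([0.70, 0.40, 0.80])
yzero = numpy.array([0.20, 0.35, 0.10])

# Each ynonzero should exceed max(yzero) by MARGIN ...
nonzeroloss = hingeloss(ynonzero - yzero.max() - MARGIN)
# ... and each yzero should sit below min(ynonzero) by MARGIN.
zeroloss = hingeloss(ynonzero.min() - yzero - MARGIN)

loss = nonzeroloss.sum() + zeroloss.sum()
print "nonzeroloss:", nonzeroloss
print "zeroloss:", zeroloss
print "total hinge loss:", loss

With these numbers, the 0.40 nonzero output and the 0.35 zero output are only 0.05 apart, so each contributes 0.05 to the loss; all other pairs satisfy the 0.1 margin and contribute nothing.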