# HG changeset patch # User Joseph Turian # Date 1215557197 14400 # Node ID f2d112dc53be560fc31c232d9a395911bd632a77 # Parent 36baeb7125a49183b0710bc13025bc8a8faa1bb2# Parent b4015b07ab1749ab4a1ec8529d3ba59691667a3d merge diff -r b4015b07ab17 -r f2d112dc53be sandbox/README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/README.txt Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,1 @@ +Stuff in the sandbox may be very broken and/or in flux. diff -r b4015b07ab17 -r f2d112dc53be sandbox/simple_autoassociator/__init__.py diff -r b4015b07ab17 -r f2d112dc53be sandbox/simple_autoassociator/globals.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/simple_autoassociator/globals.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,12 @@ +""" +Global variables. +""" + +#INPUT_DIMENSION = 1000 +#INPUT_DIMENSION = 100 +INPUT_DIMENSION = 4 +HIDDEN_DIMENSION = 10 +#HIDDEN_DIMENSION = 4 +LEARNING_RATE = 0.1 +LR = LEARNING_RATE +SEED = 666 diff -r b4015b07ab17 -r f2d112dc53be sandbox/simple_autoassociator/graph.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/simple_autoassociator/graph.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,26 @@ +""" +Theano graph for a simple autoassociator. +@todo: Make nearly everything private. 
+""" + +from pylearn.nnet_ops import sigmoid, binary_crossentropy +from theano import tensor as t +from theano.tensor import dot +x = t.dvector() +w1 = t.dmatrix() +b1 = t.dvector() +w2 = t.dmatrix() +b2 = t.dvector() +h = sigmoid(dot(x, w1) + b1) +y = sigmoid(dot(h, w2) + b2) + +loss_unsummed = binary_crossentropy(y, x) +loss = t.sum(loss_unsummed) + +(gw1, gb1, gw2, gb2, gy) = t.grad(loss, [w1, b1, w2, b2, y]) + +import theano.compile + +inputs = [x, w1, b1, w2, b2] +outputs = [y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy] +trainfn = theano.compile.function(inputs, outputs) diff -r b4015b07ab17 -r f2d112dc53be sandbox/simple_autoassociator/main.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/simple_autoassociator/main.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,34 @@ +#!/usr/bin/python +""" + A simple autoassociator. + + The learned model is:: + h = sigmoid(dot(x, w1) + b1) + y = sigmoid(dot(h, w2) + b2) + + Binary xent loss. + + LIMITATIONS: + - Only does pure stochastic gradient (batchsize = 1). +""" + + +import numpy + +nonzero_instances = [] +nonzero_instances.append({0: 1, 1: 1}) +nonzero_instances.append({0: 1, 2: 1}) + +#nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) +#nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) +##nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) + +import model +model = model.Model() + +for i in xrange(100000): + # Select an instance + instance = nonzero_instances[i % len(nonzero_instances)] + + # SGD update over instance + model.update(instance) diff -r b4015b07ab17 -r f2d112dc53be sandbox/simple_autoassociator/model.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/simple_autoassociator/model.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,57 @@ +""" +The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason +Weston's sampling trick (2008). 
+""" + +from graph import trainfn +import parameters + +import globals +from globals import LR + +import numpy +import random +random.seed(globals.SEED) + +class Model: + def __init__(self): + self.parameters = parameters.Parameters(randomly_initialize=True) + + def update(self, instance): + """ + Update the L{Model} using one training instance. + @param instance: A dict from feature index to (non-zero) value. + @todo: Should assert that nonzero_indices and zero_indices + are correct (i.e. are truly nonzero/zero). + """ + x = numpy.zeros(globals.INPUT_DIMENSION) + for idx in instance.keys(): + x[idx] = instance[idx] + + (y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) + print + print "instance:", instance + print "x:", x + print "OLD y:", y + print "OLD loss (unsummed):", loss_unsummed + print "gy:", gy + print "OLD total loss:", loss + print "gw1:", gw1 + print "gb1:", gb1 + print "gw2:", gw2 + print "gb2:", gb2 + + # SGD update + self.parameters.w1 -= LR * gw1 + self.parameters.b1 -= LR * gb1 + self.parameters.w2 -= LR * gw2 + self.parameters.b2 -= LR * gb2 + + # Recompute the loss, to make sure it's decreasing + (y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) + print "NEW y:", y + print "NEW loss (unsummed):", loss_unsummed + print "gy:", gy + print "NEW total loss:", loss + print "h:", h + print self.parameters diff -r b4015b07ab17 -r f2d112dc53be sandbox/simple_autoassociator/parameters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/simple_autoassociator/parameters.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,36 @@ +""" +Parameters (weights) used by the L{Model}. +""" + +import numpy +import globals + +class Parameters: + """ + Parameters used by the L{Model}. 
+ """ + def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED): + """ + Initialize L{Model} parameters. + @param randomly_initialize: If True, then randomly initialize + according to the given seed. If False, then just use zeroes. + """ + if randomly_initialize: + numpy.random.seed(seed) + self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension + self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension + self.b1 = numpy.zeros(hidden_dimension) + self.b2 = numpy.zeros(input_dimension) + else: + self.w1 = numpy.zeros((input_dimension, hidden_dimension)) + self.w2 = numpy.zeros((hidden_dimension, input_dimension)) + self.b1 = numpy.zeros(hidden_dimension) + self.b2 = numpy.zeros(input_dimension) + + def __str__(self): + s = "" + s += "w1: %s\n" % self.w1 + s += "b1: %s\n" % self.b1 + s += "w2: %s\n" % self.w2 + s += "b2: %s\n" % self.b2 + return s diff -r b4015b07ab17 -r f2d112dc53be sandbox/sparse_random_autoassociator/__init__.py diff -r b4015b07ab17 -r f2d112dc53be sandbox/sparse_random_autoassociator/globals.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/sparse_random_autoassociator/globals.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,13 @@ +""" +Global variables. +""" + +INPUT_DIMENSION = 1000 +HIDDEN_DIMENSION = 20 +LEARNING_RATE = 0.1 +LR = LEARNING_RATE +SEED = 666 +ZERO_SAMPLE_SIZE = 50 +#ZERO_SAMPLE_SIZE = 250 +MARGIN = 0.25 +#MARGIN = 0.0 diff -r b4015b07ab17 -r f2d112dc53be sandbox/sparse_random_autoassociator/graph.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/sparse_random_autoassociator/graph.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,42 @@ +""" +Theano graph for an autoassociator for sparse inputs, which will be trained +using Ronan Collobert + Jason Weston's sampling trick (2008). +@todo: Make nearly everything private. 
+""" + +from globals import MARGIN + +from pylearn.nnet_ops import sigmoid, binary_crossentropy +from theano import tensor as t +from theano.tensor import dot +xnonzero = t.dvector() +w1nonzero = t.dmatrix() +b1 = t.dvector() +w2nonzero = t.dmatrix() +w2zero = t.dmatrix() +b2nonzero = t.dvector() +b2zero = t.dvector() +h = sigmoid(dot(xnonzero, w1nonzero) + b1) +ynonzero = sigmoid(dot(h, w2nonzero) + b2nonzero) +yzero = sigmoid(dot(h, w2zero) + b2zero) + +# May want to weight loss wrt nonzero value? e.g. MARGIN violation for +# 0.1 nonzero is not as bad as MARGIN violation for 0.2 nonzero. +def hingeloss(MARGIN): + return -MARGIN * (MARGIN < 0) +nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN) +zeroloss = hingeloss(-t.max(-(ynonzero)) - yzero - MARGIN) +# xnonzero sensitive loss: +#nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN - xnonzero) +#zeroloss = hingeloss(-t.max(-(ynonzero - xnonzero)) - yzero - MARGIN) +loss = t.sum(nonzeroloss) + t.sum(zeroloss) + +#loss = t.sum(binary_crossentropy(ynonzero, xnonzero)) + t.sum(binary_crossentropy(yzero, t.constant(0))) + +(gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = t.grad(loss, [w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero]) + +import theano.compile + +inputs = [xnonzero, w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero] +outputs = [ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero] +trainfn = theano.compile.function(inputs, outputs) diff -r b4015b07ab17 -r f2d112dc53be sandbox/sparse_random_autoassociator/main.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/sparse_random_autoassociator/main.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,48 @@ +#!/usr/bin/python +""" + An autoassociator for sparse inputs, using Ronan Collobert + Jason + Weston's sampling trick (2008). 
+ + The learned model is:: + h = sigmoid(dot(x, w1) + b1) + y = sigmoid(dot(h, w2) + b2) + + We assume that most of the inputs are zero, and hence that + we can separate x into xnonzero, x's nonzero components, and + xzero, a sample of the zeros. We sample---randomly without + replacement---ZERO_SAMPLE_SIZE zero columns from x. + + The desideratum is that every nonzero entry is separated from every + zero entry by margin at least MARGIN. + For each ynonzero, we want it to exceed max(yzero) by at least MARGIN. + For each yzero, we want it to be exceeded by min(ynonzero) by at least MARGIN. + The loss is a hinge loss (linear). The loss is irrespective of the + xnonzero magnitude (this may be a limitation). Hence, all nonzeroes + are equally important to exceed the maximum yzero. + + (Alternately, there is a commented out binary xent loss.) + + LIMITATIONS: + - Only does pure stochastic gradient (batchsize = 1). + - Loss is irrespective of the xnonzero magnitude. + - We will always use all nonzero entries, even if the training + instance is very non-sparse. +""" + + +import numpy + +nonzero_instances = [] +nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) +nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) +nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) + +import model +model = model.Model() + +for i in xrange(100000): + # Select an instance + instance = nonzero_instances[i % len(nonzero_instances)] + + # SGD update over instance + model.update(instance) diff -r b4015b07ab17 -r f2d112dc53be sandbox/sparse_random_autoassociator/model.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/sparse_random_autoassociator/model.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,76 @@ +""" +The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason +Weston's sampling trick (2008). 
+""" + +from graph import trainfn +import parameters + +import globals +from globals import LR + +import numpy +import random +random.seed(globals.SEED) + +def _select_indices(instance): + """ + Choose nonzero and zero indices (feature columns) of the instance. + We select B{all} nonzero indices. + We select L{globals.ZERO_SAMPLE_SIZE} zero indices randomly, + without replacement. + @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter + an endless loop. + @return: (nonzero_indices, zero_indices) + """ + # Get the nonzero indices + nonzero_indices = instance.keys() + nonzero_indices.sort() + + # Get the zero indices + # @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an endless loop. + zero_indices = [] + while len(zero_indices) < globals.ZERO_SAMPLE_SIZE: + idx = random.randint(0, globals.INPUT_DIMENSION - 1) + if idx in nonzero_indices or idx in zero_indices: continue + zero_indices.append(idx) + zero_indices.sort() + + return (nonzero_indices, zero_indices) + +class Model: + def __init__(self): + self.parameters = parameters.Parameters(randomly_initialize=True) + + def update(self, instance): + """ + Update the L{Model} using one training instance. + @param instance: A dict from feature index to (non-zero) value. + @todo: Should assert that nonzero_indices and zero_indices + are correct (i.e. are truly nonzero/zero). + """ + (nonzero_indices, zero_indices) = _select_indices(instance) + # No update if there aren't any non-zeros. 
+ if len(nonzero_indices) == 0: return + xnonzero = numpy.asarray([instance[idx] for idx in nonzero_indices]) + print + print "xnonzero:", xnonzero + + (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) + print "OLD ynonzero:", ynonzero + print "OLD yzero:", yzero + print "OLD total loss:", loss + + # SGD update + self.parameters.w1[nonzero_indices, :] -= LR * gw1nonzero + self.parameters.b1 -= LR * gb1 + self.parameters.w2[:, nonzero_indices] -= LR * gw2nonzero + self.parameters.w2[:, zero_indices] -= LR * gw2zero + self.parameters.b2[nonzero_indices] -= LR * gb2nonzero + self.parameters.b2[zero_indices] -= LR * gb2zero + + # Recompute the loss, to make sure it's decreasing + (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) + print "NEW ynonzero:", ynonzero + print "NEW yzero:", yzero + print "NEW total loss:", loss diff -r b4015b07ab17 -r f2d112dc53be sandbox/sparse_random_autoassociator/parameters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sandbox/sparse_random_autoassociator/parameters.py Tue Jul 08 18:46:37 2008 -0400 @@ -0,0 +1,28 @@ +""" +Parameters (weights) used by the L{Model}. +""" + +import numpy +import globals + +class Parameters: + """ + Parameters used by the L{Model}. + """ + def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED): + """ + Initialize L{Model} parameters. 
+ @param randomly_initialize: If True, then randomly initialize + according to the given seed. If False, then just use zeroes. + """ + if randomly_initialize: + numpy.random.seed(seed) + self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension + self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension + self.b1 = numpy.zeros(hidden_dimension) + self.b2 = numpy.zeros(input_dimension) + else: + self.w1 = numpy.zeros((input_dimension, hidden_dimension)) + self.w2 = numpy.zeros((hidden_dimension, input_dimension)) + self.b1 = numpy.zeros(hidden_dimension) + self.b2 = numpy.zeros(input_dimension) diff -r b4015b07ab17 -r f2d112dc53be simple_autoassociator/README.txt --- a/simple_autoassociator/README.txt Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -This may be buggy. -jpt diff -r b4015b07ab17 -r f2d112dc53be simple_autoassociator/__init__.py diff -r b4015b07ab17 -r f2d112dc53be simple_autoassociator/globals.py --- a/simple_autoassociator/globals.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -""" -Global variables. -""" - -#INPUT_DIMENSION = 1000 -#INPUT_DIMENSION = 100 -INPUT_DIMENSION = 10 -#HIDDEN_DIMENSION = 20 -HIDDEN_DIMENSION = 4 -LEARNING_RATE = 0.01 -LR = LEARNING_RATE -SEED = 666 diff -r b4015b07ab17 -r f2d112dc53be simple_autoassociator/graph.py --- a/simple_autoassociator/graph.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -""" -Theano graph for a simple autoassociator. -@todo: Make nearly everything private. 
-""" - -from pylearn.nnet_ops import sigmoid, binary_crossentropy -from theano import tensor as t -from theano.tensor import dot -x = t.dvector() -w1 = t.dmatrix() -b1 = t.dvector() -w2 = t.dmatrix() -b2 = t.dvector() -h = sigmoid(dot(x, w1) + b1) -y = sigmoid(dot(h, w2) + b2) - -loss_unsummed = binary_crossentropy(y, x) -loss = t.sum(loss_unsummed) - -(gw1, gb1, gw2, gb2) = t.grad(loss, [w1, b1, w2, b2]) - -import theano.compile - -inputs = [x, w1, b1, w2, b2] -outputs = [y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2] -trainfn = theano.compile.function(inputs, outputs) diff -r b4015b07ab17 -r f2d112dc53be simple_autoassociator/main.py --- a/simple_autoassociator/main.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/usr/bin/python -""" - A simple autoassociator. - - The learned model is:: - h = sigmoid(dot(x, w1) + b1) - y = sigmoid(dot(h, w2) + b2) - - Binary xent loss. - - LIMITATIONS: - - Only does pure stochastic gradient (batchsize = 1). -""" - - -import numpy - -nonzero_instances = [] -nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) -nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) -#nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) - -import model -model = model.Model() - -for i in xrange(100000): - # Select an instance - instance = nonzero_instances[i % len(nonzero_instances)] - - # SGD update over instance - model.update(instance) diff -r b4015b07ab17 -r f2d112dc53be simple_autoassociator/model.py --- a/simple_autoassociator/model.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -""" -The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason -Weston's sampling trick (2008). 
-""" - -from graph import trainfn -import parameters - -import globals -from globals import LR - -import numpy -import random -random.seed(globals.SEED) - -class Model: - def __init__(self): - self.parameters = parameters.Parameters(randomly_initialize=True) - - def update(self, instance): - """ - Update the L{Model} using one training instance. - @param instance: A dict from feature index to (non-zero) value. - @todo: Should assert that nonzero_indices and zero_indices - are correct (i.e. are truly nonzero/zero). - """ - x = numpy.zeros(globals.INPUT_DIMENSION) - for idx in instance.keys(): - x[idx] = instance[idx] - - (y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) - print - print "instance:", instance - print "x:", x - print "OLD y:", y - print "NEW loss (unsummed):", loss_unsummed - print "OLD total loss:", loss - print "gw1:", gw1 - print "gb1:", gb1 - print "gw2:", gw2 - print "gb2:", gb2 - - # SGD update - self.parameters.w1 -= LR * gw1 - self.parameters.b1 -= LR * gb1 - self.parameters.w2 -= LR * gw2 - self.parameters.b2 -= LR * gb2 - - # Recompute the loss, to make sure it's descreasing - (y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2) - print "NEW y:", y - print "NEW loss (unsummed):", loss_unsummed - print "NEW total loss:", loss - print h - print self.parameters diff -r b4015b07ab17 -r f2d112dc53be simple_autoassociator/parameters.py --- a/simple_autoassociator/parameters.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -""" -Parameters (weights) used by the L{Model}. -""" - -import numpy -import globals - -class Parameters: - """ - Parameters used by the L{Model}. 
- """ - def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED): - """ - Initialize L{Model} parameters. - @param randomly_initialize: If True, then randomly initialize - according to the given seed. If False, then just use zeroes. - """ - if randomly_initialize: - numpy.random.seed(seed) - self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension - self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension - self.b1 = numpy.zeros(hidden_dimension) - self.b2 = numpy.zeros(input_dimension) - else: - self.w1 = numpy.zeros((input_dimension, hidden_dimension)) - self.w2 = numpy.zeros((hidden_dimension, input_dimension)) - self.b1 = numpy.zeros(hidden_dimension) - self.b2 = numpy.zeros(input_dimension) - - def __str__(self): - s = "" - s += "w1: %s\n" % self.w1 - s += "b1: %s\n" % self.b1 - s += "w2: %s\n" % self.w2 - s += "b2: %s\n" % self.b2 - return s diff -r b4015b07ab17 -r f2d112dc53be sparse_random_autoassociator/README.txt --- a/sparse_random_autoassociator/README.txt Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -This may be buggy. -jpt diff -r b4015b07ab17 -r f2d112dc53be sparse_random_autoassociator/__init__.py diff -r b4015b07ab17 -r f2d112dc53be sparse_random_autoassociator/globals.py --- a/sparse_random_autoassociator/globals.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -""" -Global variables. 
-""" - -INPUT_DIMENSION = 1000 -HIDDEN_DIMENSION = 20 -LEARNING_RATE = 0.1 -LR = LEARNING_RATE -SEED = 666 -ZERO_SAMPLE_SIZE = 50 -#ZERO_SAMPLE_SIZE = 250 -MARGIN = 0.25 -#MARGIN = 0.0 diff -r b4015b07ab17 -r f2d112dc53be sparse_random_autoassociator/graph.py --- a/sparse_random_autoassociator/graph.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -""" -Theano graph for an autoassociator for sparse inputs, which will be trained -using Ronan Collobert + Jason Weston's sampling trick (2008). -@todo: Make nearly everything private. -""" - -from globals import MARGIN - -from pylearn.nnet_ops import sigmoid, binary_crossentropy -from theano import tensor as t -from theano.tensor import dot -xnonzero = t.dvector() -w1nonzero = t.dmatrix() -b1 = t.dvector() -w2nonzero = t.dmatrix() -w2zero = t.dmatrix() -b2nonzero = t.dvector() -b2zero = t.dvector() -h = sigmoid(dot(xnonzero, w1nonzero) + b1) -ynonzero = sigmoid(dot(h, w2nonzero) + b2nonzero) -yzero = sigmoid(dot(h, w2zero) + b2zero) - -# May want to weight loss wrt nonzero value? e.g. MARGIN violation for -# 0.1 nonzero is not as bad as MARGIN violation for 0.2 nonzero. 
-def hingeloss(MARGIN): - return -MARGIN * (MARGIN < 0) -nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN) -zeroloss = hingeloss(-t.max(-(ynonzero)) - yzero - MARGIN) -# xnonzero sensitive loss: -#nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN - xnonzero) -#zeroloss = hingeloss(-t.max(-(ynonzero - xnonzero)) - yzero - MARGIN) -loss = t.sum(nonzeroloss) + t.sum(zeroloss) - -#loss = t.sum(binary_crossentropy(ynonzero, xnonzero)) + t.sum(binary_crossentropy(yzero, t.constant(0))) - -(gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = t.grad(loss, [w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero]) - -import theano.compile - -inputs = [xnonzero, w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero] -outputs = [ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero] -trainfn = theano.compile.function(inputs, outputs) diff -r b4015b07ab17 -r f2d112dc53be sparse_random_autoassociator/main.py --- a/sparse_random_autoassociator/main.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -#!/usr/bin/python -""" - An autoassociator for sparse inputs, using Ronan Collobert + Jason - Weston's sampling trick (2008). - - The learned model is:: - h = sigmoid(dot(x, w1) + b1) - y = sigmoid(dot(h, w2) + b2) - - We assume that most of the inputs are zero, and hence that - we can separate x into xnonzero, x's nonzero components, and - xzero, a sample of the zeros. We sample---randomly without - replacement---ZERO_SAMPLE_SIZE zero columns from x. - - The desideratum is that every nonzero entry is separated from every - zero entry by margin at least MARGIN. - For each ynonzero, we want it to exceed max(yzero) by at least MARGIN. - For each yzero, we want it to be exceed by min(ynonzero) by at least MARGIN. - The loss is a hinge loss (linear). The loss is irrespective of the - xnonzero magnitude (this may be a limitation). 
Hence, all nonzeroes - are equally important to exceed the maximum yzero. - - (Alternately, there is a commented out binary xent loss.) - - LIMITATIONS: - - Only does pure stochastic gradient (batchsize = 1). - - Loss is irrespective of the xnonzero magnitude. - - We will always use all nonzero entries, even if the training - instance is very non-sparse. -""" - - -import numpy - -nonzero_instances = [] -nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1}) -nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8}) -nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5}) - -import model -model = model.Model() - -for i in xrange(100000): - # Select an instance - instance = nonzero_instances[i % len(nonzero_instances)] - - # SGD update over instance - model.update(instance) diff -r b4015b07ab17 -r f2d112dc53be sparse_random_autoassociator/model.py --- a/sparse_random_autoassociator/model.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ -""" -The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason -Weston's sampling trick (2008). -""" - -from graph import trainfn -import parameters - -import globals -from globals import LR - -import numpy -import random -random.seed(globals.SEED) - -def _select_indices(instance): - """ - Choose nonzero and zero indices (feature columns) of the instance. - We select B{all} nonzero indices. - We select L{globals.ZERO_SAMPLE_SIZE} zero indices randomly, - without replacement. - @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter - an endless loop. - @return: (nonzero_indices, zero_indices) - """ - # Get the nonzero indices - nonzero_indices = instance.keys() - nonzero_indices.sort() - - # Get the zero indices - # @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an endless loop. 
- zero_indices = [] - while len(zero_indices) < globals.ZERO_SAMPLE_SIZE: - idx = random.randint(0, globals.INPUT_DIMENSION - 1) - if idx in nonzero_indices or idx in zero_indices: continue - zero_indices.append(idx) - zero_indices.sort() - - return (nonzero_indices, zero_indices) - -class Model: - def __init__(self): - self.parameters = parameters.Parameters(randomly_initialize=True) - - def update(self, instance): - """ - Update the L{Model} using one training instance. - @param instance: A dict from feature index to (non-zero) value. - @todo: Should assert that nonzero_indices and zero_indices - are correct (i.e. are truly nonzero/zero). - """ - (nonzero_indices, zero_indices) = _select_indices(instance) - # No update if there aren't any non-zeros. - if len(nonzero_indices) == 0: return - xnonzero = numpy.asarray([instance[idx] for idx in nonzero_indices]) - print - print "xnonzero:", xnonzero - - (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) - print "OLD ynonzero:", ynonzero - print "OLD yzero:", yzero - print "OLD total loss:", loss - - # SGD update - self.parameters.w1[nonzero_indices, :] -= LR * gw1nonzero - self.parameters.b1 -= LR * gb1 - self.parameters.w2[:, nonzero_indices] -= LR * gw2nonzero - self.parameters.w2[:, zero_indices] -= LR * gw2zero - self.parameters.b2[nonzero_indices] -= LR * gb2nonzero - self.parameters.b2[zero_indices] -= LR * gb2zero - - # Recompute the loss, to make sure it's descreasing - (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], 
self.parameters.b2[zero_indices]) - print "NEW ynonzero:", ynonzero - print "NEW yzero:", yzero - print "NEW total loss:", loss diff -r b4015b07ab17 -r f2d112dc53be sparse_random_autoassociator/parameters.py --- a/sparse_random_autoassociator/parameters.py Tue Jul 08 17:50:23 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -""" -Parameters (weights) used by the L{Model}. -""" - -import numpy -import globals - -class Parameters: - """ - Parameters used by the L{Model}. - """ - def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED): - """ - Initialize L{Model} parameters. - @param randomly_initialize: If True, then randomly initialize - according to the given seed. If False, then just use zeroes. - """ - if randomly_initialize: - numpy.random.seed(seed) - self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension - self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension - self.b1 = numpy.zeros(hidden_dimension) - self.b2 = numpy.zeros(input_dimension) - else: - self.w1 = numpy.zeros((input_dimension, hidden_dimension)) - self.w2 = numpy.zeros((hidden_dimension, input_dimension)) - self.b1 = numpy.zeros(hidden_dimension) - self.b2 = numpy.zeros(input_dimension)