# HG changeset patch
# User Yoshua Bengio <bengioy@iro.umontreal.ca>
# Date 1215439777 14400
# Node ID 67c33926087551bbb19b999861a547f28db9ad8b
# Parent  c9a89be5cb0abf3d1a9a35b5a6b969f76195f48a# Parent  42cc94cf6c122490c7c7c82c20c8be15e0a8a1ab
merge

diff -r c9a89be5cb0a -r 67c339260875 sparse_random_autoassociator/globals.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/globals.py	Mon Jul 07 10:09:37 2008 -0400
@@ -0,0 +1,12 @@
+"""
+Global variables.
+"""
+
+INPUT_DIMENSION = 20
+HIDDEN_DIMENSION = 5
+LEARNING_RATE = 0.1
+LR = LEARNING_RATE
+SEED = 666
+ZERO_SAMPLE_SIZE = 5
+MARGIN = 0.1
+#MARGIN = 0.0
diff -r c9a89be5cb0a -r 67c339260875 sparse_random_autoassociator/graph.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/graph.py	Mon Jul 07 10:09:37 2008 -0400
@@ -0,0 +1,40 @@
+"""
+Theano graph for an autoassociator for sparse inputs, which will be trained
+using Ronan Collobert + Jason Weston's sampling trick (2008).
+@todo: Make nearly everything private.
+"""
+
+from globals import MARGIN
+
+from pylearn.nnet_ops import sigmoid, crossentropy_softmax_1hot
+from theano import tensor as t
+from theano.tensor import dot
+xnonzero    = t.dvector()
+w1nonzero   = t.dmatrix()
+b1          = t.dvector()
+w2nonzero   = t.dmatrix()
+w2zero      = t.dmatrix()
+b2nonzero   = t.dvector()
+b2zero      = t.dvector()
+h           = sigmoid(dot(xnonzero, w1nonzero) + b1)
+ynonzero    = sigmoid(dot(h, w2nonzero) + b2nonzero)
+yzero       = sigmoid(dot(h, w2zero) + b2zero)
+
+# May want to weight loss wrt nonzero value? e.g. MARGIN violation for
+# 0.1 nonzero is not as bad as MARGIN violation for 0.2 nonzero.
+def hingeloss(MARGIN):
+    return -MARGIN * (MARGIN < 0)
+nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN)
+zeroloss = hingeloss(-t.max(-(ynonzero)) - yzero - MARGIN)
+# xnonzero sensitive loss:
+#nonzeroloss = hingeloss(ynonzero - t.max(yzero) - MARGIN - xnonzero)
+#zeroloss = hingeloss(-t.max(-(ynonzero - xnonzero)) - yzero - MARGIN)
+loss = t.sum(nonzeroloss) + t.sum(zeroloss)
+
+(gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = t.grad(loss, [w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero])
+
+import theano.compile
+
+inputs  = [xnonzero, w1nonzero, b1, w2nonzero, w2zero, b2nonzero, b2zero]
+outputs = [ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero]
+trainfn = theano.compile.function(inputs, outputs)
diff -r c9a89be5cb0a -r 67c339260875 sparse_random_autoassociator/main.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/main.py	Mon Jul 07 10:09:37 2008 -0400
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+"""
+    An autoassociator for sparse inputs, using Ronan Collobert + Jason
+    Weston's sampling trick (2008).
+
+    The learned model is::
+       h   = sigmoid(dot(x, w1) + b1)
+       y   = sigmoid(dot(h, w2) + b2)
+
+    We assume that most of the inputs are zero, and hence that
+    we can separate x into xnonzero, x's nonzero components, and
+    xzero, a sample of the zeros. We sample---randomly without
+    replacement---ZERO_SAMPLE_SIZE zero columns from x.
+
+    The desideratum is that every nonzero entry is separated from every
+    zero entry by margin at least MARGIN.
+    For each ynonzero, we want it to exceed max(yzero) by at least MARGIN.
+    For each yzero, we want it to be exceed by min(ynonzero) by at least MARGIN.
+    The loss is a hinge loss (linear). The loss is irrespective of the
+    xnonzero magnitude (this may be a limitation). Hence, all nonzeroes
+    are equally important to exceed the maximum yzero.
+
+    LIMITATIONS:
+       - Only does pure stochastic gradient (batchsize = 1).
+       - Loss is irrespective of the xnonzero magnitude.
+       - We will always use all nonzero entries, even if the training
+       instance is very non-sparse.
+"""
+
+
+import numpy
+
+nonzero_instances = []
+nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1})
+nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8})
+nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5})
+
+import model
+model = model.Model()
+
+for i in xrange(100000):
+    # Select an instance
+    instance = nonzero_instances[i % len(nonzero_instances)]
+
+    # SGD update over instance
+    model.update(instance)
diff -r c9a89be5cb0a -r 67c339260875 sparse_random_autoassociator/model.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/model.py	Mon Jul 07 10:09:37 2008 -0400
@@ -0,0 +1,76 @@
+"""
+The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason
+Weston's sampling trick (2008).
+"""
+
+from graph import trainfn
+import parameters
+
+import globals
+from globals import LR
+
+import numpy
+import random
+random.seed(globals.SEED)
+
+def _select_indices(instance):
+    """
+    Choose nonzero and zero indices (feature columns) of the instance.
+    We select B{all} nonzero indices.
+    We select L{globals.ZERO_SAMPLE_SIZE} zero indices randomly,
+    without replacement.
+    @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter
+    an endless loop.
+    @return: (nonzero_indices, zero_indices)
+    """
+    # Get the nonzero indices
+    nonzero_indices = instance.keys()
+    nonzero_indices.sort()
+
+    # Get the zero indices
+    # @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an endless loop.
+    zero_indices = []
+    while len(zero_indices) < globals.ZERO_SAMPLE_SIZE:
+        idx = random.randint(0, globals.INPUT_DIMENSION - 1)
+        if idx in nonzero_indices or idx in zero_indices: continue
+        zero_indices.append(idx)
+    zero_indices.sort()
+
+    return (nonzero_indices, zero_indices)
+
+class Model:
+    def __init__(self):
+        self.parameters = parameters.Parameters(randomly_initialize=True)
+
+    def update(self, instance):
+        """
+        Update the L{Model} using one training instance.
+        @param instance: A dict from feature index to (non-zero) value.
+        @todo: Should assert that nonzero_indices and zero_indices
+        are correct (i.e. are truly nonzero/zero).
+        """
+        (nonzero_indices, zero_indices) = _select_indices(instance)
+        # No update if there aren't any non-zeros.
+        if len(nonzero_indices) == 0: return
+        xnonzero = numpy.asarray([instance[idx] for idx in nonzero_indices])
+        print
+        print "xnonzero:", xnonzero
+
+        (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices])
+        print "OLD ynonzero:", ynonzero
+        print "OLD yzero:", yzero
+        print "OLD total loss:", loss
+
+        # SGD update
+        self.parameters.w1[nonzero_indices, :]  -= LR * gw1nonzero
+        self.parameters.b1						-= LR * gb1
+        self.parameters.w2[:, nonzero_indices]  -= LR * gw2nonzero
+        self.parameters.w2[:, zero_indices]		-= LR * gw2zero
+        self.parameters.b2[nonzero_indices]		-= LR * gb2nonzero
+        self.parameters.b2[zero_indices]		-= LR * gb2zero
+
+        # Recompute the loss, to make sure it's descreasing
+        (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices])
+        print "NEW ynonzero:", ynonzero
+        print "NEW yzero:", yzero
+        print "NEW total loss:", loss
diff -r c9a89be5cb0a -r 67c339260875 sparse_random_autoassociator/parameters.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/parameters.py	Mon Jul 07 10:09:37 2008 -0400
@@ -0,0 +1,28 @@
+"""
+Parameters (weights) used by the L{Model}.
+"""
+
+import numpy
+import globals
+
+class Parameters:
+    """
+    Parameters used by the L{Model}.
+    """
+    def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED):
+        """
+        Initialize L{Model} parameters.
+        @param randomly_initialize: If True, then randomly initialize
+        according to the given seed. If False, then just use zeroes.
+        """
+        if randomly_initialize:
+            numpy.random.seed(seed)
+            self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension
+            self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension
+            self.b1 = numpy.zeros(hidden_dimension)
+            self.b2 = numpy.zeros(input_dimension)
+        else:
+            self.w1 = numpy.zeros((input_dimension, hidden_dimension))
+            self.w2 = numpy.zeros((hidden_dimension, input_dimension))
+            self.b1 = numpy.zeros(hidden_dimension)
+            self.b2 = numpy.zeros(input_dimension)