diff sparse_random_autoassociator/main.py @ 370:a1bbcde6b456

Moved sparse_random_autoassociator from my repository
author Joseph Turian <turian@gmail.com>
date Mon, 07 Jul 2008 01:54:46 -0400
parents
children 22463a194c90
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sparse_random_autoassociator/main.py	Mon Jul 07 01:54:46 2008 -0400
@@ -0,0 +1,88 @@
+#!/usr/bin/python
+"""
+    An autoassociator for sparse inputs, using Ronan Collobert + Jason
+    Weston's sampling trick (2008).
+
+    The learned model is::
+       h   = sigmoid(dot(x, w1) + b1)
+       y   = sigmoid(dot(h, w2) + b2)
+
+    We assume that most of the inputs are zero, and hence that we can
+    separate x into xnonzero, x's nonzero components, and xzero,
+    a sample of the zeros. (We choose ZERO_SAMPLE_SIZE zero columns
+    at random, without replacement.)
+
+    The desideratum is that every nonzero entry is separated from every
+    zero entry by a margin of at least MARGIN.
+    For each ynonzero, we want it to exceed max(yzero) by at least MARGIN.
+    For each yzero, we want it to be exceeded by min(ynonzero) by at least MARGIN.
+    The loss is a (linear) hinge loss. The loss does not depend on the
+    xnonzero magnitudes (this may be a limitation); hence, all nonzero
+    entries are equally important in exceeding the maximum yzero.
+
+    LIMITATIONS:
+       - Only does pure stochastic gradient descent (batch size = 1).
+       - The loss does not depend on the xnonzero magnitudes.
+       - We always use all nonzero entries, even if the training
+       instance is very non-sparse.
+
+    @bug: If an instance has fewer than ZERO_SAMPLE_SIZE zero entries,
+    we will enter an endless loop when sampling the zero indexes.
+"""
+
+
+import numpy, random
+import globals
+random.seed(globals.SEED)
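+
+# Illustrative sketch of the model and margin loss described in the docstring.
+# It is not used by model.Model (whose actual implementation lives in model.py);
+# the parameter names w1, b1, w2, b2 and the margin argument are assumptions
+# made here purely for exposition.
+def _sketch_forward_and_loss(x, w1, b1, w2, b2, nonzero_indexes, zero_indexes, margin):
+    sigmoid = lambda a: 1. / (1. + numpy.exp(-a))
+    h = sigmoid(numpy.dot(x, w1) + b1)   # hidden representation
+    y = sigmoid(numpy.dot(h, w2) + b2)   # reconstruction of x
+    ynonzero, yzero = y[nonzero_indexes], y[zero_indexes]
+    # Each nonzero output should exceed max(yzero) by the margin, and each
+    # zero output should lie below min(ynonzero) by the margin; hinge loss.
+    nonzero_hinge = numpy.maximum(0, margin - (ynonzero - yzero.max()))
+    zero_hinge = numpy.maximum(0, margin - (ynonzero.min() - yzero))
+    return y, nonzero_hinge.sum() + zero_hinge.sum()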
+
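+# Toy training data: each instance is a sparse vector over INPUT_DIMENSION
+# columns, stored as a dict mapping nonzero column index -> value.  For
+# example, {1: 0.1, 5: 0.5, 9: 1} is zero everywhere except columns 1, 5, 9.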
+nonzero_instances = []
+nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1})
+nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8})
+nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5})
+
+import model
+model = model.Model()
+
+for i in xrange(100000):
+    # Select an instance
+    instance = nonzero_instances[i % len(nonzero_instances)]
+
+    # Get the nonzero indices
+    nonzero_indexes = instance.keys()
+    nonzero_indexes.sort()
+
+    # Sample ZERO_SAMPLE_SIZE distinct zero indexes.
+    # @bug: If there are fewer than ZERO_SAMPLE_SIZE zero columns, this loop never terminates.
+    zero_indexes = []
+    while len(zero_indexes) < globals.ZERO_SAMPLE_SIZE:
+        idx = random.randint(0, globals.INPUT_DIMENSION - 1)
+        if idx in nonzero_indexes or idx in zero_indexes: continue
+        zero_indexes.append(idx)
+    zero_indexes.sort()
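+    # (A bounded alternative, sketched here but not used, is to sample from the
+    #  explicit complement of the nonzero columns, e.g.
+    #      candidates = [j for j in xrange(globals.INPUT_DIMENSION) if j not in instance]
+    #      zero_indexes = sorted(random.sample(candidates, globals.ZERO_SAMPLE_SIZE))
+    #  random.sample raises ValueError, rather than looping forever, when there
+    #  are fewer than ZERO_SAMPLE_SIZE zero columns.)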
+
+    # SGD update over instance
+    model.update(instance, nonzero_indexes, zero_indexes)