Mercurial > pylearn
diff sparse_random_autoassociator/main.py @ 370:a1bbcde6b456
Moved sparse_random_autoassociator from my repository
| field    | value                                |
|----------|--------------------------------------|
| author   | Joseph Turian <turian@gmail.com>     |
| date     | Mon, 07 Jul 2008 01:54:46 -0400      |
| parents  | (none)                               |
| children | 22463a194c90                         |
line wrap: on
line diff
#!/usr/bin/python
"""
An autoassociator for sparse inputs, using Ronan Collobert + Jason
Weston's sampling trick (2008).

The learned model is::
    h   = sigmoid(dot(x, w1) + b1)
    y   = sigmoid(dot(h, w2) + b2)

We assume that most of the inputs are zero, and hence that we can
separate x into xnonzero, x's nonzero components, and xzero, a sample
of the zeros. (We randomly choose, without replacement,
ZERO_SAMPLE_SIZE zero columns.)

The desideratum is that every nonzero entry is separated from every
zero entry by a margin of at least MARGIN.
For each ynonzero, we want it to exceed max(yzero) by at least MARGIN.
For each yzero, we want it to be exceeded by min(ynonzero) by at least
MARGIN.
The loss is a hinge loss (linear). The loss is irrespective of the
xnonzero magnitude (this may be a limitation). Hence, all nonzeroes
are equally important to exceed the maximum yzero.

LIMITATIONS:
- Only does pure stochastic gradient (batchsize = 1).
- Loss is irrespective of the xnonzero magnitude.
- We will always use all nonzero entries, even if the training
  instance is very non-sparse.
"""


import numpy
import random

import globals

random.seed(globals.SEED)

# Toy training set: each instance is a sparse vector represented as a
# dict mapping nonzero column index -> value.
nonzero_instances = []
nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1})
nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8})
nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5})

import model
model = model.Model()

for i in range(100000):
    # Select an instance (deterministic round-robin over the toy set).
    instance = nonzero_instances[i % len(nonzero_instances)]

    # The nonzero column indices, in ascending order.
    # (sorted() works on both Python 2 and 3, unlike keys().sort().)
    nonzero_indexes = sorted(instance.keys())

    # Sample ZERO_SAMPLE_SIZE distinct zero column indices, without
    # replacement.  The previous rejection-sampling loop spun forever
    # when fewer than ZERO_SAMPLE_SIZE zero columns existed; sampling
    # directly from the candidate set instead makes that case fail
    # loudly and terminates in one pass.
    zero_candidates = [idx for idx in range(globals.INPUT_DIMENSION)
                       if idx not in instance]
    if len(zero_candidates) < globals.ZERO_SAMPLE_SIZE:
        raise ValueError("Cannot sample %d zero columns: only %d "
                         "zero columns available"
                         % (globals.ZERO_SAMPLE_SIZE,
                            len(zero_candidates)))
    zero_indexes = sorted(random.sample(zero_candidates,
                                        globals.ZERO_SAMPLE_SIZE))

    # SGD update over instance
    model.update(instance, nonzero_indexes, zero_indexes)