changeset 418:2ea14774eb07

Automated merge with ssh://projects@lgcm.iro.umontreal.ca/hg/pylearn
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Mon, 14 Jul 2008 13:48:41 -0400
parents 4f61201fa9a9 (diff) 5175c564e37a (current diff)
children 43d9aa93934e
files
diffstat 11 files changed, 316 insertions(+), 288 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cost.py	Mon Jul 14 13:48:41 2008 -0400
@@ -0,0 +1,11 @@
+"""
+Cost functions.
+"""
+
+import theano.tensor as T
+
+def quadratic(target, output, axis=1):
+    return T.mean(T.sqr(target - output), axis)
+
+def cross_entropy(target, output, axis=1):
+    return -T.mean(target * T.log2(output) + (1 - target) * T.log2(1 - output), axis=axis)
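
A rough numpy sketch of what the two symbolic expressions above compute (illustrative only, not part of the changeset; the _np names are mine): quadratic is a per-example mean squared error and cross_entropy a per-example binary cross-entropy in bits, since T.log2 is used.

    import numpy as np

    def quadratic_np(target, output, axis=1):
        # mean squared error along the given axis
        return np.mean((target - output) ** 2, axis=axis)

    def cross_entropy_np(target, output, axis=1):
        # binary cross-entropy in bits (log base 2), matching T.log2 above
        return -np.mean(target * np.log2(output)
                        + (1 - target) * np.log2(1 - output), axis=axis)

    target = np.array([[1., 0., 1.]])
    output = np.array([[0.9, 0.2, 0.8]])
    print(quadratic_np(target, output))      # [ 0.03]
    print(cross_entropy_np(target, output))  # approx. [ 0.2653]
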
--- a/denoising_aa.py	Mon Jul 14 13:48:36 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,220 +0,0 @@
-"""
-A denoising auto-encoder
-"""
-
-import theano
-from theano.formula import *
-from learner import *
-from theano import tensor as t
-from nnet_ops import *
-import math
-from misc import *
-from misc_theano import *
-from theano.tensor_random import binomial
-
-def hiding_corruption_formula(seed,average_fraction_hidden):
-    """
-    Return a formula for the corruption process, in which a random
-    subset of the input numbers are hidden (mapped to 0). 
-
-    @param seed: seed of the random generator
-    @type seed: anything that numpy.random.RandomState accepts
-    
-    @param average_fraction_hidden: the probability with which each
-                                    input number is hidden (set to 0).
-    @type average_fraction_hidden: 0 <= real number <= 1
-    """
-    class HidingCorruptionFormula(Formulas):
-        x = t.matrix()
-        corrupted_x = x * binomial(seed,x,1,fraction_sampled)
-
-    return HidingCorruptionFormula()
-
-def squash_affine_formula(squash_function=sigmoid):
-    """
-    Simply does: squash_function(b + xW)
-    By convention prefix the parameters by _
-    """
-    class SquashAffineFormula(Formulas):
-        x = t.matrix() # of dimensions minibatch_size x n_inputs
-        _b = t.row() # of dimensions 1 x n_outputs
-        _W = t.matrix() # of dimensions n_inputs x n_outputs
-        a = _b + t.dot(x,_W) # of dimensions minibatch_size x n_outputs
-        y = squash_function(a)
-    return SquashAffineFormula()
-
-def gradient_descent_update_formula():
-    class GradientDescentUpdateFormula(Formula):
-        param = t.matrix()
-        learning_rate = t.scalar()
-        cost = t.column() # cost of each example in a minibatch
-        param_update = t.add_inplace(param, -learning_rate*t.sgrad(cost))
-    return gradient_descent_update_formula()
-    
-def probabilistic_classifier_loss_formula():
-    class ProbabilisticClassifierLossFormula(Formulas):
-        a = t.matrix() # of dimensions minibatch_size x n_classes, pre-softmax output
-        target_class = t.ivector() # dimension (minibatch_size)
-        nll, probability_predictions = crossentropy_softmax_1hot(a, target_class) # defined in nnet_ops.py
-    return ProbabilisticClassifierLossFormula()
-
-def binomial_cross_entropy_formula():
-    class BinomialCrossEntropyFormula(Formulas):
-        a = t.matrix() # pre-sigmoid activations, minibatch_size x dim
-        p = sigmoid(a) # model prediction
-        q = t.matrix() # target binomial probabilities, minibatch_size x dim
-        # using the identity softplus(a) - softplus(-a) = a,
-        # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a)
-        nll = -t.sum(q*a - softplus(-a))
-    # next line was missing... hope it's all correct above
-    return BinomialCrossEntropyFormula()
-
-def squash_affine_autoencoder_formula(hidden_squash=t.tanh,
-                                      reconstruction_squash=sigmoid,
-                                      share_weights=True,
-                                      reconstruction_nll_formula=binomial_cross_entropy_formula(),
-                                      update_formula=gradient_descent_update_formula):
-    if share_weights:
-        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a') + \
-                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c') + \
-                      reconstruction_nll_formula
-    else:
-        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a',_W='_W1') + \
-                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c',_W='_W2') + \
-                      reconstruction_nll_formula
-    autoencoder = autoencoder + [update_formula().rename(cost = 'nll',
-                                                         param = p)
-                                 for p in autoencoder.get_all('_.*')]
-    return autoencoder
-
-    
-# @todo: try other corruption formulae. The above is the default one.
-# not quite used in the ICML paper... (had a fixed number of 0s).
-
-class DenoisingAutoEncoder(LearningAlgorithm):
-    
-    def __init__(self,n_inputs,n_hidden_per_layer,
-                 learning_rate=0.1,
-                 max_n_epochs=100,
-                 L1_regularizer=0,
-                 init_range=1.,
-                 corruption_formula = hiding_corruption_formula(),
-                 autoencoder = squash_affine_autoencoder_formula(),
-                 minibatch_size=None,linker = "c|py"):
-        for name,val in locals().items():
-            if val is not self: self.__setattribute__(name,val)
-        self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x')
-        
-    def __call__(self, training_set=None):
-        """ Allocate and optionnaly train a model
-
-        @TODO enables passing in training and valid sets, instead of cutting one set in 80/20
-        """
-        model = DenoisingAutoEncoderModel(self)
-        if training_set:
-            print 'DenoisingAutoEncoder(): what do I do if training_set????'
-            # copied from old mlp_factory_approach:
-            if len(trainset) == sys.maxint:
-                raise NotImplementedError('Learning from infinite streams is not supported')
-            nval = int(self.validation_portion * len(trainset))
-            nmin = len(trainset) - nval
-            assert nmin >= 0
-            minset = trainset[:nmin] #real training set for minimizing loss
-            valset = trainset[nmin:] #validation set for early stopping
-            best = model
-            for stp in self.early_stopper():
-                model.update(
-                    minset.minibatches([input, target], minibatch_size=min(32,
-                        len(trainset))))
-                #print 'mlp.__call__(), we did an update'
-                if stp.set_score:
-                    stp.score = model(valset, ['loss_01'])
-                    if (stp.score < stp.best_score):
-                        best = copy.copy(model)
-            model = best
-            # end of the copy from mlp_factory_approach
- 
-        return model
-
-            
-    def compile(self, inputs, outputs):
-        return theano.function(inputs,outputs,unpack_single=False,linker=self.linker)
-    
-class DenoisingAutoEncoderModel(LearnerModel):
-    def __init__(self,learning_algorithm,params):
-        self.learning_algorithm=learning_algorithm
-        self.params=params
-        v = learning_algorithm.v
-        self.update_fn = learning_algorithm.compile(learning_algorithm.denoising_autoencoder_formula.inputs,
-                                                    learning_algorithm.denoising_autoencoder_formula.outputs)
-
-    def update(self, training_set, train_stats_collector=None):
-        
-        print 'dont update you crazy frog!'
-
-# old stuff
-
-#         self._learning_rate = t.scalar('learning_rate') # this is the symbol
-#         self.L1_regularizer = L1_regularizer
-#         self._L1_regularizer = t.scalar('L1_regularizer')
-#         self._input = t.matrix('input') # n_examples x n_inputs
-#         self._W = t.matrix('W')
-#         self._b = t.row('b')
-#         self._c = t.row('b')
-#         self._regularization_term = self._L1_regularizer * t.sum(t.abs(self._W))
-#         self._corrupted_input = corruption_process(self._input)
-#         self._hidden = t.tanh(self._b + t.dot(self._input, self._W.T))
-#         self._reconstruction_activations =self._c+t.dot(self._hidden,self._W)
-#         self._nll,self._output = crossentropy_softmax_1hot(Print("output_activations")(self._output_activations),self._target_vector)
-#         self._output_class = t.argmax(self._output,1)
-#         self._class_error = t.neq(self._output_class,self._target_vector)
-#         self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
-#         OnlineGradientTLearner.__init__(self)
-            
-#     def attributeNames(self):
-#         return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
-
-#     def parameterAttributes(self):
-#         return ["b1","W1", "b2", "W2"]
-    
-#     def updateMinibatchInputFields(self):
-#         return ["input","target"]
-    
-#     def updateEndOutputAttributes(self):
-#         return ["regularization_term"]
-
-#     def lossAttribute(self):
-#         return "minibatch_criterion"
-    
-#     def defaultOutputFields(self, input_fields):
-#         output_fields = ["output", "output_class",]
-#         if "target" in input_fields:
-#             output_fields += ["class_error", "nll"]
-#         return output_fields
-        
-#     def allocate(self,minibatch):
-#         minibatch_n_inputs  = minibatch["input"].shape[1]
-#         if not self._n_inputs:
-#             self._n_inputs = minibatch_n_inputs
-#             self.b1 = numpy.zeros((1,self._n_hidden))
-#             self.b2 = numpy.zeros((1,self._n_outputs))
-#             self.forget()
-#         elif self._n_inputs!=minibatch_n_inputs:
-#             # if the input changes dimension on the fly, we resize and forget everything
-#             self.forget()
-            
-#     def forget(self):
-#         if self._n_inputs:
-#             r = self._init_range/math.sqrt(self._n_inputs)
-#             self.W1 = numpy.random.uniform(low=-r,high=r,
-#                                            size=(self._n_hidden,self._n_inputs))
-#             r = self._init_range/math.sqrt(self._n_hidden)
-#             self.W2 = numpy.random.uniform(low=-r,high=r,
-#                                            size=(self._n_outputs,self._n_hidden))
-#             self.b1[:]=0
-#             self.b2[:]=0
-#             self._n_epochs=0
-
-#     def isLastEpoch(self):
-#         self._n_epochs +=1
-#         return self._n_epochs>=self._max_n_epochs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/noise.py	Mon Jul 14 13:48:41 2008 -0400
@@ -0,0 +1,13 @@
+def binomial(input, rstate, p = 0.75):
+    """
+    Op to corrupt an input with binomial noise.
+    Generate a noise vector of 1's and 0's (1 with probability p).
+    We multiply this by the input.
+
+    @note: See U{ssh://projects@lgcm.iro.umontreal.ca/repos/denoising_aa}
+    to see how rstate is used.
+    """
+    noise = rstate.gen_like(('binomial',{'p': p, 'n': 1}), input)
+    noise.name = 'noise'
+    return noise * input
+
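
The rstate.gen_like call above is pylearn/Theano-specific; the effect is simply a multiplicative binomial mask. A minimal numpy equivalent, assuming a numpy RandomState as the generator (illustrative, not part of the changeset):

    import numpy as np

    def binomial_corrupt_np(x, rng, p=0.75):
        # keep each entry of x with probability p, zero it out otherwise
        noise = rng.binomial(n=1, p=p, size=x.shape)
        return noise * x

    rng = np.random.RandomState(0)
    x = np.ones((2, 5))
    print(binomial_corrupt_np(x, rng, p=0.75))
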
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sandbox/denoising_aa.py	Mon Jul 14 13:48:41 2008 -0400
@@ -0,0 +1,224 @@
+"""
+A denoising auto-encoder
+
+@warning: You should not use this interface. It is not complete and is not functional.
+Instead, use::
+    ssh://projects@lgcm.iro.umontreal.ca/repos/denoising_aa
+"""
+
+import theano
+from theano.formula import *
+from learner import *
+from theano import tensor as t
+from nnet_ops import *
+import math
+from misc import *
+from misc_theano import *
+from theano.tensor_random import binomial
+
+def hiding_corruption_formula(seed,average_fraction_hidden):
+    """
+    Return a formula for the corruption process, in which a random
+    subset of the input numbers are hidden (mapped to 0). 
+
+    @param seed: seed of the random generator
+    @type seed: anything that numpy.random.RandomState accepts
+    
+    @param average_fraction_hidden: the probability with which each
+                                    input number is hidden (set to 0).
+    @type average_fraction_hidden: 0 <= real number <= 1
+    """
+    class HidingCorruptionFormula(Formulas):
+        x = t.matrix()
+        corrupted_x = x * binomial(seed, x, 1, 1 - average_fraction_hidden) # keep each input with probability 1 - average_fraction_hidden
+
+    return HidingCorruptionFormula()
+
+def squash_affine_formula(squash_function=sigmoid):
+    """
+    Simply does: squash_function(b + xW)
+    By convention prefix the parameters by _
+    """
+    class SquashAffineFormula(Formulas):
+        x = t.matrix() # of dimensions minibatch_size x n_inputs
+        _b = t.row() # of dimensions 1 x n_outputs
+        _W = t.matrix() # of dimensions n_inputs x n_outputs
+        a = _b + t.dot(x,_W) # of dimensions minibatch_size x n_outputs
+        y = squash_function(a)
+    return SquashAffineFormula()
+
+def gradient_descent_update_formula():
+    class GradientDescentUpdateFormula(Formula):
+        param = t.matrix()
+        learning_rate = t.scalar()
+        cost = t.column() # cost of each example in a minibatch
+        param_update = t.add_inplace(param, -learning_rate*t.sgrad(cost))
+    return GradientDescentUpdateFormula()
+    
+def probabilistic_classifier_loss_formula():
+    class ProbabilisticClassifierLossFormula(Formulas):
+        a = t.matrix() # of dimensions minibatch_size x n_classes, pre-softmax output
+        target_class = t.ivector() # dimension (minibatch_size)
+        nll, probability_predictions = crossentropy_softmax_1hot(a, target_class) # defined in nnet_ops.py
+    return ProbabilisticClassifierLossFormula()
+
+def binomial_cross_entropy_formula():
+    class BinomialCrossEntropyFormula(Formulas):
+        a = t.matrix() # pre-sigmoid activations, minibatch_size x dim
+        p = sigmoid(a) # model prediction
+        q = t.matrix() # target binomial probabilities, minibatch_size x dim
+        # using the identity softplus(a) - softplus(-a) = a,
+        # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a)
+        nll = -t.sum(q*a - softplus(a))
+    # next line was missing... hope it's all correct above
+    return BinomialCrossEntropyFormula()
+
+def squash_affine_autoencoder_formula(hidden_squash=t.tanh,
+                                      reconstruction_squash=sigmoid,
+                                      share_weights=True,
+                                      reconstruction_nll_formula=binomial_cross_entropy_formula(),
+                                      update_formula=gradient_descent_update_formula):
+    if share_weights:
+        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a') + \
+                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c') + \
+                      reconstruction_nll_formula
+    else:
+        autoencoder = squash_affine_formula(hidden_squash).rename(a='code_a',_W='_W1') + \
+                      squash_affine_formula(reconstruction_squash).rename(x='hidden',y='reconstruction',_b='_c',_W='_W2') + \
+                      reconstruction_nll_formula
+    autoencoder = autoencoder + [update_formula().rename(cost = 'nll',
+                                                         param = p)
+                                 for p in autoencoder.get_all('_.*')]
+    return autoencoder
+
+    
+# @todo: try other corruption formulae. The above is the default one.
+# not quite used in the ICML paper... (had a fixed number of 0s).
+
+class DenoisingAutoEncoder(LearningAlgorithm):
+    
+    def __init__(self,n_inputs,n_hidden_per_layer,
+                 learning_rate=0.1,
+                 max_n_epochs=100,
+                 L1_regularizer=0,
+                 init_range=1.,
+                 corruption_formula = hiding_corruption_formula(),
+                 autoencoder = squash_affine_autoencoder_formula(),
+                 minibatch_size=None,linker = "c|py"):
+        for name,val in locals().items():
+            if val is not self: setattr(self, name, val)
+        self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x')
+        
+    def __call__(self, training_set=None):
+        """ Allocate and optionnaly train a model
+
+        @TODO enables passing in training and valid sets, instead of cutting one set in 80/20
+        """
+        model = DenoisingAutoEncoderModel(self)
+        if training_set:
+            print 'DenoisingAutoEncoder(): what do I do if training_set????'
+            # copied from old mlp_factory_approach:
+            if len(training_set) == sys.maxint:
+                raise NotImplementedError('Learning from infinite streams is not supported')
+            nval = int(self.validation_portion * len(training_set))
+            nmin = len(training_set) - nval
+            assert nmin >= 0
+            minset = training_set[:nmin] # real training set for minimizing loss
+            valset = training_set[nmin:] # validation set for early stopping
+            best = model
+            for stp in self.early_stopper():
+                model.update(
+                    minset.minibatches([input, target], minibatch_size=min(32,
+                        len(training_set))))
+                #print 'mlp.__call__(), we did an update'
+                if stp.set_score:
+                    stp.score = model(valset, ['loss_01'])
+                    if (stp.score < stp.best_score):
+                        best = copy.copy(model)
+            model = best
+            # end of the copy from mlp_factory_approach
+ 
+        return model
+
+            
+    def compile(self, inputs, outputs):
+        return theano.function(inputs,outputs,unpack_single=False,linker=self.linker)
+    
+class DenoisingAutoEncoderModel(LearnerModel):
+    def __init__(self,learning_algorithm,params):
+        self.learning_algorithm=learning_algorithm
+        self.params=params
+        v = learning_algorithm.v
+        self.update_fn = learning_algorithm.compile(learning_algorithm.denoising_autoencoder_formula.inputs,
+                                                    learning_algorithm.denoising_autoencoder_formula.outputs)
+
+    def update(self, training_set, train_stats_collector=None):
+        
+        print 'dont update you crazy frog!'
+
+# old stuff
+
+#         self._learning_rate = t.scalar('learning_rate') # this is the symbol
+#         self.L1_regularizer = L1_regularizer
+#         self._L1_regularizer = t.scalar('L1_regularizer')
+#         self._input = t.matrix('input') # n_examples x n_inputs
+#         self._W = t.matrix('W')
+#         self._b = t.row('b')
+#         self._c = t.row('b')
+#         self._regularization_term = self._L1_regularizer * t.sum(t.abs(self._W))
+#         self._corrupted_input = corruption_process(self._input)
+#         self._hidden = t.tanh(self._b + t.dot(self._input, self._W.T))
+#         self._reconstruction_activations =self._c+t.dot(self._hidden,self._W)
+#         self._nll,self._output = crossentropy_softmax_1hot(Print("output_activations")(self._output_activations),self._target_vector)
+#         self._output_class = t.argmax(self._output,1)
+#         self._class_error = t.neq(self._output_class,self._target_vector)
+#         self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
+#         OnlineGradientTLearner.__init__(self)
+            
+#     def attributeNames(self):
+#         return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
+
+#     def parameterAttributes(self):
+#         return ["b1","W1", "b2", "W2"]
+    
+#     def updateMinibatchInputFields(self):
+#         return ["input","target"]
+    
+#     def updateEndOutputAttributes(self):
+#         return ["regularization_term"]
+
+#     def lossAttribute(self):
+#         return "minibatch_criterion"
+    
+#     def defaultOutputFields(self, input_fields):
+#         output_fields = ["output", "output_class",]
+#         if "target" in input_fields:
+#             output_fields += ["class_error", "nll"]
+#         return output_fields
+        
+#     def allocate(self,minibatch):
+#         minibatch_n_inputs  = minibatch["input"].shape[1]
+#         if not self._n_inputs:
+#             self._n_inputs = minibatch_n_inputs
+#             self.b1 = numpy.zeros((1,self._n_hidden))
+#             self.b2 = numpy.zeros((1,self._n_outputs))
+#             self.forget()
+#         elif self._n_inputs!=minibatch_n_inputs:
+#             # if the input changes dimension on the fly, we resize and forget everything
+#             self.forget()
+            
+#     def forget(self):
+#         if self._n_inputs:
+#             r = self._init_range/math.sqrt(self._n_inputs)
+#             self.W1 = numpy.random.uniform(low=-r,high=r,
+#                                            size=(self._n_hidden,self._n_inputs))
+#             r = self._init_range/math.sqrt(self._n_hidden)
+#             self.W2 = numpy.random.uniform(low=-r,high=r,
+#                                            size=(self._n_outputs,self._n_hidden))
+#             self.b1[:]=0
+#             self.b2[:]=0
+#             self._n_epochs=0
+
+#     def isLastEpoch(self):
+#         self._n_epochs +=1
+#         return self._n_epochs>=self._max_n_epochs
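
A quick numeric check of the identity used in binomial_cross_entropy_formula above: with p = sigmoid(a), q*log(p) + (1-q)*log(1-p) equals q*a - softplus(a), using softplus(a) - softplus(-a) = a. Sketch only, assuming the standard definitions of sigmoid and softplus:

    import numpy as np

    def softplus(a):
        return np.log1p(np.exp(a))

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    a, q = 1.3, 0.8
    p = sigmoid(a)
    lhs = q * np.log(p) + (1 - q) * np.log(1 - p)
    rhs = q * a - softplus(a)
    print(abs(lhs - rhs) < 1e-12)  # True
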
--- a/sandbox/rbm/model.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/rbm/model.py	Mon Jul 14 13:48:41 2008 -0400
@@ -59,7 +59,7 @@
 
         random.seed(random_seed)
 
-        self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=False, random_seed=self.random_seed)
+        self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed)
         self.prev_dw = 0
         self.prev_db = 0
         self.prev_dc = 0
@@ -89,7 +89,7 @@
         """
         minibatch = len(instances)
         v0 = pylearn.sparse_instance.to_vector(instances, self.input_dimension)
-        print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
+        print "old XENT per instance:", numpy.sum(self.deterministic_reconstruction_error(v0))/minibatch
         q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
         h0 = sample(q0)
         p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
@@ -110,7 +110,7 @@
         self.last_db = db
         self.last_dc = dc
 
-        print "new XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
+        print "new XENT per instance:", numpy.sum(self.deterministic_reconstruction_error(v0))/minibatch
 
 #        print
 #        print "v[0]:", v0
--- a/sandbox/simple_autoassociator/README.txt	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/README.txt	Mon Jul 14 13:48:41 2008 -0400
@@ -1,2 +1,5 @@
-This is broken. It can't even learn the simple two training instances in
-main.py
+This seems to work.
+
+@todo:
+    * Add momentum.
+    * Add learning rate decay schedule.
--- a/sandbox/simple_autoassociator/globals.py	Mon Jul 14 13:48:36 2008 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-"""
-Global variables.
-"""
-
-#INPUT_DIMENSION = 1000
-#INPUT_DIMENSION = 100
-INPUT_DIMENSION = 4
-HIDDEN_DIMENSION = 10
-#HIDDEN_DIMENSION = 4
-LEARNING_RATE = 0.1
-LR = LEARNING_RATE
-SEED = 666
--- a/sandbox/simple_autoassociator/graph.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/graph.py	Mon Jul 14 13:48:41 2008 -0400
@@ -6,7 +6,7 @@
 from pylearn.nnet_ops import sigmoid, binary_crossentropy
 from theano import tensor as t
 from theano.tensor import dot
-x           = t.dvector()
+x           = t.dmatrix()
 w1          = t.dmatrix()
 b1          = t.dvector()
 w2          = t.dmatrix()
@@ -17,10 +17,10 @@
 loss_unsummed = binary_crossentropy(y, x)
 loss = t.sum(loss_unsummed)
 
-(gw1, gb1, gw2, gb2, gy, gh) = t.grad(loss, [w1, b1, w2, b2, y, h])
+(gw1, gb1, gw2, gb2) = t.grad(loss, [w1, b1, w2, b2])
 
 import theano.compile
 
 inputs  = [x, w1, b1, w2, b2]
-outputs = [y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy, gh]
+outputs = [y, h, loss, gw1, gb1, gw2, gb2]
 trainfn = theano.compile.function(inputs, outputs)
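
With the change above, the graph takes a minibatch matrix x rather than a single vector, and only the parameter gradients are returned. A self-contained numpy sketch of the same forward pass and gradients (my own stand-in for the compiled trainfn, assuming the usual sigmoid and binary cross-entropy definitions):

    import numpy as np

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    def trainfn_np(x, w1, b1, w2, b2):
        # forward pass of the autoassociator on a minibatch (one row per instance)
        h = sigmoid(np.dot(x, w1) + b1)
        y = sigmoid(np.dot(h, w2) + b2)
        loss = -np.sum(x * np.log(y) + (1 - x) * np.log(1 - y))  # summed binary xent
        # backprop: gradient of the summed loss w.r.t. the parameters
        d_a2 = y - x                          # d loss / d (h.w2 + b2)
        gw2 = np.dot(h.T, d_a2)
        gb2 = d_a2.sum(axis=0)
        d_a1 = np.dot(d_a2, w2.T) * h * (1 - h)
        gw1 = np.dot(x.T, d_a1)
        gb1 = d_a1.sum(axis=0)
        return y, h, loss, gw1, gb1, gw2, gb2

    rng = np.random.RandomState(0)
    x = rng.rand(3, 10)                       # 3 instances, input dimension 10
    w1 = (rng.rand(10, 4) - 0.5) / 10
    b1 = np.zeros(4)
    w2 = (rng.rand(4, 10) - 0.5) / 4
    b2 = np.zeros(10)
    y, h, loss, gw1, gb1, gw2, gb2 = trainfn_np(x, w1, b1, w2, b2)
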
--- a/sandbox/simple_autoassociator/main.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/main.py	Mon Jul 14 13:48:41 2008 -0400
@@ -7,9 +7,6 @@
        y   = sigmoid(dot(h, w2) + b2)
 
     Binary xent loss.
-
-    LIMITATIONS:
-       - Only does pure stochastic gradient (batchsize = 1).
 """
 
 
@@ -24,11 +21,11 @@
 ##nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5})
 
 import model
-model = model.Model()
+model = model.Model(input_dimension=10, hidden_dimension=4)
 
 for i in xrange(100000):
-    # Select an instance
-    instance = nonzero_instances[i % len(nonzero_instances)]
+#    # Select an instance
+#    instance = nonzero_instances[i % len(nonzero_instances)]
 
-    # SGD update over instance
-    model.update(instance)
+    # Update over instance
+    model.update(nonzero_instances)
--- a/sandbox/simple_autoassociator/model.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/model.py	Mon Jul 14 13:48:41 2008 -0400
@@ -6,53 +6,66 @@
 from graph import trainfn
 import parameters
 
-import globals
-from globals import LR
-
 import numpy
 import random
-random.seed(globals.SEED)
+
+import pylearn.sparse_instance
 
 class Model:
-    def __init__(self):
-        self.parameters = parameters.Parameters(randomly_initialize=True)
+    """
+    @todo: Add momentum.
+    @todo: Add learning rate decay schedule.
+    """
+    def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, weight_decay = 0.0002, random_seed = 666):
+        self.input_dimension    = input_dimension
+        self.hidden_dimension   = hidden_dimension
+        self.learning_rate      = learning_rate
+        self.weight_decay       = weight_decay
+        self.random_seed        = random_seed
 
-    def update(self, instance):
+        random.seed(random_seed)
+
+        self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed)
+
+    def deterministic_reconstruction(self, x):
+        (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
+        return y
+
+    def update(self, instances):
         """
         Update the L{Model} using one training instance.
-        @param instance: A dict from feature index to (non-zero) value.
+        @param instances: A list of dicts mapping feature index to (non-zero) value.
         @todo: Should assert that nonzero_indices and zero_indices
         are correct (i.e. are truly nonzero/zero).
+        @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing?
+        @todo: Decay the biases too?
         """
-        x = numpy.zeros(globals.INPUT_DIMENSION)
-        for idx in instance.keys():
-            x[idx] = instance[idx]
+        minibatch = len(instances)
+        x = pylearn.sparse_instance.to_vector(instances, self.input_dimension)
 
-        (y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy, gh) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
-        print
-        print "instance:", instance
-        print "x:", x
-        print "OLD y:", y
-        print "OLD loss (unsummed):", loss_unsummed
-        print "gy:", gy
-        print "gh:", gh
+        (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
+#        print
+#        print "instance:", instance
+#        print "x:", x
+#        print "OLD y:", y
         print "OLD total loss:", loss
-        print "gw1:", gw1
-        print "gb1:", gb1
-        print "gw2:", gw2
-        print "gb2:", gb2
+#        print "gw1:", gw1
+#        print "gb1:", gb1
+#        print "gw2:", gw2
+#        print "gb2:", gb2
+
+        self.parameters.w1 *= (1 - self.weight_decay)
+        self.parameters.w2 *= (1 - self.weight_decay)
 
         # SGD update
-        self.parameters.w1  -= LR * gw1
-        self.parameters.b1  -= LR * gb1
-        self.parameters.w2  -= LR * gw2
-        self.parameters.b2  -= LR * gb2
+        self.parameters.w1  -= self.learning_rate * gw1 / minibatch
+        self.parameters.b1  -= self.learning_rate * gb1 / minibatch
+        self.parameters.w2  -= self.learning_rate * gw2 / minibatch
+        self.parameters.b2  -= self.learning_rate * gb2 / minibatch
 
-        # Recompute the loss, to make sure it's descreasing
-        (y, h, loss, loss_unsummed, gw1, gb1, gw2, gb2, gy, gh) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
-        print "NEW y:", y
-        print "NEW loss (unsummed):", loss_unsummed
-        print "gy:", gy
-        print "NEW total loss:", loss
-        print "h:", h
-        print self.parameters
+#        # Recompute the loss, to make sure it's decreasing
+#        (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
+##        print "NEW y:", y
+#        print "NEW total loss:", loss
+##        print "h:", h
+##        print self.parameters
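
The new update decays the weights and then takes one gradient step averaged over the minibatch, matching the hyper-parameters introduced in __init__. A hedged numpy sketch of just that step, with a made-up gradient standing in for gw1:

    import numpy as np

    learning_rate, weight_decay, minibatch = 0.1, 0.0002, 4
    w1 = np.ones((10, 4))
    gw1 = np.full((10, 4), 0.5)              # stand-in for the summed minibatch gradient

    w1 *= (1 - weight_decay)                 # L2-style weight decay on the weights only
    w1 -= learning_rate * gw1 / minibatch    # SGD step, averaged over the minibatch
    print(w1[0, 0])                          # 0.9998 - 0.0125 = 0.9873

The @todo above asks whether weight_decay should itself be multiplied by learning_rate, as in Semantic Hashing; the sketch applies it unscaled, as the code does.
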
--- a/sandbox/simple_autoassociator/parameters.py	Mon Jul 14 13:48:36 2008 -0400
+++ b/sandbox/simple_autoassociator/parameters.py	Mon Jul 14 13:48:41 2008 -0400
@@ -3,25 +3,24 @@
 """
 
 import numpy
-import globals
 
 class Parameters:
     """
     Parameters used by the L{Model}.
     """
-    def __init__(self, input_dimension=globals.INPUT_DIMENSION, hidden_dimension=globals.HIDDEN_DIMENSION, randomly_initialize=False, seed=globals.SEED):
+    def __init__(self, input_dimension, hidden_dimension, randomly_initialize, random_seed):
         """
         Initialize L{Model} parameters.
         @param randomly_initialize: If True, then randomly initialize
         according to the given seed. If False, then just use zeroes.
         """
         if randomly_initialize:
-            numpy.random.seed(seed)
+            numpy.random.seed(random_seed)
             self.w1 = (numpy.random.rand(input_dimension, hidden_dimension)-0.5)/input_dimension
             self.w2 = (numpy.random.rand(hidden_dimension, input_dimension)-0.5)/hidden_dimension
             self.b1 = numpy.zeros(hidden_dimension)
-            #self.b2 = numpy.zeros(input_dimension)
-            self.b2 = numpy.array([10, 0, 0, -10])
+            self.b2 = numpy.zeros(input_dimension)
+            #self.b2 = numpy.array([10, 0, 0, -10])
         else:
             self.w1 = numpy.zeros((input_dimension, hidden_dimension))
             self.w2 = numpy.zeros((hidden_dimension, input_dimension))