diff denoising_aa.py @ 226:3595ba2610f7

merged
author James Bergstra <bergstrj@iro.umontreal.ca>
date Fri, 23 May 2008 17:12:12 -0400
parents df3fae88ab46
children 9e96fe8b955c
line wrap: on
line diff
--- a/denoising_aa.py	Fri May 23 17:11:39 2008 -0400
+++ b/denoising_aa.py	Fri May 23 17:12:12 2008 -0400
@@ -31,6 +31,7 @@
 
 def squash_affine_formula(squash_function=sigmoid):
     """
+    Simply does: squash_function(b + xW)
     By convention, parameter names are prefixed with _
     """
     class SquashAffineFormula(Formulas):
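For reference, the computation wrapped by squash_affine_formula is just an affine map followed by an elementwise squashing nonlinearity. A minimal numpy sketch of the same computation (illustrative only: the module builds this as a symbolic Theano graph, and the names below are mine):

    import numpy as np

    def sigmoid(x):
        # elementwise logistic squash, maps R into (0, 1)
        return 1.0 / (1.0 + np.exp(-x))

    def squash_affine(x, W, b, squash=sigmoid):
        # squash_function(b + xW): rows of x are examples
        return squash(b + x.dot(W))

    x = np.random.randn(4, 3)    # minibatch_size x n_inputs
    W = np.random.randn(3, 2)    # n_inputs x n_outputs
    b = np.zeros(2)              # one bias per output unit
    h = squash_affine(x, W, b)   # 4 x 2 matrix, entries in (0, 1)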
@@ -53,7 +54,7 @@
     class ProbabilisticClassifierLossFormula(Formulas):
         a = t.matrix() # of dimensions minibatch_size x n_classes, pre-softmax output
         target_class = t.ivector() # dimension (minibatch_size)
-        nll, probability_predictions = crossentropy_softmax_1hot(a, target_class)
+        nll, probability_predictions = crossentropy_softmax_1hot(a, target_class) # defined in nnet_ops.py
     return ProbabilisticClassifierLossFormula()
 
 def binomial_cross_entropy_formula():
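crossentropy_softmax_1hot is defined in nnet_ops.py; as used here, it returns the per-example negative log-likelihood of the integer target class together with the softmax probabilities. A plain numpy rendering of that contract (a sketch of the semantics as I read them, not the actual implementation):

    import numpy as np

    def crossentropy_softmax_1hot_np(a, target_class):
        # a: minibatch_size x n_classes pre-softmax outputs
        # target_class: one integer class index per example
        a = a - a.max(axis=1, keepdims=True)        # stabilize the exponentials
        e = np.exp(a)
        probs = e / e.sum(axis=1, keepdims=True)    # softmax along each row
        nll = -np.log(probs[np.arange(a.shape[0]), target_class])
        return nll, probs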
@@ -64,6 +65,8 @@
         # using the identity softplus(a) - softplus(-a) = a,
         # we obtain that q log(p) + (1-q) log(1-p) = q a - softplus(a)
         nll = -t.sum(q*a - softplus(a))
+    # the return statement below was missing; added here
+    return BinomialCrossEntropyFormula()
 
 def squash_affine_autoencoder_formula(hidden_squash=t.tanh,
                                       reconstruction_squash=sigmoid,
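The softplus identity invoked in the comments is easy to spot-check numerically. A throwaway numpy snippet (assuming p = sigmoid(a) and a target q in [0, 1]) confirming that the cross-entropy reduces to q*a - softplus(a), which is exactly the quantity the nll above negates and sums:

    import numpy as np

    def softplus(x):
        return np.log1p(np.exp(x))

    a = np.random.randn(5)            # pre-sigmoid activations
    q = np.random.rand(5)             # targets in [0, 1]
    p = 1.0 / (1.0 + np.exp(-a))      # p = sigmoid(a)

    # the identity: softplus(a) - softplus(-a) == a
    assert np.allclose(softplus(a) - softplus(-a), a)

    # hence q*log(p) + (1-q)*log(1-p) == q*a - softplus(a)
    assert np.allclose(q*np.log(p) + (1-q)*np.log(1-p), q*a - softplus(a))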
@@ -102,9 +105,33 @@
         self.denoising_autoencoder_formula = corruption_formula + autoencoder.rename(x='corrupted_x')
         
     def __call__(self, training_set=None):
+        """ Allocate and optionnaly train a model"""
         model = DenoisingAutoEncoderModel(self)
         if training_set:
-            print 'what do I do if training set????'
+            print 'DenoisingAutoEncoder(): what do I do if training_set????'
+            # copied from mlp_factory_approach; needs `import sys` and `import copy`
+            if len(training_set) == sys.maxint:
+                raise NotImplementedError('Learning from infinite streams is not supported')
+            nval = int(self.validation_portion * len(training_set))
+            nmin = len(training_set) - nval
+            assert nmin >= 0
+            minset = training_set[:nmin] # real training set, for minimizing the loss
+            valset = training_set[nmin:] # validation set, for early stopping
+            best = model
+            for stp in self.early_stopper():
+                model.update(
+                    minset.minibatches([input, target], minibatch_size=min(32,
+                        len(training_set))))
+                # NOTE: input, target, self.validation_portion and self.early_stopper are still undefined here
+                if stp.set_score:
+                    stp.score = model(valset, ['loss_01'])
+                    if stp.score < stp.best_score:
+                        best = copy.copy(model)
+            model = best
+            # end of the copy from mlp_factory_approach
+
+        return model
+
             
     def compile(self, inputs, outputs):
         return theano.function(inputs,outputs,unpack_single=False,linker=self.linker)
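For context, the block copied from mlp_factory_approach is a plain train/validation split with early stopping: fit on most of the data, periodically score a held-out slice, and keep a copy of the best-scoring model. A hypothetical standalone version of that loop (model.update, minibatches, early_stopper, and 'loss_01' are taken from the diff above, not a verified public API):

    import copy

    def train_with_early_stopping(model, training_set, validation_portion,
                                  early_stopper, fieldnames):
        nval = int(validation_portion * len(training_set))
        nmin = len(training_set) - nval
        minset = training_set[:nmin]   # minimized over
        valset = training_set[nmin:]   # consulted only to decide when to stop
        best = model
        for stp in early_stopper():
            model.update(minset.minibatches(fieldnames,
                                            minibatch_size=min(32, nmin)))
            if stp.set_score:
                stp.score = model(valset, ['loss_01'])
                if stp.score < stp.best_score:
                    best = copy.copy(model)   # best model seen so far
        return best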