diff linear_regression.py @ 111:88257dfedf8c

Added another work in progress, for MLPs
author bengioy@bengiomac.local
date Wed, 07 May 2008 09:16:04 -0400
parents 8fa1ef2411a0
children d0a1bd0378c6
--- a/linear_regression.py	Tue May 06 22:24:55 2008 -0400
+++ b/linear_regression.py	Wed May 07 09:16:04 2008 -0400
@@ -1,12 +1,11 @@
 
 from learner import *
 from theano import tensor as t
-from compile import Function
 from theano.scalar import as_scalar
 
 # this is one of the simplest example of learner, and illustrates
 # the use of theano 
-class LinearRegression(OneShotTLearner):
+class LinearRegression(MinibatchUpdatesTLearner):
     """
     Implement linear regression, with or without L2 regularization
     (the former is called Ridge Regression and the latter Ordinary Least Squares).
@@ -18,14 +17,13 @@
     of all the training sets passed to update since construction or since
     the last call to forget).
 
-    The L2 regularization coefficient is obtained analytically.
     For each (input[t],output[t]) pair in a minibatch,::
     
        output_t = b + W * input_t
 
     where b and W are obtained by minimizing::
 
-       lambda sum_{ij} W_{ij}^2  + sum_t ||output_t - target_t||^2
+       L2_regularizer sum_{ij} W_{ij}^2  + sum_t ||output_t - target_t||^2
 
     Let X be the whole training set inputs matrix (one input example per row),
     with the first column full of 1's, and let Y be the whole training set
@@ -36,7 +34,7 @@
        XtX * theta[:,i] = XtY[:,i]
 
     where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X
-    plus lambda on the diagonal except at (0,0),
+    plus L2_regularizer on the diagonal except at (0,0),
     and XtY is a (n_inputs+1) x n_outputs matrix containing X'*Y.
 
     The fields and attributes expected and produced by use and update are the following:
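
For reference, the solve described in the docstring above can be written directly in NumPy; the following is a minimal sketch with illustrative names (X, Y, L2_regularizer), not part of the learner class::

    import numpy

    def ridge_normal_equations(X, Y, L2_regularizer):
        # X: (n_examples, n_inputs) inputs, Y: (n_examples, n_outputs) targets
        n_examples, n_inputs = X.shape
        # prepend a column of 1's so the first row of theta plays the role of b
        Xe = numpy.hstack([numpy.ones((n_examples, 1)), X])
        XtX = numpy.dot(Xe.T, Xe)              # (n_inputs+1) x (n_inputs+1)
        XtY = numpy.dot(Xe.T, Y)               # (n_inputs+1) x n_outputs
        # add the regularizer on the diagonal, except at (0,0) so b is not penalized
        idx = numpy.arange(1, n_inputs + 1)
        XtX[idx, idx] += L2_regularizer
        # each theta[:,i] solves XtX * theta[:,i] = XtY[:,i]; solve all columns at once
        theta = numpy.linalg.solve(XtX, XtY)   # (n_inputs+1) x n_outputs
        b, W = theta[0, :], theta[1:, :].T     # so that output = b + numpy.dot(input, W.T)
        return b, W
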
@@ -53,10 +51,10 @@
        input_dataset rather than those learned during 'update'; currently no support
        for providing these to update):
        
-       - 'lambda' 
+       - 'L2_regularizer' 
        - 'b' 
        - 'W'
-       - 'parameters' = (b, W) tuple
+       - 'parameters' = [b, W] 
        - 'regularization_term'
        - 'XtX'
        - 'XtY'
@@ -64,7 +62,7 @@
     """
 
     def attributeNames(self):
-        return ["lambda","parameters","b","W","regularization_term","XtX","XtY"]
+        return ["L2_regularizer","parameters","b","W","regularization_term","XtX","XtY"]
 
     def useInputAttributes(self):
         return ["b","W"]
@@ -73,10 +71,7 @@
         return []
 
     def updateInputAttributes(self):
-        return ["lambda","XtX","XtY"]
-
-    def updateOutputAttributes(self):
-        return ["parameters"] + self.updateMinibatchOutputAttributes() + self.updateEndOutputAttributes()
+        return ["L2_regularizer","XtX","XtY"]
 
     def updateMinibatchInputFields(self):
         return ["input","target"]
@@ -93,6 +88,9 @@
     def updateEndOutputAttributes(self):
         return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ?
 
+    def parameterAttributes(self):
+        return ["b","W"]
+    
     def defaultOutputFields(self, input_fields):
         output_fields = ["output"]
         if "target" in input_fields:
@@ -102,7 +100,7 @@
     def __init__(self):
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
+        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
         self._theta = t.matrix('theta')
         self._W = self._theta[:,1:] 
         self._b = self._theta[:,0]
@@ -111,13 +109,12 @@
         self._extended_input = t.prepend_one_to_each_row(self._input)
         self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
         self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
+        self._regularizer = self._L2_regularizer * t.dot(self._W,self._W)
         self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
 
         OneShotTLearner.__init__(self)
-        self.allocate()
             
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
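
The expressions built in __init__ above accumulate the sufficient statistics X'X and X'Y in place over minibatches and re-solve the linear systems for theta; a plain NumPy sketch of that update (hypothetical helper names, not the TLearner interface) is::

    import numpy

    def accumulate_statistics(XtX, XtY, input, target):
        # input: (minibatch_size, n_inputs), target: (minibatch_size, n_outputs)
        extended_input = numpy.hstack([numpy.ones((input.shape[0], 1)), input])
        XtX += numpy.dot(extended_input.T, extended_input)   # in-place, like add_inplace
        XtY += numpy.dot(extended_input.T, target)

    def solve_theta(XtX, XtY):
        # one column of theta per output, as in the docstring's linear systems
        return numpy.linalg.solve(XtX, XtY)

Calling accumulate_statistics once per minibatch and solve_theta at the end of a pass reproduces the same result as the expression graph sketched here.
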
@@ -130,7 +127,7 @@
             self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
             self.forget()
         elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
-            # if the input or target changes dimension on the fly, we forget everything
+            # if the input or target changes dimension on the fly, we resize and forget everything
             self.forget()
             
     def forget(self):
@@ -139,9 +136,5 @@
             self.XtY.resize((1+self.n_inputs,self.n_outputs))
             self.XtX.data[:,:]=0
             self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+            numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
 
-    def updateEnd(self):
-        TLearner.updateEnd(self)
-        self.parameters = (self.W,self.b)
-
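
One caveat with the initialization in forget(): numpy.diag applied to a 2-D array does not reliably give a writable view, so assigning into numpy.diag(self.XtX.data)[1:] may leave XtX unchanged. A sketch of an in-place equivalent of the intended reset, with illustrative names, is::

    import numpy

    def reset_statistics(XtX, XtY, L2_regularizer):
        # zero the accumulated statistics and put L2_regularizer on the
        # diagonal of XtX, except at (0,0) so the bias is not regularized
        XtX[:, :] = 0
        XtY[:, :] = 0
        idx = numpy.arange(1, XtX.shape[0])
        XtX[idx, idx] = L2_regularizer
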