diff linear_regression.py @ 376:c9a89be5cb0a

Redesigning linear_regression
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Mon, 07 Jul 2008 10:08:35 -0400
parents f6505ec32dc3
children 74b402b5a81b
line wrap: on
line diff
--- a/linear_regression.py	Mon Jun 16 17:47:36 2008 -0400
+++ b/linear_regression.py	Mon Jul 07 10:08:35 2008 -0400
@@ -4,11 +4,12 @@
 the use of theano.
 """
 
-from learner import *
-from theano import tensor as t
+from pylearn import OfflineLearningAlgorithm
+from theano import tensor as T
 from theano.scalar import as_scalar
+from common.autoname import AutoName
 
-class LinearRegression(MinibatchUpdatesTLearner):
+class LinearRegression(OfflineLearningAlgorithm):
     """
     Implement linear regression, with or without L2 regularization
     (the former is called Ridge Regression and the latter Ordinary Least Squares).
@@ -40,96 +41,122 @@
     plus L2_regularizer on the diagonal except at (0,0),
     and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y.
 
-    The fields and attributes expected and produced by use and update are the following:
+    The dataset fields expected and produced by the learning algorithm and the trained model
+    are the following:
 
-     - Input and output fields (example-wise quantities):
+     - Input and output dataset fields (example-wise quantities):
 
-       - 'input' (always expected by use and update as an input_dataset field)
-       - 'target' (optionally expected by use and update as an input_dataset field)
-       - 'output' (optionally produced by use as an output dataset field)
-       - 'squared_error' (optionally produced by use as an output dataset field, needs 'target') = example-wise squared error
+       - 'input' (always expected as an input_dataset field)
+       - 'target' (always expected by the learning algorithm, optional for learned model)
+       - 'output' (always produced by learned model)
+       - 'squared_error' (optionally produced by learned model if 'target' is provided)
+          = example-wise squared error
+    """
+    def __init__(self, L2_regularizer=0):
+        self.predictor = LinearPredictor(None)
+        self.L2_regularizer=L2_regularizer
+        self._XtX = T.matrix('XtX')
+        self._XtY = T.matrix('XtY')
+        self._extended_input = T.prepend_one_to_each_row(self._input)
 
-     - optional attributes (optionally expected as input_dataset attributes)
-       (warning, this may be dangerous, the 'use' method will use those provided in the 
-       input_dataset rather than those learned during 'update'; currently no support
-       for providing these to update):
-       
-       - 'L2_regularizer' 
-       - 'b' 
-       - 'W'
-       - 'parameters' = [b, W] 
-       - 'regularization_term'
-       - 'XtX'
-       - 'XtY'
-
-    """
+class LinearPredictorEquations(AutoName):
+    inputs = T.matrix() # minibatchsize x n_inputs
+    targets = T.matrix() # minibatchsize x n_outputs
+    theta = T.matrix() # (n_inputs+1) x n_outputs
+    b = theta[0]
+    Wt = theta[1:,:]
+    outputs = T.dot(inputs,Wt) + b # minibatchsize x n_outputs
+    squared_errors = T.sum(T.sqr(targets-outputs),axis=1)
 
-    def attributeNames(self):
-        return ["L2_regularizer","parameters","b","W","regularization_term","XtX","XtY"]
+    __compiled = False
+    @classmethod
+    def compile(cls,linker='c|py'):
+        if cls.__compiled:
+            return
+        def fn(input_vars,output_vars):
+            return staticmethod(theano.function(input_vars,output_vars, linker=linker))
 
-    def useInputAttributes(self):
-        return ["b","W"]
-
-    def useOutputAttributes(self):
-        return []
+        cls.compute_outputs = fn([inputs,theta],[outputs])
+        cls.compute_errors = fn([outputs,targets],[squared_errors])
 
-    def updateInputAttributes(self):
-        return ["L2_regularizer","XtX","XtY"]
+        cls.__compiled = True
 
-    def updateMinibatchInputFields(self):
-        return ["input","target"]
-    
-    def updateMinibatchInputAttributes(self):
-        return ["XtX","XtY"]
+    def __init__(self):
+        self.compile()
+        
+class LinearRegressionEquations(LinearPredictorEquations):
+    P = LinearPredictorEquations
+    XtX = T.matrix() # (n_inputs+1) x (n_inputs+1)
+    XtY = T.matrix() # (n_inputs+1) x n_outputs
+    extended_input = T.prepend_scalar_to_each_row(1,P.inputs)
+    new_XtX = add_inplace(XtX,T.dot(extended_input.T,extended_input))
+    new_XtY = add_inplace(XtY,T.dot(extended_input.T,P.targets))
     
-    def updateMinibatchOutputAttributes(self):
-        return ["new_XtX","new_XtY"]
-    
-    def updateEndInputAttributes(self):
-        return ["theta","XtX","XtY"]
-
-    def updateEndOutputAttributes(self):
-        return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ?
+class LinearPredictor(object):
+    """
+    A linear predictor has parameters theta (a bias vector and a weight matrix)
+    it can use to make a linear prediction (according to the LinearPredictorEquations).
+    It can compute its output (bias + weight * input) and a squared error (||output - target||^2).
+    """
+    def __init__(self, theta):
+        self.theta=theta
+        self.n_inputs=theta.shape[0]-1
+        self.n_outputs=theta.shape[1]
+        self.predict_equations = LinearPredictorEquations()
 
-    def parameterAttributes(self):
-        return ["b","W"]
+    def compute_outputs(self,inputs):
+        return self.predict_equations.compute_outputs(inputs,self.theta)
+    def compute_errors(self,inputs,targets):
+        return self.predict_equations.compute_errors(self.compute_outputs(inputs),targets)
+    def compute_outputs_and_errors(self,inputs,targets):
+        outputs = self.compute_outputs(inputs)
+        return [outputs,self.predict_equations.compute_errors(outputs,targets)]
     
-    def defaultOutputFields(self, input_fields):
-        output_fields = ["output"]
-        if "target" in input_fields:
-            output_fields.append("squared_error")
-        return output_fields
+    def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False):
+        assert dataset.hasFields(["input"])
+        if output_fieldnames is None:
+            if dataset.hasFields(["target"]):
+                output_fieldnames = ["output","squared_error"]
+            else:
+                output_fieldnames = ["output"]
+        output_fieldnames.sort()
+        if output_fieldnames == ["squared_error"]:
+            f = self.compute_errors
+        elif output_fieldnames == ["output"]:
+            f = self.compute_outputs
+        elif output_fieldnames == ["output","squared_error"]:
+            f = self.compute_outputs_and_errors
+        else:
+            raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames))
         
-    def __init__(self):
-        self._input = t.matrix('input') # n_examples x n_inputs
-        self._target = t.matrix('target') # n_examples x n_outputs
-        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
-        self._theta = t.matrix('theta')
-        self._W = self._theta[:,1:] 
-        self._b = self._theta[:,0]
-        self._XtX = t.matrix('XtX')
-        self._XtY = t.matrix('XtY')
-        self._extended_input = t.prepend_one_to_each_row(self._input)
-        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
-        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._L2_regularizer * t.dot(self._W,self._W)
-        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
-        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
-        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
+        ds=ApplyFunctionDataSet(dataset,f,output_fieldnames)
+        if cached_output_dataset:
+            return CachedDataSet(ds)
+        else:
+            return ds
+        
 
-        MinibatchUpdatesTLearner.__init__(self)
-            
-    def allocate(self,minibatch):
-        minibatch_n_inputs  = minibatch["input"].shape[1]
-        minibatch_n_outputs = minibatch["target"].shape[1]
+        self._XtX = T.matrix('XtX')
+        self._XtY = T.matrix('XtY')
+        self._extended_input = T.prepend_one_to_each_row(self._input)
+        self._output = T.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
+        self._squared_error = T.sum_within_rows(T.sqr(self._output-self._target)) # (n_examples ) vector
+        self._regularizer = self._L2_regularizer * T.dot(self._W,self._W)
+        self._new_XtX = add_inplace(self._XtX,T.dot(self._extended_input.T,self._extended_input))
+        self._new_XtY = add_inplace(self._XtY,T.dot(self._extended_input.T,self._target))
+        self._new_theta = T.solve_inplace(self._theta,self._XtX,self._XtY)
+
+    def allocate(self,dataset):
+        dataset_n_inputs  = dataset["input"].shape[1]
+        dataset_n_outputs = dataset["target"].shape[1]
         if not self._n_inputs:
-            self._n_inputs = minibatch_n_inputs 
-            self._n_outputs = minibatch_n_outputs
+            self._n_inputs = dataset_n_inputs 
+            self._n_outputs = dataset_n_outputs
             self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
             self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
             self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
             self.forget()
-        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
+        elif self._n_inputs!=dataset_n_inputs or self._n_outputs!=dataset_n_outputs:
             # if the input or target changes dimension on the fly, we resize and forget everything
             self.forget()
             
@@ -141,3 +168,6 @@
             self.XtY.data[:,:]=0
             numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
 
+    def __call__(self,dataset):
+        raise NotImplementedError
+