view linear_regression.py @ 376:c9a89be5cb0a

Redesigning linear_regression
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Mon, 07 Jul 2008 10:08:35 -0400
parents f6505ec32dc3
children 74b402b5a81b
line wrap: on
line source

"""
Implementation of linear regression, with or without L2 regularization.
This is one of the simplest example of L{learner}, and illustrates
the use of theano.
"""

from pylearn import OfflineLearningAlgorithm
from theano import tensor as T
from theano.scalar import as_scalar
from common.autoname import AutoName

class LinearRegression(OfflineLearningAlgorithm):
    """
    Implement linear regression, with or without L2 regularization
    (the former is called Ridge Regression and the latter Ordinary Least Squares).

    The predictor parameters are obtained analytically from the training set.
    Training can proceed sequentially (with multiple calls to update with
    different disjoint subsets of the training sets). After each call to
    update the predictor is ready to be used (and optimized for the union
    of all the training sets passed to update since construction or since
    the last call to forget).

    For each (input[t],output[t]) pair in a minibatch,::
    
       output_t = b + W * input_t

    where b and W are obtained by minimizing::

       L2_regularizer sum_{ij} W_{ij}^2  + sum_t ||output_t - target_t||^2

    Let X be the whole training set inputs matrix (one input example per row),
    with the first column full of 1's, and Let Y the whole training set
    targets matrix (one example's target vector per row).
    Let theta = the matrix with b in its first column and W in the others,
    then each theta[:,i] is the solution of the linear system::

       XtX * theta[:,i] = XtY[:,i]

    where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X
    plus L2_regularizer on the diagonal except at (0,0),
    and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y.

    The dataset fields expected and produced by the learning algorithm and the trained model
    are the following:

     - Input and output dataset fields (example-wise quantities):

       - 'input' (always expected as an input_dataset field)
       - 'target' (always expected by the learning algorithm, optional for learned model)
       - 'output' (always produced by learned model)
       - 'squared_error' (optionally produced by learned model if 'target' is provided)
          = example-wise squared error
    """
    def __init__(self, L2_regularizer=0):
        self.predictor = LinearPredictor(None,None
        self.L2_regularizer=L2_regularizer
        self._XtX = T.matrix('XtX')
        self._XtY = T.matrix('XtY')
        self._extended_input = T.prepend_one_to_each_row(self._input)

class LinearPredictorEquations(AutoName):
    inputs = T.matrix() # minibatchsize x n_inputs
    targets = T.matrix() # minibatchsize x n_outputs
    theta = T.matrix() # (n_inputs+1) x n_outputs
    b = theta[0]
    Wt = theta[1:,:]
    outputs = T.dot(inputs,Wt) + b # minibatchsize x n_outputs
    squared_errors = T.sum(T.sqr(targets-outputs),axis=1)

    __compiled = False
    @classmethod
    def compile(cls,linker='c|py'):
        if cls.__compiled:
            return
        def fn(input_vars,output_vars):
            return staticmethod(theano.function(input_vars,output_vars, linker=linker))

        cls.compute_outputs = fn([inputs,theta],[outputs])
        cls.compute_errors = fn([outputs,targets],[squared_errors])

        cls.__compiled = True

    def __init__(self)
        self.compile()
        
class LinearRegressionEquations(LinearPredictorEquations):
    P = LinearPredictorEquations
    XtX = T.matrix() # (n_inputs+1) x (n_inputs+1)
    XtY = T.matrix() # (n_inputs+1) x n_outputs
    extended_input = T.prepend_scalar_to_each_row(1,P.inputs)
    new_XtX = add_inplace(XtX,T.dot(extended_input.T,extended_input))
    new_XtY = add_inplace(XtY,T.dot(extended_input.T,P.targets))
    
class LinearPredictor(object):
    """
    A linear predictor has parameters theta (a bias vector and a weight matrix)
    it can use to make a linear prediction (according to the LinearPredictorEquations).
    It can compute its output (bias + weight * input) and a squared error (||output - target||^2).
    """
    def __init__(self, theta):
        self.theta=theta
        self.n_inputs=theta.shape[0]-1
        self.n_outputs=theta.shape[1]
        self.predict_equations = LinearPredictorEquations()

    def compute_outputs(self,inputs):
        return self.predict_equations.compute_outputs(inputs,self.theta)
    def compute_errors(self,inputs,targets):
        return self.predict_equations.compute_errors(self.compute_outputs(inputs),targets)
    def compute_outputs_and_errors(self,inputs,targets):
        outputs = self.compute_outputs(inputs)
        return [outputs,self.predict_equations.compute_errors(outputs,targets)]
    
    def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False):
        assert dataset.hasFields(["input"])
        if output_fieldnames is None:
            if dataset.hasFields(["target"]):
                output_fieldnames = ["output","squared_error"]
            else:
                output_fieldnames = ["output"]
        output_fieldnames.sort()
        if output_fieldnames == ["squared_error"]:
            f = self.compute_errors
        elif output_fieldnames == ["output"]:
            f = self.compute_outputs
        elif output_fieldnames == ["output","squared_error"]:
            f = self.compute_outputs_and_errors
        else:
            raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames))
        
        ds=ApplyFunctionDataSet(dataset,f,output_fieldnames)
        if cached_output_dataset:
            return CachedDataSet(ds)
        else:
            return ds
        

        self._XtX = T.matrix('XtX')
        self._XtY = T.matrix('XtY')
        self._extended_input = T.prepend_one_to_each_row(self._input)
        self._output = T.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
        self._squared_error = T.sum_within_rows(T.sqr(self._output-self._target)) # (n_examples ) vector
        self._regularizer = self._L2_regularizer * T.dot(self._W,self._W)
        self._new_XtX = add_inplace(self._XtX,T.dot(self._extended_input.T,self._extended_input))
        self._new_XtY = add_inplace(self._XtY,T.dot(self._extended_input.T,self._target))
        self._new_theta = T.solve_inplace(self._theta,self._XtX,self._XtY)

    def allocate(self,dataset):
        dataset_n_inputs  = dataset["input"].shape[1]
        dataset_n_outputs = dataset["target"].shape[1]
        if not self._n_inputs:
            self._n_inputs = dataset_n_inputs 
            self._n_outputs = dataset_n_outputs
            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
            self.forget()
        elif self._n_inputs!=dataset_n_inputs or self._n_outputs!=dataset_n_outputs:
            # if the input or target changes dimension on the fly, we resize and forget everything
            self.forget()
            
    def forget(self):
        if self._n_inputs and self._n_outputs:
            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
            self.XtY.resize((1+self.n_inputs,self.n_outputs))
            self.XtX.data[:,:]=0
            self.XtY.data[:,:]=0
            numpy.diag(self.XtX.data)[1:]=self.L2_regularizer

    def __call__(self,dataset):