pylearn: comparison linear_regression.py @ 385:db28ff3fb887
merge
author | Joseph Turian <turian@gmail.com>
date | Tue, 08 Jul 2008 02:00:14 -0400
parents | 74b402b5a81b
children | efb797c5efc0
384:edec18614a70 (parent) | 385:db28ff3fb887 (this revision)
---|---
2 Implementation of linear regression, with or without L2 regularization. | 2 Implementation of linear regression, with or without L2 regularization. |
3 This is one of the simplest examples of L{learner}, and illustrates | 3 This is one of the simplest examples of L{learner}, and illustrates |
4 the use of theano. | 4 the use of theano. |
5 """ | 5 """ |
6 | 6 |
7 from learner import * | 7 from pylearn import OfflineLearningAlgorithm |
8 from theano import tensor as t | 8 from theano import tensor as T |
9 from theano.scalar import as_scalar | 9 from theano.scalar import as_scalar |
10 from common.autoname import AutoName | |
10 | 11 |
11 class LinearRegression(MinibatchUpdatesTLearner): | 12 class LinearRegression(OfflineLearningAlgorithm): |
12 """ | 13 """ |
13 Implement linear regression, with or without L2 regularization | 14 Implement linear regression, with or without L2 regularization |
14 (the former is called Ridge Regression and the latter Ordinary Least Squares). | 15 (the former is called Ridge Regression and the latter Ordinary Least Squares). |
15 | 16 |
16 The predictor parameters are obtained analytically from the training set. | 17 The predictor parameters are obtained analytically from the training set. |
38 | 39 |
39 where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X | 40 where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X |
40 plus L2_regularizer on the diagonal except at (0,0), | 41 plus L2_regularizer on the diagonal except at (0,0), |
41 and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y. | 42 and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y. |
42 | 43 |
43 The fields and attributes expected and produced by use and update are the following: | 44 The dataset fields expected and produced by the learning algorithm and the trained model |
45 are the following: | |
44 | 46 |
45 - Input and output fields (example-wise quantities): | 47 - Input and output dataset fields (example-wise quantities): |
46 | 48 |
47 - 'input' (always expected by use and update as an input_dataset field) | 49 - 'input' (always expected as an input_dataset field) |
48 'target' (optionally expected by use and update as an input_dataset field) | 50 'target' (always expected by the learning algorithm, optional for the learned model) |
49 'output' (optionally produced by use as an output dataset field) | 51 'output' (always produced by the learned model) |
50 'squared_error' (optionally produced by use as an output dataset field, needs 'target') | 52 'squared_error' (optionally produced by the learned model if 'target' is provided) |
53 = example-wise squared error | |
54 """ | |
55 def __init__(self, L2_regularizer=0): | |
56 self.predictor = LinearPredictor(None) | |
57 self.L2_regularizer=L2_regularizer | |
58 self._XtX = T.matrix('XtX') | |
59 self._XtY = T.matrix('XtY') | |
60 self._extended_input = T.prepend_one_to_each_row(self._input) | |
51 | 61 |
52 - optional attributes (optionally expected as input_dataset attributes) | 62 class LinearPredictorEquations(AutoName): |
53 (warning, this may be dangerous, the 'use' method will use those provided in the | 63 inputs = T.matrix() # minibatchsize x n_inputs |
54 input_dataset rather than those learned during 'update'; currently no support | 64 targets = T.matrix() # minibatchsize x n_outputs |
55 for providing these to update): | 65 theta = T.matrix() # (n_inputs+1) x n_outputs |
56 | 66 b = theta[0] |
57 - 'L2_regularizer' | 67 Wt = theta[1:,:] |
58 - 'b' | 68 outputs = T.dot(inputs,Wt) + b # minibatchsize x n_outputs |
59 - 'W' | 69 squared_errors = T.sum(T.sqr(targets-outputs),axis=1) |
60 - 'parameters' = [b, W] | |
61 - 'regularization_term' | |
62 - 'XtX' | |
63 - 'XtY' | |
64 | 70 |
71 __compiled = False | |
72 @classmethod | |
73 def compile(cls,linker='c|py'): | |
74 if cls.__compiled: | |
75 return | |
76 def fn(input_vars,output_vars): | |
77 return staticmethod(theano.function(input_vars,output_vars, linker=linker)) | |
78 | |
79 cls.compute_outputs = fn([inputs,theta],[outputs]) | |
80 cls.compute_errors = fn([outputs,targets],[squared_errors]) | |
81 | |
82 cls.__compiled = True | |
83 | |
84 def __init__(self): | |
85 self.compile() | |
86 | |
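
LinearPredictorEquations declares its graph at class scope and compiles it once, on first instantiation. In plain numpy, the two compiled functions compute the following (theta packs the bias in row 0 and the transposed weights below it, exactly as in the expressions above; this sketch is not part of the module):

```python
import numpy

def compute_outputs(inputs, theta):
    # row 0 of theta is the bias b; the remaining rows are Wt = W'
    b, Wt = theta[0], theta[1:, :]
    return numpy.dot(inputs, Wt) + b  # minibatchsize x n_outputs

def compute_errors(outputs, targets):
    # example-wise squared error: one scalar per row
    return numpy.sum((targets - outputs) ** 2, axis=1)
```
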
87 class LinearRegressionEquations(LinearPredictorEquations): | |
88 P = LinearPredictorEquations | |
89 XtX = T.matrix() # (n_inputs+1) x (n_inputs+1) | |
90 XtY = T.matrix() # (n_inputs+1) x n_outputs | |
91 extended_input = T.prepend_scalar_to_each_row(1.,P.inputs) | |
92 new_XtX = add_inplace(XtX,T.dot(extended_input.T,extended_input)) | |
93 new_XtY = add_inplace(XtY,T.dot(extended_input.T,P.targets)) | |
94 new_theta = T.Cholesky_solve_inplace(P.theta,XtX,XtY) # solve linear system XtX theta = XtY | |
95 | |
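
The three update equations accumulate the sufficient statistics XtX and XtY in place and then re-solve the normal equations. prepend_scalar_to_each_row, add_inplace and Cholesky_solve_inplace are placeholders rather than stock Theano ops, so here is a plain numpy sketch of one minibatch step:

```python
import numpy

def minibatch_update(XtX, XtY, inputs, targets):
    # prepend the constant 1 to each row so row 0 of theta becomes the bias
    extended = numpy.hstack([numpy.ones((inputs.shape[0], 1)), inputs])
    XtX += numpy.dot(extended.T, extended)  # accumulate X'X in place
    XtY += numpy.dot(extended.T, targets)   # accumulate X'Y in place
    # XtX is symmetric positive (semi)definite, so a Cholesky solve applies
    return numpy.linalg.solve(XtX, XtY)     # new theta
```
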
96 class LinearPredictor(object): | |
65 """ | 97 """ |
98 A linear predictor has parameters theta (a bias vector and a weight matrix) | |
99 it can use to make a linear prediction (according to the LinearPredictorEquations). | |
100 It can compute its output (bias + weight * input) and a squared error (||output - target||^2). | |
101 """ | |
102 def __init__(self, theta): | |
103 self.theta=theta | |
104 self.n_inputs=theta.shape[0]-1 | |
105 self.n_outputs=theta.shape[1] | |
106 self.predict_equations = LinearPredictorEquations() | |
66 | 107 |
67 def attributeNames(self): | 108 def compute_outputs(self,inputs): |
68 return ["L2_regularizer","parameters","b","W","regularization_term","XtX","XtY"] | 109 return self.predict_equations.compute_outputs(inputs,self.theta) |
110 def compute_errors(self,inputs,targets): | |
111 return self.predict_equations.compute_errors(self.compute_outputs(inputs),targets) | |
112 def compute_outputs_and_errors(self,inputs,targets): | |
113 outputs = self.compute_outputs(inputs) | |
114 return [outputs,self.predict_equations.compute_errors(outputs,targets)] | |
115 | |
116 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): | |
117 assert dataset.hasFields(["input"]) | |
118 if output_fieldnames is None: | |
119 if dataset.hasFields(["target"]): | |
120 output_fieldnames = ["output","squared_error"] | |
121 else: | |
122 output_fieldnames = ["output"] | |
123 output_fieldnames.sort() | |
124 if output_fieldnames == ["squared_error"]: | |
125 f = self.compute_errors | |
126 elif output_fieldnames == ["output"]: | |
127 f = self.compute_outputs | |
128 elif output_fieldnames == ["output","squared_error"]: | |
129 f = self.compute_outputs_and_errors | |
130 else: | |
131 raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames)) | |
132 | |
133 ds=ApplyFunctionDataSet(dataset,f,output_fieldnames) | |
134 if cached_output_dataset: | |
135 return CachedDataSet(ds) | |
136 else: | |
137 return ds | |
138 | |
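
A self-contained numpy sanity check of the pipeline these classes implement: solve for theta analytically, then predict with the b/Wt split used by LinearPredictorEquations. Nothing below imports pylearn and all names are illustrative:

```python
import numpy

rng = numpy.random.RandomState(0)
X = rng.randn(100, 3)                          # 100 examples, 3 inputs
Y = numpy.dot(X, rng.randn(3, 2)) + 0.5        # 2 outputs, bias 0.5, no noise

ext = numpy.hstack([numpy.ones((100, 1)), X])  # prepended ones column
theta = numpy.linalg.solve(numpy.dot(ext.T, ext), numpy.dot(ext.T, Y))
b, Wt = theta[0], theta[1:, :]
outputs = numpy.dot(X, Wt) + b
print(numpy.max(numpy.abs(outputs - Y)))       # ~1e-13: exact recovery
```
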
69 | 139 |
70 def useInputAttributes(self): | 140 self._XtX = T.matrix('XtX') |
71 return ["b","W"] | 141 self._XtY = T.matrix('XtY') |
142 self._extended_input = T.prepend_one_to_each_row(self._input) | |
143 self._output = T.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix | |
144 self._squared_error = T.sum_within_rows(T.sqr(self._output-self._target)) # (n_examples ) vector | |
145 self._regularizer = self._L2_regularizer * T.dot(self._W,self._W) | |
146 self._new_XtX = add_inplace(self._XtX,T.dot(self._extended_input.T,self._extended_input)) | |
147 self._new_XtY = add_inplace(self._XtY,T.dot(self._extended_input.T,self._target)) | |
148 self._new_theta = T.solve_inplace(self._theta,self._XtX,self._XtY) | |
72 | 149 |
73 def useOutputAttributes(self): | 150 def allocate(self,dataset): |
74 return [] | 151 dataset_n_inputs = dataset["input"].shape[1] |
75 | 152 dataset_n_outputs = dataset["target"].shape[1] |
76 def updateInputAttributes(self): | |
77 return ["L2_regularizer","XtX","XtY"] | |
78 | |
79 def updateMinibatchInputFields(self): | |
80 return ["input","target"] | |
81 | |
82 def updateMinibatchInputAttributes(self): | |
83 return ["XtX","XtY"] | |
84 | |
85 def updateMinibatchOutputAttributes(self): | |
86 return ["new_XtX","new_XtY"] | |
87 | |
88 def updateEndInputAttributes(self): | |
89 return ["theta","XtX","XtY"] | |
90 | |
91 def updateEndOutputAttributes(self): | |
92 return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? | |
93 | |
94 def parameterAttributes(self): | |
95 return ["b","W"] | |
96 | |
97 def defaultOutputFields(self, input_fields): | |
98 output_fields = ["output"] | |
99 if "target" in input_fields: | |
100 output_fields.append("squared_error") | |
101 return output_fields | |
102 | |
103 def __init__(self): | |
104 self._input = t.matrix('input') # n_examples x n_inputs | |
105 self._target = t.matrix('target') # n_examples x n_outputs | |
106 self._L2_regularizer = as_scalar(0.,'L2_regularizer') | |
107 self._theta = t.matrix('theta') | |
108 self._W = self._theta[:,1:] | |
109 self._b = self._theta[:,0] | |
110 self._XtX = t.matrix('XtX') | |
111 self._XtY = t.matrix('XtY') | |
112 self._extended_input = t.prepend_one_to_each_row(self._input) | |
113 self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix | |
114 self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector | |
115 self._regularizer = self._L2_regularizer * t.dot(self._W,self._W) | |
116 self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) | |
117 self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) | |
118 self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) | |
119 | |
120 MinibatchUpdatesTLearner.__init__(self) | |
121 | |
122 def allocate(self,minibatch): | |
123 minibatch_n_inputs = minibatch["input"].shape[1] | |
124 minibatch_n_outputs = minibatch["target"].shape[1] | |
125 if not self._n_inputs: | 153 if not self._n_inputs: |
126 self._n_inputs = minibatch_n_inputs | 154 self._n_inputs = dataset_n_inputs |
127 self._n_outputs = minibatch_n_outputs | 155 self._n_outputs = dataset_n_outputs |
128 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) | 156 self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) |
129 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) | 157 self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) |
130 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) | 158 self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) |
131 self.forget() | 159 self.forget() |
132 elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs: | 160 elif self._n_inputs!=dataset_n_inputs or self._n_outputs!=dataset_n_outputs: |
133 # if the input or target changes dimension on the fly, we resize and forget everything | 161 # if the input or target changes dimension on the fly, we resize and forget everything |
134 self.forget() | 162 self.forget() |
135 | 163 |
136 def forget(self): | 164 def forget(self): |
137 if self._n_inputs and self._n_outputs: | 165 if self._n_inputs and self._n_outputs: |
138 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) | 166 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) |
139 self.XtY.resize((1+self.n_inputs,self.n_outputs)) | 167 self.XtY.resize((1+self.n_inputs,self.n_outputs)) |
140 self.XtX.data[:,:]=0 | 168 self.XtX.data[:,:]=0 |
141 self.XtY.data[:,:]=0 | 169 self.XtY.data[:,:]=0 |
142 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer | 170 numpy.diag(self.XtX.data)[1:]=self.L2_regularizer |
143 | 171 |
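
One caveat in forget() above: numpy.diag returns a copy of the diagonal (a read-only view in newer numpy), so assigning into its result leaves XtX unchanged. A sketch of a reset that writes the coefficient onto the diagonal in place, skipping (0,0) so the bias stays unregularized:

```python
import numpy

def reset_statistics(XtX, XtY, L2_regularizer):
    XtX[:, :] = 0
    XtY[:, :] = 0
    idx = numpy.arange(1, XtX.shape[0])
    XtX[idx, idx] = L2_regularizer  # diagonal except (0,0), written in place
```
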
172 def __call__(self,dataset): | |
173 | |
174 |