Mercurial > pylearn
view linear_regression.py @ 472:69c800af1370
changed weight initialization for logistic regression
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Thu, 23 Oct 2008 13:26:42 -0400 |
parents | 8e4d2ebd816a |
children | 111e547ffa7b |
line wrap: on
line source
""" Implementation of linear regression, with or without L2 regularization. This is one of the simplest example of L{learner}, and illustrates the use of theano. """ from pylearn.learner import OfflineLearningAlgorithm,OnlineLearningAlgorithm from theano import tensor as T from nnet_ops import prepend_1_to_each_row from theano.scalar import as_scalar from common.autoname import AutoName import theano import numpy class LinearRegression(OfflineLearningAlgorithm): """ Implement linear regression, with or without L2 regularization (the former is called Ridge Regression and the latter Ordinary Least Squares). Usage: linear_regressor=LinearRegression(L2_regularizer=0.1) linear_predictor=linear_regression(training_set) all_results_dataset=linear_predictor(test_set) # creates a dataset with "output" and "squared_error" field outputs = linear_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays outputs, errors = linear_predictor.compute_outputs_and_errors(inputs,targets) errors = linear_predictor.compute_errors(inputs,targets) mse = linear_predictor.compute_mse(inputs,targets) The training_set must have fields "input" and "target". The test_set must have field "input", and needs "target" if we want to compute the squared errors. The predictor parameters are obtained analytically from the training set. For each (input[t],output[t]) pair in a minibatch,:: output_t = b + W * input_t where b and W are obtained by minimizing:: L2_regularizer sum_{ij} W_{ij}^2 + sum_t ||output_t - target_t||^2 Let X be the whole training set inputs matrix (one input example per row), with the first column full of 1's, and Let Y the whole training set targets matrix (one example's target vector per row). Let theta = the matrix with b in its first column and W in the others, then each theta[:,i] is the solution of the linear system:: XtX * theta[:,i] = XtY[:,i] where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X plus L2_regularizer on the diagonal except at (0,0), and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y. The dataset fields expected and produced by the learning algorithm and the trained model are the following: - Input and output dataset fields (example-wise quantities): - 'input' (always expected as an input_dataset field) - 'target' (always expected by the learning algorithm, optional for learned model) - 'output' (always produced by learned model) - 'squared_error' (optionally produced by learned model if 'target' is provided) = example-wise squared error """ def __init__(self, L2_regularizer=0,minibatch_size=10000): self.L2_regularizer=L2_regularizer self.equations = LinearRegressionEquations() self.minibatch_size=minibatch_size def __call__(self,trainset): first_example = trainset[0] n_inputs = first_example['input'].size n_outputs = first_example['target'].size XtX = numpy.zeros((n_inputs+1,n_inputs+1)) XtY = numpy.zeros((n_inputs+1,n_outputs)) for i in xrange(n_inputs): XtX[i+1,i+1]=self.L2_regularizer mbs=min(self.minibatch_size,len(trainset)) for inputs,targets in trainset.minibatches(["input","target"],minibatch_size=mbs): XtX,XtY=self.equations.update(XtX,XtY,numpy.array(inputs),numpy.array(targets)) theta=numpy.linalg.solve(XtX,XtY) return LinearPredictor(theta) class LinearPredictorEquations(AutoName): inputs = T.matrix() # minibatchsize x n_inputs targets = T.matrix() # minibatchsize x n_outputs theta = T.matrix() # (n_inputs+1) x n_outputs b = theta[0] Wt = theta[1:,:] outputs = T.dot(inputs,Wt) + b # minibatchsize x n_outputs squared_errors = T.sum(T.sqr(targets-outputs),axis=1) __compiled = False @classmethod def compile(cls,linker='c|py'): if cls.__compiled: return def fn(input_vars,output_vars): return staticmethod(theano.function(input_vars,output_vars, linker=linker)) cls.compute_outputs = fn([cls.inputs,cls.theta],[cls.outputs]) cls.compute_errors = fn([cls.outputs,cls.targets],[cls.squared_errors]) cls.__compiled = True def __init__(self): self.compile() class LinearRegressionEquations(LinearPredictorEquations): P = LinearPredictorEquations XtX = T.matrix() # (n_inputs+1) x (n_inputs+1) XtY = T.matrix() # (n_inputs+1) x n_outputs extended_input = prepend_1_to_each_row(P.inputs) new_XtX = T.add_inplace(XtX,T.dot(extended_input.T,extended_input)) new_XtY = T.add_inplace(XtY,T.dot(extended_input.T,P.targets)) __compiled = False @classmethod def compile(cls,linker='c|py'): if cls.__compiled: return def fn(input_vars,output_vars): return staticmethod(theano.function(input_vars,output_vars, linker=linker)) cls.update = fn([cls.XtX,cls.XtY,cls.P.inputs,cls.P.targets],[cls.new_XtX,cls.new_XtY]) cls.__compiled = True def __init__(self): self.compile() class LinearPredictor(object): """ A linear predictor has parameters theta (a bias vector and a weight matrix) it can use to make a linear prediction (according to the LinearPredictorEquations). It can compute its output (bias + weight * input) and a squared error (||output - target||^2). """ def __init__(self, theta): self.theta=theta self.n_inputs=theta.shape[0]-1 self.n_outputs=theta.shape[1] self.equations = LinearPredictorEquations() def compute_outputs(self,inputs): return self.equations.compute_outputs(inputs,self.theta) def compute_errors(self,inputs,targets): return self.equations.compute_errors(self.compute_outputs(inputs),targets) def compute_outputs_and_errors(self,inputs,targets): outputs = self.compute_outputs(inputs) return [outputs,self.equations.compute_errors(outputs,targets)] def compute_mse(self,inputs,targets): errors = self.compute_errors(inputs,targets) return numpy.sum(errors)/errors.size def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): assert dataset.hasFields(["input"]) if output_fieldnames is None: if dataset.hasFields(["target"]): output_fieldnames = ["output","squared_error"] else: output_fieldnames = ["output"] output_fieldnames.sort() if output_fieldnames == ["squared_error"]: f = self.compute_errors elif output_fieldnames == ["output"]: f = self.compute_outputs elif output_fieldnames == ["output","squared_error"]: f = self.compute_outputs_and_errors else: raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames)) ds=ApplyFunctionDataSet(dataset,f,output_fieldnames) if cached_output_dataset: return CachedDataSet(ds) else: return ds def linear_predictor(inputs,params,*otherargs): p = LinearPredictor(params) return p.compute_outputs(inputs) #TODO : an online version class OnlineLinearRegression(OnlineLearningAlgorithm): """ Training can proceed sequentially (with multiple calls to update with different disjoint subsets of the training sets). After each call to update the predictor is ready to be used (and optimized for the union of all the training sets passed to update since construction or since the last call to forget). """ pass