diff kernel_regression.py @ 421:e01f17be270a
Kernel regression learning algorithm
author   | Yoshua Bengio <bengioy@iro.umontreal.ca>
date     | Sat, 19 Jul 2008 10:11:22 -0400
parents  |
children | 32c5f87bc54e
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kernel_regression.py	Sat Jul 19 10:11:22 2008 -0400
@@ -0,0 +1,209 @@
+"""
+Implementation of kernel regression.
+"""
+
+from pylearn.learner import OfflineLearningAlgorithm
+from pylearn.dataset import ApplyFunctionDataSet, CachedDataSet # used by KernelPredictor.__call__
+from theano import tensor as T
+from nnet_ops import prepend_1_to_each_row
+from theano.scalar import as_scalar
+from common.autoname import AutoName
+import theano
+import numpy
+
+class KernelRegression(OfflineLearningAlgorithm):
+    """
+Implementation of kernel regression:
+* the data are n (x_t,y_t) pairs and we want to estimate E[y|x]
+* the predictor computes
+      f(x) = b + \sum_{t=1}^n \alpha_t K(x,x_t)
+  with free parameters b and alpha, training inputs x_t,
+  and kernel function K (Gaussian by default).
+  Clearly, each prediction involves O(n) computations.
+* the learner chooses b and alpha to minimize
+      lambda alpha' G' G alpha + \sum_{t=1}^n (f(x_t)-y_t)^2
+  where G is the matrix with entries G_ij = K(x_i,x_j).
+  The first (L2 regularization) term is the squared L2
+  norm of the primal weights w = \sum_t \alpha_t phi(x_t)
+  where phi is the function s.t. K(u,v)=phi(u).phi(v).
+* this involves solving a linear system with an (n+1)x(n+1)
+  matrix, which is an O(n^3) computation. In addition,
+  that linear system matrix requires O(n^2) memory.
+  So this learning algorithm should be used only for
+  small datasets.
+* the linear system is
+      (M + lambda I_n) theta = (1, y)'
+  where theta = (b, alpha), I_n is the (n+1)x(n+1) matrix that is the identity
+  except with a 0 at (0,0), and M is the matrix with G in the sub-matrix starting
+  at (1,1), 1's in column 0 except for a value of n at (0,0), and sum_i G_{i,j}
+  in the rest of row 0.
+
+Note that this gives an estimate of E[y|x,training_set] that is the
+same as the one obtained with Gaussian process regression. GP
+regression would also provide a Bayesian estimate of Var[y|x,training_set].
+It corresponds to the assumption that f is a random variable
+with a Gaussian (process) prior distribution with covariance
+function K. Because we assume Gaussian noise, we obtain a Gaussian
+posterior for f (whose mean is computed here).
+
+    Usage:
+
+        kernel_regressor=KernelRegression(L2_regularizer=0.1,gamma=0.5) # kernel fixed to Gaussian in this version
+        kernel_predictor=kernel_regressor(training_set)
+        all_results_dataset=kernel_predictor(test_set) # creates a dataset with "output" and "squared_error" fields
+        outputs = kernel_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays
+        outputs, errors = kernel_predictor.compute_outputs_and_errors(inputs,targets)
+        errors = kernel_predictor.compute_errors(inputs,targets)
+        mse = kernel_predictor.compute_mse(inputs,targets)
+
+    The training_set must have fields "input" and "target".
+    The test_set must have field "input", and needs "target" if
+    we want to compute the squared errors.
+
+    The predictor parameters are obtained analytically from the training set.
+    Training is only done on a whole training set rather than on minibatches
+    (there is no online implementation).
+
+    The dataset fields expected and produced by the learning algorithm
+    and the trained model are the following:
+
+    - Input and output dataset fields (example-wise quantities):
+
+      - 'input' (always expected as an input_dataset field)
+      - 'target' (always expected by the learning algorithm, optional for the learned model)
+      - 'output' (always produced by the learned model)
+      - 'squared_error' (optionally produced by the learned model if 'target' is provided)
+        = example-wise squared error
+    """
+    def __init__(self, kernel=None, L2_regularizer=0, gamma=1):
+        self.kernel = kernel
+        self.L2_regularizer=L2_regularizer
+        self.gamma = gamma # until we fix things, the kernel type is fixed: Gaussian
+        self.equations = KernelRegressionEquations()
+
+    def __call__(self,trainset):
+        n_examples = len(trainset)
+        first_example = trainset[0]
+        n_inputs = first_example['input'].size
+        n_outputs = first_example['target'].size
+        M = numpy.zeros((n_examples+1,n_examples+1))
+        Y = numpy.zeros((n_examples+1,n_outputs))
+        # lambda * I_n: one diagonal entry per training example (the entry
+        # at (0,0), associated with the bias b, stays 0)
+        for i in xrange(n_examples):
+            M[i+1,i+1]=self.L2_regularizer
+        data = trainset.fields()
+        train_inputs = numpy.array(data['input'])
+        Y[0]=1
+        Y[1:,:] = numpy.array(data['target'])
+        M,train_inputs_square=self.equations.compute_system_matrix(train_inputs,M)
+        theta=numpy.linalg.solve(M,Y)
+        return KernelPredictor(theta,self.gamma, train_inputs, train_inputs_square)
+
+class KernelPredictorEquations(AutoName):
+    train_inputs = T.matrix() # n_examples x n_inputs
+    train_inputs_square = T.vector() # n_examples
+    inputs = T.matrix() # minibatchsize x n_inputs
+    targets = T.matrix() # minibatchsize x n_outputs
+    theta = T.matrix() # (n_examples+1) x n_outputs
+    gamma = T.scalar()
+    inv_gamma2 = 1./(gamma*gamma)
+    b = theta[0]
+    alpha = theta[1:,:]
+    inputs_square = T.sum(inputs*inputs,axis=1)
+    # Gaussian kernel K(x,x_t) = exp(-||x - x_t||^2 / gamma^2); inputs_square
+    # is reshaped to a column so the squared distances broadcast correctly
+    Kx = T.exp(-(train_inputs_square-2*T.dot(inputs,train_inputs.T)
+                 +inputs_square.dimshuffle(0,'x'))*inv_gamma2)
+    outputs = T.dot(Kx,alpha) + b # minibatchsize x n_outputs
+    squared_errors = T.sum(T.sqr(targets-outputs),axis=1)
+
+    __compiled = False
+    @classmethod
+    def compile(cls,linker='c|py'):
+        if cls.__compiled:
+            return
+        def fn(input_vars,output_vars):
+            return staticmethod(theano.function(input_vars,output_vars, linker=linker))
+
+        cls.compute_outputs = fn([cls.inputs,cls.theta,cls.gamma,cls.train_inputs,cls.train_inputs_square],cls.outputs)
+        cls.compute_errors = fn([cls.outputs,cls.targets],cls.squared_errors)
+
+        cls.__compiled = True
+
+    def __init__(self):
+        self.compile()
+
+class KernelRegressionEquations(KernelPredictorEquations):
+    # P = KernelPredictorEquations
+    M = T.matrix() # (n_examples+1) x (n_examples+1)
+    inputs = T.matrix() # n_examples x n_inputs
+    # Fill M as described in the class docstring. Note that G is computed
+    # here with a dot product (a linear kernel), although the predictor
+    # applies a Gaussian kernel.
+    G = T.dot(inputs,inputs.T)
+    M2 = T.set_subtensor(M[1:,1:], M[1:,1:] + G)                  # G block (M already carries lambda*I_n)
+    M2 = T.set_subtensor(M2[0,0], T.cast(M.shape[0]-1, M.dtype))  # n at (0,0)
+    M2 = T.set_subtensor(M2[1:,0], T.ones_like(M[1:,0]))          # 1's in the rest of column 0
+    M2 = T.set_subtensor(M2[0,1:], T.sum(G,axis=0))               # sum_i G_{i,j} in row 0
+    inputs_square = T.sum(inputs*inputs,axis=1)
+
+    __compiled = False
+
+    @classmethod
+    def compile(cls,linker='c|py'):
+        if cls.__compiled:
+            return
+        def fn(input_vars,output_vars):
+            return staticmethod(theano.function(input_vars,output_vars, linker=linker))
+
+        cls.compute_system_matrix = fn([cls.inputs,cls.M],[cls.M2,cls.inputs_square])
+
+        cls.__compiled = True
+
+    def __init__(self):
+        self.compile()
+
+class KernelPredictor(object):
+    """
+    A kernel predictor has parameters theta (a bias vector b and a weight matrix alpha)
+    it can use to make a non-linear prediction (according to the KernelPredictorEquations).
+    It can compute its output (bias + alpha * kernel(train_inputs,input)) and a
+    squared error (||output - target||^2).
+    """
+    def __init__(self, theta, gamma, train_inputs, train_inputs_square):
+        self.theta=theta
+        self.gamma=gamma
+        self.train_inputs=train_inputs
+        self.train_inputs_square=train_inputs_square
+        self.equations = KernelPredictorEquations()
+
+    def compute_outputs(self,inputs):
+        return self.equations.compute_outputs(inputs,self.theta,self.gamma,self.train_inputs,self.train_inputs_square)
+    def compute_errors(self,inputs,targets):
+        return self.equations.compute_errors(self.compute_outputs(inputs),targets)
+    def compute_outputs_and_errors(self,inputs,targets):
+        outputs = self.compute_outputs(inputs)
+        return [outputs,self.equations.compute_errors(outputs,targets)]
+    def compute_mse(self,inputs,targets):
+        errors = self.compute_errors(inputs,targets)
+        return numpy.sum(errors)/errors.size
+
+    def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False):
+        assert dataset.hasFields(["input"])
+        if output_fieldnames is None:
+            if dataset.hasFields(["target"]):
+                output_fieldnames = ["output","squared_error"]
+            else:
+                output_fieldnames = ["output"]
+        output_fieldnames.sort()
+        if output_fieldnames == ["squared_error"]:
+            f = self.compute_errors
+        elif output_fieldnames == ["output"]:
+            f = self.compute_outputs
+        elif output_fieldnames == ["output","squared_error"]:
+            f = self.compute_outputs_and_errors
+        else:
+            raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames))
+
+        ds=ApplyFunctionDataSet(dataset,f,output_fieldnames)
+        if cached_output_dataset:
+            return CachedDataSet(ds)
+        else:
+            return ds
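For reference, the (n+1)x(n+1) system described in the KernelRegression docstring can be built and solved directly in NumPy. The sketch below is a minimal, self-contained illustration, not pylearn code: the helper names (gaussian_gram, fit_kernel_regression) are invented here, and unlike compute_system_matrix in this revision it uses the Gaussian kernel for the G block, as the docstring describes.

import numpy

def gaussian_gram(X, gamma):
    # G[i,j] = K(x_i, x_j) = exp(-||x_i - x_j||^2 / gamma^2)
    sq = numpy.sum(X * X, axis=1)
    d2 = sq[:, None] - 2 * numpy.dot(X, X.T) + sq[None, :]
    return numpy.exp(-d2 / (gamma * gamma))

def fit_kernel_regression(X, Y, L2_regularizer=0.1, gamma=1.0):
    # Solve (M + lambda I_n) theta = (1, y)' for theta = (b, alpha),
    # with M laid out as in the KernelRegression docstring.
    n = len(X)
    G = gaussian_gram(X, gamma)
    M = numpy.zeros((n + 1, n + 1))
    M[0, 0] = n                                    # n at (0,0)
    M[1:, 0] = 1                                   # 1's in the rest of column 0
    M[0, 1:] = G.sum(axis=0)                       # sum_i G_{i,j} in row 0
    M[1:, 1:] = G + L2_regularizer * numpy.eye(n)  # G block plus lambda I_n
    rhs = numpy.zeros((n + 1, Y.shape[1]))
    rhs[0] = 1                                     # matches Y[0]=1 in __call__
    rhs[1:, :] = Y
    theta = numpy.linalg.solve(M, rhs)             # O(n^3) time, O(n^2) memory
    return theta[0], theta[1:]                     # b: (n_outputs,), alpha: (n, n_outputs)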
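Prediction then follows the docstring's f(x) = b + \sum_{t=1}^n \alpha_t K(x,x_t), at O(n) cost per example. Continuing the same hypothetical sketch, with a small usage check:

def kernel_predict(Xnew, b, alpha, train_X, gamma=1.0):
    # f(x) = b + sum_t alpha_t K(x, x_t), for each row x of Xnew
    sq_t = numpy.sum(train_X * train_X, axis=1)
    sq_x = numpy.sum(Xnew * Xnew, axis=1)
    Kx = numpy.exp(-(sq_t[None, :] - 2 * numpy.dot(Xnew, train_X.T)
                     + sq_x[:, None]) / (gamma * gamma))
    return numpy.dot(Kx, alpha) + b

# tiny demonstration on synthetic data
rng = numpy.random.RandomState(0)
X = rng.randn(20, 3)
Y = numpy.sin(X.sum(axis=1))[:, None]
b, alpha = fit_kernel_regression(X, Y, L2_regularizer=0.1, gamma=2.0)
outputs = kernel_predict(X, b, alpha, X, gamma=2.0)
mse = numpy.mean(numpy.sum((outputs - Y) ** 2, axis=1))  # same convention as compute_mse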