# HG changeset patch
# User Yoshua Bengio
# Date 1216504666 14400
# Node ID e2b46a8f2b7b0f50051b0711ea25d4b1be6c9cb1
# Parent 0231eeac11c965bcd941f673c4b30d7ba8ff0879
Debugging kernel regression

diff -r 0231eeac11c9 -r e2b46a8f2b7b kernel_regression.py
--- a/kernel_regression.py	Sat Jul 19 14:26:24 2008 -0400
+++ b/kernel_regression.py	Sat Jul 19 17:57:46 2008 -0400
@@ -10,6 +10,11 @@
 import theano
 import numpy
 
+# map a N-vector to a 1xN matrix
+row_vector = theano.elemwise.DimShuffle((False,),['x',0])
+# map a N-vector to a Nx1 matrix
+col_vector = theano.elemwise.DimShuffle((False,),[0,'x'])
+
 class KernelRegression(OfflineLearningAlgorithm):
     """
 Implementation of kernel regression:
@@ -90,17 +95,19 @@
         n_outputs = first_example['target'].size
         M = numpy.zeros((n_examples+1,n_examples+1))
         Y = numpy.zeros((n_examples+1,n_outputs))
-        for i in xrange(n_inputs):
+        for i in xrange(n_examples):
             M[i+1,i+1]=self.L2_regularizer
         data = trainset.fields()
         train_inputs = numpy.array(data['input'])
         Y[0]=1
         Y[1:,:] = numpy.array(data['target'])
-        train_inputs_square,sumG=self.equations.compute_system_matrix(train_inputs,M)
+        train_inputs_square,sumG,G=self.equations.compute_system_matrix(train_inputs,self.gamma)
+        M[1:,1:] += G
         M[0,1:] = sumG
         M[1:,0] = 1
         M[0,0] = M.shape[0]
-        print M
+        self.M=M
+        self.Y=Y
         theta=numpy.linalg.solve(M,Y)
         return KernelPredictor(theta,self.gamma, train_inputs, train_inputs_square)
 
@@ -115,7 +122,7 @@
         b = theta[0]
         alpha = theta[1:,:]
         inputs_square = T.sum(inputs*inputs,axis=1)
-        Kx = T.exp(-(train_inputs_square-2*T.dot(inputs,train_inputs.T)+inputs_square)*inv_gamma2)
+        Kx = T.exp(-(row_vector(train_inputs_square)-2*T.dot(inputs,train_inputs.T)+col_vector(inputs_square))*inv_gamma2)
         outputs = T.dot(Kx,alpha) + b # minibatchsize x n_outputs
         squared_errors = T.sum(T.sqr(targets-outputs),axis=1)
 
@@ -136,12 +143,15 @@
         self.compile()
 
 class KernelRegressionEquations(KernelPredictorEquations):
-    M = T.matrix() # (n_examples+1) x (n_examples+1)
+    #M = T.matrix() # (n_examples+1) x (n_examples+1)
     inputs = T.matrix() # n_examples x n_inputs
-    G = M[1:,1:]
-    new_G = T.gemm(G,1.,inputs,inputs.T,1.)
-    sumG = T.sum(new_G,axis=0)
+    gamma = T.scalar()
+    inv_gamma2 = 1./(gamma*gamma)
     inputs_square = T.sum(inputs*inputs,axis=1)
+    #new_G = G+T.dot(inputs,inputs.T)
+    #new_G = T.gemm(G,1.,inputs,inputs.T,1.)
+    G = T.exp(-(row_vector(inputs_square)-2*T.dot(inputs,inputs.T)+col_vector(inputs_square))*inv_gamma2)
+    sumG = T.sum(G,axis=0)
 
     __compiled = False
 
@@ -152,7 +162,7 @@
         def fn(input_vars,output_vars):
             return staticmethod(theano.function(input_vars,output_vars, linker=linker))
 
-        cls.compute_system_matrix = fn([cls.inputs,cls.M],[cls.inputs_square,cls.sumG])
+        cls.compute_system_matrix = fn([cls.inputs,cls.gamma],[cls.inputs_square,cls.sumG,cls.G])
 
         cls.__compiled = True
 
@@ -165,12 +175,14 @@
     it can use to make a non-linear prediction (according to the KernelPredictorEquations).
     It can compute its output (bias + alpha * kernel(train_inputs,input) and a squared error (||output - target||^2).
""" - def __init__(self, theta, gamma, train_inputs, train_inputs_square): + def __init__(self, theta, gamma, train_inputs, train_inputs_square=None): self.theta=theta self.gamma=gamma self.train_inputs=train_inputs + if train_inputs_square==None: + train_inputs_square = numpy.sum(train_inputs*train_inputs,axis=1) self.train_inputs_square=train_inputs_square - self.equations = LinearPredictorEquations() + self.equations = KernelPredictorEquations() def compute_outputs(self,inputs): return self.equations.compute_outputs(inputs,self.theta,self.gamma,self.train_inputs,self.train_inputs_square)