diff kernel_regression.py @ 425:e2b46a8f2b7b

Debugging kernel regression
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Sat, 19 Jul 2008 17:57:46 -0400
parents 32c5f87bc54e
children d7611a3811f2
--- a/kernel_regression.py	Sat Jul 19 14:26:24 2008 -0400
+++ b/kernel_regression.py	Sat Jul 19 17:57:46 2008 -0400
@@ -10,6 +10,11 @@
 import theano
 import numpy
 
+# map an N-vector to a 1xN matrix
+row_vector = theano.elemwise.DimShuffle((False,),['x',0])
+# map an N-vector to an Nx1 matrix
+col_vector = theano.elemwise.DimShuffle((False,),[0,'x'])
+
 class KernelRegression(OfflineLearningAlgorithm):
     """
 Implementation of kernel regression:
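
The two DimShuffle ops added at the top lift a length-N vector to a 1xN row or an Nx1 column so it can broadcast against an N x N matrix. A minimal NumPy sketch of the same reshaping (only row_vector/col_vector are the patch's names; the rest is illustrative):

    import numpy

    v = numpy.arange(3.0)        # shape (3,)
    row = v[numpy.newaxis, :]    # shape (1, 3): what row_vector(v) yields
    col = v[:, numpy.newaxis]    # shape (3, 1): what col_vector(v) yields
    # Broadcasting a row against a column produces a full (3, 3) matrix,
    # which is how the kernel code below forms pairwise squared distances.
    print(row - col)
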
@@ -90,17 +95,19 @@
         n_outputs = first_example['target'].size
         M = numpy.zeros((n_examples+1,n_examples+1))
         Y = numpy.zeros((n_examples+1,n_outputs))
-        for i in xrange(n_inputs):
+        for i in xrange(n_examples):
             M[i+1,i+1]=self.L2_regularizer
         data = trainset.fields()
         train_inputs = numpy.array(data['input'])
         Y[0]=1
         Y[1:,:] = numpy.array(data['target'])
-        train_inputs_square,sumG=self.equations.compute_system_matrix(train_inputs,M)
+        train_inputs_square,sumG,G=self.equations.compute_system_matrix(train_inputs,self.gamma)
+        M[1:,1:] += G
         M[0,1:] = sumG
         M[1:,0] = 1
         M[0,0] = M.shape[0]
-        print M
+        self.M=M
+        self.Y=Y
         theta=numpy.linalg.solve(M,Y)
         return KernelPredictor(theta,self.gamma, train_inputs, train_inputs_square)
 
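
The training hunk assembles a bordered linear system: the Gaussian Gram matrix plus an L2 ridge on its diagonal in the lower-right block, the kernel column sums and a column of ones along the border. Solving M theta = Y then makes row 0 of theta the bias and the remaining rows the alpha coefficients. A self-contained NumPy sketch of the same assembly, assuming G and sumG come from compute_system_matrix (the name solve_system is illustrative):

    import numpy

    def solve_system(G, sumG, targets, L2_regularizer):
        n = G.shape[0]
        M = numpy.zeros((n + 1, n + 1))
        M[1:, 1:] = G + L2_regularizer * numpy.eye(n)  # the fixed xrange(n_examples) loop
        M[0, 1:] = sumG
        M[1:, 0] = 1
        M[0, 0] = M.shape[0]
        Y = numpy.zeros((n + 1, targets.shape[1]))
        Y[0] = 1                 # as in the hunk: Y[0]=1
        Y[1:, :] = targets
        return numpy.linalg.solve(M, Y)  # theta: row 0 = bias b, rows 1: = alpha
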
@@ -115,7 +122,7 @@
     b = theta[0]
     alpha = theta[1:,:]
     inputs_square = T.sum(inputs*inputs,axis=1)
-    Kx = T.exp(-(train_inputs_square-2*T.dot(inputs,train_inputs.T)+inputs_square)*inv_gamma2)
+    Kx = T.exp(-(row_vector(train_inputs_square)-2*T.dot(inputs,train_inputs.T)+col_vector(inputs_square))*inv_gamma2)
     outputs = T.dot(Kx,alpha) + b # minibatchsize x n_outputs
     squared_errors = T.sum(T.sqr(targets-outputs),axis=1)
 
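
The corrected Kx line uses the expansion ||x - z||^2 = ||z||^2 - 2 x.z + ||x||^2. The bug was that the two squared-norm vectors had no explicit orientation; row_vector/col_vector now pin the three terms to shapes (1, n_train), (minibatchsize, n_train) and (minibatchsize, 1) so broadcasting produces the full distance matrix. A NumPy sketch of the fixed computation (the function name is illustrative):

    import numpy

    def gaussian_cross_kernel(inputs, train_inputs, train_inputs_square, gamma):
        inputs_square = numpy.sum(inputs * inputs, axis=1)
        d2 = (train_inputs_square[numpy.newaxis, :]      # (1, n_train)
              - 2 * numpy.dot(inputs, train_inputs.T)    # (minibatch, n_train)
              + inputs_square[:, numpy.newaxis])         # (minibatch, 1)
        return numpy.exp(-d2 / gamma ** 2)               # Kx, one row per input
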
@@ -136,12 +143,15 @@
         self.compile()
         
 class KernelRegressionEquations(KernelPredictorEquations):
-    M = T.matrix() # (n_examples+1) x (n_examples+1)
+    #M = T.matrix() # (n_examples+1) x (n_examples+1)
     inputs = T.matrix() # n_examples x n_inputs
-    G = M[1:,1:]
-    new_G = T.gemm(G,1.,inputs,inputs.T,1.)
-    sumG = T.sum(new_G,axis=0)
+    gamma = T.scalar()
+    inv_gamma2 = 1./(gamma*gamma)
     inputs_square = T.sum(inputs*inputs,axis=1)
+    #new_G = G+T.dot(inputs,inputs.T)
+    #new_G = T.gemm(G,1.,inputs,inputs.T,1.)
+    G = T.exp(-(row_vector(inputs_square)-2*T.dot(inputs,inputs.T)+col_vector(inputs_square))*inv_gamma2)
+    sumG = T.sum(G,axis=0)
     
     __compiled = False
     
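
KernelRegressionEquations no longer takes M and accumulates G = M[1:,1:] with a gemm; it builds the Gaussian Gram matrix directly from the inputs and the kernel width gamma. A NumPy rendering of the new graph (same names as the patch):

    import numpy

    def compute_system_matrix(inputs, gamma):
        inputs_square = numpy.sum(inputs * inputs, axis=1)
        G = numpy.exp(-(inputs_square[numpy.newaxis, :]
                        - 2 * numpy.dot(inputs, inputs.T)
                        + inputs_square[:, numpy.newaxis]) / gamma ** 2)
        sumG = numpy.sum(G, axis=0)  # G is symmetric, so axis 0 and axis 1 agree
        return inputs_square, sumG, G
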
@@ -152,7 +162,7 @@
         def fn(input_vars,output_vars):
             return staticmethod(theano.function(input_vars,output_vars, linker=linker))
 
-        cls.compute_system_matrix = fn([cls.inputs,cls.M],[cls.inputs_square,cls.sumG])
+        cls.compute_system_matrix = fn([cls.inputs,cls.gamma],[cls.inputs_square,cls.sumG,cls.G])
 
         cls.__compiled = True
 
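
With the new signature, the compiled function is fed the raw inputs and gamma instead of a preallocated M, and hands back everything the trainer needs. A hypothetical call (the eqs name and the shapes are assumptions):

    # eqs = KernelRegressionEquations()   # compiles the graph once per class
    # inputs_square, sumG, G = eqs.compute_system_matrix(train_inputs, gamma)
    # shapes: (n_examples,), (n_examples,), (n_examples, n_examples)
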
@@ -165,12 +175,14 @@
     it can use to make a non-linear prediction (according to the KernelPredictorEquations).
     It can compute its output (bias + alpha * kernel(train_inputs,input)) and a squared error (||output - target||^2).
     """
-    def __init__(self, theta, gamma, train_inputs, train_inputs_square):
+    def __init__(self, theta, gamma, train_inputs, train_inputs_square=None):
         self.theta=theta
         self.gamma=gamma
         self.train_inputs=train_inputs
+        if train_inputs_square is None:
+            train_inputs_square = numpy.sum(train_inputs*train_inputs,axis=1)
         self.train_inputs_square=train_inputs_square
-        self.equations = LinearPredictorEquations()
+        self.equations = KernelPredictorEquations()
 
     def compute_outputs(self,inputs):
         return self.equations.compute_outputs(inputs,self.theta,self.gamma,self.train_inputs,self.train_inputs_square)