comparison kernel_regression.py @ 425:e2b46a8f2b7b

Debugging kernel regression
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Sat, 19 Jul 2008 17:57:46 -0400
parents 32c5f87bc54e
children d7611a3811f2
comparison
equal deleted inserted replaced
424:0231eeac11c9 425:e2b46a8f2b7b
7 from nnet_ops import prepend_1_to_each_row 7 from nnet_ops import prepend_1_to_each_row
8 from theano.scalar import as_scalar 8 from theano.scalar import as_scalar
9 from common.autoname import AutoName 9 from common.autoname import AutoName
10 import theano 10 import theano
11 import numpy 11 import numpy
12
13 # map a N-vector to a 1xN matrix
14 row_vector = theano.elemwise.DimShuffle((False,),['x',0])
15 # map a N-vector to a Nx1 matrix
16 col_vector = theano.elemwise.DimShuffle((False,),[0,'x'])
12 17
13 class KernelRegression(OfflineLearningAlgorithm): 18 class KernelRegression(OfflineLearningAlgorithm):
14 """ 19 """
15 Implementation of kernel regression: 20 Implementation of kernel regression:
16 * the data are n (x_t,y_t) pairs and we want to estimate E[y|x] 21 * the data are n (x_t,y_t) pairs and we want to estimate E[y|x]
88 first_example = trainset[0] 93 first_example = trainset[0]
89 n_inputs = first_example['input'].size 94 n_inputs = first_example['input'].size
90 n_outputs = first_example['target'].size 95 n_outputs = first_example['target'].size
91 M = numpy.zeros((n_examples+1,n_examples+1)) 96 M = numpy.zeros((n_examples+1,n_examples+1))
92 Y = numpy.zeros((n_examples+1,n_outputs)) 97 Y = numpy.zeros((n_examples+1,n_outputs))
93 for i in xrange(n_inputs): 98 for i in xrange(n_examples):
94 M[i+1,i+1]=self.L2_regularizer 99 M[i+1,i+1]=self.L2_regularizer
95 data = trainset.fields() 100 data = trainset.fields()
96 train_inputs = numpy.array(data['input']) 101 train_inputs = numpy.array(data['input'])
97 Y[0]=1 102 Y[0]=1
98 Y[1:,:] = numpy.array(data['target']) 103 Y[1:,:] = numpy.array(data['target'])
99 train_inputs_square,sumG=self.equations.compute_system_matrix(train_inputs,M) 104 train_inputs_square,sumG,G=self.equations.compute_system_matrix(train_inputs,self.gamma)
105 M[1:,1:] += G
100 M[0,1:] = sumG 106 M[0,1:] = sumG
101 M[1:,0] = 1 107 M[1:,0] = 1
102 M[0,0] = M.shape[0] 108 M[0,0] = M.shape[0]
103 print M 109 self.M=M
110 self.Y=Y
104 theta=numpy.linalg.solve(M,Y) 111 theta=numpy.linalg.solve(M,Y)
105 return KernelPredictor(theta,self.gamma, train_inputs, train_inputs_square) 112 return KernelPredictor(theta,self.gamma, train_inputs, train_inputs_square)
106 113
107 class KernelPredictorEquations(AutoName): 114 class KernelPredictorEquations(AutoName):
108 train_inputs = T.matrix() # n_examples x n_inputs 115 train_inputs = T.matrix() # n_examples x n_inputs
113 gamma = T.scalar() 120 gamma = T.scalar()
114 inv_gamma2 = 1./(gamma*gamma) 121 inv_gamma2 = 1./(gamma*gamma)
115 b = theta[0] 122 b = theta[0]
116 alpha = theta[1:,:] 123 alpha = theta[1:,:]
117 inputs_square = T.sum(inputs*inputs,axis=1) 124 inputs_square = T.sum(inputs*inputs,axis=1)
118 Kx = T.exp(-(train_inputs_square-2*T.dot(inputs,train_inputs.T)+inputs_square)*inv_gamma2) 125 Kx = T.exp(-(row_vector(train_inputs_square)-2*T.dot(inputs,train_inputs.T)+col_vector(inputs_square))*inv_gamma2)
119 outputs = T.dot(Kx,alpha) + b # minibatchsize x n_outputs 126 outputs = T.dot(Kx,alpha) + b # minibatchsize x n_outputs
120 squared_errors = T.sum(T.sqr(targets-outputs),axis=1) 127 squared_errors = T.sum(T.sqr(targets-outputs),axis=1)
121 128
122 __compiled = False 129 __compiled = False
123 @classmethod 130 @classmethod
134 141
135 def __init__(self): 142 def __init__(self):
136 self.compile() 143 self.compile()
137 144
138 class KernelRegressionEquations(KernelPredictorEquations): 145 class KernelRegressionEquations(KernelPredictorEquations):
139 M = T.matrix() # (n_examples+1) x (n_examples+1) 146 #M = T.matrix() # (n_examples+1) x (n_examples+1)
140 inputs = T.matrix() # n_examples x n_inputs 147 inputs = T.matrix() # n_examples x n_inputs
141 G = M[1:,1:] 148 gamma = T.scalar()
142 new_G = T.gemm(G,1.,inputs,inputs.T,1.) 149 inv_gamma2 = 1./(gamma*gamma)
143 sumG = T.sum(new_G,axis=0)
144 inputs_square = T.sum(inputs*inputs,axis=1) 150 inputs_square = T.sum(inputs*inputs,axis=1)
151 #new_G = G+T.dot(inputs,inputs.T)
152 #new_G = T.gemm(G,1.,inputs,inputs.T,1.)
153 G = T.exp(-(row_vector(inputs_square)-2*T.dot(inputs,inputs.T)+col_vector(inputs_square))*inv_gamma2)
154 sumG = T.sum(G,axis=0)
145 155
146 __compiled = False 156 __compiled = False
147 157
148 @classmethod 158 @classmethod
149 def compile(cls,linker='c|py'): 159 def compile(cls,linker='c|py'):
150 if cls.__compiled: 160 if cls.__compiled:
151 return 161 return
152 def fn(input_vars,output_vars): 162 def fn(input_vars,output_vars):
153 return staticmethod(theano.function(input_vars,output_vars, linker=linker)) 163 return staticmethod(theano.function(input_vars,output_vars, linker=linker))
154 164
155 cls.compute_system_matrix = fn([cls.inputs,cls.M],[cls.inputs_square,cls.sumG]) 165 cls.compute_system_matrix = fn([cls.inputs,cls.gamma],[cls.inputs_square,cls.sumG,cls.G])
156 166
157 cls.__compiled = True 167 cls.__compiled = True
158 168
159 def __init__(self): 169 def __init__(self):
160 self.compile() 170 self.compile()
163 """ 173 """
164 A kernel predictor has parameters theta (a bias vector and a weight matrix alpha) 174 A kernel predictor has parameters theta (a bias vector and a weight matrix alpha)
165 it can use to make a non-linear prediction (according to the KernelPredictorEquations). 175 it can use to make a non-linear prediction (according to the KernelPredictorEquations).
166 It can compute its output (bias + alpha * kernel(train_inputs,input) and a squared error (||output - target||^2). 176 It can compute its output (bias + alpha * kernel(train_inputs,input) and a squared error (||output - target||^2).
167 """ 177 """
168 def __init__(self, theta, gamma, train_inputs, train_inputs_square): 178 def __init__(self, theta, gamma, train_inputs, train_inputs_square=None):
169 self.theta=theta 179 self.theta=theta
170 self.gamma=gamma 180 self.gamma=gamma
171 self.train_inputs=train_inputs 181 self.train_inputs=train_inputs
182 if train_inputs_square==None:
183 train_inputs_square = numpy.sum(train_inputs*train_inputs,axis=1)
172 self.train_inputs_square=train_inputs_square 184 self.train_inputs_square=train_inputs_square
173 self.equations = LinearPredictorEquations() 185 self.equations = KernelPredictorEquations()
174 186
175 def compute_outputs(self,inputs): 187 def compute_outputs(self,inputs):
176 return self.equations.compute_outputs(inputs,self.theta,self.gamma,self.train_inputs,self.train_inputs_square) 188 return self.equations.compute_outputs(inputs,self.theta,self.gamma,self.train_inputs,self.train_inputs_square)
177 def compute_errors(self,inputs,targets): 189 def compute_errors(self,inputs,targets):
178 return self.equations.compute_errors(self.compute_outputs(inputs),targets) 190 return self.equations.compute_errors(self.compute_outputs(inputs),targets)