comparison pylearn/algorithms/kernel_regression.py @ 1505:723e2d761985

auto white space fix.
author Frederic Bastien <nouiz@nouiz.org>
date Mon, 12 Sep 2011 10:49:15 -0400
parents bf5c0f797161
children
comparison
equal deleted inserted replaced
1504:bf5c0f797161 1505:723e2d761985
35 that linear system matrix requires O(n^2) memory. 35 that linear system matrix requires O(n^2) memory.
36 So this learning algorithm should be used only for 36 So this learning algorithm should be used only for
37 small datasets. 37 small datasets.
38 * the linear system is 38 * the linear system is
39 (M + lambda I_n) theta = (1, y)' 39 (M + lambda I_n) theta = (1, y)'
40 where theta = (b, alpha), I_n is the (n+1)x(n+1) matrix that is the identity 40 where theta = (b, alpha), I_n is the (n+1)x(n+1) matrix that is the identity
41 except with a 0 at (0,0), M is the matrix with G in the sub-matrix starting 41 except with a 0 at (0,0), M is the matrix with G in the sub-matrix starting
42 at (1,1), 1's in column 0, except for a value of n at (0,0), and sum_i G_{i,j} 42 at (1,1), 1's in column 0, except for a value of n at (0,0), and sum_i G_{i,j}
43 in the rest of row 0. 43 in the rest of row 0.
44 44
45 Note that this gives an estimate of E[y|x,training_set] that is the 45 Note that this gives an estimate of E[y|x,training_set] that is the
46 same as obtained with a Gaussian process regression. The GP 46 same as obtained with a Gaussian process regression. The GP
47 regression would also provide a Bayesian Var[y|x,training_set]. 47 regression would also provide a Bayesian Var[y|x,training_set].
48 It corresponds to an assumption that f is a random variable 48 It corresponds to an assumption that f is a random variable
49 with Gaussian (process) prior distribution with covariance 49 with Gaussian (process) prior distribution with covariance
58 all_results_dataset=kernel_predictor(test_set) # creates a dataset with "output" and "squared_error" fields 58 all_results_dataset=kernel_predictor(test_set) # creates a dataset with "output" and "squared_error" fields
59 outputs = kernel_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays 59 outputs = kernel_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays
60 outputs, errors = kernel_predictor.compute_outputs_and_errors(inputs,targets) 60 outputs, errors = kernel_predictor.compute_outputs_and_errors(inputs,targets)
61 errors = kernel_predictor.compute_errors(inputs,targets) 61 errors = kernel_predictor.compute_errors(inputs,targets)
62 mse = kernel_predictor.compute_mse(inputs,targets) 62 mse = kernel_predictor.compute_mse(inputs,targets)
63 63
64 64
65 65
66 The training_set must have fields "input" and "target". 66 The training_set must have fields "input" and "target".
67 The test_set must have field "input", and needs "target" if 67 The test_set must have field "input", and needs "target" if
68 we want to compute the squared errors. 68 we want to compute the squared errors.
69 69
145 145
146 cls.__compiled = True 146 cls.__compiled = True
147 147
148 def __init__(self): 148 def __init__(self):
149 self.compile() 149 self.compile()
150 150
151 class KernelRegressionEquations(KernelPredictorEquations): 151 class KernelRegressionEquations(KernelPredictorEquations):
152 #M = T.matrix() # (n_examples+1) x (n_examples+1) 152 #M = T.matrix() # (n_examples+1) x (n_examples+1)
153 inputs = T.matrix() # n_examples x n_inputs 153 inputs = T.matrix() # n_examples x n_inputs
154 gamma = T.scalar() 154 gamma = T.scalar()
155 inv_gamma2 = 1./(gamma*gamma) 155 inv_gamma2 = 1./(gamma*gamma)
156 inputs_square = T.sum(inputs*inputs,axis=1) 156 inputs_square = T.sum(inputs*inputs,axis=1)
157 #new_G = G+T.dot(inputs,inputs.T) 157 #new_G = G+T.dot(inputs,inputs.T)
158 #new_G = T.gemm(G,1.,inputs,inputs.T,1.) 158 #new_G = T.gemm(G,1.,inputs,inputs.T,1.)
159 G = T.exp(-(row_vector(inputs_square)-2*T.dot(inputs,inputs.T)+col_vector(inputs_square))*inv_gamma2) 159 G = T.exp(-(row_vector(inputs_square)-2*T.dot(inputs,inputs.T)+col_vector(inputs_square))*inv_gamma2)
160 sumG = T.sum(G,axis=0) 160 sumG = T.sum(G,axis=0)
161 161
162 __compiled = False 162 __compiled = False
163 163
164 @classmethod 164 @classmethod
165 def compile(cls,linker='c|py'): 165 def compile(cls,linker='c|py'):
166 if cls.__compiled: 166 if cls.__compiled:
167 return 167 return
168 def fn(input_vars,output_vars): 168 def fn(input_vars,output_vars):
198 outputs = self.compute_outputs(inputs) 198 outputs = self.compute_outputs(inputs)
199 return [outputs,self.equations.compute_errors(outputs,targets)] 199 return [outputs,self.equations.compute_errors(outputs,targets)]
200 def compute_mse(self,inputs,targets): 200 def compute_mse(self,inputs,targets):
201 errors = self.compute_errors(inputs,targets) 201 errors = self.compute_errors(inputs,targets)
202 return numpy.sum(errors)/errors.size 202 return numpy.sum(errors)/errors.size
203 203
204 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): 204 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False):
205 assert dataset.hasFields(["input"]) 205 assert dataset.hasFields(["input"])
206 if output_fieldnames is None: 206 if output_fieldnames is None:
207 if dataset.hasFields(["target"]): 207 if dataset.hasFields(["target"]):
208 output_fieldnames = ["output","squared_error"] 208 output_fieldnames = ["output","squared_error"]
215 f = self.compute_outputs 215 f = self.compute_outputs
216 elif output_fieldnames == ["output","squared_error"]: 216 elif output_fieldnames == ["output","squared_error"]:
217 f = self.compute_outputs_and_errors 217 f = self.compute_outputs_and_errors
218 else: 218 else:
219 raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames)) 219 raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames))
220 220
221 ds=ApplyFunctionDataSet(dataset,f,output_fieldnames) 221 ds=ApplyFunctionDataSet(dataset,f,output_fieldnames)
222 if cached_output_dataset: 222 if cached_output_dataset:
223 return CachedDataSet(ds) 223 return CachedDataSet(ds)
224 else: 224 else:
225 return ds 225 return ds
226 226
227 227
228 def kernel_predictor(inputs,params,*otherargs): 228 def kernel_predictor(inputs,params,*otherargs):
229 p = KernelPredictor(params,*otherargs[0]) 229 p = KernelPredictor(params,*otherargs[0])
230 return p.compute_outputs(inputs) 230 return p.compute_outputs(inputs)
231