Mercurial > pylearn
comparison pylearn/algorithms/kernel_regression.py @ 1505:723e2d761985
auto white space fix.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Mon, 12 Sep 2011 10:49:15 -0400 |
parents | bf5c0f797161 |
children |
comparison
equal
deleted
inserted
replaced
1504:bf5c0f797161 | 1505:723e2d761985 |
---|---|
35 that linear system matrix requires O(n^2) memory. | 35 that linear system matrix requires O(n^2) memory. |
36 So this learning algorithm should be used only for | 36 So this learning algorithm should be used only for |
37 small datasets. | 37 small datasets. |
38 * the linear system is | 38 * the linear system is |
39 (M + lambda I_n) theta = (1, y)' | 39 (M + lambda I_n) theta = (1, y)' |
40 where theta = (b, alpha), I_n is the (n+1)x(n+1) matrix that is the identity | 40 where theta = (b, alpha), I_n is the (n+1)x(n+1) matrix that is the identity |
41 except with a 0 at (0,0), M is the matrix with G in the sub-matrix starting | 41 except with a 0 at (0,0), M is the matrix with G in the sub-matrix starting |
42 at (1,1), 1's in column 0, except for a value of n at (0,0), and sum_i G_{i,j} | 42 at (1,1), 1's in column 0, except for a value of n at (0,0), and sum_i G_{i,j} |
43 in the rest of row 0. | 43 in the rest of row 0. |
44 | 44 |
45 Note that this gives an estimate of E[y|x,training_set] that is the | 45 Note that this gives an estimate of E[y|x,training_set] that is the |
46 same as obtained with a Gaussian process regression. The GP | 46 same as obtained with a Gaussian process regression. The GP |
47 regression would also provide a Bayesian Var[y|x,training_set]. | 47 regression would also provide a Bayesian Var[y|x,training_set]. |
48 It corresponds to an assumption that f is a random variable | 48 It corresponds to an assumption that f is a random variable |
49 with Gaussian (process) prior distribution with covariance | 49 with Gaussian (process) prior distribution with covariance |
58 all_results_dataset=kernel_predictor(test_set) # creates a dataset with "output" and "squared_error" field | 58 all_results_dataset=kernel_predictor(test_set) # creates a dataset with "output" and "squared_error" field |
59 outputs = kernel_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays | 59 outputs = kernel_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays |
60 outputs, errors = kernel_predictor.compute_outputs_and_errors(inputs,targets) | 60 outputs, errors = kernel_predictor.compute_outputs_and_errors(inputs,targets) |
61 errors = kernel_predictor.compute_errors(inputs,targets) | 61 errors = kernel_predictor.compute_errors(inputs,targets) |
62 mse = kernel_predictor.compute_mse(inputs,targets) | 62 mse = kernel_predictor.compute_mse(inputs,targets) |
63 | 63 |
64 | 64 |
65 | 65 |
66 The training_set must have fields "input" and "target". | 66 The training_set must have fields "input" and "target". |
67 The test_set must have field "input", and needs "target" if | 67 The test_set must have field "input", and needs "target" if |
68 we want to compute the squared errors. | 68 we want to compute the squared errors. |
69 | 69 |
145 | 145 |
146 cls.__compiled = True | 146 cls.__compiled = True |
147 | 147 |
148 def __init__(self): | 148 def __init__(self): |
149 self.compile() | 149 self.compile() |
150 | 150 |
151 class KernelRegressionEquations(KernelPredictorEquations): | 151 class KernelRegressionEquations(KernelPredictorEquations): |
152 #M = T.matrix() # (n_examples+1) x (n_examples+1) | 152 #M = T.matrix() # (n_examples+1) x (n_examples+1) |
153 inputs = T.matrix() # n_examples x n_inputs | 153 inputs = T.matrix() # n_examples x n_inputs |
154 gamma = T.scalar() | 154 gamma = T.scalar() |
155 inv_gamma2 = 1./(gamma*gamma) | 155 inv_gamma2 = 1./(gamma*gamma) |
156 inputs_square = T.sum(inputs*inputs,axis=1) | 156 inputs_square = T.sum(inputs*inputs,axis=1) |
157 #new_G = G+T.dot(inputs,inputs.T) | 157 #new_G = G+T.dot(inputs,inputs.T) |
158 #new_G = T.gemm(G,1.,inputs,inputs.T,1.) | 158 #new_G = T.gemm(G,1.,inputs,inputs.T,1.) |
159 G = T.exp(-(row_vector(inputs_square)-2*T.dot(inputs,inputs.T)+col_vector(inputs_square))*inv_gamma2) | 159 G = T.exp(-(row_vector(inputs_square)-2*T.dot(inputs,inputs.T)+col_vector(inputs_square))*inv_gamma2) |
160 sumG = T.sum(G,axis=0) | 160 sumG = T.sum(G,axis=0) |
161 | 161 |
162 __compiled = False | 162 __compiled = False |
163 | 163 |
164 @classmethod | 164 @classmethod |
165 def compile(cls,linker='c|py'): | 165 def compile(cls,linker='c|py'): |
166 if cls.__compiled: | 166 if cls.__compiled: |
167 return | 167 return |
168 def fn(input_vars,output_vars): | 168 def fn(input_vars,output_vars): |
198 outputs = self.compute_outputs(inputs) | 198 outputs = self.compute_outputs(inputs) |
199 return [outputs,self.equations.compute_errors(outputs,targets)] | 199 return [outputs,self.equations.compute_errors(outputs,targets)] |
200 def compute_mse(self,inputs,targets): | 200 def compute_mse(self,inputs,targets): |
201 errors = self.compute_errors(inputs,targets) | 201 errors = self.compute_errors(inputs,targets) |
202 return numpy.sum(errors)/errors.size | 202 return numpy.sum(errors)/errors.size |
203 | 203 |
204 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): | 204 def __call__(self,dataset,output_fieldnames=None,cached_output_dataset=False): |
205 assert dataset.hasFields(["input"]) | 205 assert dataset.hasFields(["input"]) |
206 if output_fieldnames is None: | 206 if output_fieldnames is None: |
207 if dataset.hasFields(["target"]): | 207 if dataset.hasFields(["target"]): |
208 output_fieldnames = ["output","squared_error"] | 208 output_fieldnames = ["output","squared_error"] |
215 f = self.compute_outputs | 215 f = self.compute_outputs |
216 elif output_fieldnames == ["output","squared_error"]: | 216 elif output_fieldnames == ["output","squared_error"]: |
217 f = self.compute_outputs_and_errors | 217 f = self.compute_outputs_and_errors |
218 else: | 218 else: |
219 raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames)) | 219 raise ValueError("unknown field(s) in output_fieldnames: "+str(output_fieldnames)) |
220 | 220 |
221 ds=ApplyFunctionDataSet(dataset,f,output_fieldnames) | 221 ds=ApplyFunctionDataSet(dataset,f,output_fieldnames) |
222 if cached_output_dataset: | 222 if cached_output_dataset: |
223 return CachedDataSet(ds) | 223 return CachedDataSet(ds) |
224 else: | 224 else: |
225 return ds | 225 return ds |
226 | 226 |
227 | 227 |
228 def kernel_predictor(inputs,params,*otherargs): | 228 def kernel_predictor(inputs,params,*otherargs): |
229 p = KernelPredictor(params,*otherargs[0]) | 229 p = KernelPredictor(params,*otherargs[0]) |
230 return p.compute_outputs(inputs) | 230 return p.compute_outputs(inputs) |
231 |