kernel_regression.py @ 435:eac0a7d44ff0

merge
author Olivier Breuleux <breuleuo@iro.umontreal.ca>
date Mon, 04 Aug 2008 16:29:30 -0400
parents 0f8c81b0776d
children 05f802184606
1 """
2 Implementation of kernel regression:
3 """
4
5 from pylearn.learner import OfflineLearningAlgorithm
6 from theano import tensor as T
7 from nnet_ops import prepend_1_to_each_row
8 from theano.scalar import as_scalar
9 from common.autoname import AutoName
10 import theano
11 import numpy

# map an N-vector to a 1xN matrix
row_vector = theano.elemwise.DimShuffle((False,), ['x', 0])
# map an N-vector to an Nx1 matrix
col_vector = theano.elemwise.DimShuffle((False,), [0, 'x'])

class KernelRegression(OfflineLearningAlgorithm):
    """
    Implementation of kernel regression:
    * the data are n (x_t,y_t) pairs and we want to estimate E[y|x]
    * the predictor computes
          f(x) = b + \sum_{t=1}^n \alpha_t K(x,x_t)
      with free parameters b and alpha, training inputs x_t,
      and kernel function K (Gaussian by default).
      Clearly, each prediction involves O(n) computations.
    * the learner chooses b and alpha to minimize
          \lambda \alpha' G \alpha + \sum_{t=1}^n (f(x_t)-y_t)^2
      where G is the matrix with entries G_ij = K(x_i,x_j).
      The first (L2 regularization) term is the squared L2
      norm of the primal weights w = \sum_t \alpha_t \phi(x_t),
      where \phi is the feature map s.t. K(u,v) = \phi(u) . \phi(v),
      so that ||w||^2 = \alpha' G \alpha.
    * this involves solving a linear system with an (n+1)x(n+1)
      matrix, which is an O(n^3) computation. In addition,
      that linear system matrix requires O(n^2) memory.
      So this learning algorithm should be used only for
      small datasets.
    * the linear system is
          (M + \lambda I_n) \theta = (1, y)'
      where \theta = (b, \alpha), I_n is the (n+1)x(n+1) matrix that is the
      identity except for a 0 at (0,0), and M is the matrix with G in the
      sub-matrix starting at (1,1), 1's in column 0 except for a value of n
      at (0,0), and \sum_i G_{i,j} in the rest of row 0 (a small worked
      example of the bias-free case follows).
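
    For example, in the bias-free case (use_bias=False, writing lambda for
    L2_regularizer) the bias row and column are dropped and the system
    reduces to ordinary kernel ridge regression; for n=2:

        [ G_11+lambda   G_12        ] [ alpha_1 ]   [ y_1 ]
        [ G_21          G_22+lambda ] [ alpha_2 ] = [ y_2 ]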

    Note that this gives an estimate of E[y|x,training_set] that is the
    same as would be obtained with Gaussian process regression. The GP
    regression would also provide a Bayesian Var[y|x,training_set].
    It corresponds to an assumption that f is a random variable
    with Gaussian (process) prior distribution with covariance
    function K. Because we assume Gaussian noise we obtain a Gaussian
    posterior for f (whose mean is computed here).

    Usage:

        kernel_regressor=KernelRegression(L2_regularizer=0.1,gamma=0.5)
        # (eventually kernel=GaussianKernel(gamma=0.5); for now the kernel
        #  is fixed to a Gaussian and only gamma can be set)
        kernel_predictor=kernel_regressor(training_set)
        all_results_dataset=kernel_predictor(test_set) # creates a dataset with "output" and "squared_error" fields
        outputs = kernel_predictor.compute_outputs(inputs) # inputs and outputs are numpy arrays
        outputs, errors = kernel_predictor.compute_outputs_and_errors(inputs,targets)
        errors = kernel_predictor.compute_errors(inputs,targets)
        mse = kernel_predictor.compute_mse(inputs,targets)

    The training_set must have fields "input" and "target".
    The test_set must have field "input", and needs "target" if
    we want to compute the squared errors.

    The predictor parameters are obtained analytically from the training set.
    Training is only done on a whole training set rather than on minibatches
    (no online implementation).

    The dataset fields expected and produced by the learning algorithm and
    the trained model are the following:

    - Input and output dataset fields (example-wise quantities):

      - 'input' (always expected as an input_dataset field)
      - 'target' (always expected by the learning algorithm, optional for learned model)
      - 'output' (always produced by learned model)
      - 'squared_error' (optionally produced by learned model if 'target' is provided)
        = example-wise squared error
    """
    def __init__(self, kernel=None, L2_regularizer=0, gamma=1, use_bias=False):
        # THE VERSION WITH BIAS DOES NOT SEEM RIGHT
        self.kernel = kernel
        self.L2_regularizer = L2_regularizer
        self.use_bias = use_bias
        self.gamma = gamma  # until we fix things, the kernel type is fixed: Gaussian
        self.equations = KernelRegressionEquations()

    def __call__(self, trainset):
        n_examples = len(trainset)
        first_example = trainset[0]
        n_inputs = first_example['input'].size
        n_outputs = first_example['target'].size
        b1 = 1 if self.use_bias else 0
        M = numpy.zeros((n_examples + b1, n_examples + b1))
        Y = numpy.zeros((n_examples + b1, n_outputs))
        # the L2 regularizer goes on the diagonal entries corresponding to alpha
        for i in xrange(n_examples):
            M[i + b1, i + b1] = self.L2_regularizer
        data = trainset.fields()
        train_inputs = numpy.array(data['input'])
        if self.use_bias:
            Y[0] = 1
        Y[b1:, :] = numpy.array(data['target'])
        train_inputs_square, sumG, G = self.equations.compute_system_matrix(train_inputs, self.gamma)
        M[b1:, b1:] += G
        if self.use_bias:
            M[0, 1:] = sumG
            M[1:, 0] = 1
            M[0, 0] = M.shape[0]
        self.M = M
        self.Y = Y
        theta = numpy.linalg.solve(M, Y)
        return KernelPredictor(theta, self.gamma, train_inputs, train_inputs_square)
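
# For reference, a minimal self-contained NumPy version of the bias-free case
# of the fit performed above, plus the corresponding prediction (an
# illustrative sketch that bypasses the dataset API and the Theano graph;
# the function name and the arguments X, Y, new_X, gamma, lam are
# hypothetical, not part of the original API):
def _kernel_regression_numpy_sketch(X, Y, new_X, gamma, lam):
    # Gram matrix G_ij = exp(-||x_i - x_j||^2 / gamma^2)
    sq = (X * X).sum(axis=1)
    G = numpy.exp(-(sq[None, :] - 2 * numpy.dot(X, X.T) + sq[:, None]) / gamma ** 2)
    # fit: solve (G + lam I) alpha = Y
    alpha = numpy.linalg.solve(G + lam * numpy.eye(X.shape[0]), Y)
    # predict: f(x) = sum_t alpha_t K(x, x_t)
    new_sq = (new_X * new_X).sum(axis=1)
    Kx = numpy.exp(-(sq[None, :] - 2 * numpy.dot(new_X, X.T) + new_sq[:, None]) / gamma ** 2)
    return numpy.dot(Kx, alpha)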

class KernelPredictorEquations(AutoName):
    train_inputs = T.matrix()        # n_examples x n_inputs
    train_inputs_square = T.vector() # n_examples
    inputs = T.matrix()              # minibatchsize x n_inputs
    targets = T.matrix()             # minibatchsize x n_outputs
    theta = T.matrix()               # (n_examples+b1) x n_outputs
    # b1 is 1 if theta has a bias row (one more row than training examples),
    # 0 otherwise
    b1 = T.shape(train_inputs_square)[0] < T.shape(theta)[0]
    gamma = T.scalar()
    inv_gamma2 = 1. / (gamma * gamma)
    b = b1 * theta[0]
    alpha = theta[b1:, :]
    inputs_square = T.sum(inputs * inputs, axis=1)
    # Kx[i,t] = exp(-||input_i - train_input_t||^2 / gamma^2), using the
    # expansion ||u-v||^2 = ||u||^2 - 2 u.v + ||v||^2
    Kx = T.exp(-(row_vector(train_inputs_square) - 2 * T.dot(inputs, train_inputs.T) + col_vector(inputs_square)) * inv_gamma2)
    outputs = T.dot(Kx, alpha) + b   # minibatchsize x n_outputs
    squared_errors = T.sum(T.sqr(targets - outputs), axis=1)

    __compiled = False

    @classmethod
    def compile(cls, linker='c|py'):
        if cls.__compiled:
            return
        def fn(input_vars, output_vars):
            return staticmethod(theano.function(input_vars, output_vars, linker=linker))

        cls.compute_outputs = fn([cls.inputs, cls.theta, cls.gamma, cls.train_inputs, cls.train_inputs_square], [cls.outputs])
        cls.compute_errors = fn([cls.outputs, cls.targets], [cls.squared_errors])

        cls.__compiled = True

    def __init__(self):
        self.compile()
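
# Illustrative sanity check (not part of the original API; the name and
# arguments are hypothetical): verifies in NumPy that the squared-distance
# expansion used for Kx above matches a direct computation of the Gaussian
# kernel.
def _check_gaussian_kernel_expansion(gamma=0.5):
    u = numpy.random.randn(5, 3)
    v = numpy.random.randn(4, 3)
    # direct: K[i,j] = exp(-||u_i - v_j||^2 / gamma^2)
    d2 = ((u[:, None, :] - v[None, :, :]) ** 2).sum(axis=2)
    K_direct = numpy.exp(-d2 / gamma ** 2)
    # expanded, as in Kx: ||u-v||^2 = ||u||^2 - 2 u.v + ||v||^2
    d2_exp = (u * u).sum(axis=1)[:, None] - 2 * numpy.dot(u, v.T) + (v * v).sum(axis=1)[None, :]
    assert numpy.allclose(K_direct, numpy.exp(-d2_exp / gamma ** 2))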

class KernelRegressionEquations(KernelPredictorEquations):
    #M = T.matrix() # (n_examples+1) x (n_examples+1)
    inputs = T.matrix() # n_examples x n_inputs
    gamma = T.scalar()
    inv_gamma2 = 1. / (gamma * gamma)
    inputs_square = T.sum(inputs * inputs, axis=1)
    #new_G = G+T.dot(inputs,inputs.T)
    #new_G = T.gemm(G,1.,inputs,inputs.T,1.)
    # Gram matrix over the training inputs: G_ij = exp(-||x_i - x_j||^2 / gamma^2)
    G = T.exp(-(row_vector(inputs_square) - 2 * T.dot(inputs, inputs.T) + col_vector(inputs_square)) * inv_gamma2)
    sumG = T.sum(G, axis=0)

    __compiled = False

    @classmethod
    def compile(cls, linker='c|py'):
        if cls.__compiled:
            return
        def fn(input_vars, output_vars):
            return staticmethod(theano.function(input_vars, output_vars, linker=linker))

        cls.compute_system_matrix = fn([cls.inputs, cls.gamma], [cls.inputs_square, cls.sumG, cls.G])

        cls.__compiled = True

    def __init__(self):
        self.compile()
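
# Usage of the compiled system-matrix function, as in KernelRegression.__call__
# above (X here is a hypothetical n_examples x n_inputs array):
#
#   eqns = KernelRegressionEquations()
#   train_inputs_square, sumG, G = eqns.compute_system_matrix(X, gamma)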

class KernelPredictor(object):
    """
    A kernel predictor has parameters theta (a bias vector and a weight
    matrix alpha) that it can use to make a non-linear prediction (according
    to the KernelPredictorEquations).
    It can compute its output (bias + alpha * kernel(train_inputs,input))
    and a squared error (||output - target||^2).
    """
    def __init__(self, theta, gamma, train_inputs, train_inputs_square=None):
        self.theta = theta
        self.gamma = gamma
        self.train_inputs = train_inputs
        if train_inputs_square is None:
            train_inputs_square = numpy.sum(train_inputs * train_inputs, axis=1)
        self.train_inputs_square = train_inputs_square
        self.equations = KernelPredictorEquations()

    def compute_outputs(self, inputs):
        return self.equations.compute_outputs(inputs, self.theta, self.gamma, self.train_inputs, self.train_inputs_square)
    def compute_errors(self, inputs, targets):
        return self.equations.compute_errors(self.compute_outputs(inputs), targets)
    def compute_outputs_and_errors(self, inputs, targets):
        outputs = self.compute_outputs(inputs)
        return [outputs, self.equations.compute_errors(outputs, targets)]
    def compute_mse(self, inputs, targets):
        errors = self.compute_errors(inputs, targets)
        return numpy.sum(errors) / errors.size

    def __call__(self, dataset, output_fieldnames=None, cached_output_dataset=False):
        assert dataset.hasFields(["input"])
        if output_fieldnames is None:
            if dataset.hasFields(["target"]):
                output_fieldnames = ["output", "squared_error"]
            else:
                output_fieldnames = ["output"]
        output_fieldnames.sort()
        if output_fieldnames == ["squared_error"]:
            f = self.compute_errors
        elif output_fieldnames == ["output"]:
            f = self.compute_outputs
        elif output_fieldnames == ["output", "squared_error"]:
            f = self.compute_outputs_and_errors
        else:
            raise ValueError("unknown field(s) in output_fieldnames: " + str(output_fieldnames))

        ds = ApplyFunctionDataSet(dataset, f, output_fieldnames)
        if cached_output_dataset:
            return CachedDataSet(ds)
        else:
            return ds

def kernel_predictor(inputs, params, *otherargs):
    p = KernelPredictor(params, *otherargs[0])
    return p.compute_outputs(inputs)
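
# Presumably called as, e.g. (hypothetical values; the third positional
# argument is a single tuple carrying the remaining KernelPredictor
# constructor arguments, which kernel_predictor unpacks via otherargs[0]):
#
#   outputs = kernel_predictor(x, theta, (gamma, train_inputs, train_inputs_square))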