pylearn: comparison of kernel_regression.py @ 425:e2b46a8f2b7b

Debugging kernel regression

author:   Yoshua Bengio <bengioy@iro.umontreal.ca>
date:     Sat, 19 Jul 2008 17:57:46 -0400
parents:  32c5f87bc54e
children: d7611a3811f2
comparing 424:0231eeac11c9 with 425:e2b46a8f2b7b
diff -r 0231eeac11c9 -r e2b46a8f2b7b kernel_regression.py
--- a/kernel_regression.py
+++ b/kernel_regression.py
@@ -7,10 +7,15 @@
 from nnet_ops import prepend_1_to_each_row
 from theano.scalar import as_scalar
 from common.autoname import AutoName
 import theano
 import numpy
+
+# map a N-vector to a 1xN matrix
+row_vector = theano.elemwise.DimShuffle((False,),['x',0])
+# map a N-vector to a Nx1 matrix
+col_vector = theano.elemwise.DimShuffle((False,),[0,'x'])
 
 class KernelRegression(OfflineLearningAlgorithm):
     """
     Implementation of kernel regression:
     * the data are n (x_t,y_t) pairs and we want to estimate E[y|x]
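The row_vector/col_vector helpers added at the top map an N-vector onto the broadcastable 1xN and Nx1 shapes that the kernel formulas below rely on. A minimal numpy sketch of the same trick, with reshape standing in for Theano's DimShuffle (the example values are invented):

import numpy

v = numpy.array([1., 2., 3.])   # an N-vector
row = v.reshape(1, -1)          # 1xN, what row_vector(v) produces
col = v.reshape(-1, 1)          # Nx1, what col_vector(v) produces
# broadcasting a row against a column expands to the full NxN matrix
print(row - col)                # 3x3 matrix of pairwise differences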
@@ -88,21 +93,23 @@
         first_example = trainset[0]
         n_inputs = first_example['input'].size
         n_outputs = first_example['target'].size
         M = numpy.zeros((n_examples+1,n_examples+1))
         Y = numpy.zeros((n_examples+1,n_outputs))
-        for i in xrange(n_inputs):
+        for i in xrange(n_examples):
             M[i+1,i+1]=self.L2_regularizer
         data = trainset.fields()
         train_inputs = numpy.array(data['input'])
         Y[0]=1
         Y[1:,:] = numpy.array(data['target'])
-        train_inputs_square,sumG=self.equations.compute_system_matrix(train_inputs,M)
+        train_inputs_square,sumG,G=self.equations.compute_system_matrix(train_inputs,self.gamma)
+        M[1:,1:] += G
         M[0,1:] = sumG
         M[1:,0] = 1
         M[0,0] = M.shape[0]
-        print M
+        self.M=M
+        self.Y=Y
         theta=numpy.linalg.solve(M,Y)
         return KernelPredictor(theta,self.gamma, train_inputs, train_inputs_square)
 
 class KernelPredictorEquations(AutoName):
     train_inputs = T.matrix() # n_examples x n_inputs
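The train() changes move the Gram-matrix update out of the Theano graph: compute_system_matrix now returns G and the caller adds it into M. For reference, a standalone numpy sketch of the augmented system this code assembles and solves, with toy data, gamma and L2_regularizer invented for illustration (Y[0]=1 and M[0,0]=M.shape[0] are kept exactly as the source sets them):

import numpy

rng = numpy.random.RandomState(0)
X = rng.randn(5, 3)                      # 5 training inputs, 3 features
targets = rng.randn(5, 2)                # 5 targets, 2 outputs
gamma, l2 = 1.0, 0.1
n = X.shape[0]

sq = numpy.sum(X * X, axis=1)
d2 = sq.reshape(1, -1) - 2 * numpy.dot(X, X.T) + sq.reshape(-1, 1)
G = numpy.exp(-d2 / gamma ** 2)          # Gaussian Gram matrix, the new G

M = numpy.zeros((n + 1, n + 1))
M[numpy.arange(1, n + 1), numpy.arange(1, n + 1)] = l2   # L2 on the diagonal
M[1:, 1:] += G
M[0, 1:] = G.sum(axis=0)                 # sumG
M[1:, 0] = 1
M[0, 0] = M.shape[0]
Y = numpy.zeros((n + 1, 2))
Y[0] = 1
Y[1:, :] = targets
theta = numpy.linalg.solve(M, Y)         # theta[0] is the bias row, theta[1:] the alphas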
@@ -113,11 +120,11 @@
     gamma = T.scalar()
     inv_gamma2 = 1./(gamma*gamma)
     b = theta[0]
     alpha = theta[1:,:]
     inputs_square = T.sum(inputs*inputs,axis=1)
-    Kx = T.exp(-(train_inputs_square-2*T.dot(inputs,train_inputs.T)+inputs_square)*inv_gamma2)
+    Kx = T.exp(-(row_vector(train_inputs_square)-2*T.dot(inputs,train_inputs.T)+col_vector(inputs_square))*inv_gamma2)
     outputs = T.dot(Kx,alpha) + b # minibatchsize x n_outputs
     squared_errors = T.sum(T.sqr(targets-outputs),axis=1)
 
     __compiled = False
     @classmethod
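The corrected Kx line is the usual expansion ||u - v||^2 = ||u||^2 - 2 u.v + ||v||^2, with row_vector/col_vector supplying the axes that make the three terms broadcast to a minibatchsize x n_examples matrix. A direct numpy transcription of the fixed expression (names mirror the graph above):

import numpy

def gaussian_kernel(inputs, train_inputs, gamma):
    train_inputs_square = numpy.sum(train_inputs * train_inputs, axis=1)
    inputs_square = numpy.sum(inputs * inputs, axis=1)
    # row_vector(train_inputs_square) - 2*inputs.train_inputs^T + col_vector(inputs_square)
    d2 = (train_inputs_square.reshape(1, -1)
          - 2 * numpy.dot(inputs, train_inputs.T)
          + inputs_square.reshape(-1, 1))
    return numpy.exp(-d2 / gamma ** 2)   # minibatchsize x n_examples

rng = numpy.random.RandomState(0)
Kx = gaussian_kernel(rng.randn(4, 3), rng.randn(5, 3), gamma=1.0)
assert Kx.shape == (4, 5)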
@@ -134,27 +141,30 @@
 
     def __init__(self):
         self.compile()
 
 class KernelRegressionEquations(KernelPredictorEquations):
-    M = T.matrix() # (n_examples+1) x (n_examples+1)
+    #M = T.matrix() # (n_examples+1) x (n_examples+1)
     inputs = T.matrix() # n_examples x n_inputs
-    G = M[1:,1:]
-    new_G = T.gemm(G,1.,inputs,inputs.T,1.)
-    sumG = T.sum(new_G,axis=0)
+    gamma = T.scalar()
+    inv_gamma2 = 1./(gamma*gamma)
     inputs_square = T.sum(inputs*inputs,axis=1)
+    #new_G = G+T.dot(inputs,inputs.T)
+    #new_G = T.gemm(G,1.,inputs,inputs.T,1.)
+    G = T.exp(-(row_vector(inputs_square)-2*T.dot(inputs,inputs.T)+col_vector(inputs_square))*inv_gamma2)
+    sumG = T.sum(G,axis=0)
 
     __compiled = False
 
     @classmethod
     def compile(cls,linker='c|py'):
         if cls.__compiled:
             return
         def fn(input_vars,output_vars):
             return staticmethod(theano.function(input_vars,output_vars, linker=linker))
 
-        cls.compute_system_matrix = fn([cls.inputs,cls.M],[cls.inputs_square,cls.sumG])
+        cls.compute_system_matrix = fn([cls.inputs,cls.gamma],[cls.inputs_square,cls.sumG,cls.G])
 
         cls.__compiled = True
 
     def __init__(self):
         self.compile()
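Since compute_system_matrix is now compiled from (inputs, gamma) to (inputs_square, sumG, G), its behaviour can be checked against a plain numpy emulation; this is an assumed-equivalent sketch, not part of the module:

import numpy

def compute_system_matrix(inputs, gamma):
    # numpy emulation of the compiled Theano function's three outputs
    inputs_square = numpy.sum(inputs * inputs, axis=1)
    d2 = (inputs_square.reshape(1, -1)
          - 2 * numpy.dot(inputs, inputs.T)
          + inputs_square.reshape(-1, 1))
    G = numpy.exp(-d2 / gamma ** 2)      # symmetric Gaussian Gram matrix
    return inputs_square, G.sum(axis=0), G

xs, sumG, G = compute_system_matrix(numpy.eye(3), gamma=2.0)
assert numpy.allclose(G, G.T)            # Gram matrix must be symmetric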
163 """ | 173 """ |
164 A kernel predictor has parameters theta (a bias vector and a weight matrix alpha) | 174 A kernel predictor has parameters theta (a bias vector and a weight matrix alpha) |
165 it can use to make a non-linear prediction (according to the KernelPredictorEquations). | 175 it can use to make a non-linear prediction (according to the KernelPredictorEquations). |
166 It can compute its output (bias + alpha * kernel(train_inputs,input) and a squared error (||output - target||^2). | 176 It can compute its output (bias + alpha * kernel(train_inputs,input) and a squared error (||output - target||^2). |
167 """ | 177 """ |
168 def __init__(self, theta, gamma, train_inputs, train_inputs_square): | 178 def __init__(self, theta, gamma, train_inputs, train_inputs_square=None): |
169 self.theta=theta | 179 self.theta=theta |
170 self.gamma=gamma | 180 self.gamma=gamma |
171 self.train_inputs=train_inputs | 181 self.train_inputs=train_inputs |
182 if train_inputs_square==None: | |
183 train_inputs_square = numpy.sum(train_inputs*train_inputs,axis=1) | |
172 self.train_inputs_square=train_inputs_square | 184 self.train_inputs_square=train_inputs_square |
173 self.equations = LinearPredictorEquations() | 185 self.equations = KernelPredictorEquations() |
174 | 186 |
175 def compute_outputs(self,inputs): | 187 def compute_outputs(self,inputs): |
176 return self.equations.compute_outputs(inputs,self.theta,self.gamma,self.train_inputs,self.train_inputs_square) | 188 return self.equations.compute_outputs(inputs,self.theta,self.gamma,self.train_inputs,self.train_inputs_square) |
177 def compute_errors(self,inputs,targets): | 189 def compute_errors(self,inputs,targets): |
178 return self.equations.compute_errors(self.compute_outputs(inputs),targets) | 190 return self.equations.compute_errors(self.compute_outputs(inputs),targets) |
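Tying it together, compute_outputs evaluates b + Kx.alpha against the stored training set. A pure-numpy transcription of KernelPredictorEquations.compute_outputs, usable for sanity-checking a trained predictor (the toy theta below is random, not a trained solution):

import numpy

def predict(theta, gamma, train_inputs, inputs):
    b, alpha = theta[0], theta[1:, :]
    tis = numpy.sum(train_inputs * train_inputs, axis=1)
    isq = numpy.sum(inputs * inputs, axis=1)
    Kx = numpy.exp(-(tis.reshape(1, -1)
                     - 2 * numpy.dot(inputs, train_inputs.T)
                     + isq.reshape(-1, 1)) / gamma ** 2)
    return numpy.dot(Kx, alpha) + b      # minibatchsize x n_outputs

rng = numpy.random.RandomState(0)
train_X = rng.randn(5, 3)
theta = rng.randn(6, 2)                  # bias row plus one alpha row per example
print(predict(theta, 1.0, train_X, rng.randn(4, 3)).shape)   # (4, 2)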