comparison nnet_ops.py @ 440:18dbc1c11647

Work on softmax operators
author Pascal Lamblin <lamblinp@iro.umontreal.ca>
date Thu, 21 Aug 2008 13:55:16 -0400
parents 43d9aa93934e
children b3315b252824

############
#
# TENSOR OPS
#


class SoftmaxWithBias(theano.Op):
    """
    An L{Op} for the output of neural-net multiclass classifiers.

    @type x: is a matrix of floats (32 or 64)
    @type b: is a [row] vector of floats (32 or 64), length is number of cols in x

    This L{Op}'s output is softmax(x+b).
    softmax(x[i]) is the i'th distribution over len(x[i]) options.
    """

    nin = 2
    nout = 1
    def __init__(self, **kwargs):
        theano.Op.__init__(self, **kwargs)

    def make_node(self, x, b):
        x = tensor.as_tensor(x)
        b = tensor.as_tensor(b)
        if x.type.ndim != 2 \
                or x.type.dtype not in ['float32', 'float64']:
            raise ValueError('x must be 2-d tensor of floats')
        if b.type.ndim != 1 \
                or b.type.dtype not in ['float32', 'float64']:
            raise ValueError('b must be 1-d tensor of floats')

        sm = x.type.make_result()
        return theano.Apply(self, [x, b], [sm])
    def perform(self, node, input_storage, output_storage):
        x, b = input_storage
        if b.shape[0] != x.shape[1]:
            raise ValueError('b must have the same length as the number of columns in x')

        sm = numpy.zeros_like(x)
        for i in xrange(sm.shape[0]):
            row = x[i] + b
            # subtract the row max before exponentiating, for numerical stability
            sm[i] = numpy.exp(row - numpy.max(row))
            sm[i] *= 1.0 / numpy.sum(sm[i])
        output_storage[0][0] = sm

    def grad(self, (x, b), (g_sm,)):
        sm = softmax_with_bias(x, b)
        dx = SoftmaxWithBiasDx()(g_sm, sm)
        db = tensor.sum(dx, axis = 0)
        return dx, db

    def c_headers(self):
        return ['<iostream>']

    @staticmethod
    def c_code_template():
        # this implementation was lifted from
        # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx

        #TODO: put this into a templated function, in the support code
        #TODO: declare the max of each row as an Op output

        #TODO: set error messages for failures in this code

        #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
        init_decl = """
        npy_intp* Nx = %(x)s->dimensions;

        if (%(x)s->nd != 2)
        {
            PyErr_SetString(PyExc_ValueError, "x not 2d tensor");
            %(fail)s;
        }
        if (%(b)s->nd != 1)
        {
            PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
            %(fail)s;
        }
        if (%(x)s->descr->type_num != PyArray_DOUBLE)
        {
            PyErr_SetString(PyExc_TypeError, "x not float64");
            %(fail)s;
        }
        if (%(b)s->descr->type_num != PyArray_DOUBLE)
        {
            PyErr_SetString(PyExc_TypeError, "b not float64");
            %(fail)s;
        }
        if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]))
        {
            PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments");
            %(fail)s;
        }

        if ((NULL == %(sm)s)
            || (%(sm)s->dimensions[0] != %(x)s->dimensions[0])
            || (%(sm)s->dimensions[1] != %(x)s->dimensions[1]))
        {
            if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s);
            if(!%(sm)s) {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output");
                %(fail)s
            }
        }
        """

        begin_row_loop = """
        for (size_t i = 0; i < Nx[0]; ++i)
        {
            size_t j;
            double sum = 0.0;

            const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i);
            const double* __restrict__ b_i = (double*)(%(b)s->data);
            double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
        """

        inside_row_loop = """
            npy_intp Sx = %(x)s->strides[1]/sizeof(double);
            npy_intp Sb = %(b)s->strides[0]/sizeof(double);
            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);

            size_t row_max_j = 0;
            double row_max = x_i[0] + b_i[0];
            // get the maximum value of the row
            for (j = 0; j < Nx[1]; ++j)
            {
                double row_ij = x_i[j * Sx] + b_i[j * Sb];
                row_max_j = (row_ij > row_max) ? j : row_max_j;
                row_max = (row_ij > row_max) ? row_ij : row_max;
            }

            for (j = 0; j < Nx[1]; ++j)
            {
                double row_ij = x_i[j * Sx] + b_i[j * Sb];
                double sm_ij = exp(row_ij - row_max);
                sum += sm_ij;
                sm_i[j * Ssm] = sm_ij;
            }
            if ((0.0 == sum) || (isinf(sum)))
            {
                // the row max was already discounted; that was our best...
                %(fail)s;
            }

            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
            double sum_inv = 1.0 / sum;
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm] *= sum_inv;
            }
        """

        end_row_loop = """
        }
        """

        return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)


    def c_code(self, node, name, (x, b), (sm,), sub):
        code_template = ''.join(self.c_code_template())
        return code_template % dict(locals(), **sub)

softmax_with_bias = SoftmaxWithBias()

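
# A minimal numpy sketch (an editor's illustration, not part of this
# changeset; the helper name is hypothetical) of the quantity
# SoftmaxWithBias computes.  It mirrors perform() and the C template above,
# and can serve as a reference when testing either one.
def _softmax_with_bias_reference(x, b):
    """Row-wise softmax(x + b), with the row max discounted for stability."""
    row = x + b                            # bias is broadcast over the rows
    row = row - row.max(axis=1)[:, None]   # subtract each row's max
    e = numpy.exp(row)
    return e / e.sum(axis=1)[:, None]      # each row now sums to 1
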
class CrossentropySoftmax1HotWithBias(theano.Op):
    """A special compound L{Op} for the output of neural-net classifiers.

    @type x: is a matrix of floats (32 or 64)
    @type b: is a [row] vector of floats (32 or 64), length is number of cols in x

    This L{Op} has two outputs:
     - KL(softmax(x+b), y)
     - softmax(x+b)

    softmax(x[i]) is the i'th distribution over len(x[i]) options

    y_idx[i] is an integer index, encoding a 1-hot distribution.

    In practice, when we're trying to do classification, we have one row in x
    and y_idx per example, and y_idx[i] is the index of the (correct) class of
    the i'th example.

    """

    # ... (unchanged lines, including the beginning of grad, are not shown
    # in this comparison view) ...

        dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
        db = tensor.sum(dx, axis = [0])
        return dx, db, None

    def c_headers(self): return ['<iostream>']

    @staticmethod
    def c_code_template():
        # this implementation was lifted from
        # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx

        #TODO: put this into a templated function, in the support code
        #TODO: declare the max of each row as an Op output

        #TODO: set error messages for failures in this code

        #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
        (init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \
                SoftmaxWithBias.c_code_template()
        return (init_decl,
                """
        if (%(y_idx)s->nd != 1)
        {
            PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
            %(fail)s;
        }
        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
            && (%(y_idx)s->descr->type_num != PyArray_INT32)
            && (%(y_idx)s->descr->type_num != PyArray_INT16)
            && (%(y_idx)s->descr->type_num != PyArray_INT8))
        {
            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
            %(fail)s;
        }
        if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0])
        {
            PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments");
            %(fail)s;
        }

        // ... (unchanged nll allocation lines not shown in this comparison view) ...
        {
            PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output");
            %(fail)s;
        }
    }
        """,
                begin_row_loop,
                """
            const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
            double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
        """,
                inside_row_loop,
                """
            if (y_i >= Nx[1])
            {
                %(fail)s;
            }

            // inside_row_loop discounted row_max from every exponential,
            // so add it back to recover the true log-sum-exp
            nll_i[0] = - x_i[y_i*Sx]
                       - b_i[y_i*Sb]
                       + row_max
                       + log(sum);
        """,
                end_row_loop)


    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
        y_idx_type = node.inputs[2].type.dtype_specs()[1]
        code_template = ''.join(self.c_code_template())
        return code_template % dict(locals(), **sub)

crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()

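# Again an editor's sketch (not part of this changeset; helper name
# hypothetical) of the two outputs above.  Because inside_row_loop discounts
# row_max from every exponential, log(sum) understates the true log-sum-exp
# by exactly row_max, which is why the C code adds row_max back into nll_i[0].
def _crossentropy_softmax_1hot_reference(x, b, y_idx):
    """Plain-numpy (nll, sm) matching the Op's two outputs."""
    row = x + b
    row_max = row.max(axis=1)
    e = numpy.exp(row - row_max[:, None])
    sm = e / e.sum(axis=1)[:, None]                  # softmax(x+b)
    nll = (- row[numpy.arange(x.shape[0]), y_idx]    # -(x[i,y] + b[y])
           + row_max + numpy.log(e.sum(axis=1)))     # + log sum_j exp(x[i,j] + b[j])
    return nll, sm
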
class CrossentropySoftmax1HotWithBiasDx (theano.Op):
    nin=3
    nout=1