pylearn: diff nnet_ops.py @ 117:3ef569b92fba
ported nnet_ops to new theano
author   | James Bergstra <bergstrj@iro.umontreal.ca>
date     | Wed, 07 May 2008 15:28:17 -0400
parents  | 76e5c0f37165
children | 2ca8dccba270
--- a/nnet_ops.py	Wed May 07 13:07:33 2008 -0400
+++ b/nnet_ops.py	Wed May 07 15:28:17 2008 -0400
@@ -1,5 +1,5 @@
 import theano
-from theano import tensor, gof, scalar
+from theano import tensor, scalar
 import numpy
 
 ############
@@ -7,7 +7,7 @@
 # SCALAR OPS
 #
 
-class ScalarSigmoid(scalar.FloatUnaryScalarOp):
+class ScalarSigmoid(scalar.UnaryScalarOp):
     @staticmethod
     def st_impl(x):
         if x < -30.0:
@@ -20,7 +20,7 @@
     def grad(self, (x,), (gz,)):
         y = scalar_sigmoid(x)
         return [gz * y * (1.0 - y)]
-    def c_foreach(self, (x,), (z,), sub):
+    def c_code(self, (x,), (z,), sub):
         if 'float' in self.inputs[0].dtype:
             return """%(z)s =
             %(x)s < -30.0
@@ -28,12 +28,11 @@
             : %(x)s > 30.0 ? 1.0
             : 1.0 /(1.0+exp(-%(x)s));""" % locals()
-        raise NotImplementedError('only floatingpoint is implemented')
-scalar_sigmoid = gof.op.constructor(ScalarSigmoid)
-Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace =\
-        tensor.broadcast(ScalarSigmoid, 'Sigmoid')
+        return NotImplemented#Error('only floatingpoint is implemented')
+scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
+sigmoid = tensor.Elemwise(scalar_sigmoid, name='sigmoid')
 
 
-class ScalarSoftplus(scalar.FloatUnaryScalarOp):
+class ScalarSoftplus(scalar.UnaryScalarOp):
     @staticmethod
     def static_impl(x):
         if x < -30.0:
@@ -45,7 +44,7 @@
         return ScalarSoftplus.static_impl(x)
     def grad(self, (x,), (gz,)):
         return [gz * scalar_sigmoid(x)]
-    def c_foreach(self, (x,), (z,), sub):
+    def c_code(self, (x,), (z,), sub):
         if 'float' in self.inputs[0].dtype:
             return """%(z)s =
             %(x)s < -30.0
@@ -53,10 +52,9 @@
             : %(x)s > 30.0 ? %(x)s
             : log1p(exp(%(x)s));""" % locals()
-        raise NotImplementedError('only floating point x is implemented')
-scalar_softplus = gof.op.constructor(ScalarSoftplus)
-Softplus, softplus, SoftplusInplace, softplus_inplace =\
-        tensor.broadcast(ScalarSoftplus, 'Softplus')
+        return NotImplemented#Error('only floating point x is implemented')
+scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
+softplus = tensor.Elemwise(scalar_softplus, name='softplus')
 
 
 ############
@@ -64,8 +62,7 @@
 # TENSOR OPS
 #
 
-
-class CrossentropySoftmax1HotWithBias(gof.op.Op):
+class CrossentropySoftmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.
 
     @type x: is a matrix of floats (32 or 64)
@@ -90,28 +87,31 @@
     """
     nin=3
     nout=2
-    def __init__(self, x, b, y_idx, **kwargs):
-        x = tensor._as_tensor(x)
-        b = tensor._as_tensor(b)
-        y_idx = tensor._as_tensor(y_idx)
-        if len(x.broadcastable) != 2 \
-                or x.dtype not in ['float32', 'float64']:
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self, **kwargs)
+
+    def make_node(self, x, b, y_idx):
+        x = tensor.as_tensor(x)
+        b = tensor.as_tensor(b)
+        y_idx = tensor.as_tensor(y_idx)
+        if x.type.ndim != 2 \
+                or x.type.dtype not in ['float32', 'float64']:
             raise ValueError('x must be 2-d tensor of floats')
-        if len(b.broadcastable) != 1 \
-                or x.dtype not in ['float32', 'float64']:
+        if b.type.ndim != 1 \
+                or x.type.dtype not in ['float32', 'float64']:
             raise ValueError('x must be 1-d tensor of floats')
-        if len(y_idx.broadcastable) != 1 \
-                or y_idx.dtype not in ['int32', 'int64']:
+        if y_idx.type.ndim != 1 \
+                or y_idx.type.dtype not in ['int32', 'int64']:
             raise ValueError('x must be 1-d tensor of ints')
 
         # TODO: Is this correct? It used to be y, not y_idx
-        nll = tensor.Tensor(x.dtype, y_idx.broadcastable)
+        nll = tensor.Tensor(x.type.dtype,
+                y_idx.type.broadcastable).make_result()
         # nll = Tensor(x.dtype, y.broadcastable)
-        sm = tensor.Tensor(x.dtype, x.broadcastable)
-        self.inputs = [x, b, y_idx]
-        self.outputs = [nll, sm]
-    def perform(self):
-        x, b, y_idx = [i.data for i in self.inputs]
+        sm = x.type.make_result()
+        return theano.Apply(self, [x, b, y_idx],[nll, sm])
+    def perform(self, node, input_storage, output_storage):
+        x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
         if y_idx.shape[0] != x.shape[0]:
@@ -124,14 +124,14 @@
             sm[i] = numpy.exp(row - numpy.max(row)) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
             nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
-        self.outputs[0].data = nll
-        self.outputs[1].data = sm
+        output_storage[0][0] = nll
+        output_storage[1][0] = sm
     def grad(self, (x, b, y_idx), (g_nll, g_sm)):
         if g_sm is not None:
            raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
-        dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0]
-        db = tensor.Sum(dx, axis = [0]).outputs[0]
+        dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
+        db = tensor.sum(dx, axis = [0])
         return dx, db, None
 
     def c_headers(self): return ['<iostream>']
@@ -280,27 +280,26 @@
         //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum);
       }
     """ % dict(locals(), **sub)
+crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
 
-crossentropy_softmax_1hot_with_bias = \
-        gof.op.constructor(CrossentropySoftmax1HotWithBias)
-
-class CrossentropySoftmax1HotWithBiasDx (gof.op.Op):
+class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
    """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
-    def __init__(self, dy, sm, y_idx,**kwargs):
-        dy = tensor._as_tensor(dy)
-        sm = tensor._as_tensor(sm)
-        y_idx = tensor._as_tensor(y_idx)
-        self.inputs = [dy, sm, y_idx]
-        self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)]
-    def perform(self):
-        dy,sm,y_idx = [i.data for i in self.inputs]
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self,**kwargs)
+    def make_node(self, dy, sm, y_idx,**kwargs):
+        dy = tensor.as_tensor(dy)
+        sm = tensor.as_tensor(sm)
+        y_idx = tensor.as_tensor(y_idx)
+        return theano.Apply(self, [dy, sm, y_idx],[sm.type.make_result()])
+    def perform(self, node, input_storage, output_storage):
+        dy,sm,y_idx = input_storage
         dx = numpy.zeros_like(sm)
         for i in xrange(sm.shape[0]):
             dx[i] = dy[i] * sm[i] #vector scale
             dx[i, y_idx[i]] -= dy[i] #scalar decrement
-        self.outputs[0].data = dx
+        output_storage[0][0] = dx
     def grad(self, *args): raise NotImplementedError()
     def c_code(self, (dnll, sm, y_idx), (dx,), sub):
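
For reference, here is a minimal sketch of the new-style Op interface this changeset ports to, using only the API that appears in the diff itself (theano.Op, theano.Apply, tensor.as_tensor, Result.type.make_result(), and the perform(self, node, input_storage, output_storage) signature). The op name DoubleOp is illustrative and not part of nnet_ops.py; treat this as a sketch of the pattern, not project code.

    import numpy
    import theano
    from theano import tensor

    class DoubleOp(theano.Op):
        """Illustrative new-style Op: returns 2*x."""
        def make_node(self, x):
            # Symbolic step: wrap the input as a tensor Result and allocate
            # an output Result of the same type, as make_node does above.
            x = tensor.as_tensor(x)
            return theano.Apply(self, [x], [x.type.make_result()])
        def perform(self, node, input_storage, output_storage):
            # Numeric step: read input values from input_storage and write
            # the result into output_storage[0][0], as the ported perform() does.
            x, = input_storage
            output_storage[0][0] = 2.0 * numpy.asarray(x)

    double = DoubleOp()

The changeset applies the same split to CrossentropySoftmax1HotWithBias and its Dx op: graph construction moves from __init__ into make_node, perform gains the (node, input_storage, output_storage) signature, and the scalar sigmoid/softplus ops are wrapped with tensor.Elemwise instead of tensor.broadcast.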