Mercurial > pylearn

changeset 124:9c4f522526bf
Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn

author      Frederic Bastien <bastienf@iro.umontreal.ca>
date        Wed, 07 May 2008 16:20:40 -0400
parents     2ca8dccba270 (diff), 8b520423d4ee (current diff)
children    7d8b3d6dd4e9
diffstat    6 files changed, 150 insertions(+), 118 deletions(-)
--- a/_nnet_ops.py	Wed May 07 16:17:25 2008 -0400
+++ b/_nnet_ops.py	Wed May 07 16:20:40 2008 -0400
@@ -9,29 +9,31 @@
     def setUp(self):
         numpy.random.seed(9999)
     def test_elemwise(self):
-        TT.verify_grad(self, Sigmoid, [numpy.random.rand(3,4)])
+        TT.verify_grad(self, sigmoid, [numpy.random.rand(3,4)])

 class T_softplus(unittest.TestCase):
     def setUp(self):
         numpy.random.seed(9999)
     def test_elemwise(self):
-        TT.verify_grad(self, Softplus, [numpy.random.rand(3,4)])
+        TT.verify_grad(self, softplus, [numpy.random.rand(3,4)])

 class T_CrossentropySoftmax1Hot(unittest.TestCase):
     def setUp(self):
         numpy.random.seed(9999)
     def test0(self):
         y_idx = [0,1,3]
-        def output1(a,b):
-            return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1]
-        TT.verify_grad(self, output1, [numpy.random.rand(3,4),
+        class Dummy(object):
+            def make_node(self, a,b):
+                return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1]
+        TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4),
                 numpy.random.rand(4)])
     def test1(self):
         y_idx = [0,1,3]
-        def output1(a):
-            return crossentropy_softmax_1hot(a, y_idx)[0:1]
-        TT.verify_grad(self, output1, [numpy.random.rand(3,4)])
+        class Dummy(object):
+            def make_node(self, a):
+                return crossentropy_softmax_1hot(a, y_idx)[0:1]
+        TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)])
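
For reference, TT.verify_grad compares an op's symbolic gradient against a finite-difference estimate. The numpy sketch below illustrates the same idea for the sigmoid case; the helper name finite_diff_check is illustrative only and is not part of pylearn or theano.

    # illustrative sketch, not part of pylearn
    import numpy

    def finite_diff_check(f, grad_f, x, eps=1e-6, tol=1e-4):
        """Compare an analytic gradient against central finite differences."""
        num = numpy.zeros_like(x)
        for i in range(x.size):
            d = numpy.zeros_like(x)
            d.flat[i] = eps
            num.flat[i] = (f(x + d) - f(x - d)) / (2 * eps)
        return numpy.allclose(num, grad_f(x), atol=tol)

    sigmoid = lambda x: 1.0 / (1.0 + numpy.exp(-x))
    # d/dx sum(sigmoid(x)) = sigmoid(x) * (1 - sigmoid(x)), matching ScalarSigmoid.grad
    f = lambda x: sigmoid(x).sum()
    grad_f = lambda x: sigmoid(x) * (1 - sigmoid(x))
    assert finite_diff_check(f, grad_f, numpy.random.rand(3, 4))
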
--- a/learner.py	Wed May 07 16:17:25 2008 -0400
+++ b/learner.py	Wed May 07 16:20:40 2008 -0400
@@ -1,6 +1,6 @@
-from dataset import *
-from compile import Function
+from dataset import AttributesHolder
+import compile

 class Learner(AttributesHolder):
     """Base class for learning algorithms, provides an interface
@@ -173,8 +173,8 @@
         if key not in self.use_functions_dictionary:
             use_input_attributes = self.useInputAttributes()
             use_output_attributes = self.useOutputAttributes()
-            complete_f = Function(self.names2OpResults(input_fields+use_input_attributes),
-                                  self.names2OpResults(output_fields+use_output_attributes))
+            complete_f = compile.function(self.names2OpResults(input_fields+use_input_attributes),
+                                          self.names2OpResults(output_fields+use_output_attributes))
             def f(*input_field_values):
                 input_attribute_values = self.names2attributes(use_input_attributes)
                 results = complete_f(*(input_field_values + input_attribute_values))
@@ -273,12 +273,13 @@

     def __init__(self):
         TLearner.__init__(self)
-        self.update_minibatch_function =
-            Function(self.names2OpResults(self.updateMinibatchOutputAttributes()+
-                                          self.updateMinibatchInputFields()),
+        self.update_minibatch_function = compile.function
+            (self.names2OpResults(self.updateMinibatchOutputAttributes()+
+                                  self.updateMinibatchInputFields()),
                      self.names2OpResults(self.updateMinibatchOutputAttributes()))
-        self.update_end_function = Function(self.names2OpResults(self.updateEndInputAttributes()),
-                                            self.names2OpResults(self.updateEndOutputAttributes()))
+        self.update_end_function = compile.function
+            (self.names2OpResults(self.updateEndInputAttributes()),
+             self.names2OpResults(self.updateEndOutputAttributes()))

     def updateMinibatchInputFields(self):
         raise AbstractFunction()
@@ -310,7 +311,9 @@
         # make sure all required fields are allocated and initialized
         self.allocate(minibatch)
         self.setAttributes(self.updateMinibatchOutputAttributes(),
-                           self.update_minibatch_function(*(self.names2attributes(self.updateMinibatchInputAttributes()))
+                           # concatenate the attribute values and field values and then apply update fn
+                           self.update_minibatch_function(*(self.names2attributes
+                                                            (self.updateMinibatchInputAttributes()))
                                                           + minibatch(self.updateMinibatchInputFields())))

     def isLastEpoch(self):
@@ -347,17 +350,40 @@
     Specialization of MinibatchUpdatesTLearner in which the minibatch updates
     are obtained by performing an online (minibatch-based) gradient step.

-    Sub-classes must define the following methods:
-
+    Sub-classes must define the following:
+
+      self._learning_rate (may be changed by the sub-class between epochs or minibatches)
+
+      self.lossAttribute() = name of the loss field
+
     """
     def __init__(self,truly_online=False):
        """
        If truly_online then only one pass is made through the training set passed to update().
-
+
+       SUBCLASSES SHOULD CALL THIS CONSTRUCTOR ONLY AFTER HAVING DEFINED ALL THEIR THEANO FORMULAS
        """
        self.truly_online=truly_online
+
+       # create the formulas for the gradient update
+       old_params = [self.__getattr__("_"+name) for name in self.parameterAttributes()]
+       new_params_names = ["_new_"+name for name in self.parameterAttributes()]
+       loss = self.__getattr__(self.lossAttribute())
+       self.setAttributes(new_params_names,
+                          [t.add_inplace(self.param,
+                                         self._learning_rate*t.grad(loss,param))
+                           for param in old_params])

     def isLastEpoch(self):
         return self.truly_online

+    def updateMinibatchInputAttributes(self):
+        return self.parameterAttributes()
+
+    def updateMinibatchOutputAttributes(self):
+        return ["_new"+name for name in self.parameterAttributes()]
+
+    def updateEndInputAttributes(self):
+        return self.parameterAttributes()
+
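
For reference, the new constructor builds one symbolic update expression per parameter of the form add_inplace(param, learning_rate * grad(loss, param)). The numpy sketch below shows the corresponding numeric minibatch step; note that minimizing a loss normally moves in the negative gradient direction, so the sketch subtracts the gradient (with the formula above, the learning rate would have to be negative to descend). The function name sgd_minibatch_step is illustrative only, not part of learner.py.

    # illustrative sketch, not part of pylearn
    import numpy

    def sgd_minibatch_step(params, grads, learning_rate):
        """One minibatch gradient-descent step: new_param = param - learning_rate * grad."""
        return [p - learning_rate * g for p, g in zip(params, grads)]

    # example with two parameters and stand-in gradients
    W = numpy.random.rand(4, 3)
    b = numpy.zeros(3)
    gW, gb = numpy.ones_like(W), numpy.ones_like(b)
    W, b = sgd_minibatch_step([W, b], [gW, gb], learning_rate=0.1)
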
--- a/linear_regression.py	Wed May 07 16:17:25 2008 -0400
+++ b/linear_regression.py	Wed May 07 16:20:40 2008 -0400
@@ -114,7 +114,7 @@
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)

-        OneShotTLearner.__init__(self)
+        MinibatchUpdatesTLearner.__init__(self)

     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
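
For context, the surrounding code accumulates the normal-equation statistics XtX and XtY over minibatches and re-solves for theta (the real code does this symbolically with add_inplace and solve_inplace). A plain-numpy sketch of the same computation; the helper name update_normal_equations is illustrative only:

    # illustrative sketch, not part of pylearn
    import numpy

    def update_normal_equations(XtX, XtY, inputs, targets):
        """Accumulate linear-regression sufficient statistics for one minibatch
        and re-solve for theta, where theta has shape (n_outputs, 1+n_inputs)."""
        extended = numpy.hstack([numpy.ones((inputs.shape[0], 1)), inputs])  # prepend bias column
        XtX += numpy.dot(extended.T, extended)
        XtY += numpy.dot(extended.T, targets)
        theta = numpy.linalg.solve(XtX, XtY).T
        return XtX, XtY, theta
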
--- a/mlp.py	Wed May 07 16:17:25 2008 -0400
+++ b/mlp.py	Wed May 07 16:20:40 2008 -0400
@@ -1,7 +1,7 @@

 from learner import *
 from theano import tensor as t
-from theano.scalar import as_scalar
+from nnet_ops import *

 # this is one of the simplest example of learner, and illustrates
 # the use of theano
@@ -64,6 +64,27 @@

     """

+    def __init__(self,n_hidden,n_classes,learning_rate,init_range=1.):
+        self._n_outputs = n_classes
+        self._n_hidden = n_hidden
+        self._init_range = init_range
+        self.learning_rate = learning_rate # this is the float
+        self._learning_rate = t.scalar('learning_rate') # this is the symbol
+        self._input = t.matrix('input') # n_examples x n_inputs
+        self._target = t.matrix('target','int32') # n_examples x n_outputs
+        self._L2_regularizer = t.scalar('L2_regularizer')
+        self._W1 = t.matrix('W1')
+        self._W2 = t.matrix('W2')
+        self._b1 = t.row('b1')
+        self._b2 = t.row('b2')
+        self._regularization_term = self._L2_regularizer * (t.dot(self._W1,self._W1) + t.dot(self._W2,self._W2))
+        self._output_activations =self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T)
+        self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target)
+        self._output_class = t.argmax(self._output,1)
+        self._class_error = self._output_class != self._target
+        self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
+        MinibatchUpdatesTLearner.__init__(self)
+
     def attributeNames(self):
         return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
@@ -82,64 +103,39 @@
     def updateMinibatchInputFields(self):
         return ["input","target"]

-    def updateMinibatchInputAttributes(self):
-        return self.parameterAttributes()
-
-    def updateMinibatchOutputAttributes(self):
-        return self.parameterAttributes()
-
-    def updateEndInputAttributes(self):
-        return self.parameterAttributes()
-
     def updateEndOutputAttributes(self):
         return ["regularization_term"]

+    def lossAttribute(self):
+        return "minibatch_criterion"
+
     def defaultOutputFields(self, input_fields):
         output_fields = ["output", "output_class",]
         if "target" in input_fields:
             output_fields += ["class_error", "nll"]
         return output_fields

-    def __init__(self):
-        self._input = t.matrix('input') # n_examples x n_inputs
-        self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
-        self._theta = t.matrix('theta')
-        self._W = self._theta[:,1:]
-        self._b = self._theta[:,0]
-        self._XtX = t.matrix('XtX')
-        self._XtY = t.matrix('XtY')
-        self._extended_input = t.prepend_one_to_each_row(self._input)
-        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
-        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
-        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
-        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
-        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
-
-        OneShotTLearner.__init__(self)
-
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
-        minibatch_n_outputs = minibatch["target"].shape[1]
         if not self._n_inputs:
-            self._n_inputs = minibatch_n_inputs
-            self._n_outputs = minibatch_n_outputs
-            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
-            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
-            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+            self._n_inputs = minibatch_n_inputs
+            self.b1 = numpy.zeros(self._n_hidden)
+            self.b2 = numpy.zeros(self._n_outputs)
             self.forget()
-        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
-            # if the input or target changes dimension on the fly, we resize and forget everything
+        elif self._n_inputs!=minibatch_n_inputs:
+            # if the input changes dimension on the fly, we resize and forget everything
            self.forget()

     def forget(self):
-        if self._n_inputs and self._n_outputs:
-            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
-            self.XtY.resize((1+self.n_inputs,self.n_outputs))
-            self.XtX.data[:,:]=0
-            self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+        if self._n_inputs:
+            r = self._init_range/math.sqrt(self._n_inputs)
+            self.W1 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_hidden,self._n_inputs))
+            r = self._init_range/math.sqrt(self._n_hidden)
+            self.W2 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_outputs,self._n_hidden))
+            self.b1[:]=0
+            self.b2[:]=0

 class MLP(MinibatchUpdatesTLearner):
@@ -236,7 +232,7 @@
     def __init__(self):
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
+        self._L2_regularizer = t.scalar('L2_regularizer')
         self._theta = t.matrix('theta')
         self._W = self._theta[:,1:]
         self._b = self._theta[:,0]
@@ -245,7 +241,7 @@
         self._extended_input = t.prepend_one_to_each_row(self._input)
         self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
         self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
+        self._regularizer = self._L2_regularizer * t.dot(self._W,self._W)
         self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
@@ -272,5 +268,5 @@
         self.XtY.resize((1+self.n_inputs,self.n_outputs))
         self.XtX.data[:,:]=0
         self.XtY.data[:,:]=0
-        numpy.diag(self.XtX.data)[1:]=self.lambda
+        numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
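
For reference, the new constructor defines the forward computation b2 + tanh(b1 + input . W1^T) . W2^T, followed by a softmax and the negative log-likelihood of the target class. A plain-numpy sketch of that forward pass (the function name mlp_forward is illustrative and not part of mlp.py):

    # illustrative sketch, not part of pylearn
    import numpy

    def mlp_forward(x, W1, b1, W2, b2, y_idx):
        """Forward pass matching the symbolic graph above."""
        h = numpy.tanh(b1 + numpy.dot(x, W1.T))     # (n_examples, n_hidden)
        a = b2 + numpy.dot(h, W2.T)                 # (n_examples, n_classes)
        a = a - a.max(axis=1, keepdims=True)        # shift for numerical stability
        sm = numpy.exp(a)
        sm /= sm.sum(axis=1, keepdims=True)         # row-wise softmax
        nll = -numpy.log(sm[numpy.arange(len(y_idx)), y_idx])
        return sm, nll
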
--- a/nnet_ops.py	Wed May 07 16:17:25 2008 -0400
+++ b/nnet_ops.py	Wed May 07 16:20:40 2008 -0400
@@ -1,5 +1,5 @@
 import theano
-from theano import tensor, gof, scalar
+from theano import tensor, scalar
 import numpy

 ############
@@ -7,7 +7,7 @@
 # SCALAR OPS
 #

-class ScalarSigmoid(scalar.FloatUnaryScalarOp):
+class ScalarSigmoid(scalar.UnaryScalarOp):
     @staticmethod
     def st_impl(x):
         if x < -30.0:
@@ -20,7 +20,7 @@
     def grad(self, (x,), (gz,)):
         y = scalar_sigmoid(x)
         return [gz * y * (1.0 - y)]
-    def c_foreach(self, (x,), (z,), sub):
+    def c_code(self, (x,), (z,), sub):
         if 'float' in self.inputs[0].dtype:
             return """%(z)s =
             %(x)s < -30.0
@@ -28,12 +28,11 @@
             : %(x)s > 30.0
             ? 1.0
             : 1.0 /(1.0+exp(-%(x)s));""" % locals()
-        raise NotImplementedError('only floatingpoint is implemented')
-scalar_sigmoid = gof.op.constructor(ScalarSigmoid)
-Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace =\
-        tensor.broadcast(ScalarSigmoid, 'Sigmoid')
+        return NotImplemented#Error('only floatingpoint is implemented')
+scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
+sigmoid = tensor.Elemwise(scalar_sigmoid, name='sigmoid')

-class ScalarSoftplus(scalar.FloatUnaryScalarOp):
+class ScalarSoftplus(scalar.UnaryScalarOp):
     @staticmethod
     def static_impl(x):
         if x < -30.0:
@@ -45,7 +44,7 @@
         return ScalarSoftplus.static_impl(x)
     def grad(self, (x,), (gz,)):
         return [gz * scalar_sigmoid(x)]
-    def c_foreach(self, (x,), (z,), sub):
+    def c_code(self, (x,), (z,), sub):
         if 'float' in self.inputs[0].dtype:
             return """%(z)s =
             %(x)s < -30.0
@@ -53,10 +52,9 @@
             : %(x)s > 30.0
             ? %(x)s
             : log1p(exp(%(x)s));""" % locals()
-        raise NotImplementedError('only floating point x is implemented')
-scalar_softplus = gof.op.constructor(ScalarSoftplus)
-Softplus, softplus, SoftplusInplace, softplus_inplace =\
-        tensor.broadcast(ScalarSoftplus, 'Softplus')
+        return NotImplemented#Error('only floating point x is implemented')
+scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
+softplus = tensor.Elemwise(scalar_softplus, name='softplus')


 ############
@@ -64,8 +62,7 @@
 # TENSOR OPS
 #

-
-class CrossentropySoftmax1HotWithBias(gof.op.Op):
+class CrossentropySoftmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.

     @type x: is a matrix of floats (32 or 64)
@@ -90,28 +87,31 @@
     """
     nin=3
     nout=2
-    def __init__(self, x, b, y_idx, **kwargs):
-        x = tensor._as_tensor(x)
-        b = tensor._as_tensor(b)
-        y_idx = tensor._as_tensor(y_idx)
-        if len(x.broadcastable) != 2 \
-                or x.dtype not in ['float32', 'float64']:
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self, **kwargs)
+
+    def make_node(self, x, b, y_idx):
+        x = tensor.as_tensor(x)
+        b = tensor.as_tensor(b)
+        y_idx = tensor.as_tensor(y_idx)
+        if x.type.ndim != 2 \
+                or x.type.dtype not in ['float32', 'float64']:
             raise ValueError('x must be 2-d tensor of floats')
-        if len(b.broadcastable) != 1 \
-                or x.dtype not in ['float32', 'float64']:
-            raise ValueError('x must be 1-d tensor of floats')
-        if len(y_idx.broadcastable) != 1 \
-                or y_idx.dtype not in ['int32', 'int64']:
-            raise ValueError('x must be 1-d tensor of ints')
+        if b.type.ndim != 1 \
+                or x.type.dtype not in ['float32', 'float64']:
+            raise ValueError('b must be 1-d tensor of floats')
+        if y_idx.type.ndim != 1 \
+                or y_idx.type.dtype not in ['int32', 'int64']:
+            raise ValueError('y_idx must be 1-d tensor of ints')

         # TODO: Is this correct? It used to be y, not y_idx
-        nll = tensor.Tensor(x.dtype, y_idx.broadcastable)
+        nll = tensor.Tensor(x.type.dtype,
+                y_idx.type.broadcastable).make_result()
 #        nll = Tensor(x.dtype, y.broadcastable)
-        sm = tensor.Tensor(x.dtype, x.broadcastable)
-        self.inputs = [x, b, y_idx]
-        self.outputs = [nll, sm]
-    def perform(self):
-        x, b, y_idx = [i.data for i in self.inputs]
+        sm = x.type.make_result()
+        return theano.Apply(self, [x, b, y_idx],[nll, sm])
+    def perform(self, node, input_storage, output_storage):
+        x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
         if y_idx.shape[0] != x.shape[0]:
@@ -124,14 +124,14 @@
             sm[i] = numpy.exp(row - numpy.max(row)) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
             nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
-        self.outputs[0].data = nll
-        self.outputs[1].data = sm
+        output_storage[0][0] = nll
+        output_storage[1][0] = sm
     def grad(self, (x, b, y_idx), (g_nll, g_sm)):
         if g_sm is not None:
             raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
-        dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0]
-        db = tensor.Sum(dx, axis = [0]).outputs[0]
+        dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
+        db = tensor.sum(dx, axis = [0])
         return dx, db, None
     def c_headers(self): return ['<iostream>']
@@ -280,27 +280,26 @@
           //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum);
         }
         """ % dict(locals(), **sub)
+crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()

-crossentropy_softmax_1hot_with_bias = \
-        gof.op.constructor(CrossentropySoftmax1HotWithBias)
-
-class CrossentropySoftmax1HotWithBiasDx (gof.op.Op):
+class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
-    def __init__(self, dy, sm, y_idx,**kwargs):
-        dy = tensor._as_tensor(dy)
-        sm = tensor._as_tensor(sm)
-        y_idx = tensor._as_tensor(y_idx)
-        self.inputs = [dy, sm, y_idx]
-        self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)]
-    def perform(self):
-        dy,sm,y_idx = [i.data for i in self.inputs]
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self,**kwargs)
+    def make_node(self, dy, sm, y_idx,**kwargs):
+        dy = tensor.as_tensor(dy)
+        sm = tensor.as_tensor(sm)
+        y_idx = tensor.as_tensor(y_idx)
+        return theano.Apply(self, [dy, sm, y_idx],[sm.type.make_result()])
+    def perform(self, node, input_storage, output_storage):
+        dy,sm,y_idx = input_storage
         dx = numpy.zeros_like(sm)
         for i in xrange(sm.shape[0]):
             dx[i] = dy[i] * sm[i] #vector scale
             dx[i, y_idx[i]] -= dy[i] #scalar decrement
-        self.outputs[0].data = dx
+        output_storage[0][0] = dx
     def grad(self, *args):
         raise NotImplementedError()
     def c_code(self, (dnll, sm, y_idx), (dx,), sub):
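
For reference, the numpy sketches below mirror the two perform() bodies above: the fused softmax-plus-cross-entropy forward pass, and its gradient with respect to x, which reduces to dy * (softmax - one_hot(y_idx)). Function names are illustrative and not part of nnet_ops.py.

    # illustrative sketch, not part of pylearn
    import numpy

    def crossentropy_softmax_1hot_with_bias_np(x, b, y_idx):
        """Row-wise softmax of x+b and negative log-likelihood of the targets in y_idx."""
        sm = numpy.zeros_like(x)
        nll = numpy.zeros(x.shape[0])
        for i in range(x.shape[0]):
            row = x[i] + b
            sm[i] = numpy.exp(row - numpy.max(row))   # softmax, shifted for stability
            sm[i] /= numpy.sum(sm[i])
            nll[i] = -numpy.log(sm[i, y_idx[i]])      # cross-entropy of the target class
        return nll, sm

    def crossentropy_softmax_dx_np(dy, sm, y_idx):
        """Gradient wrt x: dx[i] = dy[i] * sm[i]; dx[i, y_idx[i]] -= dy[i]."""
        dx = dy[:, None] * sm
        dx[numpy.arange(len(y_idx)), y_idx] -= dy
        return dx
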