pylearn changeset 205:d1359de1ea13
Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn
| author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
|---|---|
| date | Wed, 14 May 2008 14:06:52 -0400 |
| parents | ebbb0e749565 (diff) 62c7527c9ec1 (current diff) |
| children | f2ddc795ec49 |
| files | mlp.py |
| diffstat | 4 files changed, 338 insertions(+), 13 deletions(-) |
--- a/mlp.py	Wed May 14 14:06:15 2008 -0400
+++ b/mlp.py	Wed May 14 14:06:52 2008 -0400
@@ -11,6 +11,77 @@
 import math
 from misc import *
 
+def function(inputs, outputs, linker='c&py'):
+    return theano.function(inputs, outputs, unpack_single=False,linker=linker)
+
+def randshape(*shape): return (numpy.random.rand(*shape) -0.5) * 0.001
+
+class ManualNNet(object):
+    def __init__(self, ninputs, nhid, nclass, lr, nepochs,
+            linker='c&yp',
+            hidden_layer=None):
+        class Vars:
+            def __init__(self, lr, l2coef=0.0):
+                lr = t.constant(lr)
+                l2coef = t.constant(l2coef)
+                input = t.matrix('input') # n_examples x n_inputs
+                target = t.ivector('target') # n_examples x 1
+                W2 = t.matrix('W2')
+                b2 = t.vector('b2')
+
+                if hidden_layer:
+                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
+                else:
+                    W1 = t.matrix('W1')
+                    b1 = t.vector('b1')
+                    hid = t.tanh(b1 + t.dot(input, W1))
+                    hid_params = [W1, b1]
+                    hid_regularization = l2coef * t.sum(W1*W1)
+                    hid_ivals = [randshape(ninputs, nhid), randshape(nhid)]
+
+                params = [W2, b2] + hid_params
+                ivals = [randshape(nhid, nclass), randshape(nclass)]\
+                        + hid_ivals
+                nll, predictions = crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target)
+                regularization = l2coef * t.sum(W2*W2) + hid_regularization
+                output_class = t.argmax(predictions,1)
+                loss_01 = t.neq(output_class, target)
+                g_params = t.grad(nll + regularization, params)
+                new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
+                self.__dict__.update(locals()); del self.self
+        self.nhid = nhid
+        self.nclass = nclass
+        self.nepochs = nepochs
+        self.v = Vars(lr)
+        self.params = None
+
+    def update(self, trainset):
+        params = self.v.ivals
+        update_fn = function(
+                [self.v.input, self.v.target] + self.v.params,
+                [self.v.nll] + self.v.new_params)
+        for i in xrange(self.nepochs):
+            for input, target in trainset.minibatches(['input', 'target'],
+                    minibatch_size=min(32, len(trainset))):
+                dummy = update_fn(input, target[:,0], *params)
+                if 0: print dummy[0] #the nll
+        return self.use
+    __call__ = update
+
+    def use(self, dset,
+            output_fieldnames=['output_class'],
+            test_stats_collector=None,
+            copy_inputs=False,
+            put_stats_in_output_dataset=True,
+            output_attributes=[]):
+        inputs = [self.v.input, self.v.target] + self.v.params
+        fn = function(inputs, [getattr(self.v, name) for name in output_fieldnames])
+        target = dset.fields()['target'] if ('target' in dset.fields()) else numpy.zeros((1,1),dtype='int64')
+        return ApplyFunctionDataSet(dset,
+                lambda input, target: fn(input, target[:,0], *self.v.ivals),
+                output_fieldnames)
+
+
 class OneHiddenLayerNNetClassifier(OnlineGradientTLearner):
     """
     Implement a straightforward classicial feedforward
@@ -67,7 +138,7 @@
       - 'regularization_term'
 
     """
-    def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None):
+    def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None,linker='c|py'):
         self._n_inputs = n_inputs
         self._n_outputs = n_classes
         self._n_hidden = n_hidden
@@ -78,7 +149,7 @@
         self.L2_regularizer = L2_regularizer
         self._learning_rate = t.scalar('learning_rate') # this is the symbol
         self._input = t.matrix('input') # n_examples x n_inputs
-        self._target = t.imatrix('target') # n_examples x 1
+        self._target = t.lmatrix('target') # n_examples x 1
         self._target_vector = self._target[:,0]
         self._L2_regularizer = t.scalar('L2_regularizer')
         self._W1 = t.matrix('W1')
@@ -91,7 +162,7 @@
         self._output_class = t.argmax(self._output,1)
         self._class_error = t.neq(self._output_class,self._target_vector)
         self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
-        OnlineGradientTLearner.__init__(self)
+        OnlineGradientTLearner.__init__(self, linker = linker)
 
     def attributeNames(self):
         return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
@@ -119,7 +190,7 @@
 
     def updateMinibatch(self,minibatch):
         MinibatchUpdatesTLearner.updateMinibatch(self,minibatch)
-        print "NLL=",self.nll
+        #print self.nll
 
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
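The new ManualNNet ties training and prediction together: `__call__` is aliased to `update`, which runs `nepochs` of minibatch SGD and returns the bound `use` method, so the object is called once to train and the returned function is called to predict. A minimal usage sketch of that call pattern, mirroring what `test1()` in test_mlp.py below exercises (the XOR-style dataset is an assumption for illustration):

```python
import numpy
import dataset
from mlp import ManualNNet

# Toy dataset with 'input' and 'target' fields, as used in test_mlp.py.
training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                 [0, 1, 1],
                                                 [1, 0, 1],
                                                 [1, 1, 1]]),
                                    {'input': slice(2), 'target': 2})

nnet = ManualNNet(2, 10, 3, .1, 1000)   # ninputs, nhid, nclass, lr, nepochs
fprop = nnet(training_set)              # __call__ == update: trains, returns self.use
output_ds = fprop(training_set)         # apply the trained net; default field is 'output_class'
for fieldname in output_ds.fieldNames():
    print fieldname + "=", output_ds[fieldname]
```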
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mlp_factory_approach.py	Wed May 14 14:06:52 2008 -0400
@@ -0,0 +1,127 @@
+import dataset
+import theano
+import theano.tensor as t
+import numpy
+import nnet_ops
+
+def _randshape(*shape):
+    return (numpy.random.rand(*shape) -0.5) * 0.001
+def _function(inputs, outputs, linker='c&py'):
+    return theano.function(inputs, outputs, unpack_single=False,linker=linker)
+
+class NeuralNet(object):
+
+    class Model(object):
+        def __init__(self, nnet, params):
+            self.nnet = nnet
+            self.params = params
+
+        def update(self, trainset, stopper=None):
+            """Update this model from more training data."""
+            v = self.nnet.v
+            params = self.params
+            update_fn = _function([v.input, v.target] + v.params, [v.nll] + v.new_params)
+            if stopper is not None:
+                raise NotImplementedError()
+            else:
+                for i in xrange(100):
+                    for input, target in trainset.minibatches(['input', 'target'],
+                            minibatch_size=min(32, len(trainset))):
+                        dummy = update_fn(input, target[:,0], *params)
+                        if 0: print dummy[0] #the nll
+
+        def __call__(self, testset,
+                output_fieldnames=['output_class'],
+                test_stats_collector=None,
+                copy_inputs=False,
+                put_stats_in_output_dataset=True,
+                output_attributes=[]):
+            """Apply this model (as a function) to new data"""
+            inputs = [self.nnet.v.input, self.nnet.v.target] + self.nnet.v.params
+            fn = _function(inputs, [getattr(self.nnet.v, name) for name in output_fieldnames])
+            if 'target' in testset.fields():
+                return dataset.ApplyFunctionDataSet(testset,
+                        lambda input, target: fn(input, target[:,0], *self.params),
+                        output_fieldnames)
+            else:
+                return dataset.ApplyFunctionDataSet(testset,
+                        lambda input: fn(input, numpy.zeros(1,dtype='int64'), *self.params),
+                        output_fieldnames)
+
+    def __init__(self, ninputs, nhid, nclass, lr, nepochs,
+            l2coef=0.0,
+            linker='c&yp',
+            hidden_layer=None):
+        class Vars:
+            def __init__(self, lr, l2coef):
+                lr = t.constant(lr)
+                l2coef = t.constant(l2coef)
+                input = t.matrix('input') # n_examples x n_inputs
+                target = t.ivector('target') # n_examples x 1
+                W2 = t.matrix('W2')
+                b2 = t.vector('b2')
+
+                if hidden_layer:
+                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
+                else:
+                    W1 = t.matrix('W1')
+                    b1 = t.vector('b1')
+                    hid = t.tanh(b1 + t.dot(input, W1))
+                    hid_params = [W1, b1]
+                    hid_regularization = l2coef * t.sum(W1*W1)
+                    hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)]
+
+                params = [W2, b2] + hid_params
+                nll, predictions = nnet_ops.crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target)
+                regularization = l2coef * t.sum(W2*W2) + hid_regularization
+                output_class = t.argmax(predictions,1)
+                loss_01 = t.neq(output_class, target)
+                g_params = t.grad(nll + regularization, params)
+                new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
+                self.__dict__.update(locals()); del self.self
+        self.nhid = nhid
+        self.nclass = nclass
+        self.nepochs = nepochs
+        self.v = Vars(lr, l2coef)
+        self.params = None
+
+    def __call__(self, trainset=None, iparams=None):
+        if iparams is None:
+            iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\
+                    + self.v.hid_ivals()
+        rval = NeuralNet.Model(self, iparams)
+        if trainset:
+            rval.update(trainset)
+        return rval
+
+
+if __name__ == '__main__':
+    training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                      [0, 1, 1],
+                                                      [1, 0, 1],
+                                                      [1, 1, 1]]),
+                                         {'input':slice(2),'target':2})
+    training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                      [0, 1, 1],
+                                                      [1, 0, 0],
+                                                      [1, 1, 1]]),
+                                         {'input':slice(2),'target':2})
+    test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                  [0, 1, 1],
+                                                  [1, 0, 0],
+                                                  [1, 1, 1]]),
+                                     {'input':slice(2)})
+
+    learn_algo = NeuralNet(2, 10, 3, .1, 1000)
+
+    model1 = learn_algo(training_set1)
+
+    model2 = learn_algo(training_set2)
+
+    n_match = 0
+    for o1, o2 in zip(model1(test_data), model2(test_data)):
+        n_match += (o1 == o2)
+
+    print n_match, numpy.sum(training_set1.fields()['target'] ==
+            training_set2.fields()['target'])
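Unlike ManualNNet, mlp_factory_approach.py separates the learning algorithm from the learned parameters: NeuralNet builds the symbolic graph and update rule once, and each call returns a NeuralNet.Model carrying its own parameter list, so several independently trained models can share one graph. A rough usage sketch following the `__main__` block above (the data sets are assumed to be built the same way):

```python
# Sketch of the factory pattern; training_set1/training_set2/test_data are assumed
# to be dataset.ArrayDataSet objects constructed as in the __main__ block above.
learn_algo = NeuralNet(2, 10, 3, .1, 1000)   # graph and update rule built once

prior  = learn_algo()                # untrained Model with fresh random parameters
model1 = learn_algo(training_set1)   # two independently trained Models
model2 = learn_algo(training_set2)

model2.update(training_set2)         # keep refining an existing Model in place
output_ds = model2(test_data)        # Model.__call__ applies it to new data
```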
--- a/nnet_ops.py	Wed May 14 14:06:15 2008 -0400
+++ b/nnet_ops.py	Wed May 14 14:06:52 2008 -0400
@@ -101,7 +101,7 @@
                 or x.type.dtype not in ['float32', 'float64']:
             raise ValueError('b must be 1-d tensor of floats')
         if y_idx.type.ndim != 1 \
-                or y_idx.type.dtype not in ['int32', 'int64']:
+                or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']:
             raise ValueError('y_idx must be 1-d tensor of ints')
 
         # TODO: Is this correct? It used to be y, not y_idx
@@ -109,7 +109,7 @@
                 y_idx.type.broadcastable).make_result()
         # nll = Tensor(x.dtype, y.broadcastable)
         sm = x.type.make_result()
-        return theano.Apply(self, [x, b, y_idx],[nll, sm])
+        return theano.Apply(self, [x, b, y_idx], [nll, sm])
     def perform(self, node, input_storage, output_storage):
         x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
@@ -144,6 +144,9 @@
 
         #TODO: set error messages for failures in this code
 
+        #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
+        y_idx_type = node.inputs[2].type.dtype_specs()[1]
+
         return """
         npy_intp* Nx = %(x)s->dimensions;
@@ -172,9 +175,12 @@
             PyErr_SetString(PyExc_TypeError, "b not float64");
             %(fail)s;
         }
-        if (%(y_idx)s->descr->type_num != PyArray_INT64)
+        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
+            && (%(y_idx)s->descr->type_num != PyArray_INT32)
+            && (%(y_idx)s->descr->type_num != PyArray_INT16)
+            && (%(y_idx)s->descr->type_num != PyArray_INT8))
         {
-            PyErr_SetString(PyExc_TypeError, "y_idx not int64");
+            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
             %(fail)s;
         }
         if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])
@@ -217,7 +223,7 @@
             const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i);
             const double* __restrict__ b_i = (double*)(%(b)s->data);
-            const long int y_i = ((long int*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
+            const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
             double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
             double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
@@ -303,15 +309,24 @@
     def grad(self, *args):
         raise NotImplementedError()
     def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
+        y_idx_type = node.inputs[2].type.dtype_specs()[1]
        return """
        if ((%(dnll)s->descr->type_num != PyArray_DOUBLE)
            || (%(sm)s->descr->type_num != PyArray_DOUBLE)
-            || (%(y_idx)s->descr->type_num != PyArray_INT64))
+            )
        {
            PyErr_SetString(PyExc_TypeError, "types should be float64, float64, int64");
            %(fail)s;
        }
+        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
+            && (%(y_idx)s->descr->type_num != PyArray_INT32)
+            && (%(y_idx)s->descr->type_num != PyArray_INT16)
+            && (%(y_idx)s->descr->type_num != PyArray_INT8))
+        {
+            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
+            %(fail)s;
+        }
        if ((%(dnll)s->nd != 1)
            || (%(sm)s->nd != 2)
            || (%(y_idx)s->nd != 1))
@@ -341,7 +356,7 @@
        {
            const double dnll_i = ((double*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];
-            const long int y_i = ((long int*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
+            const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
            const double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
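The nnet_ops.py change widens the accepted dtype of the target index vector from {int32, int64} to any signed integer width, both in the Python-level check in `make_node` and in the generated C code, which now casts through `%(y_idx_type)s` (taken from `node.inputs[2].type.dtype_specs()[1]`) instead of hard-coding `long int`. A small illustration of the widened rule, mirrored in plain Python rather than in the op itself:

```python
# Illustration only: the dtype rule this patch installs, not the Theano op.
import numpy

ACCEPTED = ['int8', 'int16', 'int32', 'int64']   # previously only int32/int64

def y_idx_ok(y_idx):
    """True if a label vector would pass the patched check: 1-d, signed ints."""
    return y_idx.ndim == 1 and str(y_idx.dtype) in ACCEPTED

print y_idx_ok(numpy.array([0, 2, 1], dtype='int8'))    # True  (newly accepted)
print y_idx_ok(numpy.array([0, 2, 1], dtype='int64'))   # True  (accepted before and after)
print y_idx_ok(numpy.array([0., 2., 1.]))               # False (float targets still rejected)
```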
--- a/test_mlp.py	Wed May 14 14:06:15 2008 -0400
+++ b/test_mlp.py	Wed May 14 14:06:52 2008 -0400
@@ -1,9 +1,58 @@
 from mlp import *
 import dataset
+import nnet_ops
+
+
+from functools import partial
+def separator(debugger, i, node, *ths):
+    print "==================="
+
+def what(debugger, i, node, *ths):
+    print "#%i" % i, node
+
+def parents(debugger, i, node, *ths):
+    print [input.step for input in node.inputs]
+
+def input_shapes(debugger, i, node, *ths):
+    print "input shapes: ",
+    for r in node.inputs:
+        if hasattr(r.value, 'shape'):
+            print r.value.shape,
+        else:
+            print "no_shape",
+    print
+
+def input_types(debugger, i, node, *ths):
+    print "input types: ",
+    for r in node.inputs:
+        print r.type,
+    print
+
+def output_shapes(debugger, i, node, *ths):
+    print "output shapes:",
+    for r in node.outputs:
+        if hasattr(r.value, 'shape'):
+            print r.value.shape,
+        else:
+            print "no_shape",
+    print
+
+def output_types(debugger, i, node, *ths):
+    print "output types:",
+    for r in node.outputs:
+        print r.type,
+    print
+
 
 def test0():
-    nnet = OneHiddenLayerNNetClassifier(10,2,.001,1000)
+    linker = 'c|py'
+    #linker = partial(theano.gof.DebugLinker, linkers = [theano.gof.OpWiseCLinker],
+    #        debug_pre = [separator, what, parents, input_types, input_shapes],
+    #        debug_post = [output_shapes, output_types],
+    #        compare_fn = lambda x, y: numpy.all(x == y))
+
+    nnet = OneHiddenLayerNNetClassifier(10,2,.001,1000, linker = linker)
     training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                      [0, 1, 1],
                                                      [1, 0, 1],
@@ -16,5 +65,68 @@
     for fieldname in output_ds.fieldNames():
         print fieldname+"=",output_ds[fieldname]
 
-test0()
+def test1():
+    nnet = ManualNNet(2, 10,3,.1,1000)
+    training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 1],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2),'target':2})
+    fprop=nnet(training_set)
+
+    output_ds = fprop(training_set)
+
+    for fieldname in output_ds.fieldNames():
+        print fieldname+"=",output_ds[fieldname]
+
+def test2():
+    training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 1],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2),'target':2})
+    nin, nhid=2, 10
+    def sigm_layer(input):
+        W1 = t.matrix('W1')
+        b1 = t.vector('b1')
+        return (nnet_ops.sigmoid(b1 + t.dot(input, W1)),
+                [W1, b1],
+                [(numpy.random.rand(nin, nhid) -0.5) * 0.001, numpy.zeros(nhid)])
+    nnet = ManualNNet(nin, nhid, 3, .1, 1000, hidden_layer=sigm_layer)
+    fprop=nnet(training_set)
+
+    output_ds = fprop(training_set)
+    for fieldname in output_ds.fieldNames():
+        print fieldname+"=",output_ds[fieldname]
+
+def test_interface_0():
+    learner = ManualNNet(2, 10, 3, .1, 1000)
+
+    model = learner(training_set)
+
+    model2 = learner(training_set)     # trains model a second time
+
+    learner.update(additional_data)    # modifies nnet and model by side-effect
+
+
+def test_interface2_1():
+    learn_algo = ManualNNet(2, 10, 3, .1, 1000)
+
+    prior = learn_algo()
+
+    model1 = learn_algo(training_set1)
+
+    model2 = learn_algo(training_set2)
+
+    model2.update(additional_data)
+
+    n_match = 0
+    for o1, o2 in zip(model1.use(test_data), model2.use(test_data)):
+        n_match += (o1 == o2)
+
+    print n_match
+
+test1()
+test2()