# HG changeset patch
# User Thierry Bertin-Mahieux
# Date 1211559774 14400
# Node ID df3fae88ab46499b702338e76c63da51d93d02a1
# Parent  44dd9b6448c56a0a7540f446ef9b2228a384eb0f
small debugging

diff -r 44dd9b6448c5 -r df3fae88ab46 dataset.py
--- a/dataset.py	Thu May 22 19:08:46 2008 -0400
+++ b/dataset.py	Fri May 23 12:22:54 2008 -0400
@@ -245,8 +245,7 @@
         if n_batches is not None:
             ds_nbatches = min(n_batches,ds_nbatches)
         if fieldnames:
-            if not dataset.hasFields(*fieldnames):
-                raise ValueError('field not present', fieldnames)
+            assert dataset.hasFields(*fieldnames)
         else:
             self.fieldnames=dataset.fieldNames()
         self.iterator = self.dataset.minibatches_nowrap(self.fieldnames,self.minibatch_size,
@@ -970,16 +969,7 @@
         for fieldname, fieldcolumns in self.fields_columns.items():
             if type(fieldcolumns) is int:
                 assert fieldcolumns>=0 and fieldcolumns[...]

[... gap in source: the remainder of this hunk and the header of the next file's diff (apparently mlp.py, judging from the mlp.__call__ comment below) are missing; the patch resumes mid-hunk ...]

+        [...]>= 0
+        minset = trainset[:nmin] #real training set for minimizing loss
+        valset = trainset[nmin:] #validation set for early stopping
+        best = model
+        for stp in self.early_stopper():
+            model.update(
+                minset.minibatches([input, target], minibatch_size=min(32,
+                    len(trainset))))
+            #print 'mlp.__call__(), we did an update'
+            if stp.set_score:
+                stp.score = model(valset, ['loss_01'])
+                if (stp.score < stp.best_score):
+                    best = copy.copy(model)
+        model = best
+        # end of the copy from mlp_factory_approach
+
+        return model
+
     def compile(self, inputs, outputs):
         return theano.function(inputs,outputs,unpack_single=False,linker=self.linker)

diff -r 44dd9b6448c5 -r df3fae88ab46 mlp_factory_approach.py
--- a/mlp_factory_approach.py	Thu May 22 19:08:46 2008 -0400
+++ b/mlp_factory_approach.py	Fri May 23 12:22:54 2008 -0400
@@ -4,7 +4,7 @@
 import theano
 from theano import tensor as t

-import dataset, nnet_ops, stopper
+from pylearn import dataset, nnet_ops, stopper


 def _randshape(*shape):
@@ -31,18 +31,19 @@
             """Update this model from more training data."""
             params = self.params
             #TODO: why should we have to unpack target like this?
+            # tbm : creates problem...
             for input, target in input_target:
                 rval= self.update_fn(input, target[:,0], *params)
                 #print rval[0]

-        def __call__(self, testset, fieldnames=['output_class']):
+        def __call__(self, testset, fieldnames=['output_class'],input='input',target='target'):
             """Apply this model (as a function) to new data"""
             #TODO: cache fn between calls
-            assert 'input' == testset.fieldNames()[0]
+            assert input == testset.fieldNames()[0] # why first one???
             assert len(testset.fieldNames()) <= 2
             v = self.algo.v
             outputs = [getattr(v, name) for name in fieldnames]
-            inputs = [v.input] + ([v.target] if 'target' in testset else [])
+            inputs = [v.input] + ([v.target] if target in testset else [])
             inputs.extend(v.params)
             theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)),
                     lambda: self.algo._fn(inputs, outputs))

diff -r 44dd9b6448c5 -r df3fae88ab46 nnet_ops.py
--- a/nnet_ops.py	Thu May 22 19:08:46 2008 -0400
+++ b/nnet_ops.py	Fri May 23 12:22:54 2008 -0400
@@ -44,7 +44,7 @@
         return ScalarSoftplus.static_impl(x)
     def grad(self, (x,), (gz,)):
         return [gz * scalar_sigmoid(x)]
-    def c_code(self, node, name, (x,), (z,), sub):
+    def c_code(self, name, node, (x,), (z,), sub):
         if node.inputs[0].type in [scalar.float32, scalar.float64]:
             return """%(z)s =
                 %(x)s < -30.0

diff -r 44dd9b6448c5 -r df3fae88ab46 test_dataset.py
--- a/test_dataset.py	Thu May 22 19:08:46 2008 -0400
+++ b/test_dataset.py	Fri May 23 12:22:54 2008 -0400
@@ -491,5 +491,4 @@
     test_ArrayDataSet()
     test_CachedDataSet()
     test_ApplyFunctionDataSet()
-    #test pmat.py
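
A note on the early-stopping block added above (the "copy from mlp_factory_approach"): it follows a standard pattern, i.e. hold out the tail of the training set for validation, run updates on the head, and keep a copy of the best-scoring model rather than the last one. Below is a minimal self-contained sketch of that pattern; the function name, the update/score callables, the fixed step count, and the 80/20 split ratio are illustrative assumptions, not pylearn's API.

    import copy

    def fit_with_early_stopping(model, trainset, update_fn, score_fn, max_steps=100):
        # Split off a held-out validation set (the 80/20 ratio is an assumed example).
        nmin = int(len(trainset) * 0.8)
        minset = trainset[:nmin]   # real training set for minimizing loss
        valset = trainset[nmin:]   # validation set for early stopping
        best, best_score = copy.copy(model), float('inf')
        for _ in range(max_steps):
            update_fn(model, minset)           # one training update on the head
            score = score_fn(model, valset)    # e.g. 0-1 loss on the held-out tail
            if score < best_score:
                best, best_score = copy.copy(model), score
        return best   # the best model seen, not the last one

The patch's version delegates the loop control to `self.early_stopper()` and only re-scores when the stopper requests it (`stp.set_score`); the sketch uses a fixed step count purely to stay self-contained.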