comparison mlp_factory_approach.py @ 244:3156a9976183
mlp_factory_approach.py, updated and un-deprecated by popular demand
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Mon, 02 Jun 2008 17:08:17 -0400 |
parents | c047238e5b3f |
children | a1793a5e9523 |
235:a70f2c973ea5 | 244:3156a9976183 |
---|---|
1 """ | |
2 | |
3 | |
4 | |
5 This file is deprecated. I'm continuing development in hpu/models.py. | |
6 | |
7 Get that project like this: hg clone ssh://user@lgcm/../bergstrj/hpu | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 """ | |
14 import copy, sys | 1 import copy, sys |
15 import numpy | 2 import numpy |
16 | 3 |
17 import theano | 4 import theano |
18 from theano import tensor as t | 5 from theano import tensor as T |
19 | 6 |
20 from pylearn import dataset, nnet_ops, stopper | 7 from pylearn import dataset, nnet_ops, stopper, LookupList |
21 | 8 |
22 | 9 class AbstractFunction(Exception): pass |
23 def _randshape(*shape): | 10 |
24 return (numpy.random.rand(*shape) -0.5) * 0.001 | 11 class AutoName(object): |
25 | 12 """ |
26 def _cache(d, key, valfn): | 13 By inheriting from this class, class variables which have a name attribute |
27 #valfn() is only evaluated if key isn't in dictionary d | 14 will have that name attribute set to the class variable name. |
28 if key not in d: | 15 """ |
29 d[key] = valfn() | 16 class __metaclass__(type): |
30 return d[key] | 17 def __init__(cls, name, bases, dct): |
31 | 18 type.__init__(cls, name, bases, dct) |
32 class _Model(object): | 19 for key, val in dct.items(): |
33 def __init__(self, algo, params): | 20 assert type(key) is str |
34 self.algo = algo | 21 if hasattr(val, 'name'): |
35 self.params = params | 22 val.name = key |
36 v = algo.v | 23 |
37 self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) | 24 class GraphLearner(object): |
38 self._fn_cache = {} | 25 class Model(object): |
39 | 26 def __init__(self, algo, params): |
40 def __copy__(self): | 27 self.algo = algo |
41 return _Model(self.algo, [copy.copy(p) for p in self.params]) | 28 self.params = params |
42 | 29 graph = self.algo.graph |
43 def update(self, input_target): | 30 self.update_fn = algo._fn([graph.input, graph.target] + graph.params, |
44 """Update this model from more training data.""" | 31 [graph.nll] + graph.new_params) |
45 params = self.params | 32 self._fn_cache = {} |
46 #TODO: why should we have to unpack target like this? | 33 |
47 # tbm : creates problem... | 34 def __copy__(self): |
48 for input, target in input_target: | 35 raise Exception('why not called?') |
49 rval = self.update_fn(input, target, *params) | 36 return GraphLearner.Model(self.algo, [copy.copy(p) for p in self.params]) |
50 #print rval[0] | 37 |
51 | 38 def _cache(self, key, valfn): |
52 def __call__(self, testset, fieldnames=['output_class'],input='input',target='target'): | 39 d = self._fn_cache |
53 """Apply this model (as a function) to new data""" | 40 if key not in d: |
54 #TODO: cache fn between calls | 41 d[key] = valfn() |
55 assert input == testset.fieldNames()[0] # why first one??? | 42 return d[key] |
56 assert len(testset.fieldNames()) <= 2 | 43 |
57 v = self.algo.v | 44 def update_minibatch(self, minibatch): |
58 outputs = [getattr(v, name) for name in fieldnames] | 59 @param testset: DataSet, whose fields feed Result terms in self.algo.graph |
59 inputs = [v.input] + ([v.target] if target in testset else []) | 46 self.update_fn(minibatch['input'], minibatch['target'], *self.params) |
60 inputs.extend(v.params) | 47 |
61 theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), | 62 @param fieldnames: names of results in self.algo.graph to compute. |
62 lambda: self.algo._fn(inputs, outputs)) | 49 default_minibatch_size=32): |
63 lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) | 50 """Update this model from more training data.""" |
64 return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) | 51 params = self.params |
65 | 52 minibatch_size = min(default_minibatch_size, len(dataset)) |
66 class AutonameVars(object): | 53 for mb in dataset.minibatches(['input', 'target'], minibatch_size=minibatch_size): |
67 def __init__(self, dct): | 54 self.update_minibatch(mb) |
68 for key, val in dct.items(): | 55 |
69 if type(key) is str and hasattr(val, 'name'): | 56 def __call__(self, testset, fieldnames=['output_class']): |
70 val.name = key | 57 """Apply this model (as a function) to new data. |
71 self.__dict__.update(dct) | 58 |
72 | 59 @param testset: DataSet, whose fields feed Result terms in self.algo.g |
73 class MultiLayerPerceptron(object): | 60 @type testset: DataSet |
74 | 61 |
75 def __init__(self, ninputs, nhid, nclass, lr, | 62 @param fieldnames: names of results in self.algo.g to compute. |
76 l2coef=0.0, | 63 @type fieldnames: list of strings |
77 linker='c&py', | 64 |
78 hidden_layer=None, | 65 @return: DataSet with fields from fieldnames, computed from testset by |
79 early_stopper=None, | 66 this model. |
80 validation_portion=0.2, | 67 @rtype: ApplyFunctionDataSet instance |
81 V_extern=None): | 68 |
82 class V_intern(AutonameVars): | 69 """ |
83 def __init__(v_self, lr, l2coef, **kwargs): | 70 graph = self.algo.graph |
84 lr = t.constant(lr) | 71 def getresult(name): |
85 l2coef = t.constant(l2coef) | 72 r = getattr(graph, name) |
86 input = t.matrix() # n_examples x n_inputs | 73 if not isinstance(r, theano.Result): |
87 target = t.ivector() # len: n_examples | 74 raise TypeError('string does not name a theano.Result', (name, r)) |
88 W2, b2 = t.matrix(), t.vector() | 75 return r |
89 | 76 |
90 if hidden_layer: | 77 provided = [getresult(name) for name in testset.fieldNames()] |
91 hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) | 78 wanted = [getresult(name) for name in fieldnames] |
92 else: | 79 inputs = provided + graph.params |
93 W1, b1 = t.matrix(), t.vector() | 80 |
94 hid = t.tanh(b1 + t.dot(input, W1)) | 81 theano_fn = self._cache((tuple(inputs), tuple(wanted)), |
95 hid_params = [W1, b1] | 82 lambda: self.algo._fn(inputs, wanted)) |
96 hid_regularization = l2coef * t.sum(W1*W1) | 83 lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) |
97 hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] | 84 return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) |
98 | 85 |
99 params = [W2, b2] + hid_params | 86 class Graph(object): |
100 activations = b2 + t.dot(hid, W2) | 87 class Opt(object): |
101 nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) | 88 merge = theano.gof.MergeOptimizer() |
102 regularization = l2coef * t.sum(W2*W2) + hid_regularization | 89 gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1) |
103 output_class = t.argmax(activations,1) | 90 sqr_opt_0 = theano.gof.TopoOptimizer(theano.gof.PatternSub( |
104 loss_01 = t.neq(output_class, target) | 91 (T.mul,'x', 'x'), |
105 g_params = t.grad(nll + regularization, params) | 92 (T.sqr, 'x'))) |
106 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] | 93 |
107 self.__dict__.update(locals()); del self.self | 94 def __init__(self, do_sqr=True): |
108 AutonameVars.__init__(v_self, locals()) | 95 self.do_sqr = do_sqr |
109 self.nhid = nhid | 96 |
110 self.nclass = nclass | 97 def __call__(self, env): |
111 self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) | 98 self.merge(env) |
112 self.linker = linker | 99 self.gemm_opt_1(env) |
113 self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) | 100 if self.do_sqr: |
114 self.validation_portion = validation_portion | 101 self.sqr_opt_0(env) |
102 self.merge(env) | |
103 | |
104 def linker(self): | |
105 return theano.gof.PerformLinker() | |
106 | |
107 def early_stopper(self): | |
108 return stopper.NStages(10,1) | |
109 | |
110 def train_iter(self, trainset): | |
111 raise AbstractFunction | |
112 optimizer = Opt() | |
113 | |
114 def __init__(self, graph): | |
115 self.graph = graph | |
115 | 116 |
116 def _fn(self, inputs, outputs): | 117 def _fn(self, inputs, outputs): |
117 # Caching here would hamper multi-threaded apps | 118 # Caching here would hamper multi-threaded apps |
118 # prefer caching in _Model.__call__ | 119 # prefer caching in Model.__call__ |
119 return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) | 120 return theano.function(inputs, outputs, |
120 | 121 unpack_single=False, |
121 def __call__(self, trainset=None, iparams=None, input='input', target='target'): | 122 optimizer=self.graph.optimizer, |
122 """Allocate and optionally train a model""" | 123 linker=self.graph.linker() if hasattr(self.graph, 'linker') |
123 if iparams is None: | 124 else 'c&py') |
124 iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ | 125 |
125 + self.v.hid_ivals() | 126 def __call__(self, |
126 rval = _Model(self, iparams) | 127 trainset=None, |
127 if trainset: | 128 validset=None, |
128 if len(trainset) == sys.maxint: | 129 iparams=None): |
129 raise NotImplementedError('Learning from infinite streams is not supported') | 130 """Allocate and optionally train a model |
130 nval = int(self.validation_portion * len(trainset)) | 131 |
131 nmin = len(trainset) - nval | 132 @param trainset: Data for minimizing the cost function |
132 assert nmin >= 0 | 133 @type trainset: None or Dataset |
133 minset = trainset[:nmin] #real training set for minimizing loss | 134 |
134 valset = trainset[nmin:] #validation set for early stopping | 135 @param validset: Data for early stopping |
135 best = rval | 136 @type validset: None or Dataset |
136 for stp in self.early_stopper(): | 137 |
137 rval.update( | 138 @param input: name of field to use as input |
138 minset.minibatches([input, target], minibatch_size=min(32, | 139 @type input: string |
139 len(minset)))) | 140 |
140 #print 'mlp.__call__(), we did an update' | 141 @param target: name of field to use as target |
142 @type target: string | |
143 | |
144 @return: model | |
145 @rtype: GraphLearner.Model instance | |
146 | |
147 """ | |
148 iparams = self.graph.iparams() if iparams is None else iparams | |
149 curmodel = GraphLearner.Model(self, iparams) | |
150 best = curmodel | |
151 | |
152 if trainset is not None: | |
153 #do some training by calling Model.update_minibatch() | |
154 stp = self.graph.early_stopper() | |
155 for mb in self.graph.train_iter(trainset): | |
156 curmodel.update_minibatch(mb) | |
141 if stp.set_score: | 157 if stp.set_score: |
142 stp.score = rval(valset, ['loss_01']) | 158 if validset: |
143 if (stp.score < stp.best_score): | 159 stp.score = curmodel(validset, ['validset_score']) |
144 best = copy.copy(rval) | 160 if (stp.score < stp.best_score): |
145 rval = best | 161 best = copy.copy(curmodel) |
146 return rval | 162 else: |
163 stp.score = 0.0 | |
164 stp.next() | |
165 if validset: | |
166 curmodel = best | |
167 return curmodel | |
168 | |
169 def graphMLP(ninputs, nhid, nclass, lr_val, l2coef_val=0.0): | |
170 def wrapper(i, node, thunk): | |
171 if 0: | |
172 print i, node | |
173 print thunk.inputs | |
174 print thunk.outputs | |
175 if node.op == nnet_ops.crossentropy_softmax_1hot_with_bias: | |
176 print 'here is the nll op' | |
177 thunk() #actually compute this piece of the graph | |
178 | |
179 class G(GraphLearner.Graph, AutoName): | |
180 | |
181 lr = T.constant(lr_val) | |
182 assert l2coef_val == 0.0 | |
183 l2coef = T.constant(l2coef_val) | |
184 input = T.matrix() # n_examples x n_inputs | |
185 target = T.ivector() # len: n_examples | |
186 W2, b2 = T.matrix(), T.vector() | |
187 | |
188 W1, b1 = T.matrix(), T.vector() | |
189 hid = T.tanh(b1 + T.dot(input, W1)) | |
190 hid_regularization = l2coef * T.sum(W1*W1) | |
191 | |
192 params = [W1, b1, W2, b2] | |
193 activations = b2 + T.dot(hid, W2) | |
194 nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) | |
195 regularization = l2coef * T.sum(W2*W2) + hid_regularization | |
196 output_class = T.argmax(activations,1) | |
197 loss_01 = T.neq(output_class, target) | |
198 #g_params = T.grad(nll + regularization, params) | |
199 g_params = T.grad(nll, params) | |
200 new_params = [T.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] | |
201 | |
202 def iparams(self): | |
203 def randsmall(*shape): | |
204 return (numpy.random.rand(*shape) -0.5) * 0.001 | |
205 return [randsmall(ninputs, nhid) | |
206 , randsmall(nhid) | |
207 , randsmall(nhid, nclass) | |
208 , randsmall(nclass)] | |
209 | |
210 def train_iter(self, trainset): | |
211 return trainset.minibatches(['input', 'target'], | |
212 minibatch_size=min(len(trainset), 32), n_batches=300) | |
213 def early_stopper(self): | |
214 return stopper.NStages(300,1) | |
215 | |
216 return G() | |
147 | 217 |
148 | 218 |
149 import unittest | 219 import unittest |
150 | 220 |
151 class TestMLP(unittest.TestCase): | 221 class TestMLP(unittest.TestCase): |
152 def test0(self): | 222 def blah(self, g): |
153 | |
154 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | 223 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], |
155 [0, 1, 1], | 224 [0, 1, 1], |
156 [1, 0, 1], | 225 [1, 0, 1], |
157 [1, 1, 1]]), | 226 [1, 1, 1]]), |
158 {'input':slice(2),'target':2}) | 227 {'input':slice(2),'target':2}) |
165 [0, 1, 1], | 234 [0, 1, 1], |
166 [1, 0, 0], | 235 [1, 0, 0], |
167 [1, 1, 1]]), | 236 [1, 1, 1]]), |
168 {'input':slice(2)}) | 237 {'input':slice(2)}) |
169 | 238 |
170 learn_algo = MultiLayerPerceptron(2, 10, 2, .1 | 239 learn_algo = GraphLearner(g) |
171 , linker='c&py' | |
172 , early_stopper = lambda:stopper.NStages(100,1)) | |
173 | 240 |
174 model1 = learn_algo(training_set1) | 241 model1 = learn_algo(training_set1) |
175 | 242 |
176 model2 = learn_algo(training_set2) | 243 model2 = learn_algo(training_set2) |
177 | 244 |
178 n_match = 0 | 245 omatch = [o1 == o2 for o1, o2 in zip(model1(test_data), |
179 for o1, o2 in zip(model1(test_data), model2(test_data)): | 246 model2(test_data))] |
180 #print o1 | 247 |
181 #print o2 | 248 n_match = sum(omatch) |
182 n_match += (o1 == o2) | 249 |
183 | 250 self.failUnless(n_match == (numpy.sum(training_set1.fields()['target'] == |
184 assert n_match == (numpy.sum(training_set1.fields()['target'] == | 251 training_set2.fields()['target'])), omatch) |
185 training_set2.fields()['target'])) | 252 |
253 def equiv(self, g0, g1): | |
254 training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], | |
255 [0, 1, 1], | |
256 [1, 0, 1], | |
257 [1, 1, 1]]), | |
258 {'input':slice(2),'target':2}) | |
259 learn_algo_0 = GraphLearner(g0) | |
260 learn_algo_1 = GraphLearner(g1) | |
261 | |
262 model_0 = learn_algo_0(training_set1) | |
263 model_1 = learn_algo_1(training_set1) | |
264 | |
265 print '----' | |
266 for p in zip(model_0.params, model_1.params): | |
267 abs_rel_err = theano.gradient.numeric_grad.abs_rel_err(p[0], p[1]) | |
268 max_abs_rel_err = numpy.max(abs_rel_err) | |
269 if max_abs_rel_err > 1.0e-7: | |
270 print 'p0', p[0] | |
271 print 'p1', p[1] | |
272 #self.failUnless(max_abs_rel_err < 1.0e-7, max_abs_rel_err) | |
273 | |
274 | |
275 def test0(self): self.blah(graphMLP(2, 10, 2, .1)) | |
276 def test1(self): self.blah(graphMLP(2, 3, 2, .1)) | |
186 | 277 |
187 if __name__ == '__main__': | 278 if __name__ == '__main__': |
188 unittest.main() | 279 unittest.main() |
189 | 280 |
281 |
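
The AutoName metaclass introduced above is worth a closer look: when a class body is executed, the metaclass's __init__ receives the class dictionary and stamps every value that already carries a `name` attribute with the name of the class variable holding it, which is how G's theano variables end up named `input`, `target`, `W1`, and so on. A minimal self-contained Python 2 sketch of the mechanism (`Var` is a hypothetical stand-in for a theano variable, not part of the file above):

    class Var(object):
        def __init__(self):
            self.name = None        # filled in by AutoName

    class AutoName(object):
        class __metaclass__(type):
            def __init__(cls, name, bases, dct):
                type.__init__(cls, name, bases, dct)
                for key, val in dct.items():
                    if hasattr(val, 'name'):
                        val.name = key   # class variable name -> .name

    class G(AutoName):
        input = Var()
        target = Var()

    print G.input.name    # prints 'input'
    print G.target.name   # prints 'target'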
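A related detail: theano functions are deliberately compiled per call site rather than inside GraphLearner._fn ("Caching here would hamper multi-threaded apps"), and Model._cache memoizes each compiled function under a (inputs, outputs) key so repeated Model.__call__ invocations with the same fieldnames compile only once. A self-contained sketch of that memoization shape (`build_fn` and `compile_count` are illustrative stand-ins for a theano.function compilation, not part of the file above):

    compile_count = [0]

    def build_fn(inputs, outputs):
        # stands in for an expensive theano.function compilation
        compile_count[0] += 1
        return lambda *args: zip(outputs, args)

    def cache(d, key, valfn):
        # valfn() is evaluated only on a cache miss
        if key not in d:
            d[key] = valfn()
        return d[key]

    fn_cache = {}
    key = (('input',), ('output_class',))
    f1 = cache(fn_cache, key, lambda: build_fn(*key))
    f2 = cache(fn_cache, key, lambda: build_fn(*key))
    assert f1 is f2 and compile_count[0] == 1   # second lookup reused f1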
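Putting the pieces together, the call sequence that TestMLP.blah exercises looks like this (a sketch assuming a 2008-era pylearn checkout with its dataset module and a working theano install; graphMLP and GraphLearner are the definitions from the new file above, and it is not independently tested):

    import numpy
    from pylearn import dataset

    trainset = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                 [0, 1, 1],
                                                 [1, 0, 1],
                                                 [1, 1, 1]]),
                                    {'input': slice(2), 'target': 2})

    g = graphMLP(ninputs=2, nhid=10, nclass=2, lr_val=.1)  # build the theano graph
    learn_algo = GraphLearner(g)        # wrap the graph in the generic learner
    model = learn_algo(trainset)        # allocate iparams and train with early stopping
    outputs = model(trainset, ['output_class'])  # apply the trained model to data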