changeset 1253:826d78f0135f
Prototype for "hooks" simpler than full control-flow rewrite.
| author   | Pascal Lamblin <lamblinp@iro.umontreal.ca>         |
|----------|----------------------------------------------------|
| date     | Fri, 24 Sep 2010 01:46:12 -0400                    |
| parents  | 4a1339682c8f                                       |
| children | 705795076efd bf41991692ea                          |
| files    | doc/v2_planning/plugin_PL.py                       |
| diffstat | 1 files changed, 287 insertions(+), 0 deletions(-) |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/plugin_PL.py	Fri Sep 24 01:46:12 2010 -0400
@@ -0,0 +1,287 @@
+import numpy
+import theano
+import theano.tensor
+from itertools import izip
+
+# Note: the Model, Learner and Optimizer base classes (and MNIST, PCA in
+# main()) are assumed to be defined elsewhere; this file is a planning
+# prototype, and the '...' placeholders are intentional.
+
+class RBM(Model):
+    '''
+    Restricted Boltzmann Machine.
+    '''
+    def __init__(self, n_visible, n_hidden, visible=None, name=None):
+
+        if name is None:
+            self.__name__ = self.__class__.__name__
+        else:
+            self.__name__ = name
+        self.n_visible = n_visible
+        self.n_hidden = n_hidden
+
+        if visible is None:
+            self.visible = theano.tensor.matrix(
+                    name='.'.join([self.__name__, 'visible']))
+        else:
+            self.visible = visible
+
+        self.W = theano.shared(
+                numpy.zeros((n_visible, n_hidden), dtype=theano.config.floatX),
+                name=self.__name__ + '.W')
+        self.b_hid = theano.shared(
+                numpy.zeros((n_hidden,), dtype=theano.config.floatX),
+                name=self.__name__ + '.b_hid')
+        self.b_vis = theano.shared(
+                numpy.zeros((n_visible,), dtype=theano.config.floatX),
+                name=self.__name__ + '.b_vis')
+
+        self.inputs = [self.visible]
+        self.targets = []
+        self.parameters = [self.W, self.b_hid, self.b_vis]
+        self.outputs = ...
+        self.cost = None
+        self.gradients = [...]
+
+class LogisticRegression(Model):
+    pass
+
+class GradientBasedLearner(Learner):
+    '''
+    Learner that uses a gradient-based Optimizer to train a Model.
+    '''
+    def __init__(self, model, optimizer, name=None):
+        self.model = model
+        self.optimizer = optimizer
+        ...
+
+        self.updates = optimizer.iterative_optimizer(
+                parameters=model.parameters,
+                cost=model.cost)
+
+        # TODO: not sure of how to interface the data set with the function
+        self.train_fn = theano.function(model.inputs + model.targets,
+                model.cost, updates=self.updates)
+
+    def use_dataset(self, dataset):
+        self.train_set = dataset
+
+    # The decorator indicates that this function will declare some hooks.
+    # More hooks could be automatically declared, for instance
+    # 'begin_function' and 'end_function'.
+    @declare_hooks(['begin_train_iter', 'end_train_iter'])
+    def adapt(self, n_steps=1):
+        for i in xrange(n_steps):
+            self.adapt.hooks.execute(
+                    'begin_train_iter',
+                    context=dict(iter=i, total=n_steps, locals=locals()))
+
+            data = self.train_set.next()
+            self.train_fn(data)
+
+            self.adapt.hooks.execute(
+                    'end_train_iter',
+                    context=dict(iter=i, locals=locals()))
+
+
+class SGD(Optimizer):
+    '''
+    Stochastic gradient descent with fixed learning rate.
+    '''
+    def __init__(self, step_size):
+        self.step_size = step_size
+
+    def iterative_optimizer(
+            self,
+            parameters,
+            cost=None,
+            gradients=None,
+            stop=None,
+            updates=None,
+            ):
+
+        if updates is not None:
+            ret = updates
+        else:
+            ret = {}
+
+        if gradients is None:
+            if cost is None:
+                raise ValueError(
+                        'SGD needs to be provided either a cost or a gradients list')
+            gradients = theano.tensor.grad(cost, parameters)
+
+        for p, g in izip(parameters, gradients):
+            if p in ret:
+                raise KeyError('Parameter %s already has an update value (%s)' % (p, g))
+            ret[p] = p - self.step_size * g
+
+        # never stop
+        if stop is not None:
+            ret[stop] = False
+
+        return ret
+
+
+class DBN(Learner):
+    '''
+    Deep Belief Network.
+    '''
+    def __init__(self, n_layers, layer_config, n_ft_steps, ft_step_size):
+        self.__name__ = self.__class__.__name__
+        # Layers are GradientBasedLearners, with DBN as Model
+        self.layers = []
+        # Pretraining cumulative schedule
+        self.pt_cumul_schedule = [0]
+        # Build the layers and the fully-connected model
+        self.input = theano.tensor.matrix(name='.'.join([self.__name__, 'input']))
+        self.output = self.input
+        self.ft_params = []
+        for i, lconf in enumerate(layer_config):
+            rbm = RBM(visible=self.output, ...)
+            self.output = rbm.hidden_expectation
+            layer = GradientBasedLearner(...)
+            self.layers.append(layer)
+
+            self.pt_cumul_schedule.append(
+                    self.pt_cumul_schedule[-1] + lconf.n_pretrain_steps)
+
+            self.ft_params.extend([rbm.W, rbm.b_hid])
+
+        # Build the fine-tunable model
+        self.target = theano.tensor.ivector(name='.'.join([self.__name__, 'target']))
+        logreg = LogisticRegression(...)
+        self.output = logreg.output
+        self.cost = logreg.nll
+        self.ft_params.extend([logreg.W, logreg.b])
+
+        ft_optimizer = SGD(ft_step_size)
+        self.ft_updates = ft_optimizer.iterative_optimizer(
+                parameters=self.ft_params,
+                cost=self.cost)
+        self.ft_fn = theano.function(
+                [self.input, self.target],
+                self.cost,
+                updates=self.ft_updates,
+                name='.'.join([self.__name__, 'ft_fn']))
+
+        self.stage = 0
+
+    @declare_hooks([
+        'begin_pretrain_layer', 'end_pretrain_layer',
+        'begin_finetune_iter', 'end_finetune_iter'])
+    def adapt(self, n_steps=1):
+        '''
+        Each "step" is accomplished by the corresponding Learner (either an RBM,
+        or the global NNet).
+        '''
+        train_x, train_y = self.dataset
+        n_remaining_steps = n_steps
+        # Unsupervised pre-training
+        for i, layer in enumerate(self.layers):
+            if (self.pt_cumul_schedule[i] <= self.stage
+                    and self.stage < self.pt_cumul_schedule[i+1]):
+
+                self.adapt.hooks.execute(
+                        'begin_pretrain_layer',
+                        context=dict(iter=i, total=len(self.layers), locals=locals()))
+
+                n_pt_steps = min(n_remaining_steps,
+                        self.pt_cumul_schedule[i+1] - self.stage)
+                layer.use_dataset(train_x)
+                layer.adapt(n_steps=n_pt_steps)
+                self.stage += n_pt_steps
+                n_remaining_steps -= n_pt_steps
+
+                self.adapt.hooks.execute(
+                        'end_pretrain_layer',
+                        context=dict(iter=i, total=len(self.layers), locals=locals()))
+
+                # For the next layer, the data needs to be preprocessed
+                train_x = layer.compute_Eh_given_v(train_x)  # or just compute_output?
+
+        # Supervised fine-tuning
+        if n_remaining_steps > 0:
+            sup_data = train_x, train_y
+            for i in xrange(n_remaining_steps):
+                self.adapt.hooks.execute(
+                        'begin_finetune_iter',
+                        context=dict(iter=i, total=n_steps, locals=locals()))
+
+                self.ft_fn(*sup_data)
+
+                self.adapt.hooks.execute(
+                        'end_finetune_iter',
+                        context=dict(iter=i, locals=locals()))
+
+
+## TODO: implement k-fold cross-validation
+
+class Hooks:
+    def __init__(self):
+        # The DB consists of a dictionary:
+        # the keys are the hooks' names (as strings),
+        # the values are lists of (function, exec_condition) pairs.
+        self.db = {}
+
+    def declare(self, name):
+        if name in self.db:
+            raise KeyError('Hook "%s" is already declared' % name)
+        self.db[name] = []
+
+    def execute(self, name, context):
+        if name not in self.db:
+            raise KeyError('Hook "%s" does not exist' % name)
+        # TODO: add contextual information to context, like current time,
+        # time of last call, ...
+        for fn, cond in self.db[name]:
+            if cond(**context):
+                fn(**context)
+
+    def register(self, name, function, exec_condition):
+        if name not in self.db:
+            raise KeyError('Hook "%s" does not exist' % name)
+        self.db[name].append((function, exec_condition))
+
+    # TODO: add __getattr__ to have more intuitive access to the hooks
+
+# Hook declaration mechanism
+def declare_hooks(hooks_list):
+    def deco(f):
+        f.hooks = Hooks()
+        for hook_name in hooks_list:
+            f.hooks.declare(hook_name)
+        return f
+    return deco
+
+# Conditions
+def always():
+    return lambda *args, **kwargs: True
+
+
+def main():
+    train_data = MNIST.gettrain()
+    test_data = MNIST.gettest()
+
+    train_x, train_y = train_data
+
+    preprocessor = PCA(ndim=80)
+    preprocessor.train(train_x)
+
+    preprocessed_x = preprocessor.compute_output(train_x)
+    ## For more robustness, we could have something like:
+    # preprocessed_x = ProcessDataSet(orig_data=train_x, function=preprocessor.compute_output)
+
+    x = theano.tensor.matrix()
+    dbn = DBN(
+            input=x,
+            n_layers=3,
+            layer_config=[dict(n_hidden=500, n_unsup_steps=1000)] * 3
+            )
+
+    dbn.layers[0].adapt.hooks.register(
+            'begin_train_iter',
+            function=...,
+            exec_condition=always()
+            )
+
+    dbn.layers[0].adapt.hooks.register(
+            'end_train_iter',
+            function=...,
+            exec_condition=lambda iter, **kwargs: iter % 20 == 0
+            )
+
+
+if __name__ == '__main__':
+    main()
+
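Supplementary note (not part of the changeset): the hook machinery itself has no Theano dependency, so its declare/register/execute flow can be exercised on its own. The sketch below copies the Hooks, declare_hooks and always definitions from the prototype and drives them from a made-up Trainer class with a dummy adapt loop; Trainer, count_iter and report are hypothetical names introduced only for this illustration.

# Standalone illustration of the hook mechanism (not part of plugin_PL.py).
# Hooks, declare_hooks and always mirror the prototype above; the Trainer
# class and the two callbacks are invented for this example.

class Hooks(object):
    def __init__(self):
        # name -> list of (function, exec_condition) pairs
        self.db = {}

    def declare(self, name):
        if name in self.db:
            raise KeyError('Hook "%s" is already declared' % name)
        self.db[name] = []

    def register(self, name, function, exec_condition):
        if name not in self.db:
            raise KeyError('Hook "%s" does not exist' % name)
        self.db[name].append((function, exec_condition))

    def execute(self, name, context):
        if name not in self.db:
            raise KeyError('Hook "%s" does not exist' % name)
        for fn, cond in self.db[name]:
            if cond(**context):
                fn(**context)


def declare_hooks(hooks_list):
    # Attach a fresh Hooks registry to the decorated function.
    def deco(f):
        f.hooks = Hooks()
        for hook_name in hooks_list:
            f.hooks.declare(hook_name)
        return f
    return deco


def always():
    return lambda *args, **kwargs: True


class Trainer(object):
    # Dummy stand-in for GradientBasedLearner.adapt: no Theano, no data,
    # just the hook calls wrapped around each "training" iteration.
    @declare_hooks(['begin_train_iter', 'end_train_iter'])
    def adapt(self, n_steps=1):
        for i in range(n_steps):
            self.adapt.hooks.execute('begin_train_iter',
                                     context=dict(iter=i, total=n_steps))
            # ... a real training step would go here ...
            self.adapt.hooks.execute('end_train_iter',
                                     context=dict(iter=i, total=n_steps))


if __name__ == '__main__':
    seen = []

    def count_iter(iter, **kwargs):
        # Fires on every iteration, because of the always() condition.
        seen.append(iter)

    def report(iter, **kwargs):
        # Fires only when the exec_condition below returns True.
        print('finished iteration %d' % iter)

    Trainer.adapt.hooks.register('begin_train_iter',
                                 function=count_iter,
                                 exec_condition=always())
    Trainer.adapt.hooks.register('end_train_iter',
                                 function=report,
                                 exec_condition=lambda iter, **kwargs: iter % 20 == 0)

    Trainer().adapt(n_steps=100)
    print('%d iterations observed by count_iter' % len(seen))

Run as a script, report prints at iterations 0, 20, 40, 60 and 80 while count_iter sees all 100 iterations, mirroring the iter % 20 == 0 condition registered on 'end_train_iter' in the prototype's main().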