Mercurial > pylearn
view doc/v2_planning/plugin_PL.py @ 1460:86bf03990aad
added sgd_momentum_updates to gd module
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 06 Apr 2011 13:53:00 -0400 |
parents | 826d78f0135f |
children |
line wrap: on
line source
# Design sketch of a hook ("plugin") mechanism for learners.
#
# NOTE(review): `Model`, `Learner`, `Optimizer`, `theano`, `numpy`, `MNIST`
# and `PCA` are not defined or imported anywhere in this file; they are
# presumably supplied by the surrounding project -- confirm before running.
# Expressions written as `...` are placeholders left unresolved by the sketch.


# Hook declaration mechanism.
# These definitions come first because `declare_hooks` is used as a decorator
# inside the class bodies below and is therefore evaluated while those
# classes are being created.

class Hooks(object):
    '''
    Registry mapping hook names to the callbacks registered on them.
    '''

    def __init__(self):
        # The DB consists of a dictionary:
        # the keys are the hooks' names (as strings),
        # the values are lists of (function, exec_condition) pairs.
        self.db = {}

    def declare(self, name):
        '''
        Create an empty callback list for hook `name`.

        Raises KeyError if `name` has already been declared.
        '''
        if name in self.db:
            raise KeyError('Hook "%s" is already declared' % name)
        self.db[name] = []

    def execute(self, name, context):
        '''
        Run the callbacks registered on hook `name`.

        `context` is a dict that is expanded into keyword arguments for both
        the condition and the callback; a callback runs only when its
        condition returns a true value for that context.

        Raises KeyError if `name` was never declared.
        '''
        if name not in self.db:
            raise KeyError('Hook "%s" does not exist' % name)
        # TODO: add contextual information to context, like current time,
        # time of last call,...
        for fn, cond in self.db[name]:
            if cond(**context):
                fn(**context)

    def register(self, name, function, exec_condition):
        '''
        Attach `function` to hook `name`, guarded by `exec_condition`.

        Raises KeyError if `name` was never declared.
        '''
        if name not in self.db:
            raise KeyError('Hook "%s" does not exist' % name)
        self.db[name].append((function, exec_condition))

    # TODO: add __getattr__ to have more intuitive access to the hooks


def declare_hooks(hooks_list):
    '''
    Decorator factory: attach a `Hooks` registry to the decorated function
    as its `hooks` attribute and declare every name in `hooks_list` on it.

    The registry lives on the function object itself, so when a method is
    decorated the same registry is reached through every instance.
    '''
    def deco(f):
        f.hooks = Hooks()
        for hook_name in hooks_list:
            f.hooks.declare(hook_name)
        # Return the function: without this the decorated name is rebound
        # to None.
        return f
    return deco


# Conditions

def always():
    '''Return a condition that is true for any arguments.'''
    return lambda *args, **kwargs: True


class RBM(Model):
    '''
    Restricted Boltzmann Machine.
    '''

    def __init__(self, n_visible, n_hidden, visible=None, name=None):
        '''
        n_visible, n_hidden: sizes used to allocate the parameters.
        visible: optional symbolic input; a new matrix is created if None.
        name: prefix for variable names; defaults to the class name.
        '''
        if name is None:
            self.__name__ = self.__class__.__name__
        else:
            self.__name__ = name

        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # Test the argument: `self.visible` does not exist yet at this point.
        if visible is None:
            self.visible = theano.tensor.matrix(
                    name='.'.join([self.__name__, 'visible']))
        else:
            self.visible = visible

        self.W = theano.shared(
                numpy.zeros((n_visible, n_hidden), dtype=theano.config.floatX),
                name=self.__name__ + '.W')
        self.b_hid = theano.shared(
                numpy.zeros((n_hidden,), dtype=theano.config.floatX),
                name=self.__name__ + '.b_hid')
        # The visible bias has one entry per visible unit.
        self.b_vis = theano.shared(
                numpy.zeros((n_visible,), dtype=theano.config.floatX),
                name=self.__name__ + '.b_vis')

        self.inputs = [self.visible]
        self.targets = []
        self.parameters = [self.W, self.b_hid, self.b_vis]
        # TODO: outputs and gradients are placeholders in this sketch.
        # NOTE(review): DBN reads `rbm.hidden_expectation`, which is not
        # defined here.
        self.outputs = ...
        self.cost = None
        self.gradients = [...]


class LogisticRegression(Model):
    # NOTE(review): DBN reads `.output`, `.nll`, `.W` and `.b` from instances
    # of this class; none of them is defined in this sketch.
    pass


class GradientBasedLearner(Learner):
    '''
    Learner that uses a gradient-based Optimizer to train a Model.
    '''

    def __init__(self, model, optimizer, name=None):
        '''
        model: object providing `parameters`, `cost`, `inputs`, `targets`.
        optimizer: object providing `iterative_optimizer(parameters, cost)`.
        name: currently unused by the visible code.
        '''
        self.model = model
        self.optimizer = optimizer
        ...  # remaining setup is elided in this sketch
        self.updates = optimizer.iterative_optimizer(
                parameters=model.parameters,
                cost=model.cost)

        # TODO: not sure of how to interface data set with the function
        self.train_fn = theano.function(
                model.inputs + model.targets,
                model.cost,
                updates=self.updates)

    def use_dataset(self, dataset):
        '''Store `dataset`; `adapt` pulls batches from it with `.next()`.'''
        self.train_set = dataset

    # The decorator indicates that this function will declare some hooks.
    # More hooks could be automatically declared, for instance
    # 'begin_function' and 'end_function'
    @declare_hooks(['begin_train_iter', 'end_train_iter'])
    def adapt(self, n_steps=1):
        '''
        Run `n_steps` training iterations, firing 'begin_train_iter' before
        and 'end_train_iter' after each one.
        '''
        for i in range(n_steps):
            self.adapt.hooks.execute(
                    'begin_train_iter',
                    context=dict(iter=i, total=n_steps, locals=locals()))
            data = self.train_set.next()
            self.train_fn(data)
            self.adapt.hooks.execute(
                    'end_train_iter',
                    context=dict(iter=i, locals=locals()))


class SGD(Optimizer):
    '''
    Stochastic gradient descent with fixed learning rate.
    '''

    def __init__(self, step_size):
        self.step_size = step_size

    def iterative_optimizer(
            self,
            parameters,
            cost=None,
            gradients=None,
            stop=None,
            updates=None,
            ):
        '''
        Build the update dictionary `{p: p - step_size * g}`.

        parameters: variables to update.
        cost: differentiated w.r.t. `parameters` when `gradients` is None.
        gradients: one gradient per parameter, in the same order.
        stop: optional key that is mapped to False in the result.
        updates: optional existing dictionary; it is extended in place and
            returned.

        Raises ValueError if neither `cost` nor `gradients` is given, and
        KeyError if a parameter already has an entry in `updates`.
        '''
        if updates is not None:
            ret = updates
        else:
            ret = {}

        if gradients is None:
            if cost is None:
                raise ValueError(
                        'SGD needs to be provided either a cost or a '
                        'gradients list')
            gradients = theano.tensor.grad(cost, parameters)

        for p, g in zip(parameters, gradients):
            # Check `ret`, not `updates`: `updates` may be None.
            if p in ret:
                raise KeyError(
                        'Parameter %s already has an update value (%s)'
                        % (p, ret[p]))

            ret[p] = p - self.step_size * g

        # never stop
        if stop is not None:
            ret[stop] = False

        return ret


class DBN(Learner):
    '''
    Deep Belief Network.
    '''

    def __init__(self, n_layers, layer_config, n_ft_steps, ft_step_size,
                 input=None, name=None):
        '''
        n_layers: not used by the visible code; one layer is built per entry
            of `layer_config`.
        layer_config: sequence of mappings; the keys 'n_hidden' and
            'n_unsup_steps' are read from each one.
        n_ft_steps: not used by the visible code.
        ft_step_size: step size of the fine-tuning SGD optimizer.
        input: optional symbolic input; a new matrix is created if None.
        name: prefix for variable names; defaults to the class name.
        '''
        if name is None:
            self.__name__ = self.__class__.__name__
        else:
            self.__name__ = name

        # Layers are GradientBasedLearners, with DBN as Model
        self.layers = []
        # Pretraining cumulative schedule
        self.pt_cumul_schedule = [0]

        # Build the layers and the fully-connected model
        if input is None:
            self.input = theano.tensor.matrix(
                    name='.'.join([self.__name__, 'input']))
        else:
            self.input = input
        self.output = self.input
        self.ft_params = []
        for i, lconf in enumerate(layer_config):
            rbm = RBM(
                    n_visible=...,  # TODO: layer input size is not specified
                    n_hidden=lconf['n_hidden'],
                    visible=self.output)
            self.output = rbm.hidden_expectation
            layer = GradientBasedLearner(...)  # TODO: arguments elided
            self.layers.append(layer)
            self.pt_cumul_schedule.append(
                    self.pt_cumul_schedule[-1] + lconf['n_unsup_steps'])
            self.ft_params.extend([rbm.W, rbm.b_hid])

        # Build the fine-tunable model
        self.target = theano.tensor.ivector(
                name='.'.join([self.__name__, 'target']))
        logreg = LogisticRegression(...)  # TODO: arguments elided
        self.output = logreg.output
        self.cost = logreg.nll
        self.ft_params.extend([logreg.W, logreg.b])
        ft_optimizer = SGD(ft_step_size)
        self.ft_updates = ft_optimizer.iterative_optimizer(
                parameters=self.ft_params,
                cost=self.cost)
        self.ft_fn = theano.function(
                [self.input, self.target],
                self.cost,
                updates=self.ft_updates,
                name='.'.join([self.__name__, 'ft_fn']))

        # Number of pretraining steps performed so far.
        self.stage = 0

    @declare_hooks([
        'begin_pretrain_layer', 'end_pretrain_layer',
        'begin_finetune_iter', 'end_finetune_iter'])
    def adapt(self, n_steps=1):
        '''
        Each "step" is accomplished by the corresponding Learner
        (either an RBM, or the global NNet).

        NOTE(review): `self.dataset` and `self.train_set` are read below but
        never assigned anywhere in this file; the caller presumably has to
        set them first -- confirm.
        '''
        train_x, train_y = self.dataset
        n_remaining_steps = n_steps

        # Unsupervised pre-training
        for i, layer in enumerate(self.layers):
            if (self.pt_cumul_schedule[i] <= self.stage
                    and self.stage < self.pt_cumul_schedule[i + 1]):
                self.adapt.hooks.execute(
                        'begin_pretrain_layer',
                        context=dict(iter=i, total=len(self.layers),
                                     locals=locals()))
                # Never run past the end of this layer's schedule slot.
                n_pt_steps = min(n_remaining_steps,
                                 self.pt_cumul_schedule[i + 1] - self.stage)
                layer.use_dataset(train_x)
                layer.adapt(n_steps=n_pt_steps)
                self.stage += n_pt_steps
                n_remaining_steps -= n_pt_steps
                self.adapt.hooks.execute(
                        'end_pretrain_layer',
                        context=dict(iter=i, total=len(self.layers),
                                     locals=locals()))

            # For the next layer, the data needs to be preprocessed
            # NOTE(review): GradientBasedLearner defines no
            # `compute_Eh_given_v` in this file.
            train_x = layer.compute_Eh_given_v(train_x)  # or just compute_output?

        # Supervised fine-tuning
        if n_remaining_steps > 0:
            # NOTE(review): `sup_data` is built but the loop feeds
            # `self.train_set` to `ft_fn` instead -- confirm which is meant.
            sup_data = train_x, train_y
            for i in range(n_remaining_steps):
                # Use the hook names declared on this method.
                self.adapt.hooks.execute(
                        'begin_finetune_iter',
                        context=dict(iter=i, total=n_steps, locals=locals()))
                data = self.train_set.next()
                self.ft_fn(data)
                self.adapt.hooks.execute(
                        'end_finetune_iter',
                        context=dict(iter=i, locals=locals()))

    ## TODO: implement k-fold cross-validation


def main():
    '''Usage example: build a DBN and register hooks on its first layer.'''
    train_data = MNIST.gettrain()
    test_data = MNIST.gettest()

    train_x, train_y = train_data
    preprocessor = PCA(ndim=80)
    preprocessor.train(train_x)
    preprocessed_x = preprocessor.compute_output(train_x)

    ## for more robustness, we can have something like:
    # preprocessed_x = ProcessDataSet(orig_data = train_x, function = preprocessor.compute_output)

    x = theano.tensor.matrix()

    dbn = DBN(
            input=x,
            n_layers=3,
            layer_config=[dict(n_hidden=500, n_unsup_steps=1000)] * 3,
            # Illustrative values only: DBN requires both arguments.
            n_ft_steps=1000,
            ft_step_size=0.1,
            )

    dbn.layers[0].adapt.hooks.register(
            'begin_train_iter',
            function=...,  # placeholder: callback elided in this sketch
            exec_condition=always()
            )

    dbn.layers[0].adapt.hooks.register(
            'end_train_iter',
            function=...,  # placeholder: callback elided in this sketch
            exec_condition=lambda iter, **kwargs: iter % 20 == 0
            )


if __name__ == '__main__':
    main()