pylearn/algorithms/rbm.py @ 656:40cae12a9bb8
Get rid of old dbdict
author:   Pascal Lamblin <lamblinp@iro.umontreal.ca>
date:     Fri, 20 Feb 2009 15:08:29 -0500
parents:  de6de7c2c54b
children: 070a7d68d3a1
import sys, copy

import theano
from theano import tensor as T
from theano.tensor.nnet import sigmoid
from theano.compile import module
from theano import printing, pprint
from theano import compile

import numpy as N

from ..datasets import make_dataset
from .minimizer import make_minimizer
from .stopper import make_stopper


class RBM(T.RModule):

    # is it really necessary to pass ALL of these ? - GD
    def __init__(self, nvis=None, nhid=None, input=None, w=None, hidb=None,
                 visb=None, seed=0, lr=0.1):
        super(RBM, self).__init__()

        self.nhid, self.nvis = nhid, nvis
        self.lr = lr

        # symbolic theano stuff
        # what about multidimensional inputs/outputs ? do they have to be
        # flattened or should we use tensors instead ?
        self.w = w if w is not None else module.Member(T.dmatrix())
        self.visb = visb if visb is not None else module.Member(T.dvector())
        self.hidb = hidb if hidb is not None else module.Member(T.dvector())
        self.seed = seed

        # 1-step Markov chain: up (vis -> hid), down (hid -> vis), up again
        vis = T.dmatrix()
        hid = sigmoid(T.dot(vis, self.w) + self.hidb)
        hid_sample = self.random.binomial(T.shape(hid), 1, hid)
        neg_vis = sigmoid(T.dot(hid_sample, self.w.T) + self.visb)
        neg_vis_sample = self.random.binomial(T.shape(neg_vis), 1, neg_vis)
        neg_hid = sigmoid(T.dot(neg_vis_sample, self.w) + self.hidb)

        # method which executes the 1-step Markov chain (the CD update itself
        # is applied separately, via cd_update below)
        self.updownup = module.Method([vis], [hid, neg_vis_sample, neg_hid])

        # method performing the manual CD update, given the positive-phase and
        # negative-phase visible and hidden values
        vistemp = T.dmatrix()
        hidtemp = T.dmatrix()
        nvistemp = T.dmatrix()
        nhidtemp = T.dmatrix()
        self.cd_update = module.Method(
            [vistemp, hidtemp, nvistemp, nhidtemp],
            [],
            updates={
                self.w: self.w + self.lr * (T.dot(vistemp.T, hidtemp)
                                            - T.dot(nvistemp.T, nhidtemp)),
                self.visb: self.visb + self.lr * T.sum(vistemp - nvistemp, axis=0),
                self.hidb: self.hidb + self.lr * T.sum(hidtemp - nhidtemp, axis=0)})

    # TODO: add parameter for weight initialization
    def _instance_initialize(self, obj):
        obj.w = N.random.standard_normal((self.nvis, self.nhid))
        obj.visb = N.zeros(self.nvis)
        obj.hidb = N.zeros(self.nhid)
        obj.seed(self.seed)

    def _instance_cd1(self, obj, input, k=1):
        # positive phase, plus the first step of the negative chain
        poshid, negvissample, neghid = obj.updownup(input)

        # run the chain k-1 more steps for CD-k
        for i in xrange(k - 1):
            ahid, negvissample, neghid = obj.updownup(negvissample)

        # CD-k update
        obj.cd_update(input, poshid, negvissample, neghid)


def train_rbm(state, channel=lambda *args, **kwargs: None):
    dataset = make_dataset(**state.dataset)
    train = dataset.train

    rbm_module = RBM(
        nvis=train.x.shape[1],
        nhid=state['nhid'])
    rbm = rbm_module.make()

    batchsize = state.get('batchsize', 1)
    verbose = state.get('verbose', 1)
    iter = [0]  # note: counts minibatches, not epochs

    while iter[0] != state['max_iters']:
        for j in xrange(0, len(train.x) - batchsize + 1, batchsize):
            rbm.cd1(train.x[j:j + batchsize])
            if verbose > 1:
                print 'estimated train cost...'
            if iter[0] == state['max_iters']:
                break
            else:
                iter[0] += 1
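For reference, the rule that updownup and cd_update implement together is the standard CD-1 update, dW = lr * (v_pos.T h_pos - v_neg.T h_neg), with the analogous sums for the two bias vectors. The sketch below re-expresses that same up-down-up step and update in plain NumPy, which can be handy for checking the math now that the old Theano module API is gone. It is illustrative only: the name cd1_step, its signature, and the returned reconstruction error are not part of pylearn.

import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cd1_step(vis, w, visb, hidb, lr=0.1, rng=np.random):
    # Illustrative NumPy version of RBM.updownup followed by RBM.cd_update.
    # vis: (batch, nvis) binary data; w: (nvis, nhid); visb, hidb: bias vectors.
    # up: positive-phase hidden probabilities, then a binary sample
    hid = _sigmoid(vis.dot(w) + hidb)
    hid_sample = (rng.uniform(size=hid.shape) < hid).astype(vis.dtype)
    # down: negative-phase visible probabilities, then a binary sample
    neg_vis = _sigmoid(hid_sample.dot(w.T) + visb)
    neg_vis_sample = (rng.uniform(size=neg_vis.shape) < neg_vis).astype(vis.dtype)
    # up again: hidden probabilities driven by the reconstruction
    neg_hid = _sigmoid(neg_vis_sample.dot(w) + hidb)
    # same updates as cd_update above, applied in place
    w += lr * (vis.T.dot(hid) - neg_vis_sample.T.dot(neg_hid))
    visb += lr * (vis - neg_vis_sample).sum(axis=0)
    hidb += lr * (hid - neg_hid).sum(axis=0)
    # mean squared reconstruction error, handy for monitoring
    return ((vis - neg_vis) ** 2).mean()

Note also that train_rbm expects a jobman/dbdict-style state object rather than a plain dict: state.dataset is read as an attribute and forwarded to make_dataset, while 'nhid', 'batchsize' and 'max_iters' are read with item access.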