pylearn/algorithms/rbm.py @ 656:40cae12a9bb8
Get rid of old dbdict
author:   Pascal Lamblin <lamblinp@iro.umontreal.ca>
date:     Fri, 20 Feb 2009 15:08:29 -0500
parents:  de6de7c2c54b
children: 070a7d68d3a1
import sys, copy

import theano
from theano import tensor as T
from theano.tensor.nnet import sigmoid
from theano.compile import module
from theano import printing, pprint
from theano import compile

import numpy as N

from ..datasets import make_dataset
from .minimizer import make_minimizer
from .stopper import make_stopper


class RBM(T.RModule):

    # is it really necessary to pass ALL of these ? - GD
    def __init__(self, nvis=None, nhid=None, input=None, w=None, hidb=None,
                 visb=None, seed=0, lr=0.1):
        super(RBM, self).__init__()

        self.nhid, self.nvis = nhid, nvis
        self.lr = lr

        # symbolic theano stuff
        # what about multidimensional inputs/outputs ? do they have to be
        # flattened or should we use tensors instead ?
        self.w = w if w is not None else module.Member(T.dmatrix())
        self.visb = visb if visb is not None else module.Member(T.dvector())
        self.hidb = hidb if hidb is not None else module.Member(T.dvector())
        self.seed = seed

        # 1-step Markov chain: up (vis -> hid), down (hid -> vis), up again
        vis = T.dmatrix()
        hid = sigmoid(T.dot(vis, self.w) + self.hidb)
        hid_sample = self.random.binomial(T.shape(hid), 1, hid)
        neg_vis = sigmoid(T.dot(hid_sample, self.w.T) + self.visb)
        neg_vis_sample = self.random.binomial(T.shape(neg_vis), 1, neg_vis)
        neg_hid = sigmoid(T.dot(neg_vis_sample, self.w) + self.hidb)

        # method which executes the 1-step Markov chain (the CD update itself
        # is applied separately, via cd_update below)
        self.updownup = module.Method([vis], [hid, neg_vis_sample, neg_hid])

        # method performing the manual CD update, given the positive-phase and
        # negative-phase visible and hidden values
        vistemp = T.dmatrix()
        hidtemp = T.dmatrix()
        nvistemp = T.dmatrix()
        nhidtemp = T.dmatrix()
        self.cd_update = module.Method(
            [vistemp, hidtemp, nvistemp, nhidtemp],
            [],
            updates={
                self.w: self.w + self.lr * (T.dot(vistemp.T, hidtemp)
                                            - T.dot(nvistemp.T, nhidtemp)),
                self.visb: self.visb + self.lr * T.sum(vistemp - nvistemp, axis=0),
                self.hidb: self.hidb + self.lr * T.sum(hidtemp - nhidtemp, axis=0)})

    # TODO: add parameter for weight initialization
    def _instance_initialize(self, obj):
        obj.w = N.random.standard_normal((self.nvis, self.nhid))
        obj.visb = N.zeros(self.nvis)
        obj.hidb = N.zeros(self.nhid)
        obj.seed(self.seed)

    def _instance_cd1(self, obj, input, k=1):
        # positive phase, plus the first step of the negative chain
        poshid, negvissample, neghid = obj.updownup(input)

        # run the chain k-1 more steps for CD-k
        for i in xrange(k - 1):
            ahid, negvissample, neghid = obj.updownup(negvissample)

        # CD-k update
        obj.cd_update(input, poshid, negvissample, neghid)


def train_rbm(state, channel=lambda *args, **kwargs: None):
    dataset = make_dataset(**state.dataset)
    train = dataset.train

    rbm_module = RBM(
        nvis=train.x.shape[1],
        nhid=state['nhid'])
    rbm = rbm_module.make()

    batchsize = state.get('batchsize', 1)
    verbose = state.get('verbose', 1)
    iter = [0]  # note: counts minibatches, not epochs

    while iter[0] != state['max_iters']:
        for j in xrange(0, len(train.x) - batchsize + 1, batchsize):
            rbm.cd1(train.x[j:j + batchsize])
            if verbose > 1:
                print 'estimated train cost...'
            if iter[0] == state['max_iters']:
                break
            else:
                iter[0] += 1
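For reference, the rule that updownup and cd_update implement together is the standard CD-1 update, dW = lr * (v_pos.T h_pos - v_neg.T h_neg), with the analogous sums for the two bias vectors. The sketch below re-expresses that same up-down-up step and update in plain NumPy, which can be handy for checking the math now that the old Theano module API is gone. It is illustrative only: the name cd1_step, its signature, and the returned reconstruction error are not part of pylearn.

import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cd1_step(vis, w, visb, hidb, lr=0.1, rng=np.random):
    # Illustrative NumPy version of RBM.updownup followed by RBM.cd_update.
    # vis: (batch, nvis) binary data; w: (nvis, nhid); visb, hidb: bias vectors.
    # up: positive-phase hidden probabilities, then a binary sample
    hid = _sigmoid(vis.dot(w) + hidb)
    hid_sample = (rng.uniform(size=hid.shape) < hid).astype(vis.dtype)
    # down: negative-phase visible probabilities, then a binary sample
    neg_vis = _sigmoid(hid_sample.dot(w.T) + visb)
    neg_vis_sample = (rng.uniform(size=neg_vis.shape) < neg_vis).astype(vis.dtype)
    # up again: hidden probabilities driven by the reconstruction
    neg_hid = _sigmoid(neg_vis_sample.dot(w) + hidb)
    # same updates as cd_update above, applied in place
    w += lr * (vis.T.dot(hid) - neg_vis_sample.T.dot(neg_hid))
    visb += lr * (vis - neg_vis_sample).sum(axis=0)
    hidb += lr * (hid - neg_hid).sum(axis=0)
    # mean squared reconstruction error, handy for monitoring
    return ((vis - neg_vis) ** 2).mean()

Note also that train_rbm expects a jobman/dbdict-style state object rather than a plain dict: state.dataset is read as an attribute and forwarded to make_dataset, while 'nhid', 'batchsize' and 'max_iters' are read with item access.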