changeset 544:de6de7c2c54b

merged and changed state to dictionary
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 17 Nov 2008 20:05:31 -0500
parents 34aba0efa3e9 (current diff) 5b4ccbf022c8 (diff)
children cb8eabe7d941 91735dbde209
files pylearn/algorithms/rbm.py
diffstat 1 files changed, 47 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/algorithms/rbm.py	Mon Nov 17 20:03:13 2008 -0500
+++ b/pylearn/algorithms/rbm.py	Mon Nov 17 20:05:31 2008 -0500
@@ -11,17 +11,20 @@
 from ..datasets import make_dataset
 from .minimizer import make_minimizer
 from .stopper import make_stopper
-
 from ..dbdict.experiment import subdict
 
-class RBM(module.FancyModule):
+class RBM(T.RModule):
 
     # is it really necessary to pass ALL of these ? - GD
     def __init__(self,
             nvis=None, nhid=None,
             input=None,
-            w=None, hidb=None, visb=None):
+            w=None, hidb=None, visb=None,
+            seed=0, lr=0.1):
+      
         super(RBM, self).__init__()
+        self.nhid, self.nvis = nhid, nvis
+        self.lr = lr
        
         # symbolic theano stuff
         # what about multidimensional inputs/outputs ? do they have to be 
@@ -29,28 +32,48 @@
         self.w = w if w is not None else module.Member(T.dmatrix())
         self.visb = visb if visb is not None else module.Member(T.dvector())
         self.hidb = hidb if hidb is not None else module.Member(T.dvector())
-
+        self.seed = seed;
+       
         # 1-step Markov chain
-        self.hid = sigmoid(T.dot(self.w,self.input) + self.hidb)
-        self.hid_sample = self.hid #TODO: sample!
-        self.vis = sigmoid(T.dot(self.w.T, self.hid) + self.visb)
-        self.vis_sample = self.vis #TODO: sample!
-        self.neg_hid = sigmoid(T.dot(self.w, self.vis) + self.hidb)
+        vis = T.dmatrix()
+        hid = sigmoid(T.dot(vis, self.w) + self.hidb)
+        hid_sample = self.random.binomial(T.shape(hid), 1, hid)
+        neg_vis = sigmoid(T.dot(hid_sample, self.w.T) + self.visb)
+        neg_vis_sample = self.random.binomial(T.shape(neg_vis), 1, neg_vis)
+        neg_hid = sigmoid(T.dot(neg_vis_sample, self.w) + self.hidb)
+
+        # function which executes 1-step Markov chain (with and without cd updates)
+        self.updownup = module.Method([vis], [hid, neg_vis_sample, neg_hid])
 
-        # cd1 updates:
-        self.params = [self.w, self.visb, self.hidb]
-        self.gradients = [
-            T.dot(self.hid, self.input) - T.dot(self.neg_hid, self.vis),
-            self.input - self.vis,
-            self.hid - self.neg_hid ]
+        # function to perform manual cd update given 2 visible and 2 hidden values
+        vistemp = T.dmatrix()
+        hidtemp = T.dmatrix()
+        nvistemp = T.dmatrix()
+        nhidtemp = T.dmatrix()
+        self.cd_update = module.Method([vistemp, hidtemp, nvistemp, nhidtemp],
+                [],
+                updates = {self.w: self.w + self.lr * 
+                                   (T.dot(vistemp.T, hidtemp) - 
+                                    T.dot(nvistemp.T, nhidtemp)),
+                           self.visb: self.visb + self.lr * 
+                                      (T.sum(vistemp - nvistemp,axis=0)),
+                           self.hidb: self.hidb + self.lr *
+                                      (T.sum(hidtemp - nhidtemp,axis=0))});
 
-    def __instance_initialize(self, obj):
-        obj.w = N.random.standard_normal((self.nhid,self.nvis))
-        obj.genb = N.zeros(self.nvis)
+    # TODO: add parameter for weight initialization
+    def _instance_initialize(self, obj):
+        obj.w = N.random.standard_normal((self.nvis,self.nhid))
+        obj.visb = N.zeros(self.nvis)
         obj.hidb = N.zeros(self.nhid)
+        obj.seed(self.seed);
 
-def RBM_cd():
-    pass;
+    def _instance_cd1(self, obj, input, k=1):
+        poshid, negvissample, neghid = obj.updownup(input)
+        for i in xrange(k-1):
+            ahid, negvissample, neghid = obj.updownup(negvissample)
+        # CD-k update
+        obj.cd_update(input, poshid, negvissample, neghid)
+
 
 def train_rbm(state, channel=lambda *args, **kwargs:None):
     dataset = make_dataset(**subdict_copy(state, prefix='dataset_'))
@@ -58,10 +81,11 @@
 
     rbm_module = RBM(
             nvis=train.x.shape[1],
-            nhid=state['size_hidden'])
+            nhid=state['nhid'])
+    rbm = rbm_module.make()
 
-    batchsize = state['batchsize']
-    verbose = state['verbose']
+    batchsize = state.get('batchsize', 1)
+    verbose = state.get('verbose', 1)
     iter = [0]
 
     while iter[0] != state['max_iters']: