diff sandbox/rbm/model.py @ 400:269d5c5a4209

Cleaned up, added sparse_instance
author Joseph Turian <turian@gmail.com>
date Tue, 08 Jul 2008 23:59:57 -0400
parents 8796b91a9f09
children 217c8789284b
line wrap: on
line diff
--- a/sandbox/rbm/model.py	Tue Jul 08 21:42:21 2008 -0400
+++ b/sandbox/rbm/model.py	Tue Jul 08 23:59:57 2008 -0400
@@ -14,13 +14,21 @@
 random.seed(globals.SEED)
 
 import pylearn.nnet_ops
+import pylearn.sparse_instance
 
 def sigmoid(v):
+    """
+    @todo: Move to pylearn.more_numpy
+    @todo: Fix to avoid floating point overflow in numpy.exp(-v) for large negative v.
+    """
 #    if x < -30.0: return 0.0
 #    if x > 30.0: return 1.0 
     return 1.0 / (1.0 + numpy.exp(-v))
 
 def sample(v):
+    """
+    @todo: Move to pylearn.more_numpy
+    """
     assert len(v.shape) == 2
     x = numpy.zeros(v.shape)
     for j in range(v.shape[0]):
@@ -35,14 +43,28 @@
     Compute the crossentropy of binary output wrt binary target.
     @note: We do not sum, crossentropy is computed by component.
     @todo: Rewrite as a scalar, and then broadcast to tensor.
+    @todo: Move to pylearn.more_numpy
+    @todo: Fix to avoid floating point overflow (numpy.log gives -inf when output is exactly 0 or 1).
     """
     return -(target * numpy.log(output) + (1 - target) * numpy.log(1 - output))
 
 
 class Model:
+    """
+    @todo: Input dimensions should be stored here, not as a global (globals.INPUT_DIMENSION).
+    """
     def __init__(self):
         self.parameters = parameters.Parameters(randomly_initialize=True)
 
+    def sample(self, instances, iterations=1):
+        v = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
+        for i in range(iterations):
+            q = sigmoid(self.parameters.b + dot(v, self.parameters.w))
+            h = sample(q)
+            p = sigmoid(self.parameters.c + dot(h, self.parameters.w.T))
+            v = sample(p)
+        return v
+
     def update(self, instances):
         """
         Update the L{Model} using one training instance.
@@ -50,12 +72,8 @@
         @todo: Should assert that nonzero_indices and zero_indices
         are correct (i.e. are truly nonzero/zero).
         """
-        v0 = numpy.zeros((len(instances), globals.INPUT_DIMENSION))
         minibatch = len(instances)
-        for i in range(minibatch):
-            for idx in instances[i].keys():
-                v0[i][idx] = instances[i][idx]
-
+        v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
         q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
         h0 = sample(q0)
         p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))