comparison sandbox/rbm/model.py @ 400:269d5c5a4209

Cleaned up, added sparse_instance
author Joseph Turian <turian@gmail.com>
date Tue, 08 Jul 2008 23:59:57 -0400
parents 8796b91a9f09
children 217c8789284b
comparison
legend: equal | deleted | inserted | replaced
left column: 399:8796b91a9f09 (parent) | right column: 400:269d5c5a4209 (this changeset)
12 from numpy import dot 12 from numpy import dot
13 import random 13 import random
14 random.seed(globals.SEED) 14 random.seed(globals.SEED)
15 15
16 import pylearn.nnet_ops 16 import pylearn.nnet_ops
17 import pylearn.sparse_instance
17 18
18 def sigmoid(v): 19 def sigmoid(v):
20 """
21 @todo: Move to pylearn.more_numpy
22 @todo: Fix to avoid floating point overflow.
23 """
19 # if x < -30.0: return 0.0 24 # if x < -30.0: return 0.0
20 # if x > 30.0: return 1.0 25 # if x > 30.0: return 1.0
21 return 1.0 / (1.0 + numpy.exp(-v)) 26 return 1.0 / (1.0 + numpy.exp(-v))
22 27
23 def sample(v): 28 def sample(v):
29 """
30 @todo: Move to pylearn.more_numpy
31 """
24 assert len(v.shape) == 2 32 assert len(v.shape) == 2
25 x = numpy.zeros(v.shape) 33 x = numpy.zeros(v.shape)
26 for j in range(v.shape[0]): 34 for j in range(v.shape[0]):
27 for i in range(v.shape[1]): 35 for i in range(v.shape[1]):
28 assert v[j][i] >= 0 and v[j][i] <= 1 36 assert v[j][i] >= 0 and v[j][i] <= 1
33 def crossentropy(output, target): 41 def crossentropy(output, target):
34 """ 42 """
35 Compute the crossentropy of binary output wrt binary target. 43 Compute the crossentropy of binary output wrt binary target.
36 @note: We do not sum, crossentropy is computed by component. 44 @note: We do not sum, crossentropy is computed by component.
37 @todo: Rewrite as a scalar, and then broadcast to tensor. 45 @todo: Rewrite as a scalar, and then broadcast to tensor.
46 @todo: Move to pylearn.more_numpy
47 @todo: Fix to avoid floating point overflow.
38 """ 48 """
39 return -(target * numpy.log(output) + (1 - target) * numpy.log(1 - output)) 49 return -(target * numpy.log(output) + (1 - target) * numpy.log(1 - output))
40 50
41 51
42 class Model: 52 class Model:
53 """
54 @todo: input dimensions should be stored here! not as a global.
55 """
43 def __init__(self): 56 def __init__(self):
44 self.parameters = parameters.Parameters(randomly_initialize=True) 57 self.parameters = parameters.Parameters(randomly_initialize=True)
58
59 def sample(self, instances, iterations=1):
60 v = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
61 for i in range(iterations):
62 q = sigmoid(self.parameters.b + dot(v, self.parameters.w))
63 h = sample(q)
64 p = sigmoid(self.parameters.c + dot(h, self.parameters.w.T))
65 v = sample(p)
66 return v
45 67
46 def update(self, instances): 68 def update(self, instances):
47 """ 69 """
48 Update the L{Model} using one training instance. 70 Update the L{Model} using one training instance.
49 @param instance: A dict from feature index to (non-zero) value. 71 @param instance: A dict from feature index to (non-zero) value.
50 @todo: Should assert that nonzero_indices and zero_indices 72 @todo: Should assert that nonzero_indices and zero_indices
51 are correct (i.e. are truly nonzero/zero). 73 are correct (i.e. are truly nonzero/zero).
52 """ 74 """
53 v0 = numpy.zeros((len(instances), globals.INPUT_DIMENSION))
54 minibatch = len(instances) 75 minibatch = len(instances)
55 for i in range(minibatch): 76 v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
56 for idx in instances[i].keys():
57 v0[i][idx] = instances[i][idx]
58
59 q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) 77 q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
60 h0 = sample(q0) 78 h0 = sample(q0)
61 p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T)) 79 p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
62 v1 = sample(p0) 80 v1 = sample(p0)
63 q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w)) 81 q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w))