comparison sandbox/rbm/model.py @ 402:ffdd2c199f2a

* Added momentum. * Added deterministic_reconstruction_error.
author Joseph Turian <turian@gmail.com>
date Wed, 09 Jul 2008 15:27:12 -0400
parents 217c8789284b
children be4209cd568f
comparison
equal deleted inserted replaced
401:217c8789284b 402:ffdd2c199f2a
53 """ 53 """
54 @todo: input dimensions should be stored here! not as a global. 54 @todo: input dimensions should be stored here! not as a global.
55 """ 55 """
def __init__(self):
    """
    Create a model with randomly initialized parameters and no
    accumulated momentum.
    """
    self.parameters = parameters.Parameters(randomly_initialize=True)
    # Previous steps for w, b and c; update() reads these for its
    # momentum term.  A plain 0 works before the first step.
    self.prev_dw = self.prev_db = self.prev_dc = 0
58 61
def deterministic_reconstruction(self, v0):
    """
    One up-down cycle, but a mean-field approximation (no sampling).

    The hidden activation probabilities are fed straight back down
    instead of being sampled first.

    @param v0: Visible-layer input, as accepted by C{dot(v0, w)}.
    @return: Visible-layer activation probabilities computed from the
        hidden-layer activation probabilities for v0.
    """
    params = self.parameters
    hidden_mean = sigmoid(params.b + dot(v0, params.w))
    return sigmoid(params.c + dot(hidden_mean, params.w.T))
69
def deterministic_reconstruction_error(self, v0):
    """
    Score the mean-field reconstruction of v0 against v0 itself.

    @param v0: Visible-layer input to reconstruct.
    @return: Whatever C{crossentropy(reconstruction, v0)} returns;
        update() sums it with C{numpy.sum}.
    @note: According to Yoshua, -log P(V1 = v0 | tilde(h)(v0)).
    """
    reconstruction = self.deterministic_reconstruction(v0)
    return crossentropy(reconstruction, v0)
66 75
def update(self, instances):
    """
    Update the L{Model} using one minibatch of training instances.

    Runs a single sampled up-down-up pass (v0 -> h0 -> v1 -> q1) and
    steps each parameter by the learning rate times the minibatch
    average of (data statistics - reconstruction statistics), plus
    C{globals.MOMENTUM} times the step taken on the previous call.
    The deterministic reconstruction cross-entropy of the minibatch is
    printed before and after the step.

    @param instances: The minibatch, in the form accepted by
        C{pylearn.sparse_instance.to_vector}; presumably a sequence of
        dicts from feature index to (non-zero) value (confirm).  An
        empty minibatch is a no-op.
    @todo: Should assert that nonzero_indices and zero_indices
        are correct (i.e. are truly nonzero/zero).
    """
    minibatch = len(instances)
    if minibatch == 0:
        # Nothing to learn from, and the averages below would divide
        # by zero and poison the parameters.
        return

    params = self.parameters
    v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
    print("old XENT: %s" % numpy.sum(self.deterministic_reconstruction_error(v0)))

    # Up, down, up again; both hidden and visible states are sampled on
    # the way, only the final hidden layer is kept as probabilities.
    q0 = sigmoid(params.b + dot(v0, params.w))
    h0 = sample(q0)
    p0 = sigmoid(params.c + dot(h0, params.w.T))
    v1 = sample(p0)
    q1 = sigmoid(params.b + dot(v1, params.w))

    momentum = globals.MOMENTUM
    dw = (LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
          + momentum * self.prev_dw)
    db = (LR * numpy.sum(h0 - q1, axis=0) / minibatch
          + momentum * self.prev_db)
    dc = (LR * numpy.sum(v0 - v1, axis=0) / minibatch
          + momentum * self.prev_dc)

    params.w += dw
    params.b += db
    params.c += dc

    # Remember this step under the names the momentum term reads
    # (prev_*).  Storing it only under last_* left prev_* at its initial
    # 0, so momentum never contributed.  last_* is kept as an alias in
    # case other code inspects it.
    self.prev_dw = self.last_dw = dw
    self.prev_db = self.last_db = db
    self.prev_dc = self.last_dc = dc

    print("new XENT: %s" % numpy.sum(self.deterministic_reconstruction_error(v0)))