# HG changeset patch
# User Joseph Turian
# Date 1215631632 14400
# Node ID ffdd2c199f2aeba67a474854cb78648a47d1545b
# Parent  217c8789284b1d96a3d1f365309964d676df2b51
* Added momentum.
* Added deterministic_reconstruction_error.
* Review fix: update() now stores the applied step in prev_dw/prev_db/prev_dc
  (the attributes __init__ creates and the momentum term reads) instead of
  last_dw/last_db/last_dc, which nothing read; without this the momentum term
  is always MOMENTUM * 0.

diff -r 217c8789284b -r ffdd2c199f2a sandbox/rbm/globals.py
--- a/sandbox/rbm/globals.py	Wed Jul 09 00:29:16 2008 -0400
+++ b/sandbox/rbm/globals.py	Wed Jul 09 15:27:12 2008 -0400
@@ -2,11 +2,13 @@
 Global variables.
 """
 
-INPUT_DIMENSION = 1000
+INPUT_DIMENSION = 10
 #INPUT_DIMENSION = 100
-HIDDEN_DIMENSION = 100
-#HIDDEN_DIMENSION = 20
+#HIDDEN_DIMENSION = 100
+HIDDEN_DIMENSION = 10
 #HIDDEN_DIMENSION = 6
 LEARNING_RATE = 0.1
 LR = LEARNING_RATE
+#MOMENTUM = 0.9
+MOMENTUM = 0
 SEED = 666
diff -r 217c8789284b -r ffdd2c199f2a sandbox/rbm/main.py
--- a/sandbox/rbm/main.py	Wed Jul 09 00:29:16 2008 -0400
+++ b/sandbox/rbm/main.py	Wed Jul 09 15:27:12 2008 -0400
@@ -8,12 +8,12 @@
 import numpy
 
 nonzero_instances = []
-nonzero_instances.append({0: 1, 1: 1})
-nonzero_instances.append({0: 1, 2: 1})
+#nonzero_instances.append({0: 1, 1: 1})
+#nonzero_instances.append({0: 1, 2: 1})
 
-#nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1})
-#nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8})
-##nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5})
+nonzero_instances.append({1: 0.1, 5: 0.5, 9: 1})
+nonzero_instances.append({2: 0.3, 5: 0.5, 8: 0.8})
+nonzero_instances.append({1: 0.2, 2: 0.3, 5: 0.5})
 
 import model
 model = model.Model()
diff -r 217c8789284b -r ffdd2c199f2a sandbox/rbm/model.py
--- a/sandbox/rbm/model.py	Wed Jul 09 00:29:16 2008 -0400
+++ b/sandbox/rbm/model.py	Wed Jul 09 15:27:12 2008 -0400
@@ -55,15 +55,24 @@
     """
     def __init__(self):
         self.parameters = parameters.Parameters(randomly_initialize=True)
+        self.prev_dw = 0
+        self.prev_db = 0
+        self.prev_dc = 0
 
-    def sample(self, instances, iterations=1):
-        v = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
-        for i in range(iterations):
-            q = sigmoid(self.parameters.b + dot(v, self.parameters.w))
-            h = sample(q)
-            p = sigmoid(self.parameters.c + dot(h, self.parameters.w.T))
+    def deterministic_reconstruction(self, v0):
+        """
+        One up-down cycle, but a mean-field approximation (no sampling).
+        """
+        q = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
+        p = sigmoid(self.parameters.c + dot(q, self.parameters.w.T))
         return p
 
+    def deterministic_reconstruction_error(self, v0):
+        """
+        @note: According to Yoshua, -log P(V1 = v0 | tilde(h)(v0)).
+        """
+        return crossentropy(self.deterministic_reconstruction(v0), v0)
+
     def update(self, instances):
         """
         Update the L{Model} using one training instance.
@@ -73,33 +82,49 @@
         """
         minibatch = len(instances)
         v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
+        print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
         q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
         h0 = sample(q0)
         p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
         v1 = sample(p0)
         q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w))
-        print
+
+        dw = LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + globals.MOMENTUM * self.prev_dw
+        db = LR * numpy.sum(h0 - q1, axis=0) / minibatch + globals.MOMENTUM * self.prev_db
+        dc = LR * numpy.sum(v0 - v1, axis=0) / minibatch + globals.MOMENTUM * self.prev_dc
+
+        self.parameters.w += dw
+        self.parameters.b += db
+        self.parameters.c += dc
+
+        self.prev_dw = dw
+        self.prev_db = db
+        self.prev_dc = dc
+
+        print "new XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
+
+#        print
 #        print "v[0]:", v0
 #        print "Q(h[0][i] = 1 | v[0]):", q0
 #        print "h[0]:", h0
 #        print "P(v[1][j] = 1 | h[0]):", p0
-        print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0))
+#        print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0))
 #        print "v[1]:", v1
 #        print "Q(h[1][i] = 1 | v[1]):", q1
-
+#
 #        print
 #        print v0.T.shape
 #        print h0.shape
 #        print dot(v0.T, h0).shape
 #        print self.parameters.w.shape
-        self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
+#        self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
 #        print
 #        print h0.shape
 #        print q1.shape
 #        print self.parameters.b.shape
-        self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
+#        self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
 #        print v0.shape, v1.shape
 #        print
 #        print self.parameters.c.shape
-        self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch
+#        self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch
 #        print self.parameters