comparison sandbox/rbm/model.py @ 402:ffdd2c199f2a
* Added momentum.
* Added deterministic_reconstruction_error.
| author | Joseph Turian <turian@gmail.com> |
|---|---|
| date | Wed, 09 Jul 2008 15:27:12 -0400 |
| parents | 217c8789284b |
| children | be4209cd568f |
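
In equations, the two additions named in the commit message amount to the following. This is an editorial reading of the diff below, not text from the changeset; $W$, $b$, $c$ are the code's weight matrix and hidden/visible bias vectors, $\eta$ stands for `LR`, $\mu$ for `globals.MOMENTUM`, and `crossentropy` is assumed to be the usual binary cross-entropy. The deterministic reconstruction is one mean-field up-down pass, its error is the cross-entropy against the input, and each parameter step is the CD-1 estimate plus a momentum term:

$$\hat{v} = \sigma\big(c + \sigma(b + v_0 W)\,W^{\top}\big), \qquad \mathrm{XENT}(\hat{v}, v_0) = -\sum_j \big[\, v_{0j}\log \hat{v}_j + (1 - v_{0j})\log (1 - \hat{v}_j) \,\big]$$

$$\Delta\theta_t = \frac{\eta}{m}\,\hat{g}_{\mathrm{CD}\text{-}1}(\theta) + \mu\,\Delta\theta_{t-1}, \qquad \theta \in \{W, b, c\},$$

where $m$ is the minibatch size and the CD-1 estimates read off the code as $\hat{g}(W) = v_0^{\top} h_0 - v_1^{\top} q_1$, $\hat{g}(b) = \sum (h_0 - q_1)$, and $\hat{g}(c) = \sum (v_0 - v_1)$, the sums running over the minibatch.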
```diff
--- a/sandbox/rbm/model.py	(401:217c8789284b)
+++ b/sandbox/rbm/model.py	(402:ffdd2c199f2a)
@@ -53,53 +53,78 @@
     """
     @todo: input dimensions should be stored here! not as a global.
     """
     def __init__(self):
         self.parameters = parameters.Parameters(randomly_initialize=True)
+        self.prev_dw = 0
+        self.prev_db = 0
+        self.prev_dc = 0
 
-    def sample(self, instances, iterations=1):
-        v = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
-        for i in range(iterations):
-            q = sigmoid(self.parameters.b + dot(v, self.parameters.w))
-            h = sample(q)
-            p = sigmoid(self.parameters.c + dot(h, self.parameters.w.T))
-        return p
+    def deterministic_reconstruction(self, v0):
+        """
+        One up-down cycle, but a mean-field approximation (no sampling).
+        """
+        q = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
+        p = sigmoid(self.parameters.c + dot(q, self.parameters.w.T))
+        return p
+
+    def deterministic_reconstruction_error(self, v0):
+        """
+        @note: According to Yoshua, -log P(V1 = v0 | tilde(h)(v0)).
+        """
+        return crossentropy(self.deterministic_reconstruction(v0), v0)
 
     def update(self, instances):
         """
         Update the L{Model} using one training instance.
         @param instance: A dict from feature index to (non-zero) value.
         @todo: Should assert that nonzero_indices and zero_indices
         are correct (i.e. are truly nonzero/zero).
         """
         minibatch = len(instances)
         v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
+        print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
         q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
         h0 = sample(q0)
         p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
         v1 = sample(p0)
         q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w))
-        print
+
+        dw = LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + globals.MOMENTUM * self.prev_dw
+        db = LR * numpy.sum(h0 - q1, axis=0) / minibatch + globals.MOMENTUM * self.prev_db
+        dc = LR * numpy.sum(v0 - v1, axis=0) / minibatch + globals.MOMENTUM * self.prev_dc
+
+        self.parameters.w += dw
+        self.parameters.b += db
+        self.parameters.c += dc
+
+        self.last_dw = dw
+        self.last_db = db
+        self.last_dc = dc
+
+        print "new XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
+
+        # print
         # print "v[0]:", v0
         # print "Q(h[0][i] = 1 | v[0]):", q0
         # print "h[0]:", h0
         # print "P(v[1][j] = 1 | h[0]):", p0
-        print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0))
+        # print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0))
         # print "v[1]:", v1
         # print "Q(h[1][i] = 1 | v[1]):", q1
-
+        #
         # print
         # print v0.T.shape
         # print h0.shape
         # print dot(v0.T, h0).shape
         # print self.parameters.w.shape
-        self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
+        # self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
         # print
         # print h0.shape
         # print q1.shape
         # print self.parameters.b.shape
-        self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
+        # self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
         # print v0.shape, v1.shape
         # print
         # print self.parameters.c.shape
-        self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch
+        # self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch
         # print self.parameters
```
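
The diff depends on pylearn helpers that are not shown (`parameters.Parameters`, `pylearn.sparse_instance`, `globals`). Below is a minimal self-contained NumPy sketch of the two additions, written for modern Python; the helper definitions, hyperparameter values, and layer sizes are illustrative assumptions, not pylearn's API. One detail worth flagging: `update()` above reads `self.prev_dw`/`prev_db`/`prev_dc` in the momentum terms but stores the new step into `self.last_dw`/`last_db`/`last_dc`, which leaves the `prev_*` values at their initial zero; the sketch stores back into `prev_*` on the assumption that accumulating momentum was the intent.

```python
import numpy

rng = numpy.random.RandomState(0)

LR = 0.1        # learning rate; stands in for the module-level LR
MOMENTUM = 0.9  # stands in for globals.MOMENTUM

def sigmoid(x):
    return 1.0 / (1.0 + numpy.exp(-x))

def sample(p):
    # Draw binary units from their Bernoulli means.
    return (rng.uniform(size=p.shape) < p).astype(p.dtype)

def crossentropy(p, v):
    # Per-example -[v log p + (1 - v) log(1 - p)], summed over units.
    eps = 1e-12
    return -numpy.sum(v * numpy.log(p + eps)
                      + (1 - v) * numpy.log(1 - p + eps), axis=1)

class RBM(object):
    def __init__(self, n_visible, n_hidden):
        self.w = rng.normal(scale=0.01, size=(n_visible, n_hidden))
        self.b = numpy.zeros(n_hidden)    # hidden biases
        self.c = numpy.zeros(n_visible)   # visible biases
        self.prev_dw = self.prev_db = self.prev_dc = 0.0

    def deterministic_reconstruction(self, v0):
        # One up-down cycle using mean-field values (no sampling).
        q = sigmoid(self.b + numpy.dot(v0, self.w))
        return sigmoid(self.c + numpy.dot(q, self.w.T))

    def deterministic_reconstruction_error(self, v0):
        return crossentropy(self.deterministic_reconstruction(v0), v0)

    def update(self, v0):
        # One CD-1 step: up, down, and up again (mean-field at the top).
        minibatch = len(v0)
        q0 = sigmoid(self.b + numpy.dot(v0, self.w))
        h0 = sample(q0)
        p0 = sigmoid(self.c + numpy.dot(h0, self.w.T))
        v1 = sample(p0)
        q1 = sigmoid(self.b + numpy.dot(v1, self.w))
        # CD-1 gradient estimates plus the momentum carried from the last call.
        dw = LR * (numpy.dot(v0.T, h0) - numpy.dot(v1.T, q1)) / minibatch \
             + MOMENTUM * self.prev_dw
        db = LR * numpy.sum(h0 - q1, axis=0) / minibatch + MOMENTUM * self.prev_db
        dc = LR * numpy.sum(v0 - v1, axis=0) / minibatch + MOMENTUM * self.prev_dc
        self.w += dw
        self.b += db
        self.c += dc
        # Store the step so the next call's momentum term sees it.
        self.prev_dw, self.prev_db, self.prev_dc = dw, db, dc

rbm = RBM(n_visible=6, n_hidden=4)
v0 = (rng.uniform(size=(5, 6)) < 0.5).astype(float)
for _ in range(10):
    rbm.update(v0)
print("XENT:", numpy.sum(rbm.deterministic_reconstruction_error(v0)))
```

Running it prints the summed reconstruction cross-entropy after ten CD-1 steps on a toy minibatch; setting `MOMENTUM = 0.0` recovers the plain updates of the parent revision.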