Mercurial > pylearn
annotate sandbox/rbm/model.py @ 405:be4209cd568f
Added weight decay
author | Joseph Turian <turian@gmail.com> |
---|---|
date | Thu, 10 Jul 2008 01:17:40 -0400 |
parents | ffdd2c199f2a |
children | c2e6a8fcc35e |
rev | line source |
---|---|
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
1 """ |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
2 The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
3 Weston's sampling trick (2008). |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
4 """ |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
5 |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
6 import parameters |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
7 |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
8 import globals |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
9 from globals import LR |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
10 |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
11 import numpy |
395
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
12 from numpy import dot |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
13 import random |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
14 random.seed(globals.SEED) |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
15 |
395
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
16 import pylearn.nnet_ops |
400
269d5c5a4209
Cleaned up, added sparse_instance
Joseph Turian <turian@gmail.com>
parents:
399
diff
changeset
|
17 import pylearn.sparse_instance |
395
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
18 |
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
def sigmoid(v):
    """
    Elementwise logistic function, 1 / (1 + exp(-v)).

    Computed as exp(-logaddexp(0, -v)), which is numerically stable:
    the naive form 1.0 / (1.0 + numpy.exp(-v)) overflows in exp() for
    large-magnitude negative v (this addresses the old
    "Fix to avoid floating point overflow" todo).

    @param v: a scalar or numpy array of activations.
    @return: sigmoid of v, with the same shape as v.
    @todo: Move to pylearn.more_numpy
    """
    return numpy.exp(-numpy.logaddexp(0, -v))
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
27 |
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
def sample(v):
    """
    Draw independent Bernoulli samples from a matrix of probabilities.

    Each output entry is 1 with probability v[j][i] and 0 otherwise,
    drawn with the module-level `random` generator (entries are visited
    row by row, left to right, so the stream of random draws is fixed).

    @param v: 2-d numpy array of probabilities, each in [0, 1].
    @return: numpy array of 0/1 values with the same shape as v.
    @todo: Move to pylearn.more_numpy
    """
    assert len(v.shape) == 2
    rows, cols = v.shape
    result = numpy.zeros(v.shape)
    for r in range(rows):
        for c in range(cols):
            p = v[r][c]
            assert 0 <= p <= 1
            if random.random() < p:
                result[r][c] = 1
            else:
                result[r][c] = 0
    return result
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
40 |
def crossentropy(output, target):
    """
    Componentwise binary cross-entropy of `output` with respect to `target`.

    @note: We do not sum, crossentropy is computed by component.
    @todo: Rewrite as a scalar, and then broadcast to tensor.
    @todo: Move to pylearn.more_numpy
    @todo: Fix to avoid floating point overflow.
    """
    # -[t*log(o) + (1-t)*log(1-o)], evaluated elementwise.
    on_term = target * numpy.log(output)
    off_term = (1 - target) * numpy.log(1 - output)
    return -(on_term + off_term)
50 | |
51 | |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
52 class Model: |
400
269d5c5a4209
Cleaned up, added sparse_instance
Joseph Turian <turian@gmail.com>
parents:
399
diff
changeset
|
53 """ |
269d5c5a4209
Cleaned up, added sparse_instance
Joseph Turian <turian@gmail.com>
parents:
399
diff
changeset
|
54 @todo: input dimensions should be stored here! not as a global. |
269d5c5a4209
Cleaned up, added sparse_instance
Joseph Turian <turian@gmail.com>
parents:
399
diff
changeset
|
55 """ |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
56 def __init__(self): |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
57 self.parameters = parameters.Parameters(randomly_initialize=True) |
402 | 58 self.prev_dw = 0 |
59 self.prev_db = 0 | |
60 self.prev_dc = 0 | |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
61 |
402 | 62 def deterministic_reconstruction(self, v0): |
63 """ | |
64 One up-down cycle, but a mean-field approximation (no sampling). | |
65 """ | |
66 q = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) | |
67 p = sigmoid(self.parameters.c + dot(q, self.parameters.w.T)) | |
401 | 68 return p |
400
269d5c5a4209
Cleaned up, added sparse_instance
Joseph Turian <turian@gmail.com>
parents:
399
diff
changeset
|
69 |
402 | 70 def deterministic_reconstruction_error(self, v0): |
71 """ | |
72 @note: According to Yoshua, -log P(V1 = v0 | tilde(h)(v0)). | |
73 """ | |
74 return crossentropy(self.deterministic_reconstruction(v0), v0) | |
75 | |
399 | 76 def update(self, instances): |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
77 """ |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
78 Update the L{Model} using one training instance. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
79 @param instance: A dict from feature index to (non-zero) value. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
80 @todo: Should assert that nonzero_indices and zero_indices |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
81 are correct (i.e. are truly nonzero/zero). |
405 | 82 @todo: Multiply WEIGHT_DECAY by LEARNING_RATE, as done in Semantic Hashing? |
83 @todo: Decay the biases too? | |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
84 """ |
399 | 85 minibatch = len(instances) |
400
269d5c5a4209
Cleaned up, added sparse_instance
Joseph Turian <turian@gmail.com>
parents:
399
diff
changeset
|
86 v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION) |
402 | 87 print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0)) |
395
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
88 q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) |
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
89 h0 = sample(q0) |
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
90 p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T)) |
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
91 v1 = sample(p0) |
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
92 q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w)) |
402 | 93 |
94 dw = LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + globals.MOMENTUM * self.prev_dw | |
95 db = LR * numpy.sum(h0 - q1, axis=0) / minibatch + globals.MOMENTUM * self.prev_db | |
96 dc = LR * numpy.sum(v0 - v1, axis=0) / minibatch + globals.MOMENTUM * self.prev_dc | |
97 | |
405 | 98 self.parameters.w *= (1 - globals.WEIGHT_DECAY) |
99 | |
402 | 100 self.parameters.w += dw |
101 self.parameters.b += db | |
102 self.parameters.c += dc | |
103 | |
104 self.last_dw = dw | |
105 self.last_db = db | |
106 self.last_dc = dc | |
107 | |
108 print "new XENT:", numpy.sum(self.deterministic_reconstruction_error(v0)) | |
109 | |
110 # print | |
399 | 111 # print "v[0]:", v0 |
112 # print "Q(h[0][i] = 1 | v[0]):", q0 | |
113 # print "h[0]:", h0 | |
114 # print "P(v[1][j] = 1 | h[0]):", p0 | |
402 | 115 # print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0)) |
399 | 116 # print "v[1]:", v1 |
117 # print "Q(h[1][i] = 1 | v[1]):", q1 | |
402 | 118 # |
399 | 119 # print |
120 # print v0.T.shape | |
121 # print h0.shape | |
122 # print dot(v0.T, h0).shape | |
123 # print self.parameters.w.shape | |
402 | 124 # self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch |
399 | 125 # print |
126 # print h0.shape | |
127 # print q1.shape | |
128 # print self.parameters.b.shape | |
402 | 129 # self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch |
399 | 130 # print v0.shape, v1.shape |
131 # print | |
132 # print self.parameters.c.shape | |
402 | 133 # self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch |
398 | 134 # print self.parameters |