Mercurial > pylearn
annotate sandbox/rbm/model.py @ 400:269d5c5a4209
Cleaned up, added sparse_instance
author | Joseph Turian <turian@gmail.com> |
---|---|
date | Tue, 08 Jul 2008 23:59:57 -0400 |
parents | 8796b91a9f09 |
children | 217c8789284b |
rev | line source |
---|---|
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
1 """ |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
2 The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
3 Weston's sampling trick (2008). |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
4 """ |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
5 |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
6 import parameters |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
7 |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
8 import globals |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
9 from globals import LR |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
10 |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
11 import numpy |
395
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
12 from numpy import dot |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
13 import random |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
14 random.seed(globals.SEED) |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
15 |
395
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
16 import pylearn.nnet_ops |
400
269d5c5a4209
Cleaned up, added sparse_instance
Joseph Turian <turian@gmail.com>
parents:
399
diff
changeset
|
17 import pylearn.sparse_instance |
395
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
18 |
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
def sigmoid(v):
    """
    Numerically stable elementwise logistic sigmoid: 1 / (1 + exp(-v)).

    Computed via exp(-|v|), whose argument is always non-positive, so the
    exponential can never overflow; large-magnitude inputs saturate cleanly
    to 0.0 or 1.0.  (Resolves the former
    "@todo: Fix to avoid floating point overflow.")

    @param v: a numpy array (or anything numpy.asarray accepts) of activations.
    @return: an array of the same shape with values in [0, 1].
    @todo: Move to pylearn.more_numpy
    """
    v = numpy.asarray(v, dtype=float)
    # exp(-|v|) lies in (0, 1], so neither branch below can overflow.
    e = numpy.exp(-numpy.abs(v))
    # For v >= 0: 1/(1+exp(-v)).  For v < 0: exp(v)/(1+exp(v)) -- the same
    # function, rearranged so the exponent is always non-positive.
    return numpy.where(v >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
70019965f888
Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents:
393
diff
changeset
|
def sample(v):
    """
    Draw independent Bernoulli samples from a matrix of probabilities.

    Each entry of the result is 1.0 with probability v[r][c] and 0.0
    otherwise, drawn from the module-level (seeded) `random` generator,
    one draw per cell in row-major order.

    @param v: a 2-d numpy array of probabilities, each in [0, 1].
    @return: a new float array of the same shape containing only 0s and 1s.
    @todo: Move to pylearn.more_numpy
    """
    assert len(v.shape) == 2
    out = numpy.zeros(v.shape)
    rows, cols = v.shape
    for r in range(rows):
        for c in range(cols):
            p = v[r][c]
            assert p >= 0 and p <= 1
            out[r][c] = 1 if random.random() < p else 0
    return out
def crossentropy(output, target):
    """
    Componentwise cross-entropy of binary `output` wrt binary `target`.

    @note: We do not sum; crossentropy is computed by component.
    @param output: predicted probabilities, each strictly in (0, 1).
    @param target: binary targets, same shape as `output`.
    @return: -(target*log(output) + (1-target)*log(1-output)), elementwise.
    @todo: Rewrite as a scalar, and then broadcast to tensor.
    @todo: Move to pylearn.more_numpy
    @todo: Fix to avoid floating point overflow.
    """
    hit_term = target * numpy.log(output)
    miss_term = (1 - target) * numpy.log(1 - output)
    return -(hit_term + miss_term)
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
class Model:
    """
    A model trained with one-step contrastive divergence (CD-1) on
    sparse input instances, with alternating Gibbs sampling between a
    visible layer `v` and a hidden layer `h`.

    Parameters live in self.parameters: `w` (visible-to-hidden weight
    matrix, used as w and w.T below), `b` (hidden-unit bias) and `c`
    (visible-unit bias) -- names inferred from usage in this class;
    see parameters.Parameters for the actual definitions.

    NOTE(review): this is Python 2 code (`print` statements below).

    @todo: input dimensions should be stored here! not as a global.
    """
    def __init__(self):
        # Randomly initialized parameters; dimensions come from
        # module-level globals (see the class @todo above).
        self.parameters = parameters.Parameters(randomly_initialize=True)

    def sample(self, instances, iterations=1):
        """
        Run `iterations` rounds of alternating Gibbs sampling starting
        from the given instances and return the final sampled visible
        vector.

        @param instances: sparse instances, densified to width
            globals.INPUT_DIMENSION via pylearn.sparse_instance.to_vector.
        @param iterations: number of visible -> hidden -> visible steps.
        @return: a matrix of sampled (binary 0/1) visible units.
        """
        v = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
        for i in range(iterations):
            # Hidden probabilities given visible units, then a binary sample.
            q = sigmoid(self.parameters.b + dot(v, self.parameters.w))
            h = sample(q)
            # Visible probabilities given hidden sample, then a binary sample.
            p = sigmoid(self.parameters.c + dot(h, self.parameters.w.T))
            v = sample(p)
        return v

    def update(self, instances):
        """
        Update the L{Model} from a minibatch of training instances,
        using one step of contrastive divergence (CD-1).

        @param instances: a sequence of sparse instances (each presumably
            a dict from feature index to non-zero value -- confirm against
            pylearn.sparse_instance.to_vector); gradients are averaged
            over len(instances).
        @todo: Should assert that nonzero_indices and zero_indices
        are correct (i.e. are truly nonzero/zero).
        """
        minibatch = len(instances)
        # Positive phase: clamp the data v0, sample hidden units h0.
        v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
        q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
        h0 = sample(q0)
        # Negative phase: one step of Gibbs sampling back to a visible
        # reconstruction v1 and its hidden probabilities q1.
        p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
        v1 = sample(p0)
        q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w))
        print
#        print "v[0]:", v0
#        print "Q(h[0][i] = 1 | v[0]):", q0
#        print "h[0]:", h0
#        print "P(v[1][j] = 1 | h[0]):", p0
        # Reconstruction cross-entropy: a progress measure, not the
        # objective being optimized by CD.
        print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0))
#        print "v[1]:", v1
#        print "Q(h[1][i] = 1 | v[1]):", q1

#        print
#        print v0.T.shape
#        print h0.shape
#        print dot(v0.T, h0).shape
#        print self.parameters.w.shape
        # CD-1 gradient ascent: positive-phase statistics minus
        # negative-phase statistics, averaged over the minibatch.
        self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
#        print
#        print h0.shape
#        print q1.shape
#        print self.parameters.b.shape
        self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
#        print v0.shape, v1.shape
#        print
#        print self.parameters.c.shape
        self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch
#        print self.parameters