annotate sandbox/rbm/model.py @ 400:269d5c5a4209

Cleaned up, added sparse_instance
author Joseph Turian <turian@gmail.com>
date Tue, 08 Jul 2008 23:59:57 -0400
parents 8796b91a9f09
children 217c8789284b
rev   line source
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
1 """
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
2 The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
3 Weston's sampling trick (2008).
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
4 """
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
5
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
6 import parameters
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
7
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
8 import globals
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
9 from globals import LR
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
10
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
11 import numpy
395
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
12 from numpy import dot
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
13 import random
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
14 random.seed(globals.SEED)
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
15
395
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
16 import pylearn.nnet_ops
400
269d5c5a4209 Cleaned up, added sparse_instance
Joseph Turian <turian@gmail.com>
parents: 399
diff changeset
17 import pylearn.sparse_instance
395
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
18
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
def sigmoid(v):
    """
    Numerically stable elementwise logistic function 1 / (1 + exp(-v)).

    Uses the identity sigmoid(v) = exp(min(v, 0)) / (1 + exp(-|v|)), whose
    exp() arguments are always <= 0, so large-magnitude inputs can no
    longer overflow (resolves the floating point overflow @todo; the old
    threshold hack is no longer needed).

    @param v: scalar or numpy array of pre-activations.
    @return: sigmoid applied elementwise, same shape as v.
    @todo: Move to pylearn.more_numpy
    """
    # Both exp() calls receive non-positive arguments, so they underflow
    # harmlessly toward 0 instead of overflowing to inf.
    return numpy.exp(numpy.minimum(v, 0)) / (1.0 + numpy.exp(-numpy.abs(v)))
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
27
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
def sample(v):
    """
    Draw one independent Bernoulli sample per entry of the probability
    matrix v: entry (j, i) becomes 1.0 with probability v[j][i], else 0.0.

    Consumes one random.random() draw per entry in row-major order, so the
    stdlib RNG stream (seeded at module level from globals.SEED) matches
    the original nested-loop implementation exactly.

    @param v: 2-D numpy array of probabilities, each in [0, 1].
    @return: float array of 0.0/1.0 with the same shape as v.
    @todo: Move to pylearn.more_numpy
    """
    assert len(v.shape) == 2
    # Validate all probabilities in one vectorized pass instead of
    # re-asserting inside the per-element inner loop.
    assert numpy.all((v >= 0) & (v <= 1))
    x = numpy.zeros(v.shape)
    for j, row in enumerate(v):
        # Row-major draws: same random.random() call order as before.
        x[j] = [1.0 if random.random() < p else 0.0 for p in row]
    return x
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
40
398
6e55ccb7e2bf Better output
Joseph Turian <turian@gmail.com>
parents: 396
diff changeset
def crossentropy(output, target):
    """
    Compute the crossentropy of binary output wrt binary target.

    @param output: predicted probabilities, elementwise in (0, 1).
    @param target: binary (0/1) targets, broadcastable against output.
    @note: We do not sum, crossentropy is computed by component.
    @todo: Rewrite as a scalar, and then broadcast to tensor.
    @todo: Move to pylearn.more_numpy
    @todo: Fix to avoid floating point overflow.
    """
    # Log-likelihood contributions of the "on" and "off" target cases.
    on_term = target * numpy.log(output)
    off_term = (1 - target) * numpy.log(1 - output)
    # Negate: crossentropy is the negative log-likelihood, per component.
    return -(on_term + off_term)
6e55ccb7e2bf Better output
Joseph Turian <turian@gmail.com>
parents: 396
diff changeset
50
6e55ccb7e2bf Better output
Joseph Turian <turian@gmail.com>
parents: 396
diff changeset
51
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
class Model:
    """
    A restricted Boltzmann machine over binary visible units, trained by
    one-step contrastive divergence on minibatches of sparse instances.

    Parameters live in self.parameters: w is used as the visible-to-hidden
    weight matrix (dot(v, w) in the up pass, dot(h, w.T) in the down pass),
    b as the hidden bias and c as the visible bias.

    @todo: input dimensions should be stored here! not as a global.
    """
    def __init__(self):
        # Weights/biases come randomly initialized by the sibling
        # parameters module's Parameters container.
        self.parameters = parameters.Parameters(randomly_initialize=True)

    def sample(self, instances, iterations=1):
        """
        Run `iterations` rounds of alternating Gibbs sampling starting
        from the visible configuration given by `instances`, and return
        the final sampled binary visible matrix.

        @param instances: sparse instances; densified to a
            (minibatch x INPUT_DIMENSION) matrix via
            pylearn.sparse_instance.to_vector.
        @param iterations: number of up/down Gibbs steps (default 1).
        """
        v = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
        for i in range(iterations):
            # Up pass: hidden activation probabilities, then a binary sample.
            q = sigmoid(self.parameters.b + dot(v, self.parameters.w))
            h = sample(q)
            # Down pass: visible reconstruction probabilities, then a sample.
            p = sigmoid(self.parameters.c + dot(h, self.parameters.w.T))
            v = sample(p)
        return v

    def update(self, instances):
        """
        Update the L{Model} using one training instance.
        @param instance: A dict from feature index to (non-zero) value.
        @todo: Should assert that nonzero_indices and zero_indices
        are correct (i.e. are truly nonzero/zero).
        """
        # One step of contrastive divergence (CD-1) over the minibatch.
        minibatch = len(instances)
        v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
        # Positive phase: hidden probabilities q0 and binary sample h0
        # conditioned on the data v0.
        q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
        h0 = sample(q0)
        # Negative phase: reconstruct the visibles (p0 -> v1), then
        # recompute the hidden probabilities q1 from the reconstruction.
        p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
        v1 = sample(p0)
        q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w))
        print
#        print "v[0]:", v0
#        print "Q(h[0][i] = 1 | v[0]):", q0
#        print "h[0]:", h0
#        print "P(v[1][j] = 1 | h[0]):", p0
        # Reconstruction cross-entropy, printed as a progress measure
        # (it is not the quantity CD-1 optimizes).
        print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0))
#        print "v[1]:", v1
#        print "Q(h[1][i] = 1 | v[1]):", q1

#        print
#        print v0.T.shape
#        print h0.shape
#        print dot(v0.T, h0).shape
#        print self.parameters.w.shape
        # CD-1 updates, averaged over the minibatch: positive statistics
        # use the sampled h0, negative statistics the probabilities q1.
        self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
#        print
#        print h0.shape
#        print q1.shape
#        print self.parameters.b.shape
        self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
#        print v0.shape, v1.shape
#        print
#        print self.parameters.c.shape
        self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch
#        print self.parameters