annotate sandbox/rbm/model.py @ 399:8796b91a9f09

RBM minibatch works
author Joseph Turian <turian@gmail.com>
date Tue, 08 Jul 2008 21:42:21 -0400
parents 6e55ccb7e2bf
children 269d5c5a4209
rev   line source
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
1 """
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
2 The model for a restricted Boltzmann machine (RBM) over sparse inputs, adapted
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
3 from the autoassociator that used Ronan Collobert + Jason Weston's sampling trick (2008).
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
4 """
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
5
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
6 import parameters
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
7
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
8 import globals
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
9 from globals import LR
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
10
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
11 import numpy
395
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
12 from numpy import dot
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
13 import random
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
14 random.seed(globals.SEED)
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
15
395
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
16 import pylearn.nnet_ops
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
17
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
def sigmoid(v):
    """
    Compute the logistic sigmoid 1 / (1 + exp(-v)), by component.
    @param v: A number or numpy array.
    @return: The sigmoid of v, with the same shape as v.
    @note: Evaluated as exp(-log(1 + exp(-v))) through numpy.logaddexp, so
    a large negative v gives a result near 0 without overflowing exp().
    """
    # logaddexp(0, -v) == log(exp(0) + exp(-v)) == log(1 + exp(-v)), and it
    # is computed without ever forming exp(-v) when that would overflow.
    return numpy.exp(-numpy.logaddexp(0.0, -v))
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
22
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
def sample(v):
    """
    Draw a binary sample from a matrix of Bernoulli probabilities.
    @param v: A 2-dimensional numpy array; each entry is the probability
    that the matching output entry is 1, so it must lie in [0, 1].
    @return: An array with the shape of v, holding 1.0 where the draw fell
    below the probability and 0.0 elsewhere.
    @note: Draws come from the stdlib C{random} module, one per entry in
    row-major order, so results are reproducible under C{random.seed}.
    """
    assert len(v.shape) == 2
    # A NaN entry fails both comparisons, so it is rejected here as well.
    assert numpy.all((v >= 0) & (v <= 1))
    # Keep using random.random() (not numpy.random) so that the stream
    # seeded at module import time is the one being consumed.
    draws = numpy.array([random.random() for _ in range(v.size)])
    return (draws.reshape(v.shape) < v).astype(float)
70019965f888 Basic, broken RBM implementation
Joseph Turian <turian@gmail.com>
parents: 393
diff changeset
32
398
6e55ccb7e2bf Better output
Joseph Turian <turian@gmail.com>
parents: 396
diff changeset
def crossentropy(output, target):
    """
    Compute the crossentropy of binary output wrt binary target.
    @param output: Predicted probabilities, a number or numpy array.
    @param target: Target values, broadcastable against output.
    @return: -(target * log(output) + (1 - target) * log(1 - output)),
    by component.
    @note: We do not sum, crossentropy is computed by component.
    @note: A term whose weight is exactly zero contributes zero, so a
    saturated output that agrees with its target costs 0 rather than NaN
    (0 * log(0)). A saturated output that disagrees still costs inf.
    @todo: Rewrite as a scalar, and then broadcast to tensor.
    """
    on_term = target * numpy.log(output)
    off_term = (1 - target) * numpy.log(1 - output)
    # 0 * log(0) evaluates to NaN; drop the term whose weight is zero so
    # that only the term selected by the target can contribute.
    on_term = numpy.where(target == 0, 0.0, on_term)
    off_term = numpy.where(target == 1, 0.0, off_term)
    return -(on_term + off_term)
6e55ccb7e2bf Better output
Joseph Turian <turian@gmail.com>
parents: 396
diff changeset
40
6e55ccb7e2bf Better output
Joseph Turian <turian@gmail.com>
parents: 396
diff changeset
41
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
class Model:
    """
    A model whose parameters (w, b, c) are trained by L{update}.

    The weight matrix w maps visible units to hidden units, b is added to
    the hidden pre-activations and c to the visible pre-activations.
    """

    def __init__(self):
        self.parameters = parameters.Parameters(randomly_initialize=True)

    def update(self, instances):
        """
        Update the L{Model} using one minibatch of training instances.

        One up-down-up pass is made from the data (v0 -> h0 -> v1 -> q1),
        and the parameters are moved by LR times the minibatch average of
        the difference between the data statistics and the reconstruction
        statistics.

        @param instances: A sequence of training instances; each one is a
        dict from feature index to (non-zero) value.
        @note: An empty minibatch leaves the parameters untouched.
        @todo: Should assert that nonzero_indices and zero_indices
        are correct (i.e. are truly nonzero/zero).
        """
        minibatch = len(instances)
        if minibatch == 0:
            # Averaging over zero instances would divide by zero and write
            # NaN into every parameter.
            return

        # Densify the sparse instances into one row per instance.
        v0 = numpy.zeros((minibatch, globals.INPUT_DIMENSION))
        for i, instance in enumerate(instances):
            for idx, value in instance.items():
                v0[i][idx] = value

        q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
        h0 = sample(q0)
        p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
        v1 = sample(p0)
        q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w))

        # Single-argument print() emits the same text whether print is a
        # statement or a function.
        print("")
        print("XENT(P(v[1][j] = 1 | h[0]) | v0): %s"
              % numpy.sum(crossentropy(p0, v0)))

        self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
        self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
        self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch