pylearn: comparison of sandbox/rbm/model.py @ 406:c2e6a8fcc35e
Globals are now parameters for the RBM model
author | Joseph Turian <turian@gmail.com> |
---|---|
date | Thu, 10 Jul 2008 02:10:23 -0400 |
parents | be4209cd568f |
children | 3cd4cfda2599 |
405:be4209cd568f | 406:c2e6a8fcc35e |
---|---|
3 Weston's sampling trick (2008). | 3 Weston's sampling trick (2008). |
4 """ | 4 """ |
5 | 5 |
6 import parameters | 6 import parameters |
7 | 7 |
8 import globals | |
9 from globals import LR | |
10 | |
11 import numpy | 8 import numpy |
12 from numpy import dot | 9 from numpy import dot |
13 import random | 10 import random |
14 random.seed(globals.SEED) | |
15 | 11 |
16 import pylearn.nnet_ops | 12 import pylearn.nnet_ops |
17 import pylearn.sparse_instance | 13 import pylearn.sparse_instance |
18 | 14 |
19 def sigmoid(v): | 15 def sigmoid(v): |
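
Lines 20-50 of the old file (16-46 of the new) are unchanged and elided from this comparison; they hold the bodies of sigmoid and of the sample helper that the update hunk below relies on. For orientation, here is a minimal sketch of what these two helpers conventionally compute; it is an assumption, not the repository's code. Note also that the module seeds the stdlib random generator, so the real sample presumably draws from it; numpy.random is used below only for vectorized brevity.

```python
# Sketch under assumptions; the actual bodies are elided from this
# comparison and may differ.
import numpy

def sigmoid(v):
    # Elementwise logistic function 1 / (1 + exp(-v)).
    return 1.0 / (1.0 + numpy.exp(-v))

def sample(p):
    # Elementwise Bernoulli draw: 1 with probability p, else 0.
    return (numpy.random.random(p.shape) < p).astype(float)
```
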

405:be4209cd568f | 406:c2e6a8fcc35e |
---|---|
51 | 47 |
52 class Model: | 48 class Model: |
53 """ | 49 """ |
54 @todo: input dimensions should be stored here! not as a global. | 50 @todo: input dimensions should be stored here! not as a global. |
55 """ | 51 """ |
56 def __init__(self): | 52 def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, momentum = 0.9, weight_decay = 0.0002, random_seed = 666): |
57 self.parameters = parameters.Parameters(randomly_initialize=True) | 53 self.input_dimension = input_dimension |
| | 54 self.hidden_dimension = hidden_dimension |
| | 55 self.learning_rate = learning_rate |
| | 56 self.momentum = momentum |
| | 57 self.weight_decay = weight_decay |
| | 58 self.random_seed = random_seed |
| | 59 |
| | 60 random.seed(random_seed) |
| | 61 |
| | 62 self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=False, random_seed=self.random_seed) |
58 self.prev_dw = 0 | 63 self.prev_dw = 0 |
59 self.prev_db = 0 | 64 self.prev_db = 0 |
60 self.prev_dc = 0 | 65 self.prev_dc = 0 |
61 | 66 |
62 def deterministic_reconstruction(self, v0): | 67 def deterministic_reconstruction(self, v0): |
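
The hunk above is the heart of the changeset: the module-level globals (INPUT_DIMENSION, LR, MOMENTUM, WEIGHT_DECAY, SEED) become per-instance constructor arguments, and random.seed now runs per model rather than at import time. The Parameters call also flips randomly_initialize from True to False. A hypothetical construction call, assuming the module is importable as model and using placeholder dimensions:

```python
# Hypothetical usage, not from the repository: the dimensions are
# placeholders; the keyword values mirror the new signature's defaults.
from model import Model

rbm = Model(input_dimension=1000, hidden_dimension=100,
            learning_rate=0.1, momentum=0.9,
            weight_decay=0.0002, random_seed=666)
```
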
77 """ | 82 """ |
78 Update the L{Model} using one training instance. | 83 Update the L{Model} using one training instance. |
79 @param instance: A dict from feature index to (non-zero) value. | 84 @param instance: A dict from feature index to (non-zero) value. |
80 @todo: Should assert that nonzero_indices and zero_indices | 85 @todo: Should assert that nonzero_indices and zero_indices |
81 are correct (i.e. are truly nonzero/zero). | 86 are correct (i.e. are truly nonzero/zero). |
82 @todo: Multiply WEIGHT_DECAY by LEARNING_RATE, as done in Semantic Hashing? | 87 @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing? |
83 @todo: Decay the biases too? | 88 @todo: Decay the biases too? |
84 """ | 89 """ |
85 minibatch = len(instances) | 90 minibatch = len(instances) |
86 v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION) | 91 v0 = pylearn.sparse_instance.to_vector(instances, self.input_dimension) |
87 print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0)) | 92 print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0)) |
88 q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) | 93 q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) |
89 h0 = sample(q0) | 94 h0 = sample(q0) |
90 p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T)) | 95 p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T)) |
91 v1 = sample(p0) | 96 v1 = sample(p0) |
92 q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w)) | 97 q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w)) |
93 | 98 |
94 dw = LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + globals.MOMENTUM * self.prev_dw | 99 dw = self.learning_rate * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + self.momentum * self.prev_dw |
95 db = LR * numpy.sum(h0 - q1, axis=0) / minibatch + globals.MOMENTUM * self.prev_db | 100 db = self.learning_rate * numpy.sum(h0 - q1, axis=0) / minibatch + self.momentum * self.prev_db |
96 dc = LR * numpy.sum(v0 - v1, axis=0) / minibatch + globals.MOMENTUM * self.prev_dc | 101 dc = self.learning_rate * numpy.sum(v0 - v1, axis=0) / minibatch + self.momentum * self.prev_dc |
97 | 102 |
98 self.parameters.w *= (1 - globals.WEIGHT_DECAY) | 103 self.parameters.w *= (1 - self.weight_decay) |
99 | 104 |
100 self.parameters.w += dw | 105 self.parameters.w += dw |
101 self.parameters.b += db | 106 self.parameters.b += db |
102 self.parameters.c += dc | 107 self.parameters.c += dc |
103 | 108 |
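
The hunk above is a momentum-accelerated minibatch CD-1 step: the positive statistics pair v0 with the sampled h0, the negative statistics pair the sampled v1 with the probabilities q1, and only w (not the biases, as the @todo notes) receives weight decay. Written out, with eta = learning_rate, mu = momentum, lambda = weight_decay, and m = len(instances), this is a transcription of the code, not an independent derivation:

```latex
% Sums run over the m instances of the minibatch; the "prev" terms are
% the previous step's updates, stored in self.prev_dw/prev_db/prev_dc.
\Delta W \leftarrow \tfrac{\eta}{m}\bigl(v_0^{\top} h_0 - v_1^{\top} q_1\bigr) + \mu\,\Delta W_{\mathrm{prev}},\quad
\Delta b \leftarrow \tfrac{\eta}{m}\textstyle\sum_i (h_0 - q_1)_i + \mu\,\Delta b_{\mathrm{prev}},\quad
\Delta c \leftarrow \tfrac{\eta}{m}\textstyle\sum_i (v_0 - v_1)_i + \mu\,\Delta c_{\mathrm{prev}},
\\
W \leftarrow (1 - \lambda)\,W + \Delta W,\qquad
b \leftarrow b + \Delta b,\qquad
c \leftarrow c + \Delta c.
```

The final hunk below is the older, momentum-free version of the same update, kept as commented-out debug code and mechanically renamed from LR to self.learning_rate.
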

405:be4209cd568f | 406:c2e6a8fcc35e |
---|---|
119 # print | 124 # print |
120 # print v0.T.shape | 125 # print v0.T.shape |
121 # print h0.shape | 126 # print h0.shape |
122 # print dot(v0.T, h0).shape | 127 # print dot(v0.T, h0).shape |
123 # print self.parameters.w.shape | 128 # print self.parameters.w.shape |
124 # self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch | 129 # self.parameters.w += self.learning_rate * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch |
125 # print | 130 # print |
126 # print h0.shape | 131 # print h0.shape |
127 # print q1.shape | 132 # print q1.shape |
128 # print self.parameters.b.shape | 133 # print self.parameters.b.shape |
129 # self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch | 134 # self.parameters.b += self.learning_rate * numpy.sum(h0 - q1, axis=0) / minibatch |
130 # print v0.shape, v1.shape | 135 # print v0.shape, v1.shape |
131 # print | 136 # print |
132 # print self.parameters.c.shape | 137 # print self.parameters.c.shape |
133 # self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch | 138 # self.parameters.c += self.learning_rate * numpy.sum(v0 - v1, axis=0) / minibatch |
134 # print self.parameters | 139 # print self.parameters |
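
The bodies of deterministic_reconstruction and deterministic_reconstruction_error, whose "old XENT" (cross-entropy) total the update method prints, fall in regions this comparison elides. A plausible sketch, assuming a mean-field up-down pass scored by per-unit cross-entropy; the repository's actual implementations may differ:

```python
# Assumed sketch of two Model methods elided from this comparison; they
# rely on the module-level sigmoid, dot, and numpy, and may not match
# the repository's actual code.
def deterministic_reconstruction(self, v0):
    # Mean-field up-down pass: hidden probabilities, then a visible
    # reconstruction, with no sampling anywhere.
    q = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
    return sigmoid(self.parameters.c + dot(q, self.parameters.w.T))

def deterministic_reconstruction_error(self, v0):
    # Per-unit cross-entropy between v0 and its reconstruction; the
    # caller sums this ("old XENT") over units and instances.
    p = self.deterministic_reconstruction(v0)
    return -(v0 * numpy.log(p) + (1 - v0) * numpy.log(1 - p))
```
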