comparison sandbox/rbm/model.py @ 406:c2e6a8fcc35e

Globals are now parameters for the RBM model
author Joseph Turian <turian@gmail.com>
date Thu, 10 Jul 2008 02:10:23 -0400
parents be4209cd568f
children 3cd4cfda2599
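The change moves hyperparameters out of the globals module (INPUT_DIMENSION, LR, MOMENTUM, WEIGHT_DECAY, SEED) and into constructor arguments on Model. A minimal usage sketch of the new interface: the argument names and defaults come from the hunks below, but the training-method name (update) and the surrounding call are assumptions, since neither is visible in this changeset.

    # Hedged usage sketch; constructor arguments and defaults are taken
    # from the diff below, everything else is illustrative.
    model = Model(input_dimension=1000, hidden_dimension=100,
                  learning_rate=0.1, momentum=0.9,
                  weight_decay=0.0002, random_seed=666)

    # Per the docstring in the training hunk, each instance is a dict
    # from feature index to (non-zero) value. The method name `update`
    # is an assumption; it is not shown in this changeset.
    instances = [{3: 1.0, 17: 2.0}, {5: 1.0}]
    model.update(instances)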
@@ -3,17 +3,13 @@
 Weston's sampling trick (2008).
 """
 
 import parameters
 
-import globals
-from globals import LR
-
 import numpy
 from numpy import dot
 import random
-random.seed(globals.SEED)
 
 import pylearn.nnet_ops
 import pylearn.sparse_instance
 
 def sigmoid(v):
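The body of sigmoid is outside this hunk. For context, a definition consistent with how it is called below would be the elementwise logistic; this is a sketch, not the file's actual body.

    def sigmoid(v):
        # Elementwise logistic; assumed body, the real one is not shown here.
        return 1.0 / (1.0 + numpy.exp(-v))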
@@ -51,12 +47,21 @@
 
 class Model:
     """
     @todo: input dimensions should be stored here! not as a global.
     """
-    def __init__(self):
-        self.parameters = parameters.Parameters(randomly_initialize=True)
+    def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, momentum = 0.9, weight_decay = 0.0002, random_seed = 666):
+        self.input_dimension = input_dimension
+        self.hidden_dimension = hidden_dimension
+        self.learning_rate = learning_rate
+        self.momentum = momentum
+        self.weight_decay = weight_decay
+        self.random_seed = random_seed
+
+        random.seed(random_seed)
+
+        self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=False, random_seed=self.random_seed)
         self.prev_dw = 0
         self.prev_db = 0
         self.prev_dc = 0
 
     def deterministic_reconstruction(self, v0):
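The body of deterministic_reconstruction is also outside this changeset. Judging from the q0/p0 computations in the next hunk, it presumably propagates mean activations up and back down without sampling; a sketch under that assumption:

    def deterministic_reconstruction(self, v0):
        # Assumed shape of the method: a mean-field pass in both directions,
        # mirroring q0/p0 below but with no Bernoulli sampling.
        q = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
        return sigmoid(self.parameters.c + dot(q, self.parameters.w.T))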
77 """ 82 """
78 Update the L{Model} using one training instance. 83 Update the L{Model} using one training instance.
79 @param instance: A dict from feature index to (non-zero) value. 84 @param instance: A dict from feature index to (non-zero) value.
80 @todo: Should assert that nonzero_indices and zero_indices 85 @todo: Should assert that nonzero_indices and zero_indices
81 are correct (i.e. are truly nonzero/zero). 86 are correct (i.e. are truly nonzero/zero).
82 @todo: Multiply WEIGHT_DECAY by LEARNING_RATE, as done in Semantic Hashing? 87 @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing?
83 @todo: Decay the biases too? 88 @todo: Decay the biases too?
84 """ 89 """
85 minibatch = len(instances) 90 minibatch = len(instances)
86 v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION) 91 v0 = pylearn.sparse_instance.to_vector(instances, self.input_dimension)
87 print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0)) 92 print "old XENT:", numpy.sum(self.deterministic_reconstruction_error(v0))
88 q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w)) 93 q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
89 h0 = sample(q0) 94 h0 = sample(q0)
90 p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T)) 95 p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
91 v1 = sample(p0) 96 v1 = sample(p0)
92 q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w)) 97 q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w))
93 98
94 dw = LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + globals.MOMENTUM * self.prev_dw 99 dw = self.learning_rate * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + self.momentum * self.prev_dw
95 db = LR * numpy.sum(h0 - q1, axis=0) / minibatch + globals.MOMENTUM * self.prev_db 100 db = self.learning_rate * numpy.sum(h0 - q1, axis=0) / minibatch + self.momentum * self.prev_db
96 dc = LR * numpy.sum(v0 - v1, axis=0) / minibatch + globals.MOMENTUM * self.prev_dc 101 dc = self.learning_rate * numpy.sum(v0 - v1, axis=0) / minibatch + self.momentum * self.prev_dc
97 102
98 self.parameters.w *= (1 - globals.WEIGHT_DECAY) 103 self.parameters.w *= (1 - self.weight_decay)
99 104
100 self.parameters.w += dw 105 self.parameters.w += dw
101 self.parameters.b += db 106 self.parameters.b += db
102 self.parameters.c += dc 107 self.parameters.c += dc
103 108
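The hunk above is one step of CD-1 (contrastive divergence with a single Gibbs step) plus momentum and weight decay. A self-contained NumPy sketch of the same update, in modern Python for clarity; names mirror the diff (q0/h0/p0/v1/q1, dw/db/dc), and the sample helper is an assumption since its body is not part of this changeset.

    import numpy

    rng = numpy.random.default_rng(666)

    def sigmoid(v):
        return 1.0 / (1.0 + numpy.exp(-v))

    def sample(p):
        # Bernoulli sample with success probability p, elementwise (assumed
        # to match the file's sample helper, which is not shown here).
        return (rng.random(p.shape) < p).astype(float)

    def cd1_step(w, b, c, v0, lr=0.1, momentum=0.9, weight_decay=0.0002,
                 prev=(0.0, 0.0, 0.0)):
        m = v0.shape[0]
        q0 = sigmoid(b + v0 @ w)        # hidden probabilities given the data
        h0 = sample(q0)                 # sampled hidden states
        p0 = sigmoid(c + h0 @ w.T)      # visible probabilities given h0
        v1 = sample(p0)                 # one-step reconstruction
        q1 = sigmoid(b + v1 @ w)        # hidden probabilities given v1

        prev_dw, prev_db, prev_dc = prev
        dw = lr * (v0.T @ h0 - v1.T @ q1) / m + momentum * prev_dw
        db = lr * numpy.sum(h0 - q1, axis=0) / m + momentum * prev_db
        dc = lr * numpy.sum(v0 - v1, axis=0) / m + momentum * prev_dc

        w = (1 - weight_decay) * w + dw  # decay applies to weights only, as in the diff
        return w, b + db, c + dc, (dw, db, dc)

    # Example call on toy data (4 visible units, 3 hidden units):
    w = rng.normal(scale=0.01, size=(4, 3))
    b = numpy.zeros(3)
    c = numpy.zeros(4)
    v0 = sample(numpy.full((2, 4), 0.5))  # a toy minibatch of 2 instances
    w, b, c, prev = cd1_step(w, b, c, v0)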
@@ -119,16 +124,16 @@
         # print
         # print v0.T.shape
         # print h0.shape
         # print dot(v0.T, h0).shape
         # print self.parameters.w.shape
-        # self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
+        # self.parameters.w += self.learning_rate * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
         # print
         # print h0.shape
         # print q1.shape
         # print self.parameters.b.shape
-        # self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
+        # self.parameters.b += self.learning_rate * numpy.sum(h0 - q1, axis=0) / minibatch
         # print v0.shape, v1.shape
         # print
         # print self.parameters.c.shape
-        # self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch
+        # self.parameters.c += self.learning_rate * numpy.sum(v0 - v1, axis=0) / minibatch
         # print self.parameters