# HG changeset patch
# User Joseph Turian
# Date 1215667060 14400
# Node ID be4209cd568fcd4a1e84f5bb15a5f519d667d04b
# Parent  8cc11ac97087c1b6e1b344737b59a5821911f1af
Added weight decay

diff -r 8cc11ac97087 -r be4209cd568f sandbox/rbm/globals.py
--- a/sandbox/rbm/globals.py	Thu Jul 10 00:51:32 2008 -0400
+++ b/sandbox/rbm/globals.py	Thu Jul 10 01:17:40 2008 -0400
@@ -2,13 +2,14 @@
 Global variables.
 """
 
-INPUT_DIMENSION = 10
+INPUT_DIMENSION = 1000
 #INPUT_DIMENSION = 100
 HIDDEN_DIMENSION = 100
 #HIDDEN_DIMENSION = 10
 #HIDDEN_DIMENSION = 6
 LEARNING_RATE = 0.1
 LR = LEARNING_RATE
-#MOMENTUM = 0.9
-MOMENTUM = 0
+MOMENTUM = 0.9
+#MOMENTUM = 0
+WEIGHT_DECAY = 0.0002
 SEED = 666
diff -r 8cc11ac97087 -r be4209cd568f sandbox/rbm/model.py
--- a/sandbox/rbm/model.py	Thu Jul 10 00:51:32 2008 -0400
+++ b/sandbox/rbm/model.py	Thu Jul 10 01:17:40 2008 -0400
@@ -79,6 +79,8 @@
         @param instance: A dict from feature index to (non-zero) value.
         @todo: Should assert that nonzero_indices and zero_indices are correct
         (i.e. are truly nonzero/zero).
+        @todo: Multiply WEIGHT_DECAY by LEARNING_RATE, as done in Semantic Hashing?
+        @todo: Decay the biases too?
         """
         minibatch = len(instances)
         v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
@@ -93,6 +95,8 @@
         db = LR * numpy.sum(h0 - q1, axis=0) / minibatch + globals.MOMENTUM * self.prev_db
         dc = LR * numpy.sum(v0 - v1, axis=0) / minibatch + globals.MOMENTUM * self.prev_dc
 
+        self.parameters.w *= (1 - globals.WEIGHT_DECAY)
+
         self.parameters.w += dw
         self.parameters.b += db
         self.parameters.c += dc