Mercurial > pylearn
view sandbox/rbm/model.py @ 405:be4209cd568f
Added weight decay
author | Joseph Turian <turian@gmail.com> |
---|---|
date | Thu, 10 Jul 2008 01:17:40 -0400 |
parents | ffdd2c199f2a |
children | c2e6a8fcc35e |
line wrap: on
line source
"""
The model for an autoassociator for sparse inputs, using Ronan Collobert +
Jason Weston's sampling trick (2008).
"""

import parameters

import globals
from globals import LR

import numpy
from numpy import dot

import random
random.seed(globals.SEED)

import pylearn.nnet_ops
import pylearn.sparse_instance


def sigmoid(v):
    """
    Elementwise logistic function, 1 / (1 + exp(-v)).

    @param v: A number or numpy array.
    @return: Value(s) in [0, 1], same shape as C{v}.
    @todo: Move to pylearn.more_numpy
    @todo: Fix to avoid floating point overflow.
    """
#    if x < -30.0: return 0.0
#    if x > 30.0: return 1.0
    return 1.0 / (1.0 + numpy.exp(-v))


def sample(v):
    """
    Draw a binary matrix from a matrix of Bernoulli probabilities.

    Entry [j][i] of the result is 1 with probability v[j][i], else 0.

    @param v: A 2-d numpy array with every entry in [0, 1].
    @return: A numpy array of zeros and ones with the same shape as C{v}.
    @note: Draws one C{random.random()} value per entry, in row-major
    order, from the module-level generator seeded at import time. The loop
    is kept (rather than vectorized) so that the stream of random numbers,
    and hence seeded runs, stay reproducible.
    @todo: Move to pylearn.more_numpy
    """
    assert len(v.shape) == 2
    x = numpy.zeros(v.shape)
    for j in range(v.shape[0]):
        for i in range(v.shape[1]):
            assert v[j][i] >= 0 and v[j][i] <= 1
            if random.random() < v[j][i]:
                x[j][i] = 1
            else:
                x[j][i] = 0
    return x


def crossentropy(output, target):
    """
    Compute the crossentropy of binary output wrt binary target.

    @param output: Predicted probabilities.
    @param target: Target values, broadcastable against C{output}.
    @return: The per-component crossentropy
    -(target * log(output) + (1 - target) * log(1 - output)).
    @note: We do not sum, crossentropy is computed by component.
    @todo: Rewrite as a scalar, and then broadcast to tensor.
    @todo: Move to pylearn.more_numpy
    @todo: Fix to avoid floating point overflow.
    """
    return -(target * numpy.log(output) + (1 - target) * numpy.log(1 - output))


class Model:
    """
    A model with a weight matrix C{w}, hidden biases C{b} and visible biases
    C{c} (held in C{self.parameters}), trained by L{update}.

    @todo: input dimensions should be stored here! not as a global.
    """

    def __init__(self):
        self.parameters = parameters.Parameters(randomly_initialize=True)
        # Previous parameter steps, used for the momentum term in update().
        self.prev_dw = 0
        self.prev_db = 0
        self.prev_dc = 0

    def deterministic_reconstruction(self, v0):
        """
        One up-down cycle, but a mean-field approximation (no sampling).

        @param v0: Matrix of visible vectors, one per row.
        @return: The reconstruction probabilities, same shape as C{v0}.
        """
        q = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
        p = sigmoid(self.parameters.c + dot(q, self.parameters.w.T))
        return p

    def deterministic_reconstruction_error(self, v0):
        """
        Per-component crossentropy between the deterministic reconstruction
        of C{v0} and C{v0} itself.

        @note: According to Yoshua, -log P(V1 = v0 | tilde(h)(v0)).
        """
        return crossentropy(self.deterministic_reconstruction(v0), v0)

    def update(self, instances):
        """
        Update the L{Model} using one minibatch of training instances.

        Performs one sampled up-down-up pass (v0 -> h0 -> v1 -> q1), then
        applies the resulting step with momentum to w, b and c, after
        shrinking w by the weight decay factor. Prints the summed
        reconstruction crossentropy on the minibatch before and after.

        @param instances: The minibatch; its length is used as the
        minibatch size and it is converted to a dense matrix with
        C{pylearn.sparse_instance.to_vector}. Presumably a sequence of
        dicts, each from feature index to (non-zero) value -- confirm
        against C{to_vector}.
        @todo: Should assert that nonzero_indices and zero_indices
        are correct (i.e. are truly nonzero/zero).
        @todo: Multiply WEIGHT_DECAY by LEARNING_RATE, as done in Semantic Hashing?
        @todo: Decay the biases too?
        """
        minibatch = len(instances)
        v0 = pylearn.sparse_instance.to_vector(instances, globals.INPUT_DIMENSION)
        # Single-argument print with %-formatting emits the same text under
        # the Python 2 print statement and the Python 3 print function.
        print("old XENT: %s" % (numpy.sum(self.deterministic_reconstruction_error(v0)),))

        q0 = sigmoid(self.parameters.b + dot(v0, self.parameters.w))
        h0 = sample(q0)
        p0 = sigmoid(self.parameters.c + dot(h0, self.parameters.w.T))
        v1 = sample(p0)
        q1 = sigmoid(self.parameters.b + dot(v1, self.parameters.w))

        dw = LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch + globals.MOMENTUM * self.prev_dw
        db = LR * numpy.sum(h0 - q1, axis=0) / minibatch + globals.MOMENTUM * self.prev_db
        dc = LR * numpy.sum(v0 - v1, axis=0) / minibatch + globals.MOMENTUM * self.prev_dc

        # Weight decay is applied to the weights only, before the step.
        self.parameters.w *= (1 - globals.WEIGHT_DECAY)

        self.parameters.w += dw
        self.parameters.b += db
        self.parameters.c += dc

        # Remember this step so the next call's momentum term can use it.
        # Previously only last_* was written, so prev_* stayed at 0 and the
        # momentum term never contributed.
        self.prev_dw = dw
        self.prev_db = db
        self.prev_dc = dc

        # Kept as aliases for any external code that reads last_*.
        self.last_dw = dw
        self.last_db = db
        self.last_dc = dc

        print("new XENT: %s" % (numpy.sum(self.deterministic_reconstruction_error(v0)),))

#        print
#        print "v[0]:", v0
#        print "Q(h[0][i] = 1 | v[0]):", q0
#        print "h[0]:", h0
#        print "P(v[1][j] = 1 | h[0]):", p0
#        print "XENT(P(v[1][j] = 1 | h[0]) | v0):", numpy.sum(crossentropy(p0, v0))
#        print "v[1]:", v1
#        print "Q(h[1][i] = 1 | v[1]):", q1
#
#        print
#        print v0.T.shape
#        print h0.shape
#        print dot(v0.T, h0).shape
#        print self.parameters.w.shape
#        self.parameters.w += LR * (dot(v0.T, h0) - dot(v1.T, q1)) / minibatch
#        print
#        print h0.shape
#        print q1.shape
#        print self.parameters.b.shape
#        self.parameters.b += LR * numpy.sum(h0 - q1, axis=0) / minibatch
#        print v0.shape, v1.shape
#        print
#        print self.parameters.c.shape
#        self.parameters.c += LR * numpy.sum(v0 - v1, axis=0) / minibatch
#        print self.parameters