Mercurial > pylearn
view pylearn/sampling/hmc.py @ 1223:621e03253f0c
mini-bug in taglist
author | boulanni <nicolas_boulanger@hotmail.com> |
---|---|
date | Wed, 22 Sep 2010 15:08:39 -0400 |
parents | 492473059b37 |
children | 34512d1d4e9c |
line wrap: on
line source
"""Hybrid / Hamiltonian Monte Carlo Sampling This algorithm is described in Radford Neal's PhD Thesis, pages 63--70. """ import sys import logging import numpy as np from theano import function, shared from theano import tensor as TT import theano.sparse #installs the sparse shared var handler #TODO: # Consider how to refactor this code so that the key functions /functionality are provided as # theano expressions?? # It could be limiting that the implementation requires that the position be a list of shared # variables, and basically uses procedural logic on top of that. #TODO: # Consider a heuristic for updating the *MASS* of the particles. We might want the mass to be # such that the momentum is in the same range as the gradient on the energy. Look at Radford's # recent book chapter, maybe there are hints. (2010). class HMC_sampler(object): """Batch-wise Hybrid Monte-Carlo sampler The `draw` function advances the markov chain and returns the current sample by calling `simulate` and `get_position` in sequence. :note: The 'mass' of so-called particles is taken to be 1, so that kinetic energy (K) is the sum of squared velocities (p). :math:`K = \sum_i p_i^2 / 2` """ # Constants taken from Marc'Aurelio's 'train_mcRBM.py' file found in the code online for his # paper. stepsize_dec = 0.98 stepsize_min = 0.001 stepsize_max = 0.25 stepsize_inc = 1.02 avg_acceptance_slowness = 0.9 # used in geometric avg. 1.0 would be not moving at all n_steps=20 def __init__(self, positions, energy_fn, velocity=None, initial_stepsize=0.01, target_acceptance_rate=0.9, seed=12345, dtype=theano.config.floatX): """ :param positions: list of shared ndarray variables. :param energy: callable such that energy_fn(positions) returns theano vector of energies. The len of this vector is the batchsize. The sum of this energy vector must be differentiable (with theano.tensor.grad) with respect to the positions for HMC sampling to work. """ self.stepsize = initial_stepsize batchsize = positions[0].value.shape[0] self.target_acceptance_rate = target_acceptance_rate self.avg_acceptance_rate = self.target_acceptance_rate self.s_rng = TT.shared_randomstreams.RandomStreams(seed) self.positions = positions if velocity is None: self.velocity = [shared(np.zeros_like(q.value)) for q in self.positions] else: self.velocity = velocity self.start_positions = [shared(np.zeros_like(q.value)) for q in self.positions] self.initial_hamiltonian = shared(np.zeros(batchsize).astype(dtype) + float('inf')) energy = energy_fn(positions) # assuming mass is 1 kinetic_energy = 0.5 * sum([(p**2).sum(axis=1) for p in self.velocity]) hamiltonian = energy + kinetic_energy dE_dpos = TT.grad(energy.sum(), self.positions) s_stepsize = TT.scalar('stepsize') self.velocity_step = function([s_stepsize], [dE_dpos[0].norm(2)], updates=[(p, p - s_stepsize*dE_dq) for p,dE_dq in zip(self.velocity,dE_dpos)]) self.position_step = function([s_stepsize], [], updates=[(q, q + s_stepsize*p) for (q,p) in zip(self.positions, self.velocity)]) self.save_start_positions = function([], [], updates=[(self.initial_hamiltonian, hamiltonian)] + \ [(sq, q) for sq, q in zip(self.start_positions, self.positions)]) # sample the initial velocity from a # gaussian with mean 0. # Note: I think the fact that this distribution is symmetric about zero justifies not # sampling forward versus backward dynamics. self.randomize_velocity = function([], [], updates=[(p, self.s_rng.normal(size=p.value.shape)) for p in self.velocity]) # accept-reject according to Metropolis algo accept = TT.exp(self.initial_hamiltonian - hamiltonian) - self.s_rng.uniform(size=(batchsize,)) >= 0 self.accept_reject_positions = function([], accept.mean(), updates=[ (self.initial_hamiltonian, TT.switch(accept, hamiltonian, self.initial_hamiltonian))] + [ (q, TT.switch(accept.dimshuffle(0, *(('x',)*(sq.ndim-1))), q, sq)) for sq, q in zip(self.start_positions, self.positions)]) def simulate(self, n_steps=None): if n_steps is None: n_steps = self.n_steps # updates self.velocity with new random numbers self.randomize_velocity() self.save_start_positions() if 0: # not necessary for random initial direction if np.random.rand() > 0.5: step_amount = self.stepsize else: step_amount = -self.stepsize else: step_amount = self.stepsize if 0: print "initial", print "kinetic E", self.prev_energy.value , print (self.velocity[0].value**2).sum(axis=1), print (self.velocity[0].value**2).sum(axis=1) + self.prev_energy.value # Note on the order of leap-frog steps: # # the leap-frog algorithm can start with either position or velocity updates first, # but one of them has to run an extra time (because of two half-steps). # The position_step is cheap to evaluate, whereas the velocity_step is expensive, # so we evaluate the position_step the extra time. # # At the same time, we cannot drop the last half-step update of the position, because # the position is actually the terms we care about. #opening half-step in leap-frog algorithm self.position_step(step_amount/2.0) # full leap-frog steps for ss in range(n_steps): self.velocity_step(step_amount) if ss == n_steps-1: # closing half-step in the leap-frog algorithm self.position_step(step_amount/2.0) else: self.position_step(step_amount) acceptance_rate = self.accept_reject_positions() self.avg_acceptance_rate = self.avg_acceptance_slowness * self.avg_acceptance_rate \ + (1.0 - self.avg_acceptance_slowness) * acceptance_rate if self.avg_acceptance_rate < self.target_acceptance_rate: self.stepsize = max(self.stepsize*self.stepsize_dec,self.stepsize_min) else: self.stepsize = min(self.stepsize*self.stepsize_inc,self.stepsize_max) if 0: print "final kinetic E", self.prev_energy.value , print (self.velocity[0].value**2).sum(axis=1), print (self.velocity[0].value**2).sum(axis=1) + self.prev_energy.value # post-condition: # self.positions contains a new sample from our markov chain # it is not returned from this function because accessing the .value of a shared # variable can require making a copy # see `draw()` or `get_position` for that behaviour. def get_position(self): return [q.value for q in self.positions] def draw(self, n_steps=None): """Return the current sample in the Markov chain as a list of numpy arrays The size of the arrays will match the size of the `initial_position` argument to __init__. """ self.simulate(n_steps=n_steps) return self.get_position()