Mercurial > pylearn
view pylearn/sampling/mcmc.py @ 1447:fbe470217937
Use .get_value() and .set_value() of shared instead of the .value property
author | Pascal Lamblin <lamblinp@iro.umontreal.ca> |
---|---|
date | Wed, 16 Mar 2011 20:20:02 -0400 |
parents | 492473059b37 |
children |
line wrap: on
line source
"""Metropolis-Hastings Monte Carlo See also Hamiltonian Monte Carlo (aka Hybrid Monte Carlo) in the hmc.py file. """ import sys import logging import numpy as np from theano import function, shared from theano import tensor as TT import theano.sparse #installs the sparse shared var handler from theano.printing import Print #TODO: # Consider how to refactor this code so that the key functions /functionality are provided as # theano expressions?? # It could be limiting that the implementation requires that the position be a list of shared # variables, and basically uses procedural logic on top of that. # # Theano can only do function optimization on one function at a time, so until these are # written as update expressions, they must be compiled as their own functions and cannot be # rolled into things the user is doing anyway. class MCMC_sampler(object): """Generic Metropolis-Hastings Monte-Carlo sampler The `draw` function advances the markov chain and returns the current sample by calling `simulate` and `get_position` in sequence. The stepsize is updated dynamically to """ # TODO: anyone has better guesses for these constants?? stepsize_dec = 0.98 stepsize_min = 0.001 stepsize_max = 0.25 stepsize_inc = 1.02 target_acceptance_rate=0.7 avg_acceptance_slowness = 0.9 # used in geometric avg. 1.0 would be not moving at all n_steps=1 def __init__(self, positions, energy_fn, initial_stepsize=0.01, seed=12345): """ :param positions: list of shared ndarray variables. :param energy: callable such that energy_fn(positions) returns theano vector of energies. The len of this vector is the batchsize. """ batchsize = positions[0].get_value(borrow=True).shape[0] self.s_rng = TT.shared_randomstreams.RandomStreams(seed) self.positions = positions self.prev_energy = shared(np.zeros(batchsize) + float('inf')) self.avg_acceptance_rate = 0.5 self.stepsize = initial_stepsize s_stepsize = TT.scalar('stepsize') new_positions = [p + s_stepsize * self.s_rng.normal(size=p.get_value(borrow=True).shape) for p in self.positions] # accept-reject according to Metropolis-Hastings energy = energy_fn(new_positions) accept = TT.exp(self.prev_energy - energy) - self.s_rng.uniform(size=(batchsize,)) >= 0 self.accept_reject_positions = function([s_stepsize], accept.mean(), updates=[(self.prev_energy, TT.switch(accept, energy, self.prev_energy))] + [ (q, TT.switch(accept.dimshuffle(0, *(('x',)*(q.ndim-1))), new_q, q)) for q, new_q in zip(self.positions, new_positions)]) def simulate(self, n_steps=None): if n_steps is None: n_steps = self.n_steps for ss in range(n_steps): acceptance_rate = self.accept_reject_positions(self.stepsize) self.avg_acceptance_rate = self.avg_acceptance_slowness * self.avg_acceptance_rate \ + (1.0 - self.avg_acceptance_slowness) * acceptance_rate if self.avg_acceptance_rate < self.target_acceptance_rate: self.stepsize = max(self.stepsize*self.stepsize_dec,self.stepsize_min) else: self.stepsize = min(self.stepsize*self.stepsize_inc,self.stepsize_max) def get_position(self): return [q.get_value(borrow=True) for q in self.positions] def draw(self, n_steps=None): """Return the current sample in the Markov chain as a list of numpy arrays The size of the arrays will match the size of the `initial_position` argument to __init__. """ self.simulate(n_steps=n_steps) return self.get_position()