Mercurial > pylearn

changeset 965:bf54637bb994 (merge)

author:    James Bergstra <bergstrj@iro.umontreal.ca>
date:      Fri, 20 Aug 2010 09:31:39 -0400
parents:   6a778bca0dec (diff), d944e1c26a57 (current diff)
children:  e88d7b7d53ed
diffstat:  30 files changed, 1146 insertions(+), 808 deletions(-)
--- a/pylearn/__init__.py Mon Aug 16 10:39:36 2010 -0400
+++ b/pylearn/__init__.py Fri Aug 20 09:31:39 2010 -0400
@@ -1,3 +1,22 @@
+"""Pylearn - Theano-based machine learning library
+
+Modules:
+
+ - `io` - Python parsers/writers for various file formats
+
+ - `datasets` - loading of various datasets (depends on `io`)
+
+ - `preprocessing` - pre-processing and feature extraction for various modalities
+
+ - `external` - code for interacting with other packages
+
+ - `gd` - gradient-based optimization (for stochastic online learning)
+
+ - `sampling` - theano expressions and tools for sampling from energy functions
+
+ - `algorithms` - implementations of [published] models typically with a script that reproduces a published result. (depends on all)
+
+"""
 #import exceptions
--- a/pylearn/algorithms/minimizer.py Mon Aug 16 10:39:36 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -"""Define the interface and factory for gradient-based minimizers. -""" -import theano - -class DummyMinimizer(theano.Module): - """ The idea of a minimizer is that it provides an `step` function that will - eventually converge toward (maybe realize?) the minimum of a cost function. - - The step_cost function takes a step and returns the cost associated with either - the current or previous parameter values (return whichever is easiest to compute, it's - meant for user feedback.) - - """ - def __init__(self, args, cost, parameters, gradients=None): - super(DummyMinimizer, self).__init__() - - def _instance_step(self, obj, *args): - """Move the parameters toward the minimum of a cost - - :param args: The arguments here should be values for the Variables that were in the - `args` argument to the constructor. - - :Return: None - """ - pass - - def _instance_step_cost(self, obj, *args): - """Move the parameters toward the minimum of a cost, and compute the cost - - :param args: The arguments here should be values for the Variables that were in the - `args` argument to the constructor. - - :Return: The current cost value. - """ - pass -
--- a/pylearn/algorithms/sgd.py Mon Aug 16 10:39:36 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.) -""" - -import theano - -class StochasticGradientDescent(theano.Module): - """Fixed stepsize gradient descent - - Methods for gradient descent are: - - step(arg_vals) which returns None and updates the params - - step_cost(arg_vals) which returns the cost value, and updates the params - - """ - def __init__(self, args, cost, params, - gradients=None, stepsize=None, - updates=None, auxout=None, methods=True): - """ - :param stepsize: the step to take in (negative) gradient direction - :type stepsize: None, scalar value, or scalar TensorVariable - - :param updates: extra symbolic updates to make when evating either step or step_cost - (these override the gradients if necessary) - :type updates: dict Variable -> Variable - :param auxout: auxiliary outputs, list containing output symbols to - compute at the same time as cost (for efficiency) - :param methods: Should this module define the step and step_cost methods? - """ - super(StochasticGradientDescent, self).__init__() - self.stepsize_init = None - - if stepsize is None: - self.stepsize = theano.tensor.dscalar() - elif isinstance(stepsize, theano.tensor.TensorVariable): - self.stepsize = stepsize - else: - self.stepsize = (theano.tensor.as_tensor_variable(stepsize)) - - if self.stepsize.ndim != 0: - raise TypeError('stepsize must be a scalar', stepsize) - - self.params = params - self.gparams = theano.tensor.grad(cost, self.params) if gradients is None else gradients - - self._updates = (dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))) - if updates is not None: - self._updates.update(updates) - - if methods: - if auxout is None: - self.step = theano.Method(args, [], updates=self._updates) - self.step_cost = theano.Method(args, cost, updates=self._updates) - else: - # step cost always returns a list if auxout - self.step = theano.Method( - args, [] + auxout, - updates=self._updates) - self.step_cost = theano.Method( - args, [cost]+auxout, - updates=self._updates) - - - updates = property(lambda self: self._updates.copy()) - - def _instance_initialize(self, obj): - pass - -def sgd_minimizer(stepsize=None): - """Curry the stepsize argument to StochasticGradientDescent, providing standard minimizer interface - - :returns: standard minimizer constructor f(args, cost, params, gradient=None) - """ - def f(args, cost, params, gradients=None, updates=None, auxout=None): - return StochasticGradientDescent(args, cost, params, gradients=gradients, stepsize=stepsize, - updates=updates, auxout=auxout) - return f
--- a/pylearn/algorithms/stopper.py Mon Aug 16 10:39:36 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,131 +0,0 @@ -import time -"""Early stopping iterators - -The idea here is to supply early-stopping heuristics that can be used in the -form: - - stopper = SomeEarlyStopper() - - for i in stopper(): - # train from data - if i.set_score: - i.score = validation_score - - -So far I only have one heuristic, so maybe this won't scale. -""" - -class Stopper(object): - - def train(self, data, update_rows_fn, update, validate, save=None): - """Return the best model trained on data - - Parameters: - data - a thing that accepts getitem(<list of int64>), or a tuple of such things - update_rows_fn - fn : int --> <list or tensor of int> - update - fn: update an internal model from elements of data - validate - fn: evaluate an internal model based on elements of data - save - fn: return a copy of the internal model - - The body of this function exhausts the <self> iterator, and trains a - model using early stopping in the process. - """ - - best = None - for stp in self: - i = stp.iter - - # call update on some training set rows - t_rows = update_rows_fn(i) - if isinstance(data, (tuple, list)): - update(*[d[t_rows] for d in data]) - else: - update(data[t_rows]) - - if stp.set_score: - stp.score = validate() - if (stp.score < stp.best_score) and save: - best = save() - return best - - def find_min(self, step, check, save): - best = None - for stp in self: - step() - if stp.set_score: - stp.score = check() - if (stp.score < stp.best_score) and save: - best = (save(), stp.iter, stp.score) - return best - -class ICML08Stopper(Stopper): - @staticmethod - def icml08(ntrain, batchsize): - """Some setting similar to what I used for ICML08 submission""" - #TODO: what did I actually use? put that in here. 
- return ICML08Stopper(30*ntrain/batchsize, - ntrain/batchsize, 0.96, 2.0, 100000000) - - def __init__(self, i_wait, v_int, min_improvement, patience, hard_limit, hard_time_limit=None): - self.initial_wait = i_wait - self.set_score_interval = v_int - self.min_improvement = min_improvement - self.patience = patience - self.hard_limit = hard_limit - self.hard_limit_seconds = hard_time_limit - self.start_time = time.time() - - self.best_score = float('inf') - self.best_iter = -1 - self.iter = -1 - - self.set_score = False - self.score = None - - def __iter__(self): - return self - - E_set_score = 'when iter.set_score is True, caller must assign a score to iter.score' - def next(self): - - #print 'ICML08 stopper, were doing a next' - - if self.set_score: #left over from last time - if self.score is None: - raise Exception(ICML08Stopper.E_set_score) - if self.score < (self.best_score * self.min_improvement): - (self.best_score, self.best_iter) = (self.score, self.iter) - self.score = None #un-set it - - - starting = self.iter < self.initial_wait - waiting = self.iter < (self.patience * self.best_iter) - if self.hard_limit_seconds != None: - times_up = (time.time() - self.start_time) > self.hard_limit_seconds - else: times_up = False - if (starting or waiting) and not times_up: - # continue to iterate - self.iter += 1 - if self.iter == self.hard_limit: - raise StopIteration - self.set_score = (self.iter % self.set_score_interval == 0) - return self - - raise StopIteration - -class NStages(ICML08Stopper): - """Run for a fixed number of steps, checking validation set every so - often.""" - def __init__(self, hard_limit, v_int): - ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit) - - #TODO: could optimize next() function. Most of what's in ICML08Stopper.next() - #is not necessary - -def geometric_patience(i_wait, v_int, min_improvement, patience, hard_limit): - return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit) - -def nstages(hard_limit, v_int): - return ICML08Stopper(hard_limit, v_int, 1.0, 1.0, hard_limit) - -
--- a/pylearn/algorithms/tests/test_sgd.py Mon Aug 16 10:39:36 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -import theano -from theano.compile.debugmode import DebugMode -from pylearn.algorithms import sgd - -mode = theano.compile.mode.get_default_mode() -if isinstance(mode,DebugMode): - mode = 'FAST_RUN' - -def test_sgd0(): - - x = theano.tensor.dscalar('x') - y = theano.tensor.dscalar('y') - - M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01) - M.y = y - m = M.make(mode=mode) - m.y = 5.0 - for i in xrange(100): - c = m.step_cost(3.0) - #print c[0], m.y - - assert c < 1.0e-5 - assert abs(m.y - (1.0 / 3)) < 1.0e-4 - -def test_sgd_stepsize_variable(): - - x = theano.tensor.dscalar('x') - y = theano.tensor.dscalar('y') - lr = theano.tensor.dscalar('lr') - - M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=lr) - M.y = y - M.lr = lr - m = M.make(mode=mode) - m.y = 5.0 - m.lr = 0.01 - for i in xrange(100): - c = m.step_cost(3.0) - # print c, m.y - - assert c < 1.0e-5 - assert abs(m.y - (1.0 / 3)) < 1.0e-4 - - - #test that changing the lr has impact - - m.y = 5.0 - m.lr = 0.0 - for i in xrange(10): - c = m.step_cost(3.0) - # print c, m.y - - assert m.y == 5.0 - -def test_sgd_stepsize_none(): - - x = theano.tensor.dscalar('x') - y = theano.tensor.dscalar('y') - - M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y]) - M.y = y - m = M.make(mode=mode) - m.y = 5.0 - #there should be a learning rate here by default - assert m.stepsize is None - m.stepsize = 0.01 - for i in xrange(100): - c = m.step_cost(3.0) - # print c, m.y - - assert c < 1.0e-5 - assert abs(m.y - (1.0 / 3)) < 1.0e-4 - -if __name__ == '__main__': - test_sgd0()
--- a/pylearn/dataset_ops/cifar10.py Mon Aug 16 10:39:36 2010 -0400
+++ b/pylearn/dataset_ops/cifar10.py Fri Aug 20 09:31:39 2010 -0400
@@ -59,7 +59,9 @@
 def cifar10(s_idx, split, dtype='float64', rasterized=False, color='grey'):
-    """
+    """
+    Returns a pair (img, label) of theano expressions for cifar-10 samples
+
     :param s_idx: the indexes

     :param split:
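The new docstring documents the signature but not the calling pattern. The following minimal sketch is not part of this changeset; the 'train' split name and the compiled-function usage are assumptions about the dataset-op conventions.

    import theano
    import theano.tensor as T
    from pylearn.dataset_ops.cifar10 import cifar10

    s_idx = T.iscalar('s_idx')              # symbolic example index
    img, label = cifar10(s_idx, 'train')    # theano expressions for one sample
    get_example = theano.function([s_idx], [img, label])
    x0, y0 = get_example(0)                 # numpy values for example 0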
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/dataset_ops/image_patches.py Fri Aug 20 09:31:39 2010 -0400
@@ -0,0 +1,41 @@
+import os, numpy
+import theano
+
+from pylearn.datasets.image_patches import (
+    olshausen_field_1996_whitened_images,
+    extract_random_patches)
+
+from .protocol import TensorFnDataset # protocol.py __init__.py
+from .memo import memo
+
+@memo
+def get_dataset(N,R,C,dtype):
+    seed=98234
+    rng = numpy.random.RandomState(seed)
+    img_stack = olshausen_field_1996_whitened_images()
+    patch_stack = extract_random_patches(img_stack, N,R,C,rng)
+    return patch_stack.astype(dtype).reshape((N,(R*C)))
+
+def image_patches(s_idx, dims,
+        split='train', dtype=theano.config.floatX, rasterized=False):
+    N,R,C=dims
+
+    if split != 'train':
+        raise NotImplementedError('train/test/valid splits for randomly sampled image patches?')
+
+    if not rasterized:
+        raise NotImplementedError()
+
+    op = TensorFnDataset(dtype, bcast=(False,), fn=(get_dataset, (N,R,C,dtype,)), single_shape=(R*C,))
+    x = op(s_idx%N)
+    if x.ndim == 1:
+        if not rasterized:
+            x = x.reshape((20,20))
+    elif x.ndim == 2:
+        if not rasterized:
+            x = x.reshape((x.shape[0], 20,20))
+    else:
+        assert False, 'what happened?'
+
+    return x
--- a/pylearn/dataset_ops/protocol.py Mon Aug 16 10:39:36 2010 -0400
+++ b/pylearn/dataset_ops/protocol.py Fri Aug 20 09:31:39 2010 -0400
@@ -66,6 +66,12 @@
     property of this Op, then pickling the Op would require pickling the entire dataset.
     """
     def __init__(self, dtype, bcast, fn, single_shape=None, batch_size=None):
+        """
+        :type fn: callable or (callable, args) tuple [MUST BE PICKLABLE!]
+
+        :param fn: function that returns the dataset as a tensor. Leading index is the example
+        index, others are considered part of each example.
+        """
         super(TensorFnDataset, self).__init__(dtype, bcast, single_shape, batch_size)
         try:
             self.fn, self.fn_args = fn
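A small illustration of the (callable, args) form described in this docstring; the loader name and path below are hypothetical, while the in-tree image_patches op added above uses the same pattern with `get_dataset`.

    import numpy
    from pylearn.dataset_ops.protocol import TensorFnDataset

    def load_my_matrix(path):   # hypothetical loader; must be picklable
        return numpy.load(path)

    op = TensorFnDataset('float64', bcast=(False,),
                         fn=(load_my_matrix, ('/tmp/data.npy',)),
                         single_shape=(784,))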
--- a/pylearn/datasets/cifar10.py Mon Aug 16 10:39:36 2010 -0400
+++ b/pylearn/datasets/cifar10.py Fri Aug 20 09:31:39 2010 -0400
@@ -88,3 +88,19 @@

 def first_1k(dtype='uint8', ntrain=1000, nvalid=200, ntest=200):
     return cifar10(dtype, ntrain, nvalid, ntest)
+
+def tile_rasterized_examples(X):
+    """Returns an ndarray that is ready to be passed to `image_tiling.save_tiled_raster_images`
+
+    This function is for the `x` matrices in the cifar dataset, or for the weight matrices
+    (filters) used to multiply them.
+    """
+    X = X.astype('float32')
+    r = X[:,:1024]
+    g = X[:,1024:2048]
+    b = X[:,2048:]
+    from pylearn.io.image_tiling import tile_raster_images
+    rval = tile_raster_images((r,g,b,None), img_shape=(32,32))
+    return rval
+
+
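A hedged usage sketch (not part of the diff) combining the new helper with `save_tiled_raster_images` from pylearn.io.image_tiling; it assumes the `cifar10` loader in this module returns a dataset object whose `train.x` rows are 3072-wide rasterized images.

    from pylearn.datasets.cifar10 import cifar10, tile_rasterized_examples
    from pylearn.io.image_tiling import save_tiled_raster_images

    data = cifar10(dtype='uint8', ntrain=1000, nvalid=200, ntest=200)
    tiled = tile_rasterized_examples(data.train.x[:100])  # (r,g,b) planes, 32x32 tiles
    save_tiled_raster_images(tiled, 'cifar10_examples.png')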
--- a/pylearn/datasets/image_patches.py Mon Aug 16 10:39:36 2010 -0400 +++ b/pylearn/datasets/image_patches.py Fri Aug 20 09:31:39 2010 -0400 @@ -5,6 +5,8 @@ import os import numpy +import scipy.io #for loadmat + from .config import data_root from .dataset import Dataset @@ -14,6 +16,13 @@ '12by12_whiten_01': ('natural_images_patches_whiten_12_by_12_0_1.amat',(12,12))} def load_dataset(ntrain=70000, nvalid=15000, ntest=15000, variant='20by20_whiten_01'): + # This is implementation loads files which are way too big (storing bytes in double), and + # stored in ascii!??, and which appear to be un-documented variants on the original + # raw/whitened + # data. + # I'd like to deprecate this function. + # -JB Aug 2010 + print >> sys.stderr, "WARNING: pylearn.datasets.image_patches.load_dataset is badly documented and does some weird stuff... could someone who uses this function do something about it?" ndata = 100000 @@ -40,3 +49,65 @@ rval.img_shape = paths[variant][1] return rval + + +#TODO: a little function to render a tiling of example images to an image using PIL + +#TODO: a pca_load_dataset function that loads the data, as projected onto principle components + +def olshausen_field_1996_whitened_images(path=None): + """Returns a (512,512,10) ndarray containing 10 whitened images. + + The images are in floating point. + Whitening was done by the paper authors, with band-pass whitening I think. + """ + if path is None: + path=os.path.join(data_root(), 'image_patches', 'olshausen', + 'original', 'IMAGES.mat') + images = scipy.io.loadmat(path)['IMAGES'] + assert images.shape == (512,512,10) + return images.astype('float32') + +def olshausen_field_1996_raw_images(path=None): + """Returns a (512,512,10) ndarray containing 10 images. + + The images are in floating point. + """ + if path is None: + path=os.path.join(data_root(), 'image_patches', 'olshausen', + 'original', 'IMAGES_RAW.mat') + images = scipy.io.loadmat(path)['IMAGES_RAW'] + assert images.shape == (512,512,10) + return images.astype('float32') + +def extract_random_patches(img_stack, N, R,C, rng): + """Return subimages from the img_stack + + :param img_stack: a 3D ndarray (n_images, rows, cols) or a list of 2D images. + :param N: number of patches to extract + :param R: number of rows in patch + :param C: number of cols in patch + :param rng: numpy RandomState + + Sub-image regions are chosen at random from the img_stack with uniform probability, and + then within each image with uniform probability across the image. Patches from a larger + image in the stack therefore would be sampled less frequently than patches from a smaller + image in the stack. + + To use ZCA whitening, extract patches from the raw data, and pass it to + preprocessing.pca.zca_whitening. + """ + rval = numpy.empty((N,R,C), dtype=img_stack[0].dtype) + + L = len(img_stack) + img_idxlist = rng.randint(L,size=N) + + for n, img_idxlist in enumerate(img_idxlist): + img_idx = rng.randint(L) + img_n = img_stack[img_idx] + offset_R = rng.randint(img_n.shape[0]-R+1) + offset_C = rng.randint(img_n.shape[1]-C+1) + rval[n] = img_n[offset_R:offset_R+R,offset_C:offset_C+C] + + return rval +
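A minimal sketch of the patch-extraction path added above; the patch count and size are arbitrary, and moving the image axis to the front is an assumption about the intended layout, since `olshausen_field_1996_whitened_images` is documented as (512,512,10) while `extract_random_patches` indexes images along the leading axis.

    import numpy
    from pylearn.datasets.image_patches import (
        olshausen_field_1996_whitened_images, extract_random_patches)

    rng = numpy.random.RandomState(42)
    imgs = olshausen_field_1996_whitened_images()   # (512, 512, 10) per the docstring
    img_stack = numpy.rollaxis(imgs, 2)             # (10, 512, 512): one image per leading index
    patches = extract_random_patches(img_stack, 1000, 12, 12, rng)
    X = patches.reshape(1000, 12 * 12)              # one rasterized patch per row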
--- a/pylearn/examples/daa.py Mon Aug 16 10:39:36 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,206 +0,0 @@ -import math, sys, time, copy, cPickle, shutil, functools -import numpy - -import theano -from theano import tensor -from theano.compile import module -from theano.sandbox.softsign import softsign -from theano import tensor as T, sparse as S -from theano.tensor import nnet, as_tensor -from theano.tensor.randomstreams import RandomStreams -from theano.printing import Print -from theano.compile.mode import Mode - -from ..algorithms import cost, minimizer, logistic_regression, sgd, stopper -from ..io import filetensor -from ..datasets import MNIST - - -class AbstractFunction(Exception): """Override me""" - -# -# default corruption function for BasicDAA -# to extend -# -def corrupt_random_zeros(x, rng, p_zero): - mask = rng.binomial(T.shape(x), 1, 1.0 - p_zero) - return mask * x - -# -# BasicDAA -# ======== -# -# Re-usable module, could be in pylearn.algorithms.daa -# -class BasicDAA(theano.Module): - def __init__(self, n_visible, n_hidden, seed, - w=None, - vbias=None, - hbias=None, - sigmoid=nnet.sigmoid, - corrupt=functools.partial(corrupt_random_zeros, p_zero=0.1), - reconstruction_cost=cost.cross_entropy, - w_scale = None - ): - """ - :param w_scale: should be a floating-point value or None. The weights are initialized - to a random value on the interval (-1,1) and scaled by this value. - None means the default of 1/sqrt(max(n_visible, n_hidden)) - - - """ - super(BasicDAA, self).__init__() - - self.n_visible = n_visible - self.n_hidden = n_hidden - self.random = RandomStreams() - self.seed = seed - self.w_scale = w_scale if w_scale is not None else 1.0 / math.sqrt(max(n_visible,n_hidden)) - self.sigmoid = sigmoid - self.corrupt = corrupt - self.reconstruction_cost = reconstruction_cost - - self.w = tensor.dmatrix() if w is None else w - self.vbias = tensor.dvector() if vbias is None else vbias - self.hbias = tensor.dvector() if hbias is None else hbias - - self.params = [self.w, self.vbias, self.hbias] - - def _instance_initialize(self, obj): - #consider offering override parameters for seed, n_visible, n_hidden, w_scale - super(BasicDAA, self)._instance_initialize(obj) - rng = numpy.random.RandomState(self.seed) - s = rng.randint(2**30) - obj.w = (rng.rand(self.n_visible, self.n_hidden) * 2.0 - 1.0) * self.w_scale - obj.vbias = numpy.zeros(self.n_visible) - obj.hbias = numpy.zeros(self.n_hidden) - obj.random.initialize(int(s)) - - def hidden_act(self, visible): - return theano.dot(visible, self.w) + self.hbias - - def hidden(self, visible): - return self.sigmoid(self.hidden_act(visible)) - - def visible_act(self, hidden): - return theano.dot(hidden, self.w.T) + self.vbias - - def visible(self, hidden): - return self.sigmoid(self.visible_act(hidden)) - - def reconstruction(self, visible): - return self.visible(self.hidden(self.corrupt(visible, self.random))) - - def daa_cost(self, visible): - return self.reconstruction_cost(visible, self.reconstruction(visible)) - - def l2_cost(self): - return self.w.norm(2) - - - -# -# StackedDAA -# ========== -# -# Hacky experiment type code, which would *not* be in pylearn. -# -# This Module can be extended / parametrized so many ways that I think this code is best cut & -# pasted. 
-# -class StackedDAA(theano.Module): - def __init__(self, layer_widths, seed, finetune_lr=1e-3, pretrain_lr=1e-4): - super(StackedDAA, self).__init__() - input = theano.tensor.dmatrix() - - #the parameters of this function, required for the minimizer - self.params = [] - daa_widths = layer_widths[:-1] - - #create the stack of DAA modules, and the self.params list - self.daa_list = [] - daa_input = input - for i in xrange(len(daa_widths)-1): - self.daa_list.append(BasicDAA(daa_widths[i], daa_widths[i+1], seed+i)) - daa_input = self.daa_list[-1].hidden(daa_input) - self.params.extend(self.daa_list[-1].params) - - #put a logistic regression module on top for classification - self.classif = logistic_regression.LogRegN(input=daa_input, - n_in=layer_widths[-2], n_out = layer_widths[-1]) - self.params.extend(self.classif.params) - - #set up the fine-tuning function (minimizer.step) - FineTuneMinimizer = sgd.sgd_minimizer(stepsize=finetune_lr) - self.finetune = FineTuneMinimizer([input, self.classif.target], self.classif.unregularized_cost, self.params) - - #set up the pre-training function - pretrain_cost, pretrain_input, pretrain_params = reduce( - lambda (c,i,p), daa: (c + daa.daa_cost(i), daa.hidden(i), p + daa.params), - self.daa_list, - (0.0, input, [])) - PreTrainMinimizer = sgd.sgd_minimizer(stepsize=pretrain_lr) - self.pretrain = PreTrainMinimizer([input], pretrain_cost, pretrain_params) - - def _instance_initialize(self, obj): - #consider offering override parameters for seed, n_visible, n_hidden, w_scale - super(StackedDAA, self)._instance_initialize(obj) - - #ugh... why do i need to do this??? - for daa in obj.daa_list: - daa.initialize() - obj.classif.initialize() - obj.finetune.initialize() - obj.pretrain.initialize() - -# -# DRIVER -# ====== -# -# This learning algorithm is the sort of thing that we've put in 'experiment' functions, that -# can be run using dbdict. -# -def demo_random(layer_widths=[3,4,5]): - sdaa = StackedDAA(layer_widths, seed=666).make(mode='FAST_COMPILE') - - # create some training data - rng = numpy.random.RandomState(7832) - input_data = rng.randn(10,3) - targ_data = rng.binomial(1,.5, size=10) - - print 'Pre-training ...' - for i in xrange(5): - print sdaa.pretrain.step_cost(input_data) - - print 'Fine-tuning ...' - for i in xrange(5): - print sdaa.finetune.step_cost(input_data, targ_data) - - -def demo_mnist(layer_widths=[784,500,500]): - sdaa = StackedDAA(layer_widths, seed=666).make() - - mnist = MNIST.full() - batchsize=10 - n_pretrain_batches=10000 - n_finetune_batches=10000 - - t0 = time.time() - print 'Pre-training ...' - for i in xrange(n_pretrain_batches): - ii = (i*batchsize) % len(mnist.train.x) - x = mnist.train.x[ii:ii+batchsize] - c = sdaa.pretrain.step_cost(x) - if not i % 100: - print i, n_pretrain_batches, time.time() - t0, c - - t1 = time.time() - print 'Fine-tuning ...' - for i in xrange(n_finetune_batches): - ii = (i*batchsize) % len(mnist.train.x) - x = mnist.train.x[ii:ii+batchsize] - y = mnist.train.y[ii:ii+batchsize] - c = sdaa.finetune.step_cost(x, y) - if not i % 100: - print i, n_finetune_batches, time.time() - t1, c -
--- a/pylearn/examples/linear_classifier.py Mon Aug 16 10:39:36 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,224 +0,0 @@ -#! /usr/bin/env python -""" -T. Bertin-Mahieux (2008) University of Montreal -bertinmt@iro.umontreal.ca - -linear_classifier.py -Simple script that creates a linear_classifier, and -learns the parameters using backpropagation. - -This is to illustrate how to use theano/pylearn. -Anyone who knows how to make this script simpler/clearer is welcome to -make the modifications. -""" - - -import os -import sys -import time -import copy -import pickle -import numpy -import numpy as N -import numpy.random as NR -from pylearn import cost -import theano -from theano import tensor as T - - -def cost_function(*args,**kwargs) : - """ default cost function, quadratic """ - return cost.quadratic(*args,**kwargs) - - -class modelgraph() : - """ class that contains the graph of the model """ - lr = T.scalar() # learning rate - inputs = T.matrix() # inputs (one example per line) - true_outputs = T.matrix() # outputs (one example per line) - W = T.matrix() # weights input * W + b= output - b = T.vector() # bias - outputs = T.dot(inputs,W) + b # output, one per line - costs = cost_function(true_outputs,outputs) # costs - g_W = T.grad(costs,W) # gradient of W - g_b = T.grad(costs,b) # gradient of b - new_W = T.sub_inplace(W, lr * g_W) # update inplace of W - new_b = T.sub_inplace(b, lr * g_b) # update inplace of b - - -class model() : - """ - The model! - Contains needed matrices, needed functions, and a link to the model graph. - """ - - def __init__(self,input_size,output_size) : - """ init matrix and bias, creates the graph, create a dict of compiled functions """ - # graph - self.graph = modelgraph() - # weights and bias, saved in self.params - seed = 666 - r = NR.RandomState(seed) - W = r.uniform(size = [input_size, output_size], low = -1/N.sqrt(input_size), high = 1/N.sqrt(input_size)) - b = numpy.zeros((output_size, )) - self.params = [W,b] - # dictionary of compiled functions - self.func_dict = dict() - # keep some init_infos (may not be necessary) - self.init_params = [input_size,output_size] - - - def update(self,lr,true_inputs,true_outputs) : - """ does an update of the model, one gradient descent """ - # do we already have the proper theano function? - if self.func_dict.has_key('update_func') : - self.func_dict['update_func'](lr,true_inputs,true_outputs,self.params[0],self.params[1]) - return - else : - # create the theano function, tell him what are the inputs and outputs) - func = theano.function([self.graph.lr,self.graph.inputs,self.graph.true_outputs, - self.graph.W, self.graph.b], - [self.graph.new_W,self.graph.new_b]) - # add function to dictionary, so we don't compile it again - self.func_dict['update_func'] = func - # use this function - func(lr,true_inputs,true_outputs,self.params[0],self.params[1]) - return - - def costs(self,true_inputs,true_outputs) : - """ get the costs for given examples, don't update """ - # do we already have the proper theano function? 
- if self.func_dict.has_key('costs_func') : - return self.func_dict['costs_func'](true_inputs,true_outputs,self.params[0],self.params[1]) - else : - # create the theano function, tell him what are the inputs and outputs) - func = theano.function([self.graph.inputs,self.graph.true_outputs,self.graph.W,self.graph.b], - [self.graph.costs]) - # add function to dictionary, se we don't compile it again - self.func_dict['costs_func'] = func - # use this function - return func(true_inputs,true_outputs,self.params[0],self.params[1]) - - def outputs(self,true_inputs) : - """ get the output for a set of examples (could be called 'predict') """ - # do we already have the proper theano function? - if self.func_dict.has_key('outputs_func') : - return self.func_dict['outputs_func'](true_inputs,self.params[0],self.params[1]) - else : - # create the theano function, tell him what are the inputs and outputs) - func = theano.function([self.graph.inputs, self.graph.W, self.graph.b], - [self.graph.outputs]) - # add function to dictionary, se we don't compile it again - self.func_dict['outputs_func'] = func - # use this function - return func(true_inputs,self.params[0],self.params[1]) - - def __getitem__(self,inputs) : - """ for simplicity, we can use the model this way: predictions = model[inputs] """ - return self.outputs(inputs) - - def __getstate__(self) : - """ - To save/copy the model, used by pickle.dump() and by copy.deepcopy(). - @return a dictionnary with the params (matrix + bias) - """ - d = dict() - d['params'] = self.params - d['init_params'] = self.init_params - return d - - def __setstate__(self,d) : - """ - Get the dictionary created by __getstate__(), use it to recreate the model. - """ - self.params = d['params'] - self.init_params = d['init_params'] - self.graph = modelgraph() # we did not save the model graph - - def __str__(self) : - """ returns a string representing the model """ - res = "Linear regressor, input size =",str(self.init_params[0]) - res += ", output size =", str(self.init_params[1]) - return res - - def __equal__(self,other) : - """ - Compares the model based on the params. 
- @return True if the params are the same, False otherwise - """ - # class - if not isinstance(other,model) : - return False - # input size - if self.params[0].shape[0] != other.params[0].shape[0] : - return False - # output size - if self.params[0].shape[1] != other.params[0].shape[1] : - return False - # actual values - if not (self.params[0] == other.params[0]).all(): - return False - if not (self.params[1] == other.params[1]).all(): - return False - # all good - return True - - -def die_with_usage() : - """ help menu """ - print 'simple script to illustrate how to use theano/pylearn' - print 'to launch:' - print ' python linear_classifier.py -launch' - sys.exit(0) - - - -#************************************************************ -# main - -if __name__ == '__main__' : - - if len(sys.argv) < 2 : - die_with_usage() - - # print create data - inputs = numpy.array([[.1,.2], - [.2,.8], - [.9,.3], - [.6,.5]]) - outputs = numpy.array([[0], - [0], - [1], - [1]]) - assert inputs.shape[0] == outputs.shape[0] - - # create model - m = model(2,1) - - # predict - print 'prediction before training:' - print m[inputs] - - # update it for 100 iterations - for k in range(50) : - m.update(.1,inputs,outputs) - - # predict - print 'prediction after training:' - print m[inputs] - - # show points - import pylab as P - colors = outputs.flatten().tolist() - x = inputs[:,0] - y = inputs[:,1] - P.plot(x[numpy.where(outputs==0)[0]],y[numpy.where(outputs==0)[0]],'r+') - P.plot(x[numpy.where(outputs==1)[0]],y[numpy.where(outputs==1)[0]],'b+') - # decision line - p1 = (.5 - m.params[1] * 1.) / m.params[0][1,0] # abs = 0 - p2 = (.5 - m.params[1] * 1.) / m.params[0][0,0] # ord = 0 - P.plot((0,p2[0],2*p2[0]),(p1[0],0,-p1[0]),'g-') - # show - P.axis([-1,2,-1,2]) - P.show() -
--- a/pylearn/examples/theano_update.py Mon Aug 16 10:39:36 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -import theano -from theano import tensor - -import numpy - -# Two scalar symbolic variables -a = tensor.scalar() -b = tensor.scalar() - -# Definition of output symbolic variable -c = a * b -# Definition of the function computing it -fprop = theano.function([a,b], [c]) - -# Initialize numerical variables -a_val = numpy.array(12.) -b_val = numpy.array(2.) -print 'a_val =', a_val -print 'b_val =', b_val - -# Numerical value of output is returned by the call to "fprop" -c_val = fprop(a_val, b_val) -print 'c_val =', c_val - - -# Definition of simple update (increment by one) -new_b = b + 1 -update = theano.function([b], [new_b]) - -# New numerical value of b is returned by the call to "update" -b_val = update(b_val) -print 'new b_val =', b_val -# We can use the new value in "fprop" -c_val = fprop(a_val, b_val) -print 'c_val =', c_val - - -# Definition of in-place update (increment by one) -re_new_b = tensor.add_inplace(b, 1.) -re_update = theano.function([b], [re_new_b]) - -# "re_update" can be used the same way as "update" -b_val = re_update(b_val) -print 'new b_val =', b_val -# We can use the new value in "fprop" -c_val = fprop(a_val, b_val) -print 'c_val =', c_val - -# It is not necessary to keep the return value when the update is done in place -re_update(b_val) -print 'new b_val =', b_val -c_val = fprop(a_val, b_val) -print 'c_val =', c_val - - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/README.txt Fri Aug 20 09:31:39 2010 -0400
@@ -0,0 +1,2 @@
+
+see __init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/__init__.py Fri Aug 20 09:31:39 2010 -0400
@@ -0,0 +1,11 @@
+"""Gradient Descent
+
+This module should contain tools and algorithms related to [stochastic] gradient descent. For
+example:
+
+ - SGD with/without momentum
+ - Hessian Free GD
+ - TONGA
+ - Stopping criteria (incl. for use in theano functions)
+
+"""
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/gd/sgd.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,75 @@ +"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.) +""" + +import theano + +class StochasticGradientDescent(theano.Module): + """Fixed stepsize gradient descent + + Methods for gradient descent are: + - step(arg_vals) which returns None and updates the params + - step_cost(arg_vals) which returns the cost value, and updates the params + + """ + def __init__(self, args, cost, params, + gradients=None, stepsize=None, + updates=None, auxout=None, methods=True): + """ + :param stepsize: the step to take in (negative) gradient direction + :type stepsize: None, scalar value, or scalar TensorVariable + + :param updates: extra symbolic updates to make when evating either step or step_cost + (these override the gradients if necessary) + :type updates: dict Variable -> Variable + :param auxout: auxiliary outputs, list containing output symbols to + compute at the same time as cost (for efficiency) + :param methods: Should this module define the step and step_cost methods? + """ + super(StochasticGradientDescent, self).__init__() + self.stepsize_init = None + + if stepsize is None: + self.stepsize = theano.tensor.dscalar() + elif isinstance(stepsize, theano.tensor.TensorVariable): + self.stepsize = stepsize + else: + self.stepsize = (theano.tensor.as_tensor_variable(stepsize)) + + if self.stepsize.ndim != 0: + raise TypeError('stepsize must be a scalar', stepsize) + + self.params = params + self.gparams = theano.tensor.grad(cost, self.params) if gradients is None else gradients + + self._updates = (dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))) + if updates is not None: + self._updates.update(updates) + + if methods: + if auxout is None: + self.step = theano.Method(args, [], updates=self._updates) + self.step_cost = theano.Method(args, cost, updates=self._updates) + else: + # step cost always returns a list if auxout + self.step = theano.Method( + args, [] + auxout, + updates=self._updates) + self.step_cost = theano.Method( + args, [cost]+auxout, + updates=self._updates) + + + updates = property(lambda self: self._updates.copy()) + + def _instance_initialize(self, obj): + pass + +def sgd_minimizer(stepsize=None): + """Curry the stepsize argument to StochasticGradientDescent, providing standard minimizer interface + + :returns: standard minimizer constructor f(args, cost, params, gradient=None) + """ + def f(args, cost, params, gradients=None, updates=None, auxout=None): + return StochasticGradientDescent(args, cost, params, gradients=gradients, stepsize=stepsize, + updates=updates, auxout=auxout) + return f
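Minimal usage sketch of the module above, mirroring the accompanying tests in pylearn/gd/tests/test_sgd.py: minimize (1 - x*y)**2 over y with a fixed stepsize.

    import theano
    from pylearn.gd import sgd

    x = theano.tensor.dscalar('x')
    y = theano.tensor.dscalar('y')

    M = sgd.StochasticGradientDescent([x], (1.0 - x * y) ** 2, [y], stepsize=0.01)
    M.y = y                        # expose y so the compiled module instance can be set
    m = M.make()
    m.y = 5.0
    for i in xrange(100):
        cost = m.step_cost(3.0)    # one SGD step for the input x=3.0; y converges toward 1/3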
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/gd/stopper.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,131 @@ +import time +"""Early stopping iterators + +The idea here is to supply early-stopping heuristics that can be used in the +form: + + stopper = SomeEarlyStopper() + + for i in stopper(): + # train from data + if i.set_score: + i.score = validation_score + + +So far I only have one heuristic, so maybe this won't scale. +""" + +class Stopper(object): + + def train(self, data, update_rows_fn, update, validate, save=None): + """Return the best model trained on data + + Parameters: + data - a thing that accepts getitem(<list of int64>), or a tuple of such things + update_rows_fn - fn : int --> <list or tensor of int> + update - fn: update an internal model from elements of data + validate - fn: evaluate an internal model based on elements of data + save - fn: return a copy of the internal model + + The body of this function exhausts the <self> iterator, and trains a + model using early stopping in the process. + """ + + best = None + for stp in self: + i = stp.iter + + # call update on some training set rows + t_rows = update_rows_fn(i) + if isinstance(data, (tuple, list)): + update(*[d[t_rows] for d in data]) + else: + update(data[t_rows]) + + if stp.set_score: + stp.score = validate() + if (stp.score < stp.best_score) and save: + best = save() + return best + + def find_min(self, step, check, save): + best = None + for stp in self: + step() + if stp.set_score: + stp.score = check() + if (stp.score < stp.best_score) and save: + best = (save(), stp.iter, stp.score) + return best + +class ICML08Stopper(Stopper): + @staticmethod + def icml08(ntrain, batchsize): + """Some setting similar to what I used for ICML08 submission""" + #TODO: what did I actually use? put that in here. 
+ return ICML08Stopper(30*ntrain/batchsize, + ntrain/batchsize, 0.96, 2.0, 100000000) + + def __init__(self, i_wait, v_int, min_improvement, patience, hard_limit, hard_time_limit=None): + self.initial_wait = i_wait + self.set_score_interval = v_int + self.min_improvement = min_improvement + self.patience = patience + self.hard_limit = hard_limit + self.hard_limit_seconds = hard_time_limit + self.start_time = time.time() + + self.best_score = float('inf') + self.best_iter = -1 + self.iter = -1 + + self.set_score = False + self.score = None + + def __iter__(self): + return self + + E_set_score = 'when iter.set_score is True, caller must assign a score to iter.score' + def next(self): + + #print 'ICML08 stopper, were doing a next' + + if self.set_score: #left over from last time + if self.score is None: + raise Exception(ICML08Stopper.E_set_score) + if self.score < (self.best_score * self.min_improvement): + (self.best_score, self.best_iter) = (self.score, self.iter) + self.score = None #un-set it + + + starting = self.iter < self.initial_wait + waiting = self.iter < (self.patience * self.best_iter) + if self.hard_limit_seconds != None: + times_up = (time.time() - self.start_time) > self.hard_limit_seconds + else: times_up = False + if (starting or waiting) and not times_up: + # continue to iterate + self.iter += 1 + if self.iter == self.hard_limit: + raise StopIteration + self.set_score = (self.iter % self.set_score_interval == 0) + return self + + raise StopIteration + +class NStages(ICML08Stopper): + """Run for a fixed number of steps, checking validation set every so + often.""" + def __init__(self, hard_limit, v_int): + ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit) + + #TODO: could optimize next() function. Most of what's in ICML08Stopper.next() + #is not necessary + +def geometric_patience(i_wait, v_int, min_improvement, patience, hard_limit): + return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit) + +def nstages(hard_limit, v_int): + return ICML08Stopper(hard_limit, v_int, 1.0, 1.0, hard_limit) + +
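A hedged sketch of the early-stopping loop these classes support, using the `nstages` constructor and `Stopper.find_min`; the three callables are hypothetical stand-ins for whatever the caller's model provides.

    import random
    from pylearn.gd import stopper

    def train_one_batch():      # hypothetical: perform one update of the model
        pass

    def validation_error():     # hypothetical: current error on the validation set
        return random.random()

    def snapshot_params():      # hypothetical: return a copy of the current parameters
        return {}

    stop = stopper.nstages(hard_limit=1000, v_int=50)   # 1000 steps, validate every 50
    best = stop.find_min(train_one_batch, validation_error, snapshot_params)
    # best is (saved_params, iteration, score) for the lowest validation score seen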
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/gd/tests/test_sgd.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,75 @@ +import theano +from theano.compile.debugmode import DebugMode +from pylearn.gd import sgd + +mode = theano.compile.mode.get_default_mode() +if isinstance(mode,DebugMode): + mode = 'FAST_RUN' + +def test_sgd0(): + + x = theano.tensor.dscalar('x') + y = theano.tensor.dscalar('y') + + M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01) + M.y = y + m = M.make(mode=mode) + m.y = 5.0 + for i in xrange(100): + c = m.step_cost(3.0) + #print c[0], m.y + + assert c < 1.0e-5 + assert abs(m.y - (1.0 / 3)) < 1.0e-4 + +def test_sgd_stepsize_variable(): + + x = theano.tensor.dscalar('x') + y = theano.tensor.dscalar('y') + lr = theano.tensor.dscalar('lr') + + M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=lr) + M.y = y + M.lr = lr + m = M.make(mode=mode) + m.y = 5.0 + m.lr = 0.01 + for i in xrange(100): + c = m.step_cost(3.0) + # print c, m.y + + assert c < 1.0e-5 + assert abs(m.y - (1.0 / 3)) < 1.0e-4 + + + #test that changing the lr has impact + + m.y = 5.0 + m.lr = 0.0 + for i in xrange(10): + c = m.step_cost(3.0) + # print c, m.y + + assert m.y == 5.0 + +def test_sgd_stepsize_none(): + + x = theano.tensor.dscalar('x') + y = theano.tensor.dscalar('y') + + M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y]) + M.y = y + m = M.make(mode=mode) + m.y = 5.0 + #there should be a learning rate here by default + assert m.stepsize is None + m.stepsize = 0.01 + for i in xrange(100): + c = m.step_cost(3.0) + # print c, m.y + + assert c < 1.0e-5 + assert abs(m.y - (1.0 / 3)) < 1.0e-4 + +if __name__ == '__main__': + test_sgd0()
--- a/pylearn/io/image_tiling.py Mon Aug 16 10:39:36 2010 -0400
+++ b/pylearn/io/image_tiling.py Fri Aug 20 09:31:39 2010 -0400
@@ -11,7 +11,8 @@
     ndar *= 1.0 / (ndar.max()+eps)
     return ndar

-def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0,0),
+def tile_raster_images(X, img_shape,
+        tile_shape=None, tile_spacing=(1,1),
         scale_rows_to_unit_interval=True,
         output_pixel_vals=True
         ):
@@ -27,16 +28,26 @@
     :type img_shape: tuple; (height, width)
     :param img_shape: the original shape of each image

     :type tile_shape: tuple; (rows, cols)
-    :param tile_shape: the number of images to tile (rows, cols)
+    :param tile_shape: the number of images to tile (rows, cols) (Defaults to a square-ish
+        shape with the right area for the number of images)

     :returns: array suitable for viewing as an image. (See:`PIL.Image.fromarray`.)
     :rtype: a 2-d array with same dtype as X.

     """
+    if isinstance(X, tuple):
+        n_images_in_x = X[0].shape[0]
+    else:
+        n_images_in_x = X.shape[0]
+
+    if tile_shape is None:
+        tile_shape = most_square_shape(n_images_in_x)
+
     assert len(img_shape) == 2
     assert len(tile_shape) == 2
     assert len(tile_spacing) == 2

+    #out_shape is the shape in pixels of the returned image array
     out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
         in zip(img_shape, tile_shape, tile_spacing)]
@@ -82,3 +93,25 @@
     return out_array

+def most_square_shape(N):
+    """rectangle (height, width) with area N that is closest to square
+    """
+    for i in xrange(int(numpy.sqrt(N)),0, -1):
+        if 0 == N % i:
+            return (i, N/i)
+
+def save_tiled_raster_images(tiled_img, filename):
+    """Save a return value from `tile_raster_images` to `filename`.
+
+    Returns the PIL image that was saved
+    """
+    if tiled_img.ndim==2:
+        img = Image.fromarray( tiled_img, 'L')
+    elif tiled_img.ndim==3:
+        img = Image.fromarray( tiled_img, 'RGBA')
+    else:
+        raise TypeError('bad ndim', tiled_img)
+
+    img.save(filename)
+    return img
+
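A brief sketch of the new default-tile_shape behaviour: with `tile_shape` omitted, `most_square_shape` picks a near-square grid for however many rows X has. The random weight matrix below is purely illustrative.

    import numpy
    from pylearn.io.image_tiling import tile_raster_images, save_tiled_raster_images

    W = numpy.random.randn(96, 28 * 28)              # e.g. 96 filters over 28x28 inputs
    img = tile_raster_images(W, img_shape=(28, 28))  # most_square_shape(96) -> (8, 12) grid
    save_tiled_raster_images(img, 'filters.png')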
--- a/pylearn/lib/README.txt Mon Aug 16 10:39:36 2010 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-This folder is for general, reusable code.
-Code may be moved upstream from time to time.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/preprocessing/README.txt Fri Aug 20 09:31:39 2010 -0400
@@ -0,0 +1,2 @@
+
+Module documentation is given by docstring in the __init__.py file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/preprocessing/__init__.py Fri Aug 20 09:31:39 2010 -0400
@@ -0,0 +1,13 @@
+
+"""Preprocessing module
+
+This module provides little numpy functions (not necessarily Theano expressions) for one-time
+pre-processing of data for modeling experiments. The module is a work in progress, but the
+sort of thing that belongs here is:
+
+ - PCA
+ - whitening (for images and video)
+ - TF/IDF (for text)
+ - MFCC (for audio)
+
+"""
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/preprocessing/fft_whiten.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,76 @@ +""" +Whitening algorithm used in Olshausen & Field, 1997. + +If you want to use some other images, there are a number of +preprocessing steps you need to consider beforehand. First, you +should make sure all images have approximately the same overall +contrast. One way of doing this is to normalize each image so that +the variance of the pixels is the same (i.e., 1). Then you will need +to prewhiten the images. For a full explanation of whitening see + +Olshausen BA, Field DJ (1997) Sparse Coding with an Overcomplete +Basis Set: A Strategy Employed by V1? Vision Research, 37: 3311-3325. + +Basically, to whiten an image of size NxN, you multiply by the filter +f*exp(-(f/f_0)^4) in the frequency domain, where f_0=0.4*N (f_0 is the +cutoff frequency of a lowpass filter that is combined with the +whitening filter). Once you have preprocessed a number of images this +way, all the same size, then you should combine them into one big N^2 +x M array, where M is the number of images. Then rescale this array +so that the average image variance is 0.1 (the parameters in sparsenet +are set assuming that the data variance is 0.1). Name this array +IMAGES, save it to a file for future use, and you should be off and +running. The following Matlab commands should do this: + + + N=image_size; + M=num_images; + + [fx fy]=meshgrid(-N/2:N/2-1,-N/2:N/2-1); + rho=sqrt(fx.*fx+fy.*fy); + f_0=0.4*N; + filt=rho.*exp(-(rho/f_0).^4); + + for i=1:M + image=get_image; % you will need to provide get_image + If=fft2(image); + imagew=real(ifft2(If.*fftshift(filt))); + IMAGES(:,i)=reshape(imagew,N^2,1); + end + + IMAGES=sqrt(0.1)*IMAGES/sqrt(mean(var(IMAGES))); + + save MY_IMAGES IMAGES + +""" + +import numpy as np +def whiten(X, f0=None, n=None): + """ + :param X: a 3D tensor n_images x rows x cols + :param f0: filter parameter (see docs) + :param n: filter parameter (see docs) + + :returns: 3D tensor n_images x rows x cols of filtered images. + """ + # May be mixed up with the size2 and size1s because matlab does things differnetly + R, C = X.shape[-2:] + if R %2: + raise NotImplementedError() + if C %2: + raise NotImplementedError() + + if f0 is None: + f0 = .4 * min(R,C) + if n is None: + n = 4 + + + fx,fy = np.mgrid[-R/2:R/2, -C/2:C/2] + rho=np.sqrt(fx**2 + fy**2) + filt=rho * np.exp( - (rho/f0)**4) + If = np.fft.fft2(X) + imagew=np.real(np.fft.ifft2(If * np.fft.fftshift(filt))) + assert imagew.shape == X.shape + return imagew +
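A minimal sketch of applying the whitening filter described above to a stack of square images; the shapes are illustrative, and per the code f0 defaults to 0.4*min(rows, cols) with exponent n=4.

    import numpy as np
    from pylearn.preprocessing.fft_whiten import whiten

    images = np.random.rand(10, 128, 128)    # n_images x rows x cols (even dimensions required)
    white = whiten(images)                    # same shape, whitened in the frequency domain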
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/preprocessing/pca.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,119 @@ +""" + +There is potentially a lot of approaches to PCA, this file may get there eventually. + + +Elements of this implementation have been borrowed from the MDP toolkit: + mdp/nodes/pca_nodes.py +""" + +#TODO: estimate number of principle components by cross-validation (early stopping) + +import numpy +import scipy.linalg + +def diag_as_vector(x): + if x.ndim != 2: + raise TypeError('this diagonal is implemented only for matrices', x) + rval = x[0,:min(*x.shape)] + rval.strides = (rval.strides[0] + x.strides[0],) + return rval + + +def pca_from_cov(cov, lower=0, max_components=None, max_energy_fraction=None): + """Return (eigvals, eigvecs) of data with covariance `cov`. + + The returned eigvals will be a numpy ndarray vector. + The returned eigvecs will be a numpy ndarray matrix whose *cols* are the eigenvectors. + + This is recommended for retrieving many components from high-dimensional data. + + :param cov: data covariance matrix + :type cov: a numpy ndarray + """ + + w, v = scipy.linalg.eigh(a=cov, lower=lower) + # definition of eigh + # a * v[:,i] = w[i] * vr[:,i] + # v.H * v = identity + + + # total variance can be computed at this point: + # note that vartot == d.sum() + vartot = diag_as_vector(cov).sum() + + assert numpy.allclose(vartot, w.sum()) + + a = numpy.argsort(w)[::-1] + + w = w[a] + v = v[:,a] + + if max_components != None: + w = w[:max_components] + v = v[:, :max_components] + + if max_energy_fraction != None: + if not (0.0 <= max_energy_fraction <= 1.0): + raise ValueError('illegal value for max_energy_fraction', max_energy_fraction) + if max_energy_fraction < 1.0: + energy = 0 + i = 0 + while energy < max_energy_fraction * vartot: + energy += w[i] + i += 1 + if i < len(w): + w = w[:i] + v = v[:,:i] + + + return w,v + + +def pca_from_examples(X, max_components=None, max_energy_fraction=None, x_centered=False): + """Return (eigvals, eigvecs), centered_X of observations `X` (1-per-row) + + This function exists to wrap several algorithms for getting the principle components. + + :param max_components: + Return no more than this many principle components. + + :param max_energy_fraction: + Return [only] enough components to account for this fraction of the energy (aka + variance) in the observations. + + :param x_centered: + True means to consider X as having mean 0 (even if it actually doesn't!) + + """ + if x_centered: + centered_X = X + else: + centered_X = X - numpy.mean(X, axis=0) + return pca_from_cov( numpy.cov(centered_X.T), max_components=max_components, + max_energy_fraction=max_energy_fraction), centered_X + + +def pca_whiten((eigvals, eigvecs), centered_X,eps=1e-8): + """ + Return the projection of X onto it's principle components. + + The return value has the same number of rows as X, but the number of columns is the number + of principle components. Columns of the return value have mean 0, variance 1, and are + uncorrelated. + + :param pca: the (w,v) pair returned by e.g. pca_from_examples(X) + + """ + pca_of_X = numpy.dot(centered_X, eigvecs) + pca_of_X /= numpy.sqrt(eigvals)+eps + return pca_of_X + +def zca_whiten((eigvals, eigvecs), centered_X): + """Return the PCA of X but rotated back into the original vector space. + + See also fft_whiten.py + """ + pca_of_X = pca_whiten((eigvals,eigvecs), centered_X) + return numpy.dot(pca_of_X, eigvecs.T) +
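A hedged sketch tying the three helpers together; the data matrix is illustrative. `pca_from_examples` returns ((eigvals, eigvecs), centered_X), which is exactly the pair the whitening functions expect.

    import numpy
    from pylearn.preprocessing.pca import pca_from_examples, pca_whiten, zca_whiten

    X = numpy.random.rand(500, 64)                        # 500 examples, 64 features
    (eigvals, eigvecs), centered_X = pca_from_examples(X, max_components=16)
    X_pca = pca_whiten((eigvals, eigvecs), centered_X)    # 500 x 16, decorrelated, unit variance
    X_zca = zca_whiten((eigvals, eigvecs), centered_X)    # rotated back into the input space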
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/sampling/README.txt Fri Aug 20 09:31:39 2010 -0400
@@ -0,0 +1,2 @@
+See __init__.py
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/sampling/__init__.py Fri Aug 20 09:31:39 2010 -0400
@@ -0,0 +1,17 @@
+"""Sampling
+
+This module [will] contain theano-related code for various sampling algorithms, such as for
+example:
+
+ - MCMC
+
+ - [Block] Gibbs Sampling
+
+ - Slice sampling
+
+ - HMC
+
+ - Tempering methods
+
+
+"""
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sampling/hmc.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,203 @@ +"""Hybrid / Hamiltonian Monte Carlo Sampling + +This algorithm is described in Radford Neal's PhD Thesis, pages 63--70. + +""" +import sys +import logging +import numpy as np +from theano import function, shared +from theano import tensor as TT +import theano.sparse #installs the sparse shared var handler + + +#TODO: +# Consider how to refactor this code so that the key functions /functionality are provided as +# theano expressions?? +# It could be limiting that the implementation requires that the position be a list of shared +# variables, and basically uses procedural logic on top of that. + +#TODO: +# Consider a heuristic for updating the *MASS* of the particles. We might want the mass to be +# such that the momentum is in the same range as the gradient on the energy. Look at Radford's +# recent book chapter, maybe there are hints. (2010). + +class HMC_sampler(object): + """Batch-wise Hybrid Monte-Carlo sampler + + + The `draw` function advances the markov chain and returns the current sample by calling + `simulate` and `get_position` in sequence. + + + :note: + + The 'mass' of so-called particles is taken to be 1, so that kinetic energy (K) is the sum + of squared velocities (p). + + :math:`K = \sum_i p_i^2 / 2` + + """ + + # Constants taken from Marc'Aurelio's 'train_mcRBM.py' file found in the code online for his + # paper. + stepsize_dec = 0.98 + stepsize_min = 0.001 + stepsize_max = 0.25 + stepsize_inc = 1.02 + avg_acceptance_slowness = 0.9 # used in geometric avg. 1.0 would be not moving at all + n_steps=20 + + def __init__(self, positions, energy_fn, + velocity=None, + initial_stepsize=0.01, + target_acceptance_rate=0.9, + seed=12345, + dtype=theano.config.floatX): + """ + :param positions: list of shared ndarray variables. + + :param energy: + + callable such that energy_fn(positions) + returns theano vector of energies. + The len of this vector is the batchsize. + + The sum of this energy vector must be differentiable (with theano.tensor.grad) with + respect to the positions for HMC sampling to work. + + """ + + self.stepsize = initial_stepsize + batchsize = positions[0].value.shape[0] + self.target_acceptance_rate = target_acceptance_rate + self.avg_acceptance_rate = self.target_acceptance_rate + self.s_rng = TT.shared_randomstreams.RandomStreams(seed) + self.positions = positions + if velocity is None: + self.velocity = [shared(np.zeros_like(q.value)) for q in self.positions] + else: + self.velocity = velocity + + self.start_positions = [shared(np.zeros_like(q.value)) for q in self.positions] + self.initial_hamiltonian = shared(np.zeros(batchsize).astype(dtype) + float('inf')) + + energy = energy_fn(positions) + # assuming mass is 1 + kinetic_energy = 0.5 * sum([(p**2).sum(axis=1) for p in self.velocity]) + hamiltonian = energy + kinetic_energy + + dE_dpos = TT.grad(energy.sum(), self.positions) + + s_stepsize = TT.scalar('stepsize') + + self.velocity_step = function([s_stepsize], + [dE_dpos[0].norm(2)], + updates=[(p, p - s_stepsize*dE_dq) for p,dE_dq in zip(self.velocity,dE_dpos)]) + self.position_step = function([s_stepsize], [], + updates=[(q, q + s_stepsize*p) for (q,p) in zip(self.positions, self.velocity)]) + + self.save_start_positions = function([], [], + updates=[(self.initial_hamiltonian, hamiltonian)] + \ + [(sq, q) for sq, q in zip(self.start_positions, self.positions)]) + + # sample the initial velocity from a + # gaussian with mean 0. 
+ # Note: I think the fact that this distribution is symmetric about zero justifies not + # sampling forward versus backward dynamics. + self.randomize_velocity = function([], [], + updates=[(p, self.s_rng.normal(size=p.value.shape)) for p in self.velocity]) + + + # accept-reject according to Metropolis algo + + accept = TT.exp(self.initial_hamiltonian - hamiltonian) - self.s_rng.uniform(size=(batchsize,)) >= 0 + + self.accept_reject_positions = function([], accept.mean(), + updates=[ + (self.initial_hamiltonian, + TT.switch(accept, hamiltonian, self.initial_hamiltonian))] + [ + (q, + TT.switch(accept.dimshuffle(0, *(('x',)*(sq.ndim-1))), q, sq)) + for sq, q in zip(self.start_positions, self.positions)]) + + def simulate(self, n_steps=None): + if n_steps is None: + n_steps = self.n_steps + + # updates self.velocity with new random numbers + self.randomize_velocity() + self.save_start_positions() + + if 0: + # not necessary for random initial direction + if np.random.rand() > 0.5: + step_amount = self.stepsize + else: + step_amount = -self.stepsize + else: + step_amount = self.stepsize + + if 0: + print "initial", + print "kinetic E", self.prev_energy.value , + print (self.velocity[0].value**2).sum(axis=1), + print (self.velocity[0].value**2).sum(axis=1) + self.prev_energy.value + + + # Note on the order of leap-frog steps: + # + # the leap-frog algorithm can start with either position or velocity updates first, + # but one of them has to run an extra time (because of two half-steps). + # The position_step is cheap to evaluate, whereas the velocity_step is expensive, + # so we evaluate the position_step the extra time. + # + # At the same time, we cannot drop the last half-step update of the position, because + # the position is actually the terms we care about. + + #opening half-step in leap-frog algorithm + self.position_step(step_amount/2.0) + + # full leap-frog steps + for ss in range(n_steps): + self.velocity_step(step_amount) + if ss == n_steps-1: + # closing half-step in the leap-frog algorithm + self.position_step(step_amount/2.0) + else: + self.position_step(step_amount) + + acceptance_rate = self.accept_reject_positions() + self.avg_acceptance_rate = self.avg_acceptance_slowness * self.avg_acceptance_rate \ + + (1.0 - self.avg_acceptance_slowness) * acceptance_rate + + if self.avg_acceptance_rate < self.target_acceptance_rate: + self.stepsize = max(self.stepsize*self.stepsize_dec,self.stepsize_min) + else: + self.stepsize = min(self.stepsize*self.stepsize_inc,self.stepsize_max) + + if 0: + print "final kinetic E", self.prev_energy.value , + print (self.velocity[0].value**2).sum(axis=1), + print (self.velocity[0].value**2).sum(axis=1) + self.prev_energy.value + + + # post-condition: + # self.positions contains a new sample from our markov chain + + # it is not returned from this function because accessing the .value of a shared + # variable can require making a copy + # see `draw()` or `get_position` for that behaviour. + + def get_position(self): + return [q.value for q in self.positions] + + def draw(self, n_steps=None): + """Return the current sample in the Markov chain as a list of numpy arrays + + The size of the arrays will match the size of the `initial_position` argument to + __init__. + """ + self.simulate(n_steps=n_steps) + return self.get_position() +
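A hedged sketch of driving the sampler: draw from an isotropic Gaussian whose energy 0.5*||x||^2 is what `energy_fn` must return, one value per row of the batch. The batch size and dimensionality below are arbitrary.

    import numpy as np
    from theano import shared
    from pylearn.sampling.hmc import HMC_sampler

    batchsize, dim = 3, 5
    positions = [shared(np.zeros((batchsize, dim)))]

    def gaussian_energy(pos):
        # vector of energies, one per example in the batch
        return 0.5 * (pos[0] ** 2).sum(axis=1)

    sampler = HMC_sampler(positions, gaussian_energy, initial_stepsize=0.01, seed=123)
    samples = [sampler.draw() for _ in range(100)]   # each draw: list of ndarrays, one per position var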
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sampling/mcmc.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,103 @@ +"""Metropolis-Hastings Monte Carlo
+
+See also Hamiltonian Monte Carlo (aka Hybrid Monte Carlo) in the hmc.py file.
+
+"""
+import sys
+import logging
+import numpy as np
+from theano import function, shared
+from theano import tensor as TT
+import theano.sparse  # installs the sparse shared-variable handler
+from theano.printing import Print
+
+#TODO:
+#  Consider how to refactor this code so that the key functions/functionality are provided as
+#  theano expressions.
+#  It could be limiting that the implementation requires the position to be a list of shared
+#  variables, and basically uses procedural logic on top of that.
+#
+#  Theano can only optimize one function at a time, so until these are written as update
+#  expressions, they must be compiled as their own functions and cannot be rolled into the
+#  functions the user is compiling anyway.
+
+class MCMC_sampler(object):
+    """Generic Metropolis-Hastings Monte-Carlo sampler
+
+    The proposal is a symmetric Gaussian random walk, so the Hastings correction is
+    identically 1 and the accept-reject step reduces to the plain Metropolis rule.
+
+    The `draw` function advances the Markov chain and returns the current sample by calling
+    `simulate` and `get_position` in sequence.
+
+    The stepsize is updated dynamically to track `target_acceptance_rate`: it shrinks when the
+    running average of the acceptance rate falls below the target and grows otherwise.
+
+    """
+
+    # TODO: does anyone have better guesses for these constants?
+    stepsize_dec = 0.98
+    stepsize_min = 0.001
+    stepsize_max = 0.25
+    stepsize_inc = 1.02
+    target_acceptance_rate = 0.7
+
+    avg_acceptance_slowness = 0.9  # weight on the old value in the running average of the acceptance rate; 1.0 = never update
+    n_steps = 1
+
+    def __init__(self, positions, energy_fn,
+            initial_stepsize=0.01,
+            seed=12345):
+        """
+        :param positions: list of shared ndarray variables.
+
+        :param energy_fn:
+
+            callable such that energy_fn(positions)
+            returns a theano vector of energies.
+            The length of this vector is the batch size.
+        """
+
+        batchsize = positions[0].value.shape[0]
+        self.s_rng = TT.shared_randomstreams.RandomStreams(seed)
+        self.positions = positions
+        self.prev_energy = shared(np.zeros(batchsize) + float('inf'))
+        self.avg_acceptance_rate = 0.5
+        self.stepsize = initial_stepsize
+
+        s_stepsize = TT.scalar('stepsize')
+
+        new_positions = [p + s_stepsize * self.s_rng.normal(size=p.value.shape)
+                for p in self.positions]
+
+        # accept-reject according to Metropolis-Hastings
+        energy = energy_fn(new_positions)
+        accept = TT.exp(self.prev_energy - energy) - self.s_rng.uniform(size=(batchsize,)) >= 0
+
+        self.accept_reject_positions = function([s_stepsize], accept.mean(),
+                updates=[(self.prev_energy, TT.switch(accept, energy, self.prev_energy))] + [
+                    (q, TT.switch(accept.dimshuffle(0, *(('x',)*(q.ndim-1))), new_q, q))
+                    for q, new_q in zip(self.positions, new_positions)])
+
+    def simulate(self, n_steps=None):
+        if n_steps is None:
+            n_steps = self.n_steps
+        for ss in range(n_steps):
+            acceptance_rate = self.accept_reject_positions(self.stepsize)
+            self.avg_acceptance_rate = self.avg_acceptance_slowness * self.avg_acceptance_rate \
+                    + (1.0 - self.avg_acceptance_slowness) * acceptance_rate
+            if self.avg_acceptance_rate < self.target_acceptance_rate:
+                self.stepsize = max(self.stepsize*self.stepsize_dec, self.stepsize_min)
+            else:
+                self.stepsize = min(self.stepsize*self.stepsize_inc, self.stepsize_max)
+
+    def get_position(self):
+        return [q.value for q in self.positions]
+
+    def draw(self, n_steps=None):
+        """Return the current sample in the Markov chain as a list of numpy arrays
+
+        The size of the arrays will match the size of the `positions` argument to
+        __init__.
+ """ + self.simulate(n_steps=n_steps) + return self.get_position() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sampling/tests/test_hmc.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,65 @@ +from pylearn.sampling.hmc import *
+
+def _sampler_on_2d_gaussian(sampler_cls, burnin, n_samples):
+    batchsize = 3
+
+    rng = np.random.RandomState(234)
+
+    #
+    # Define a covariance and mean for a Gaussian
+    #
+    tmp = rng.randn(2,2).astype(theano.config.floatX)
+    tmp[0] += tmp[1]  # induce some covariance
+    cov = np.dot(tmp, tmp.T)
+    cov_inv = np.linalg.inv(cov).astype(theano.config.floatX)
+    mu = np.asarray([5, 9.5], dtype=theano.config.floatX)
+
+    def gaussian_energy(xlist):
+        x, = xlist
+        return 0.5 * (TT.dot((x-mu), cov_inv)*(x-mu)).sum(axis=1)
+
+    position = shared(rng.randn(batchsize, 2).astype(theano.config.floatX))
+    sampler = sampler_cls([position], gaussian_energy)
+
+    print 'initial position', position.value
+    print 'initial stepsize', sampler.stepsize
+
+    # DRAW SAMPLES
+
+    _burnin = [sampler.draw() for r in xrange(burnin)]  # discarded burn-in draws
+    samples = np.asarray([sampler.draw() for r in xrange(n_samples)])
+
+    assert sampler.avg_acceptance_rate > 0
+    assert sampler.avg_acceptance_rate < 1
+
+    # TEST THAT THEY ARE FROM THE RIGHT DISTRIBUTION
+
+    # samples.shape == (n_samples, 1, batchsize, 2)
+
+    print 'target mean:', mu
+    print 'empirical mean: ', samples.mean(axis=0)[0]
+    #assert np.all(abs(mu - samples.mean(axis=0)) < 1)
+
+    print 'final stepsize', sampler.stepsize
+    print 'final acceptance_rate', sampler.avg_acceptance_rate
+
+    print 'target cov', cov
+    print ''
+    for chain in range(batchsize):
+        empirical_cov = np.cov(samples[:, 0, chain, :].T)
+        print 'cov/empirical_cov', cov/empirical_cov
+    return sampler
+
+def test_hmc():
+    print ('HMC')
+    sampler = _sampler_on_2d_gaussian(HMC_sampler, burnin=3000/20, n_samples=90000/20)
+    assert abs(sampler.avg_acceptance_rate - sampler.target_acceptance_rate) < .1
+    assert sampler.stepsize >= sampler.stepsize_min
+    assert sampler.stepsize <= sampler.stepsize_max
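test_hmc.py (and test_mcmc.py below) only print the target-versus-empirical statistics and leave the mean assertion commented out. If one wanted to turn those printouts into an actual check, a sketch along these lines might work; `check_gaussian_fit` and its tolerances are suggestions, not part of the committed tests.

    def check_gaussian_fit(samples, mu, cov, rtol=0.2, atol=0.1):
        # samples has shape (n_samples, 1, batchsize, 2); every chain targets the same
        # Gaussian, so pool all draws before estimating the mean and covariance
        flat = samples.reshape(-1, samples.shape[-1])
        assert np.allclose(flat.mean(axis=0), mu, rtol=rtol, atol=atol)
        assert np.allclose(np.cov(flat.T), cov, rtol=rtol, atol=atol)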
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sampling/tests/test_mcmc.py Fri Aug 20 09:31:39 2010 -0400 @@ -0,0 +1,61 @@ +from pylearn.sampling.mcmc import *
+
+def _sampler_on_2d_gaussian(sampler_cls, burnin, n_samples):
+    batchsize = 3
+
+    rng = np.random.RandomState(234)
+
+    #
+    # Define a covariance and mean for a Gaussian
+    #
+    tmp = rng.randn(2,2)
+    tmp[0] += tmp[1]  # induce some covariance
+    cov = np.dot(tmp, tmp.T)
+    cov_inv = np.linalg.inv(cov)
+    mu = np.asarray([5, 9.5], dtype=theano.config.floatX)
+
+    def gaussian_energy(xlist):
+        x, = xlist
+        return 0.5 * (TT.dot((x-mu), cov_inv)*(x-mu)).sum(axis=1)
+
+    position = shared(rng.randn(batchsize, 2).astype(theano.config.floatX))
+    sampler = sampler_cls([position], gaussian_energy)
+
+    print 'initial position', position.value
+    print 'initial stepsize', sampler.stepsize
+
+    # DRAW SAMPLES
+
+    _burnin = [sampler.draw() for r in xrange(burnin)]  # discarded burn-in draws
+    samples = np.asarray([sampler.draw() for r in xrange(n_samples)])
+
+    assert sampler.avg_acceptance_rate > 0
+    assert sampler.avg_acceptance_rate < 1
+
+    # TEST THAT THEY ARE FROM THE RIGHT DISTRIBUTION
+
+    # samples.shape == (n_samples, 1, batchsize, 2)
+
+    print 'target mean:', mu
+    print 'empirical mean: ', samples.mean(axis=0)[0]
+    #assert np.all(abs(mu - samples.mean(axis=0)) < 1)
+
+    print 'final stepsize', sampler.stepsize
+    print 'final acceptance_rate', sampler.avg_acceptance_rate
+
+    print 'target cov', cov
+    print ''
+    for chain in range(batchsize):
+        empirical_cov = np.cov(samples[:, 0, chain, :].T)
+        print 'cov/empirical_cov', cov/empirical_cov
+    return sampler
+
+def test_mcmc():
+    print ('MCMC')
+    sampler = _sampler_on_2d_gaussian(MCMC_sampler, burnin=3000, n_samples=90000)