Mercurial > pylearn
view pylearn/datasets/test_modes.py @ 1391:124b939d997f
* removed temporary caltech_silhouette2 dataset
* minor tweak to peaked_modes dataset (used for tempering stuff)
author | gdesjardins |
---|---|
date | Mon, 20 Dec 2010 18:08:48 -0500 |
parents | 3efd0effb2a7 |
children |
line wrap: on
line source
from pylearn.datasets import Dataset
import numpy


def neal94_AC(p=0.01, size=10000, seed=238904, w=(.25, .25, .25, .25)):
    """
    Generates the dataset used in [Desjardins et al, AISTATS 2010]. The dataset
    is composed of 4x4 binary images with four basic modes: full black, full
    white, and [black,white] and [white,black] images. Modes are created by
    drawing each pixel from the 4 basic modes with a bit-flip probability p.

    :param p: probability of flipping each pixel
              p: scalar, list (one per mode)
    :param size: total size of the dataset
    :param seed: seed used to draw random samples
    :param w: weight of each mode within the dataset (one entry per mode)
    :return: a Dataset whose ``train`` member holds ``x`` (one flattened
             16-pixel image per row) and an all-zero ``y`` with one row per
             sample; ``test`` is None and ``img_shape`` is (4,4).
    """
    # can modify the p-value separately for each mode
    if not isinstance(p, (list, tuple)):
        p = [p for _ in w]

    rng = numpy.random.RandomState(seed)

    # mode 1: black image
    B = numpy.zeros((1, 16))
    # mode 2: white image
    W = numpy.ones((1, 16))
    # mode 3: white image with black stripe in left-hand side of image
    BW = numpy.ones((4, 4))
    BW[:, :2] = 0
    BW = BW.reshape(1, 16)
    # mode 4: white image with black stripe in right-hand side of image
    WB = numpy.zeros((4, 4))
    WB[:, :2] = 1
    WB = WB.reshape(1, 16)

    modes = [B, W, BW, WB]

    # Start from an empty float array so the stacked result keeps a float
    # dtype, then stack all chunks in one go instead of re-copying per mode.
    chunks = [numpy.zeros((0, 16))]

    # create permutations of basic modes with bitflip prob p
    for i, m in enumerate(modes):
        # array dimensions and repeat counts must be integers
        n = int(size * w[i])
        bitflip = rng.binomial(1, p[i], size=(n, 16))
        # |m - flip| toggles exactly the pixels where flip == 1
        chunks.append(numpy.abs(numpy.repeat(m, n, axis=0) - bitflip))

    data = numpy.vstack(chunks)
    # size the labels from the rows actually generated, so x and y always
    # agree even when size * w[i] had to be truncated
    y = numpy.zeros((data.shape[0], 1))

    dataset = Dataset()
    dataset.train = Dataset.Obj(x=data, y=y)
    dataset.test = None
    dataset.img_shape = (4, 4)

    return dataset


def n_modes(n_modes=4, img_shape=(4,4), size=10000,
            p=0.001, w=None, seed=238904):
    """
    Generates a dataset of binary images built around `n_modes` random modes.
    Each mode is a binary pattern of `img_shape` pixels drawn uniformly at
    random; the samples of a mode are copies of that pattern in which each
    pixel is flipped independently with probability p.

    :param n_modes: number of random base patterns (modes) to generate
    :param img_shape: shape of each image; rows of x hold the flattened image
    :param size: total size of the dataset
    :param p: probability of flipping each pixel
              p: scalar, list (one per mode)
    :param w: weight of each mode within the dataset (one entry per mode);
              if None, every mode receives size/n_modes samples
    :param seed: seed used to draw random samples
    :return: a Dataset whose ``train`` member holds ``x`` and an all-zero
             ``y`` with one row per sample; ``test`` is None and ``img_shape``
             is the requested image shape.
    """
    img_size = numpy.prod(img_shape)

    # can modify the p-value separately for each mode
    if not isinstance(p, (list, tuple)):
        p = [p for _ in range(n_modes)]

    rng = numpy.random.RandomState(seed)

    # Start from an empty float array so the stacked result keeps a float
    # dtype (and so that n_modes == 0 still yields a valid empty array).
    chunks = [numpy.zeros((0, img_size))]

    for i in range(n_modes):
        base = rng.randint(0, 2, size=(1, img_size))

        if w is not None:
            mode_size = w[i] * size
        else:
            mode_size = size / float(n_modes)
        # array dimensions and repeat counts must be integers
        mode_size = int(mode_size)

        # create permutations of basic modes with bitflip prob p
        bitflip = rng.binomial(1, p[i], size=(mode_size, img_size))
        chunks.append(numpy.abs(numpy.repeat(base, mode_size, axis=0) - bitflip))

    data = numpy.vstack(chunks)
    # size the labels from the rows actually generated, so x and y always
    # agree even when the per-mode size had to be truncated
    y = numpy.zeros((data.shape[0], 1))

    dataset = Dataset()
    dataset.train = Dataset.Obj(x=data, y=y)
    dataset.test = None
    # report the shape that was actually used, not a hard-coded (4,4)
    dataset.img_shape = img_shape

    return dataset


class OnlineModes(object):
    """
    Endless generator of binary images drawn from a mixture of random modes.

    `n_modes` binary base patterns are drawn once at construction. Each call
    to `next` picks a mode for every sample according to the weights `w` and
    returns that mode's pattern with each pixel flipped independently with
    the mode's probability `p`.
    """

    def __init__(self, n_modes, img_shape, seed=238904,
                 min_p=1e-4, max_p=1e-1,
                 min_w=0., max_w=1.,
                 w=None, p=None):
        """
        :param n_modes: number of modes in the mixture
        :param img_shape: shape of each image; samples are returned flattened
        :param seed: seed used to draw random samples
        :param min_p: lower bound for randomly drawn bit-flip probabilities
        :param max_p: upper bound for randomly drawn bit-flip probabilities
        :param min_w: lower bound for randomly drawn (unnormalized) weights
        :param max_w: upper bound for randomly drawn (unnormalized) weights
        :param w: optional sequence of mode weights (one per mode); they are
                  normalized to sum to 1. Drawn at random if None.
        :param p: optional sequence of bit-flip probabilities (one per mode).
                  Drawn at random if None.
        """
        self.n_modes = n_modes
        self.img_shape = img_shape
        self.rng = numpy.random.RandomState(seed)
        self.img_size = numpy.prod(img_shape)

        # generate random p, w values
        if p is None:
            p = min_p + self.rng.rand(n_modes) * (max_p - min_p)
        # stored as a float array so it can be indexed with an index array
        # in next(), even when the caller passed a plain list
        self.p = numpy.asarray(p, dtype=float)

        if w is None:
            w = min_w + self.rng.rand(n_modes) * (max_w - min_w)
        # float conversion lets lists be normalized and avoids integer
        # division when integer weights are given
        w = numpy.asarray(w, dtype=float)
        self.w = w / numpy.sum(w)
        self.sort_w_idx = numpy.argsort(self.w)

        self.modes = self.rng.randint(0, 2, size=(n_modes, self.img_size))

    def __iter__(self):
        return self

    def next(self, batch_size=1):
        """
        Draws a batch of samples. This never raises StopIteration.

        The batch is also stored in ``self.data``, and the mode index of each
        sample (a length-1 index array per sample) in ``self.data_modes``.

        :param batch_size: number of samples to draw
        :return: array of shape (batch_size, img_size) holding one flattened
                 image per row
        """
        # one one-hot row per sample, selecting its mode
        modes = self.rng.multinomial(1, self.w, size=batch_size)
        data = numpy.zeros((batch_size, self.img_size))

        modes_i = []

        for bi, mode in enumerate(modes):
            # index of the single non-zero entry of the one-hot row
            mi, = numpy.where(mode != 0)
            modes_i.append(mi)
            bitflip = self.rng.binomial(1, self.p[mi],
                                        size=(1, self.img_size))
            # |mode - flip| toggles exactly the pixels where flip == 1
            data[bi] = numpy.abs(self.modes[mi] - bitflip)

        self.data = data
        self.data_modes = modes_i

        return data

    # Python 3 spelling of the iterator protocol; `next` is kept for
    # existing callers.
    __next__ = next