# HG changeset patch
# User gdesjardins
# Date 1283378323 14400
# Node ID d19e3cb809c194bf31d3ea4dcf54aa5e6ea4df69
# Parent 2a53384d97429977f89a2f82543c29a867d48f2f
#
# Created online dataset, for testing PCD style learning algorithms.
# Image size is parametrizable, as well as the number of modes and their
# respective depth.
#
# diff -r 2a53384d9742 -r d19e3cb809c1 pylearn/datasets/test_modes.py
# --- a/pylearn/datasets/test_modes.py Mon Aug 23 16:05:31 2010 -0400
# +++ b/pylearn/datasets/test_modes.py Wed Sep 01 17:58:43 2010 -0400
# @@ -52,3 +52,82 @@
#
# Hunk context (tail of the definition preceding the added code; its
# beginning is outside this changeset):
#         set.img_shape = (4,4)
#         return set
#
# NOTE(review): the code below relies on `numpy` and `Dataset` being
# imported at the top of pylearn/datasets/test_modes.py -- confirm.


def n_modes(n_modes=4, img_shape=(4,4), size=10000,
            p=0.001, w=None, seed=238904):
    """
    Generates a synthetic multi-modal dataset of binary images, in the spirit
    of the dataset used in [Desjardins et al, AISTATS 2010].

    Each mode is defined by a random binary base image (every pixel drawn
    uniformly from {0, 1}). The samples of a mode are copies of its base
    image in which each pixel is flipped independently with probability p.
    Samples are stored flattened, one image per row, grouped by mode.

    :param n_modes: number of modes to generate
    :param img_shape: shape of one image
    :param size: target total size of the dataset. The actual number of rows
                 is the sum of the per-mode sizes, which can differ from
                 `size` when `size` is not divisible by `n_modes` or when
                 `w` does not sum to 1.
    :param p: probability of flipping each pixel; either a scalar (shared by
              all modes) or a sequence with one value per mode
    :param w: weight of each mode within the dataset (fraction of `size`),
              one value per mode; None gives every mode an equal share
    :param seed: seed used to draw random samples
    :return: a Dataset whose `train` member holds `x` (the samples) and `y`
             (an all-zero column with one entry per sample), whose `test`
             member is None and whose `img_shape` is the `img_shape` argument
    """
    img_size = int(numpy.prod(img_shape))

    # a scalar p is shared by all modes; any sequence is taken as per-mode
    if numpy.ndim(p) == 0:
        p = [p] * n_modes

    rng = numpy.random.RandomState(seed)

    # The empty float block keeps the result 2-d and float-valued even when
    # no mode contributes any row.
    blocks = [numpy.zeros((0, img_size))]

    for i in range(n_modes):
        base = rng.randint(0, 2, size=(1, img_size))

        # array dimensions must be integers
        if w is not None:
            mode_size = int(round(w[i] * size))
        else:
            mode_size = size // n_modes

        # |base - bitflip| is the XOR of two binary arrays: a pixel of the
        # base pattern is inverted wherever bitflip is 1
        bitflip = rng.binomial(1, p[i], size=(mode_size, img_size))
        blocks.append(numpy.abs(base - bitflip))

    # stack once instead of re-copying the growing array at every mode
    data = numpy.vstack(blocks)

    # one label row per generated sample, so x and y always line up
    y = numpy.zeros((data.shape[0], 1))

    dataset = Dataset()
    dataset.train = Dataset.Obj(x=data, y=y)
    dataset.test = None
    dataset.img_shape = tuple(img_shape)

    return dataset


class OnlineModes(object):
    """
    Endless generator of noisy binary images drawn from a fixed set of modes.

    Each mode has a random binary base image, a bit-flip probability and a
    mixture weight, all drawn once at construction time from the seeded
    random number generator. Every sample picks a mode according to the
    weights and flips each pixel of its base image independently with the
    mode's bit-flip probability.
    """

    def __init__(self, n_modes, img_shape, seed=238904,
                 min_p=1e-4, max_p=1e-1,
                 min_w=0., max_w=1.):
        """
        :param n_modes: number of modes
        :param img_shape: shape of one image
        :param seed: seed of the random number generator used both to build
                     the modes and to draw the samples
        :param min_p: lower bound of the per-mode bit-flip probabilities
        :param max_p: upper bound of the per-mode bit-flip probabilities
        :param min_w: lower bound of the unnormalized mode weights
        :param max_w: upper bound of the unnormalized mode weights
        :raises ValueError: if the drawn weights do not have a positive sum
        """
        self.n_modes = n_modes
        self.img_shape = img_shape
        self.rng = numpy.random.RandomState(seed)
        self.img_size = int(numpy.prod(img_shape))

        # generate random p, w values (the draw order p, w, modes determines
        # what a given seed produces, so it must not be changed)
        self.p = min_p + self.rng.rand(n_modes) * (max_p - min_p)
        w = min_w + self.rng.rand(n_modes) * (max_w - min_w)
        total_w = numpy.sum(w)
        if not total_w > 0:
            # normalizing would silently fill self.w with NaN
            raise ValueError('mode weights must have a positive sum')
        self.w = w / total_w
        self.sort_w_idx = numpy.argsort(self.w)

        self.modes = self.rng.randint(0, 2, size=(n_modes, self.img_size))

    def __iter__(self):
        """The object is its own iterator."""
        return self

    def next(self, batch_size=1):
        """
        Draws a new batch of samples.

        :param batch_size: number of images to generate
        :return: array of shape (batch_size, img_size) containing zeros and
                 ones; the same array is also stored in `self.data`
        """
        # each row is a one-hot vector marking the mode chosen for a sample
        draws = self.rng.multinomial(1, self.w, size=batch_size)
        mode_idx = numpy.argmax(draws, axis=1)

        data = numpy.zeros((batch_size, self.img_size))

        for bi, mi in enumerate(mode_idx):
            bitflip = self.rng.binomial(1, self.p[mi], size=self.img_size)
            # XOR of the binary base pattern with the flip mask
            data[bi] = numpy.abs(self.modes[mi] - bitflip)

        self.data = data

        return data

    def __next__(self):
        """Python 3 iterator protocol: yields one single-image batch."""
        return self.next()