changeset 1003:d19e3cb809c1

Created online dataset, for testing PCD style learning algorithms. Image size is parametrizable, as well as the number of modes and their respective depth.
author gdesjardins
date Wed, 01 Sep 2010 17:58:43 -0400
parents 2a53384d9742
children 3977ecd49431
files pylearn/datasets/test_modes.py
diffstat 1 files changed, 79 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/datasets/test_modes.py	Mon Aug 23 16:05:31 2010 -0400
+++ b/pylearn/datasets/test_modes.py	Wed Sep 01 17:58:43 2010 -0400
@@ -52,3 +52,82 @@
     set.img_shape = (4,4)
 
     return set
+
+def n_modes(n_modes=4, img_shape=(4,4), size=10000,
+            p=0.001, w=None, seed=238904):
+    """
+    Generates a multi-modal binary dataset in the style of [Desjardins et al,
+    AISTATS 2010]. Each mode is a random binary "base" image; its samples are
+    copies of the base with each pixel flipped independently with prob. p.
+
+    :param n_modes: number of modes (random base images)
+    :param img_shape: shape of one image; each row of x is a flattened image
+    :param size: requested total size of the dataset
+    :param p: probability of flipping each pixel: scalar, list (one per mode)
+    :param w: weight of each mode within the dataset (None: uniform)
+    :param seed: seed used to draw random samples
+    :return: Dataset with train.x of shape (n, prod(img_shape)), train.y all
+        zeros. Per-mode counts are truncated to ints, so n may be < size.
+    """
+    img_size = numpy.prod(img_shape)
+
+    # can modify the p-value separately for each mode
+    if not isinstance(p, (list,tuple)):
+        p = [p] * n_modes
+
+    rng = numpy.random.RandomState(seed)
+    # zero-row float array: fixes the output dtype and covers n_modes == 0
+    chunks = [numpy.zeros((0,img_size))]
+    for i in range(n_modes):
+        base = rng.randint(0,2,size=(1,img_size))
+        # numpy only accepts integer sizes, so truncate the float count
+        mode_size = int(w[i]*size if w is not None else size/float(n_modes))
+        # flipped pixels are |base - bitflip| (a XOR); base broadcasts on rows
+        bitflip = rng.binomial(1,p[i],size=(mode_size, img_size))
+        chunks.append(numpy.abs(base - bitflip))
+
+    data = numpy.vstack(chunks)
+    dataset = Dataset()
+    # size y from data so labels always match the number of examples
+    dataset.train = Dataset.Obj(x=data, y=numpy.zeros((data.shape[0],1)))
+    dataset.test = None
+    dataset.img_shape = img_shape
+    return dataset
+
+
+class OnlineModes(object):
+    """
+    Endless iterator over batches of binary images sampled from n_modes
+    random modes: pick a mode with probability w, then flip each pixel of
+    its base image with probability p. p is drawn uniformly from
+    [min_p, max_p); w from [min_w, max_w), then normalized to sum to 1.
+    """
+
+    def __init__(self, n_modes, img_shape, seed=238904,
+                 min_p=1e-4, max_p=1e-1,
+                 min_w=0., max_w=1.):
+        self.n_modes = n_modes
+        self.img_shape = img_shape
+        self.rng = numpy.random.RandomState(seed)
+        self.img_size = numpy.prod(img_shape)
+        self.p = min_p + self.rng.rand(n_modes) * (max_p - min_p)
+        w = min_w + self.rng.rand(n_modes) * (max_w - min_w)
+        self.w = w / numpy.sum(w)
+        self.sort_w_idx = numpy.argsort(self.w)
+        # one flattened random binary base image per mode
+        self.modes = self.rng.randint(0,2,size=(n_modes,self.img_size))
+
+    def __iter__(self): return self
+
+    def next(self, batch_size=1):
+        """Returns (and stores in self.data) a (batch_size, img_size) batch."""
+        modes = self.rng.multinomial(1, self.w, size=batch_size)
+        data = numpy.zeros((batch_size, self.img_size))
+        for bi, mode in enumerate(modes):
+            mi, = numpy.where(mode != 0)  # index of the sampled mode
+            bitflip = self.rng.binomial(1,self.p[mi], size=(1, self.img_size))
+            data[bi] = numpy.abs(self.modes[mi] - bitflip)
+        self.data = data
+        return data
+
+    __next__ = next  # Python 3 iterator protocol; `next` kept for Python 2