changeset 948:0b4c39c33eb9

Toy dataset used in Desjardins et al. (AISTATS 2010). 4x4 binary images with 4 basic modes. Useful to test mixing problems in RBMs
author gdesjardins
date Mon, 16 Aug 2010 10:38:55 -0400
parents a75bf0aca18f
children d944e1c26a57
files pylearn/datasets/test_modes.py
diffstat 1 files changed, 54 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/datasets/test_modes.py	Mon Aug 16 10:38:55 2010 -0400
@@ -0,0 +1,54 @@
+from pylearn.datasets import Dataset
+import numpy
+
+def neal94_AC(p=0.01, size=10000, seed=238904, w=[.25,.25,.25,.25]):
+    """
+    Generates the dataset used in [Desjardins et al, AISTATS 2010]. The dataset
+    is composed of 4x4 binary images with four basic modes: full black, full
+    white, and [black,white] and [white,black] images. Modes are created by
+    drawing each pixel from the 4 basic modes with a bit-flip probability p.
+    
+    :param p: probability of flipping each pixel p: scalar, list (one per mode) 
+    :param size: total size of the dataset
+    :param seed: seed used to draw random samples
+    :param w: weight of each mode within the dataset
+    """
+
+    # can modify the p-value separately for each mode
+    if not isinstance(p, (list,tuple)):
+        p = [p for i in w]
+
+    rng = numpy.random.RandomState(seed)
+    data = numpy.zeros((size,16))
+
+    # mode 1: black image
+    B = numpy.zeros((1,16))
+    # mode 2: white image
+    W = numpy.ones((1,16))
+    # mode 3: white image with black stripe in left-hand side of image
+    BW = numpy.ones((4,4))
+    BW[:, :2] = 0
+    BW = BW.reshape(1,16)
+    # mode 4: white image with black stripe in right-hand side of image
+    WB = numpy.zeros((4,4))
+    WB[:, :2] = 1
+    WB = WB.reshape(1,16)
+
+    modes = [B,W,BW,WB]
+    data = numpy.zeros((0,16))
+    
+    # create permutations of basic modes with bitflip prob p
+    for i, m in enumerate(modes):
+        n = size * w[i]
+        bitflip = rng.binomial(1,p[i],size=(n,16))
+        d = numpy.abs(numpy.repeat(m, n, axis=0) - bitflip)
+        data = numpy.vstack((data,d))
+
+    y = numpy.zeros((size,1))
+    
+    set = Dataset()
+    set.train = Dataset.Obj(x=data, y=y)
+    set.test = None
+    set.img_shape = (4,4)
+
+    return set