diff make_test_datasets.py @ 441:a2e8de4669cd

merge
author Pascal Lamblin <lamblinp@iro.umontreal.ca>
date Thu, 21 Aug 2008 13:55:43 -0400
parents 2d8490d76b3e
children 3eb59514b534
--- a/make_test_datasets.py	Thu Aug 21 13:55:16 2008 -0400
+++ b/make_test_datasets.py	Thu Aug 21 13:55:43 2008 -0400
@@ -1,4 +1,4 @@
-from pylearn.dataset import ArrayDataSet
+import dataset
 from shapeset.dset import Polygons
 from linear_regression import linear_predictor
 from kernel_regression import kernel_predictor
@@ -9,7 +9,8 @@
 to test different learning algorithms.
 """
 
-def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10)):
+
+def make_triangles_rectangles_online_dataset(image_size=(10,10)):
     """
     Make a binary classification dataset to discriminate triangle images from rectangle images.
     """
@@ -19,18 +20,47 @@
             n=len(n_vertices)
             targets = ndarray((n,1),dtype='float64')
             for i in xrange(n):
-                targets[i,0] = array([0. if vertices[i]==3 else 1.],dtype='float64')
+                targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64')
             return images.reshape(len(images),images[0].size).astype('float64'),targets
-        return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),True)
+        return dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"])
+  
+    p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9)
+    trainset=convert_dataset(p)
+    return trainset
+
+
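# Editor's sketch (not part of this changeset): one way the on-line dataset
# above might be consumed.  It assumes the object returned by
# dataset.ApplyFunctionDataSet is iterable over examples and that each
# example exposes the "input" and "target" fields named in the call; the
# diff itself does not show the iteration protocol.
from make_test_datasets import make_triangles_rectangles_online_dataset

dset = make_triangles_rectangles_online_dataset(image_size=(10, 10))
for example in dset:
    x = example["input"]    # flattened 10x10 image, float64
    y = example["target"]   # 0. for a triangle, 1. for a rectangle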
+def make_triangles_rectangles_dataset(n_examples=600,image_size=(10,10), cache = True):
+    """
+    Make a binary classification dataset to discriminate triangle images from rectangle images.
+    """
+    def convert_dataset(dset):
+        # convert the n_vert==3 into target==0 and n_vert==4 into target==1
+        def mapf(images,n_vertices):
+            n=len(n_vertices)
+            targets = ndarray((n,1),dtype='float64')
+            for i in xrange(n):
+                targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64')
+            return images.reshape(len(images),images[0].size).astype('float64'),targets
+        return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),cache)
   
     p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9)
     data = p.subset[0:n_examples]
-    save_polygon_data(data,"shapes")
-    n_train=int(n_examples*train_frac)
+    trainset=convert_dataset(data.subset[0:n_examples])
+    return trainset
+
+
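# Editor's sketch (not part of this changeset): the fixed-size variant above
# draws n_examples polygons up front; cache=True wraps the result in
# dataset.CachedDataSet, presumably so the mapped examples are computed only
# once.  Parameter values below are just the function's defaults.
from make_test_datasets import make_triangles_rectangles_dataset

trainset = make_triangles_rectangles_dataset(n_examples=600,
                                             image_size=(10, 10),
                                             cache=True)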
+def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10), cache = True):
+    """
+    Make two binary classification datasets to discriminate triangle images from rectangle images.
+    The first one is the training set, the second is the test set.
+    """
+    data = make_triangles_rectangles_dataset(n_examples=n_examples,image_size=image_size, cache = cache)
+    n_train = int(n_examples*train_frac)
     trainset=convert_dataset(data.subset[0:n_train])
     testset=convert_dataset(data.subset[n_train:n_examples])
     return trainset,testset
 
+
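# Editor's sketch (not part of this changeset): the train/test helper above
# returns a pair of datasets split according to train_frac.  Note that, as
# written in this revision, its body still refers to the convert_dataset
# helper and data.subset from the pre-split version, so it may need a small
# follow-up adjustment before it runs as-is.
from make_test_datasets import make_triangles_rectangles_datasets

trainset, testset = make_triangles_rectangles_datasets(n_examples=600,
                                                       train_frac=0.5,
                                                       image_size=(10, 10),
                                                       cache=True)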
 def make_artificial_datasets_from_function(n_inputs=1,
                                            n_targets=1,
                                            n_examples=20,