# HG changeset patch # User Olivier Breuleux # Date 1218065976 14400 # Node ID 2d8490d76b3e3147323ae13df9f5d16574117c73 # Parent d7ed780364b3e2f99172eabecd78899738b279c8 added two methods to make_test_datasets diff -r d7ed780364b3 -r 2d8490d76b3e make_test_datasets.py --- a/make_test_datasets.py Wed Aug 06 19:39:14 2008 -0400 +++ b/make_test_datasets.py Wed Aug 06 19:39:36 2008 -0400 @@ -1,4 +1,4 @@ -from pylearn.dataset import ArrayDataSet +import dataset from shapeset.dset import Polygons from linear_regression import linear_predictor from kernel_regression import kernel_predictor @@ -9,7 +9,8 @@ to test different learning algorithms. """ -def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10)): + +def make_triangles_rectangles_online_dataset(image_size=(10,10)): """ Make a binary classification dataset to discriminate triangle images from rectangle images. """ @@ -19,18 +20,47 @@ n=len(n_vertices) targets = ndarray((n,1),dtype='float64') for i in xrange(n): - targets[i,0] = array([0. if vertices[i]==3 else 1.],dtype='float64') + targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64') return images.reshape(len(images),images[0].size).astype('float64'),targets - return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),True) + return dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]) + + p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) + trainset=convert_dataset(p) + return trainset + + +def make_triangles_rectangles_dataset(n_examples=600,image_size=(10,10), cache = True): + """ + Make a binary classification dataset to discriminate triangle images from rectangle images. + """ + def convert_dataset(dset): + # convert the n_vert==3 into target==0 and n_vert==4 into target==1 + def mapf(images,n_vertices): + n=len(n_vertices) + targets = ndarray((n,1),dtype='float64') + for i in xrange(n): + targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64') + return images.reshape(len(images),images[0].size).astype('float64'),targets + return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),cache) p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) data = p.subset[0:n_examples] - save_polygon_data(data,"shapes") - n_train=int(n_examples*train_frac) + trainset=convert_dataset(data.subset[0:n_examples]) + return trainset + + +def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10), cache = True): + """ + Make two binary classification datasets to discriminate triangle images from rectangle images. + The first one is the training set, the second is the test set. + """ + data = make_triangles_rectangles_dataset(n_examples=n_examples,image_size=image_size, cache = cache) + n_train = int(n_examples*train_frac) trainset=convert_dataset(data.subset[0:n_train]) testset=convert_dataset(data.subset[n_train:n_examples]) return trainset,testset + def make_artificial_datasets_from_function(n_inputs=1, n_targets=1, n_examples=20,