Mercurial > pylearn
diff make_test_datasets.py @ 437:2d8490d76b3e
added two methods to make_test_datasets
author | Olivier Breuleux <breuleuo@iro.umontreal.ca> |
---|---|
date | Wed, 06 Aug 2008 19:39:36 -0400 |
parents | 8e4d2ebd816a |
children | 3eb59514b534 |
line wrap: on
line diff
--- a/make_test_datasets.py Wed Aug 06 19:39:14 2008 -0400 +++ b/make_test_datasets.py Wed Aug 06 19:39:36 2008 -0400 @@ -1,4 +1,4 @@ -from pylearn.dataset import ArrayDataSet +import dataset from shapeset.dset import Polygons from linear_regression import linear_predictor from kernel_regression import kernel_predictor @@ -9,7 +9,8 @@ to test different learning algorithms. """ -def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10)): + +def make_triangles_rectangles_online_dataset(image_size=(10,10)): """ Make a binary classification dataset to discriminate triangle images from rectangle images. """ @@ -19,18 +20,47 @@ n=len(n_vertices) targets = ndarray((n,1),dtype='float64') for i in xrange(n): - targets[i,0] = array([0. if vertices[i]==3 else 1.],dtype='float64') + targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64') return images.reshape(len(images),images[0].size).astype('float64'),targets - return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),True) + return dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]) + + p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) + trainset=convert_dataset(p) + return trainset + + +def make_triangles_rectangles_dataset(n_examples=600,image_size=(10,10), cache = True): + """ + Make a binary classification dataset to discriminate triangle images from rectangle images. + """ + def convert_dataset(dset): + # convert the n_vert==3 into target==0 and n_vert==4 into target==1 + def mapf(images,n_vertices): + n=len(n_vertices) + targets = ndarray((n,1),dtype='float64') + for i in xrange(n): + targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64') + return images.reshape(len(images),images[0].size).astype('float64'),targets + return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),cache) p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) data = p.subset[0:n_examples] - save_polygon_data(data,"shapes") - n_train=int(n_examples*train_frac) + trainset=convert_dataset(data.subset[0:n_examples]) + return trainset + + +def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10), cache = True): + """ + Make two binary classification datasets to discriminate triangle images from rectangle images. + The first one is the training set, the second is the test set. + """ + data = make_triangles_rectangles_dataset(n_examples=n_examples,image_size=image_size, cache = cache) + n_train = int(n_examples*train_frac) trainset=convert_dataset(data.subset[0:n_train]) testset=convert_dataset(data.subset[n_train:n_examples]) return trainset,testset + def make_artificial_datasets_from_function(n_inputs=1, n_targets=1, n_examples=20,