comparison make_test_datasets.py @ 437:2d8490d76b3e

added two methods to make_test_datasets
author Olivier Breuleux <breuleuo@iro.umontreal.ca>
date Wed, 06 Aug 2008 19:39:36 -0400
parents 8e4d2ebd816a
children 3eb59514b534
comparison
equal deleted inserted replaced
436:d7ed780364b3 437:2d8490d76b3e
1 from pylearn.dataset import ArrayDataSet 1 import dataset
2 from shapeset.dset import Polygons 2 from shapeset.dset import Polygons
3 from linear_regression import linear_predictor 3 from linear_regression import linear_predictor
4 from kernel_regression import kernel_predictor 4 from kernel_regression import kernel_predictor
5 from numpy import * 5 from numpy import *
6 6
7 """ 7 """
8 General-purpose code to generate artificial datasets that can be used 8 General-purpose code to generate artificial datasets that can be used
9 to test different learning algorithms. 9 to test different learning algorithms.
10 """ 10 """
11 11
12 def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10)): 12
13 def make_triangles_rectangles_online_dataset(image_size=(10,10)):
13 """ 14 """
14 Make a binary classification dataset to discriminate triangle images from rectangle images. 15 Make a binary classification dataset to discriminate triangle images from rectangle images.
15 """ 16 """
16 def convert_dataset(dset): 17 def convert_dataset(dset):
17 # convert the n_vert==3 into target==0 and n_vert==4 into target==1 18 # convert the n_vert==3 into target==0 and n_vert==4 into target==1
18 def mapf(images,n_vertices): 19 def mapf(images,n_vertices):
19 n=len(n_vertices) 20 n=len(n_vertices)
20 targets = ndarray((n,1),dtype='float64') 21 targets = ndarray((n,1),dtype='float64')
21 for i in xrange(n): 22 for i in xrange(n):
22 targets[i,0] = array([0. if vertices[i]==3 else 1.],dtype='float64') 23 targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64')
23 return images.reshape(len(images),images[0].size).astype('float64'),targets 24 return images.reshape(len(images),images[0].size).astype('float64'),targets
24 return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),True) 25 return dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"])
26
27 p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9)
28 trainset=convert_dataset(p)
29 return trainset
30
31
32 def make_triangles_rectangles_dataset(n_examples=600,image_size=(10,10), cache = True):
33 """
34 Make a binary classification dataset to discriminate triangle images from rectangle images.
35 """
36 def convert_dataset(dset):
37 # convert the n_vert==3 into target==0 and n_vert==4 into target==1
38 def mapf(images,n_vertices):
39 n=len(n_vertices)
40 targets = ndarray((n,1),dtype='float64')
41 for i in xrange(n):
42 targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64')
43 return images.reshape(len(images),images[0].size).astype('float64'),targets
44 return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),cache)
25 45
26 p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) 46 p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9)
27 data = p.subset[0:n_examples] 47 data = p.subset[0:n_examples]
28 save_polygon_data(data,"shapes") 48 trainset=convert_dataset(data.subset[0:n_examples])
29 n_train=int(n_examples*train_frac) 49 return trainset
50
51
52 def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10), cache = True):
53 """
54 Make two binary classification datasets to discriminate triangle images from rectangle images.
55 The first one is the training set, the second is the test set.
56 """
57 data = make_triangles_rectangles_dataset(n_examples=n_examples,image_size=image_size, cache = cache)
58 n_train = int(n_examples*train_frac)
30 trainset=convert_dataset(data.subset[0:n_train]) 59 trainset=convert_dataset(data.subset[0:n_train])
31 testset=convert_dataset(data.subset[n_train:n_examples]) 60 testset=convert_dataset(data.subset[n_train:n_examples])
32 return trainset,testset 61 return trainset,testset
62
33 63
34 def make_artificial_datasets_from_function(n_inputs=1, 64 def make_artificial_datasets_from_function(n_inputs=1,
35 n_targets=1, 65 n_targets=1,
36 n_examples=20, 66 n_examples=20,
37 train_frac=0.5, 67 train_frac=0.5,