comparison make_test_datasets.py @ 441:a2e8de4669cd
merge
author | Pascal Lamblin <lamblinp@iro.umontreal.ca>
date | Thu, 21 Aug 2008 13:55:43 -0400
parents | 2d8490d76b3e
children | 3eb59514b534
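For orientation, here is a minimal usage sketch of the three dataset constructors that appear on the new side (441:a2e8de4669cd) of the comparison below. The call signatures and return values are taken from the diff; the import path, and the assumption that the `shapeset` and pylearn `dataset` modules are importable, are hypothetical.

```python
# Hedged usage sketch (Python 2 era, matching the xrange-based code in the diff).
# The module path `make_test_datasets` is an assumption -- adjust it to wherever
# this file sits on your PYTHONPATH.
from make_test_datasets import (
    make_triangles_rectangles_online_dataset,
    make_triangles_rectangles_dataset,
    make_triangles_rectangles_datasets,
)

# Dataset built directly on the shapeset Polygons generator (new lines 27-29).
online_set = make_triangles_rectangles_online_dataset(image_size=(10, 10))

# Fixed-size dataset of n_examples images; `cache` is forwarded to
# dataset.CachedDataSet (new line 44).
full_set = make_triangles_rectangles_dataset(n_examples=600,
                                             image_size=(10, 10),
                                             cache=True)

# Train/test pair, split according to train_frac (new lines 57-61).
trainset, testset = make_triangles_rectangles_datasets(n_examples=600,
                                                       train_frac=0.5,
                                                       image_size=(10, 10),
                                                       cache=True)
```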
comparison
440:18dbc1c11647 | 441:a2e8de4669cd |
---|---|
1 from pylearn.dataset import ArrayDataSet | 1 import dataset |
2 from shapeset.dset import Polygons | 2 from shapeset.dset import Polygons |
3 from linear_regression import linear_predictor | 3 from linear_regression import linear_predictor |
4 from kernel_regression import kernel_predictor | 4 from kernel_regression import kernel_predictor |
5 from numpy import * | 5 from numpy import * |
6 | 6 |
7 """ | 7 """ |
8 General-purpose code to generate artificial datasets that can be used | 8 General-purpose code to generate artificial datasets that can be used |
9 to test different learning algorithms. | 9 to test different learning algorithms. |
10 """ | 10 """ |
11 | 11 |
12 def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10)): | 12 |
| 13 def make_triangles_rectangles_online_dataset(image_size=(10,10)): |
13 """ | 14 """ |
14 Make a binary classification dataset to discriminate triangle images from rectangle images. | 15 Make a binary classification dataset to discriminate triangle images from rectangle images. |
15 """ | 16 """ |
16 def convert_dataset(dset): | 17 def convert_dataset(dset): |
17 # convert the n_vert==3 into target==0 and n_vert==4 into target==1 | 18 # convert the n_vert==3 into target==0 and n_vert==4 into target==1 |
18 def mapf(images,n_vertices): | 19 def mapf(images,n_vertices): |
19 n=len(n_vertices) | 20 n=len(n_vertices) |
20 targets = ndarray((n,1),dtype='float64') | 21 targets = ndarray((n,1),dtype='float64') |
21 for i in xrange(n): | 22 for i in xrange(n): |
22 targets[i,0] = array([0. if vertices[i]==3 else 1.],dtype='float64') | 23 targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64') |
23 return images.reshape(len(images),images[0].size).astype('float64'),targets | 24 return images.reshape(len(images),images[0].size).astype('float64'),targets |
24 return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),True) | 25 return dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]) |
| 26 |
| 27 p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) |
| 28 trainset=convert_dataset(p) |
| 29 return trainset |
| 30 |
| 31 |
| 32 def make_triangles_rectangles_dataset(n_examples=600,image_size=(10,10), cache = True): |
| 33 """ |
| 34 Make a binary classification dataset to discriminate triangle images from rectangle images. |
| 35 """ |
| 36 def convert_dataset(dset): |
| 37 # convert the n_vert==3 into target==0 and n_vert==4 into target==1 |
| 38 def mapf(images,n_vertices): |
| 39 n=len(n_vertices) |
| 40 targets = ndarray((n,1),dtype='float64') |
| 41 for i in xrange(n): |
| 42 targets[i,0] = array([0. if n_vertices[i]==3 else 1.],dtype='float64') |
| 43 return images.reshape(len(images),images[0].size).astype('float64'),targets |
| 44 return dataset.CachedDataSet(dataset.ApplyFunctionDataSet(dset("image","nvert"),mapf,["input","target"]),cache) |
25 | 45 |
26 p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) | 46 p=Polygons(image_size,[3,4],fg_min=1./255,fg_max=1./255,rot_max=1.,scale_min=0.35,scale_max=0.9,pos_min=0.1, pos_max=0.9) |
27 data = p.subset[0:n_examples] | 47 data = p.subset[0:n_examples] |
28 save_polygon_data(data,"shapes") | 48 trainset=convert_dataset(data.subset[0:n_examples]) |
29 n_train=int(n_examples*train_frac) | 49 return trainset |
| 50 |
| 51 |
| 52 def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10), cache = True): |
| 53 """ |
| 54 Make two binary classification datasets to discriminate triangle images from rectangle images. |
| 55 The first one is the training set, the second is the test set. |
| 56 """ |
| 57 data = make_triangles_rectangles_dataset(n_examples=n_examples,image_size=image_size, cache = cache) |
| 58 n_train = int(n_examples*train_frac) |
30 trainset=convert_dataset(data.subset[0:n_train]) | 59 trainset=convert_dataset(data.subset[0:n_train]) |
31 testset=convert_dataset(data.subset[n_train:n_examples]) | 60 testset=convert_dataset(data.subset[n_train:n_examples]) |
32 return trainset,testset | 61 return trainset,testset |
| 62 |
33 | 63 |
34 def make_artificial_datasets_from_function(n_inputs=1, | 64 def make_artificial_datasets_from_function(n_inputs=1, |
35 n_targets=1, | 65 n_targets=1, |
36 n_examples=20, | 66 n_examples=20, |
37 train_frac=0.5, | 67 train_frac=0.5, |