Mercurial > pylearn
annotate make_test_datasets.py @ 436:d7ed780364b3
image_tools
author | Olivier Breuleux <breuleuo@iro.umontreal.ca> |
---|---|
date | Wed, 06 Aug 2008 19:39:14 -0400 |
parents | 8e4d2ebd816a |
children | 2d8490d76b3e |
rev | line source |
---|---|
431
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
1 from pylearn.dataset import ArrayDataSet |
432
8e4d2ebd816a
added a test for LinearRegression
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
431
diff
changeset
|
2 from shapeset.dset import Polygons |
8e4d2ebd816a
added a test for LinearRegression
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
431
diff
changeset
|
3 from linear_regression import linear_predictor |
8e4d2ebd816a
added a test for LinearRegression
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
431
diff
changeset
|
4 from kernel_regression import kernel_predictor |
8e4d2ebd816a
added a test for LinearRegression
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
431
diff
changeset
|
5 from numpy import * |
431
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
6 |
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
7 """ |
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
8 General-purpose code to generate artificial datasets that can be used |
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
9 to test different learning algorithms. |
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
10 """ |
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
11 |
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
def make_triangles_rectangles_datasets(n_examples=600,train_frac=0.5,image_size=(10,10)):
    """
    Make a binary classification dataset to discriminate triangle images from rectangle images.

    The first n_examples examples of a Polygons generator restricted to 3- and
    4-vertex shapes are split into a training part (the first
    int(n_examples*train_frac) examples) and a test part (the remainder).
    Each part exposes two fields:
      - 'input': the image flattened to one float64 row per example,
      - 'target': a float64 column holding 0. for triangles (3 vertices)
        and 1. for anything else (here, rectangles).

    Return the pair (trainset, testset).
    """
    # The module only imports ArrayDataSet by name, so the `dataset` module
    # itself has to be brought into scope for the wrappers used below.
    from pylearn import dataset

    def convert_dataset(dset):
        # convert the n_vert==3 into target==0 and n_vert==4 into target==1
        def mapf(images, n_vertices):
            # assumes one vertex count per example -- TODO confirm against shapeset
            labels = asarray(n_vertices).reshape(-1)
            targets = where(labels == 3, 0., 1.).reshape(-1, 1)
            inputs = images.reshape(len(images), images[0].size).astype('float64')
            return inputs, targets
        fields = dset("image", "nvert")
        mapped = dataset.ApplyFunctionDataSet(fields, mapf, ["input", "target"])
        return dataset.CachedDataSet(mapped, True)

    p = Polygons(image_size, [3, 4], fg_min=1./255, fg_max=1./255, rot_max=1.,
                 scale_min=0.35, scale_max=0.9, pos_min=0.1, pos_max=0.9)
    data = p.subset[0:n_examples]
    # NOTE(review): save_polygon_data is neither defined nor imported in the
    # visible part of this module -- confirm where it is supposed to come from.
    save_polygon_data(data, "shapes")
    n_train = int(n_examples * train_frac)
    trainset = convert_dataset(data.subset[0:n_train])
    testset = convert_dataset(data.subset[n_train:n_examples])
    return trainset, testset
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
33 |
0f8c81b0776d
Adding file make_test_datasets to host simple data-generating processes
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff
changeset
|
def make_artificial_datasets_from_function(n_inputs=1,
                                           n_targets=1,
                                           n_examples=20,
                                           train_frac=0.5,
                                           noise_level=0.1,  # add Gaussian noise, noise_level=sigma
                                           params_shape=None,
                                           f=None,  # function computing E[Y|X]
                                           otherargs=None,  # extra args to f
                                           b=None):  # force theta[0] with this value
    """
    Make regression data of the form
      Y | X ~ Normal(f(X,theta,otherargs),noise_level^2)

    If n_inputs==1 then X is chosen at regular locations on the [-1,1] interval:
    the training inputs are k*(2/n_train)-1 for k=0..n_train-1 and the test
    inputs are (k+0.5)*(2/n_test)-1 for k=0..n_test-1.
    Otherwise X is sampled according to a Normal(0,1) on all dimensions (independently).

    The parameter theta is a matrix of shape params_shape that is sampled from Normal(0,1).
    When params_shape is not given it defaults to (n_inputs+1,n_targets) for
    linear_predictor and to (otherargs[1].shape[0]+1,n_targets) for
    kernel_predictor; for any other f, theta is None.
    Optionally theta[0] is set to the argument 'b', if b is provided
    (any value other than None, including 0).

    f defaults to linear_predictor. With kernel_predictor, otherargs must be a
    2-element sequence; if its second element is missing (None or empty) the
    training inputs are used in its place.

    Return (trainset, testset, theta), where the two datasets are obtained by
    splitting the generated n_examples according to 'train_frac' (the first
    int(train_frac*n_examples) examples form the training set) and each
    exposes the fields 'input' and 'target'.
    """
    n_train = int(train_frac * n_examples)
    n_test = n_examples - n_train

    if n_inputs == 1:
        def grid(count, shift):
            # Column of `count` regularly spaced points; an empty split yields
            # an empty column instead of dividing by zero.
            if count == 0:
                return zeros((0, 1))
            step = 2. / count
            return shift * step + arange(count).reshape(count, 1) * step - 1
        # Test points are shifted by half a step relative to the grid origin.
        inputs = vstack((grid(n_train, 0.), grid(n_test, 0.5)))
    else:
        inputs = random.normal(size=(n_examples, n_inputs))

    if f is None:
        f = linear_predictor

    if f == kernel_predictor:
        if otherargs is None:
            raise TypeError("otherargs must be a 2-element sequence "
                            "when f is kernel_predictor")
        second = otherargs[1]
        # Truth-testing a multi-element array raises, so arrays are checked
        # by size and everything else by ordinary truthiness.
        if isinstance(second, ndarray):
            missing = second.size == 0
        else:
            missing = not second
        if missing:
            otherargs = (otherargs[0], inputs[0:n_train])

    if not params_shape:
        if f == linear_predictor:
            params_shape = (n_inputs + 1, n_targets)
        elif f == kernel_predictor:
            params_shape = (otherargs[1].shape[0] + 1, n_targets)

    theta = random.normal(size=params_shape) if params_shape else None
    if b is not None:
        if theta is None:
            raise TypeError("b was given but there is no theta to apply it to; "
                            "pass params_shape explicitly")
        theta[0] = b

    outputs = f(inputs, theta, otherargs)
    targets = outputs + random.normal(scale=noise_level, size=(n_examples, n_targets))

    # Inputs and targets are stored side by side in a single array because
    # building the datasets by |-stacking two ArrayDataSets triggered a
    # strange bug in the LookupList constructor.
    data = hstack((inputs, targets))
    fields = {'input': slice(0, n_inputs),
              'target': slice(n_inputs, n_inputs + n_targets)}
    trainset = ArrayDataSet(data[0:n_train], dict(fields))
    testset = ArrayDataSet(data[n_train:], dict(fields))
    return trainset, testset, theta