annotate pylearn/datasets/icml07.py @ 1353:2024c5618466

adding icml07 dataset
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 03 Nov 2010 12:49:24 -0400
parents
children be3030305d4b
rev   line source
1353
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 """ Functions related to the datasets used in Larochelle et al. 2007 (incl. modified MNIST).
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 """
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 import os, sys
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 import numpy
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 from pylearn.io.amat import AMat
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class DatasetLoader(object):
    """Description of one dataset plus helpers to load it from disk.

    A loader records where a dataset comes from (``http_source``), its
    expected dimensions, and the file names of its on-disk representations:
    the original AMat text files and a pair of ``.npy`` files
    (``<npy_filename_root>_inputs.npy`` and ``<npy_filename_root>_labels.npy``).

    The AMat files may be specified in one of three ways:

    * ``amat_filename_root``: the files are ``<root>_train.amat`` and
      ``<root>_test.amat``;
    * ``amat_filename_train`` and ``amat_filename_test``: explicit names;
    * ``amat_filename_all``: a single file (loading it is not implemented).
    """

    def __init__(self, http_source,
                 n_inputs, n_classes,
                 n_train, n_valid, n_test,
                 npy_filename_root,
                 amat_filename_root=None,
                 amat_filename_train=None,
                 amat_filename_test=None,
                 amat_filename_all=None,
                 ):
        """Store every constructor argument as an attribute of the same name.

        :param http_source: URL of the archive holding the dataset.
        :param n_inputs: number of input columns per example.
        :param n_classes: labels are checked to be strictly below this value.
        :param n_train: number of training examples.
        :param n_valid: number of validation examples (stored in the same
            AMat file as the training examples).
        :param n_test: number of test examples.
        :param npy_filename_root: prefix of the ``_inputs.npy`` and
            ``_labels.npy`` files.
        :param amat_filename_root: prefix of ``_train.amat`` / ``_test.amat``.
        :param amat_filename_train: explicit train(+valid) AMat file name.
        :param amat_filename_test: explicit test AMat file name.
        :param amat_filename_all: name of a single AMat file with everything.
        """
        # Explicit assignments instead of `self.__dict__.update(locals())`,
        # so that a local variable added here later cannot silently become
        # an attribute.
        self.http_source = http_source
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        self.n_train = n_train
        self.n_valid = n_valid
        self.n_test = n_test
        self.npy_filename_root = npy_filename_root
        self.amat_filename_root = amat_filename_root
        self.amat_filename_train = amat_filename_train
        self.amat_filename_test = amat_filename_test
        self.amat_filename_all = amat_filename_all

    def download(self, todir):
        """Download the dataset into `todir` (not implemented yet)."""
        raise NotImplementedError()

    def load_from_amat(self):
        """Load the whole dataset from its AMat text files.

        The train(+valid) rows come first, followed by the test rows.

        :returns: ``(inputs, labels)`` where `inputs` is a float32 matrix
            with ``n_inputs`` columns and `labels` is an int8 vector with
            one entry per row of `inputs`.
        :raises NotImplementedError: if ``amat_filename_all`` is set.
        :raises ValueError: if no usable AMat file names were configured.
        :raises AssertionError: if the files do not have the expected shape,
            or the labels are not integers below ``n_classes``.  These
            checks are plain asserts, so they are skipped under ``python -O``.
        """
        if self.amat_filename_all is not None:
            raise NotImplementedError()

        if self.amat_filename_root is not None:
            train_filename = self.amat_filename_root + '_train.amat'
            test_filename = self.amat_filename_root + '_test.amat'
        else:
            train_filename = self.amat_filename_train
            test_filename = self.amat_filename_test
        if train_filename is None or test_filename is None:
            # Fail here with a clear message rather than inside AMat(None).
            raise ValueError(
                'no AMat files configured: set amat_filename_root, or both '
                'amat_filename_train and amat_filename_test')

        amat_train = AMat(train_filename)
        amat_test = AMat(test_filename)
        assert amat_train.all.shape[0] == self.n_train + self.n_valid, (
            'unexpected number of train+valid rows in %s' % train_filename)
        assert amat_test.all.shape[0] == self.n_test, (
            'unexpected number of test rows in %s' % test_filename)
        allmat = numpy.vstack((amat_train.all, amat_test.all))

        # CHECKPOINT: allmat has been computed by this point.
        # Each row holds the n_inputs input values followed by the label.
        assert allmat.shape[1] == self.n_inputs + 1, (
            'expected %i columns, got %i'
            % (self.n_inputs + 1, allmat.shape[1]))
        inputs = allmat[:, :self.n_inputs].astype('float32')
        labels = allmat[:, self.n_inputs].astype('int8')
        # The int8 cast must be lossless: labels have to be small integers.
        assert numpy.allclose(labels, allmat[:, self.n_inputs]), (
            'label column is not exactly representable as int8')
        assert numpy.all(labels < self.n_classes), (
            'found a label >= n_classes (%i)' % self.n_classes)
        return inputs, labels

    def load_from_amat_save_to_numpy(self):
        """Load from the AMat files and cache the result as ``.npy`` files.

        Writes ``<npy_filename_root>_inputs.npy`` and
        ``<npy_filename_root>_labels.npy``.

        :returns: the ``(inputs, labels)`` pair from `load_from_amat`.
        """
        inputs, labels = self.load_from_amat()
        numpy.save(self.npy_filename_root + '_inputs.npy', inputs)
        numpy.save(self.npy_filename_root + '_labels.npy', labels)
        return inputs, labels

    def load_from_numpy(self, mmap_mode='r'):
        """Load the dataset from its ``.npy`` files.

        Much faster than load_from_amat.

        :param mmap_mode: forwarded to ``numpy.load``.
        :returns: ``(inputs, labels)`` as stored in the ``.npy`` files.
        :raises AssertionError: if the stored arrays do not match the
            expected dimensions or contain a label >= ``n_classes``
            (skipped under ``python -O``).
        """
        inputs = numpy.load(self.npy_filename_root + '_inputs.npy',
                            mmap_mode=mmap_mode)
        labels = numpy.load(self.npy_filename_root + '_labels.npy',
                            mmap_mode=mmap_mode)
        n_examples = self.n_train + self.n_valid + self.n_test
        assert inputs.shape == (n_examples, self.n_inputs), (
            'expected inputs of shape %s, got %s'
            % ((n_examples, self.n_inputs), inputs.shape))
        assert labels.shape[0] == inputs.shape[0], (
            'inputs and labels disagree on the number of examples')
        assert numpy.all(labels < self.n_classes), (
            'found a label >= n_classes (%i)' % self.n_classes)
        return inputs, labels
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
59
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
60
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def icml07_loaders(new_version=True, rootdir='.'):
    """Build a `DatasetLoader` for every dataset of Larochelle et al. 2007.

    :param new_version: if true, the two rotated-MNIST entries use the
        ``*_new.zip`` archives.  The original archives are not supported
        because their amat file names are unknown.
    :param rootdir: directory joined in front of every amat / npy file name.
    :returns: dict mapping dataset name to its `DatasetLoader`.  The keys
        are ``mnist_basic``, ``mnist_background_images``,
        ``mnist_background_random``, ``rectangles``, ``rectangles_images``,
        ``convex``, ``mnist_noise_1`` .. ``mnist_noise_5``,
        ``mnist_rotated`` and ``mnist_rotated_background_images``.
    :raises NotImplementedError: if `new_version` is false.
    """
    if not new_version:
        # The original rotated datasets are distributed as
        # 'mnist_rotation.zip' and 'mnist_rotation_back_image.zip' under the
        # same base URL as below.  Fail up front instead of building the
        # other loaders first.
        raise NotImplementedError('TODO: what are the amat_filenames here')

    url_base = 'http://www.iro.umontreal.ca/~lisa/icml2007data/'

    def make_loader(archive, npy_root, n_train=10000, n_valid=2000,
                    **amat_names):
        """Return a loader with the settings shared by all datasets here.

        `amat_names` maps ``amat_filename_*`` keyword names of
        `DatasetLoader` to file names relative to `rootdir`.
        """
        amat_paths = {}
        for key, filename in amat_names.items():
            amat_paths[key] = os.path.join(rootdir, filename)
        # NOTE(review): n_classes=10 is used for every dataset, including
        # rectangles* and convex -- confirm that this is intended.
        return DatasetLoader(
            http_source=url_base + archive,
            npy_filename_root=os.path.join(rootdir, npy_root),
            n_inputs=784,
            n_classes=10,
            n_train=n_train,
            n_valid=n_valid,
            n_test=50000,
            **amat_paths)

    rval = dict(
        mnist_basic=make_loader(
            'mnist.zip', 'mnist_basic',
            amat_filename_root='mnist'),
        mnist_background_images=make_loader(
            'mnist_background_images.zip', 'mnist_background_images',
            amat_filename_root='mnist_background_images'),
        mnist_background_random=make_loader(
            'mnist_background_random.zip', 'mnist_background_random',
            amat_filename_root='mnist_background_random'),
        rectangles=make_loader(
            'rectangles.zip', 'rectangles',
            n_train=1000, n_valid=200,
            amat_filename_root='rectangles'),
        rectangles_images=make_loader(
            'rectangles_images.zip', 'rectangles_images',
            amat_filename_root='rectangles_im'),
        convex=make_loader(
            'convex.zip', 'convex',
            n_train=6500,  # not sure about this train/valid split
            n_valid=1500,
            amat_filename_root='convex'),
    )

    # Five noise levels share one archive; each level has its own amat file.
    for level in range(1, 6):
        rval['mnist_noise_%i' % level] = make_loader(
            'mnist_noise_variation.tar.gz', 'mnist_noise_%i' % level,
            amat_filename_all='mnist_noise_variations_all_%i.amat' % level)

    rval['mnist_rotated'] = make_loader(
        'mnist_rotation_new.zip', 'mnist_rotated',
        amat_filename_test='mnist_all_rotation_normalized_float_test.amat',
        amat_filename_train=(
            'mnist_all_rotation_normalized_float_train_valid.amat'))
    rval['mnist_rotated_background_images'] = make_loader(
        'mnist_rotation_back_image_new.zip',
        'mnist_rotated_background_images',
        amat_filename_test=(
            'mnist_all_background_images_rotation_normalized_test.amat'),
        amat_filename_train=(
            'mnist_all_background_images_rotation_normalized_train_valid.amat'))
    return rval
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
171
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
172