annotate pylearn/datasets/icml07.py @ 1354:be3030305d4b

icml07 loaders works
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 03 Nov 2010 12:58:04 -0400
parents 2024c5618466
children ba8a32b71356
rev   line source
1353
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 """ Functions related to the datasets used in Larochelle et al. 2007 (incl. modified MNIST).
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 """
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 import os, sys
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 import numpy
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 from pylearn.io.amat import AMat
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class DatasetLoader(object):
    """Description of one dataset plus helpers to load it from disk.

    A loader stores the download URL of a dataset, its expected dimensions,
    and the filenames of its AMat (text) and .npy (binary) representations.
    Every load method validates what it read against those dimensions.

    :param http_source: URL of the archive holding the dataset (only stored;
        `download` is not implemented).
    :param n_inputs: number of input columns per example.
    :param n_classes: exclusive upper bound on the integer labels; labels must
        lie in ``[0, n_classes)``.
    :param n_train: number of training examples.
    :param n_valid: number of validation examples.
    :param n_test: number of test examples.
    :param npy_filename_root: prefix of the ``_inputs.npy`` / ``_labels.npy``
        files written and read by the numpy methods.
    :param amat_filename_root: prefix of ``_train.amat`` / ``_test.amat`` files.
    :param amat_filename_train: explicit filename of the train+valid AMat file.
    :param amat_filename_test: explicit filename of the test AMat file.
    :param amat_filename_all: filename of a single AMat file with all examples.

    The AMat source is chosen in this order: `amat_filename_all`, then
    `amat_filename_root`, then the `amat_filename_train` /
    `amat_filename_test` pair.
    """

    def __init__(self, http_source,
            n_inputs, n_classes,
            n_train, n_valid, n_test,
            npy_filename_root,
            amat_filename_root=None,
            amat_filename_train=None,
            amat_filename_test=None,
            amat_filename_all=None,
            ):
        # Every constructor argument becomes an attribute of the same name.
        self.http_source = http_source
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        self.n_train = n_train
        self.n_valid = n_valid
        self.n_test = n_test
        self.npy_filename_root = npy_filename_root
        self.amat_filename_root = amat_filename_root
        self.amat_filename_train = amat_filename_train
        self.amat_filename_test = amat_filename_test
        self.amat_filename_all = amat_filename_all

    @staticmethod
    def _require(condition, details):
        """Raise AssertionError(details) unless `condition` is true.

        Used instead of the `assert` statement so that the data checks are
        still performed when Python runs with -O.
        """
        if not condition:
            raise AssertionError(details)

    def _check_labels(self, labels):
        """Check that every label lies in ``[0, n_classes)``."""
        self._require(numpy.all(labels >= 0), 'negative label found')
        self._require(numpy.all(labels < self.n_classes),
                'label >= n_classes (%s) found' % (self.n_classes,))

    def download(self, todir):
        """Download the dataset into `todir` (not implemented)."""
        raise NotImplementedError()

    def load_from_amat(self):
        """Load the dataset from its AMat file(s).

        :returns: ``(inputs, labels)`` where `inputs` is a float32 matrix made
            of the first `n_inputs` columns and `labels` is an int8 vector
            made of the last column.  When separate files are used, the
            train+valid rows come first, followed by the test rows.
        :raises ValueError: if no AMat filename was configured.
        :raises AssertionError: if the data does not match the declared
            dimensions or label range.
        """
        n_total = self.n_train + self.n_valid + self.n_test
        if self.amat_filename_all is not None:
            allmat = AMat(self.amat_filename_all).all
            self._require(allmat.shape[0] == n_total, allmat.shape)
        else:
            if self.amat_filename_root is not None:
                train_path = self.amat_filename_root + '_train.amat'
                test_path = self.amat_filename_root + '_test.amat'
            else:
                train_path = self.amat_filename_train
                test_path = self.amat_filename_test
            if train_path is None or test_path is None:
                # Fail clearly instead of handing None to AMat.
                raise ValueError('no AMat file configured: set amat_filename_all,'
                        ' amat_filename_root, or both amat_filename_train'
                        ' and amat_filename_test')
            train_mat = AMat(train_path).all
            test_mat = AMat(test_path).all
            self._require(train_mat.shape[0] == self.n_train + self.n_valid,
                    train_mat.shape)
            self._require(test_mat.shape[0] == self.n_test, test_mat.shape)
            allmat = numpy.vstack((train_mat, test_mat))
        # CHECKPOINT: allmat has been computed by this point.
        self._require(allmat.shape[1] == self.n_inputs + 1, allmat.shape)
        inputs = allmat[:, :self.n_inputs].astype('float32')
        raw_labels = allmat[:, self.n_inputs]
        labels = raw_labels.astype('int8')
        # The int8 cast must not have changed any label value.
        self._require(numpy.allclose(labels, raw_labels),
                'labels are not exactly representable as int8')
        self._check_labels(labels)
        return inputs, labels

    def load_from_amat_save_to_numpy(self):
        """Load from AMat, then cache the arrays as .npy files.

        Writes ``npy_filename_root + '_inputs.npy'`` and
        ``npy_filename_root + '_labels.npy'``.

        :returns: the ``(inputs, labels)`` pair from `load_from_amat`.
        """
        inputs, labels = self.load_from_amat()
        numpy.save(self.npy_filename_root+'_inputs.npy', inputs)
        numpy.save(self.npy_filename_root+'_labels.npy', labels)
        return inputs, labels

    def load_from_numpy(self, mmap_mode='r'):
        """Much faster than load_from_amat

        Reads the .npy files written by `load_from_amat_save_to_numpy`.

        :param mmap_mode: passed through to `numpy.load`.
        :returns: ``(inputs, labels)`` as loaded by `numpy.load`.
        :raises AssertionError: if the arrays do not match the declared
            dimensions or label range.
        """
        inputs = numpy.load(self.npy_filename_root+'_inputs.npy', mmap_mode=mmap_mode)
        labels = numpy.load(self.npy_filename_root+'_labels.npy', mmap_mode=mmap_mode)
        n_total = self.n_train + self.n_valid + self.n_test
        self._require(inputs.shape == (n_total, self.n_inputs), inputs.shape)
        self._require(labels.shape[0] == inputs.shape[0], labels.shape)
        self._check_labels(labels)
        return inputs, labels
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
61
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def icml07_loaders(new_version=True, rootdir='.'):
    """Build a DatasetLoader for each dataset of Larochelle et al. 2007.

    :param new_version: if true, use the "new" archives of the rotated MNIST
        datasets.  The old archives are not supported yet.
    :param rootdir: directory that every AMat and .npy filename is joined to.
    :returns: dict mapping dataset name to DatasetLoader, with keys
        'mnist_basic', 'mnist_background_images', 'mnist_background_random',
        'rectangles', 'rectangles_images', 'convex', 'mnist_noise_1' ..
        'mnist_noise_6', 'mnist_rotated' and 'mnist_rotated_background_images'.
    :raises NotImplementedError: if `new_version` is false.
    """
    if not new_version:
        # The AMat filenames inside the old rotation archives are unknown, so
        # fail before building anything.
        raise NotImplementedError('TODO: what are the amat_filenames here')

    base_url = 'http://www.iro.umontreal.ca/~lisa/icml2007data/'

    def in_root(filename):
        # Place `filename` inside the caller-supplied root directory.
        return os.path.join(rootdir, filename)

    def loader(archive, npy_name, **overrides):
        # Build a DatasetLoader from the settings shared by most datasets;
        # `overrides` supplies the AMat filenames and any differing sizes.
        settings = dict(
                n_inputs=784,
                n_classes=10,
                n_train=10000,
                n_valid=2000,
                n_test=50000)
        settings.update(overrides)
        return DatasetLoader(
                http_source=base_url + archive,
                npy_filename_root=in_root(npy_name),
                **settings)

    # NOTE(review): rectangles* and convex are presumably two-class tasks;
    # n_classes=10 is kept unchanged here -- confirm before tightening it.
    rval = dict(
        mnist_basic=loader('mnist.zip', 'mnist_basic',
            amat_filename_root=in_root('mnist')),
        mnist_background_images=loader(
            'mnist_background_images.zip', 'mnist_background_images',
            amat_filename_root=in_root('mnist_background_images')),
        mnist_background_random=loader(
            'mnist_background_random.zip', 'mnist_background_random',
            amat_filename_root=in_root('mnist_background_random')),
        rectangles=loader('rectangles.zip', 'rectangles',
            amat_filename_root=in_root('rectangles'),
            n_train=1000,
            n_valid=200),
        rectangles_images=loader('rectangles_images.zip', 'rectangles_images',
            amat_filename_root=in_root('rectangles_im'),
            ),
        convex=loader('convex.zip', 'convex',
            amat_filename_root=in_root('convex'),
            n_train=6500, #not sure about this train/valid split
            n_valid=1500),
        )

    # Six noise levels share one archive; each level has its own AMat file
    # that holds all of its examples.
    for level in range(1,7):
        rval['mnist_noise_%i'%level] = loader(
            'mnist_noise_variation.tar.gz', 'mnist_noise_%i'%level,
            amat_filename_all=in_root(
                'mnist_noise_variations_all_%i.amat'%level),
            n_test=2000)

    rval['mnist_rotated'] = loader(
        'mnist_rotation_new.zip', 'mnist_rotated',
        amat_filename_test=in_root(
            'mnist_all_rotation_normalized_float_test.amat'),
        amat_filename_train=in_root(
            'mnist_all_rotation_normalized_float_train_valid.amat'))
    rval['mnist_rotated_background_images'] = loader(
        'mnist_rotation_back_image_new.zip', 'mnist_rotated_background_images',
        amat_filename_test=in_root(
            'mnist_all_background_images_rotation_normalized_test.amat'),
        amat_filename_train=in_root(
            'mnist_all_background_images_rotation_normalized_train_valid.amat'))
    return rval
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
172
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
173