annotate pylearn/datasets/icml07.py @ 1355:ba8a32b71356

icml07 - added comment
author James Bergstra <bergstrj@iro.umontreal.ca>
date Fri, 05 Nov 2010 13:29:07 -0400
parents be3030305d4b
children 281efa9a4463
rev   line source
1353
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 """ Functions related to the datasets used in Larochelle et al. 2007 (incl. modified MNIST).
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 """
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 import os, sys
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 import numpy
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 from pylearn.io.amat import AMat
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class DatasetLoader(object):
    """
    A class for loading an ICML07 dataset into memory.

    The class has functionality to
    - download the dataset from the internet (in amat format) -- not
      implemented yet, `download` currently raises NotImplementedError
    - convert the dataset from amat format to npy format
    - load the dataset from either amat or npy source files

    The amat source is picked in this order:
    1. `amat_filename_all`: a single file holding every example;
    2. `amat_filename_root`: expanded to `<root>_train.amat` and
       `<root>_test.amat`;
    3. the explicit `amat_filename_train` / `amat_filename_test` pair.

    Every loading method returns an `(inputs, labels)` pair with one row per
    example, covering n_train + n_valid + n_test examples in total.
    """
    def __init__(self, http_source,
            n_inputs, n_classes,
            n_train, n_valid, n_test,
            npy_filename_root,
            amat_filename_root=None,
            amat_filename_train=None,
            amat_filename_test=None,
            amat_filename_all=None,
            ):
        """Store the description of one dataset; no file is touched here.

        :param http_source: URL of the archive holding the amat files
        :param n_inputs: number of input columns per example
        :param n_classes: exclusive upper bound checked on the label values
        :param n_train: number of training examples
        :param n_valid: number of validation examples
        :param n_test: number of test examples
        :param npy_filename_root: path prefix of the `_inputs.npy` and
            `_labels.npy` files
        :param amat_filename_root: path prefix of the `_train.amat` and
            `_test.amat` files
        :param amat_filename_train: path of the amat file with the train and
            valid examples (used when no root is given)
        :param amat_filename_test: path of the amat file with the test
            examples (used when no root is given)
        :param amat_filename_all: path of a single amat file with all examples
        """
        # Explicit assignments (rather than copying locals() into __dict__)
        # so that the set of attributes is visible to readers and tools.
        self.http_source = http_source
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        self.n_train = n_train
        self.n_valid = n_valid
        self.n_test = n_test
        self.npy_filename_root = npy_filename_root
        self.amat_filename_root = amat_filename_root
        self.amat_filename_train = amat_filename_train
        self.amat_filename_test = amat_filename_test
        self.amat_filename_all = amat_filename_all

    def download(self, todir):
        """Download the dataset archive into `todir` (not implemented).

        :raises NotImplementedError: always
        """
        #TODO: write a system call to wget to download the file from self.http_source
        raise NotImplementedError()

    def load_from_amat(self):
        """Load the whole dataset from its amat source file(s).

        :returns: `(inputs, labels)` where `inputs` is the first `n_inputs`
            columns cast to float32 and `labels` is the following column cast
            to int8
        :raises ValueError: if no amat source file is configured
        :raises AssertionError: if the file contents do not match the
            configured sizes, or the labels are not integers below n_classes
        """
        n_total = self.n_train + self.n_valid + self.n_test
        if self.amat_filename_all is not None:
            allmat = AMat(self.amat_filename_all).all
            assert allmat.shape[0] == n_total, allmat.shape
        else:
            if self.amat_filename_root is not None:
                train_path = self.amat_filename_root+'_train.amat'
                test_path = self.amat_filename_root+'_test.amat'
            else:
                train_path = self.amat_filename_train
                test_path = self.amat_filename_test
            if train_path is None or test_path is None:
                # Fail with a clear message instead of handing None to AMat.
                raise ValueError(
                        'no amat source configured: set amat_filename_all, '
                        'amat_filename_root, or both amat_filename_train '
                        'and amat_filename_test')
            amat_train = AMat(train_path)
            amat_test = AMat(test_path)
            # The "train" file is expected to hold the train and valid examples.
            assert amat_train.all.shape[0] == self.n_train + self.n_valid, \
                    amat_train.all.shape
            assert amat_test.all.shape[0] == self.n_test, amat_test.all.shape
            allmat = numpy.vstack((amat_train.all, amat_test.all))
        # CHECKPOINT: allmat has been computed by this point.
        assert allmat.shape[1] == self.n_inputs+1, allmat.shape
        inputs = allmat[:, :self.n_inputs].astype('float32')
        labels = allmat[:, self.n_inputs].astype('int8')
        # The int8 cast must be lossless, i.e. the label column held small integers.
        assert numpy.allclose(labels, allmat[:, self.n_inputs])
        assert numpy.all(labels < self.n_classes)
        return inputs, labels

    def load_from_amat_save_to_numpy(self):
        """Load from the amat source and write the result as npy files.

        Writes `<npy_filename_root>_inputs.npy` and
        `<npy_filename_root>_labels.npy`.

        :returns: the `(inputs, labels)` pair that was saved
        """
        inputs, labels = self.load_from_amat()
        numpy.save(self.npy_filename_root+'_inputs.npy', inputs)
        numpy.save(self.npy_filename_root+'_labels.npy', labels)
        return inputs, labels

    def load_from_numpy(self, mmap_mode='r'):
        """Much faster than load_from_amat

        Reads the two npy files written by `load_from_amat_save_to_numpy`.

        :param mmap_mode: forwarded to `numpy.load` for both files
        :returns: `(inputs, labels)` as loaded by numpy
        :raises AssertionError: if the arrays do not match the configured
            sizes or a label is not below n_classes
        """
        inputs = numpy.load(self.npy_filename_root+'_inputs.npy', mmap_mode=mmap_mode)
        labels = numpy.load(self.npy_filename_root+'_labels.npy', mmap_mode=mmap_mode)
        n_total = self.n_train + self.n_valid + self.n_test
        assert inputs.shape == (n_total, self.n_inputs), inputs.shape
        assert labels.shape[0] == inputs.shape[0], labels.shape
        assert numpy.all(labels < self.n_classes)
        return inputs, labels
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
70
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def icml07_loaders(new_version=True, rootdir='.'):
    """Build one DatasetLoader per ICML07 dataset.

    :param new_version: if True, describe the "new" versions of the rotated
        MNIST datasets; the old versions are not supported yet
    :param rootdir: directory joined in front of every amat and npy filename
    :returns: dict mapping a dataset name (e.g. 'mnist_basic', 'convex',
        'mnist_noise_1' .. 'mnist_noise_6', 'mnist_rotated') to its
        DatasetLoader
    :raises NotImplementedError: if `new_version` is false

    NOTE(review): every loader is built with n_classes=10, including the
    rectangles* and convex ones -- confirm that this is intended for them.
    """
    http_root = 'http://www.iro.umontreal.ca/~lisa/icml2007data/'

    def in_rootdir(filename):
        # Prefix a bare filename with the directory holding the data files.
        return os.path.join(rootdir, filename)

    def loader(archive, npy_name,
            n_train=10000, n_valid=2000, n_test=50000,
            **amat_filenames):
        # Build a loader with the settings shared by all datasets (784 inputs,
        # 10 classes); amat_filenames are the amat_filename_* keyword
        # arguments, given relative to rootdir.
        amat_paths = dict((key, in_rootdir(name))
                for key, name in amat_filenames.items())
        return DatasetLoader(
                http_source=http_root + archive,
                npy_filename_root=in_rootdir(npy_name),
                n_inputs=784,
                n_classes=10,
                n_train=n_train,
                n_valid=n_valid,
                n_test=n_test,
                **amat_paths)

    rval = dict(
            mnist_basic=loader('mnist.zip', 'mnist_basic',
                amat_filename_root='mnist'),
            mnist_background_images=loader(
                'mnist_background_images.zip', 'mnist_background_images',
                amat_filename_root='mnist_background_images'),
            mnist_background_random=loader(
                'mnist_background_random.zip', 'mnist_background_random',
                amat_filename_root='mnist_background_random'),
            rectangles=loader('rectangles.zip', 'rectangles',
                amat_filename_root='rectangles',
                n_train=1000,
                n_valid=200),
            rectangles_images=loader(
                'rectangles_images.zip', 'rectangles_images',
                amat_filename_root='rectangles_im'),
            convex=loader('convex.zip', 'convex',
                amat_filename_root='convex',
                n_train=6500, #not sure about this train/valid split
                n_valid=1500),
            )

    # The six noise levels come from one archive, with one "all" file per level.
    for level in range(1,7):
        rval['mnist_noise_%i'%level] = loader(
                'mnist_noise_variation.tar.gz', 'mnist_noise_%i'%level,
                amat_filename_all='mnist_noise_variations_all_%i.amat'%level,
                n_test=2000)

    if new_version:
        rval['mnist_rotated'] = loader(
                'mnist_rotation_new.zip', 'mnist_rotated',
                amat_filename_test=
                    'mnist_all_rotation_normalized_float_test.amat',
                amat_filename_train=
                    'mnist_all_rotation_normalized_float_train_valid.amat')
        rval['mnist_rotated_background_images'] = loader(
                'mnist_rotation_back_image_new.zip',
                'mnist_rotated_background_images',
                amat_filename_test=
                    'mnist_all_background_images_rotation_normalized_test.amat',
                amat_filename_train=
                    'mnist_all_background_images_rotation_normalized_train_valid.amat')
    else:
        # The old versions live in mnist_rotation.zip and
        # mnist_rotation_back_image.zip under the same URL prefix, but their
        # amat filenames and sizes are not known here.
        raise NotImplementedError('TODO: what are the amat_filenames here')
    return rval
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
181
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
182