annotate pylearn/datasets/icml07.py @ 1471:281efa9a4463

icml07_loaders uses get_filepath_in_roots
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 18 May 2011 10:51:11 -0400
parents ba8a32b71356
children e7401822d596
rev   line source
1353
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 """ Functions related to the datasets used in Larochelle et al. 2007 (incl. modified MNIST).
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 """
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 import os, sys
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 import numpy
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
1471
281efa9a4463 icml07_loaders uses get_filepath_in_roots
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 1355
diff changeset
6 from config import get_filepath_in_roots
1353
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7 from pylearn.io.amat import AMat
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class DatasetLoader(object):
    """
    A class for loading an ICML07 dataset into memory.

    The class has functionality to
    - convert the dataset from amat format to npy format
    - load the dataset from either amat or npy source files

    Downloading the dataset from `http_source` is planned but not implemented
    yet: `download` currently raises NotImplementedError.

    The amat source is taken from the first of these that is set:
    - `amat_filename_all`: one file holding the train, valid and test rows;
    - `amat_filename_root`: a prefix to which '_train.amat' and '_test.amat'
      are appended;
    - `amat_filename_train` together with `amat_filename_test`.

    The npy files are `npy_filename_root` + '_inputs.npy' / '_labels.npy'.
    """
    def __init__(self, http_source,
            n_inputs, n_classes,
            n_train, n_valid, n_test,
            npy_filename_root,
            amat_filename_root=None,
            amat_filename_train=None,
            amat_filename_test=None,
            amat_filename_all=None,
            ):
        # Explicit assignments rather than `self.__dict__.update(locals())`:
        # the resulting attributes are the same, but a local variable added
        # to this method later cannot silently become an attribute.
        self.http_source = http_source
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        self.n_train = n_train
        self.n_valid = n_valid
        self.n_test = n_test
        self.npy_filename_root = npy_filename_root
        self.amat_filename_root = amat_filename_root
        self.amat_filename_train = amat_filename_train
        self.amat_filename_test = amat_filename_test
        self.amat_filename_all = amat_filename_all

    def download(self, todir):
        """Fetch the dataset archive into `todir` (not implemented)."""
        #TODO: write a system call to wget to dl the file from self.http_source
        raise NotImplementedError(
                'download is not implemented; fetch %s manually'
                % (self.http_source,))

    def _check_labels(self, labels):
        """Assert that every label is a class index in [0, n_classes)."""
        assert numpy.all(labels >= 0), 'negative label found'
        assert numpy.all(labels < self.n_classes), \
                'label >= n_classes (%s) found' % (self.n_classes,)

    def load_from_amat(self):
        """Read the dataset from its amat file(s).

        Returns (inputs, labels): `inputs` is the first `n_inputs` columns
        cast to float32, `labels` is the following column cast to int8.
        Rows are in file order, with the train(+valid) file first and the
        test file second when two files are used.

        Raises ValueError if no amat file name was configured, and
        AssertionError if the data does not match the declared sizes.
        """
        n_total = self.n_train + self.n_valid + self.n_test
        if self.amat_filename_all is not None:
            allmat = AMat(self.amat_filename_all).all
            assert allmat.shape[0] == n_total, allmat.shape
        else:
            if self.amat_filename_root is not None:
                train_path = self.amat_filename_root + '_train.amat'
                test_path = self.amat_filename_root + '_test.amat'
            else:
                train_path = self.amat_filename_train
                test_path = self.amat_filename_test
            if train_path is None or test_path is None:
                # Fail with a readable message before handing None to AMat.
                raise ValueError(
                        'no amat source configured: set amat_filename_all, '
                        'amat_filename_root, or both amat_filename_train '
                        'and amat_filename_test')
            amat_train = AMat(train_path)
            amat_test = AMat(test_path)
            # The "train" file is expected to hold the validation rows too.
            assert amat_train.all.shape[0] == self.n_train + self.n_valid, \
                    amat_train.all.shape
            assert amat_test.all.shape[0] == self.n_test, amat_test.all.shape
            allmat = numpy.vstack((amat_train.all, amat_test.all))
        # CHECKPOINT: allmat has been computed by this point.
        # One column per input plus a trailing label column.
        assert allmat.shape[1] == self.n_inputs + 1, allmat.shape
        inputs = allmat[:, :self.n_inputs].astype('float32')
        labels = allmat[:, self.n_inputs].astype('int8')
        # Make sure the cast to int8 did not change any label value.
        assert numpy.allclose(labels, allmat[:, self.n_inputs]), \
                'labels are not exactly representable as int8'
        self._check_labels(labels)
        return inputs, labels

    def load_from_amat_save_to_numpy(self):
        """Load via `load_from_amat` and write the two npy files.

        Returns the same (inputs, labels) pair that was saved.
        """
        inputs, labels = self.load_from_amat()
        numpy.save(self.npy_filename_root + '_inputs.npy', inputs)
        numpy.save(self.npy_filename_root + '_labels.npy', labels)
        return inputs, labels

    def load_from_numpy(self, mmap_mode='r'):
        """Much faster than load_from_amat

        Loads the npy files written by `load_from_amat_save_to_numpy`;
        `mmap_mode` is forwarded to `numpy.load`.  Returns (inputs, labels).
        Raises AssertionError if shapes or labels do not match the declared
        sizes.
        """
        inputs = numpy.load(self.npy_filename_root + '_inputs.npy',
                mmap_mode=mmap_mode)
        labels = numpy.load(self.npy_filename_root + '_labels.npy',
                mmap_mode=mmap_mode)
        expected_shape = (self.n_train + self.n_valid + self.n_test,
                self.n_inputs)
        assert inputs.shape == expected_shape, inputs.shape
        assert labels.shape[0] == inputs.shape[0], labels.shape
        self._check_labels(labels)
        return inputs, labels
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
71
1471
281efa9a4463 icml07_loaders uses get_filepath_in_roots
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 1355
diff changeset
def icml07_loaders(new_version=True, rootdir=None):
    """Build a DatasetLoader for each ICML07 dataset.

    :param new_version: when true, the two rotated-MNIST datasets use the
        '*_new.zip' archives and their amat files.  The old version is not
        supported: its amat file names are unknown.
    :param rootdir: directory holding the amat / npy files.  When None it is
        resolved with get_filepath_in_roots('icml07data_twiki').

    :returns: dict mapping dataset name to a DatasetLoader.  No file is read
        here; the loaders only record paths and expected sizes.

    :raises NotImplementedError: if `new_version` is false.
    """
    if rootdir is None:
        rootdir = get_filepath_in_roots('icml07data_twiki')

    http_root = 'http://www.iro.umontreal.ca/~lisa/icml2007data/'

    def in_root(filename):
        return os.path.join(rootdir, filename)

    def make_loader(archive, npy_name, n_train=10000, n_valid=2000,
            n_test=50000, **amat_filenames):
        # All datasets share the same number of inputs and classes here.
        return DatasetLoader(
                http_source=http_root + archive,
                npy_filename_root=in_root(npy_name),
                n_inputs=784,
                n_classes=10,
                n_train=n_train,
                n_valid=n_valid,
                n_test=n_test,
                **amat_filenames)

    # Datasets stored as <prefix>_train.amat / <prefix>_test.amat.
    # (name, archive, amat file prefix, n_train, n_valid)
    train_test_datasets = [
        ('mnist_basic', 'mnist.zip', 'mnist', 10000, 2000),
        ('mnist_background_images', 'mnist_background_images.zip',
            'mnist_background_images', 10000, 2000),
        ('mnist_background_random', 'mnist_background_random.zip',
            'mnist_background_random', 10000, 2000),
        ('rectangles', 'rectangles.zip', 'rectangles', 1000, 200),
        ('rectangles_images', 'rectangles_images.zip',
            'rectangles_im', 10000, 2000),
        #not sure about this train/valid split
        ('convex', 'convex.zip', 'convex', 6500, 1500),
    ]

    rval = {}
    for name, archive, amat_prefix, n_train, n_valid in train_test_datasets:
        rval[name] = make_loader(archive, name,
                n_train=n_train,
                n_valid=n_valid,
                amat_filename_root=in_root(amat_prefix))

    # Noise variations: one amat file per level holding all the rows.
    for level in range(1, 7):
        rval['mnist_noise_%i' % level] = make_loader(
                'mnist_noise_variation.tar.gz',
                'mnist_noise_%i' % level,
                n_test=2000,
                amat_filename_all=in_root(
                    'mnist_noise_variations_all_%i.amat' % level))

    if not new_version:
        # The old rotation archives (mnist_rotation.zip and
        # mnist_rotation_back_image.zip) cannot be described yet.
        raise NotImplementedError('TODO: what are the amat_filenames here')

    # Rotated datasets: explicit train(+valid) and test amat file names.
    rval['mnist_rotated'] = make_loader(
            'mnist_rotation_new.zip',
            'mnist_rotated',
            amat_filename_test=in_root(
                'mnist_all_rotation_normalized_float_test.amat'),
            amat_filename_train=in_root(
                'mnist_all_rotation_normalized_float_train_valid.amat'))
    rval['mnist_rotated_background_images'] = make_loader(
            'mnist_rotation_back_image_new.zip',
            'mnist_rotated_background_images',
            amat_filename_test=in_root(
                'mnist_all_background_images_rotation_normalized_test.amat'),
            amat_filename_train=in_root(
                'mnist_all_background_images_rotation_normalized_train_valid.amat'))
    return rval
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
184
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
185