annotate pylearn/datasets/icml07.py @ 1531:88f361283a19 tip

Fix url/name to pylearn2.
author Frederic Bastien <nouiz@nouiz.org>
date Mon, 09 Sep 2013 10:08:05 -0400
parents 1b69d435f09f
children
rev   line source
1353
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 """ Functions related to the datasets used in Larochelle et al. 2007 (incl. modified MNIST).
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 """
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 import os, sys
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 import numpy
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
1471
281efa9a4463 icml07_loaders uses get_filepath_in_roots
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 1355
diff changeset
6 from config import get_filepath_in_roots
1353
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7 from pylearn.io.amat import AMat
1475
e7401822d596 Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents: 1471
diff changeset
8 from pylearn.datasets.config import data_root # config
e7401822d596 Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents: 1471
diff changeset
9 from pylearn.datasets.dataset import Dataset
e7401822d596 Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents: 1471
diff changeset
10
e7401822d596 Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents: 1471
diff changeset
class MNIST_rotated_background(object):
    """Train/valid/test view of the rotated MNIST + background images data.

    The inputs and labels are read from two ``.npy`` files located in
    ``<data_root()>/icml07data/npy`` and cut into three consecutive row
    ranges: the first `n_train` rows, the next `n_valid` rows and the
    following `n_test` rows.

    Attributes set on the instance:
      - ``train``, ``valid``, ``test``: ``Dataset.Obj`` instances built with
        ``x`` (inputs) and ``y`` (labels) keyword arguments.
      - ``n_classes``: 10
      - ``img_shape``: (28, 28)
    """

    def __init__(self, n_train=10000, n_valid=2000, n_test=50000):
        npy_dir = os.path.join(data_root(), 'icml07data', 'npy')

        images = numpy.load(
            os.path.join(npy_dir, 'mnist_rotated_background_images_inputs.npy'))
        targets = numpy.load(
            os.path.join(npy_dir, 'mnist_rotated_background_images_labels.npy'))

        # Row boundaries of the three consecutive splits.
        valid_end = n_train + n_valid
        test_end = valid_end + n_test
        train_rows = slice(None, n_train)
        valid_rows = slice(n_train, valid_end)
        test_rows = slice(valid_end, test_end)

        self.train = Dataset.Obj(x=images[train_rows], y=targets[train_rows])
        self.valid = Dataset.Obj(x=images[valid_rows], y=targets[valid_rows])
        self.test = Dataset.Obj(x=images[test_rows], y=targets[test_rows])

        self.n_classes = 10
        self.img_shape = (28, 28)
e7401822d596 Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents: 1471
diff changeset
31
1353
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
32
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class DatasetLoader(object):
    """
    A class for loading an ICML07 dataset into memory.

    The class has functionality to
    - download the dataset from the internet (in amat format)
      (not implemented yet, see `download`)
    - convert the dataset from amat format to npy format
    - load the dataset from either amat or npy source files

    Every constructor argument is stored on the instance under the same
    name.

    :param http_source: URL of the archive holding the dataset.
    :param n_inputs: number of input columns of each example.
    :param n_classes: exclusive upper bound checked against the labels.
    :param n_train: number of training examples.
    :param n_valid: number of validation examples.
    :param n_test: number of test examples.
    :param npy_filename_root: path prefix of the npy files; '_inputs.npy'
        and '_labels.npy' are appended to it.
    :param amat_filename_root: path prefix of the amat files; '_train.amat'
        and '_test.amat' are appended to it.
    :param amat_filename_train: path of the amat file holding the
        n_train + n_valid first examples (used when no root is given).
    :param amat_filename_test: path of the amat file holding the n_test
        last examples (used when no root is given).
    :param amat_filename_all: path of a single amat file holding all the
        examples; takes precedence over the other amat arguments.
    """
    def __init__(self, http_source,
            n_inputs, n_classes,
            n_train, n_valid, n_test,
            npy_filename_root,
            amat_filename_root=None,
            amat_filename_train=None,
            amat_filename_test=None,
            amat_filename_all=None,
            ):
        # Explicit assignments (rather than updating __dict__ from
        # locals()) keep the attribute set visible to readers and tools.
        self.http_source = http_source
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        self.n_train = n_train
        self.n_valid = n_valid
        self.n_test = n_test
        self.npy_filename_root = npy_filename_root
        self.amat_filename_root = amat_filename_root
        self.amat_filename_train = amat_filename_train
        self.amat_filename_test = amat_filename_test
        self.amat_filename_all = amat_filename_all

    def download(self, todir):
        """Download the dataset into `todir` (not implemented).

        :raises NotImplementedError: always.
        """
        #TODO: write a system call to wget to dl the file from self.http_source
        raise NotImplementedError()

    def load_from_amat(self):
        """Load the whole dataset from its amat source file(s).

        The source is `amat_filename_all` when given, otherwise the
        train and test files (derived from `amat_filename_root` when
        given, else `amat_filename_train` / `amat_filename_test`) stacked
        in that order.

        :returns: the pair (inputs, labels); inputs are the first
            `n_inputs` columns cast to float32, labels are the following
            column cast to int8.
        :raises ValueError: if no amat source file is configured.
        """
        if self.amat_filename_all is not None:
            allmat = AMat(self.amat_filename_all).all
            assert allmat.shape[0] == self.n_train + self.n_valid + self.n_test, allmat.shape
        else:
            if self.amat_filename_root is not None:
                train_path = self.amat_filename_root + '_train.amat'
                test_path = self.amat_filename_root + '_test.amat'
            else:
                train_path = self.amat_filename_train
                test_path = self.amat_filename_test
            if train_path is None or test_path is None:
                # Fail with an explicit message instead of handing None
                # to the amat reader.
                raise ValueError(
                    'no amat source configured: give amat_filename_all, '
                    'amat_filename_root, or both amat_filename_train and '
                    'amat_filename_test')
            amat_train = AMat(train_path)
            amat_test = AMat(test_path)
            assert amat_train.all.shape[0] == self.n_train + self.n_valid
            assert amat_test.all.shape[0] == self.n_test
            allmat = numpy.vstack((amat_train.all, amat_test.all))
        # CHECKPOINT: allmat has been computed by this point.
        # One column per input plus a final column holding the label.
        assert allmat.shape[1] == self.n_inputs + 1
        inputs = allmat[:, :self.n_inputs].astype('float32')
        labels = allmat[:, self.n_inputs].astype('int8')
        # The int8 cast must not have altered any label value.
        assert numpy.allclose(labels, allmat[:, self.n_inputs])
        assert numpy.all(labels < self.n_classes)
        return inputs, labels

    def load_from_amat_save_to_numpy(self):
        """Load from the amat file(s) and write the result as npy files.

        The arrays are saved to `npy_filename_root` + '_inputs.npy' and
        `npy_filename_root` + '_labels.npy'.

        :returns: the pair (inputs, labels) given by `load_from_amat`.
        """
        inputs, labels = self.load_from_amat()
        numpy.save(self.npy_filename_root + '_inputs.npy', inputs)
        numpy.save(self.npy_filename_root + '_labels.npy', labels)
        return inputs, labels

    def load_from_numpy(self, mmap_mode='r'):
        """Much faster than load_from_amat

        Reads the npy files written by `load_from_amat_save_to_numpy`.

        :param mmap_mode: forwarded to numpy.load for both files.
        :returns: the pair (inputs, labels).
        """
        inputs = numpy.load(self.npy_filename_root + '_inputs.npy', mmap_mode=mmap_mode)
        labels = numpy.load(self.npy_filename_root + '_labels.npy', mmap_mode=mmap_mode)
        assert inputs.shape == (self.n_train + self.n_valid + self.n_test, self.n_inputs)
        assert labels.shape[0] == inputs.shape[0]
        assert numpy.all(labels < self.n_classes)
        return inputs, labels
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
95
1471
281efa9a4463 icml07_loaders uses get_filepath_in_roots
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 1355
diff changeset
def icml07_loaders(new_version=True, rootdir=None):
    """Build a DatasetLoader for each of the ICML07 datasets.

    :param new_version: select the "new" variants of the rotated MNIST
        datasets. Only True is supported.
    :param rootdir: folder holding the amat/npy files. When None, it is
        looked up with get_filepath_in_roots('icml07data_twiki').

    :returns: a dict mapping each dataset name ('mnist_basic',
        'mnist_background_images', 'mnist_background_random',
        'rectangles', 'rectangles_images', 'convex', 'mnist_noise_1' to
        'mnist_noise_6', 'mnist_rotated',
        'mnist_rotated_background_images') to its DatasetLoader.

    :raises IOError: if `rootdir` is None and the lookup finds nothing.
    :raises NotImplementedError: if `new_version` is false.
    """
    if rootdir is None:
        rootdir = get_filepath_in_roots('icml07data_twiki')
    if rootdir is None:
        raise IOError('dataset not found (no icml07data_twiki folder in PYLEARN_DATA_ROOT or DBPATH environment variable).')
    if not new_version:
        # The amat file names of the old rotated datasets are unknown, so
        # refuse up front rather than after building every other loader.
        raise NotImplementedError('TODO: what are the amat_filenames here')

    url_base = 'http://www.iro.umontreal.ca/~lisa/icml2007data/'

    # Datasets stored as <amat root>_train.amat / <amat root>_test.amat:
    # (name, archive, amat root, n_train, n_valid)
    # The npy root is always the dataset name.
    split_sets = [
        ('mnist_basic', 'mnist.zip', 'mnist', 10000, 2000),
        ('mnist_background_images', 'mnist_background_images.zip',
            'mnist_background_images', 10000, 2000),
        ('mnist_background_random', 'mnist_background_random.zip',
            'mnist_background_random', 10000, 2000),
        ('rectangles', 'rectangles.zip', 'rectangles', 1000, 200),
        ('rectangles_images', 'rectangles_images.zip',
            'rectangles_im', 10000, 2000),
        #not sure about this train/valid split
        ('convex', 'convex.zip', 'convex', 6500, 1500),
    ]

    # Rotated datasets stored as <amat stem>_train_valid.amat and
    # <amat stem>_test.amat: (name, archive, amat stem)
    rotated_sets = [
        ('mnist_rotated', 'mnist_rotation_new.zip',
            'mnist_all_rotation_normalized_float'),
        ('mnist_rotated_background_images',
            'mnist_rotation_back_image_new.zip',
            'mnist_all_background_images_rotation_normalized'),
    ]

    rval = {}
    for name, archive, amat_root, n_train, n_valid in split_sets:
        rval[name] = DatasetLoader(
            http_source=url_base + archive,
            amat_filename_root=os.path.join(rootdir, amat_root),
            npy_filename_root=os.path.join(rootdir, name),
            n_inputs=784,
            n_classes=10,
            n_train=n_train,
            n_valid=n_valid,
            n_test=50000)

    # The six noise levels share one archive; each level has a single
    # amat file holding all of its examples.
    for level in range(1, 7):
        name = 'mnist_noise_%i' % level
        rval[name] = DatasetLoader(
            http_source=url_base + 'mnist_noise_variation.tar.gz',
            amat_filename_all=os.path.join(rootdir,
                'mnist_noise_variations_all_%i.amat' % level),
            npy_filename_root=os.path.join(rootdir, name),
            n_inputs=784,
            n_classes=10,
            n_train=10000,
            n_valid=2000,
            n_test=2000)

    for name, archive, amat_stem in rotated_sets:
        rval[name] = DatasetLoader(
            http_source=url_base + archive,
            amat_filename_test=os.path.join(rootdir,
                amat_stem + '_test.amat'),
            amat_filename_train=os.path.join(rootdir,
                amat_stem + '_train_valid.amat'),
            npy_filename_root=os.path.join(rootdir, name),
            n_inputs=784,
            n_classes=10,
            n_train=10000,
            n_valid=2000,
            n_test=50000)
    return rval
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
210
2024c5618466 adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
211