Mercurial > pylearn
annotate pylearn/datasets/icml07.py @ 1479:1b69d435f09f
fix error string.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Wed, 25 May 2011 09:26:47 -0400 |
parents | 1e4dc99a3b13 |
children |
rev | line source |
---|---|
1353
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 """ Functions related to the datasets used in Larochelle et al. 2007 (incl. modified MNIST). |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 """ |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 import os, sys |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 import numpy |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 |
1471
281efa9a4463
icml07_loaders uses get_filepath_in_roots
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1355
diff
changeset
|
6 from config import get_filepath_in_roots |
1353
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 from pylearn.io.amat import AMat |
1475
e7401822d596
Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents:
1471
diff
changeset
|
8 from pylearn.datasets.config import data_root # config |
e7401822d596
Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents:
1471
diff
changeset
|
9 from pylearn.datasets.dataset import Dataset |
e7401822d596
Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents:
1471
diff
changeset
|
10 |
e7401822d596
Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents:
1471
diff
changeset
|
class MNIST_rotated_background(object):
    """Train/valid/test split of the rotated MNIST + background images data.

    The inputs and labels are read from two ``.npy`` files located in
    ``<data_root()>/icml07data/npy`` and cut into three consecutive,
    non-overlapping ranges: training first, then validation, then test.

    After construction the instance exposes:

    - ``train``, ``valid``, ``test``: ``Dataset.Obj`` instances with ``x``
      (inputs) and ``y`` (labels) attributes
    - ``n_classes``: 10
    - ``img_shape``: (28, 28)
    """

    def __init__(self, n_train=10000, n_valid=2000, n_test=50000):
        """Load the arrays and build the three splits.

        :param n_train: number of leading examples used for training
        :param n_valid: number of examples (after training) used for validation
        :param n_test: number of examples (after validation) used for testing

        :raises ValueError: if a size is negative, if the inputs and labels
            files disagree on the number of examples, or if more examples are
            requested than the files contain
        """
        # A negative size would silently wrap around in the slices below.
        for label, size in (('n_train', n_train),
                            ('n_valid', n_valid),
                            ('n_test', n_test)):
            if size < 0:
                raise ValueError(
                    '%s must be non-negative, got %r' % (label, size))

        basedir = os.path.join(data_root(), 'icml07data', 'npy')

        x_all = numpy.load(os.path.join(
            basedir, 'mnist_rotated_background_images_inputs.npy'))
        y_all = numpy.load(os.path.join(
            basedir, 'mnist_rotated_background_images_labels.npy'))

        if len(x_all) != len(y_all):
            raise ValueError(
                'inputs and labels have different lengths: %i vs %i'
                % (len(x_all), len(y_all)))

        # Slicing past the end would silently produce short (or empty) splits.
        n_requested = n_train + n_valid + n_test
        if n_requested > len(x_all):
            raise ValueError(
                'requested %i examples but the dataset only has %i'
                % (n_requested, len(x_all)))

        vstart = n_train
        tstart = n_train + n_valid

        self.train = Dataset.Obj(x=x_all[:n_train], y=y_all[:n_train])
        self.valid = Dataset.Obj(x=x_all[vstart:vstart + n_valid],
                                 y=y_all[vstart:vstart + n_valid])
        self.test = Dataset.Obj(x=x_all[tstart:tstart + n_test],
                                y=y_all[tstart:tstart + n_test])

        self.n_classes = 10
        self.img_shape = (28, 28)
e7401822d596
Pylearn Dataset wrapper for MNIST+rotate+background dataset used in ICML07 (new
gdesjardins
parents:
1471
diff
changeset
|
31 |
1353
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
32 |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
class DatasetLoader(object):
    """
    A class for loading an ICML07 dataset into memory.

    The class has functionality to
    - download the dataset from the internet (in amat format)
      [not implemented yet, see `download`]
    - convert the dataset from amat format to npy format
    - load the dataset from either amat or npy source files

    The amat source is described in one of three ways, checked in this order:

    1. ``amat_filename_all``: a single file holding train, valid and test rows
    2. ``amat_filename_root``: prefix of ``<root>_train.amat`` and
       ``<root>_test.amat``
    3. ``amat_filename_train`` and ``amat_filename_test``: explicit filenames
    """

    def __init__(self, http_source,
                 n_inputs, n_classes,
                 n_train, n_valid, n_test,
                 npy_filename_root,
                 amat_filename_root=None,
                 amat_filename_train=None,
                 amat_filename_test=None,
                 amat_filename_all=None,
                 ):
        """Store the description of a dataset; no file is touched here.

        :param http_source: URL of the archive holding the amat files
        :param n_inputs: number of input columns per example
        :param n_classes: exclusive upper bound on the integer labels
        :param n_train: expected number of training examples
        :param n_valid: expected number of validation examples
        :param n_test: expected number of test examples
        :param npy_filename_root: prefix of the ``_inputs.npy`` and
            ``_labels.npy`` files
        :param amat_filename_root: prefix of the ``_train.amat`` and
            ``_test.amat`` files
        :param amat_filename_train: explicit train(+valid) amat filename
        :param amat_filename_test: explicit test amat filename
        :param amat_filename_all: amat filename holding every example
        """
        # Explicit assignments: one attribute per constructor argument.
        self.http_source = http_source
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        self.n_train = n_train
        self.n_valid = n_valid
        self.n_test = n_test
        self.npy_filename_root = npy_filename_root
        self.amat_filename_root = amat_filename_root
        self.amat_filename_train = amat_filename_train
        self.amat_filename_test = amat_filename_test
        self.amat_filename_all = amat_filename_all

    def download(self, todir):
        """Download the archive at `self.http_source` into `todir`.

        :raises NotImplementedError: always; downloading is not implemented
        """
        #TODO: write a system call to wget to dl the file from self.http_source
        raise NotImplementedError()

    def load_from_amat(self):
        """Load the dataset from its amat file(s).

        :returns: ``(inputs, labels)`` where `inputs` is a float32 matrix with
            `n_inputs` columns and `labels` is an int8 vector

        :raises ValueError: if no amat source was given to the constructor
        :raises AssertionError: if the files do not have the expected number
            of rows or columns, or if a label is not an integer in
            ``[0, n_classes)``
        """
        if self.amat_filename_all is not None:
            allmat = AMat(self.amat_filename_all).all
            assert allmat.shape[0] == self.n_train + self.n_valid + self.n_test, allmat.shape
        else:
            if self.amat_filename_root is not None:
                train_path = self.amat_filename_root + '_train.amat'
                test_path = self.amat_filename_root + '_test.amat'
            elif (self.amat_filename_train is not None
                    and self.amat_filename_test is not None):
                train_path = self.amat_filename_train
                test_path = self.amat_filename_test
            else:
                # Fail with a clear message instead of handing None to AMat.
                raise ValueError(
                    'no amat source configured: provide amat_filename_all, '
                    'amat_filename_root, or both amat_filename_train and '
                    'amat_filename_test')
            amat_train = AMat(train_path)
            amat_test = AMat(test_path)
            # The "train" file holds the training and validation examples.
            assert amat_train.all.shape[0] == self.n_train + self.n_valid
            assert amat_test.all.shape[0] == self.n_test
            allmat = numpy.vstack((amat_train.all, amat_test.all))
        # CHECKPOINT: allmat has been computed by this point.
        # Each row is n_inputs input values followed by one label.
        assert allmat.shape[1] == self.n_inputs + 1
        inputs = allmat[:, :self.n_inputs].astype('float32')
        labels = allmat[:, self.n_inputs].astype('int8')
        # The int8 cast must be lossless (integer labels, no overflow).
        assert numpy.allclose(labels, allmat[:, self.n_inputs])
        assert numpy.all(labels >= 0)
        assert numpy.all(labels < self.n_classes)
        return inputs, labels

    def load_from_amat_save_to_numpy(self):
        """Load from amat, then write ``<npy_filename_root>_inputs.npy`` and
        ``<npy_filename_root>_labels.npy``.

        :returns: the ``(inputs, labels)`` pair that was saved
        """
        inputs, labels = self.load_from_amat()
        numpy.save(self.npy_filename_root + '_inputs.npy', inputs)
        numpy.save(self.npy_filename_root + '_labels.npy', labels)
        return inputs, labels

    def load_from_numpy(self, mmap_mode='r'):
        """Much faster than load_from_amat

        :param mmap_mode: forwarded to `numpy.load` for both files

        :returns: ``(inputs, labels)`` as loaded from the npy files

        :raises AssertionError: if the arrays do not have the expected shape
            or if a label is outside ``[0, n_classes)``
        """
        inputs = numpy.load(self.npy_filename_root + '_inputs.npy',
                            mmap_mode=mmap_mode)
        labels = numpy.load(self.npy_filename_root + '_labels.npy',
                            mmap_mode=mmap_mode)
        assert inputs.shape == (self.n_train + self.n_valid + self.n_test, self.n_inputs)
        assert labels.shape[0] == inputs.shape[0]
        assert numpy.all(labels >= 0)
        assert numpy.all(labels < self.n_classes)
        return inputs, labels
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
95 |
1471
281efa9a4463
icml07_loaders uses get_filepath_in_roots
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1355
diff
changeset
|
def icml07_loaders(new_version=True, rootdir=None):
    """Build one `DatasetLoader` per ICML07 dataset.

    :param new_version: must be True. It selects the "new" archives for
        'mnist_rotated' and 'mnist_rotated_background_images'; the old
        variants are not supported because their amat filenames are unknown.
    :param rootdir: folder holding the amat / npy files. When None, it is
        looked up with ``get_filepath_in_roots('icml07data_twiki')``.

    :returns: dict mapping a dataset name to its `DatasetLoader`

    :raises IOError: if `rootdir` is None and the lookup finds nothing
    :raises NotImplementedError: if `new_version` is false
    """
    if rootdir is None:
        rootdir = get_filepath_in_roots('icml07data_twiki')
    if rootdir is None:
        raise IOError('dataset not found (no icml07data_twiki folder in PYLEARN_DATA_ROOT or DBPATH environment variable).')
    if not new_version:
        # Fail fast, before building any loader.  The old archives are
        # mnist_rotation.zip and mnist_rotation_back_image.zip.
        raise NotImplementedError('TODO: what are the amat_filenames here')

    http_root = 'http://www.iro.umontreal.ca/~lisa/icml2007data/'

    def in_root(filename):
        return os.path.join(rootdir, filename)

    rval = {}

    # Datasets shipped as <amat_root>_train.amat / <amat_root>_test.amat.
    # Columns: name (also the npy root), archive, amat root, n_train, n_valid.
    # NOTE(review): n_classes is 10 for every entry, as in the original code;
    # confirm whether 'rectangles*' and 'convex' should use a smaller value.
    train_test_specs = [
        ('mnist_basic', 'mnist.zip', 'mnist', 10000, 2000),
        ('mnist_background_images', 'mnist_background_images.zip',
         'mnist_background_images', 10000, 2000),
        ('mnist_background_random', 'mnist_background_random.zip',
         'mnist_background_random', 10000, 2000),
        ('rectangles', 'rectangles.zip', 'rectangles', 1000, 200),
        ('rectangles_images', 'rectangles_images.zip',
         'rectangles_im', 10000, 2000),
        #not sure about this train/valid split
        ('convex', 'convex.zip', 'convex', 6500, 1500),
    ]
    for name, archive, amat_root, n_train, n_valid in train_test_specs:
        rval[name] = DatasetLoader(
            http_source=http_root + archive,
            amat_filename_root=in_root(amat_root),
            npy_filename_root=in_root(name),
            n_inputs=784,
            n_classes=10,
            n_train=n_train,
            n_valid=n_valid,
            n_test=50000)

    # Noise variations: one amat file per level holds every example.
    for level in range(1, 7):
        rval['mnist_noise_%i' % level] = DatasetLoader(
            http_source=http_root + 'mnist_noise_variation.tar.gz',
            amat_filename_all=in_root(
                'mnist_noise_variations_all_%i.amat' % level),
            npy_filename_root=in_root('mnist_noise_%i' % level),
            n_inputs=784,
            n_classes=10,
            n_train=10000,
            n_valid=2000,
            n_test=2000)

    # Rotated variants ("new" archives): explicit train_valid / test files.
    # Columns: name (also the npy root), archive, common amat filename prefix.
    rotated_specs = [
        ('mnist_rotated', 'mnist_rotation_new.zip',
         'mnist_all_rotation_normalized_float'),
        ('mnist_rotated_background_images',
         'mnist_rotation_back_image_new.zip',
         'mnist_all_background_images_rotation_normalized'),
    ]
    for name, archive, amat_prefix in rotated_specs:
        rval[name] = DatasetLoader(
            http_source=http_root + archive,
            amat_filename_test=in_root(amat_prefix + '_test.amat'),
            amat_filename_train=in_root(amat_prefix + '_train_valid.amat'),
            npy_filename_root=in_root(name),
            n_inputs=784,
            n_classes=10,
            n_train=10000,
            n_valid=2000,
            n_test=50000)

    return rval
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
210 |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
211 |