Mercurial > pylearn
annotate pylearn/datasets/icml07.py @ 1471:281efa9a4463
icml07_loaders uses get_filepath_in_roots
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 18 May 2011 10:51:11 -0400 |
parents | ba8a32b71356 |
children | e7401822d596 |
rev | line source |
---|---|
1353
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 """ Functions related to the datasets used in Larochelle et al. 2007 (incl. modified MNIST). |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 """ |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 import os, sys |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 import numpy |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 |
1471
281efa9a4463
icml07_loaders uses get_filepath_in_roots
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1355
diff
changeset
|
6 from config import get_filepath_in_roots |
1353
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 from pylearn.io.amat import AMat |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
8 |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
class DatasetLoader(object):
    """
    A class for loading an ICML07 dataset into memory.

    The class has functionality to
    - download the dataset from the internet (in amat format)
      (declared, but `download` currently raises NotImplementedError)
    - convert the dataset from amat format to npy format
    - load the dataset from either amat or npy source files

    The amat source is described by one of the following groups of
    constructor arguments, tried in this order by `load_from_amat`:
    - `amat_filename_all`: a single file holding every example
    - `amat_filename_root`: prefix; '_train.amat' and '_test.amat' are appended
    - `amat_filename_train` and `amat_filename_test`: two explicit filenames

    The npy files are always '<npy_filename_root>_inputs.npy' and
    '<npy_filename_root>_labels.npy'.
    """
    def __init__(self, http_source,
            n_inputs, n_classes,
            n_train, n_valid, n_test,
            npy_filename_root,
            amat_filename_root=None,
            amat_filename_train=None,
            amat_filename_test=None,
            amat_filename_all=None,
            ):
        """Store the description of one dataset; no file is touched here.

        :param http_source: URL of the dataset archive
        :param n_inputs: number of input columns per example
        :param n_classes: exclusive upper bound checked on the label values
        :param n_train: expected number of training examples
        :param n_valid: expected number of validation examples
        :param n_test: expected number of test examples
        :param npy_filename_root: filename prefix of the npy files
        :param amat_filename_root: filename prefix of the train/test amat files
        :param amat_filename_train: explicit train(+valid) amat filename
        :param amat_filename_test: explicit test amat filename
        :param amat_filename_all: amat filename holding all examples
        """
        # Explicit assignments (rather than copying locals() into __dict__)
        # so that the attribute set is visible to readers and tools.
        self.http_source = http_source
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        self.n_train = n_train
        self.n_valid = n_valid
        self.n_test = n_test
        self.npy_filename_root = npy_filename_root
        self.amat_filename_root = amat_filename_root
        self.amat_filename_train = amat_filename_train
        self.amat_filename_test = amat_filename_test
        self.amat_filename_all = amat_filename_all

    def download(self, todir):
        """Download the dataset archive into `todir` (not implemented yet).

        :raises NotImplementedError: always
        """
        #TODO: write a system call to wget to dl the file from self.http_source
        raise NotImplementedError()

    def load_from_amat(self):
        """Load the whole dataset from its amat source file(s).

        :returns: (inputs, labels) where inputs is the first `n_inputs`
            columns cast to float32 and labels is the following column cast
            to int8.  With separate train/test files, the train file rows
            come first, followed by the test file rows.
        :raises ValueError: if no usable amat source was configured
        :raises AssertionError: if the data does not match the expected
            row counts, column count, or label range
        """
        n_total = self.n_train + self.n_valid + self.n_test
        if self.amat_filename_all is not None:
            # NOTE(review): AMat(...).all is used as a 2-d array of all the
            # rows of the file -- confirm against pylearn.io.amat.
            allmat = AMat(self.amat_filename_all).all
            assert allmat.shape[0] == n_total, allmat.shape
        else:
            if self.amat_filename_root is not None:
                train_path = self.amat_filename_root + '_train.amat'
                test_path = self.amat_filename_root + '_test.amat'
            else:
                train_path = self.amat_filename_train
                test_path = self.amat_filename_test
            # Fail with a clear message instead of handing None to AMat.
            if train_path is None or test_path is None:
                raise ValueError(
                        'no amat source configured: give amat_filename_all, '
                        'amat_filename_root, or both amat_filename_train '
                        'and amat_filename_test')
            amat_train = AMat(train_path)
            amat_test = AMat(test_path)
            # the "train" file holds the train and the valid examples
            assert amat_train.all.shape[0] == self.n_train + self.n_valid, \
                    amat_train.all.shape
            assert amat_test.all.shape[0] == self.n_test, amat_test.all.shape
            allmat = numpy.vstack((amat_train.all, amat_test.all))
        # CHECKPOINT: allmat has been computed by this point.
        assert allmat.shape[1] == self.n_inputs + 1, allmat.shape
        label_column = allmat[:, self.n_inputs]
        inputs = allmat[:, :self.n_inputs].astype('float32')
        labels = label_column.astype('int8')
        # the int8 cast must not have altered any label value
        assert numpy.allclose(labels, label_column)
        assert numpy.all(labels < self.n_classes)
        return inputs, labels

    def load_from_amat_save_to_numpy(self):
        """Load from the amat source and write both arrays as npy files.

        :returns: the (inputs, labels) pair produced by `load_from_amat`
        """
        inputs, labels = self.load_from_amat()
        numpy.save(self.npy_filename_root + '_inputs.npy', inputs)
        numpy.save(self.npy_filename_root + '_labels.npy', labels)
        return inputs, labels

    def load_from_numpy(self, mmap_mode='r'):
        """Much faster than load_from_amat

        :param mmap_mode: forwarded to numpy.load for both files
        :returns: (inputs, labels) as loaded from the two npy files
        :raises AssertionError: if the arrays do not match the expected
            shape or label range
        """
        inputs = numpy.load(self.npy_filename_root + '_inputs.npy',
                mmap_mode=mmap_mode)
        labels = numpy.load(self.npy_filename_root + '_labels.npy',
                mmap_mode=mmap_mode)
        expected_shape = (self.n_train + self.n_valid + self.n_test,
                self.n_inputs)
        assert inputs.shape == expected_shape, inputs.shape
        assert labels.shape[0] == inputs.shape[0], labels.shape
        assert numpy.all(labels < self.n_classes)
        return inputs, labels
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
71 |
1471
281efa9a4463
icml07_loaders uses get_filepath_in_roots
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
1355
diff
changeset
|
def icml07_loaders(new_version=True, rootdir=None):
    """Build a DatasetLoader for each of the ICML07 datasets.

    :param new_version: if True, the rotated-MNIST loaders point to the
        '*_new.zip' archives.  False is not supported yet.
    :param rootdir: directory containing the amat and npy files.  When
        None, it is obtained from get_filepath_in_roots('icml07data_twiki').
    :returns: dict mapping a dataset name to its DatasetLoader.  Keys are
        'mnist_basic', 'mnist_background_images', 'mnist_background_random',
        'rectangles', 'rectangles_images', 'convex', 'mnist_noise_1' through
        'mnist_noise_6', 'mnist_rotated' and
        'mnist_rotated_background_images'.
    :raises NotImplementedError: if new_version is false
    """
    if rootdir is None:
        rootdir = get_filepath_in_roots('icml07data_twiki')

    url_base = 'http://www.iro.umontreal.ca/~lisa/icml2007data/'
    rval = {}

    # Datasets stored as '<amat root>_train.amat' and '<amat root>_test.amat'.
    # Columns: name (also the npy filename root), archive, amat filename
    # root, n_train, n_valid.
    # NOTE(review): n_classes=10 is applied to every entry, including
    # 'rectangles', 'rectangles_images' and 'convex' -- confirm that this
    # loose upper bound is intended for those tasks.
    split_file_datasets = [
        ('mnist_basic', 'mnist.zip', 'mnist', 10000, 2000),
        ('mnist_background_images', 'mnist_background_images.zip',
            'mnist_background_images', 10000, 2000),
        ('mnist_background_random', 'mnist_background_random.zip',
            'mnist_background_random', 10000, 2000),
        ('rectangles', 'rectangles.zip', 'rectangles', 1000, 200),
        ('rectangles_images', 'rectangles_images.zip',
            'rectangles_im', 10000, 2000),
        #not sure about this train/valid split
        ('convex', 'convex.zip', 'convex', 6500, 1500),
        ]
    for name, archive, amat_root, n_train, n_valid in split_file_datasets:
        rval[name] = DatasetLoader(
                http_source=url_base + archive,
                amat_filename_root=os.path.join(rootdir, amat_root),
                npy_filename_root=os.path.join(rootdir, name),
                n_inputs=784,
                n_classes=10,
                n_train=n_train,
                n_valid=n_valid,
                n_test=50000
                )

    # The noise variations share one archive; each level is a single amat
    # file holding all of its examples.
    for level in range(1, 7):
        rval['mnist_noise_%i' % level] = DatasetLoader(
                http_source=url_base + 'mnist_noise_variation.tar.gz',
                amat_filename_all=os.path.join(rootdir,
                    'mnist_noise_variations_all_%i.amat' % level),
                npy_filename_root=os.path.join(rootdir,
                    'mnist_noise_%i' % level),
                n_inputs=784,
                n_classes=10,
                n_train=10000,
                n_valid=2000,
                n_test=2000
                )

    if not new_version:
        # The amat filenames inside the old archives are not known:
        #   mnist_rotated: <url_base>mnist_rotation.zip
        #   mnist_rotated_background_images:
        #       <url_base>mnist_rotation_back_image.zip
        raise NotImplementedError('TODO: what are the amat_filenames here')

    # Rotated datasets use explicit filenames, because their train file ends
    # in '_train_valid.amat' rather than '_train.amat'.
    rotated_datasets = [
        ('mnist_rotated', 'mnist_rotation_new.zip',
            'mnist_all_rotation_normalized_float'),
        ('mnist_rotated_background_images',
            'mnist_rotation_back_image_new.zip',
            'mnist_all_background_images_rotation_normalized'),
        ]
    for name, archive, amat_prefix in rotated_datasets:
        rval[name] = DatasetLoader(
                http_source=url_base + archive,
                amat_filename_test=os.path.join(rootdir,
                    amat_prefix + '_test.amat'),
                amat_filename_train=os.path.join(rootdir,
                    amat_prefix + '_train_valid.amat'),
                npy_filename_root=os.path.join(rootdir, name),
                n_inputs=784,
                n_classes=10,
                n_train=10000,
                n_valid=2000,
                n_test=50000
                )
    return rval
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
184 |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
185 |