Mercurial > pylearn
annotate pylearn/datasets/icml07.py @ 1355:ba8a32b71356
icml07 - added comment
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Fri, 05 Nov 2010 13:29:07 -0400 |
parents | be3030305d4b |
children | 281efa9a4463 |
rev | line source |
---|---|
1353
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 """ Functions related to the datasets used in Larochelle et al. 2007 (incl. modified MNIST). |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 """ |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 import os, sys |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 import numpy |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
6 from pylearn.io.amat import AMat |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
class DatasetLoader(object):
    """
    A class for loading an ICML07 dataset into memory.

    The class has functionality to
    - download the dataset from the internet (in amat format; not implemented yet)
    - convert the dataset from amat format to npy format
    - load the dataset from either amat or npy source files

    Every loading method returns a pair ``(inputs, labels)`` whose first
    dimension is ``n_train + n_valid + n_test``.  When the data comes from
    separate train and test amat files, the rows of the train file come
    first and the rows of the test file follow.
    """

    def __init__(self, http_source,
            n_inputs, n_classes,
            n_train, n_valid, n_test,
            npy_filename_root,
            amat_filename_root=None,
            amat_filename_train=None,
            amat_filename_test=None,
            amat_filename_all=None,
            ):
        """Store the description of one dataset; no file is touched here.

        :param http_source: URL of the archive holding the amat files.
        :param n_inputs: number of input columns per example.
        :param n_classes: exclusive upper bound on the integer labels.
        :param n_train: number of training examples.
        :param n_valid: number of validation examples.
        :param n_test: number of test examples.
        :param npy_filename_root: path prefix of the npy files; the suffixes
            '_inputs.npy' and '_labels.npy' are appended to it.
        :param amat_filename_root: path prefix of the amat files; the suffixes
            '_train.amat' and '_test.amat' are appended to it.
        :param amat_filename_train: explicit path of the train(+valid) amat
            file, used when `amat_filename_root` is None.
        :param amat_filename_test: explicit path of the test amat file, used
            when `amat_filename_root` is None.
        :param amat_filename_all: path of a single amat file holding every
            example; takes precedence over the other amat arguments.
        """
        # Explicit assignments instead of self.__dict__.update(locals()):
        # the resulting attribute set is the same, but it is visible to
        # readers and tools.
        self.http_source = http_source
        self.n_inputs = n_inputs
        self.n_classes = n_classes
        self.n_train = n_train
        self.n_valid = n_valid
        self.n_test = n_test
        self.npy_filename_root = npy_filename_root
        self.amat_filename_root = amat_filename_root
        self.amat_filename_train = amat_filename_train
        self.amat_filename_test = amat_filename_test
        self.amat_filename_all = amat_filename_all

    def _check_labels(self, labels):
        """Assert that every label is a valid class index in [0, n_classes)."""
        # The lower bound matters: a negative label would otherwise slip
        # through the `< n_classes` test.
        assert numpy.all(labels >= 0), 'negative class label found'
        assert numpy.all(labels < self.n_classes)

    def download(self, todir):
        """Download the dataset archive into `todir` (not implemented yet).

        :raises NotImplementedError: always.
        """
        #TODO: write a system call to wget to dl the file from self.http_source
        raise NotImplementedError()

    def load_from_amat(self):
        """Load the dataset from its amat source file(s).

        The single file `amat_filename_all` is used when it is set.
        Otherwise a train file and a test file are read (derived from
        `amat_filename_root` when it is set, else taken from
        `amat_filename_train` / `amat_filename_test`) and stacked, train
        rows first.

        :returns: (inputs, labels) where inputs is the first `n_inputs`
            columns cast to float32 and labels is the following column cast
            to int8.
        :raises ValueError: if no amat source file is configured.
        :raises AssertionError: if the shapes or the labels do not match the
            sizes given to the constructor.
        """
        if self.amat_filename_all is not None:
            allmat = AMat(self.amat_filename_all).all
            assert allmat.shape[0] == self.n_train + self.n_valid + self.n_test, allmat.shape
        else:
            if self.amat_filename_root is not None:
                train_filename = self.amat_filename_root + '_train.amat'
                test_filename = self.amat_filename_root + '_test.amat'
            else:
                train_filename = self.amat_filename_train
                test_filename = self.amat_filename_test
            if train_filename is None or test_filename is None:
                # Fail with an explicit message rather than passing None on
                # to AMat.
                raise ValueError(
                        'no amat source configured: set amat_filename_all, '
                        'amat_filename_root, or both amat_filename_train '
                        'and amat_filename_test')
            amat_train = AMat(train_filename)
            amat_test = AMat(test_filename)
            # The "train" file holds the training and the validation examples.
            assert amat_train.all.shape[0] == self.n_train + self.n_valid
            assert amat_test.all.shape[0] == self.n_test
            allmat = numpy.vstack((amat_train.all, amat_test.all))
        # CHECKPOINT: allmat has been computed by this point.
        # Each row is n_inputs input values followed by one label.
        assert allmat.shape[1] == self.n_inputs + 1
        inputs = allmat[:, :self.n_inputs].astype('float32')
        labels = allmat[:, self.n_inputs].astype('int8')
        # Make sure the int8 cast did not change any label value.
        assert numpy.allclose(labels, allmat[:, self.n_inputs])
        self._check_labels(labels)
        return inputs, labels

    def load_from_amat_save_to_numpy(self):
        """Load the dataset from amat and write it out as two npy files.

        The files written are `npy_filename_root` + '_inputs.npy' and
        `npy_filename_root` + '_labels.npy'.

        :returns: the (inputs, labels) pair produced by `load_from_amat`.
        """
        inputs, labels = self.load_from_amat()
        numpy.save(self.npy_filename_root + '_inputs.npy', inputs)
        numpy.save(self.npy_filename_root + '_labels.npy', labels)
        return inputs, labels

    def load_from_numpy(self, mmap_mode='r'):
        """Much faster than load_from_amat

        Reads the two npy files written by `load_from_amat_save_to_numpy`.

        :param mmap_mode: forwarded to numpy.load; the default 'r' maps the
            files read-only instead of reading them into memory.
        :returns: (inputs, labels).
        :raises AssertionError: if the shapes or the labels do not match the
            sizes given to the constructor.
        """
        inputs = numpy.load(self.npy_filename_root + '_inputs.npy', mmap_mode=mmap_mode)
        labels = numpy.load(self.npy_filename_root + '_labels.npy', mmap_mode=mmap_mode)
        assert inputs.shape == (self.n_train + self.n_valid + self.n_test, self.n_inputs)
        assert labels.shape[0] == inputs.shape[0]
        self._check_labels(labels)
        return inputs, labels
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
70 |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def icml07_loaders(new_version=True, rootdir='.'):
    """Build one DatasetLoader per dataset of Larochelle et al. 2007.

    :param new_version: select the '*_new.zip' archives of the two rotated
        MNIST datasets.  The other archives are not supported yet, so
        passing a false value raises NotImplementedError.
    :param rootdir: directory joined in front of every amat and npy
        filename.

    :returns: dict mapping a dataset name to its DatasetLoader.  The keys
        are 'mnist_basic', 'mnist_background_images',
        'mnist_background_random', 'rectangles', 'rectangles_images',
        'convex', 'mnist_noise_1' ... 'mnist_noise_6', 'mnist_rotated' and
        'mnist_rotated_background_images'.
    :raises NotImplementedError: if `new_version` is false.
    """
    if not new_version:
        # The archives would be mnist_rotation.zip and
        # mnist_rotation_back_image.zip under the same URL root, but the
        # names of the amat files they contain are not known yet.
        raise NotImplementedError('TODO: what are the amat_filenames here')

    url_root = 'http://www.iro.umontreal.ca/~lisa/icml2007data/'

    def in_rootdir(filename):
        # Every data file lives directly under rootdir.
        return os.path.join(rootdir, filename)

    rval = {}

    # Datasets shipped as <amat root>_train.amat / <amat root>_test.amat.
    # Columns: name (also the npy filename root), archive, amat filename
    # root, n_train, n_valid.
    # NOTE(review): n_classes=10 is used for every dataset, including
    # rectangles, rectangles_images and convex -- confirm whether a smaller
    # class count is intended for those.
    train_test_datasets = [
        ('mnist_basic', 'mnist.zip', 'mnist', 10000, 2000),
        ('mnist_background_images', 'mnist_background_images.zip',
            'mnist_background_images', 10000, 2000),
        ('mnist_background_random', 'mnist_background_random.zip',
            'mnist_background_random', 10000, 2000),
        ('rectangles', 'rectangles.zip', 'rectangles', 1000, 200),
        ('rectangles_images', 'rectangles_images.zip',
            'rectangles_im', 10000, 2000),
        #not sure about this train/valid split
        ('convex', 'convex.zip', 'convex', 6500, 1500),
    ]
    for name, archive, amat_root, n_train, n_valid in train_test_datasets:
        rval[name] = DatasetLoader(
            http_source=url_root + archive,
            amat_filename_root=in_rootdir(amat_root),
            npy_filename_root=in_rootdir(name),
            n_inputs=784,
            n_classes=10,
            n_train=n_train,
            n_valid=n_valid,
            n_test=50000
            )

    # Six noise levels, each stored as one amat file with every example.
    for level in range(1, 7):
        name = 'mnist_noise_%i' % level
        rval[name] = DatasetLoader(
            http_source=url_root + 'mnist_noise_variation.tar.gz',
            amat_filename_all=in_rootdir(
                'mnist_noise_variations_all_%i.amat' % level),
            npy_filename_root=in_rootdir(name),
            n_inputs=784,
            n_classes=10,
            n_train=10000,
            n_valid=2000,
            n_test=2000
            )

    # Rotated datasets: explicit '<stem>_train_valid.amat' and
    # '<stem>_test.amat' files.
    # Columns: name (also the npy filename root), archive, amat stem.
    rotated_datasets = [
        ('mnist_rotated', 'mnist_rotation_new.zip',
            'mnist_all_rotation_normalized_float'),
        ('mnist_rotated_background_images',
            'mnist_rotation_back_image_new.zip',
            'mnist_all_background_images_rotation_normalized'),
    ]
    for name, archive, amat_stem in rotated_datasets:
        rval[name] = DatasetLoader(
            http_source=url_root + archive,
            amat_filename_test=in_rootdir(amat_stem + '_test.amat'),
            amat_filename_train=in_rootdir(amat_stem + '_train_valid.amat'),
            npy_filename_root=in_rootdir(name),
            n_inputs=784,
            n_classes=10,
            n_train=10000,
            n_valid=2000,
            n_test=50000
            )
    return rval
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
181 |
2024c5618466
adding icml07 dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
182 |