annotate datasets/smallNorb.py @ 534:eaa5ad4089a1

Another bugfix in pylearn.embeddings.length()
author Joseph Turian <turian@gmail.com>
date Tue, 18 Nov 2008 03:49:37 -0500
parents 60b7dd5be860
children
rev   line source
505
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 import os
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 import numpy
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 from ..filetensor import read
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 from .config import data_root
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 #Path = '/u/bergstrj/pub/data/smallnorb'
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7 #Path = '/home/fringant2/lisa/louradoj/data/smallnorb'
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8 #Path = '/home/louradou/data/norb'
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
9
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class Paths(object):
    """File-related operations on smallNorb.

    On construction, builds the paths of the six smallNorb files (images,
    category labels and per-image info, each for the training and testing
    sets) inside the ``smallnorb`` sub-directory of ``data_root()``.
    """

    def __init__(self):
        root = os.path.join(data_root(), 'smallnorb')

        def in_root(filename):
            return os.path.join(root, filename)

        self.train_dat = in_root('smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat')
        self.test_dat = in_root('smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat')
        self.train_cat = in_root('smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat')
        self.test_cat = in_root('smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat')
        self.train_info = in_root('smallnorb-5x46789x9x18x6x2x96x96-training-info.mat')
        self.test_info = in_root('smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat')

    @staticmethod
    def _read_tensor(path):
        """Read one filetensor file and close it again.

        The file is opened in binary mode so the bytes reach ``read``
        untouched, and the handle is released as soon as ``read`` returns.
        """
        # NOTE(review): assumes read() loads the whole tensor before
        # returning (no lazy view on the open file) -- confirm in filetensor.
        with open(path, 'rb') as f:
            return read(f)

    def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='uint8'):
        """Load the smallNorb data into numpy arrays, training set first.

        The testing examples are appended after the training examples in
        every returned array.

        :param normalize_pixels: when True, the samples are multiplied by
            1/255 in place after conversion to `dtype`.  This makes sense in
            conjunction with dtype=float32 or dtype=float64; an integer
            dtype cannot hold the fractional result.
        :param downsample_amt: stride used to subsample the last two axes
            of the image tensors (1 keeps every pixel).
        :param dtype: numpy dtype the samples are converted to.

        :returns: ``(samples, labels, infos)``
        """
        def downsample(dataset):
            # Keep only index 0 of the second axis and subsample the last
            # two axes.  Presumably the second axis is the stereo pair and
            # index 0 is the first camera -- TODO confirm.
            return dataset[:, 0, ::downsample_amt, ::downsample_amt]

        samples = numpy.vstack((
            downsample(self._read_tensor(self.train_dat)),
            downsample(self._read_tensor(self.test_dat)),
        ))
        samples = numpy.asarray(samples, dtype=dtype)
        if normalize_pixels:
            # NOTE(review): in-place scaling; with the default dtype='uint8'
            # the array cannot represent values in (0, 1).  Callers that
            # want normalized pixels should pass a float dtype.
            samples *= (1.0 / 255.0)

        labels = numpy.hstack((
            self._read_tensor(self.train_cat),
            self._read_tensor(self.test_cat),
        ))

        infos = numpy.vstack((
            self._read_tensor(self.train_info),
            self._read_tensor(self.test_info),
        ))

        return samples, labels, infos
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
51
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def smallnorb_iid(ntrain=29160, nvalid=9720, ntest=9720, dtype='float64', normalize_pixels=True):
    """Variation of the smallNorb task in which we randomly shuffle all the object instances
    together before dividing into train/valid/test.

    The default train/valid/test sizes correspond to 60/20/20 split of the entire dataset.

    The shuffle uses a fixed seed (``RandomState(1)``), so the split is the
    same on every call.  Images are loaded with ``downsample_amt=3`` and
    each one is flattened to a single row.

    :param ntrain: number of training examples.
    :param nvalid: number of validation examples.
    :param ntest: number of testing examples.
    :param dtype: numpy dtype of the returned samples.
    :param normalize_pixels: forwarded to ``Paths.load_append_train_test``.

    :returns: ``[(train_x, train_labels), (valid_x, valid_labels), (test_x, test_labels)]``

    :raises ValueError: if ``ntrain + nvalid + ntest`` exceeds the number of
        available samples.
    """
    # cut from /u/louradoj/theano/hpu/expcode1.py
    rng = numpy.random.RandomState(1)
    samples, labels, _ = Paths().load_append_train_test(
        downsample_amt=3, dtype=dtype, normalize_pixels=normalize_pixels)

    nsamples = samples.shape[0]
    if ntrain + nvalid + ntest > nsamples:
        raise ValueError("ntrain+nvalid+ntest exceeds number of samples (%i)" % nsamples,
                         (ntrain, nvalid, ntest))

    # Consecutive, non-overlapping slices of one permutation.
    train_end = ntrain
    valid_end = train_end + nvalid
    test_end = valid_end + ntest

    indices = rng.permutation(nsamples)
    splits = (indices[:train_end],
              indices[train_end:valid_end],
              indices[valid_end:test_end])

    # Explicit row width so that an empty split still reshapes cleanly.
    row_width = int(numpy.prod(samples.shape[1:]))

    def _pick_rows(rows):
        x = samples[rows].reshape((len(rows), row_width))
        y = labels[rows]
        return x, y

    return [_pick_rows(rows) for rows in splits]
505
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
87
74b3e65f5f24 added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def smallnorb_azSplit(dtype='float64', normalize_pixels=True, downsample_amt=3):
    """Split smallNorb into train/valid/test according to column 2 of the info rows.

    For each value ``k`` in ``range(10)`` a list of nine training values is
    built: ``4*k, 4*k + 2, ..., 4*k + 16``, each taken modulo 36.  An example
    goes to the training set when ``info[2]`` is in the list selected by
    ``info[0]``.  Otherwise it goes to the testing set when
    ``info[2] // 2`` is even, and to the validation set when it is odd.

    Presumably ``info[0]`` is the object instance and ``info[2]`` the azimuth
    index, as the local names suggest -- TODO confirm against the dataset
    description.

    The keyword defaults mirror the loading done by ``smallnorb_iid``.

    :param dtype: numpy dtype of the returned samples.
    :param normalize_pixels: forwarded to ``Paths.load_append_train_test``.
    :param downsample_amt: forwarded to ``Paths.load_append_train_test``.

    :returns: ``[(train_x, train_labels), (valid_x, valid_labels), (test_x, test_labels)]``
        where every ``*_x`` holds one flattened image per row.
    """
    # cut from /u/louradoj/theano/hpu/expcode1.py
    # NOTE(review): ported from experiment code; the split has not been
    # validated against the real data.
    samples, labels, infos = Paths().load_append_train_test(
        downsample_amt=downsample_amt, dtype=dtype, normalize_pixels=normalize_pixels)

    train_rows_azimuth = []
    for instance in range(10):
        az_min = 4 * instance
        az_max = az_min + 18
        train_rows_azimuth.append([a % 36 for a in range(az_min, az_max, 2)])

    train_rows, valid_rows, test_rows = [], [], []
    for i, info in enumerate(infos):
        if info[2] in train_rows_azimuth[info[0]]:
            train_rows.append(i)
        elif info[2] // 2 % 2 == 0:
            # Floor division keeps the Python 2 integer semantics of the
            # original ``/`` on every Python version.
            test_rows.append(i)
        else:
            valid_rows.append(i)

    # Explicit row width so that an empty split still reshapes cleanly.
    row_width = int(numpy.prod(samples.shape[1:]))

    def _pick_rows(rows):
        idx = numpy.asarray(rows, dtype=int)
        return samples[idx].reshape((len(idx), row_width)), labels[idx]

    return [_pick_rows(rows) for rows in (train_rows, valid_rows, test_rows)]