Mercurial > pylearn
annotate datasets/smallNorb.py @ 506:eda3d576ee97
removed n_out from smallnorb_iid rval
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 29 Oct 2008 17:20:18 -0400 |
parents | 74b3e65f5f24 |
children | b8e6de17eaa6 |
rev | line source |
---|---|
505
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 import os |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 import numpy |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 from ..filetensor import read |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 from .config import data_root |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
6 #Path = '/u/bergstrj/pub/data/smallnorb' |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 #Path = '/home/fringant2/lisa/louradoj/data/smallnorb' |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
8 #Path = '/home/louradou/data/norb' |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
9 |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
class Paths(object):
    """File-related operations on smallNorb.

    On construction, builds the full path of each of the six smallNorb files,
    all located in the ``smallnorb`` directory under ``data_root()``.
    """

    def __init__(self):
        # Directory that holds every smallNorb file.
        directory = os.path.join(data_root(), 'smallnorb')

        # The training and testing files share a name pattern; only the
        # trailing kind ('dat', 'cat' or 'info') differs.
        training = 'smallnorb-5x46789x9x18x6x2x96x96-training-%s.mat'
        testing = 'smallnorb-5x01235x9x18x6x2x96x96-testing-%s.mat'

        # '-dat' files: read as the samples by load_append_train_test.
        self.train_dat = os.path.join(directory, training % 'dat')
        self.test_dat = os.path.join(directory, testing % 'dat')
        # '-cat' files: read as the labels by load_append_train_test.
        self.train_cat = os.path.join(directory, training % 'cat')
        self.test_cat = os.path.join(directory, testing % 'cat')
        # '-info' files: read as the infos by load_append_train_test.
        self.train_info = os.path.join(directory, training % 'info')
        self.test_info = os.path.join(directory, testing % 'info')

    def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='float64'):
        """Read the training and testing files and concatenate them, training first.

        :param normalize_pixels: if true, the samples are multiplied by
            1/255 after being converted to `dtype`.
        :param downsample_amt: stride used to subsample the last two axes of
            the sample arrays (1 keeps everything).
        :param dtype: dtype the concatenated samples are converted to.

        :returns: samples, labels, infos -- training entries followed by
            testing entries in each of the three arrays.
        """

        def load(path):
            # Open in binary mode and close the handle as soon as the
            # content has been parsed, instead of leaking it.
            with open(path, 'rb') as f:
                return read(f)

        def downsample(dataset):
            # Keep only index 0 along axis 1 (presumably the first image of
            # each stereo pair -- TODO confirm) and every
            # `downsample_amt`-th element along the last two axes.
            return dataset[:, 0, ::downsample_amt, ::downsample_amt]

        samples = downsample(load(self.train_dat))
        samples = numpy.vstack((samples, downsample(load(self.test_dat))))
        samples = numpy.asarray(samples, dtype=dtype)
        if normalize_pixels:
            samples *= (1.0 / 255.0)

        labels = load(self.train_cat)
        labels = numpy.hstack((labels, load(self.test_cat)))

        infos = load(self.train_info)
        infos = numpy.vstack((infos, load(self.test_info)))

        return samples, labels, infos
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
45 |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def smallnorb_iid(ntrain=29160, nvalid=9720, ntest=9720, dtype='float64', normalize_pixels=True):
    """Variation of the smallNorb task in which we randomly shuffle all the object instances
    together before dividing into train/valid/test.

    The default train/valid/test sizes correspond to 60/20/20 split of the entire dataset.

    The shuffle uses a RandomState seeded with 1, so the same split is produced on every
    call.  Samples are loaded with downsample_amt=3 and each one is flattened into a row.

    :param ntrain: number of shuffled samples assigned to the training set.
    :param nvalid: number of shuffled samples assigned to the validation set.
    :param ntest: number of shuffled samples assigned to the test set.
    :param dtype: dtype of the returned samples (forwarded to the loader).
    :param normalize_pixels: forwarded to the loader.

    :returns: [(train_x, train_labels), (valid_x, valid_labels), (test_x, test_labels)]
        where each x has one flattened sample per row and each labels array has one
        single-element row per sample.

    :raises ValueError: if ntrain + nvalid + ntest exceeds the number of available samples.

    """
    # cut from /u/louradoj/theano/hpu/expcode1.py
    rng = numpy.random.RandomState(1)
    samples, labels, _infos = Paths().load_append_train_test(
        downsample_amt=3, dtype=dtype, normalize_pixels=normalize_pixels)

    nsamples = samples.shape[0]
    if ntrain + nvalid + ntest > nsamples:
        raise ValueError("ntrain+nvalid+ntest exceeds number of samples (%i)" % nsamples,
                         (ntrain, nvalid, ntest))

    # Boundaries of the three consecutive slices of the shuffled indices.
    i0 = 0
    i1 = ntrain
    i2 = ntrain + nvalid
    i3 = ntrain + nvalid + ntest

    indices = rng.permutation(nsamples)
    train_rows = indices[i0:i1]
    valid_rows = indices[i1:i2]
    test_rows = indices[i2:i3]

    def _pick_rows(rows):
        # Must iterate over `rows`: iterating over train_rows here made the
        # valid and test sets silent copies of the training set.
        a = numpy.array([samples[i].flatten() for i in rows])
        b = numpy.array([[labels[i]] for i in rows])
        return a, b

    return [_pick_rows(r) for r in (train_rows, valid_rows, test_rows)]
505
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
81 |
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def smallnorb_azSplit():
    """Split smallNorb into train/valid/test sets based on two columns of the info array.

    Each row of infos is assigned as follows (info[0] is presumably the object
    instance and info[2] the azimuth -- TODO confirm against the dataset format):

    - train if info[2] is one of the 9 values 4k, 4k+2, ..., 4k+16 (mod 36),
      where k = info[0];
    - otherwise test if (info[2] // 2) is even;
    - otherwise valid.

    :returns: [(train_x, train_labels), (valid_x, valid_labels), (test_x, test_labels)]
        in the same format as smallnorb_iid: one flattened sample per row of x and
        one single-element row per sample in labels.
    """
    # cut from /u/louradoj/theano/hpu/expcode1.py
    # WARNING NOT NECESSARILY WORKING CODE
    # NOTE(review): the original called an undefined `_load_append_train_test()`
    # and an undefined three-argument `_pick_rows(samples, labels, rows)`.  They
    # are replaced by the Paths loader (default arguments) and a local helper
    # mirroring smallnorb_iid; confirm this is the intended behaviour.

    samples, labels, infos = Paths().load_append_train_test()

    # One list of 9 allowed info[2] values per value of info[0] in 0..9.
    train_rows_azimuth = [
        [a % 36 for a in range(4 * instance, 4 * instance + 18, 2)]
        for instance in range(10)]
    #print "train_rows_azimuth", train_rows_azimuth

    train_rows, valid_rows, test_rows = [], [], []
    for i, info in enumerate(infos):
        if info[2] in train_rows_azimuth[info[0]]:
            train_rows.append(i)
        # Floor division: `/` would yield a float under true division and
        # the parity test would then misclassify rows.
        elif info[2] // 2 % 2 == 0:
            test_rows.append(i)
        else:
            valid_rows.append(i)

    def _pick_rows(rows):
        # Flatten each selected sample into a row; wrap each label in its own row.
        a = numpy.array([samples[i].flatten() for i in rows])
        b = numpy.array([[labels[i]] for i in rows])
        return a, b

    return [_pick_rows(r) for r in (train_rows, valid_rows, test_rows)]