Mercurial > pylearn
comparison datasets/smallNorb.py @ 505:74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 29 Oct 2008 17:09:04 -0400 |
parents | |
children | eda3d576ee97 |
comparison
equal
deleted
inserted
replaced
504:19ab9ce916e3 | 505:74b3e65f5f24 |
---|---|
1 import os | |
2 import numpy | |
3 from ..filetensor import read | |
4 from .config import data_root | |
5 | |
6 #Path = '/u/bergstrj/pub/data/smallnorb' | |
7 #Path = '/home/fringant2/lisa/louradoj/data/smallnorb' | |
8 #Path = '/home/louradou/data/norb' | |
9 | |
class Paths(object):
    """File-related operations on the smallNORB dataset.

    Builds the six canonical smallNORB file paths (train/test x
    dat/cat/info) under ``<data_root>/smallnorb`` and provides a loader
    that concatenates the train and test portions into single arrays.
    """
    def __init__(self):
        base = os.path.join(data_root(), 'smallnorb')
        self.train_dat = os.path.join(base, 'smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat')
        self.test_dat = os.path.join(base, 'smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat')
        self.train_cat = os.path.join(base, 'smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat')
        self.test_cat = os.path.join(base, 'smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat')
        self.train_info = os.path.join(base, 'smallnorb-5x46789x9x18x6x2x96x96-training-info.mat')
        self.test_info = os.path.join(base, 'smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat')

    def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='float64'):
        """Load the train and test portions and append them.

        :param normalize_pixels: if True, scale pixel values by 1/255
        :param downsample_amt: keep every `downsample_amt`-th pixel along
            each image axis
        :param dtype: dtype of the returned sample array
        :returns: (samples, labels, infos), train rows first then test rows
        """
        def _read(path):
            # BUG FIX: the original used read(open(path)) and leaked the
            # file handle; close it deterministically instead.
            f = open(path)
            try:
                return read(f)
            finally:
                f.close()

        def _downsample(dataset):
            # keep index 0 of axis 1 and subsample both image axes
            return dataset[:, 0, ::downsample_amt, ::downsample_amt]

        samples = _downsample(_read(self.train_dat))
        samples = numpy.vstack((samples, _downsample(_read(self.test_dat))))
        samples = numpy.asarray(samples, dtype=dtype)
        if normalize_pixels:
            samples *= (1.0 / 255.0)

        labels = _read(self.train_cat)
        labels = numpy.hstack((labels, _read(self.test_cat)))

        infos = _read(self.train_info)
        infos = numpy.vstack((infos, _read(self.test_info)))

        return samples, labels, infos
45 | |
def smallnorb_iid(ntrain=29160, nvalid=9720, ntest=9720, dtype='float64', normalize_pixels=True):
    """Variation of the smallNorb task in which we randomly shuffle all the object instances
    together before dividing into train/valid/test.

    The default train/valid/test sizes correspond to 60/20/20 split of the entire dataset.

    :param ntrain: number of training samples
    :param nvalid: number of validation samples
    :param ntest: number of test samples
    :param dtype: dtype of the returned sample arrays
    :param normalize_pixels: if True, scale pixel values by 1/255
    :returns: 5, [(train_x, train_labels), (valid_x, valid_labels), (test_x, test_labels)]
    :raises Exception: if ntrain + nvalid + ntest exceeds the number of samples
    """
    # cut from /u/louradoj/theano/hpu/expcode1.py
    rng = numpy.random.RandomState(1)  # fixed seed so the split is reproducible
    samples, labels, infos = Paths().load_append_train_test(
        downsample_amt=3, dtype=dtype, normalize_pixels=normalize_pixels)

    nsamples = samples.shape[0]
    if ntrain + nvalid + ntest > nsamples:
        raise Exception("ntrain+nvalid+ntest exceeds number of samples (%i)" % nsamples,
                        (ntrain, nvalid, ntest))
    i0 = 0
    i1 = ntrain
    i2 = ntrain + nvalid
    i3 = ntrain + nvalid + ntest

    indices = rng.permutation(nsamples)
    train_rows = indices[i0:i1]
    valid_rows = indices[i1:i2]
    test_rows = indices[i2:i3]

    n_labels = 5

    def _pick_rows(rows):
        # BUG FIX: the original indexed with `train_rows` regardless of the
        # `rows` argument, so the valid and test splits silently duplicated
        # the training split.
        x = numpy.array([samples[i].flatten() for i in rows])
        y = numpy.array([[labels[i]] for i in rows])
        return x, y

    return n_labels, [_pick_rows(r) for r in (train_rows, valid_rows, test_rows)]
81 | |
def smallnorb_azSplit():
    """Split smallNORB into train/valid/test by azimuth.

    For each object instance a contiguous (wrap-around) window of azimuths
    is assigned to the training set; the remaining rows are divided between
    test and valid by azimuth parity.

    :returns: [(train_x, train_labels), (valid_x, valid_labels), (test_x, test_labels)]
    """
    # cut from /u/louradoj/theano/hpu/expcode1.py
    # WARNING NOT NECESSARILY WORKING CODE
    # BUG FIX: the original called the undefined names
    # `_load_append_train_test` and `_pick_rows` (NameError at runtime);
    # use the Paths loader and a local row-picking helper instead.
    samples, labels, infos = Paths().load_append_train_test()

    train_rows, valid_rows, test_rows = [], [], []
    train_rows_azimuth = []
    for instance in range(10):
        # 9 azimuth values (step 2) per instance, wrapping modulo 36
        az_min = 4 * instance
        az_max = 4 * instance + 18
        train_rows_azimuth.append([a % 36 for a in range(az_min, az_max, 2)])
    #print "train_rows_azimuth", train_rows_azimuth
    for i, info in enumerate(infos):
        # NOTE(review): assumes info[0] is the instance id and info[2] the
        # azimuth, matching the smallNORB info file layout -- confirm.
        if info[2] in train_rows_azimuth[info[0]]:
            train_rows.append(i)
        elif info[2] / 2 % 2 == 0:
            test_rows.append(i)
        else:
            valid_rows.append(i)

    def _pick_rows(samples, labels, rows):
        # flatten each selected image and pair it with its label
        x = numpy.array([samples[i].flatten() for i in rows])
        y = numpy.array([[labels[i]] for i in rows])
        return x, y

    return [_pick_rows(samples, labels, r) for r in (train_rows, valid_rows, test_rows)]