Mercurial > pylearn
changeset 507:b8e6de17eaa6
modifs to smallNorb
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 29 Oct 2008 18:06:49 -0400 |
parents | eda3d576ee97 |
children | 60b7dd5be860 |
files | datasets/smallNorb.py |
diffstat | 1 files changed, 15 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/datasets/smallNorb.py Wed Oct 29 17:20:18 2008 -0400
+++ b/datasets/smallNorb.py Wed Oct 29 18:06:49 2008 -0400
@@ -25,20 +25,29 @@
         self.test_info = os.path.join(*\
             smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat'])
 
-    def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='float64'):
+    def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='uint8'):
+        """ Load the smallNorb data into numpy matrices.
+        normalize_pixels True will divide the values by 255, which makes sense in conjunction
+        with dtype=float32 or dtype=float64.
+
+        """
+        #set ulimit to an integer, and disable reading of the test_xxx files to load only a
+        #subset of the data
+        ulimit=None
 
         def downsample(dataset):
             return dataset[:, 0, ::downsample_amt, ::downsample_amt]
-        samples = downsample(read(open(self.train_dat)))
+
+        samples = downsample(read(open(self.train_dat), slice(None,ulimit)))
         samples = numpy.vstack((samples, downsample(read(open(self.test_dat)))))
         samples = numpy.asarray(samples, dtype=dtype)
         if normalize_pixels:
             samples *= (1.0 / 255.0)
 
-        labels = read(open(self.train_cat))
+        labels = read(open(self.train_cat), slice(0,ulimit))
         labels = numpy.hstack((labels, read(open(self.test_cat))))
 
-        infos = read(open(self.train_info))
+        infos = read(open(self.train_info), slice(0,ulimit))
         infos = numpy.vstack((infos, read(open(self.test_info))))
 
         return samples, labels, infos
@@ -73,8 +82,8 @@
     n_labels = 5
 
     def _pick_rows(rows):
-        a = numpy.array([ samples[i].flatten() for i in train_rows])
-        b = numpy.array([ [labels[i]] for i in train_rows])
+        a = numpy.array([samples[i].flatten() for i in rows])
+        b = numpy.array([labels[i] for i in rows])
         return a, b
 
     return [_pick_rows(r) for r in (train_rows, valid_rows, test_rows)]