# HG changeset patch # User James Bergstra # Date 1225318009 14400 # Node ID b8e6de17eaa6e3e499c9a27f208c406ae9eaf661 # Parent eda3d576ee97ebcfcbbf075fc8fc4712c6f4dcc3 modifs to smallNorb diff -r eda3d576ee97 -r b8e6de17eaa6 datasets/smallNorb.py --- a/datasets/smallNorb.py Wed Oct 29 17:20:18 2008 -0400 +++ b/datasets/smallNorb.py Wed Oct 29 18:06:49 2008 -0400 @@ -25,20 +25,29 @@ self.test_info = os.path.join(*\ smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat']) - def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='float64'): + def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='uint8'): + """ Load the smallNorb data into numpy matrices. + normalize_pixels True will divide the values by 255, which makes sense in conjunction + with dtype=float32 or dtype=float64. + + """ + #set ulimit to an integer, and disable reading of the test_xxx files to load only a + #subset of the data + ulimit=None def downsample(dataset): return dataset[:, 0, ::downsample_amt, ::downsample_amt] - samples = downsample(read(open(self.train_dat))) + + samples = downsample(read(open(self.train_dat), slice(None,ulimit))) samples = numpy.vstack((samples, downsample(read(open(self.test_dat))))) samples = numpy.asarray(samples, dtype=dtype) if normalize_pixels: samples *= (1.0 / 255.0) - labels = read(open(self.train_cat)) + labels = read(open(self.train_cat), slice(0,ulimit)) labels = numpy.hstack((labels, read(open(self.test_cat)))) - infos = read(open(self.train_info)) + infos = read(open(self.train_info), slice(0,ulimit)) infos = numpy.vstack((infos, read(open(self.test_info)))) return samples, labels, infos @@ -73,8 +82,8 @@ n_labels = 5 def _pick_rows(rows): - a = numpy.array([ samples[i].flatten() for i in train_rows]) - b = numpy.array([ [labels[i]] for i in train_rows]) + a = numpy.array([samples[i].flatten() for i in rows]) + b = numpy.array([labels[i] for i in rows]) return a, b return [_pick_rows(r) for r in (train_rows, valid_rows, test_rows)]