changeset 507:b8e6de17eaa6

modifs to smallNorb
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 29 Oct 2008 18:06:49 -0400
parents eda3d576ee97
children 60b7dd5be860
files datasets/smallNorb.py
diffstat 1 files changed, 15 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/datasets/smallNorb.py	Wed Oct 29 17:20:18 2008 -0400
+++ b/datasets/smallNorb.py	Wed Oct 29 18:06:49 2008 -0400
@@ -25,20 +25,29 @@
         self.test_info = os.path.join(*\
                 smallnorb + ['smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat'])
 
-    def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='float64'):
+    def load_append_train_test(self, normalize_pixels=True, downsample_amt=1, dtype='uint8'):
+        """ Load the smallNorb data into numpy matrices.
 
+        If normalize_pixels is True, the values are multiplied by 1/255, which only makes sense
+        in conjunction with dtype=float32 or dtype=float64 (not the default dtype='uint8').
+
+        """
+        # To load only a subset of the data, set ulimit to an integer and disable reading of
+        # the test_xxx files.
+        ulimit=None
         def downsample(dataset):
             return dataset[:, 0, ::downsample_amt, ::downsample_amt]
-        samples = downsample(read(open(self.train_dat)))
+
+        samples = downsample(read(open(self.train_dat), slice(None,ulimit)))
         samples = numpy.vstack((samples, downsample(read(open(self.test_dat)))))
         samples = numpy.asarray(samples, dtype=dtype)
         if normalize_pixels:
             samples *= (1.0 / 255.0)
 
-        labels = read(open(self.train_cat))
+        labels = read(open(self.train_cat), slice(0,ulimit))
         labels = numpy.hstack((labels, read(open(self.test_cat))))
 
-        infos = read(open(self.train_info))
+        infos = read(open(self.train_info), slice(0,ulimit))
         infos = numpy.vstack((infos, read(open(self.test_info))))
 
         return samples, labels, infos
@@ -73,8 +82,8 @@
     n_labels = 5
 
     def _pick_rows(rows):
-        a = numpy.array([ samples[i].flatten() for i in train_rows])
-        b = numpy.array([ [labels[i]] for i in train_rows])
+        a = numpy.array([samples[i].flatten() for i in rows])
+        b = numpy.array([labels[i] for i in rows])
         return a, b
 
     return [_pick_rows(r) for r in (train_rows, valid_rows, test_rows)]