changeset 737:838646fcf679

Official version of smallNORB, with stereo images of size 96x96.

Since the dataset is rather large (800+ MB), the NORB_small class dynamically
loads the train/valid or test datasets by overriding the __getattribute__
function.

Any access to the "train"/"valid" members will:
* if "train"/"valid" was not the last member accessed:
  * load the train data file from disk
  * resplit it into the same train/valid splits as before
* return a reference to the train or validation dataset

Any access to the "test" member will:
* if "test" was not the last member accessed:
  * load the test data file from disk
* return a reference to the test dataset

Two consecutive accesses to train/valid (or to test) therefore incur no
access-time penalty; alternating between .train and .test is very costly,
however. Before loading the new dataset, the object deletes its internal
references to the current one, in the hope that loading the test dataset will
"replace" the train dataset in memory (and vice versa). For this to hold, the
user should avoid keeping duplicate references to the "train", "valid" or
"test" members (or should delete them manually before switching datasets).
A usage sketch is given after the diff below.

TODO: make dynamic loading optional
author desjagui@atchoum.iro.umontreal.ca
date Wed, 27 May 2009 21:28:25 -0400
parents 331f35215ea5
children 4c536c570957
files pylearn/datasets/norb_small.py
diffstat 1 files changed, 126 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/datasets/norb_small.py	Wed May 27 21:28:25 2009 -0400
@@ -0,0 +1,126 @@
+import os
+import numpy
+from ..io.filetensor import read
+from .config import data_root
+from .dataset import Dataset
+
+def load_file(info, normalize=True, downsample_amt=1, dtype='float64'):
+    """Load the smallNorb data into numpy matrices.
+
+    normalize=True divides the pixel values by 255, which makes sense in
+    conjunction with dtype='float32' or dtype='float64'.
+
+    """
+    # TODO: use subtensor access once it is implemented, e.g.
+    # subt = [numpy.arange(self.dim[0]),
+    #         numpy.arange(0, self.dim[1], downsample_amt),
+    #         numpy.arange(0, self.dim[2], downsample_amt)]
+
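+    # dat has shape (n_examples, 2, 96, 96): stereo pairs of 96x96 images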
+    dat = read(open(info['dat']))
+    if downsample_amt != 1:
+        dat = dat[:, :, ::downsample_amt, ::downsample_amt]
+    if dtype != 'int8':
+        dat = numpy.asarray(dat, dtype=dtype)
+    if normalize:
+        dat *= (1.0 / 255.0)
+
+    labels = read(open(info['cat']))
+
+    return dat, labels
+
+
+class NORB_small(object):
+
+    class Paths():
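+        """Filesystem locations of the original smallNORB data files."""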
+        dirpath = os.path.join(data_root(), 'norb_small', 'original')
+        train = {}
+        test = {}
+        train['dat'] = os.path.join(dirpath, 'smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat')
+        train['cat'] = os.path.join(dirpath, 'smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat')
+        test['dat']  = os.path.join(dirpath, 'smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat')
+        test['cat']  = os.path.join(dirpath, 'smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat')
+    path = Paths()
+
+    def __init__(self, ntrain=19440, nvalid=4860, ntest=24300,
+                 downsample_amt=1, seed=1, normalize=True,
+                 mode='stereo', dtype='float64'):
+
+        self.n_classes = 5
+        self.nsamples = 24300
+        self.img_shape = (2,96,96) if mode=='stereo' else (96,96)
+
+        self.ntrain = ntrain
+        self.nvalid = nvalid
+        self.ntest = ntest
+        self.downsample_amt = downsample_amt
+        self.normalize = normalize
+        self.dtype = dtype
+
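+        # a fixed seed yields the same permutation, hence the same
+        # train/valid split, every time the train file is reloaded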
+        rng = numpy.random.RandomState(seed)
+        self.indices = rng.permutation(self.nsamples)
+        self.itr  = self.indices[0:ntrain]
+        self.ival = self.indices[ntrain:ntrain+nvalid]
+        self.current = None
+ 
+    def load(self, dataset='train'):
+
+        if dataset == 'train' or dataset=='valid':
+            print 'accessing train or valid dataset'
+
+            if self.current != 'train':
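+                # drop the previously loaded test set reference so it can be
+                # garbage collected before the large train file is read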
+                if self.current: del self.dat1
+
+                print 'need to reload from train file'
+                dat, cat  = load_file(self.path.train, self.normalize,
+                                      self.downsample_amt, self.dtype)
+                
+                x = dat[self.itr,...].reshape(self.ntrain,-1)
+                y = cat[self.itr]
+                self.dat1 = Dataset.Obj(x=x, y=y) # training
+
+                x = dat[self.ival,...].reshape(self.nvalid,-1)
+                y = cat[self.ival]
+                self.dat2 = Dataset.Obj(x=x, y=y) # validation
+
+                del dat, cat, x, y
+
+            rval = self.dat1 if dataset=='train' else self.dat2 
+            self.current = 'train'
+
+        elif dataset=='test':
+
+            print 'retrieving test set'
+            if self.current!='test':
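+                # drop the train/valid references before loading the test file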
+                if self.current: del self.dat1, self.dat2
+
+                print 'need to reload from test file'
+                dat, cat = load_file(self.path.test, self.normalize,
+                                     self.downsample_amt, self.dtype)
+
+                x = dat.reshape(self.nsamples,-1)
+                y = cat
+                self.dat1 = Dataset.Obj(x=x, y=y)
+                
+                del dat, cat, x, y
+
+            rval = self.dat1
+            self.current = 'test'
+        else:
+            raise ValueError("Expected one of [train|valid|test]")
+
+        return rval
+
+    def __getattribute__(self, name):
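+        # route .train/.valid/.test through load(), which caches the most
+        # recently used data file; all other attributes are looked up normally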
+        if name in ('train','valid','test'):
+            return object.__getattribute__(self, 'load')(name)
+        else:
+            return object.__getattribute__(self, name)
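
A minimal usage sketch of the lazy loading described in the commit message,
assuming the four smallNORB .mat files are installed under
data_root()/norb_small/original (variable names are illustrative only):

    from pylearn.datasets.norb_small import NORB_small

    dataset = NORB_small(downsample_amt=2, dtype='float32')

    train = dataset.train   # reads the training file, splits train/valid
    valid = dataset.valid   # cached: no reload, identical split
    print train.x.shape, train.y.shape

    # local references keep the training arrays alive; drop them first so
    # that loading the test file can actually replace them in memory
    del train, valid
    test = dataset.test     # deletes the cached train/valid, reads test file
    test = dataset.test     # cached: no reload

    train = dataset.train   # costly: reloads the training file from disk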