changeset 1392:2d3cbbb36178

merge
author gdesjardins
date Mon, 20 Dec 2010 18:09:11 -0500
parents 124b939d997f (diff) 0ff6c613cdf0 (current diff)
children 8ecc6da87350
files
diffstat 4 files changed, 73 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/datasets/caltech.py	Tue Dec 14 14:53:48 2010 -0500
+++ b/pylearn/datasets/caltech.py	Mon Dec 20 18:09:11 2010 -0500
@@ -27,20 +27,3 @@
     rval.img_shape = (28,28)
 
     return rval
-
-def caltech_silhouette2():
-
-    rval = Dataset()
-
-    from scipy import io 
-    path = '/data/lisa6/desjagui/caltech101_silhouettes_28_split1.mat'
-
-    data = io.loadmat(open(path,'r'))
-
-    rval.train = Dataset.Obj(x=data['train_data'], y=data['train_labels'])
-    rval.valid = Dataset.Obj(x=data['val_data'],   y=data['val_labels'])
-    rval.test  = Dataset.Obj(x=data['test_data'],  y=data['test_labels'])
-    rval.n_classes = 101
-    rval.img_shape = (28,28)
-
-    return rval
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/datasets/nist_all.py	Mon Dec 20 18:09:11 2010 -0500
@@ -0,0 +1,65 @@
+"""
+Provides a Dataset to access the nist digits dataset. 
+"""
+
+import os, numpy
+from pylearn.io import filetensor as ft
+from pylearn.datasets.config import data_root # config
+from pylearn.datasets.dataset import Dataset
+
+from pylearn.datasets.nist_sd import nist_to_float_11, nist_to_float_01
+
+
+def load(dataset = 'train', attribute = 'data'):
+  """Load the filetensor corresponding to the set and attribute.
+
+  :param dataset: str that is 'train', 'valid' or 'test'
+  :param attribute: str that is 'data' or 'labels'
+  :returns: the value ft.read() produces for all_<dataset>_<attribute>.ft
+  """
+  fn = os.path.join(data_root(), 'nist', 'by_class', 'all',
+                    'all_' + dataset + '_' + attribute + '.ft')
+  fd = open(fn, 'rb')  # binary mode: .ft is assumed to be a binary format
+  try:  # release the handle even when ft.read() raises
+    return ft.read(fd)
+  finally:
+    fd.close()
+
+def train_valid_test(ntrain=651668, nvalid=80000, ntest=82587, 
+                     path=None, range = '01'):
+  """
+  Build a Dataset holding train/valid/test splits of the nist data.
+
+  :param ntrain: number of leading examples of 'train' kept as the train split
+  :param nvalid: size of the valid split, read from 'train' at offset 651668
+  :param ntest: number of leading examples of 'test' kept as the test split
+  :param path: accepted but not used by this function
+  :param range: '01' or '11' (else ValueError); picks the nist_to_float_* preprocess
+
+  @note: the examples are uint8 and the labels are int32.
+  @todo: possibility of loading part of the data.
+  """
+  rval = Dataset()
+  rval.img_shape = (32,32)
+  rval.n_classes = 62
+
+  # Reject unsupported ranges up front, then pick the matching converter.
+  if range not in ('01', '11'):
+    raise ValueError('Nist Digits dataset does not support range = %s' % range)
+  rval.preprocess = nist_to_float_01 if range == '01' else nist_to_float_11
+  print "Nist Digits dataset: using preproc will provide inputs in the %s range." \
+      % range
+
+  valid_start = 651668  # the valid split always begins here, whatever ntrain is
+  valid_stop = valid_start + nvalid
+  x = load(dataset = 'train', attribute = 'data')
+  y = load(dataset = 'train', attribute = 'labels')
+  rval.train = Dataset.Obj(x=x[:ntrain], y=y[:ntrain])
+  rval.valid = Dataset.Obj(x=x[valid_start:valid_stop], y=y[valid_start:valid_stop])
+
+  x = load(dataset = 'test', attribute = 'data')
+  y = load(dataset = 'test', attribute = 'labels')
+  rval.test = Dataset.Obj(x=x[:ntest], y=y[:ntest])
+  return rval
+
--- a/pylearn/datasets/test_modes.py	Tue Dec 14 14:53:48 2010 -0500
+++ b/pylearn/datasets/test_modes.py	Mon Dec 20 18:09:11 2010 -0500
@@ -131,9 +131,11 @@
 
         for bi, mode in enumerate(modes):
             mi, = numpy.where(mode != 0)
+            modes_i.append(mi)
             bitflip = self.rng.binomial(1,self.p[mi], size=(1, self.img_size))
             data[bi] = numpy.abs(self.modes[mi] - bitflip)
 
         self.data = data
+        self.data_modes = modes_i
 
         return data
--- a/pylearn/sampling/hmc.py	Tue Dec 14 14:53:48 2010 -0500
+++ b/pylearn/sampling/hmc.py	Mon Dec 20 18:09:11 2010 -0500
@@ -132,6 +132,12 @@
             non_sequences=[stepsize],
             n_steps=n_steps-1)
 
+    # NOTE: Scan always returns an updates dictionary, in case the scanned function draws
+    # samples from a RandomStream. These updates must then be used when compiling the Theano
+    # function, to avoid drawing the same random numbers each time the function is called. In
+    # this case however, we consciously ignore "scan_updates" because we know it is empty.
+    assert not scan_updates
+    
     # The last velocity returned by the scan op is at time-step: t + n_steps* stepsize - 1/2
     # We therefore perform one more half-step to return vel(t + n_steps*stepsize)
     energy = energy_fn(final_p)