# HG changeset patch # User gdesjardins # Date 1292886551 18000 # Node ID 2d3cbbb36178672324f46adf1ef570ae106f35a4 # Parent 124b939d997f75b924ef748dedad60ac6a011ab6# Parent 0ff6c613cdf019a692b6fc2a49f04627b329dd38 merge diff -r 0ff6c613cdf0 -r 2d3cbbb36178 pylearn/datasets/caltech.py --- a/pylearn/datasets/caltech.py Tue Dec 14 14:53:48 2010 -0500 +++ b/pylearn/datasets/caltech.py Mon Dec 20 18:09:11 2010 -0500 @@ -27,20 +27,3 @@ rval.img_shape = (28,28) return rval - -def caltech_silhouette2(): - - rval = Dataset() - - from scipy import io - path = '/data/lisa6/desjagui/caltech101_silhouettes_28_split1.mat' - - data = io.loadmat(open(path,'r')) - - rval.train = Dataset.Obj(x=data['train_data'], y=data['train_labels']) - rval.valid = Dataset.Obj(x=data['val_data'], y=data['val_labels']) - rval.test = Dataset.Obj(x=data['test_data'], y=data['test_labels']) - rval.n_classes = 101 - rval.img_shape = (28,28) - - return rval diff -r 0ff6c613cdf0 -r 2d3cbbb36178 pylearn/datasets/nist_all.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/nist_all.py Mon Dec 20 18:09:11 2010 -0500 @@ -0,0 +1,65 @@ +""" +Provides a Dataset to access the nist digits dataset. +""" + +import os, numpy +from pylearn.io import filetensor as ft +from pylearn.datasets.config import data_root # config +from pylearn.datasets.dataset import Dataset + +from pylearn.datasets.nist_sd import nist_to_float_11, nist_to_float_01 + + +def load(dataset = 'train', attribute = 'data'): + """Read and return the filetensor 'all_<dataset>_<attribute>.ft' found under data_root()/nist/by_class/all. + + :param dataset: str that is 'train', 'valid' or 'test' (train_valid_test below only requests 'train' and 'test') + :param attribute: str that is 'data' or 'labels' + """ + fn = 'all_' + dataset + '_' + attribute + '.ft' + fn = os.path.join(data_root(), 'nist', 'by_class', 'all', fn) + + fd = open(fn) + data = ft.read(fd) + fd.close() + + return data + +def train_valid_test(ntrain=651668, nvalid=80000, ntest=82587, + path=None, range = '01'): + """ + Load the nist digits dataset as a Dataset exposing train, valid and test splits plus a preprocess function. 
+ + @note: the examples are uint8 and the labels are int32. The valid split is always the nvalid rows of the train file starting at row 651668, whatever ntrain is. + @todo: possibility of loading part of the data. NOTE(review): path is accepted but never used; range must be '01' or '11', any other value raises ValueError. + """ + rval = Dataset() + + # dataset-level metadata + rval.n_classes = 62 + rval.img_shape = (32,32) + + if range == '01': + rval.preprocess = nist_to_float_01 + elif range == '11': + rval.preprocess = nist_to_float_11 + else: + raise ValueError('Nist Digits dataset does not support range = %s' % range) + print "Nist Digits dataset: using preproc will provide inputs in the %s range." \ + % range + + # train + examples = load(dataset = 'train', attribute = 'data') + labels = load(dataset = 'train', attribute = 'labels') + rval.train = Dataset.Obj(x=examples[:ntrain], y=labels[:ntrain]) + + # valid: sliced from the train file at a fixed offset (651668 is the default ntrain), independent of the ntrain argument + rval.valid = Dataset.Obj(x=examples[651668:651668+nvalid], y=labels[651668:651668+nvalid]) + + # test + examples = load(dataset = 'test', attribute = 'data') + labels = load(dataset = 'test', attribute = 'labels') + rval.test = Dataset.Obj(x=examples[:ntest], y=labels[:ntest]) + + return rval + diff -r 0ff6c613cdf0 -r 2d3cbbb36178 pylearn/datasets/test_modes.py --- a/pylearn/datasets/test_modes.py Tue Dec 14 14:53:48 2010 -0500 +++ b/pylearn/datasets/test_modes.py Mon Dec 20 18:09:11 2010 -0500 @@ -131,9 +131,11 @@ for bi, mode in enumerate(modes): mi, = numpy.where(mode != 0) + modes_i.append(mi) bitflip = self.rng.binomial(1,self.p[mi], size=(1, self.img_size)) data[bi] = numpy.abs(self.modes[mi] - bitflip) self.data = data + self.data_modes = modes_i return data diff -r 0ff6c613cdf0 -r 2d3cbbb36178 pylearn/sampling/hmc.py --- a/pylearn/sampling/hmc.py Tue Dec 14 14:53:48 2010 -0500 +++ b/pylearn/sampling/hmc.py Mon Dec 20 18:09:11 2010 -0500 @@ -132,6 +132,12 @@ non_sequences=[stepsize], n_steps=n_steps-1) + # NOTE: Scan always returns an updates dictionary, in case the scanned function draws + # samples from a RandomStream. These updates must then be used when compiling the Theano + # function, to avoid drawing the same random numbers each time the function is called. 
In + # this case however, we consciously ignore "scan_updates" because we know it is empty. + assert not scan_updates + # The last velocity returned by the scan op is at time-step: t + n_steps* stepsize - 1/2 # We therefore perform one more half-step to return vel(t + n_steps*stepsize) energy = energy_fn(final_p)