# HG changeset patch # User Olivier Breuleux # Date 1232139227 18000 # Node ID e8cb4bde30a73bf5c44a69592e5373623cea9afa # Parent 7cee8c7f04498c1e1f78073d99b99c2fcfbd2466# Parent 8f40262297cf1a73551fd296eea40838025a5997 merge diff -r 7cee8c7f0449 -r e8cb4bde30a7 pylearn/datasets/tzanetakis.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/tzanetakis.py Fri Jan 16 15:53:47 2009 -0500 @@ -0,0 +1,102 @@ +""" +Load Tzanetakis' genre-classification dataset. + +""" +from __future__ import absolute_import + +import os +import numpy + +from ..io.amat import AMat +from .config import data_root +from .dataset import dataset_factory, Dataset + +def head(n=10, path=None): + """Load the first MNIST examples. + + Returns two matrices: x, y. x has N rows of 784 columns. Each row of x represents the + 28x28 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] + is the label of the i'th row of x. + + """ + path = os.path.join(data_root(), 'mnist','mnist_with_header.amat') if path is None else path + + dat = AMat(path=path, head=n) + + try: + assert dat.input.shape[0] == n + assert dat.target.shape[0] == n + except Exception , e: + raise Exception("failed to read MNIST data", (dat, e)) + + return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) + +def all(path=None): + return head(n=None, path=path) + +def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None): + all_x, all_targ = head(ntrain+nvalid+ntest, path=path) + + rval = Dataset() + + rval.train = Dataset.Obj(x=all_x[0:ntrain], + y=all_targ[0:ntrain]) + rval.valid = Dataset.Obj(x=all_x[ntrain:ntrain+nvalid], + y=all_targ[ntrain:ntrain+nvalid]) + rval.test = Dataset.Obj(x=all_x[ntrain+nvalid:ntrain+nvalid+ntest], + y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest]) + + rval.n_classes = 10 + rval.img_shape = (28,28) + return rval + + +def mfcc16(segments_per_song = 1, include_covariance = True, random_split = 0, + ntrain = 700, nvalid = 100, ntest = 200): + if segments_per_song != 1: + raise NotImplementedError() + + path = os.path.join(data_root(), 'tzanetakis','feat_mfcc16_540_1.stat.amat') + dat = AMat(path=path) + all_input = dat.input + assert all_input.shape == (1000 * segments_per_song, 152) + all_targ = numpy.tile(numpy.arange(10).reshape(10,1), 100 * segments_per_song)\ + .reshape(1000 * segments_per_song) + + if not include_covariance: + all_input = all_input[:,0:16] + + #shuffle the data according to the random split + assert all_input.shape[0] == all_targ.shape[0] + seed = random_split + 1 + numpy.random.RandomState(seed).shuffle(all_input) + numpy.random.RandomState(seed).shuffle(all_targ) + + #construct a dataset to return + rval = Dataset() + + rval.train = Dataset.Obj(x=all_input[0:ntrain], + y=all_targ[0:ntrain]) + rval.valid = Dataset.Obj(x=all_input[ntrain:ntrain+nvalid], + y=all_targ[ntrain:ntrain+nvalid]) + rval.test = Dataset.Obj(x=all_input[ntrain+nvalid:ntrain+nvalid+ntest], + y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest]) + + rval.n_classes = 10 + + return rval + + + + +def mnist_factory(variant="", ntrain=None, nvalid=None, ntest=None): + if variant=="": + return train_valid_test() + elif variant=="1k": + return train_valid_test(ntrain=1000, nvalid=200, ntest=200) + elif variant=="10k": + return train_valid_test(ntrain=10000, nvalid=2000, ntest=2000) + elif variant=="custom": + return train_valid_test(ntrain=ntrain, nvalid=nvalid, ntest=ntest) + else: + raise Exception('Unknown MNIST variant', variant) diff -r 7cee8c7f0449 -r e8cb4bde30a7 pylearn/dbdict/newstuff.py --- a/pylearn/dbdict/newstuff.py Fri Jan 16 15:53:22 2009 -0500 +++ b/pylearn/dbdict/newstuff.py Fri Jan 16 15:53:47 2009 -0500 @@ -247,6 +247,10 @@ self.state = state self.feedback = None + #TODO: make this a property and disallow changing it during a with block + self.catch_sigterm = True + self.catch_sigint = True + def switch(self, message = None): feedback = self.feedback self.feedback = None @@ -266,7 +270,7 @@ self.state.dbdict.status = self.RUNNING v = self.COMPLETE - with self: + with self: #calls __enter__ and then __exit__ try: v = self.experiment(self.state, self) finally: @@ -282,10 +286,12 @@ def __enter__(self): # install a SIGTERM handler that asks the experiment function to return # the next time it will call switch() - self.prev_sigterm = signal.getsignal(signal.SIGTERM) - self.prev_sigint = signal.getsignal(signal.SIGINT) - signal.signal(signal.SIGTERM, self.on_sigterm) - signal.signal(signal.SIGINT, self.on_sigterm) + if self.catch_sigterm: + self.prev_sigterm = signal.getsignal(signal.SIGTERM) + signal.signal(signal.SIGTERM, self.on_sigterm) + if self.catch_sigint: + self.prev_sigint = signal.getsignal(signal.SIGINT) + signal.signal(signal.SIGINT, self.on_sigterm) return self def __exit__(self, type, value, tb_traceback, save = True): @@ -294,10 +300,12 @@ raise type, value, tb_traceback except: traceback.print_exc() - signal.signal(signal.SIGTERM, self.prev_sigterm) - signal.signal(signal.SIGINT, self.prev_sigint) - self.prev_sigterm = None - self.prev_sigint = None + if self.catch_sigterm: + signal.signal(signal.SIGTERM, self.prev_sigterm) + self.prev_sigterm = None + if self.catch_sigint: + signal.signal(signal.SIGINT, self.prev_sigint) + self.prev_sigint = None if save: self.save() return True @@ -519,6 +527,8 @@ help = 'the working directory in which to run the experiment') parser_cmdline.add_option('-n', '--dry-run', action = 'store_true', dest = 'dry_run', default = False, help = 'use this option to run the whole experiment in a temporary working directory (cleaned after use)') +parser_cmdline.add_option('-2', '--sigint', action = 'store_true', dest = 'allow_sigint', default = False, + help = 'allow sigint (CTRL-C) to interrupt a process') def runner_cmdline(options, experiment, *strings): """ @@ -551,6 +561,7 @@ experiment, state, redirect_stdout = options.redirect or options.redirect_stdout, redirect_stderr = options.redirect or options.redirect_stderr) + channel.catch_sigint = not options.allow_sigint channel.run(force = options.force) if options.dry_run: shutil.rmtree(workdir, ignore_errors=True) diff -r 7cee8c7f0449 -r e8cb4bde30a7 pylearn/io/amat.py --- a/pylearn/io/amat.py Fri Jan 16 15:53:22 2009 -0500 +++ b/pylearn/io/amat.py Fri Jan 16 15:53:47 2009 -0500 @@ -1,4 +1,22 @@ -"""load PLearn AMat files""" +"""load PLearn AMat files + + +An AMat file is an ascii format for dense matrices. + +The format is not precisely defined, so I'll describe here a single recipe for making a valid +file. + +.. code-block:: text + + #size: + #sizes: + number number number .... + number number number .... + + +Tabs and spaces are both valid delimiters. Newlines separate consecutive rows. + +""" import sys, numpy, array