# HG changeset patch
# User James Bergstra
# Date 1242333218 14400
# Node ID 4b5e0b5a11e16104682805d7d4aae4a5a5ea8b6f
# Parent 0929be7f9e430faf200cf5af5d21699b5825ae05
# added io/audio that can read mp3s
# diff -r 0929be7f9e43 -r 4b5e0b5a11e1 pylearn/io/audio.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/pylearn/io/audio.py Thu May 14 16:33:38 2009 -0400
#
# The code below is the content that the changeset above adds as
# pylearn/io/audio.py, restored to one statement per line.
"""Theano Op that reads audio files through ``pygmy.audio.audioread``.

``AudioRead``, ``audioread`` and ``audioread_mono`` are only defined when
``pygmy.audio`` can be imported.
"""

import numpy
import theano

from wavread import WavRead, wav_read_int16, wav_read_double

try:
    import pygmy.audio
except ImportError:
    # pygmy is an optional dependency: without it this module simply does not
    # define AudioRead / audioread / audioread_mono.
    pass
else:
    class AudioRead(theano.Op):
        """Read a wave file or mp3.

        input - filename
        output - the contents of the audiofile in pcm format, and the
                 samplerate

        Every keyword accepted by the constructor is forwarded unchanged to
        ``pygmy.audio.audioread``; the class attributes below are the values
        used when a keyword is not given.
        """

        # arguments to pygmy.audio.audioread; also the fields that define
        # equality and hashing of the Op
        _audioread_attrs = ('mono', 'tlast', 'fs_target', 'stripzeros',
                            'stats_only', 'decoder')

        mono = False
        tlast = -1
        fs_target = -1
        stripzeros = 'none'
        stats_only = False
        decoder = 'madplay'

        def __init__(self, **kwargs):
            """Override any of the ``_audioread_attrs`` defaults.

            Raises TypeError for a keyword that is not in
            ``_audioread_attrs``.
            """
            for kw in kwargs:
                if kw not in self._audioread_attrs:
                    raise TypeError('unrecognized keyword argument', kw)
                setattr(self, kw, kwargs[kw])

        def __eq__(self, other):
            """Ops are equal when they have the same type and settings."""
            return (type(self) == type(other)) and all(
                getattr(self, a) == getattr(other, a)
                for a in self._audioread_attrs)

        def __ne__(self, other):
            # Python 2 does not derive != from ==, so spell it out.
            return not self == other

        def __hash__(self):
            """Hash over exactly the fields that ``__eq__`` compares."""
            # A tuple hash copes with the mixed bool/int/str settings; the
            # previous XOR fold over the raw values could not.
            return hash(
                (type(self),)
                + tuple(getattr(self, a) for a in self._audioread_attrs))

        def make_node(self, path):
            """Build the Apply node: outputs are (pcm data, samplerate).

            The data output is a dvector when ``self.mono`` is true and a
            dmatrix otherwise; the samplerate output is a dscalar.
            """
            # NOTE(review): `path` is handed to Apply as-is; presumably
            # callers pass a Theano variable holding the filename -- confirm.
            out_type = (theano.tensor.dvector if self.mono
                        else theano.tensor.dmatrix)
            return theano.Apply(self, [path],
                                [out_type(), theano.tensor.dscalar()])

        def perform(self, node, inputs, output_storage):
            """Decode the file at ``inputs[0]`` and fill both output cells."""
            path, = inputs
            data_storage, sr_storage = output_storage

            # The third value returned by audioread is not used here.
            data, sr, _dz = pygmy.audio.audioread(
                path,
                mono=self.mono,
                tlast=self.tlast,
                fs_target=self.fs_target,
                stripzeros=self.stripzeros,
                stats_only=self.stats_only,
                decoder=self.decoder)

            # Sanity-check the decoder output against the output types
            # declared in make_node.
            assert isinstance(data, numpy.ndarray), \
                'audioread did not return an ndarray'
            assert data.ndim == (1 if self.mono else 2), \
                'unexpected number of dimensions in audio data'
            assert data.dtype == numpy.float64, \
                'audio data is not float64'
            data_storage[0] = data

            sr_storage[0] = numpy.asarray(sr, dtype='float64')
            assert sr_storage[0].ndim == 0, 'samplerate is not a scalar'

        def grad(self, inputs, g_output):
            """Return None for every input (no gradient is defined)."""
            return [None] * len(inputs)

    audioread = AudioRead()
    audioread_mono = AudioRead(mono=True)