Mercurial > pylearn
changeset 687:4b5e0b5a11e1
added io/audio that can read mp3s
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Thu, 14 May 2009 16:33:38 -0400 |
parents | 0929be7f9e43 |
children | 49e531f7b0ba |
files | pylearn/io/audio.py |
diffstat | 1 files changed, 69 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Contents of pylearn/io/audio.py as added by this changeset (originally shown
# as a unified diff against /dev/null).
"""Theano Op that reads an audio file through ``pygmy.audio.audioread``."""

import numpy
import theano

from wavread import WavRead, wav_read_int16, wav_read_double

# Define AudioRead and company only if pygmy.audio can be imported.  Only the
# import itself is guarded, so an unrelated error raised while building the
# class is never mistaken for "pygmy is not installed".
try:
    import pygmy.audio
except ImportError:
    pass
else:

    class AudioRead(theano.Op):
        """Read a wave file or mp3.

        input - filename
        output - the contents of the audiofile in pcm format, and the
                 samplerate

        The data output is declared as a float64 vector when ``mono`` is true
        and as a float64 matrix otherwise; the samplerate output is a float64
        scalar.

        Keyword arguments accepted by the constructor are exactly the names in
        ``_audioread_attrs``; each one is forwarded unchanged to
        ``pygmy.audio.audioread`` (see that function for their meaning).
        """

        # Arguments to pygmy.audio.audioread.  These names also define the
        # identity of the Op for __eq__ / __hash__.
        _audioread_attrs = ('mono', 'tlast', 'fs_target', 'stripzeros',
                            'stats_only', 'decoder')

        # Class-level defaults; __init__ overrides them per instance.
        mono = False
        tlast = -1
        fs_target = -1
        stripzeros = 'none'
        stats_only = False
        decoder = 'madplay'

        def __init__(self, **kwargs):
            """Override any of the ``_audioread_attrs`` defaults.

            Raises TypeError for a keyword that is not in
            ``_audioread_attrs``.
            """
            for kw in kwargs:
                if kw not in self._audioread_attrs:
                    raise TypeError('unrecognized keyword argument', kw)
                setattr(self, kw, kwargs[kw])

        def _attr_values(self):
            """Return the configured audioread arguments as a tuple."""
            return tuple(getattr(self, a) for a in self._audioread_attrs)

        def __eq__(self, other):
            """Two Ops are equal when their type and all arguments match."""
            return (type(self) == type(other)
                    and self._attr_values() == other._attr_values())

        def __ne__(self, other):
            # Python 2 does not derive != from ==, so define it explicitly.
            return not self.__eq__(other)

        def __hash__(self):
            # Hash the same fields that __eq__ compares.  The previous
            # implementation passed ``initial=`` to reduce() (which takes no
            # keyword arguments) and XOR-ed raw attribute values, some of
            # which are strings, so it raised TypeError on every call.
            return hash((type(self),) + self._attr_values())

        def make_node(self, path):
            """Build the Apply node with outputs (data, samplerate).

            ``path`` is handed to ``theano.Apply`` unchanged.
            NOTE(review): presumably callers pass a Theano variable holding
            the filename -- confirm.
            """
            if self.mono:
                out_type = theano.tensor.dvector
            else:
                out_type = theano.tensor.dmatrix
            return theano.Apply(self, [path],
                                [out_type(), theano.tensor.dscalar()])

        def perform(self, node, inputs, output_storage):
            """Read the file named by the single input and store the results.

            ``inputs`` is a one-element sequence ``(path,)`` and
            ``output_storage`` is ``(data_storage, sr_storage)``; the decoded
            data and the samplerate are written to element 0 of each storage
            cell.
            """
            # Unpack in the body rather than in the signature: tuple
            # parameters are a syntax error on Python 3 (PEP 3113).
            (path,) = inputs
            data_storage, sr_storage = output_storage

            # The third returned value is not used by this Op.
            data, sr, _ = pygmy.audio.audioread(
                path,
                mono=self.mono,
                tlast=self.tlast,
                fs_target=self.fs_target,
                stripzeros=self.stripzeros,
                stats_only=self.stats_only,
                decoder=self.decoder)

            # Sanity checks: the result must match the types declared in
            # make_node.
            assert isinstance(data, numpy.ndarray)
            assert data.ndim == (1 if self.mono else 2)
            assert data.dtype == numpy.float64
            data_storage[0] = data

            sr_storage[0] = numpy.asarray(sr, dtype='float64')
            assert sr_storage[0].ndim == 0

        def grad(self, inputs, g_output):
            """Return ``None`` for every input (no gradient is defined)."""
            return [None for _ in inputs]

    # Ready-made instances: default settings, and mono output.
    audioread = AudioRead()
    audioread_mono = AudioRead(mono=True)