changeset 687:4b5e0b5a11e1

added io/audio that can read mp3s
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 14 May 2009 16:33:38 -0400
parents 0929be7f9e43
children 49e531f7b0ba
files pylearn/io/audio.py
diffstat 1 files changed, 69 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/io/audio.py	Thu May 14 16:33:38 2009 -0400
@@ -0,0 +1,69 @@
+
+import numpy
+import theano
+
+from wavread import WavRead, wav_read_int16, wav_read_double
+
+try: #define audioread and company only if pygmy.audio can be imported
+    import pygmy.audio
+
+    class AudioRead(theano.Op):
+        """Read a wave file or mp3 by calling pygmy.audio.audioread
+
+        input - filename
+        outputs - the contents of the audiofile in pcm format (float64 vector if
+                  mono, float64 matrix otherwise), and the samplerate (float64 scalar)
+
+        Keyword arguments override the class-level defaults below; anything not
+        listed in _audioread_attrs raises TypeError.
+        """
+        #arguments to pygmy.audio.audioread (also what __eq__ and __hash__ compare)
+        _audioread_attrs=('mono', 'tlast', 'fs_target', 'stripzeros', 'stats_only', 'decoder')
+
+        mono = False
+        tlast=-1
+        fs_target=-1
+        stripzeros='none'
+        stats_only=False
+        decoder = 'madplay'
+
+        def __init__(self, **kwargs):
+            for kw in kwargs:
+                if kw not in self._audioread_attrs:
+                    raise TypeError('unrecognized keyword argument', kw)
+                setattr(self, kw, kwargs[kw])
+        def __eq__(self, other):
+            #ops are interchangeable iff same class and same audioread arguments
+            return (type(self) == type(other)) and \
+                    all(getattr(self, a) == getattr(other,a) for a in self._audioread_attrs)
+        def __hash__(self):
+            #hash the fields __eq__ compares; the old reduce(..., initial=...) always raised
+            #TypeError (reduce takes no keywords, '^' is undefined on the str attributes)
+            return hash((type(self),)
+                    + tuple(getattr(self, a) for a in self._audioread_attrs))
+        def make_node(self, path):
+            #outputs: the samples (vector if mono, else matrix) and the samplerate
+            out_type = theano.tensor.dvector if self.mono else theano.tensor.dmatrix
+            return theano.Apply(self, [path], [out_type(), theano.tensor.dscalar()])
+        def perform(self, node, (path,), (data_storage, sr_storage)):
+            #dz (third value returned by audioread) is not used
+            data, sr, dz = pygmy.audio.audioread(path, mono=self.mono,
+                    tlast=self.tlast, fs_target=self.fs_target,
+                    stripzeros=self.stripzeros, stats_only=self.stats_only,
+                    decoder=self.decoder)
+            #check that the data matches the output type declared in make_node
+            assert isinstance(data, numpy.ndarray)
+            assert data.ndim == (1 if self.mono else 2)
+            assert data.dtype == numpy.float64
+            data_storage[0] = data
+            sr_storage[0] = numpy.asarray(sr,dtype='float64')
+            assert sr_storage[0].ndim==0
+        def grad(self, inputs, g_output):
+            #no gradient is defined with respect to the filename
+            return [None for i in inputs]
+
+    audioread = AudioRead()  #op instance using the class-level default arguments
+    audioread_mono = AudioRead(mono=True)  #same defaults, but with mono=True
+except ImportError:
+    pass  #pygmy.audio could not be imported: AudioRead and the instances above stay undefined
+