# HG changeset patch
# User Sam Lantinga <slouken@libsdl.org>
# Date 1219676939 0
# Node ID f8f68f47285a9096922ab50e98a709901aec92e7
# Parent  336b604ec15be45c67b251b8de1cf19d5fd809b8
Final merge of Google Summer of Code 2008 work...

Audio Ideas - Resampling and Pitch Shifting
by Aaron Wishnick, mentored by Ryan C. Gordon

diff -r 336b604ec15b -r f8f68f47285a src/audio/SDL_audio.c
--- a/src/audio/SDL_audio.c	Mon Aug 25 10:14:21 2008 +0000
+++ b/src/audio/SDL_audio.c	Mon Aug 25 15:08:59 2008 +0000
@@ -256,6 +256,68 @@
 #undef FILL_STUB
 }
 
+/* Streaming functions (for when the input and output buffer sizes are different) */
+/* Write [length] bytes from buf into the streamer */
+void
+SDL_StreamWrite(SDL_AudioStreamer * stream, Uint8 * buf, int length)
+{
+    int i;
+
+    for (i = 0; i < length; ++i) {
+        stream->buffer[stream->write_pos] = buf[i];
+        ++stream->write_pos;
+    }
+}
+
+/* Read [length] bytes out of the streamer into buf */
+void
+SDL_StreamRead(SDL_AudioStreamer * stream, Uint8 * buf, int length)
+{
+    int i;
+
+    for (i = 0; i < length; ++i) {
+        buf[i] = stream->buffer[stream->read_pos];
+        ++stream->read_pos;
+    }
+}
+
+int
+SDL_StreamLength(SDL_AudioStreamer * stream)
+{
+    return (stream->write_pos - stream->read_pos) % stream->max_len;
+}
+
+/* Initialize the stream by allocating the buffer and setting the read/write heads to the beginning */
+int
+SDL_StreamInit(SDL_AudioStreamer * stream, int max_len, Uint8 silence)
+{
+    int i;
+
+    /* First try to allocate the buffer */
+    stream->buffer = (Uint8 *) malloc(max_len);
+    if (stream->buffer == NULL) {
+        return -1;
+    }
+
+    stream->max_len = max_len;
+    stream->read_pos = 0;
+    stream->write_pos = 0;
+
+    /* Zero out the buffer */
+    for (i = 0; i < max_len; ++i) {
+        stream->buffer[i] = silence;
+    }
+}
+
+/* Deinitialize the stream simply by freeing the buffer */
+void
+SDL_StreamDeinit(SDL_AudioStreamer * stream)
+{
+    if (stream->buffer != NULL) {
+        free(stream->buffer);
+    }
+}
+
 
 /* The general mixing thread function */
 int SDLCALL
@@ -267,6 +329,11 @@
     void *udata;
     void (SDLCALL * fill) (void *userdata, Uint8 * stream, int len);
     int silence;
+    int stream_max_len;
+
+    /* For streaming when the buffer sizes don't match up */
+    Uint8 *istream;
+    int istream_len;
 
     /* Perform any thread setup */
     device->threadid = SDL_ThreadID();
@@ -276,67 +343,188 @@
     fill = device->spec.callback;
     udata = device->spec.userdata;
 
+    /* By default do not stream */
+    device->use_streamer = 0;
+
     if (device->convert.needed) {
         if (device->convert.src_format == AUDIO_U8) {
             silence = 0x80;
         } else {
             silence = 0;
         }
-        stream_len = device->convert.len;
+
+        /* If the result of the conversion alters the length, i.e. resampling is being used, use the streamer */
+        if (device->convert.len_mult != 1 || device->convert.len_div != 1) {
+            /* The streamer's maximum length should be twice whichever is larger: spec.size or len_cvt */
+            stream_max_len = 2 * device->spec.size;
+            if (device->convert.len_mult > device->convert.len_div) {
+                stream_max_len *= device->convert.len_mult;
+                stream_max_len /= device->convert.len_div;
+            }
+            if (SDL_StreamInit(&device->streamer, stream_max_len, silence) <
+                0)
+                return -1;
+            device->use_streamer = 1;
+
+            /* istream_len should be the length of what we grab from the callback and feed to conversion,
+               so that we get close to spec_size. I.e. we want device.spec_size = istream_len * u / d
+             */
+            istream_len =
+                device->spec.size * device->convert.len_div /
+                device->convert.len_mult;
+        }
+
+        /* stream_len = device->convert.len; */
+        stream_len = device->spec.size;
     } else {
         silence = device->spec.silence;
         stream_len = device->spec.size;
     }
 
-    /* Loop, filling the audio buffers */
-    while (device->enabled) {
+    /* Determine if the streamer is necessary here */
+    if (device->use_streamer == 1) {
+        /* This code is almost the same as the old code. The difference is, instead of reding
+           directly from the callback into "stream", then converting and sending the audio off,
+           we go: callback -> "istream" -> (conversion) -> streamer -> stream -> device.
+           However, reading and writing with streamer are done separately:
+           - We only call the callback and write to the streamer when the streamer does not
+           contain enough samples to output to the device.
+           - We only read from the streamer and tell the device to play when the streamer
+           does have enough samples to output.
+           This allows us to perform resampling in the conversion step, where the output of the
+           resampling process can be any number. We will have to see what a good size for the
+           stream's maximum length is, but I suspect 2*max(len_cvt, stream_len) is a good figure.
+         */
+        while (device->enabled) {
+            /* Only read in audio if the streamer doesn't have enough already (if it does not have enough samples to output) */
+            if (SDL_StreamLength(&device->streamer) < stream_len) {
+                /* Set up istream */
+                if (device->convert.needed) {
+                    if (device->convert.buf) {
+                        istream = device->convert.buf;
+                    } else {
+                        continue;
+                    }
+                } else {
+                    istream = current_audio.impl.GetDeviceBuf(device);
+                    if (istream == NULL) {
+                        istream = device->fake_stream;
+                    }
+                }
 
-        /* Fill the current buffer with sound */
-        if (device->convert.needed) {
-            if (device->convert.buf) {
-                stream = device->convert.buf;
-            } else {
-                continue;
+                /* Read from the callback into the _input_ stream */
+                if (!device->paused) {
+                    SDL_mutexP(device->mixer_lock);
+                    (*fill) (udata, istream, istream_len);
+                    SDL_mutexV(device->mixer_lock);
+                }
+
+                /* Convert the audio if necessary and write to the streamer */
+                if (device->convert.needed) {
+                    SDL_ConvertAudio(&device->convert);
+                    if (istream == NULL) {
+                        istream = device->fake_stream;
+                    }
+                    /*SDL_memcpy(istream, device->convert.buf, device->convert.len_cvt); */
+                    SDL_StreamWrite(&device->streamer, device->convert.buf,
+                                    device->convert.len_cvt);
+                } else {
+                    SDL_StreamWrite(&device->streamer, istream, istream_len);
+                }
             }
-        } else {
-            stream = current_audio.impl.GetDeviceBuf(device);
-            if (stream == NULL) {
-                stream = device->fake_stream;
+
+            /* Only output audio if the streamer has enough to output */
+            if (SDL_StreamLength(&device->streamer) >= stream_len) {
+                /* Set up the output stream */
+                if (device->convert.needed) {
+                    if (device->convert.buf) {
+                        stream = device->convert.buf;
+                    } else {
+                        continue;
+                    }
+                } else {
+                    stream = current_audio.impl.GetDeviceBuf(device);
+                    if (stream == NULL) {
+                        stream = device->fake_stream;
+                    }
+                }
+
+                /* Now read from the streamer */
+                SDL_StreamRead(&device->streamer, stream, stream_len);
+
+                /* Ready current buffer for play and change current buffer */
+                if (stream != device->fake_stream) {
+                    current_audio.impl.PlayDevice(device);
+                }
+
+                /* Wait for an audio buffer to become available */
+                if (stream == device->fake_stream) {
+                    SDL_Delay((device->spec.samples * 1000) /
+                              device->spec.freq);
+                } else {
+                    current_audio.impl.WaitDevice(device);
+                }
             }
-        }
 
-        if (!device->paused) {
-            SDL_mutexP(device->mixer_lock);
-            (*fill) (udata, stream, stream_len);
-            SDL_mutexV(device->mixer_lock);
         }
+    } else {
+        /* Otherwise, do not use the streamer. This is the old code. */
 
-        /* Convert the audio if necessary */
-        if (device->convert.needed) {
-            SDL_ConvertAudio(&device->convert);
-            stream = current_audio.impl.GetDeviceBuf(device);
-            if (stream == NULL) {
-                stream = device->fake_stream;
+        /* Loop, filling the audio buffers */
+        while (device->enabled) {
+
+            /* Fill the current buffer with sound */
+            if (device->convert.needed) {
+                if (device->convert.buf) {
+                    stream = device->convert.buf;
+                } else {
+                    continue;
+                }
+            } else {
+                stream = current_audio.impl.GetDeviceBuf(device);
+                if (stream == NULL) {
+                    stream = device->fake_stream;
+                }
             }
-            SDL_memcpy(stream, device->convert.buf, device->convert.len_cvt);
-        }
 
-        /* Ready current buffer for play and change current buffer */
-        if (stream != device->fake_stream) {
-            current_audio.impl.PlayDevice(device);
-        }
+            if (!device->paused) {
+                SDL_mutexP(device->mixer_lock);
+                (*fill) (udata, stream, stream_len);
+                SDL_mutexV(device->mixer_lock);
+            }
 
-        /* Wait for an audio buffer to become available */
-        if (stream == device->fake_stream) {
-            SDL_Delay((device->spec.samples * 1000) / device->spec.freq);
-        } else {
-            current_audio.impl.WaitDevice(device);
+            /* Convert the audio if necessary */
+            if (device->convert.needed) {
+                SDL_ConvertAudio(&device->convert);
+                stream = current_audio.impl.GetDeviceBuf(device);
+                if (stream == NULL) {
+                    stream = device->fake_stream;
+                }
+                SDL_memcpy(stream, device->convert.buf,
+                           device->convert.len_cvt);
+            }
+
+            /* Ready current buffer for play and change current buffer */
+            if (stream != device->fake_stream) {
+                current_audio.impl.PlayDevice(device);
+            }
+
+            /* Wait for an audio buffer to become available */
+            if (stream == device->fake_stream) {
+                SDL_Delay((device->spec.samples * 1000) / device->spec.freq);
+            } else {
+                current_audio.impl.WaitDevice(device);
+            }
         }
     }
 
     /* Wait for the audio to drain.. */
     current_audio.impl.WaitDone(device);
 
+    /* If necessary, deinit the streamer */
+    if (device->use_streamer == 1)
+        SDL_StreamDeinit(&device->streamer);
+
     return (0);
 }
 
diff -r 336b604ec15b -r f8f68f47285a src/audio/SDL_audiocvt.c
--- a/src/audio/SDL_audiocvt.c	Mon Aug 25 10:14:21 2008 +0000
+++ b/src/audio/SDL_audiocvt.c	Mon Aug 25 15:08:59 2008 +0000
@@ -20,12 +20,45 @@
     slouken@libsdl.org
 */
 #include "SDL_config.h"
+#include <math.h>
 
 /* Functions for audio drivers to perform runtime conversion of audio format */
 
 #include "SDL_audio.h"
 #include "SDL_audio_c.h"
 
+#define DEBUG_CONVERT
+
+/* These are fractional multiplication routines. That is, their inputs
+   are two numbers in the range [-1, 1) and the result falls in that
+   same range. The output is the same size as the inputs, i.e.
+   32-bit x 32-bit = 32-bit.
+ */
+
+/* We hope here that the right shift includes sign extension */
+#ifdef SDL_HAS_64BIT_Type
+#define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff)
+#else
+/* If we don't have the 64-bit type, do something more complicated. See http://www.8052.com/mul16.phtml or http://www.cs.uaf.edu/~cs301/notes/Chapter5/node5.html */
+#define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff)
+#endif
+#define SDL_FixMpy16(a, b) ((((Sint32)a * (Sint32)b) >> 14) & 0xffff)
+#define SDL_FixMpy8(a, b) ((((Sint16)a * (Sint16)b) >> 7) & 0xff)
+/* This macro just makes the floating point filtering code not have to be a special case. */
+#define SDL_FloatMpy(a, b) (a * b)
+
+/* These macros take floating point numbers in the range [-1.0f, 1.0f) and
+   represent them as fixed-point numbers in that same range. There's no
+   checking that the floating point argument is inside the appropriate range.
+ */
+
+#define SDL_Make_1_7(a) (Sint8)(a * 128.0f)
+#define SDL_Make_1_15(a) (Sint16)(a * 32768.0f)
+#define SDL_Make_1_31(a) (Sint32)(a * 2147483648.0f)
+#define SDL_Make_2_6(a) (Sint8)(a * 64.0f)
+#define SDL_Make_2_14(a) (Sint16)(a * 16384.0f)
+#define SDL_Make_2_30(a) (Sint32)(a * 1073741824.0f)
+
 /* Effectively mix right and left channels into a single channel */
 static void SDLCALL
 SDL_ConvertMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
@@ -1309,6 +1342,468 @@
     return 0;                   /* no conversion necessary. */
 }
 
+/* Generate the necessary IIR lowpass coefficients for resampling.
+   Assume that the SDL_AudioCVT struct is already set up with
+   the correct values for len_mult and len_div, and use the
+   type of dst_format. Also assume the buffer is allocated.
+   Note the buffer needs to be 6 units long.
+   For now, use RBJ's cookbook coefficients. It might be more
+   optimal to create a Butterworth filter, but this is more difficult.
+*/
+int
+SDL_BuildIIRLowpass(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float fc;                   /* cutoff frequency */
+    float coeff[6];             /* floating point iir coefficients b0, b1, b2, a0, a1, a2 */
+    float scale;
+    float w0, alpha, cosw0;
+    int i;
+
+    /* The higher Q is, the higher CUTOFF can be. Need to find a good balance to avoid aliasing */
+    static const float Q = 5.0f;
+    static const float CUTOFF = 0.4f;
+
+    fc = (cvt->len_mult >
+          cvt->len_div) ? CUTOFF / (float) cvt->len_mult : CUTOFF /
+        (float) cvt->len_div;
+
+    w0 = 2.0f * M_PI * fc;
+    cosw0 = cosf(w0);
+    alpha = sin(w0) / (2.0f * Q);
+
+    /* Compute coefficients, normalizing by a0 */
+    scale = 1.0f / (1.0f + alpha);
+
+    coeff[0] = (1.0f - cosw0) / 2.0f * scale;
+    coeff[1] = (1.0f - cosw0) * scale;
+    coeff[2] = coeff[0];
+
+    coeff[3] = 1.0f;            /* a0 is normalized to 1 */
+    coeff[4] = -2.0f * cosw0 * scale;
+    coeff[5] = (1.0f - alpha) * scale;
+
+    /* Copy the coefficients to the struct. If necessary, convert coefficients to fixed point, using the range (-2.0, 2.0) */
+#define convert_fixed(type, fix) { \
+            type *cvt_coeff = (type *)cvt->coeff; \
+            for(i = 0; i < 6; ++i) { \
+                cvt_coeff[i] = fix(coeff[i]); \
+            } \
+        }
+
+    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
+        float *cvt_coeff = (float *) cvt->coeff;
+        for (i = 0; i < 6; ++i) {
+            cvt_coeff[i] = coeff[i];
+        }
+    } else {
+        switch (SDL_AUDIO_BITSIZE(format)) {
+        case 8:
+            convert_fixed(Uint8, SDL_Make_2_6);
+            break;
+        case 16:
+            convert_fixed(Uint16, SDL_Make_2_14);
+            break;
+        case 32:
+            convert_fixed(Uint32, SDL_Make_2_30);
+            break;
+        }
+    }
+
+#ifdef DEBUG_CONVERT
+#define debug_iir(type) { \
+            type *cvt_coeff = (type *)cvt->coeff; \
+            for(i = 0; i < 6; ++i) { \
+                printf("coeff[%u] = %f = 0x%x\n", i, coeff[i], cvt_coeff[i]); \
+            } \
+        }
+    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
+        float *cvt_coeff = (float *) cvt->coeff;
+        for (i = 0; i < 6; ++i) {
+            printf("coeff[%u] = %f = %f\n", i, coeff[i], cvt_coeff[i]);
+        }
+    } else {
+        switch (SDL_AUDIO_BITSIZE(format)) {
+        case 8:
+            debug_iir(Uint8);
+            break;
+        case 16:
+            debug_iir(Uint16);
+            break;
+        case 32:
+            debug_iir(Uint32);
+            break;
+        }
+    }
+#undef debug_iir
+#endif
+
+    /* Initialize the state buffer to all zeroes, and set initial position */
+    memset(cvt->state_buf, 0, 4 * SDL_AUDIO_BITSIZE(format) / 4);
+    cvt->state_pos = 0;
+#undef convert_fixed
+}
+
+/* Apply the lowpass IIR filter to the given SDL_AudioCVT struct */
+/* This was implemented because it would be much faster than the fir filter, 
+   but it doesn't seem to have a steep enough cutoff so we'd need several
+   cascaded biquads, which probably isn't a great idea. Therefore, this
+   function can probably be discarded.
+*/
+static void
+SDL_FilterIIR(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    Uint32 i, n;
+
+    /* TODO: Check that n is calculated right */
+    n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format);
+
+    /* Note that the coefficients are 2_x and the input is 1_x. Do we need to shift left at the end here? The right shift temp = buf[n] >> 1 needs to depend on whether the type is signed or not for sign extension. */
+    /* cvt->state_pos = 1: state[0] = x_n-1, state[1] = x_n-2, state[2] = y_n-1, state[3] - y_n-2 */
+#define iir_fix(type, mult) {\
+            type *coeff = (type *)cvt->coeff; \
+            type *state = (type *)cvt->state_buf; \
+            type *buf = (type *)cvt->buf; \
+            type temp; \
+            for(i = 0; i < n; ++i) { \
+                    temp = buf[i] >> 1; \
+                    if(cvt->state_pos) { \
+                        buf[i] = mult(coeff[0], temp) + mult(coeff[1], state[0]) + mult(coeff[2], state[1]) - mult(coeff[4], state[2]) - mult(coeff[5], state[3]); \
+                        state[1] = temp; \
+                        state[3] = buf[i]; \
+                        cvt->state_pos = 0; \
+                    } else { \
+                        buf[i] = mult(coeff[0], temp) + mult(coeff[1], state[1]) + mult(coeff[2], state[0]) - mult(coeff[4], state[3]) - mult(coeff[5], state[2]); \
+                        state[0] = temp; \
+                        state[2] = buf[i]; \
+                        cvt->state_pos = 1; \
+                    } \
+                } \
+        }
+/* Need to test to see if the previous method or this one is faster */
+/*#define iir_fix(type, mult) {\
+            type *coeff = (type *)cvt->coeff; \
+            type *state = (type *)cvt->state_buf; \
+            type *buf = (type *)cvt->buf; \
+            type temp; \
+            for(i = 0; i < n; ++i) { \
+                    temp = buf[i] >> 1; \
+                    buf[i] = mult(coeff[0], temp) + mult(coeff[1], state[0]) + mult(coeff[2], state[1]) - mult(coeff[4], state[2]) - mult(coeff[5], state[3]); \
+                    state[1] = state[0]; \
+                    state[0] = temp; \
+                    state[3] = state[2]; \
+                    state[2] = buf[i]; \
+                } \
+        }*/
+
+    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
+        float *coeff = (float *) cvt->coeff;
+        float *state = (float *) cvt->state_buf;
+        float *buf = (float *) cvt->buf;
+        float temp;
+
+        for (i = 0; i < n; ++i) {
+            /* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a[2] * y[n-2] */
+            temp = buf[i];
+            if (cvt->state_pos) {
+                buf[i] =
+                    coeff[0] * buf[n] + coeff[1] * state[0] +
+                    coeff[2] * state[1] - coeff[4] * state[2] -
+                    coeff[5] * state[3];
+                state[1] = temp;
+                state[3] = buf[i];
+                cvt->state_pos = 0;
+            } else {
+                buf[i] =
+                    coeff[0] * buf[n] + coeff[1] * state[1] +
+                    coeff[2] * state[0] - coeff[4] * state[3] -
+                    coeff[5] * state[2];
+                state[0] = temp;
+                state[2] = buf[i];
+                cvt->state_pos = 1;
+            }
+        }
+    } else {
+        /* Treat everything as signed! */
+        switch (SDL_AUDIO_BITSIZE(format)) {
+        case 8:
+            iir_fix(Sint8, SDL_FixMpy8);
+            break;
+        case 16:
+            iir_fix(Sint16, SDL_FixMpy16);
+            break;
+        case 32:
+            iir_fix(Sint32, SDL_FixMpy32);
+            break;
+        }
+    }
+#undef iir_fix
+}
+
+/* Apply the windowed sinc FIR filter to the given SDL_AudioCVT struct.
+*/
+static void
+SDL_FilterFIR(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    int n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format);
+    int m = cvt->len_sinc;
+    int i, j;
+
+    /* 
+       Note: We can make a big optimization here by taking advantage
+       of the fact that the signal is zero stuffed, so we can do
+       significantly fewer multiplications and additions. However, this
+       depends on the zero stuffing ratio, so it may not pay off. This would
+       basically be a polyphase filter.
+     */
+    /* One other way to do this fast is to look at the fir filter from a different angle:
+       After we zero stuff, we have input of all zeroes, except for every len_mult
+       sample. If we choose a sinc length equal to len_mult, then the fir filter becomes
+       much more simple: we're just taking a windowed sinc, shifting it to start at each
+       len_mult sample, and scaling it by the value of that sample. If we do this, then
+       we don't even need to worry about the sample histories, and the inner loop here is
+       unnecessary. This probably sacrifices some quality but could really speed things up as well.
+     */
+    /* We only calculate the values of samples which are 0 (mod len_div) because
+       those are the only ones used. All the other ones are discarded in the
+       third step of resampling. This is a huge speedup. As a warning, though,
+       if for some reason this is used elsewhere where there are no samples discarded,
+       the output will not be corrrect if len_div is not 1. To make this filter a
+       generic FIR filter, simply remove the if statement "if(i % cvt->len_div == 0)"
+       around the inner loop so that every sample is processed.
+     */
+    /* This is basically just a FIR filter. i.e. for input x_n and m coefficients,
+       y_n = x_n*sinc_0 + x_(n-1)*sinc_1 +  x_(n-2)*sinc_2 + ... + x_(n-m+1)*sinc_(m-1)
+     */
+#define filter_sinc(type, mult) { \
+            type *sinc = (type *)cvt->coeff; \
+            type *state = (type *)cvt->state_buf; \
+            type *buf = (type *)cvt->buf; \
+            for(i = 0; i < n; ++i) { \
+                state[cvt->state_pos] = buf[i]; \
+                buf[i] = 0; \
+                if( i % cvt->len_div == 0 ) { \
+                    for(j = 0; j < m;  ++j) { \
+                        buf[i] += mult(sinc[j], state[(cvt->state_pos + j) % m]); \
+                    } \
+                }\
+                cvt->state_pos = (cvt->state_pos + 1) % m; \
+            } \
+        }
+
+    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
+        filter_sinc(float, SDL_FloatMpy);
+    } else {
+        switch (SDL_AUDIO_BITSIZE(format)) {
+        case 8:
+            filter_sinc(Sint8, SDL_FixMpy8);
+            break;
+        case 16:
+            filter_sinc(Sint16, SDL_FixMpy16);
+            break;
+        case 32:
+            filter_sinc(Sint32, SDL_FixMpy32);
+            break;
+        }
+    }
+
+#undef filter_sinc
+
+}
+
+/* Generate the necessary windowed sinc filter for resampling.
+   Assume that the SDL_AudioCVT struct is already set up with
+   the correct values for len_mult and len_div, and use the
+   type of dst_format. Also assume the buffer is allocated.
+   Note the buffer needs to be m+1 units long.
+*/
+int
+SDL_BuildWindowedSinc(SDL_AudioCVT * cvt, SDL_AudioFormat format,
+                      unsigned int m)
+{
+    float fScale;               /* scale factor for fixed point */
+    float *fSinc;               /* floating point sinc buffer, to be converted to fixed point */
+    float fc;                   /* cutoff frequency */
+    float two_pi_fc, two_pi_over_m, four_pi_over_m, m_over_two;
+    float norm_sum, norm_fact;
+    unsigned int i;
+
+    /* Check that the buffer is allocated */
+    if (cvt->coeff == NULL) {
+        return -1;
+    }
+
+    /* Set the length */
+    cvt->len_sinc = m + 1;
+
+    /* Allocate the floating point windowed sinc. */
+    fSinc = (float *) malloc((m + 1) * sizeof(float));
+    if (fSinc == NULL) {
+        return -1;
+    }
+
+    /* Set up the filter parameters */
+    fc = (cvt->len_mult >
+          cvt->len_div) ? 0.5f / (float) cvt->len_mult : 0.5f /
+        (float) cvt->len_div;
+#ifdef DEBUG_CONVERT
+    printf("Lowpass cutoff frequency = %f\n", fc);
+#endif
+    two_pi_fc = 2.0f * M_PI * fc;
+    two_pi_over_m = 2.0f * M_PI / (float) m;
+    four_pi_over_m = 2.0f * two_pi_over_m;
+    m_over_two = (float) m / 2.0f;
+    norm_sum = 0.0f;
+
+    for (i = 0; i <= m; ++i) {
+        if (i == m / 2) {
+            fSinc[i] = two_pi_fc;
+        } else {
+            fSinc[i] =
+                sinf(two_pi_fc * ((float) i - m_over_two)) / ((float) i -
+                                                              m_over_two);
+            /* Apply blackman window */
+            fSinc[i] *=
+                0.42f - 0.5f * cosf(two_pi_over_m * (float) i) +
+                0.08f * cosf(four_pi_over_m * (float) i);
+        }
+        norm_sum += fabs(fSinc[i]);
+    }
+
+    norm_fact = 1.0f / norm_sum;
+
+#define convert_fixed(type, fix) { \
+        type *dst = (type *)cvt->coeff; \
+        for( i = 0; i <= m; ++i ) { \
+            dst[i] = fix(fSinc[i] * norm_fact); \
+        } \
+    }
+
+    /* If we're using floating point, we only need to normalize */
+    if (SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) {
+        float *fDest = (float *) cvt->coeff;
+        for (i = 0; i <= m; ++i) {
+            fDest[i] = fSinc[i] * norm_fact;
+        }
+    } else {
+        switch (SDL_AUDIO_BITSIZE(format)) {
+        case 8:
+            convert_fixed(Uint8, SDL_Make_1_7);
+            break;
+        case 16:
+            convert_fixed(Uint16, SDL_Make_1_15);
+            break;
+        case 32:
+            convert_fixed(Uint32, SDL_Make_1_31);
+            break;
+        }
+    }
+
+    /* Initialize the state buffer to all zeroes, and set initial position */
+    memset(cvt->state_buf, 0, cvt->len_sinc * SDL_AUDIO_BITSIZE(format) / 4);
+    cvt->state_pos = 0;
+
+    /* Clean up */
+#undef convert_fixed
+    free(fSinc);
+}
+
+/* This is used to reduce the resampling ratio */
+inline int
+SDL_GCD(int a, int b)
+{
+    int temp;
+    while (b != 0) {
+        temp = a % b;
+        a = b;
+        b = temp;
+    }
+    return a;
+}
+
+/* Perform proper resampling. This is pretty slow but it's the best-sounding method. */
+static void SDLCALL
+SDL_Resample(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    int i, j;
+
+#ifdef DEBUG_CONVERT
+    printf("Converting audio rate via proper resampling (mono)\n");
+#endif
+
+#define zerostuff_mono(type) { \
+        const type *src = (const type *) (cvt->buf + cvt->len_cvt); \
+        type *dst = (type *) (cvt->buf + (cvt->len_cvt * cvt->len_mult)); \
+        for (i = cvt->len_cvt / sizeof (type); i; --i) { \
+            src--; \
+            dst[-1] = src[0]; \
+            for( j = -cvt->len_mult; j < -1; ++j ) { \
+                dst[j] = 0; \
+            } \
+            dst -= cvt->len_mult; \
+        } \
+    }
+
+#define discard_mono(type) { \
+        const type *src = (const type *) (cvt->buf); \
+        type *dst = (type *) (cvt->buf); \
+        for (i = 0; i < (cvt->len_cvt / sizeof(type)) / cvt->len_div; ++i) { \
+            dst[0] = src[0]; \
+            src += cvt->len_div; \
+            ++dst; \
+        } \
+    }
+
+    /* Step 1: Zero stuff the conversion buffer. This upsamples by a factor of len_mult,
+       creating aliasing at frequencies above the original nyquist frequency.
+     */
+#ifdef DEBUG_CONVERT
+    printf("Zero-stuffing by a factor of %u\n", cvt->len_mult);
+#endif
+    switch (SDL_AUDIO_BITSIZE(format)) {
+    case 8:
+        zerostuff_mono(Uint8);
+        break;
+    case 16:
+        zerostuff_mono(Uint16);
+        break;
+    case 32:
+        zerostuff_mono(Uint32);
+        break;
+    }
+
+    cvt->len_cvt *= cvt->len_mult;
+
+    /* Step 2: Use a windowed sinc FIR filter (lowpass filter) to remove the alias
+       frequencies. This is the slow part.
+     */
+    SDL_FilterFIR(cvt, format);
+
+    /* Step 3: Now downsample by discarding samples. */
+
+#ifdef DEBUG_CONVERT
+    printf("Discarding samples by a factor of %u\n", cvt->len_div);
+#endif
+    switch (SDL_AUDIO_BITSIZE(format)) {
+    case 8:
+        discard_mono(Uint8);
+        break;
+    case 16:
+        discard_mono(Uint16);
+        break;
+    case 32:
+        discard_mono(Uint32);
+        break;
+    }
+
+#undef zerostuff_mono
+#undef discard_mono
+
+    cvt->len_cvt /= cvt->len_div;
+
+    if (cvt->filters[++cvt->filter_index]) {
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
 
 
 /* Creates a set of audio filters to convert from one format to another.
@@ -1399,6 +1894,17 @@
     }
 
     /* Do rate conversion */
+    if (src_rate != dst_rate) {
+        int rate_gcd;
+        rate_gcd = SDL_GCD(src_rate, dst_rate);
+        cvt->len_mult = dst_rate / rate_gcd;
+        cvt->len_div = src_rate / rate_gcd;
+        cvt->len_ratio = (double) cvt->len_mult / (double) cvt->len_div;
+        cvt->filters[cvt->filter_index++] = SDL_Resample;
+        SDL_BuildWindowedSinc(cvt, dst_fmt, 768);
+    }
+
+/*
     cvt->rate_incr = 0.0;
     if ((src_rate / 100) != (dst_rate / 100)) {
         Uint32 hi_rate, lo_rate;
@@ -1448,25 +1954,25 @@
             }
             len_mult = 2;
             len_ratio = 2.0;
-        }
-        /* If hi_rate = lo_rate*2^x then conversion is easy */
-        while (((lo_rate * 2) / 100) <= (hi_rate / 100)) {
-            cvt->filters[cvt->filter_index++] = rate_cvt;
-            cvt->len_mult *= len_mult;
-            lo_rate *= 2;
-            cvt->len_ratio *= len_ratio;
-        }
-        /* We may need a slow conversion here to finish up */
-        if ((lo_rate / 100) != (hi_rate / 100)) {
-#if 1
-            /* The problem with this is that if the input buffer is
-               say 1K, and the conversion rate is say 1.1, then the
-               output buffer is 1.1K, which may not be an acceptable
-               buffer size for the audio driver (not a power of 2)
-             */
-            /* For now, punt and hope the rate distortion isn't great.
-             */
-#else
+        }*/
+    /* If hi_rate = lo_rate*2^x then conversion is easy */
+    /*   while (((lo_rate * 2) / 100) <= (hi_rate / 100)) {
+       cvt->filters[cvt->filter_index++] = rate_cvt;
+       cvt->len_mult *= len_mult;
+       lo_rate *= 2;
+       cvt->len_ratio *= len_ratio;
+       } */
+    /* We may need a slow conversion here to finish up */
+    /*    if ((lo_rate / 100) != (hi_rate / 100)) {
+       #if 1 */
+    /* The problem with this is that if the input buffer is
+       say 1K, and the conversion rate is say 1.1, then the
+       output buffer is 1.1K, which may not be an acceptable
+       buffer size for the audio driver (not a power of 2)
+     */
+    /* For now, punt and hope the rate distortion isn't great.
+     */
+/*#else
             if (src_rate < dst_rate) {
                 cvt->rate_incr = (double) lo_rate / hi_rate;
                 cvt->len_mult *= 2;
@@ -1478,7 +1984,7 @@
             cvt->filters[cvt->filter_index++] = SDL_RateSLOW;
 #endif
         }
-    }
+    }*/
 
     /* Set up the filter information */
     if (cvt->filter_index != 0) {
@@ -1492,4 +1998,15 @@
     return (cvt->needed);
 }
 
+#undef SDL_FixMpy8
+#undef SDL_FixMpy16
+#undef SDL_FixMpy32
+#undef SDL_FloatMpy
+#undef SDL_Make_1_7
+#undef SDL_Make_1_15
+#undef SDL_Make_1_31
+#undef SDL_Make_2_6
+#undef SDL_Make_2_14
+#undef SDL_Make_2_30
+
 /* vi: set ts=4 sw=4 expandtab: */
diff -r 336b604ec15b -r f8f68f47285a src/audio/SDL_sysaudio.h
--- a/src/audio/SDL_sysaudio.h	Mon Aug 25 10:14:21 2008 +0000
+++ b/src/audio/SDL_sysaudio.h	Mon Aug 25 15:08:59 2008 +0000
@@ -69,6 +69,15 @@
 } SDL_AudioDriver;
 
 
+/* Streamer */
+typedef struct
+{
+    Uint8 *buffer;
+    int max_len;                /* the maximum length in bytes */
+    int read_pos, write_pos;    /* the position of the write and read heads in bytes */
+} SDL_AudioStreamer;
+
+
 /* Define the SDL audio driver structure */
 struct SDL_AudioDevice
 {
@@ -81,6 +90,10 @@
     /* An audio conversion block for audio format emulation */
     SDL_AudioCVT convert;
 
+    /* The streamer, if sample rate conversion necessitates it */
+    int use_streamer;
+    SDL_AudioStreamer streamer;
+
     /* Current state flags */
     int iscapture;
     int enabled;