Mercurial > sdl-ios-xcode
comparison src/audio/SDL_audiocvt.c @ 2663:0caed045d01b gsoc2008_audio_resampling
General cleanup and fixed a buffer overrun bug. It may be necessary to normalize filter gain differently or something.
author | Aaron Wishnick <schnarf@gmail.com> |
---|---|
date | Tue, 12 Aug 2008 00:24:42 +0000 |
parents | 5470680ca587 |
children |
comparison
equal
deleted
inserted
replaced
2662:5470680ca587 | 2663:0caed045d01b |
---|---|
27 #include "SDL_audio.h" | 27 #include "SDL_audio.h" |
28 #include "SDL_audio_c.h" | 28 #include "SDL_audio_c.h" |
29 | 29 |
30 #define DEBUG_CONVERT | 30 #define DEBUG_CONVERT |
31 | 31 |
32 /* Perform fractional multiplication of two 32-bit integers to produce a 32-bit result. Assumes sizeof(long) = 4 */ | 32 /* These are fractional multiplication routines. That is, their inputs |
33 /*#define SDL_FixMpy32(v1, v2, dest) { \ | 33 are two numbers in the range [-1, 1) and the result falls in that |
34 long a, b, c, d; \ | 34 same range. The output is the same size as the inputs, i.e. |
35 long x, y; \ | 35 32-bit x 32-bit = 32-bit. |
36 a = (v1 >> 16) & 0xffff; \ | 36 */ |
37 b = v1 & 0xffff; \ | |
38 c = (v2 >> 16); \ | |
39 d = v2 & 0xffff; \ | |
40 x = a * d + c * b; \ | |
41 y = (((b*d) >> 16) & 0xffff) + x; \ | |
42 dest = ((y >> 16) & 0xffff) + (a * c); \ | |
43 }*/ | |
44 /* TODO: Check if 64-bit type exists. If not, see http://www.8052.com/mul16.phtml or http://www.cs.uaf.edu/~cs301/notes/Chapter5/node5.html */ | |
45 | 37 |
46 /* We hope here that the right shift includes sign extension */ | 38 /* We hope here that the right shift includes sign extension */ |
47 #ifdef SDL_HAS_64BIT_Type | 39 #ifdef SDL_HAS_64BIT_Type |
48 #define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff) | 40 #define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff) |
49 #else | 41 #else |
50 /* need to do something more complicated here */ | 42 /* If we don't have the 64-bit type, do something more complicated. See http://www.8052.com/mul16.phtml or http://www.cs.uaf.edu/~cs301/notes/Chapter5/node5.html */ |
51 #define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff) | 43 #define SDL_FixMpy32(a, b) ((((Sint64)a * (Sint64)b) >> 31) & 0xffffffff) |
52 #endif | 44 #endif |
53 /* Confirmed that SDL_FixMpy16 works, need to check 8 and 32 */ | |
54 #define SDL_FixMpy16(a, b) ((((Sint32)a * (Sint32)b) >> 14) & 0xffff) | 45 #define SDL_FixMpy16(a, b) ((((Sint32)a * (Sint32)b) >> 14) & 0xffff) |
55 #define SDL_FixMpy8(a, b) ((((Sint16)a * (Sint16)b) >> 7) & 0xff) | 46 #define SDL_FixMpy8(a, b) ((((Sint16)a * (Sint16)b) >> 7) & 0xff) |
56 /* Everything is signed! */ | 47 /* This macro just makes the floating point filtering code not have to be a special case. */ |
57 #define SDL_Make_1_7(a) (Sint8)(a * 127.0f) | 48 #define SDL_FloatMpy(a, b) (a * b) |
58 #define SDL_Make_1_15(a) (Sint16)(a * 32767.0f) | 49 |
59 #define SDL_Make_1_31(a) (Sint32)(a * 2147483647.0f) | 50 /* These macros take floating point numbers in the range [-1.0f, 1.0f) and |
60 #define SDL_Make_2_6(a) (Sint8)(a * 63.0f) | 51 represent them as fixed-point numbers in that same range. There's no |
61 #define SDL_Make_2_14(a) (Sint16)(a * 16383.0f) | 52 checking that the floating point argument is inside the appropriate range. |
62 #define SDL_Make_2_30(a) (Sint32)(a * 1073741823.0f) | 53 */ |
54 | |
55 #define SDL_Make_1_7(a) (Sint8)(a * 128.0f) | |
56 #define SDL_Make_1_15(a) (Sint16)(a * 32768.0f) | |
57 #define SDL_Make_1_31(a) (Sint32)(a * 2147483648.0f) | |
58 #define SDL_Make_2_6(a) (Sint8)(a * 64.0f) | |
59 #define SDL_Make_2_14(a) (Sint16)(a * 16384.0f) | |
60 #define SDL_Make_2_30(a) (Sint32)(a * 1073741824.0f) | |
63 | 61 |
64 /* Effectively mix right and left channels into a single channel */ | 62 /* Effectively mix right and left channels into a single channel */ |
65 static void SDLCALL | 63 static void SDLCALL |
66 SDL_ConvertMono(SDL_AudioCVT * cvt, SDL_AudioFormat format) | 64 SDL_ConvertMono(SDL_AudioCVT * cvt, SDL_AudioFormat format) |
67 { | 65 { |
1440 cvt->state_pos = 0; | 1438 cvt->state_pos = 0; |
1441 #undef convert_fixed | 1439 #undef convert_fixed |
1442 } | 1440 } |
1443 | 1441 |
1444 /* Apply the lowpass IIR filter to the given SDL_AudioCVT struct */ | 1442 /* Apply the lowpass IIR filter to the given SDL_AudioCVT struct */ |
1443 /* This was implemented because it would be much faster than the fir filter, | |
1444 but it doesn't seem to have a steep enough cutoff so we'd need several | |
1445 cascaded biquads, which probably isn't a great idea. Therefore, this | |
1446 function can probably be discarded. | |
1447 */ | |
1445 static void SDL_FilterIIR(SDL_AudioCVT * cvt, SDL_AudioFormat format) { | 1448 static void SDL_FilterIIR(SDL_AudioCVT * cvt, SDL_AudioFormat format) { |
1446 Uint32 i, n; | 1449 Uint32 i, n; |
1447 | 1450 |
1448 /* TODO: Check that n is calculated right */ | 1451 /* TODO: Check that n is calculated right */ |
1449 n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format); | 1452 n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format); |
1522 } | 1525 } |
1523 } | 1526 } |
1524 #undef iir_fix | 1527 #undef iir_fix |
1525 } | 1528 } |
1526 | 1529 |
1527 /* Apply the windowed sinc FIR filter to the given SDL_AudioCVT struct */ | 1530 /* Apply the windowed sinc FIR filter to the given SDL_AudioCVT struct. |
1531 */ | |
1528 static void SDL_FilterFIR(SDL_AudioCVT * cvt, SDL_AudioFormat format) { | 1532 static void SDL_FilterFIR(SDL_AudioCVT * cvt, SDL_AudioFormat format) { |
1529 int n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format); | 1533 int n = 8 * cvt->len_cvt / SDL_AUDIO_BITSIZE(format); |
1530 int m = cvt->len_sinc; | 1534 int m = cvt->len_sinc; |
1531 int i, j; | 1535 int i, j; |
1532 | 1536 |
1533 /* | 1537 /* |
1534 Note: We can make a big optimization here by taking advantage | 1538 Note: We can make a big optimization here by taking advantage |
1535 of the fact that the signal is zero stuffed, so we can do | 1539 of the fact that the signal is zero stuffed, so we can do |
1536 significantly fewer multiplications and additions. However, this | 1540 significantly fewer multiplications and additions. However, this |
1537 depends on the zero stuffing ratio, so it may not pay off. | 1541 depends on the zero stuffing ratio, so it may not pay off. This would |
1542 basically be a polyphase filter. | |
1538 */ | 1543 */ |
1539 /* We only calculate the values of samples which are 0 (mod len_div) because those are the only ones used */ | 1544 /* One other way to do this fast is to look at the fir filter from a different angle: |
1545 After we zero stuff, we have input of all zeroes, except for every len_mult | |
1546 sample. If we choose a sinc length equal to len_mult, then the fir filter becomes | |
1547 much more simple: we're just taking a windowed sinc, shifting it to start at each | |
1548 len_mult sample, and scaling it by the value of that sample. If we do this, then | |
1549 we don't even need to worry about the sample histories, and the inner loop here is | |
1550 unnecessary. This probably sacrifices some quality but could really speed things up as well. | |
1551 */ | |
1552 /* We only calculate the values of samples which are 0 (mod len_div) because | |
1553 those are the only ones used. All the other ones are discarded in the | |
1554 third step of resampling. This is a huge speedup. As a warning, though, | |
1555 if for some reason this is used elsewhere where there are no samples discarded, | |
1556 the output will not be corrrect if len_div is not 1. To make this filter a | |
1557 generic FIR filter, simply remove the if statement "if(i % cvt->len_div == 0)" | |
1558 around the inner loop so that every sample is processed. | |
1559 */ | |
1560 /* This is basically just a FIR filter. i.e. for input x_n and m coefficients, | |
1561 y_n = x_n*sinc_0 + x_(n-1)*sinc_1 + x_(n-2)*sinc_2 + ... + x_(n-m+1)*sinc_(m-1) | |
1562 */ | |
1540 #define filter_sinc(type, mult) { \ | 1563 #define filter_sinc(type, mult) { \ |
1541 type *sinc = (type *)cvt->coeff; \ | 1564 type *sinc = (type *)cvt->coeff; \ |
1542 type *state = (type *)cvt->state_buf; \ | 1565 type *state = (type *)cvt->state_buf; \ |
1543 type *buf = (type *)cvt->buf; \ | 1566 type *buf = (type *)cvt->buf; \ |
1544 for(i = 0; i < n; ++i) { \ | 1567 for(i = 0; i < n; ++i) { \ |
1551 }\ | 1574 }\ |
1552 cvt->state_pos = (cvt->state_pos + 1) % m; \ | 1575 cvt->state_pos = (cvt->state_pos + 1) % m; \ |
1553 } \ | 1576 } \ |
1554 } | 1577 } |
1555 | 1578 |
1556 /* If it's floating point, do it normally, otherwise used fixed-point code */ | |
1557 if(SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) { | 1579 if(SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) { |
1558 float *sinc = (float *)cvt->coeff; | 1580 filter_sinc(float, SDL_FloatMpy); |
1559 float *state = (float *)cvt->state_buf; | |
1560 float *buf = (float *)cvt->buf; | |
1561 | |
1562 for(i = 0; i < n; ++i) { | |
1563 state[cvt->state_pos] = buf[i]; | |
1564 buf[i] = 0.0f; | |
1565 for(j = 0; j < m; ++j) { | |
1566 buf[i] += sinc[j] * state[(cvt->state_pos + j) % m]; | |
1567 } | |
1568 cvt->state_pos = (cvt->state_pos + 1) % m; | |
1569 } | |
1570 } else { | 1581 } else { |
1571 switch (SDL_AUDIO_BITSIZE(format)) { | 1582 switch (SDL_AUDIO_BITSIZE(format)) { |
1572 case 8: | 1583 case 8: |
1573 filter_sinc(Sint8, SDL_FixMpy8); | 1584 filter_sinc(Sint8, SDL_FixMpy8); |
1574 break; | 1585 break; |
1607 | 1618 |
1608 /* Set the length */ | 1619 /* Set the length */ |
1609 cvt->len_sinc = m + 1; | 1620 cvt->len_sinc = m + 1; |
1610 | 1621 |
1611 /* Allocate the floating point windowed sinc. */ | 1622 /* Allocate the floating point windowed sinc. */ |
1612 fSinc = (float *)malloc(m * sizeof(float)); | 1623 fSinc = (float *)malloc((m + 1) * sizeof(float)); |
1613 if( fSinc == NULL ) { | 1624 if( fSinc == NULL ) { |
1614 return -1; | 1625 return -1; |
1615 } | 1626 } |
1616 | 1627 |
1617 /* Set up the filter parameters */ | 1628 /* Set up the filter parameters */ |
1633 /* Apply blackman window */ | 1644 /* Apply blackman window */ |
1634 fSinc[i] *= 0.42f - 0.5f * cosf(two_pi_over_m * (float)i) + 0.08f * cosf(four_pi_over_m * (float)i); | 1645 fSinc[i] *= 0.42f - 0.5f * cosf(two_pi_over_m * (float)i) + 0.08f * cosf(four_pi_over_m * (float)i); |
1635 } | 1646 } |
1636 norm_sum += fabs(fSinc[i]); | 1647 norm_sum += fabs(fSinc[i]); |
1637 } | 1648 } |
1638 | 1649 |
1650 norm_fact = 1.0f / norm_sum; | |
1651 | |
1639 #define convert_fixed(type, fix) { \ | 1652 #define convert_fixed(type, fix) { \ |
1640 norm_fact = 0.5f / norm_sum; \ | |
1641 type *dst = (type *)cvt->coeff; \ | 1653 type *dst = (type *)cvt->coeff; \ |
1642 for( i = 0; i <= m; ++i ) { \ | 1654 for( i = 0; i <= m; ++i ) { \ |
1643 dst[i] = fix(fSinc[i] * norm_fact); \ | 1655 dst[i] = fix(fSinc[i] * norm_fact); \ |
1644 } \ | 1656 } \ |
1645 } | 1657 } |
1646 | 1658 |
1647 /* If we're using floating point, we only need to normalize */ | 1659 /* If we're using floating point, we only need to normalize */ |
1648 if(SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) { | 1660 if(SDL_AUDIO_ISFLOAT(format) && SDL_AUDIO_BITSIZE(format) == 32) { |
1649 float *fDest = (float *)cvt->coeff; | 1661 float *fDest = (float *)cvt->coeff; |
1650 norm_fact = 1.0f / norm_sum; | |
1651 for(i = 0; i <= m; ++i) { | 1662 for(i = 0; i <= m; ++i) { |
1652 fDest[i] = fSinc[i] * norm_fact; | 1663 fDest[i] = fSinc[i] * norm_fact; |
1653 } | 1664 } |
1654 } else { | 1665 } else { |
1655 switch (SDL_AUDIO_BITSIZE(format)) { | 1666 switch (SDL_AUDIO_BITSIZE(format)) { |
1683 b = temp; | 1694 b = temp; |
1684 } | 1695 } |
1685 return a; | 1696 return a; |
1686 } | 1697 } |
1687 | 1698 |
1688 /* Perform proper resampling */ | 1699 /* Perform proper resampling. This is pretty slow but it's the best-sounding method. */ |
1689 static void SDLCALL | 1700 static void SDLCALL |
1690 SDL_Resample(SDL_AudioCVT * cvt, SDL_AudioFormat format) | 1701 SDL_Resample(SDL_AudioCVT * cvt, SDL_AudioFormat format) |
1691 { | 1702 { |
1692 int i, j; | 1703 int i, j; |
1693 | 1704 |
1716 src += cvt->len_div; \ | 1727 src += cvt->len_div; \ |
1717 ++dst; \ | 1728 ++dst; \ |
1718 } \ | 1729 } \ |
1719 } | 1730 } |
1720 | 1731 |
1721 // Step 1: Zero stuff the conversion buffer | 1732 /* Step 1: Zero stuff the conversion buffer. This upsamples by a factor of len_mult, |
1733 creating aliasing at frequencies above the original nyquist frequency. | |
1734 */ | |
1722 #ifdef DEBUG_CONVERT | 1735 #ifdef DEBUG_CONVERT |
1723 printf("Zero-stuffing by a factor of %u\n", cvt->len_mult); | 1736 printf("Zero-stuffing by a factor of %u\n", cvt->len_mult); |
1724 #endif | 1737 #endif |
1725 switch (SDL_AUDIO_BITSIZE(format)) { | 1738 switch (SDL_AUDIO_BITSIZE(format)) { |
1726 case 8: | 1739 case 8: |
1734 break; | 1747 break; |
1735 } | 1748 } |
1736 | 1749 |
1737 cvt->len_cvt *= cvt->len_mult; | 1750 cvt->len_cvt *= cvt->len_mult; |
1738 | 1751 |
1739 // Step 2: Use either a windowed sinc FIR filter or IIR lowpass filter to remove all alias frequencies | 1752 /* Step 2: Use a windowed sinc FIR filter (lowpass filter) to remove the alias |
1740 QSDL_FilterFIR( cvt, format ); | 1753 frequencies. This is the slow part. |
1741 | 1754 */ |
1742 // OPTIMIZATION: we only need to calculate the non-discarded samples. This could be a big speedup! | 1755 SDL_FilterFIR( cvt, format ); |
1743 | 1756 |
1744 // Step 3: Discard unnecessary samples | 1757 /* Step 3: Now downsample by discarding samples. */ |
1745 | 1758 |
1746 #ifdef DEBUG_CONVERT | 1759 #ifdef DEBUG_CONVERT |
1747 printf("Discarding samples by a factor of %u\n", cvt->len_div); | 1760 printf("Discarding samples by a factor of %u\n", cvt->len_div); |
1748 #endif | 1761 #endif |
1749 switch (SDL_AUDIO_BITSIZE(format)) { | 1762 switch (SDL_AUDIO_BITSIZE(format)) { |
1853 } | 1866 } |
1854 if (src_channels != dst_channels) { | 1867 if (src_channels != dst_channels) { |
1855 /* Uh oh.. */ ; | 1868 /* Uh oh.. */ ; |
1856 } | 1869 } |
1857 } | 1870 } |
1858 | 1871 |
1859 /* Do rate conversion */ | 1872 /* Do rate conversion */ |
1860 int rate_gcd; | 1873 if( src_rate != dst_rate ) { |
1861 rate_gcd = SDL_GCD(src_rate, dst_rate); | 1874 int rate_gcd; |
1862 cvt->len_mult = dst_rate / rate_gcd; | 1875 rate_gcd = SDL_GCD(src_rate, dst_rate); |
1863 cvt->len_div = src_rate / rate_gcd; | 1876 cvt->len_mult = dst_rate / rate_gcd; |
1864 cvt->len_ratio = (double)cvt->len_mult / (double)cvt->len_div; | 1877 cvt->len_div = src_rate / rate_gcd; |
1865 cvt->filters[cvt->filter_index++] = SDL_Resample; | 1878 cvt->len_ratio = (double)cvt->len_mult / (double)cvt->len_div; |
1866 //SDL_BuildIIRLowpass(cvt, dst_fmt); | 1879 cvt->filters[cvt->filter_index++] = SDL_Resample; |
1867 SDL_BuildWindowedSinc(cvt, dst_fmt, 768); | 1880 SDL_BuildWindowedSinc(cvt, dst_fmt, 768); |
1868 | 1881 } |
1869 /*cvt->rate_incr = 0.0; | 1882 |
1883 /* | |
1884 cvt->rate_incr = 0.0; | |
1870 if ((src_rate / 100) != (dst_rate / 100)) { | 1885 if ((src_rate / 100) != (dst_rate / 100)) { |
1871 Uint32 hi_rate, lo_rate; | 1886 Uint32 hi_rate, lo_rate; |
1872 int len_mult; | 1887 int len_mult; |
1873 double len_ratio; | 1888 double len_ratio; |
1874 SDL_AudioFilter rate_cvt = NULL; | 1889 SDL_AudioFilter rate_cvt = NULL; |
1915 } | 1930 } |
1916 len_mult = 2; | 1931 len_mult = 2; |
1917 len_ratio = 2.0; | 1932 len_ratio = 2.0; |
1918 }*/ | 1933 }*/ |
1919 /* If hi_rate = lo_rate*2^x then conversion is easy */ | 1934 /* If hi_rate = lo_rate*2^x then conversion is easy */ |
1920 /*while (((lo_rate * 2) / 100) <= (hi_rate / 100)) { | 1935 /* while (((lo_rate * 2) / 100) <= (hi_rate / 100)) { |
1921 cvt->filters[cvt->filter_index++] = rate_cvt; | 1936 cvt->filters[cvt->filter_index++] = rate_cvt; |
1922 cvt->len_mult *= len_mult; | 1937 cvt->len_mult *= len_mult; |
1923 lo_rate *= 2; | 1938 lo_rate *= 2; |
1924 cvt->len_ratio *= len_ratio; | 1939 cvt->len_ratio *= len_ratio; |
1925 }*/ | 1940 }*/ |
1926 /* We may need a slow conversion here to finish up */ | 1941 /* We may need a slow conversion here to finish up */ |
1927 /*if ((lo_rate / 100) != (hi_rate / 100)) {*/ | 1942 /* if ((lo_rate / 100) != (hi_rate / 100)) { |
1928 #if 1 | 1943 #if 1*/ |
1929 /* The problem with this is that if the input buffer is | 1944 /* The problem with this is that if the input buffer is |
1930 say 1K, and the conversion rate is say 1.1, then the | 1945 say 1K, and the conversion rate is say 1.1, then the |
1931 output buffer is 1.1K, which may not be an acceptable | 1946 output buffer is 1.1K, which may not be an acceptable |
1932 buffer size for the audio driver (not a power of 2) | 1947 buffer size for the audio driver (not a power of 2) |
1933 */ | 1948 */ |
1934 /* For now, punt and hope the rate distortion isn't great. | 1949 /* For now, punt and hope the rate distortion isn't great. |
1935 */ | 1950 */ |
1936 #else | 1951 /*#else |
1937 if (src_rate < dst_rate) { | 1952 if (src_rate < dst_rate) { |
1938 cvt->rate_incr = (double) lo_rate / hi_rate; | 1953 cvt->rate_incr = (double) lo_rate / hi_rate; |
1939 cvt->len_mult *= 2; | 1954 cvt->len_mult *= 2; |
1940 cvt->len_ratio /= cvt->rate_incr; | 1955 cvt->len_ratio /= cvt->rate_incr; |
1941 } else { | 1956 } else { |
1942 cvt->rate_incr = (double) hi_rate / lo_rate; | 1957 cvt->rate_incr = (double) hi_rate / lo_rate; |
1943 cvt->len_ratio *= cvt->rate_incr; | 1958 cvt->len_ratio *= cvt->rate_incr; |
1944 } | 1959 } |
1945 cvt->filters[cvt->filter_index++] = SDL_RateSLOW; | 1960 cvt->filters[cvt->filter_index++] = SDL_RateSLOW; |
1946 #endif | 1961 #endif |
1947 /* } | 1962 } |
1948 }*/ | 1963 }*/ |
1949 | 1964 |
1950 /* Set up the filter information */ | 1965 /* Set up the filter information */ |
1951 if (cvt->filter_index != 0) { | 1966 if (cvt->filter_index != 0) { |
1952 cvt->needed = 1; | 1967 cvt->needed = 1; |
1960 } | 1975 } |
1961 | 1976 |
1962 #undef SDL_FixMpy8 | 1977 #undef SDL_FixMpy8 |
1963 #undef SDL_FixMpy16 | 1978 #undef SDL_FixMpy16 |
1964 #undef SDL_FixMpy32 | 1979 #undef SDL_FixMpy32 |
1980 #undef SDL_FloatMpy | |
1965 #undef SDL_Make_1_7 | 1981 #undef SDL_Make_1_7 |
1966 #undef SDL_Make_1_15 | 1982 #undef SDL_Make_1_15 |
1967 #undef SDL_Make_1_31 | 1983 #undef SDL_Make_1_31 |
1968 #undef SDL_Make_2_6 | 1984 #undef SDL_Make_2_6 |
1969 #undef SDL_Make_2_14 | 1985 #undef SDL_Make_2_14 |