view src/audio/SDL_mixer_MMX_VC.c @ 942:41a59de7f2ed

Here are patches for SDL12 and SDL_mixer for 4 or 6 channel surround sound on Linux using the Alsa driver. To use them, naturally you need a sound card that will do 4 or 6 channels and probably also a recent version of the Alsa drivers and library. Since the only SDL output driver that knows about surround sound is the Alsa driver, you'll want to choose it, using: "export SDL_AUDIODRIVER=alsa". There are no syntactic changes to the programming API. No new library calls, no differences in arguments. There are two semantic changes: (1) For library calls with number of channels as an argument, formerly you could use only 1 or 2 for the number of channels. Now you can also use 4 or 6. (2) The two "left" and "right" arguments to Mix_SetPanning, for the case of 4 or 6 channels, no longer simply control the volumes of the left and right channels. Now the "left" argument is converted to an angle and Mix_SetPosition is called, and the "right" argument is ignored. With two exceptions, so far as I know, the modified SDL12 and SDL_mixer work the same way as the original versions, when opened for 1 or 2 channel output. The two exceptions are bugs which I fixed. Well, the first, anyway, is a bug for sure. When rate conversions up or down by a factor of two are applied (in src/audio/SDL_audiocvt.c), streams with different numbers of channels (that is, mono and stereo) are treated the same way: either each sample is copied or every other sample is omitted. This is ok for mono, but for stereo, it is frames that should be copied or omitted, where by "frame" I mean a portion of the stream containing one sample for each channel. (In the SDL source, confusingly, sometimes frames are called "samples".) So for these rate conversions, stereo streams have to be treated differently, and they are, in my modified version. The other problem that might be characterized as a bug arises when SDL_mixer is passed a multichannel chunk which does not have an integral number of frames.
Due to the way the effect_position code loops over frames, when the chunk ends with a partial frame, memory outside the chunk buffer will be accessed. In the case of stereo, it's possible that because malloc may give more memory than requested, this potential problem never actually causes a segment fault. I don't know. For 6 channel chunks, I do know, and it does cause segment faults. If SDL_mixer is passed defective chunks and this causes a segment fault, arguably, that's not a bug in SDL_mixer. Still, whether or not it counts as a bug, it's easy to protect against, so why not? I added code in mixer.c to discard any partial frame at the end of a chunk. Then what about when SDL or SDL_mixer is opened for 4 or 6 channel output? What happens with the parts of the current library designed for stereo? I don't know whether I've covered all the bases, but I've tried: (1) For playing 2 channel waves, or other cases where SDL knows it has to match up a 2 channel source with a 4 or 6 channel output, I've added code in SDL_audiocvt.c to make the necessary conversions. (2) For playing midis using timidity, I've converted timidity to do 4 or 6 channel output, upon request. (3) For playing mods using mikmod, I put ad hoc code in music.c to convert the stereo output that mikmod produces to 4 or 6 channels. Obviously it would be better to change the mikmod code to mix down into 4 or 6 channels, but I have a hard time following the code in mikmod, so I didn't do that. (4) For playing mp3s, I put ad hoc code in smpeg to copy channels in the case when 4 or 6 channel output is needed. (5) There seems to be no problem with .ogg files - stereo .oggs can be up converted as .wavs are. (6) The effect_position code in SDL_mixer is now generalized to include the cases of 4 and 6 channel streams. I've done a very limited amount of compatibility testing for some of the games using SDL I happen to have. For details, see the file TESTS.
I've put into a separate archive, Surround-SDL-testfiles.tgz, a couple of 6 channel wave files for testing and a 6 channel ogg file. If you have the right hardware and version of Alsa, you should be able to play the wave files with the Alsa utility aplay (and hear all channels, except maybe lfe, for chan-id.wav, since it's rather faint). Don't expect aplay to give good sound, though. There's something wrong with the current version of aplay. The canyon.ogg file is to test loading of 6 channel oggs. After patching and compiling, you can play it with playmus. (My version of ogg123 will not play it, and I had to patch mplayer to get it to play 6 channel oggs.) Greg Lee <greg@ling.lll.hawaii.edu> Thus, July 1, 2004
author Sam Lantinga <slouken@libsdl.org>
date Sat, 21 Aug 2004 12:27:02 +0000
parents 72ef7ce609ef
children b4117292e587
line wrap: on
line source

#if defined(USE_ASM_MIXER_VC)
// MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
// Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr)
// Converted to Intel ASM notation by Cth
// This code is licensed under the LGPL (see COPYING for details)
// 
// Assumes buffer size in bytes is a multiple of 16
// Assumes SDL_MIX_MAXVOLUME = 128


////////////////////////////////////////////////
// Mixing for 16 bit signed buffers
////////////////////////////////////////////////

#include <windows.h>
#include <stdio.h>

/*
 * Mix signed 16-bit little-endian samples from src into dst using MMX
 * (MSVC 32-bit inline assembly, Intel operand order).
 *
 * For every sample:  dst = saturate16(dst + ((src * volume) >> 7))
 *
 *   dst    - destination buffer, read and written in place
 *   src    - source buffer, read only
 *   nSize  - buffer size in bytes; only whole 16-byte groups (8 samples)
 *            are processed, any trailing remainder is left untouched
 *   volume - mix gain; it is replicated as a 16-bit lane value, so it must
 *            fit in 16 bits (the file header assumes SDL_MIX_MAXVOLUME = 128,
 *            which is why the product is scaled back with a shift by 7)
 *
 * Register roles inside the loop:
 *   esi = src cursor, edi = dst cursor, ebx = remaining 16-byte groups,
 *   mm0 = volume replicated into all four 16-bit lanes.
 */
void SDL_MixAudio_MMX_S16_VC(char* dst,char* src,unsigned int nSize,int volume)
{
	__asm
	{
		align	16

		push	edi
		push	esi
		push	ebx

		mov		edi, dst		// edi = dst cursor
		mov		esi, src		// esi = src cursor
		mov		eax, volume		// eax = volume
		mov		ebx, nSize		// ebx = size in bytes
		shr		ebx, 4			// ebx = number of 16-byte groups (8 samples each)
		jz		endS16			// fewer than 16 bytes: nothing to mix

		// Replicate the volume into all four 16-bit lanes of mm0.
		// movd zero-extends into the full 64-bit register, so mm0 does not
		// need to be cleared first.
		movd	mm0, eax		// mm0 = 0|0|0|vol
		movq	mm1, mm0		// mm1 = 0|0|0|vol
		psllq	mm0, 16			// mm0 = 0|0|vol|0
		por		mm0, mm1		// mm0 = 0|0|vol|vol
		psllq	mm0, 16			// mm0 = 0|vol|vol|0
		por		mm0, mm1		// mm0 = 0|vol|vol|vol
		psllq	mm0, 16			// mm0 = vol|vol|vol|0
		por		mm0, mm1		// mm0 = vol|vol|vol|vol

mixloopS16:
		movq	mm1, [esi]		// mm1 = first four source samples (group A)
		movq	mm2, mm1		// mm2 = copy of group A
		movq	mm4, [esi + 8]	// mm4 = next four source samples (group B)
		// preload the first half of the destination into mm7
		movq	mm7, [edi]		// mm7 = dst samples matching group A
		// multiply by the volume: 16x16 -> 32-bit signed products, split in halves
		pmullw	mm1, mm0		// mm1 = low  16 bits of A*vol
		pmulhw	mm2, mm0		// mm2 = high 16 bits of A*vol
		movq	mm5, mm4		// mm5 = copy of group B
		pmullw	mm4, mm0		// mm4 = low  16 bits of B*vol
		pmulhw	mm5, mm0		// mm5 = high 16 bits of B*vol
		movq	mm3, mm1		// mm3 = low halves of A*vol
		punpckhwd	mm1, mm2	// mm1 = 32-bit products of A's upper two samples
		movq		mm6, mm4	// mm6 = low halves of B*vol
		punpcklwd	mm3, mm2	// mm3 = 32-bit products of A's lower two samples
		punpckhwd	mm4, mm5	// mm4 = 32-bit products of B's upper two samples
		punpcklwd	mm6, mm5	// mm6 = 32-bit products of B's lower two samples
		// preload the second half of the destination into mm5
		movq	mm5, [edi + 8]	// mm5 = dst samples matching group B
		// divide by 128 (SDL_MIX_MAXVOLUME) with an arithmetic shift
		psrad	mm1, 7			// A upper products / 128
		add		esi, 16			// advance src cursor
		psrad	mm3, 7			// A lower products / 128
		psrad	mm4, 7			// B upper products / 128
		// repack to 16 bits with signed saturation: volume-scaled group A
		packssdw	mm3, mm1	// mm3 = scaled group A, original sample order
		psrad	mm6, 7			// B lower products / 128
		paddsw	mm3, mm7		// mm3 = saturate(scaled A + dst)
		// repack to 16 bits with signed saturation: volume-scaled group B
		packssdw	mm6, mm4	// mm6 = scaled group B, original sample order
		movq	[edi], mm3		// store mixed group A
		paddsw	mm6, mm5		// mm6 = saturate(scaled B + dst)
		movq	[edi + 8], mm6	// store mixed group B
		add		edi, 16			// advance dst cursor
		dec		ebx				// one 16-byte group done
		jnz		mixloopS16

endS16:
		emms					// leave MMX state so x87 code can run afterwards

		pop		ebx
		pop		esi
		pop		edi
	}

}

////////////////////////////////////////////////
// Mixing for 8 bit signed buffers
////////////////////////////////////////////////

void SDL_MixAudio_MMX_S8_VC(char* dst,char* src,unsigned int nSize,int volume)
{
	_asm
	{
		align 16

		push	edi
		push	esi
		push	ebx
		
		mov		edi, dst	//movl	%0,%%edi	// edi = dst
		mov		esi, src	//%1,%%esi	// esi = src
		mov		eax, volume	//%3,%%eax	// eax = volume

		movd	mm0, ebx	//%%ebx,%%mm0
		movq	mm1, mm0	//%%mm0,%%mm1
		psllq	mm0, 16		//$16,%%mm0
		por		mm0, mm1	//%%mm1,%%mm0
		psllq	mm0, 16		//$16,%%mm0
		por		mm0, mm1	//%%mm1,%%mm0
		psllq	mm0, 16		//$16,%%mm0
		por		mm0, mm1	//%%mm1,%%mm0
		
		mov		ebx, nSize	//%2,%%ebx	// ebx = size
		shr		ebx, 3		//$3,%%ebx	// process 8 bytes per iteration = 8 samples
		cmp		ebx, 0		//$0,%%ebx
		je		endS8

mixloopS8:
		pxor	mm2, mm2	//%%mm2,%%mm2		// mm2 = 0
		movq	mm1, [esi]	//(%%esi),%%mm1	// mm1 = a|b|c|d|e|f|g|h
		movq	mm3, mm1	//%%mm1,%%mm3 	// mm3 = a|b|c|d|e|f|g|h
		// on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0
		pcmpgtb		mm2, mm1	//%%mm1,%%mm2	// mm2 = 11111111|00000000|00000000....
		punpckhbw	mm1, mm2	//%%mm2,%%mm1	// mm1 = 0|a|0|b|0|c|0|d
		punpcklbw	mm3, mm2	//%%mm2,%%mm3	// mm3 = 0|e|0|f|0|g|0|h
		movq	mm2, [edi]	//(%%edi),%%mm2	// mm2 = destination
		pmullw	mm1, mm0	//%%mm0,%%mm1	// mm1 = v*a|v*b|v*c|v*d
		add		esi, 8		//$8,%%esi
		pmullw	mm3, mm0	//%%mm0,%%mm3	// mm3 = v*e|v*f|v*g|v*h
		psraw	mm1, 7		//$7,%%mm1		// mm1 = v*a/128|v*b/128|v*c/128|v*d/128 
		psraw	mm3, 7		//$7,%%mm3		// mm3 = v*e/128|v*f/128|v*g/128|v*h/128
		packsswb mm3, mm1	//%%mm1,%%mm3	// mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128
		paddsb	mm3, mm2	//%%mm2,%%mm3	// add to destination buffer
		movq	[edi], mm3	//%%mm3,(%%edi)	// store back to ram
		add		edi, 8		//$8,%%edi
		dec		ebx			//%%ebx
		jnz		mixloopS8
		
endS8:
		emms
		
		pop		ebx
		pop		esi
		pop		edi
	}
}

#endif /* USE_ASM_MIXER_VC */