Mercurial > sdl-ios-xcode
comparison src/audio/SDL_mixer_MMX.c @ 1668:4da1ee79c9af SDL-1.3
more tweaking indent options
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 29 May 2006 04:04:35 +0000 |
parents | 782fd950bd46 |
children |
comparison
equal
deleted
inserted
replaced
1667:1fddae038bc8 | 1668:4da1ee79c9af |
---|---|
35 * Mixing for 16 bit signed buffers | 35 * Mixing for 16 bit signed buffers |
36 ***********************************************/ | 36 ***********************************************/ |
37 | 37 |
38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES) | 38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES) |
39 void | 39 void |
40 SDL_MixAudio_MMX_S16 (char *dst, char *src, unsigned int size, int volume) | 40 SDL_MixAudio_MMX_S16(char *dst, char *src, unsigned int size, int volume) |
41 { | 41 { |
42 __asm__ __volatile__ (" movl %3,%%eax\n" /* eax = volume */ | 42 __asm__ __volatile__(" movl %3,%%eax\n" /* eax = volume */ |
43 " movl %2,%%edx\n" /* edx = size */ | 43 " movl %2,%%edx\n" /* edx = size */ |
44 " shrl $4,%%edx\n" /* process 16 bytes per iteration = 8 samples */ | 44 " shrl $4,%%edx\n" /* process 16 bytes per iteration = 8 samples */ |
45 " jz .endS16\n" " pxor %%mm0,%%mm0\n" " movd %%eax,%%mm0\n" " movq %%mm0,%%mm1\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" /* mm0 = vol|vol|vol|vol */ | 45 " jz .endS16\n" " pxor %%mm0,%%mm0\n" " movd %%eax,%%mm0\n" " movq %%mm0,%%mm1\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" /* mm0 = vol|vol|vol|vol */ |
46 ".align 8\n" " .mixloopS16:\n" " movq (%1),%%mm1\n" /* mm1 = a|b|c|d */ | 46 ".align 8\n" " .mixloopS16:\n" " movq (%1),%%mm1\n" /* mm1 = a|b|c|d */ |
47 " movq %%mm1,%%mm2\n" /* mm2 = a|b|c|d */ | 47 " movq %%mm1,%%mm2\n" /* mm2 = a|b|c|d */ |
48 " movq 8(%1),%%mm4\n" /* mm4 = e|f|g|h */ | 48 " movq 8(%1),%%mm4\n" /* mm4 = e|f|g|h */ |
49 /* preload the dst buffer into mm7 */ | 49 /* preload the dst buffer into mm7 */ |
50 " movq (%0),%%mm7\n" /* mm7 = dst[0] */ | 50 " movq (%0),%%mm7\n" /* mm7 = dst[0] */ |
51 /* multiply by the volume */ | 51 /* multiply by the volume */ |
52 " pmullw %%mm0,%%mm1\n" /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ | 52 " pmullw %%mm0,%%mm1\n" /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ |
53 " pmulhw %%mm0,%%mm2\n" /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */ | 53 " pmulhw %%mm0,%%mm2\n" /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */ |
54 " movq %%mm4,%%mm5\n" /* mm5 = e|f|g|h */ | 54 " movq %%mm4,%%mm5\n" /* mm5 = e|f|g|h */ |
55 " pmullw %%mm0,%%mm4\n" /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ | 55 " pmullw %%mm0,%%mm4\n" /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ |
56 " pmulhw %%mm0,%%mm5\n" /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */ | 56 " pmulhw %%mm0,%%mm5\n" /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */ |
57 " movq %%mm1,%%mm3\n" /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ | 57 " movq %%mm1,%%mm3\n" /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ |
58 " punpckhwd %%mm2,%%mm1\n" /* mm1 = a*v|b*v */ | 58 " punpckhwd %%mm2,%%mm1\n" /* mm1 = a*v|b*v */ |
59 " movq %%mm4,%%mm6\n" /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ | 59 " movq %%mm4,%%mm6\n" /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ |
60 " punpcklwd %%mm2,%%mm3\n" /* mm3 = c*v|d*v */ | 60 " punpcklwd %%mm2,%%mm3\n" /* mm3 = c*v|d*v */ |
61 " punpckhwd %%mm5,%%mm4\n" /* mm4 = e*v|f*v */ | 61 " punpckhwd %%mm5,%%mm4\n" /* mm4 = e*v|f*v */ |
62 " punpcklwd %%mm5,%%mm6\n" /* mm6 = g*v|h*v */ | 62 " punpcklwd %%mm5,%%mm6\n" /* mm6 = g*v|h*v */ |
63 /* preload the dst buffer into mm5 */ | 63 /* preload the dst buffer into mm5 */ |
64 " movq 8(%0),%%mm5\n" /* mm5 = dst[1] */ | 64 " movq 8(%0),%%mm5\n" /* mm5 = dst[1] */ |
65 /* divide by 128 */ | 65 /* divide by 128 */ |
66 " psrad $7,%%mm1\n" /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */ | 66 " psrad $7,%%mm1\n" /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */ |
67 " add $16,%1\n" " psrad $7,%%mm3\n" /* mm3 = c*v/128|d*v/128 */ | 67 " add $16,%1\n" " psrad $7,%%mm3\n" /* mm3 = c*v/128|d*v/128 */ |
68 " psrad $7,%%mm4\n" /* mm4 = e*v/128|f*v/128 */ | 68 " psrad $7,%%mm4\n" /* mm4 = e*v/128|f*v/128 */ |
69 /* mm3 = the samples with the volume applied */ | 69 /* mm3 = the samples with the volume applied */ |
70 " packssdw %%mm1,%%mm3\n" /* mm3 = s(a*v|b*v|c*v|d*v) */ | 70 " packssdw %%mm1,%%mm3\n" /* mm3 = s(a*v|b*v|c*v|d*v) */ |
71 " psrad $7,%%mm6\n" /* mm6= g*v/128|h*v/128 */ | 71 " psrad $7,%%mm6\n" /* mm6= g*v/128|h*v/128 */ |
72 " paddsw %%mm7,%%mm3\n" /* mm3 = adjust_volume(src)+dst */ | 72 " paddsw %%mm7,%%mm3\n" /* mm3 = adjust_volume(src)+dst */ |
73 /* mm6 = the samples with the volume applied */ | 73 /* mm6 = the samples with the volume applied */ |
74 " packssdw %%mm4,%%mm6\n" /* mm6 = s(e*v|f*v|g*v|h*v) */ | 74 " packssdw %%mm4,%%mm6\n" /* mm6 = s(e*v|f*v|g*v|h*v) */ |
75 " movq %%mm3,(%0)\n" " paddsw %%mm5,%%mm6\n" /* mm6 = adjust_volume(src)+dst */ | 75 " movq %%mm3,(%0)\n" " paddsw %%mm5,%%mm6\n" /* mm6 = adjust_volume(src)+dst */ |
76 " movq %%mm6,8(%0)\n" | 76 " movq %%mm6,8(%0)\n" |
77 " add $16,%0\n" | 77 " add $16,%0\n" |
78 " dec %%edx\n" | 78 " dec %%edx\n" |
79 " jnz .mixloopS16\n" | 79 " jnz .mixloopS16\n" |
80 " emms\n" | 80 " emms\n" |
81 ".endS16:\n"::"r" (dst), "r" (src), | 81 ".endS16:\n"::"r"(dst), "r"(src), |
82 "m" (size), "m" (volume):"eax", "edx", "memory"); | 82 "m"(size), "m"(volume):"eax", "edx", "memory"); |
83 } | 83 } |
84 | 84 |
85 | 85 |
86 | 86 |
87 /*////////////////////////////////////////////// */ | 87 /*////////////////////////////////////////////// */ |
88 /* Mixing for 8 bit signed buffers */ | 88 /* Mixing for 8 bit signed buffers */ |
89 /*////////////////////////////////////////////// */ | 89 /*////////////////////////////////////////////// */ |
90 | 90 |
91 void | 91 void |
92 SDL_MixAudio_MMX_S8 (char *dst, char *src, unsigned int size, int volume) | 92 SDL_MixAudio_MMX_S8(char *dst, char *src, unsigned int size, int volume) |
93 { | 93 { |
94 __asm__ __volatile__ (" movl %3,%%eax\n" /* eax = volume */ | 94 __asm__ __volatile__(" movl %3,%%eax\n" /* eax = volume */ |
95 " movd %%eax,%%mm0\n" " movq %%mm0,%%mm1\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " movl %2,%%edx\n" /* edx = size */ | 95 " movd %%eax,%%mm0\n" " movq %%mm0,%%mm1\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " movl %2,%%edx\n" /* edx = size */ |
96 " shr $3,%%edx\n" /* process 8 bytes per iteration = 8 samples */ | 96 " shr $3,%%edx\n" /* process 8 bytes per iteration = 8 samples */ |
97 " cmp $0,%%edx\n" " je .endS8\n" ".align 8\n" " .mixloopS8:\n" " pxor %%mm2,%%mm2\n" /* mm2 = 0 */ | 97 " cmp $0,%%edx\n" " je .endS8\n" ".align 8\n" " .mixloopS8:\n" " pxor %%mm2,%%mm2\n" /* mm2 = 0 */ |
98 " movq (%1),%%mm1\n" /* mm1 = a|b|c|d|e|f|g|h */ | 98 " movq (%1),%%mm1\n" /* mm1 = a|b|c|d|e|f|g|h */ |
99 " movq %%mm1,%%mm3\n" /* mm3 = a|b|c|d|e|f|g|h */ | 99 " movq %%mm1,%%mm3\n" /* mm3 = a|b|c|d|e|f|g|h */ |
100 /* do the sign extension by comparing against 0: each mask byte becomes all ones if the sample is < 0, zero otherwise */ | 100 /* do the sign extension by comparing against 0: each mask byte becomes all ones if the sample is < 0, zero otherwise */ |
101 " pcmpgtb %%mm1,%%mm2\n" /* mm2 = 11111111|00000000|00000000.... */ | 101 " pcmpgtb %%mm1,%%mm2\n" /* mm2 = 11111111|00000000|00000000.... */ |
102 " punpckhbw %%mm2,%%mm1\n" /* mm1 = a|b|c|d, sign-extended to 16 bits via the mask in mm2 */ | 102 " punpckhbw %%mm2,%%mm1\n" /* mm1 = a|b|c|d, sign-extended to 16 bits via the mask in mm2 */ |
103 " punpcklbw %%mm2,%%mm3\n" /* mm3 = e|f|g|h, sign-extended to 16 bits via the mask in mm2 */ | 103 " punpcklbw %%mm2,%%mm3\n" /* mm3 = e|f|g|h, sign-extended to 16 bits via the mask in mm2 */ |
104 " movq (%0),%%mm2\n" /* mm2 = destination */ | 104 " movq (%0),%%mm2\n" /* mm2 = destination */ |
105 " pmullw %%mm0,%%mm1\n" /* mm1 = v*a|v*b|v*c|v*d */ | 105 " pmullw %%mm0,%%mm1\n" /* mm1 = v*a|v*b|v*c|v*d */ |
106 " add $8,%1\n" " pmullw %%mm0,%%mm3\n" /* mm3 = v*e|v*f|v*g|v*h */ | 106 " add $8,%1\n" " pmullw %%mm0,%%mm3\n" /* mm3 = v*e|v*f|v*g|v*h */ |
107 " psraw $7,%%mm1\n" /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128 */ | 107 " psraw $7,%%mm1\n" /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128 */ |
108 " psraw $7,%%mm3\n" /* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */ | 108 " psraw $7,%%mm3\n" /* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */ |
109 " packsswb %%mm1,%%mm3\n" /* mm3 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */ | 109 " packsswb %%mm1,%%mm3\n" /* mm3 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */ |
110 " paddsb %%mm2,%%mm3\n" /* add to destination buffer */ | 110 " paddsb %%mm2,%%mm3\n" /* add to destination buffer */ |
111 " movq %%mm3,(%0)\n" /* store back to ram */ | 111 " movq %%mm3,(%0)\n" /* store back to ram */ |
112 " add $8,%0\n" | 112 " add $8,%0\n" |
113 " dec %%edx\n" | 113 " dec %%edx\n" |
114 " jnz .mixloopS8\n" | 114 " jnz .mixloopS8\n" |
115 ".endS8:\n" | 115 ".endS8:\n" |
116 " emms\n"::"r" (dst), "r" (src), "m" (size), | 116 " emms\n"::"r"(dst), "r"(src), "m"(size), |
117 "m" (volume):"eax", "edx", "memory"); | 117 "m"(volume):"eax", "edx", "memory"); |
118 } | 118 } |
119 #endif | 119 #endif |
120 /* vi: set ts=4 sw=4 expandtab: */ | 120 /* vi: set ts=4 sw=4 expandtab: */ |