Mercurial > sdl-ios-xcode
comparison src/audio/SDL_mixer_MMX.c @ 1662:782fd950bd46 SDL-1.3
Revamp of the video system in progress - adding support for multiple displays, multiple windows, and a full video mode selection API.
WARNING: None of the video drivers have been updated for the new API yet! The API is still under design and very fluid.
The code is now run through a consistent indent format:
indent -i4 -nut -nsc -br -ce
The headers are being converted to automatically generate doxygen documentation.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Sun, 28 May 2006 13:04:16 +0000 |
parents | 97d0966f4bf7 |
children | 4da1ee79c9af |
comparison
equal
deleted
inserted
replaced
1661:281d3f4870e5 | 1662:782fd950bd46 |
---|---|
34 /*********************************************** | 34 /*********************************************** |
35 * Mixing for 16 bit signed buffers | 35 * Mixing for 16 bit signed buffers |
36 ***********************************************/ | 36 ***********************************************/ |
37 | 37 |
38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES) | 38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES) |
39 void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume) | 39 void |
40 SDL_MixAudio_MMX_S16 (char *dst, char *src, unsigned int size, int volume) | |
40 { | 41 { |
41 __asm__ __volatile__ ( | 42 __asm__ __volatile__ (" movl %3,%%eax\n" /* eax = volume */ |
42 | 43 " movl %2,%%edx\n" /* edx = size */ |
43 " movl %3,%%eax\n" /* eax = volume */ | 44 " shrl $4,%%edx\n" /* process 16 bytes per iteration = 8 samples */ |
44 | 45 " jz .endS16\n" " pxor %%mm0,%%mm0\n" " movd %%eax,%%mm0\n" " movq %%mm0,%%mm1\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" /* mm0 = vol|vol|vol|vol */ |
45 " movl %2,%%edx\n" /* edx = size */ | 46 ".align 8\n" " .mixloopS16:\n" " movq (%1),%%mm1\n" /* mm1 = a|b|c|d */ |
46 | 47 " movq %%mm1,%%mm2\n" /* mm2 = a|b|c|d */ |
47 " shrl $4,%%edx\n" /* process 16 bytes per iteration = 8 samples */ | 48 " movq 8(%1),%%mm4\n" /* mm4 = e|f|g|h */ |
48 | 49 /* pré charger le buffer dst dans mm7 */ |
49 " jz .endS16\n" | 50 " movq (%0),%%mm7\n" /* mm7 = dst[0] */ |
50 | 51 /* multiplier par le volume */ |
51 " pxor %%mm0,%%mm0\n" | 52 " pmullw %%mm0,%%mm1\n" /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ |
52 | 53 " pmulhw %%mm0,%%mm2\n" /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */ |
53 " movd %%eax,%%mm0\n" | 54 " movq %%mm4,%%mm5\n" /* mm5 = e|f|g|h */ |
54 " movq %%mm0,%%mm1\n" | 55 " pmullw %%mm0,%%mm4\n" /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ |
55 " psllq $16,%%mm0\n" | 56 " pmulhw %%mm0,%%mm5\n" /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */ |
56 " por %%mm1,%%mm0\n" | 57 " movq %%mm1,%%mm3\n" /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ |
57 " psllq $16,%%mm0\n" | 58 " punpckhwd %%mm2,%%mm1\n" /* mm1 = a*v|b*v */ |
58 " por %%mm1,%%mm0\n" | 59 " movq %%mm4,%%mm6\n" /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ |
59 " psllq $16,%%mm0\n" | 60 " punpcklwd %%mm2,%%mm3\n" /* mm3 = c*v|d*v */ |
60 " por %%mm1,%%mm0\n" /* mm0 = vol|vol|vol|vol */ | 61 " punpckhwd %%mm5,%%mm4\n" /* mm4 = e*v|f*v */ |
61 | 62 " punpcklwd %%mm5,%%mm6\n" /* mm6 = g*v|h*v */ |
62 ".align 8\n" | 63 /* pré charger le buffer dst dans mm5 */ |
63 " .mixloopS16:\n" | 64 " movq 8(%0),%%mm5\n" /* mm5 = dst[1] */ |
64 | 65 /* diviser par 128 */ |
65 " movq (%1),%%mm1\n" /* mm1 = a|b|c|d */ | 66 " psrad $7,%%mm1\n" /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */ |
66 | 67 " add $16,%1\n" " psrad $7,%%mm3\n" /* mm3 = c*v/128|d*v/128 */ |
67 " movq %%mm1,%%mm2\n" /* mm2 = a|b|c|d */ | 68 " psrad $7,%%mm4\n" /* mm4 = e*v/128|f*v/128 */ |
68 | 69 /* mm1 = le sample avec le volume modifié */ |
69 " movq 8(%1),%%mm4\n" /* mm4 = e|f|g|h */ | 70 " packssdw %%mm1,%%mm3\n" /* mm3 = s(a*v|b*v|c*v|d*v) */ |
70 | 71 " psrad $7,%%mm6\n" /* mm6= g*v/128|h*v/128 */ |
71 /* pré charger le buffer dst dans mm7 */ | 72 " paddsw %%mm7,%%mm3\n" /* mm3 = adjust_volume(src)+dst */ |
72 " movq (%0),%%mm7\n" /* mm7 = dst[0] */ | 73 /* mm4 = le sample avec le volume modifié */ |
73 | 74 " packssdw %%mm4,%%mm6\n" /* mm6 = s(e*v|f*v|g*v|h*v) */ |
74 /* multiplier par le volume */ | 75 " movq %%mm3,(%0)\n" " paddsw %%mm5,%%mm6\n" /* mm6 = adjust_volume(src)+dst */ |
75 " pmullw %%mm0,%%mm1\n" /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ | 76 " movq %%mm6,8(%0)\n" |
76 | 77 " add $16,%0\n" |
77 " pmulhw %%mm0,%%mm2\n" /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */ | 78 " dec %%edx\n" |
78 " movq %%mm4,%%mm5\n" /* mm5 = e|f|g|h */ | 79 " jnz .mixloopS16\n" |
79 | 80 " emms\n" |
80 " pmullw %%mm0,%%mm4\n" /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ | 81 ".endS16:\n"::"r" (dst), "r" (src), |
81 | 82 "m" (size), "m" (volume):"eax", "edx", "memory"); |
82 " pmulhw %%mm0,%%mm5\n" /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */ | |
83 " movq %%mm1,%%mm3\n" /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ | |
84 | |
85 " punpckhwd %%mm2,%%mm1\n" /* mm1 = a*v|b*v */ | |
86 | |
87 " movq %%mm4,%%mm6\n" /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ | |
88 " punpcklwd %%mm2,%%mm3\n" /* mm3 = c*v|d*v */ | |
89 | |
90 " punpckhwd %%mm5,%%mm4\n" /* mm4 = e*v|f*v */ | |
91 | |
92 " punpcklwd %%mm5,%%mm6\n" /* mm6 = g*v|h*v */ | |
93 | |
94 /* pré charger le buffer dst dans mm5 */ | |
95 " movq 8(%0),%%mm5\n" /* mm5 = dst[1] */ | |
96 | |
97 /* diviser par 128 */ | |
98 " psrad $7,%%mm1\n" /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */ | |
99 " add $16,%1\n" | |
100 | |
101 " psrad $7,%%mm3\n" /* mm3 = c*v/128|d*v/128 */ | |
102 | |
103 " psrad $7,%%mm4\n" /* mm4 = e*v/128|f*v/128 */ | |
104 | |
105 /* mm1 = le sample avec le volume modifié */ | |
106 " packssdw %%mm1,%%mm3\n" /* mm3 = s(a*v|b*v|c*v|d*v) */ | |
107 | |
108 " psrad $7,%%mm6\n" /* mm6= g*v/128|h*v/128 */ | |
109 " paddsw %%mm7,%%mm3\n" /* mm3 = adjust_volume(src)+dst */ | |
110 | |
111 /* mm4 = le sample avec le volume modifié */ | |
112 " packssdw %%mm4,%%mm6\n" /* mm6 = s(e*v|f*v|g*v|h*v) */ | |
113 " movq %%mm3,(%0)\n" | |
114 | |
115 " paddsw %%mm5,%%mm6\n" /* mm6 = adjust_volume(src)+dst */ | |
116 | |
117 " movq %%mm6,8(%0)\n" | |
118 | |
119 " add $16,%0\n" | |
120 | |
121 " dec %%edx\n" | |
122 | |
123 " jnz .mixloopS16\n" | |
124 | |
125 " emms\n" | |
126 | |
127 ".endS16:\n" | |
128 : | |
129 : "r" (dst), "r"(src),"m"(size), | |
130 "m"(volume) | |
131 : "eax","edx","memory" | |
132 ); | |
133 } | 83 } |
134 | 84 |
135 | 85 |
136 | 86 |
137 /*////////////////////////////////////////////// */ | 87 /*////////////////////////////////////////////// */ |
138 /* Mixing for 8 bit signed buffers */ | 88 /* Mixing for 8 bit signed buffers */ |
139 /*////////////////////////////////////////////// */ | 89 /*////////////////////////////////////////////// */ |
140 | 90 |
141 void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume) | 91 void |
92 SDL_MixAudio_MMX_S8 (char *dst, char *src, unsigned int size, int volume) | |
142 { | 93 { |
143 __asm__ __volatile__ ( | 94 __asm__ __volatile__ (" movl %3,%%eax\n" /* eax = volume */ |
144 | 95 " movd %%eax,%%mm0\n" " movq %%mm0,%%mm1\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " psllq $16,%%mm0\n" " por %%mm1,%%mm0\n" " movl %2,%%edx\n" /* edx = size */ |
145 " movl %3,%%eax\n" /* eax = volume */ | 96 " shr $3,%%edx\n" /* process 8 bytes per iteration = 8 samples */ |
146 | 97 " cmp $0,%%edx\n" " je .endS8\n" ".align 8\n" " .mixloopS8:\n" " pxor %%mm2,%%mm2\n" /* mm2 = 0 */ |
147 " movd %%eax,%%mm0\n" | 98 " movq (%1),%%mm1\n" /* mm1 = a|b|c|d|e|f|g|h */ |
148 " movq %%mm0,%%mm1\n" | 99 " movq %%mm1,%%mm3\n" /* mm3 = a|b|c|d|e|f|g|h */ |
149 " psllq $16,%%mm0\n" | 100 /* on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 */ |
150 " por %%mm1,%%mm0\n" | 101 " pcmpgtb %%mm1,%%mm2\n" /* mm2 = 11111111|00000000|00000000.... */ |
151 " psllq $16,%%mm0\n" | 102 " punpckhbw %%mm2,%%mm1\n" /* mm1 = 0|a|0|b|0|c|0|d */ |
152 " por %%mm1,%%mm0\n" | 103 " punpcklbw %%mm2,%%mm3\n" /* mm3 = 0|e|0|f|0|g|0|h */ |
153 " psllq $16,%%mm0\n" | 104 " movq (%0),%%mm2\n" /* mm2 = destination */ |
154 " por %%mm1,%%mm0\n" | 105 " pmullw %%mm0,%%mm1\n" /* mm1 = v*a|v*b|v*c|v*d */ |
155 | 106 " add $8,%1\n" " pmullw %%mm0,%%mm3\n" /* mm3 = v*e|v*f|v*g|v*h */ |
156 " movl %2,%%edx\n" /* edx = size */ | 107 " psraw $7,%%mm1\n" /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128 */ |
157 " shr $3,%%edx\n" /* process 8 bytes per iteration = 8 samples */ | 108 " psraw $7,%%mm3\n" /* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */ |
158 | 109 " packsswb %%mm1,%%mm3\n" /* mm3 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */ |
159 " cmp $0,%%edx\n" | 110 " paddsb %%mm2,%%mm3\n" /* add to destination buffer */ |
160 " je .endS8\n" | 111 " movq %%mm3,(%0)\n" /* store back to ram */ |
161 | 112 " add $8,%0\n" |
162 ".align 8\n" | 113 " dec %%edx\n" |
163 " .mixloopS8:\n" | 114 " jnz .mixloopS8\n" |
164 | 115 ".endS8:\n" |
165 " pxor %%mm2,%%mm2\n" /* mm2 = 0 */ | 116 " emms\n"::"r" (dst), "r" (src), "m" (size), |
166 " movq (%1),%%mm1\n" /* mm1 = a|b|c|d|e|f|g|h */ | 117 "m" (volume):"eax", "edx", "memory"); |
167 | |
168 " movq %%mm1,%%mm3\n" /* mm3 = a|b|c|d|e|f|g|h */ | |
169 | |
170 /* on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 */ | |
171 " pcmpgtb %%mm1,%%mm2\n" /* mm2 = 11111111|00000000|00000000.... */ | |
172 | |
173 " punpckhbw %%mm2,%%mm1\n" /* mm1 = 0|a|0|b|0|c|0|d */ | |
174 | |
175 " punpcklbw %%mm2,%%mm3\n" /* mm3 = 0|e|0|f|0|g|0|h */ | |
176 " movq (%0),%%mm2\n" /* mm2 = destination */ | |
177 | |
178 " pmullw %%mm0,%%mm1\n" /* mm1 = v*a|v*b|v*c|v*d */ | |
179 " add $8,%1\n" | |
180 | |
181 " pmullw %%mm0,%%mm3\n" /* mm3 = v*e|v*f|v*g|v*h */ | |
182 " psraw $7,%%mm1\n" /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128 */ | |
183 | |
184 " psraw $7,%%mm3\n" /* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */ | |
185 | |
186 " packsswb %%mm1,%%mm3\n" /* mm3 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */ | |
187 | |
188 " paddsb %%mm2,%%mm3\n" /* add to destination buffer */ | |
189 | |
190 " movq %%mm3,(%0)\n" /* store back to ram */ | |
191 " add $8,%0\n" | |
192 | |
193 " dec %%edx\n" | |
194 | |
195 " jnz .mixloopS8\n" | |
196 | |
197 ".endS8:\n" | |
198 " emms\n" | |
199 : | |
200 : "r" (dst), "r"(src),"m"(size), | |
201 "m"(volume) | |
202 : "eax","edx","memory" | |
203 ); | |
204 } | 118 } |
205 #endif | 119 #endif |
206 | 120 /* vi: set ts=4 sw=4 expandtab: */ |