Mercurial > sdl-ios-xcode
comparison src/audio/SDL_mixer_MMX.c @ 887:b4b64bb88f2f
Date: Mon, 10 May 2004 10:17:46 -0400
From: Mike Frysinger
Subject: Re: [SDL] gcc-3.4.0 / PIC fix
Here's a combined patch (yours and the one I mentioned earlier) that I tested
with gcc-3.4.0 and gcc-3.3.3.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Sun, 16 May 2004 17:19:48 +0000 |
parents | a9e38f3b8e4d |
children | e3b3130f3af8 |
comparison
equal
deleted
inserted
replaced
886:05c551e5bc64 | 887:b4b64bb88f2f |
---|---|
13 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) | 13 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) |
14 void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume) | 14 void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume) |
15 { | 15 { |
16 __asm__ __volatile__ ( | 16 __asm__ __volatile__ ( |
17 | 17 |
18 " movl %0,%%edi\n" // edi = dst | |
19 " movl %1,%%esi\n" // esi = src | |
20 " movl %3,%%eax\n" // eax = volume | 18 " movl %3,%%eax\n" // eax = volume |
21 | 19 |
22 " movl %2,%%ebx\n" // ebx = size | 20 " movl %2,%%edx\n" // edx = size |
23 | 21 |
24 " shrl $4,%%ebx\n" // process 16 bytes per iteration = 8 samples | 22 " shrl $4,%%edx\n" // process 16 bytes per iteration = 8 samples |
25 | 23 |
26 " jz .endS16\n" | 24 " jz .endS16\n" |
27 | 25 |
28 " pxor %%mm0,%%mm0\n" | 26 " pxor %%mm0,%%mm0\n" |
29 | 27 |
37 " por %%mm1,%%mm0\n" // mm0 = vol|vol|vol|vol | 35 " por %%mm1,%%mm0\n" // mm0 = vol|vol|vol|vol |
38 | 36 |
39 ".align 16\n" | 37 ".align 16\n" |
40 " .mixloopS16:\n" | 38 " .mixloopS16:\n" |
41 | 39 |
42 " movq (%%esi),%%mm1\n" // mm1 = a|b|c|d | 40 " movq (%1),%%mm1\n" // mm1 = a|b|c|d |
43 | 41 |
44 " movq %%mm1,%%mm2\n" // mm2 = a|b|c|d | 42 " movq %%mm1,%%mm2\n" // mm2 = a|b|c|d |
45 | 43 |
46 " movq 8(%%esi),%%mm4\n" // mm4 = e|f|g|h | 44 " movq 8(%1),%%mm4\n" // mm4 = e|f|g|h |
47 | 45 |
48 // pré charger le buffer dst dans mm7 | 46 // pré charger le buffer dst dans mm7 |
49 " movq (%%edi),%%mm7\n" // mm7 = dst[0]" | 47 " movq (%0),%%mm7\n" // mm7 = dst[0]" |
50 | 48 |
51 // multiplier par le volume | 49 // multiplier par le volume |
52 " pmullw %%mm0,%%mm1\n" // mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) | 50 " pmullw %%mm0,%%mm1\n" // mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) |
53 | 51 |
54 " pmulhw %%mm0,%%mm2\n" // mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) | 52 " pmulhw %%mm0,%%mm2\n" // mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) |
67 " punpckhwd %%mm5,%%mm4\n" // mm4 = e*f|f*v | 65 " punpckhwd %%mm5,%%mm4\n" // mm4 = e*f|f*v |
68 | 66 |
69 " punpcklwd %%mm5,%%mm6\n" // mm6 = g*v|h*v | 67 " punpcklwd %%mm5,%%mm6\n" // mm6 = g*v|h*v |
70 | 68 |
71 // pré charger le buffer dst dans mm5 | 69 // pré charger le buffer dst dans mm5 |
72 " movq 8(%%edi),%%mm5\n" // mm5 = dst[1] | 70 " movq 8(%0),%%mm5\n" // mm5 = dst[1] |
73 | 71 |
74 // diviser par 128 | 72 // diviser par 128 |
75 " psrad $7,%%mm1\n" // mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME | 73 " psrad $7,%%mm1\n" // mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME |
76 " addl $16,%%esi\n" | 74 " add $16,%1\n" |
77 | 75 |
78 " psrad $7,%%mm3\n" // mm3 = c*v/128|d*v/128 | 76 " psrad $7,%%mm3\n" // mm3 = c*v/128|d*v/128 |
79 | 77 |
80 " psrad $7,%%mm4\n" // mm4 = e*v/128|f*v/128 | 78 " psrad $7,%%mm4\n" // mm4 = e*v/128|f*v/128 |
81 | 79 |
85 " psrad $7,%%mm6\n" // mm6= g*v/128|h*v/128 | 83 " psrad $7,%%mm6\n" // mm6= g*v/128|h*v/128 |
86 " paddsw %%mm7,%%mm3\n" // mm3 = adjust_volume(src)+dst | 84 " paddsw %%mm7,%%mm3\n" // mm3 = adjust_volume(src)+dst |
87 | 85 |
88 // mm4 = le sample avec le volume modifié | 86 // mm4 = le sample avec le volume modifié |
89 " packssdw %%mm4,%%mm6\n" // mm6 = s(e*v|f*v|g*v|h*v) | 87 " packssdw %%mm4,%%mm6\n" // mm6 = s(e*v|f*v|g*v|h*v) |
90 " movq %%mm3,(%%edi)\n" | 88 " movq %%mm3,(%0)\n" |
91 | 89 |
92 " paddsw %%mm5,%%mm6\n" // mm6 = adjust_volume(src)+dst | 90 " paddsw %%mm5,%%mm6\n" // mm6 = adjust_volume(src)+dst |
93 | 91 |
94 " movq %%mm6,8(%%edi)\n" | 92 " movq %%mm6,8(%0)\n" |
95 | 93 |
96 " addl $16,%%edi\n" | 94 " add $16,%0\n" |
97 | 95 |
98 " dec %%ebx\n" | 96 " dec %%edx\n" |
99 | 97 |
100 " jnz .mixloopS16\n" | 98 " jnz .mixloopS16\n" |
101 | 99 |
102 " emms\n" | 100 " emms\n" |
103 | 101 |
104 ".endS16:\n" | 102 ".endS16:\n" |
105 : | 103 : |
106 : "m" (dst), "m"(src),"m"(size), | 104 : "r" (dst), "r"(src),"m"(size), |
107 "m"(volume) | 105 "m"(volume) |
108 : "eax","ebx", "esi", "edi","memory" | 106 : "eax","edx","memory" |
109 ); | 107 ); |
110 } | 108 } |
111 | 109 |
112 | 110 |
113 | 111 |
117 | 115 |
118 void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume) | 116 void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume) |
119 { | 117 { |
120 __asm__ __volatile__ ( | 118 __asm__ __volatile__ ( |
121 | 119 |
122 " movl %0,%%edi\n" // edi = dst | |
123 " movl %1,%%esi\n" // esi = src | |
124 " movl %3,%%eax\n" // eax = volume | 120 " movl %3,%%eax\n" // eax = volume |
125 | 121 |
126 " movd %%ebx,%%mm0\n" | 122 " movd %%edx,%%mm0\n" |
127 " movq %%mm0,%%mm1\n" | 123 " movq %%mm0,%%mm1\n" |
128 " psllq $16,%%mm0\n" | 124 " psllq $16,%%mm0\n" |
129 " por %%mm1,%%mm0\n" | 125 " por %%mm1,%%mm0\n" |
130 " psllq $16,%%mm0\n" | 126 " psllq $16,%%mm0\n" |
131 " por %%mm1,%%mm0\n" | 127 " por %%mm1,%%mm0\n" |
132 " psllq $16,%%mm0\n" | 128 " psllq $16,%%mm0\n" |
133 " por %%mm1,%%mm0\n" | 129 " por %%mm1,%%mm0\n" |
134 | 130 |
135 " movl %2,%%ebx\n" // ebx = size | 131 " movl %2,%%edx\n" // edx = size |
136 " shr $3,%%ebx\n" // process 8 bytes per iteration = 8 samples | 132 " shr $3,%%edx\n" // process 8 bytes per iteration = 8 samples |
137 | 133 |
138 " cmp $0,%%ebx\n" | 134 " cmp $0,%%edx\n" |
139 " je .endS8\n" | 135 " je .endS8\n" |
140 | 136 |
141 ".align 16\n" | 137 ".align 16\n" |
142 " .mixloopS8:\n" | 138 " .mixloopS8:\n" |
143 | 139 |
144 " pxor %%mm2,%%mm2\n" // mm2 = 0 | 140 " pxor %%mm2,%%mm2\n" // mm2 = 0 |
145 " movq (%%esi),%%mm1\n" // mm1 = a|b|c|d|e|f|g|h | 141 " movq (%1),%%mm1\n" // mm1 = a|b|c|d|e|f|g|h |
146 | 142 |
147 " movq %%mm1,%%mm3\n" // mm3 = a|b|c|d|e|f|g|h | 143 " movq %%mm1,%%mm3\n" // mm3 = a|b|c|d|e|f|g|h |
148 | 144 |
149 // on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 | 145 // on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 |
150 " pcmpgtb %%mm1,%%mm2\n" // mm2 = 11111111|00000000|00000000.... | 146 " pcmpgtb %%mm1,%%mm2\n" // mm2 = 11111111|00000000|00000000.... |
151 | 147 |
152 " punpckhbw %%mm2,%%mm1\n" // mm1 = 0|a|0|b|0|c|0|d | 148 " punpckhbw %%mm2,%%mm1\n" // mm1 = 0|a|0|b|0|c|0|d |
153 | 149 |
154 " punpcklbw %%mm2,%%mm3\n" // mm3 = 0|e|0|f|0|g|0|h | 150 " punpcklbw %%mm2,%%mm3\n" // mm3 = 0|e|0|f|0|g|0|h |
155 " movq (%%edi),%%mm2\n" // mm2 = destination | 151 " movq (%0),%%mm2\n" // mm2 = destination |
156 | 152 |
157 " pmullw %%mm0,%%mm1\n" // mm1 = v*a|v*b|v*c|v*d | 153 " pmullw %%mm0,%%mm1\n" // mm1 = v*a|v*b|v*c|v*d |
158 " addl $8,%%esi\n" | 154 " add $8,%1\n" |
159 | 155 |
160 " pmullw %%mm0,%%mm3\n" // mm3 = v*e|v*f|v*g|v*h | 156 " pmullw %%mm0,%%mm3\n" // mm3 = v*e|v*f|v*g|v*h |
161 " psraw $7,%%mm1\n" // mm1 = v*a/128|v*b/128|v*c/128|v*d/128 | 157 " psraw $7,%%mm1\n" // mm1 = v*a/128|v*b/128|v*c/128|v*d/128 |
162 | 158 |
163 " psraw $7,%%mm3\n" // mm3 = v*e/128|v*f/128|v*g/128|v*h/128 | 159 " psraw $7,%%mm3\n" // mm3 = v*e/128|v*f/128|v*g/128|v*h/128 |
164 | 160 |
165 " packsswb %%mm1,%%mm3\n" // mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 | 161 " packsswb %%mm1,%%mm3\n" // mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 |
166 | 162 |
167 " paddsb %%mm2,%%mm3\n" // add to destination buffer | 163 " paddsb %%mm2,%%mm3\n" // add to destination buffer |
168 | 164 |
169 " movq %%mm3,(%%edi)\n" // store back to ram | 165 " movq %%mm3,(%0)\n" // store back to ram |
170 " addl $8,%%edi\n" | 166 " add $8,%0\n" |
171 | 167 |
172 " dec %%ebx\n" | 168 " dec %%edx\n" |
173 | 169 |
174 " jnz .mixloopS8\n" | 170 " jnz .mixloopS8\n" |
175 | 171 |
176 ".endS8:\n" | 172 ".endS8:\n" |
177 " emms\n" | 173 " emms\n" |
178 : | 174 : |
179 : "m" (dst), "m"(src),"m"(size), | 175 : "r" (dst), "r"(src),"m"(size), |
180 "m"(volume) | 176 "m"(volume) |
181 : "eax","ebx", "esi", "edi","memory" | 177 : "eax","edx","memory" |
182 ); | 178 ); |
183 } | 179 } |
184 #endif | 180 #endif |
185 | 181 |