sdl-ios-xcode: comparison src/video/SDL_RLEaccel.c @ 689:5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
From: Stephane Marchesin
Subject: Re: [SDL] [patch] MMX alpha blit patches with MMX detection
I think everything is correct now. I've done as much testing as I could,
but some real-world testing wouldn't hurt.
The patch is here: http://icps.u-strasbg.fr/~marchesin/sdl_mmxblit.patch
If you do a byte-by-byte comparison of the output of the C and MMX
functions, you'll notice that the results for the 555 and 565 RGB alpha
blits aren't exactly the same. This is because the MMX functions for 555
and 565 RGB have higher accuracy. If you want exactly the same behaviour,
that can be achieved by masking the three lower alpha bits in the MMX
functions. Just ask!
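To make the precision difference concrete: the C 555/565 blitters blend with a 5-bit alpha (alpha >> 3, then >> 5 after the multiply), while the MMX versions multiply by the 8-bit alpha and shift by 8; the macros in the diff below do apply the mask already (alpha &= ~(1+2+4)). A standalone sketch (not part of the patch) verifying that the masked 8-bit blend equals the 5-bit blend:

```c
#include <stdio.h>

/* (alpha & ~7) == (alpha >> 3) << 3, so multiplying by the masked 8-bit
 * alpha and shifting by 8 is exactly the 5-bit multiply shifted by 5. */
int main(void)
{
    unsigned alpha, x;
    for (alpha = 0; alpha < 256; alpha++) {
        for (x = 0; x < 64; x++) {                    /* any 5/6-bit channel diff */
            unsigned c   = ((alpha >> 3) * x) >> 5;   /* C blitter path      */
            unsigned mmx = ((alpha & ~7u) * x) >> 8;  /* masked MMX path     */
            if (c != mmx)
                printf("mismatch: alpha=%u x=%u\n", alpha, x);
        }
    }
    printf("5-bit and masked 8-bit blends agree for all inputs\n");
    return 0;
}
```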
I removed one MMX function because, after I fixed it to match its C
equivalent, it turned out to be slower than the C version on a PIII
(although a bit faster on an Athlon XP).
I've also added MMX and PIII replacements for SDL_memcpy. These provide
some speedup in testvidinfo -benchmark (at least for me, under Linux &
X11).
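Such replacements are typically wired in by probing the CPU once and then dispatching through a function pointer. A minimal sketch of that pattern, with hypothetical names (memcpy_mmx and init_blit_memcpy are illustrations, not the patch's actual entry points):

```c
#include <stddef.h>
#include <string.h>

extern int Hermes_X86_CPU(void);   /* the CPU-flags probe the patch uses */
#define MMX_CPU 0x800000           /* CPUID.1:EDX bit 23 = MMX           */

/* hypothetical assembly routine, for illustration only */
void *memcpy_mmx(void *dst, const void *src, size_t n);

static void *(*blit_memcpy)(void *, const void *, size_t) = memcpy;

void init_blit_memcpy(void)        /* called once, e.g. at video init */
{
    if (Hermes_X86_CPU() & MMX_CPU)
        blit_memcpy = memcpy_mmx;  /* switch to the MMX copy if available */
}
```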
author | Sam Lantinga <slouken@libsdl.org>
---|---
date | Fri, 22 Aug 2003 05:51:19 +0000
parents | 4314a501d7be
children | 22dbf364c017
688:c0522010bb6d | 689:5bb080d35049
---|---
100 #include "SDL_sysvideo.h" | 100 #include "SDL_sysvideo.h" |
101 #include "SDL_blit.h" | 101 #include "SDL_blit.h" |
102 #include "SDL_memops.h" | 102 #include "SDL_memops.h" |
103 #include "SDL_RLEaccel_c.h" | 103 #include "SDL_RLEaccel_c.h" |
104 | 104 |
105 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) | |
106 #include "mmx.h" | |
107 /* Function to check the CPU flags */ | |
108 #define MMX_CPU 0x800000 | |
109 #define CPU_Flags() Hermes_X86_CPU() | |
110 #define X86_ASSEMBLER | |
111 #define HermesConverterInterface void | |
112 #define HermesClearInterface void | |
113 #define STACKCALL | |
114 #include "HeadX86.h" | |
115 #endif | |
116 | |
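MMX_CPU matches the MMX feature bit of CPUID: executing CPUID with EAX=1 returns the feature flags in EDX, and bit 23 (0x800000) indicates MMX, so Hermes_X86_CPU() presumably reports those raw feature bits. The same probe with GCC's <cpuid.h>, as a sketch:

```c
#include <cpuid.h>

/* Equivalent of (CPU_Flags() & MMX_CPU) != 0, assuming CPU_Flags()
 * returns the raw CPUID.1:EDX feature bits. */
static int cpu_has_mmx(void)
{
    unsigned int eax, ebx, ecx, edx;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 0;                  /* CPUID leaf 1 not supported */
    return (edx & 0x800000) != 0;  /* EDX bit 23 = MMX */
}
```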
105 #ifndef MAX | 117 #ifndef MAX |
106 #define MAX(a, b) ((a) > (b) ? (a) : (b)) | 118 #define MAX(a, b) ((a) > (b) ? (a) : (b)) |
107 #endif | 119 #endif |
108 #ifndef MIN | 120 #ifndef MIN |
109 #define MIN(a, b) ((a) < (b) ? (a) : (b)) | 121 #define MIN(a, b) ((a) < (b) ? (a) : (b)) |
122 * Various colorkey blit methods, for opaque and per-surface alpha | 134 * Various colorkey blit methods, for opaque and per-surface alpha |
123 */ | 135 */ |
124 | 136 |
125 #define OPAQUE_BLIT(to, from, length, bpp, alpha) \ | 137 #define OPAQUE_BLIT(to, from, length, bpp, alpha) \ |
126 PIXEL_COPY(to, from, length, bpp) | 138 PIXEL_COPY(to, from, length, bpp) |
139 | |
140 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) | |
141 | |
142 #define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha) \ | |
143 do { \ | |
144 Uint32 *srcp = (Uint32 *)(from); \ | |
145 Uint32 *dstp = (Uint32 *)(to); \ | |
146 int i = 0x00FF00FF; \ | |
147 movd_m2r(*(&i), mm3); \ | |
148 punpckldq_r2r(mm3, mm3); \ | |
149 i = 0xFF000000; \ | |
150 movd_m2r(*(&i), mm7); \ | |
151 punpckldq_r2r(mm7, mm7); \ | |
152 i = alpha | alpha << 16; \ | |
153 movd_m2r(*(&i), mm4); \ | |
154 punpckldq_r2r(mm4, mm4); \ | |
155 pcmpeqd_r2r(mm5,mm5); /* set all bits of mm5 to 1 */ \ | |
156 pxor_r2r(mm7, mm5); /* make clear alpha mask */ \ | |
157 i = length; \ | |
158 if(i & 1) { \ | |
159 movd_m2r((*srcp), mm1); /* src -> mm1 */ \ | |
160 punpcklbw_r2r(mm1, mm1); \ | |
161 pand_r2r(mm3, mm1); \ | |
162 movd_m2r((*dstp), mm2); /* dst -> mm2 */ \ | |
163 punpcklbw_r2r(mm2, mm2); \ | |
164 pand_r2r(mm3, mm2); \ | |
165 psubw_r2r(mm2, mm1); \ | |
166 pmullw_r2r(mm4, mm1); \ | |
167 psrlw_i2r(8, mm1); \ | |
168 paddw_r2r(mm1, mm2); \ | |
169 pand_r2r(mm3, mm2); \ | |
170 packuswb_r2r(mm2, mm2); \ | |
171 pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \ | |
172 movd_r2m(mm2, *dstp); \ | |
173 ++srcp; \ | |
174 ++dstp; \ | |
175 i--; \ | |
176 } \ | |
177 for(; i > 0; --i) { \ | |
178 movq_m2r((*srcp), mm0); \ | |
179 movq_r2r(mm0, mm1); \ | |
180 punpcklbw_r2r(mm0, mm0); \ | |
181 movq_m2r((*dstp), mm2); \ | |
182 punpckhbw_r2r(mm1, mm1); \ | |
183 movq_r2r(mm2, mm6); \ | |
184 pand_r2r(mm3, mm0); \ | |
185 punpcklbw_r2r(mm2, mm2); \ | |
186 pand_r2r(mm3, mm1); \ | |
187 punpckhbw_r2r(mm6, mm6); \ | |
188 pand_r2r(mm3, mm2); \ | |
189 psubw_r2r(mm2, mm0); \ | |
190 pmullw_r2r(mm4, mm0); \ | |
191 pand_r2r(mm3, mm6); \ | |
192 psubw_r2r(mm6, mm1); \ | |
193 pmullw_r2r(mm4, mm1); \ | |
194 psrlw_i2r(8, mm0); \ | |
195 paddw_r2r(mm0, mm2); \ | |
196 psrlw_i2r(8, mm1); \ | |
197 paddw_r2r(mm1, mm6); \ | |
198 pand_r2r(mm3, mm2); \ | |
199 pand_r2r(mm3, mm6); \ | |
200 packuswb_r2r(mm2, mm2); \ | |
201 packuswb_r2r(mm6, mm6); \ | |
202 psrlq_i2r(32, mm2); \ | |
203 psllq_i2r(32, mm6); \ | |
204 por_r2r(mm6, mm2); \ | |
205 pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \ | |
206 movq_r2m(mm2, *dstp); \ | |
207 srcp += 2; \ | |
208 dstp += 2; \ | |
209 i--; \ | |
210 } \ | |
211 emms(); \ | |
212 } while(0) | |
213 | |
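Per pixel, the interleaved MMX above effectively computes the usual d += (s - d) * alpha >> 8 on each 8-bit channel, handling two pixels per iteration in 16-bit lanes. A scalar reference for one 0x00RRGGBB pixel (a sketch for comparison, not part of the patch):

```c
typedef unsigned int Uint32;

/* Widen each byte to 16 bits, blend, and repack with the alpha byte
 * cleared -- the lane-wise equivalent of the pmullw/psrlw sequence.
 * Assumes arithmetic >> on negative ints, as gcc provides. */
static Uint32 blend888_ref(Uint32 s, Uint32 d, unsigned alpha)
{
    Uint32 out = 0;
    unsigned i;
    for (i = 0; i < 24; i += 8) {           /* B, G, R channels */
        int sc = (s >> i) & 0xff;
        int dc = (d >> i) & 0xff;
        dc += (sc - dc) * (int)alpha >> 8;  /* pmullw + psrlw 8 + paddw */
        out |= (Uint32)dc << i;
    }
    return out;                             /* alpha byte stays zero */
}
```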
214 #define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha) \ | |
215 do { \ | |
216 int i, n = 0; \ | |
217 Uint16 *srcp = (Uint16 *)(from); \ | |
218 Uint16 *dstp = (Uint16 *)(to); \ | |
219 Uint32 ALPHA = 0xF800; \ | |
220 movd_m2r(*(&ALPHA), mm1); \ | |
221 punpcklwd_r2r(mm1, mm1); \ | |
222 punpcklwd_r2r(mm1, mm1); \ | |
223 ALPHA = 0x07E0; \ | |
224 movd_m2r(*(&ALPHA), mm4); \ | |
225 punpcklwd_r2r(mm4, mm4); \ | |
226 punpcklwd_r2r(mm4, mm4); \ | |
227 ALPHA = 0x001F; \ | |
228 movd_m2r(*(&ALPHA), mm7); \ | |
229 punpcklwd_r2r(mm7, mm7); \ | |
230 punpcklwd_r2r(mm7, mm7); \ | |
231 alpha &= ~(1+2+4); /* keep the 5 high alpha bits */ \ | |
232 i = (Uint32)alpha | (Uint32)alpha << 16; \ | |
233 movd_m2r(*(&i), mm0); \ | |
234 punpckldq_r2r(mm0, mm0); \ | |
235 ALPHA = alpha >> 3; \ | |
236 i = ((int)(length) & 3); \ | |
237 for(; i > 0; --i) { \ | |
238 Uint32 s = *srcp++; \ | |
239 Uint32 d = *dstp; \ | |
240 s = (s | s << 16) & 0x07e0f81f; \ | |
241 d = (d | d << 16) & 0x07e0f81f; \ | |
242 d += (s - d) * ALPHA >> 5; \ | |
243 d &= 0x07e0f81f; \ | |
244 *dstp++ = d | d >> 16; \ | |
245 n++; \ | |
246 } \ | |
247 i = (int)(length) - n; \ | |
248 for(; i > 0; --i) { \ | |
249 movq_m2r((*dstp), mm3); \ | |
250 movq_m2r((*srcp), mm2); \ | |
251 movq_r2r(mm2, mm5); \ | |
252 pand_r2r(mm1 , mm5); \ | |
253 psrlq_i2r(11, mm5); \ | |
254 movq_r2r(mm3, mm6); \ | |
255 pand_r2r(mm1 , mm6); \ | |
256 psrlq_i2r(11, mm6); \ | |
257 psubw_r2r(mm6, mm5); \ | |
258 pmullw_r2r(mm0, mm5); \ | |
259 psrlw_i2r(8, mm5); \ | |
260 paddw_r2r(mm5, mm6); \ | |
261 psllq_i2r(11, mm6); \ | |
262 pand_r2r(mm1, mm6); \ | |
263 movq_r2r(mm4, mm5); \ | |
264 por_r2r(mm7, mm5); \ | |
265 pand_r2r(mm5, mm3); \ | |
266 por_r2r(mm6, mm3); \ | |
267 movq_r2r(mm2, mm5); \ | |
268 pand_r2r(mm4 , mm5); \ | |
269 psrlq_i2r(5, mm5); \ | |
270 movq_r2r(mm3, mm6); \ | |
271 pand_r2r(mm4 , mm6); \ | |
272 psrlq_i2r(5, mm6); \ | |
273 psubw_r2r(mm6, mm5); \ | |
274 pmullw_r2r(mm0, mm5); \ | |
275 psrlw_i2r(8, mm5); \ | |
276 paddw_r2r(mm5, mm6); \ | |
277 psllq_i2r(5, mm6); \ | |
278 pand_r2r(mm4, mm6); \ | |
279 movq_r2r(mm1, mm5); \ | |
280 por_r2r(mm7, mm5); \ | |
281 pand_r2r(mm5, mm3); \ | |
282 por_r2r(mm6, mm3); \ | |
283 movq_r2r(mm2, mm5); \ | |
284 pand_r2r(mm7 , mm5); \ | |
285 movq_r2r(mm3, mm6); \ | |
286 pand_r2r(mm7 , mm6); \ | |
287 psubw_r2r(mm6, mm5); \ | |
288 pmullw_r2r(mm0, mm5); \ | |
289 psrlw_i2r(8, mm5); \ | |
290 paddw_r2r(mm5, mm6); \ | |
291 pand_r2r(mm7, mm6); \ | |
292 movq_r2r(mm1, mm5); \ | |
293 por_r2r(mm4, mm5); \ | |
294 pand_r2r(mm5, mm3); \ | |
295 por_r2r(mm6, mm3); \ | |
296 movq_r2m(mm3, *dstp); \ | |
297 srcp += 4; \ | |
298 dstp += 4; \ | |
299 i -= 3; \ | |
300 } \ | |
301 emms(); \ | |
302 } while(0) | |
303 | |
304 #define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha) \ | |
305 do { \ | |
306 int i, n = 0; \ | |
307 Uint16 *srcp = (Uint16 *)(from); \ | |
308 Uint16 *dstp = (Uint16 *)(to); \ | |
309 Uint32 ALPHA = 0x7C00; \ | |
310 movd_m2r(*(&ALPHA), mm1); \ | |
311 punpcklwd_r2r(mm1, mm1); \ | |
312 punpcklwd_r2r(mm1, mm1); \ | |
313 ALPHA = 0x03E0; \ | |
314 movd_m2r(*(&ALPHA), mm4); \ | |
315 punpcklwd_r2r(mm4, mm4); \ | |
316 punpcklwd_r2r(mm4, mm4); \ | |
317 ALPHA = 0x001F; \ | |
318 movd_m2r(*(&ALPHA), mm7); \ | |
319 punpcklwd_r2r(mm7, mm7); \ | |
320 punpcklwd_r2r(mm7, mm7); \ | |
321 alpha &= ~(1+2+4); /* keep the 5 high alpha bits */ \ | |
322 i = (Uint32)alpha | (Uint32)alpha << 16; \ | |
323 movd_m2r(*(&i), mm0); \ | |
324 punpckldq_r2r(mm0, mm0); \ | |
325 i = ((int)(length) & 3); \ | |
326 ALPHA = alpha >> 3; \ | |
327 for(; i > 0; --i) { \ | |
328 Uint32 s = *srcp++; \ | |
329 Uint32 d = *dstp; \ | |
330 s = (s | s << 16) & 0x03e07c1f; \ | |
331 d = (d | d << 16) & 0x03e07c1f; \ | |
332 d += (s - d) * ALPHA >> 5; \ | |
333 d &= 0x03e07c1f; \ | |
334 *dstp++ = d | d >> 16; \ | |
335 n++; \ | |
336 } \ | |
337 i = (int)(length) - n; \ | |
338 for(; i > 0; --i) { \ | |
339 movq_m2r((*dstp), mm3); \ | |
340 movq_m2r((*srcp), mm2); \ | |
341 movq_r2r(mm2, mm5); \ | |
342 pand_r2r(mm1 , mm5); \ | |
343 psrlq_i2r(10, mm5); \ | |
344 movq_r2r(mm3, mm6); \ | |
345 pand_r2r(mm1 , mm6); \ | |
346 psrlq_i2r(10, mm6); \ | |
347 psubw_r2r(mm6, mm5); \ | |
348 pmullw_r2r(mm0, mm5); \ | |
349 psrlw_i2r(8, mm5); \ | |
350 paddw_r2r(mm5, mm6); \ | |
351 psllq_i2r(10, mm6); \ | |
352 pand_r2r(mm1, mm6); \ | |
353 movq_r2r(mm4, mm5); \ | |
354 por_r2r(mm7, mm5); \ | |
355 pand_r2r(mm5, mm3); \ | |
356 por_r2r(mm6, mm3); \ | |
357 movq_r2r(mm2, mm5); \ | |
358 pand_r2r(mm4 , mm5); \ | |
359 psrlq_i2r(5, mm5); \ | |
360 movq_r2r(mm3, mm6); \ | |
361 pand_r2r(mm4 , mm6); \ | |
362 psrlq_i2r(5, mm6); \ | |
363 psubw_r2r(mm6, mm5); \ | |
364 pmullw_r2r(mm0, mm5); \ | |
365 psrlw_i2r(8, mm5); \ | |
366 paddw_r2r(mm5, mm6); \ | |
367 psllq_i2r(5, mm6); \ | |
368 pand_r2r(mm4, mm6); \ | |
369 movq_r2r(mm1, mm5); \ | |
370 por_r2r(mm7, mm5); \ | |
371 pand_r2r(mm5, mm3); \ | |
372 por_r2r(mm6, mm3); \ | |
373 movq_r2r(mm2, mm5); \ | |
374 pand_r2r(mm7 , mm5); \ | |
375 movq_r2r(mm3, mm6); \ | |
376 pand_r2r(mm7 , mm6); \ | |
377 psubw_r2r(mm6, mm5); \ | |
378 pmullw_r2r(mm0, mm5); \ | |
379 psrlw_i2r(8, mm5); \ | |
380 paddw_r2r(mm5, mm6); \ | |
381 pand_r2r(mm7, mm6); \ | |
382 movq_r2r(mm1, mm5); \ | |
383 por_r2r(mm4, mm5); \ | |
384 pand_r2r(mm5, mm3); \ | |
385 por_r2r(mm6, mm3); \ | |
386 movq_r2m(mm3, *dstp); \ | |
387 srcp += 4; \ | |
388 dstp += 4; \ | |
389 i -= 3; \ | |
390 } \ | |
391 emms(); \ | |
392 } while(0) | |
393 | |
394 #endif | |
127 | 395 |
128 /* | 396 /* |
129 * For 32bpp pixels of the form 0x00rrggbb: | 397 * For 32bpp pixels of the form 0x00rrggbb: |
130 * If we treat the middle component separately, we can process the two | 398 * If we treat the middle component separately, we can process the two |
131 * remaining in parallel. This is safe to do because of the gap to the left | 399 * remaining in parallel. This is safe to do because of the gap to the left |
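The body of that macro lies outside this hunk; for one pixel, the technique the comment describes amounts to the following sketch (same trick, 0x00rrggbb layout as stated):

```c
typedef unsigned int Uint32;

/* Red and blue travel together in one masked word -- the zeroed gaps
 * to the left of each component absorb the borrows -- while green is
 * blended separately in its own byte. */
static Uint32 blend888_parallel(Uint32 s, Uint32 d, Uint32 alpha)
{
    Uint32 s1 = s & 0x00ff00ff;                        /* 0x00RR00BB */
    Uint32 d1 = d & 0x00ff00ff;
    d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0x00ff00ff;
    s &= 0x0000ff00;                                   /* 0x0000GG00 */
    d &= 0x0000ff00;
    d  = (d + ((s - d) * alpha >> 8)) & 0x0000ff00;
    return d1 | d;
}
```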
159 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha) \ | 427 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha) \ |
160 do { \ | 428 do { \ |
161 int i; \ | 429 int i; \ |
162 Uint16 *src = (Uint16 *)(from); \ | 430 Uint16 *src = (Uint16 *)(from); \ |
163 Uint16 *dst = (Uint16 *)(to); \ | 431 Uint16 *dst = (Uint16 *)(to); \ |
432 Uint32 ALPHA = alpha >> 3; \ | |
164 for(i = 0; i < (int)(length); i++) { \ | 433 for(i = 0; i < (int)(length); i++) { \ |
165 Uint32 s = *src++; \ | 434 Uint32 s = *src++; \ |
166 Uint32 d = *dst; \ | 435 Uint32 d = *dst; \ |
167 s = (s | s << 16) & 0x07e0f81f; \ | 436 s = (s | s << 16) & 0x07e0f81f; \ |
168 d = (d | d << 16) & 0x07e0f81f; \ | 437 d = (d | d << 16) & 0x07e0f81f; \ |
169 d += (s - d) * alpha >> 5; \ | 438 d += (s - d) * ALPHA >> 5; \ |
170 d &= 0x07e0f81f; \ | 439 d &= 0x07e0f81f; \ |
171 *dst++ = d | d >> 16; \ | 440 *dst++ = d | d >> 16; \ |
172 } \ | 441 } \ |
173 } while(0) | 442 } while(0) |
174 | 443 |
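The magic constant is worth a note: for a 565 pixel, s | s << 16 masked with 0x07e0f81f moves green into the high half-word (bits 21-26) while red and blue stay in the low one, leaving at least five zero bits above each field, so a single 5-bit alpha multiply blends all three components at once; d | d >> 16 folds the result back. One loop iteration as a standalone function (a sketch of the same computation):

```c
typedef unsigned short Uint16;
typedef unsigned int   Uint32;

static Uint16 blend565(Uint16 src, Uint16 dst, Uint32 alpha5) /* alpha5: 0..31 */
{
    Uint32 s = src, d = dst;
    s = (s | s << 16) & 0x07e0f81f;   /* g in high half; r, b in low half */
    d = (d | d << 16) & 0x07e0f81f;
    d += (s - d) * alpha5 >> 5;       /* one multiply blends r, g and b   */
    d &= 0x07e0f81f;
    return (Uint16)(d | d >> 16);     /* fold back to rrrrrggggggbbbbb    */
}
```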
175 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha) \ | 444 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha) \ |
176 do { \ | 445 do { \ |
177 int i; \ | 446 int i; \ |
178 Uint16 *src = (Uint16 *)(from); \ | 447 Uint16 *src = (Uint16 *)(from); \ |
179 Uint16 *dst = (Uint16 *)(to); \ | 448 Uint16 *dst = (Uint16 *)(to); \ |
449 Uint32 ALPHA = alpha >> 3; \ | |
180 for(i = 0; i < (int)(length); i++) { \ | 450 for(i = 0; i < (int)(length); i++) { \ |
181 Uint32 s = *src++; \ | 451 Uint32 s = *src++; \ |
182 Uint32 d = *dst; \ | 452 Uint32 d = *dst; \ |
183 s = (s | s << 16) & 0x03e07c1f; \ | 453 s = (s | s << 16) & 0x03e07c1f; \ |
184 d = (d | d << 16) & 0x03e07c1f; \ | 454 d = (d | d << 16) & 0x03e07c1f; \ |
185 d += (s - d) * alpha >> 5; \ | 455 d += (s - d) * ALPHA >> 5; \ |
186 d &= 0x03e07c1f; \ | 456 d &= 0x03e07c1f; \ |
187 *dst++ = d | d >> 16; \ | 457 *dst++ = d | d >> 16; \ |
188 } \ | 458 } \ |
189 } while(0) | 459 } while(0) |
190 | 460 |
246 src += bpp; \ | 516 src += bpp; \ |
247 dst += bpp; \ | 517 dst += bpp; \ |
248 } \ | 518 } \ |
249 } while(0) | 519 } while(0) |
250 | 520 |
251 | 521 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) |
522 | |
523 #define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha) \ | |
524 do { \ | |
525 Uint32 *srcp = (Uint32 *)(from); \ | |
526 Uint32 *dstp = (Uint32 *)(to); \ | |
527 int i = 0x00fefefe; \ | |
528 movd_m2r(*(&i), mm4); \ | |
529 punpckldq_r2r(mm4, mm4); \ | |
530 i = 0x00010101; \ | |
531 movd_m2r(*(&i), mm3); \ | |
532 punpckldq_r2r(mm3, mm3); \ | |
533 i = (int)(length); \ | |
534 if( i & 1 ) { \ | |
535 Uint32 s = *srcp++; \ | |
536 Uint32 d = *dstp; \ | |
537 *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \ | |
538 + (s & d & 0x00010101); \ | |
539 i--; \ | |
540 } \ | |
541 for(; i > 0; --i) { \ | |
542 movq_m2r((*dstp), mm2); /* dst -> mm2 */ \ | |
543 movq_r2r(mm2, mm6); /* dst -> mm6 */ \ | |
544 movq_m2r((*srcp), mm1); /* src -> mm1 */ \ | |
545 movq_r2r(mm1, mm5); /* src -> mm5 */ \ | |
546 pand_r2r(mm4, mm6); /* dst & 0x00fefefe -> mm6 */ \ | |
547 pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */ \ | |
548 paddd_r2r(mm6, mm5); /* (src & 0x00fefefe) + (dst & 0x00fefefe) -> mm5 */ \ | |
549 psrld_i2r(1, mm5); \ | |
550 pand_r2r(mm1, mm2); /* s & d -> mm2 */ \ | |
551 pand_r2r(mm3, mm2); /* s & d & 0x00010101 -> mm2 */ \ | |
552 paddd_r2r(mm5, mm2); \ | |
553 movq_r2m(mm2, (*dstp)); \ | |
554 dstp += 2; \ | |
555 srcp += 2; \ | |
556 i--; \ | |
557 } \ | |
558 emms(); \ | |
559 } while(0) | |
560 | |
561 #endif | |
562 | |
252 /* | 563 /* |
253 * Special case: 50% alpha (alpha=128) | 564 * Special case: 50% alpha (alpha=128) |
254 * This is treated specially because it can be optimized very well, and | 565 * This is treated specially because it can be optimized very well, and |
255 * since it is good for many cases of semi-translucency. | 566 * since it is good for many cases of semi-translucency. |
256 * The theory is to do all three components at the same time: | 567 * The theory is to do all three components at the same time: |
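For one 0x00RRGGBB pixel, the whole special case reduces to the expression already visible in the MMX loop above (which applies it to two pixels per movq). As a sketch:

```c
typedef unsigned int Uint32;

/* 50% blend of all three channels at once: clearing each channel's low
 * bit opens a one-bit gap that holds the carry of the per-channel add,
 * and (s & d & 0x010101) restores the rounding bit lost only when both
 * low bits were set. */
static Uint32 average888(Uint32 s, Uint32 d)
{
    return (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
         + (s & d & 0x00010101);
}
```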
318 ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de) | 629 ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de) |
319 | 630 |
320 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) \ | 631 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) \ |
321 ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde) | 632 ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde) |
322 | 633 |
634 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT) | |
323 | 635 |
324 #define CHOOSE_BLIT(blitter, alpha, fmt) \ | 636 #define CHOOSE_BLIT(blitter, alpha, fmt) \ |
325 do { \ | 637 do { \ |
326 if(alpha == 255) { \ | 638 if(alpha == 255) { \ |
327 switch(fmt->BytesPerPixel) { \ | 639 switch(fmt->BytesPerPixel) { \ |
343 || fmt->Rmask == 0x07e0 \ | 655 || fmt->Rmask == 0x07e0 \ |
344 || fmt->Bmask == 0x07e0) { \ | 656 || fmt->Bmask == 0x07e0) { \ |
345 if(alpha == 128) \ | 657 if(alpha == 128) \ |
346 blitter(2, Uint8, ALPHA_BLIT16_565_50); \ | 658 blitter(2, Uint8, ALPHA_BLIT16_565_50); \ |
347 else { \ | 659 else { \ |
348 alpha >>= 3; /* use 5 bit alpha */ \ | 660 if((CPU_Flags()&MMX_CPU)!=0) \ |
661 blitter(2, Uint8, ALPHA_BLIT16_565MMX); \ | |
662 else \ | |
663 blitter(2, Uint8, ALPHA_BLIT16_565); \ | |
664 } \ | |
665 } else \ | |
666 goto general16; \ | |
667 break; \ | |
668 \ | |
669 case 0x7fff: \ | |
670 if(fmt->Gmask == 0x03e0 \ | |
671 || fmt->Rmask == 0x03e0 \ | |
672 || fmt->Bmask == 0x03e0) { \ | |
673 if(alpha == 128) \ | |
674 blitter(2, Uint8, ALPHA_BLIT16_555_50); \ | |
675 else { \ | |
676 if((CPU_Flags()&MMX_CPU)!=0) \ | |
677 blitter(2, Uint8, ALPHA_BLIT16_555MMX); \ | |
678 else \ | |
679 blitter(2, Uint8, ALPHA_BLIT16_555); \ | |
680 } \ | |
681 break; \ | |
682 } \ | |
683 /* fallthrough */ \ | |
684 \ | |
685 default: \ | |
686 general16: \ | |
687 blitter(2, Uint8, ALPHA_BLIT_ANY); \ | |
688 } \ | |
689 break; \ | |
690 \ | |
691 case 3: \ | |
692 blitter(3, Uint8, ALPHA_BLIT_ANY); \ | |
693 break; \ | |
694 \ | |
695 case 4: \ | |
696 if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \ | |
697 && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \ | |
698 || fmt->Bmask == 0xff00)) { \ | |
699 if(alpha == 128) \ | |
700 { \ | |
701 if((CPU_Flags()&MMX_CPU)!=0) \ | |
702 blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);\ | |
703 else \ | |
704 blitter(4, Uint16, ALPHA_BLIT32_888_50);\ | |
705 } \ | |
706 else \ | |
707 { \ | |
708 if((CPU_Flags()&MMX_CPU)!=0) \ | |
709 blitter(4, Uint16, ALPHA_BLIT32_888MMX);\ | |
710 else \ | |
711 blitter(4, Uint16, ALPHA_BLIT32_888); \ | |
712 } \ | |
713 } else \ | |
714 blitter(4, Uint16, ALPHA_BLIT_ANY); \ | |
715 break; \ | |
716 } \ | |
717 } \ | |
718 } while(0) | |
719 | |
720 #else | |
721 | |
722 #define CHOOSE_BLIT(blitter, alpha, fmt) \ | |
723 do { \ | |
724 if(alpha == 255) { \ | |
725 switch(fmt->BytesPerPixel) { \ | |
726 case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \ | |
727 case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \ | |
728 case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \ | |
729 case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \ | |
730 } \ | |
731 } else { \ | |
732 switch(fmt->BytesPerPixel) { \ | |
733 case 1: \ | |
734 /* No 8bpp alpha blitting */ \ | |
735 break; \ | |
736 \ | |
737 case 2: \ | |
738 switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \ | |
739 case 0xffff: \ | |
740 if(fmt->Gmask == 0x07e0 \ | |
741 || fmt->Rmask == 0x07e0 \ | |
742 || fmt->Bmask == 0x07e0) { \ | |
743 if(alpha == 128) \ | |
744 blitter(2, Uint8, ALPHA_BLIT16_565_50); \ | |
745 else { \ | |
349 blitter(2, Uint8, ALPHA_BLIT16_565); \ | 746 blitter(2, Uint8, ALPHA_BLIT16_565); \ |
350 } \ | 747 } \ |
351 } else \ | 748 } else \ |
352 goto general16; \ | 749 goto general16; \ |
353 break; \ | 750 break; \ |
357 || fmt->Rmask == 0x03e0 \ | 754 || fmt->Rmask == 0x03e0 \ |
358 || fmt->Bmask == 0x03e0) { \ | 755 || fmt->Bmask == 0x03e0) { \ |
359 if(alpha == 128) \ | 756 if(alpha == 128) \ |
360 blitter(2, Uint8, ALPHA_BLIT16_555_50); \ | 757 blitter(2, Uint8, ALPHA_BLIT16_555_50); \ |
361 else { \ | 758 else { \ |
362 alpha >>= 3; /* use 5 bit alpha */ \ | |
363 blitter(2, Uint8, ALPHA_BLIT16_555); \ | 759 blitter(2, Uint8, ALPHA_BLIT16_555); \ |
364 } \ | 760 } \ |
365 break; \ | 761 break; \ |
366 } \ | 762 } \ |
367 /* fallthrough */ \ | 763 /* fallthrough */ \ |
389 break; \ | 785 break; \ |
390 } \ | 786 } \ |
391 } \ | 787 } \ |
392 } while(0) | 788 } while(0) |
393 | 789 |
790 #endif | |
394 | 791 |
395 /* | 792 /* |
396 * This takes care of the case when the surface is clipped on the left and/or | 793 * This takes care of the case when the surface is clipped on the left and/or |
397 * right. Top clipping has already been taken care of. | 794 * right. Top clipping has already been taken care of. |
398 */ | 795 */ |