Mercurial > sdl-ios-xcode
comparison src/video/SDL_blit_A.c @ 2255:17b2369756be
Use MMX intrinsics over GCC inline assembly
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Thu, 16 Aug 2007 22:18:53 +0000 |
parents | 6630fefab312 |
children | 340942cfda48 |
comparison
equal
deleted
inserted
replaced
2254:79e00f5561f4 | 2255:17b2369756be |
---|---|
21 */ | 21 */ |
22 #include "SDL_config.h" | 22 #include "SDL_config.h" |
23 | 23 |
24 #include "SDL_video.h" | 24 #include "SDL_video.h" |
25 #include "SDL_blit.h" | 25 #include "SDL_blit.h" |
26 | |
27 /* | |
28 In Visual C, VC6 has mmintrin.h in the "Processor Pack" add-on. | |
29 Checking if _mm_free is #defined in malloc.h is the only way to | |
30 determine if the Processor Pack is installed, as far as I can tell. | |
31 */ | |
32 | |
33 #if SDL_ASSEMBLY_ROUTINES | |
34 # if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) | |
35 # define MMX_ASMBLIT 1 | |
36 # define GCC_ASMBLIT 1 | |
37 # elif defined(_MSC_VER) && defined(_M_IX86) | |
38 # if (_MSC_VER <= 1200) | |
39 # include <malloc.h> | |
40 # if defined(_mm_free) | |
41 # define HAVE_MMINTRIN_H 1 | |
42 # endif | |
43 # else /* Visual Studio > VC6 always has mmintrin.h */ | |
44 # define HAVE_MMINTRIN_H 1 | |
45 # endif | |
46 # if HAVE_MMINTRIN_H | |
47 # define MMX_ASMBLIT 1 | |
48 # define MSVC_ASMBLIT 1 | |
49 # endif | |
50 # endif | |
51 #endif /* SDL_ASSEMBLY_ROUTINES */ | |
52 | |
53 /* Function to check the CPU flags */ | |
54 #include "SDL_cpuinfo.h" | |
55 #if GCC_ASMBLIT | |
56 #include "mmx.h" | |
57 #elif MSVC_ASMBLIT | |
58 #include <mmintrin.h> | |
59 #include <mm3dnow.h> | |
60 #endif | |
61 | 26 |
62 /* Functions to perform alpha blended blitting */ | 27 /* Functions to perform alpha blended blitting */ |
63 | 28 |
64 /* N->1 blending with per-surface alpha */ | 29 /* N->1 blending with per-surface alpha */ |
65 static void | 30 static void |
230 src += srcskip; | 195 src += srcskip; |
231 dst += dstskip; | 196 dst += dstskip; |
232 } | 197 } |
233 } | 198 } |
234 | 199 |
235 #if GCC_ASMBLIT | 200 #ifdef __MMX__ |
236 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 201 |
237 static void | |
238 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info) | |
239 { | |
240 int width = info->d_width; | |
241 int height = info->d_height; | |
242 Uint32 *srcp = (Uint32 *) info->s_pixels; | |
243 int srcskip = info->s_skip >> 2; | |
244 Uint32 *dstp = (Uint32 *) info->d_pixels; | |
245 int dstskip = info->d_skip >> 2; | |
246 Uint32 dalpha = info->dst->Amask; | |
247 Uint8 load[8]; | |
248 | |
249 *(Uint64 *) load = 0x00fefefe00fefefeULL; /* alpha128 mask */ | |
250 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ | |
251 *(Uint64 *) load = 0x0001010100010101ULL; /* !alpha128 mask */ | |
252 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ | |
253 movd_m2r(dalpha, mm7); /* dst alpha mask */ | |
254 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
255 while (height--) { | |
256 /* *INDENT-OFF* */ | |
257 DUFFS_LOOP_DOUBLE2( | |
258 { | |
259 Uint32 s = *srcp++; | |
260 Uint32 d = *dstp; | |
261 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | |
262 + (s & d & 0x00010101)) | dalpha; | |
263 },{ | |
264 movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ | |
265 movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ | |
266 | |
267 movq_m2r((*srcp), mm1);/* 2 x src -> mm1(ARGBARGB) */ | |
268 movq_r2r(mm1, mm5); /* 2 x src -> mm5(ARGBARGB) */ | |
269 | |
270 pand_r2r(mm4, mm6); /* dst & mask -> mm6 */ | |
271 pand_r2r(mm4, mm5); /* src & mask -> mm5 */ | |
272 paddd_r2r(mm6, mm5); /* mm6 + mm5 -> mm5 */ | |
273 pand_r2r(mm1, mm2); /* src & dst -> mm2 */ | |
274 psrld_i2r(1, mm5); /* mm5 >> 1 -> mm5 */ | |
275 pand_r2r(mm3, mm2); /* mm2 & !mask -> mm2 */ | |
276 paddd_r2r(mm5, mm2); /* mm5 + mm2 -> mm2 */ | |
277 | |
278 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | |
279 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ | |
280 dstp += 2; | |
281 srcp += 2; | |
282 }, width); | |
283 /* *INDENT-ON* */ | |
284 srcp += srcskip; | |
285 dstp += dstskip; | |
286 } | |
287 emms(); | |
288 } | |
289 | |
290 /* fast RGB888->(A)RGB888 blending with surface alpha */ | |
291 static void | |
292 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info) | |
293 { | |
294 SDL_PixelFormat *df = info->dst; | |
295 unsigned alpha = info->src->alpha; | |
296 | |
297 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | |
298 /* only call a128 version when R,G,B occupy lower bits */ | |
299 BlitRGBtoRGBSurfaceAlpha128MMX(info); | |
300 } else { | |
301 int width = info->d_width; | |
302 int height = info->d_height; | |
303 Uint32 *srcp = (Uint32 *) info->s_pixels; | |
304 int srcskip = info->s_skip >> 2; | |
305 Uint32 *dstp = (Uint32 *) info->d_pixels; | |
306 int dstskip = info->d_skip >> 2; | |
307 | |
308 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ | |
309 /* form the alpha mult */ | |
310 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ | |
311 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | |
312 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | |
313 alpha = | |
314 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> | |
315 Bshift); | |
316 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ | |
317 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ | |
318 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ | |
319 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ | |
320 movd_m2r(df->Amask, mm7); /* dst alpha mask */ | |
321 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
322 | |
323 while (height--) { | |
324 /* *INDENT-OFF* */ | |
325 DUFFS_LOOP_DOUBLE2({ | |
326 /* One Pixel Blend */ | |
327 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
328 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
329 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ | |
330 punpcklbw_r2r(mm5, mm2); /* 0A0R0G0B -> mm2(dst) */ | |
331 | |
332 psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ | |
333 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
334 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | |
335 paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | |
336 | |
337 packuswb_r2r(mm5, mm2); /* ARGBARGB -> mm2 */ | |
338 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | |
339 movd_r2m(mm2, *dstp);/* mm2 -> pixel */ | |
340 ++srcp; | |
341 ++dstp; | |
342 },{ | |
343 /* Two Pixels Blend */ | |
344 movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ | |
345 movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ | |
346 movq_r2r(mm0, mm1); /* 2 x src -> mm1(ARGBARGB) */ | |
347 movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ | |
348 | |
349 punpcklbw_r2r(mm5, mm0); /* low - 0A0R0G0B -> mm0(src1) */ | |
350 punpckhbw_r2r(mm5, mm1); /* high - 0A0R0G0B -> mm1(src2) */ | |
351 punpcklbw_r2r(mm5, mm2); /* low - 0A0R0G0B -> mm2(dst1) */ | |
352 punpckhbw_r2r(mm5, mm6); /* high - 0A0R0G0B -> mm6(dst2) */ | |
353 | |
354 psubw_r2r(mm2, mm0);/* src1 - dst1 -> mm0 */ | |
355 pmullw_r2r(mm4, mm0); /* mm0 * alpha -> mm0 */ | |
356 psrlw_i2r(8, mm0); /* mm0 >> 8 -> mm0 */ | |
357 paddb_r2r(mm0, mm2); /* mm0 + mm2(dst1) -> mm2 */ | |
358 | |
359 psubw_r2r(mm6, mm1);/* src2 - dst2 -> mm1 */ | |
360 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
361 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | |
362 paddb_r2r(mm1, mm6); /* mm1 + mm6(dst2) -> mm6 */ | |
363 | |
364 packuswb_r2r(mm6, mm2); /* ARGBARGB -> mm2 */ | |
365 por_r2r(mm7, mm2); /* mm7(dst alpha) | mm2 -> mm2 */ | |
366 | |
367 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ | |
368 | |
369 srcp += 2; | |
370 dstp += 2; | |
371 }, width); | |
372 /* *INDENT-ON* */ | |
373 srcp += srcskip; | |
374 dstp += dstskip; | |
375 } | |
376 emms(); | |
377 } | |
378 } | |
379 | |
380 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | |
381 static void | |
382 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info) | |
383 { | |
384 int width = info->d_width; | |
385 int height = info->d_height; | |
386 Uint32 *srcp = (Uint32 *) info->s_pixels; | |
387 int srcskip = info->s_skip >> 2; | |
388 Uint32 *dstp = (Uint32 *) info->d_pixels; | |
389 int dstskip = info->d_skip >> 2; | |
390 SDL_PixelFormat *sf = info->src; | |
391 Uint32 amask = sf->Amask; | |
392 | |
393 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ | |
394 /* form multiplication mask */ | |
395 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ | |
396 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ | |
397 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ | |
398 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ | |
399 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ | |
400 /* form channel masks */ | |
401 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ | |
402 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ | |
403 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ | |
404 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ | |
405 /* get alpha channel shift */ | |
406 /* *INDENT-OFF* */ | |
407 __asm__ __volatile__ ( | |
408 "movd %0, %%mm5" | |
409 : : "rm" ((Uint32) sf->Ashift) ); /* Ashift -> mm5 */ | |
410 /* *INDENT-ON* */ | |
411 | |
412 while (height--) { | |
413 /* *INDENT-OFF* */ | |
414 DUFFS_LOOP4({ | |
415 Uint32 alpha = *srcp & amask; | |
416 /* FIXME: Here we special-case opaque alpha since the | |
417 compositioning used (>>8 instead of /255) doesn't handle | |
418 it correctly. Also special-case alpha=0 for speed? | |
419 Benchmark this! */ | |
420 if(alpha == 0) { | |
421 /* do nothing */ | |
422 } else if(alpha == amask) { | |
423 /* opaque alpha -- copy RGB, keep dst alpha */ | |
424 /* using MMX here to free up regular registers for other things */ | |
425 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
426 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
427 pand_r2r(mm0, mm1); /* src & chanmask -> mm1 */ | |
428 pand_r2r(mm3, mm2); /* dst & ~chanmask -> mm2 */ | |
429 por_r2r(mm1, mm2); /* src | dst -> mm2 */ | |
430 movd_r2m(mm2, (*dstp)); /* mm2 -> dst */ | |
431 } else { | |
432 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
433 punpcklbw_r2r(mm6, mm1); /* 0A0R0G0B -> mm1 */ | |
434 | |
435 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
436 punpcklbw_r2r(mm6, mm2); /* 0A0R0G0B -> mm2 */ | |
437 | |
438 __asm__ __volatile__ ( | |
439 "movd %0, %%mm4" | |
440 : : "r" (alpha) ); /* 0000A000 -> mm4 */ | |
441 psrld_r2r(mm5, mm4); /* mm4 >> mm5 -> mm4 (0000000A) */ | |
442 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | |
443 punpcklwd_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | |
444 pand_r2r(mm7, mm4); /* 000A0A0A -> mm4, preserve dst alpha on add */ | |
445 | |
446 /* blend */ | |
447 psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ | |
448 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
449 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1(000R0G0B) */ | |
450 paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | |
451 | |
452 packuswb_r2r(mm6, mm2); /* 0000ARGB -> mm2 */ | |
453 movd_r2m(mm2, *dstp);/* mm2 -> dst */ | |
454 } | |
455 ++srcp; | |
456 ++dstp; | |
457 }, width); | |
458 /* *INDENT-ON* */ | |
459 srcp += srcskip; | |
460 dstp += dstskip; | |
461 } | |
462 emms(); | |
463 } | |
464 | |
465 /* End GCC_ASMBLIT */ | |
466 | |
467 #elif MSVC_ASMBLIT | |
468 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 202 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
469 static void | 203 static void |
470 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info) | 204 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info) |
471 { | 205 { |
472 int width = info->d_width; | 206 int width = info->d_width; |
635 Uint64 multmask; | 369 Uint64 multmask; |
636 | 370 |
637 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | 371 __m64 src1, dst1, mm_alpha, mm_zero, dmask; |
638 | 372 |
639 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | 373 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
640 /* *INDENT-OFF* */ | 374 multmask = 0xFFFF; |
641 multmask = ~(0xFFFFI64 << (ashift * 2)); | 375 multmask <<= (ashift * 2); |
642 /* *INDENT-ON* */ | 376 multmask = ~multmask; |
643 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ | 377 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ |
644 | 378 |
645 while (height--) { | 379 while (height--) { |
646 /* *INDENT-OFF* */ | 380 /* *INDENT-OFF* */ |
647 DUFFS_LOOP4({ | 381 DUFFS_LOOP4({ |
681 dstp += dstskip; | 415 dstp += dstskip; |
682 } | 416 } |
683 _mm_empty(); | 417 _mm_empty(); |
684 } | 418 } |
685 | 419 |
686 /* End MSVC_ASMBLIT */ | 420 #endif /* __MMX__ */ |
687 | |
688 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689 | 421 |
690 #if SDL_ALTIVEC_BLITTERS | 422 #if SDL_ALTIVEC_BLITTERS |
691 #if __MWERKS__ | 423 #if __MWERKS__ |
692 #pragma altivec_model on | 424 #pragma altivec_model on |
693 #endif | 425 #endif |
1637 srcp += srcskip; | 1369 srcp += srcskip; |
1638 dstp += dstskip; | 1370 dstp += dstskip; |
1639 } | 1371 } |
1640 } | 1372 } |
1641 | 1373 |
1642 #if GCC_ASMBLIT | 1374 #ifdef __MMX__ |
1643 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | |
1644 static void | |
1645 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) | |
1646 { | |
1647 int width = info->d_width; | |
1648 int height = info->d_height; | |
1649 Uint32 *srcp = (Uint32 *) info->s_pixels; | |
1650 int srcskip = info->s_skip >> 2; | |
1651 Uint32 *dstp = (Uint32 *) info->d_pixels; | |
1652 int dstskip = info->d_skip >> 2; | |
1653 SDL_PixelFormat *sf = info->src; | |
1654 Uint32 amask = sf->Amask; | |
1655 | |
1656 __asm__( | |
1657 /* make mm6 all zeros. */ | |
1658 "pxor %%mm6, %%mm6\n" | |
1659 /* Make a mask to preserve the alpha. */ | |
1660 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ | |
1661 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ | |
1662 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ | |
1663 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ | |
1664 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ | |
1665 /* form channel masks */ | |
1666 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ | |
1667 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ | |
1668 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ | |
1669 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ | |
1670 /* get alpha channel shift */ | |
1671 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ | |
1672 : /* nothing */ : "rm"(amask), "rm"((Uint32) sf->Ashift)); | |
1673 | |
1674 while (height--) { | |
1675 | |
1676 /* *INDENT-OFF* */ | |
1677 DUFFS_LOOP4({ | |
1678 Uint32 alpha; | |
1679 | |
1680 __asm__ ( | |
1681 "prefetch 64(%0)\n" | |
1682 "prefetch 64(%1)\n" | |
1683 : : "r" (srcp), "r" (dstp) ); | |
1684 | |
1685 alpha = *srcp & amask; | |
1686 /* FIXME: Here we special-case opaque alpha since the | |
1687 compositioning used (>>8 instead of /255) doesn't handle | |
1688 it correctly. Also special-case alpha=0 for speed? | |
1689 Benchmark this! */ | |
1690 if(alpha == 0) { | |
1691 /* do nothing */ | |
1692 } | |
1693 else if(alpha == amask) { | |
1694 /* opaque alpha -- copy RGB, keep dst alpha */ | |
1695 /* using MMX here to free up regular registers for other things */ | |
1696 __asm__ ( | |
1697 "movd (%0), %%mm0\n\t" /* src(ARGB) -> mm0 (0000ARGB)*/ | |
1698 "movd (%1), %%mm1\n\t" /* dst(ARGB) -> mm1 (0000ARGB)*/ | |
1699 "pand %%mm4, %%mm0\n\t" /* src & chanmask -> mm0 */ | |
1700 "pand %%mm3, %%mm1\n\t" /* dst & ~chanmask -> mm1 */ | |
1701 "por %%mm0, %%mm1\n\t" /* src | dst -> mm1 */ | |
1702 "movd %%mm1, (%1) \n\t" /* mm1 -> dst */ | |
1703 | |
1704 : : "r" (srcp), "r" (dstp) ); | |
1705 } | |
1706 | |
1707 else { | |
1708 __asm__ ( | |
1709 /* load in the source, and dst. */ | |
1710 "movd (%0), %%mm0\n" /* mm0(s) = 0 0 0 0 | As Rs Gs Bs */ | |
1711 "movd (%1), %%mm1\n" /* mm1(d) = 0 0 0 0 | Ad Rd Gd Bd */ | |
1712 | |
1713 /* Move the src alpha into mm2 */ | |
1714 | |
1715 /* if supporting pshufw */ | |
1716 /*"pshufw $0x55, %%mm0, %%mm2\n" */ /* mm2 = 0 As 0 As | 0 As 0 As */ | |
1717 /*"psrlw $8, %%mm2\n" */ | |
1718 | |
1719 /* else: */ | |
1720 "movd %2, %%mm2\n" | |
1721 "psrld %%mm5, %%mm2\n" /* mm2 = 0 0 0 0 | 0 0 0 As */ | |
1722 "punpcklwd %%mm2, %%mm2\n" /* mm2 = 0 0 0 0 | 0 As 0 As */ | |
1723 "punpckldq %%mm2, %%mm2\n" /* mm2 = 0 As 0 As | 0 As 0 As */ | |
1724 "pand %%mm7, %%mm2\n" /* to preserve dest alpha */ | |
1725 | |
1726 /* move the colors into words. */ | |
1727 "punpcklbw %%mm6, %%mm0\n" /* mm0 = 0 As 0 Rs | 0 Gs 0 Bs */ | |
1728 "punpcklbw %%mm6, %%mm1\n" /* mm1 = 0 Ad 0 Rd | 0 Gd 0 Bd */ | |
1729 | |
1730 /* src - dst */ | |
1731 "psubw %%mm1, %%mm0\n" /* mm0 = As-Ad Rs-Rd | Gs-Gd Bs-Bd */ | |
1732 | |
1733 /* A * (src-dst) */ | |
1734 "pmullw %%mm2, %%mm0\n" /* mm0 = 0*As-d As*Rs-d | As*Gs-d As*Bs-d */ | |
1735 "psrlw $8, %%mm0\n" /* mm0 = 0>>8 Rc>>8 | Gc>>8 Bc>>8 */ | |
1736 "paddb %%mm1, %%mm0\n" /* mm0 = 0+Ad Rc+Rd | Gc+Gd Bc+Bd */ | |
1737 | |
1738 "packuswb %%mm0, %%mm0\n" /* mm0 = | Ac Rc Gc Bc */ | |
1739 | |
1740 "movd %%mm0, (%1)\n" /* result in mm0 */ | |
1741 | |
1742 : : "r" (srcp), "r" (dstp), "r" (alpha) ); | |
1743 | |
1744 } | |
1745 ++srcp; | |
1746 ++dstp; | |
1747 }, width); | |
1748 /* *INDENT-ON* */ | |
1749 srcp += srcskip; | |
1750 dstp += dstskip; | |
1751 } | |
1752 | |
1753 __asm__("emms\n":); | |
1754 } | |
1755 | |
1756 /* End GCC_ASMBLIT*/ | |
1757 | |
1758 #elif MSVC_ASMBLIT | |
1759 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | 1375 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ |
1760 static void | 1376 static void |
1761 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) | 1377 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) |
1762 { | 1378 { |
1763 int width = info->d_width; | 1379 int width = info->d_width; |
1773 Uint64 multmask; | 1389 Uint64 multmask; |
1774 | 1390 |
1775 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | 1391 __m64 src1, dst1, mm_alpha, mm_zero, dmask; |
1776 | 1392 |
1777 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | 1393 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
1778 /* *INDENT-OFF* */ | 1394 multmask = 0xFFFF; |
1779 multmask = ~(0xFFFFI64 << (ashift * 2)); | 1395 multmask <<= (ashift * 2); |
1780 /* *INDENT-ON* */ | 1396 multmask = ~multmask; |
1781 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ | 1397 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ |
1782 | 1398 |
1783 while (height--) { | 1399 while (height--) { |
1784 /* *INDENT-OFF* */ | 1400 /* *INDENT-OFF* */ |
1785 DUFFS_LOOP4({ | 1401 DUFFS_LOOP4({ |
1824 dstp += dstskip; | 1440 dstp += dstskip; |
1825 } | 1441 } |
1826 _mm_empty(); | 1442 _mm_empty(); |
1827 } | 1443 } |
1828 | 1444 |
1829 /* End MSVC_ASMBLIT */ | 1445 #endif /* __MMX__ */ |
1830 | |
1831 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
1832 | 1446 |
1833 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ | 1447 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ |
1834 | 1448 |
1835 /* blend a single 16 bit pixel at 50% */ | 1449 /* blend a single 16 bit pixel at 50% */ |
1836 #define BLEND16_50(d, s, mask) \ | 1450 #define BLEND16_50(d, s, mask) \ |
1938 dstp += dstskip; | 1552 dstp += dstskip; |
1939 } | 1553 } |
1940 } | 1554 } |
1941 } | 1555 } |
1942 | 1556 |
1943 #if GCC_ASMBLIT | 1557 #ifdef __MMX__ |
1944 /* fast RGB565->RGB565 blending with surface alpha */ | 1558 |
1945 static void | |
1946 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info) | |
1947 { | |
1948 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ | |
1949 if (alpha == 128) { | |
1950 Blit16to16SurfaceAlpha128(info, 0xf7de); | |
1951 } else { | |
1952 int width = info->d_width; | |
1953 int height = info->d_height; | |
1954 Uint16 *srcp = (Uint16 *) info->s_pixels; | |
1955 int srcskip = info->s_skip >> 1; | |
1956 Uint16 *dstp = (Uint16 *) info->d_pixels; | |
1957 int dstskip = info->d_skip >> 1; | |
1958 Uint32 s, d; | |
1959 Uint8 load[8]; | |
1960 | |
1961 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ | |
1962 *(Uint64 *) load = alpha; | |
1963 alpha >>= 3; /* downscale alpha to 5 bits */ | |
1964 | |
1965 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ | |
1966 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | |
1967 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | |
1968 /* position alpha to allow for mullo and mulhi on diff channels | |
1969 to reduce the number of operations */ | |
1970 psllq_i2r(3, mm0); | |
1971 | |
1972 /* Setup the 565 color channel masks */ | |
1973 *(Uint64 *) load = 0x07E007E007E007E0ULL; | |
1974 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ | |
1975 *(Uint64 *) load = 0x001F001F001F001FULL; | |
1976 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ | |
1977 while (height--) { | |
1978 /* *INDENT-OFF* */ | |
1979 DUFFS_LOOP_QUATRO2( | |
1980 { | |
1981 s = *srcp++; | |
1982 d = *dstp; | |
1983 /* | |
1984 * shift out the middle component (green) to | |
1985 * the high 16 bits, and process all three RGB | |
1986 * components at the same time. | |
1987 */ | |
1988 s = (s | s << 16) & 0x07e0f81f; | |
1989 d = (d | d << 16) & 0x07e0f81f; | |
1990 d += (s - d) * alpha >> 5; | |
1991 d &= 0x07e0f81f; | |
1992 *dstp++ = d | d >> 16; | |
1993 },{ | |
1994 s = *srcp++; | |
1995 d = *dstp; | |
1996 /* | |
1997 * shift out the middle component (green) to | |
1998 * the high 16 bits, and process all three RGB | |
1999 * components at the same time. | |
2000 */ | |
2001 s = (s | s << 16) & 0x07e0f81f; | |
2002 d = (d | d << 16) & 0x07e0f81f; | |
2003 d += (s - d) * alpha >> 5; | |
2004 d &= 0x07e0f81f; | |
2005 *dstp++ = d | d >> 16; | |
2006 s = *srcp++; | |
2007 d = *dstp; | |
2008 /* | |
2009 * shift out the middle component (green) to | |
2010 * the high 16 bits, and process all three RGB | |
2011 * components at the same time. | |
2012 */ | |
2013 s = (s | s << 16) & 0x07e0f81f; | |
2014 d = (d | d << 16) & 0x07e0f81f; | |
2015 d += (s - d) * alpha >> 5; | |
2016 d &= 0x07e0f81f; | |
2017 *dstp++ = d | d >> 16; | |
2018 },{ | |
2019 movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ | |
2020 movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ | |
2021 | |
2022 /* red -- does not need a mask since the right shift clears | |
2023 the uninteresting bits */ | |
2024 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2025 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2026 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 [000r 000r 000r 000r] */ | |
2027 psrlw_i2r(11, mm6); /* mm6 >> 11 -> mm6 [000r 000r 000r 000r] */ | |
2028 | |
2029 /* blend */ | |
2030 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2031 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2032 /* alpha used is actually 11 bits | |
2033 11 + 5 = 16 bits, so the sign bits are lost */ | |
2034 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
2035 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2036 psllw_i2r(11, mm6); /* mm6 << 11 -> mm6 */ | |
2037 | |
2038 movq_r2r(mm6, mm1); /* save new reds in dsts */ | |
2039 | |
2040 /* green -- process the bits in place */ | |
2041 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2042 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2043 pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ | |
2044 pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ | |
2045 | |
2046 /* blend */ | |
2047 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2048 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2049 /* 11 + 11 - 16 = 6 bits, so all the lower uninteresting | |
2050 bits are gone and the sign bits present */ | |
2051 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2052 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2053 | |
2054 por_r2r(mm6, mm1); /* save new greens in dsts */ | |
2055 | |
2056 /* blue */ | |
2057 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2058 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2059 pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ | |
2060 pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2061 | |
2062 /* blend */ | |
2063 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2064 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2065 /* 11 + 5 = 16 bits, so the sign bits are lost and | |
2066 the interesting bits will need to be MASKed */ | |
2067 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
2068 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2069 pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2070 | |
2071 por_r2r(mm6, mm1); /* save new blues in dsts */ | |
2072 | |
2073 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ | |
2074 | |
2075 srcp += 4; | |
2076 dstp += 4; | |
2077 }, width); | |
2078 /* *INDENT-ON* */ | |
2079 srcp += srcskip; | |
2080 dstp += dstskip; | |
2081 } | |
2082 emms(); | |
2083 } | |
2084 } | |
2085 | |
2086 /* fast RGB555->RGB555 blending with surface alpha */ | |
2087 static void | |
2088 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info) | |
2089 { | |
2090 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ | |
2091 if (alpha == 128) { | |
2092 Blit16to16SurfaceAlpha128(info, 0xfbde); | |
2093 } else { | |
2094 int width = info->d_width; | |
2095 int height = info->d_height; | |
2096 Uint16 *srcp = (Uint16 *) info->s_pixels; | |
2097 int srcskip = info->s_skip >> 1; | |
2098 Uint16 *dstp = (Uint16 *) info->d_pixels; | |
2099 int dstskip = info->d_skip >> 1; | |
2100 Uint32 s, d; | |
2101 Uint8 load[8]; | |
2102 | |
2103 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ | |
2104 *(Uint64 *) load = alpha; | |
2105 alpha >>= 3; /* downscale alpha to 5 bits */ | |
2106 | |
2107 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ | |
2108 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | |
2109 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | |
2110 /* position alpha to allow for mullo and mulhi on diff channels | |
2111 to reduce the number of operations */ | |
2112 psllq_i2r(3, mm0); | |
2113 | |
2114 /* Setup the 555 color channel masks */ | |
2115 *(Uint64 *) load = 0x03E003E003E003E0ULL; | |
2116 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ | |
2117 *(Uint64 *) load = 0x001F001F001F001FULL; | |
2118 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ | |
2119 while (height--) { | |
2120 /* *INDENT-OFF* */ | |
2121 DUFFS_LOOP_QUATRO2( | |
2122 { | |
2123 s = *srcp++; | |
2124 d = *dstp; | |
2125 /* | |
2126 * shift out the middle component (green) to | |
2127 * the high 16 bits, and process all three RGB | |
2128 * components at the same time. | |
2129 */ | |
2130 s = (s | s << 16) & 0x03e07c1f; | |
2131 d = (d | d << 16) & 0x03e07c1f; | |
2132 d += (s - d) * alpha >> 5; | |
2133 d &= 0x03e07c1f; | |
2134 *dstp++ = d | d >> 16; | |
2135 },{ | |
2136 s = *srcp++; | |
2137 d = *dstp; | |
2138 /* | |
2139 * shift out the middle component (green) to | |
2140 * the high 16 bits, and process all three RGB | |
2141 * components at the same time. | |
2142 */ | |
2143 s = (s | s << 16) & 0x03e07c1f; | |
2144 d = (d | d << 16) & 0x03e07c1f; | |
2145 d += (s - d) * alpha >> 5; | |
2146 d &= 0x03e07c1f; | |
2147 *dstp++ = d | d >> 16; | |
2148 s = *srcp++; | |
2149 d = *dstp; | |
2150 /* | |
2151 * shift out the middle component (green) to | |
2152 * the high 16 bits, and process all three RGB | |
2153 * components at the same time. | |
2154 */ | |
2155 s = (s | s << 16) & 0x03e07c1f; | |
2156 d = (d | d << 16) & 0x03e07c1f; | |
2157 d += (s - d) * alpha >> 5; | |
2158 d &= 0x03e07c1f; | |
2159 *dstp++ = d | d >> 16; | |
2160 },{ | |
2161 movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ | |
2162 movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ | |
2163 | |
2164 /* red -- process the bits in place */ | |
2165 psllq_i2r(5, mm4); /* turn MASKGREEN into MASKRED */ | |
2166 /* by reusing the GREEN mask we free up another mmx | |
2167 register to accumulate the result */ | |
2168 | |
2169 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2170 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2171 pand_r2r(mm4, mm5); /* src & MASKRED -> mm5 */ | |
2172 pand_r2r(mm4, mm6); /* dst & MASKRED -> mm6 */ | |
2173 | |
2174 /* blend */ | |
2175 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2176 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2177 /* 11 + 15 - 16 = 10 bits, uninteresting bits will be | |
2178 cleared by a MASK below */ | |
2179 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2180 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2181 pand_r2r(mm4, mm6); /* mm6 & MASKRED -> mm6 */ | |
2182 | |
2183 psrlq_i2r(5, mm4); /* turn MASKRED back into MASKGREEN */ | |
2184 | |
2185 movq_r2r(mm6, mm1); /* save new reds in dsts */ | |
2186 | |
2187 /* green -- process the bits in place */ | |
2188 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2189 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2190 pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ | |
2191 pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ | |
2192 | |
2193 /* blend */ | |
2194 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2195 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2196 /* 11 + 10 - 16 = 5 bits, so all the lower uninteresting | |
2197 bits are gone and the sign bits present */ | |
2198 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2199 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2200 | |
2201 por_r2r(mm6, mm1); /* save new greens in dsts */ | |
2202 | |
2203 /* blue */ | |
2204 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2205 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2206 pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ | |
2207 pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2208 | |
2209 /* blend */ | |
2210 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2211 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2212 /* 11 + 5 = 16 bits, so the sign bits are lost and | |
2213 the interesting bits will need to be MASKed */ | |
2214 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
2215 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2216 pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2217 | |
2218 por_r2r(mm6, mm1); /* save new blues in dsts */ | |
2219 | |
2220 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ | |
2221 | |
2222 srcp += 4; | |
2223 dstp += 4; | |
2224 }, width); | |
2225 /* *INDENT-ON* */ | |
2226 srcp += srcskip; | |
2227 dstp += dstskip; | |
2228 } | |
2229 emms(); | |
2230 } | |
2231 } | |
2232 | |
2233 /* End GCC_ASMBLIT */ | |
2234 | |
2235 #elif MSVC_ASMBLIT | |
2236 /* fast RGB565->RGB565 blending with surface alpha */ | 1559 /* fast RGB565->RGB565 blending with surface alpha */ |
2237 static void | 1560 static void |
2238 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info) | 1561 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info) |
2239 { | 1562 { |
2240 unsigned alpha = info->src->alpha; | 1563 unsigned alpha = info->src->alpha; |
2505 dstp += dstskip; | 1828 dstp += dstskip; |
2506 } | 1829 } |
2507 _mm_empty(); | 1830 _mm_empty(); |
2508 } | 1831 } |
2509 } | 1832 } |
2510 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | 1833 |
1834 #endif /* __MMX__ */ | |
2511 | 1835 |
2512 /* fast RGB565->RGB565 blending with surface alpha */ | 1836 /* fast RGB565->RGB565 blending with surface alpha */ |
2513 static void | 1837 static void |
2514 Blit565to565SurfaceAlpha(SDL_BlitInfo * info) | 1838 Blit565to565SurfaceAlpha(SDL_BlitInfo * info) |
2515 { | 1839 { |
2850 return BlitNto1SurfaceAlpha; | 2174 return BlitNto1SurfaceAlpha; |
2851 | 2175 |
2852 case 2: | 2176 case 2: |
2853 if (surface->map->identity) { | 2177 if (surface->map->identity) { |
2854 if (df->Gmask == 0x7e0) { | 2178 if (df->Gmask == 0x7e0) { |
2855 #if MMX_ASMBLIT | 2179 #ifdef __MMX__ |
2856 if (SDL_HasMMX()) | 2180 if (SDL_HasMMX()) |
2857 return Blit565to565SurfaceAlphaMMX; | 2181 return Blit565to565SurfaceAlphaMMX; |
2858 else | 2182 else |
2859 #endif | 2183 #endif |
2860 return Blit565to565SurfaceAlpha; | 2184 return Blit565to565SurfaceAlpha; |
2861 } else if (df->Gmask == 0x3e0) { | 2185 } else if (df->Gmask == 0x3e0) { |
2862 #if MMX_ASMBLIT | 2186 #ifdef __MMX__ |
2863 if (SDL_HasMMX()) | 2187 if (SDL_HasMMX()) |
2864 return Blit555to555SurfaceAlphaMMX; | 2188 return Blit555to555SurfaceAlphaMMX; |
2865 else | 2189 else |
2866 #endif | 2190 #endif |
2867 return Blit555to555SurfaceAlpha; | 2191 return Blit555to555SurfaceAlpha; |
2871 | 2195 |
2872 case 4: | 2196 case 4: |
2873 if (sf->Rmask == df->Rmask | 2197 if (sf->Rmask == df->Rmask |
2874 && sf->Gmask == df->Gmask | 2198 && sf->Gmask == df->Gmask |
2875 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { | 2199 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { |
2876 #if MMX_ASMBLIT | 2200 #ifdef __MMX__ |
2877 if (sf->Rshift % 8 == 0 | 2201 if (sf->Rshift % 8 == 0 |
2878 && sf->Gshift % 8 == 0 | 2202 && sf->Gshift % 8 == 0 |
2879 && sf->Bshift % 8 == 0 && SDL_HasMMX()) | 2203 && sf->Bshift % 8 == 0 && SDL_HasMMX()) |
2880 return BlitRGBtoRGBSurfaceAlphaMMX; | 2204 return BlitRGBtoRGBSurfaceAlphaMMX; |
2881 #endif | 2205 #endif |
2926 | 2250 |
2927 case 4: | 2251 case 4: |
2928 if (sf->Rmask == df->Rmask | 2252 if (sf->Rmask == df->Rmask |
2929 && sf->Gmask == df->Gmask | 2253 && sf->Gmask == df->Gmask |
2930 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { | 2254 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { |
2931 #if MMX_ASMBLIT | 2255 #ifdef __MMX__ |
2932 if (sf->Rshift % 8 == 0 | 2256 if (sf->Rshift % 8 == 0 |
2933 && sf->Gshift % 8 == 0 | 2257 && sf->Gshift % 8 == 0 |
2934 && sf->Bshift % 8 == 0 | 2258 && sf->Bshift % 8 == 0 |
2935 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { | 2259 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { |
2936 if (SDL_Has3DNow()) | 2260 if (SDL_Has3DNow()) |