comparison src/video/SDL_blit_A.c @ 1668:4da1ee79c9af SDL-1.3

more tweaking indent options
author Sam Lantinga <slouken@libsdl.org>
date Mon, 29 May 2006 04:04:35 +0000
parents 782fd950bd46
children a1ebb17f9c52
1667:1fddae038bc8 (left column, before) 1668:4da1ee79c9af (right column, after)
45 45
46 /* Functions to perform alpha blended blitting */ 46 /* Functions to perform alpha blended blitting */
47 47
48 /* N->1 blending with per-surface alpha */ 48 /* N->1 blending with per-surface alpha */
49 static void 49 static void
50 BlitNto1SurfaceAlpha (SDL_BlitInfo * info) 50 BlitNto1SurfaceAlpha(SDL_BlitInfo * info)
51 { 51 {
52 int width = info->d_width; 52 int width = info->d_width;
53 int height = info->d_height; 53 int height = info->d_height;
54 Uint8 *src = info->s_pixels; 54 Uint8 *src = info->s_pixels;
55 int srcskip = info->s_skip; 55 int srcskip = info->s_skip;
101 } 101 }
102 } 102 }
103 103
104 /* N->1 blending with pixel alpha */ 104 /* N->1 blending with pixel alpha */
105 static void 105 static void
106 BlitNto1PixelAlpha (SDL_BlitInfo * info) 106 BlitNto1PixelAlpha(SDL_BlitInfo * info)
107 { 107 {
108 int width = info->d_width; 108 int width = info->d_width;
109 int height = info->d_height; 109 int height = info->d_height;
110 Uint8 *src = info->s_pixels; 110 Uint8 *src = info->s_pixels;
111 int srcskip = info->s_skip; 111 int srcskip = info->s_skip;
157 } 157 }
158 } 158 }
159 159
160 /* colorkeyed N->1 blending with per-surface alpha */ 160 /* colorkeyed N->1 blending with per-surface alpha */
161 static void 161 static void
162 BlitNto1SurfaceAlphaKey (SDL_BlitInfo * info) 162 BlitNto1SurfaceAlphaKey(SDL_BlitInfo * info)
163 { 163 {
164 int width = info->d_width; 164 int width = info->d_width;
165 int height = info->d_height; 165 int height = info->d_height;
166 Uint8 *src = info->s_pixels; 166 Uint8 *src = info->s_pixels;
167 int srcskip = info->s_skip; 167 int srcskip = info->s_skip;
217 } 217 }
218 218
219 #if GCC_ASMBLIT 219 #if GCC_ASMBLIT
220 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 220 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
221 static void 221 static void
222 BlitRGBtoRGBSurfaceAlpha128MMX (SDL_BlitInfo * info) 222 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info)
223 { 223 {
224 int width = info->d_width; 224 int width = info->d_width;
225 int height = info->d_height; 225 int height = info->d_height;
226 Uint32 *srcp = (Uint32 *) info->s_pixels; 226 Uint32 *srcp = (Uint32 *) info->s_pixels;
227 int srcskip = info->s_skip >> 2; 227 int srcskip = info->s_skip >> 2;
229 int dstskip = info->d_skip >> 2; 229 int dstskip = info->d_skip >> 2;
230 Uint32 dalpha = info->dst->Amask; 230 Uint32 dalpha = info->dst->Amask;
231 Uint8 load[8]; 231 Uint8 load[8];
232 232
233 *(Uint64 *) load = 0x00fefefe00fefefeULL; /* alpha128 mask */ 233 *(Uint64 *) load = 0x00fefefe00fefefeULL; /* alpha128 mask */
234 movq_m2r (*load, mm4); /* alpha128 mask -> mm4 */ 234 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */
235 *(Uint64 *) load = 0x0001010100010101ULL; /* !alpha128 mask */ 235 *(Uint64 *) load = 0x0001010100010101ULL; /* !alpha128 mask */
236 movq_m2r (*load, mm3); /* !alpha128 mask -> mm3 */ 236 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */
237 movd_m2r (dalpha, mm7); /* dst alpha mask */ 237 movd_m2r(dalpha, mm7); /* dst alpha mask */
238 punpckldq_r2r (mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ 238 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */
239 while (height--) { 239 while (height--) {
240 /* *INDENT-OFF* */ 240 /* *INDENT-OFF* */
241 DUFFS_LOOP_DOUBLE2( 241 DUFFS_LOOP_DOUBLE2(
242 { 242 {
243 Uint32 s = *srcp++; 243 Uint32 s = *srcp++;
266 }, width); 266 }, width);
267 /* *INDENT-ON* */ 267 /* *INDENT-ON* */
268 srcp += srcskip; 268 srcp += srcskip;
269 dstp += dstskip; 269 dstp += dstskip;
270 } 270 }
271 emms (); 271 emms();
272 } 272 }
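
For reference, the alpha == 128 fast path above does not multiply at all: it averages source and destination per channel using two constant masks. A minimal scalar sketch of that identity, written as a hypothetical helper (not part of SDL_blit_A.c) and assuming 8 bits per channel:

    #include <stdint.h>

    /* floor((s + d) / 2) on every 8-bit channel at once. */
    static uint32_t average_argb_half(uint32_t s, uint32_t d)
    {
        return (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
               + (s & d & 0x00010101);
    }

The 0x00fefefe mask clears each channel's low bit so the shift cannot carry between channels, and the 0x00010101 term restores the rounding bit where both inputs had it set; the MMX loop loads exactly these masks into mm4 and mm3 and processes two pixels per iteration.
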
273 273
274 /* fast RGB888->(A)RGB888 blending with surface alpha */ 274 /* fast RGB888->(A)RGB888 blending with surface alpha */
275 static void 275 static void
276 BlitRGBtoRGBSurfaceAlphaMMX (SDL_BlitInfo * info) 276 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info)
277 { 277 {
278 SDL_PixelFormat *df = info->dst; 278 SDL_PixelFormat *df = info->dst;
279 unsigned alpha = info->src->alpha; 279 unsigned alpha = info->src->alpha;
280 280
281 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 281 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
282 /* only call a128 version when R,G,B occupy lower bits */ 282 /* only call a128 version when R,G,B occupy lower bits */
283 BlitRGBtoRGBSurfaceAlpha128MMX (info); 283 BlitRGBtoRGBSurfaceAlpha128MMX(info);
284 } else { 284 } else {
285 int width = info->d_width; 285 int width = info->d_width;
286 int height = info->d_height; 286 int height = info->d_height;
287 Uint32 *srcp = (Uint32 *) info->s_pixels; 287 Uint32 *srcp = (Uint32 *) info->s_pixels;
288 int srcskip = info->s_skip >> 2; 288 int srcskip = info->s_skip >> 2;
289 Uint32 *dstp = (Uint32 *) info->d_pixels; 289 Uint32 *dstp = (Uint32 *) info->d_pixels;
290 int dstskip = info->d_skip >> 2; 290 int dstskip = info->d_skip >> 2;
291 291
292 pxor_r2r (mm5, mm5); /* 0 -> mm5 */ 292 pxor_r2r(mm5, mm5); /* 0 -> mm5 */
293 /* form the alpha mult */ 293 /* form the alpha mult */
294 movd_m2r (alpha, mm4); /* 0000000A -> mm4 */ 294 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */
295 punpcklwd_r2r (mm4, mm4); /* 00000A0A -> mm4 */ 295 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */
296 punpckldq_r2r (mm4, mm4); /* 0A0A0A0A -> mm4 */ 296 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */
297 alpha = 297 alpha =
298 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> 298 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->
299 Bshift); 299 Bshift);
300 movd_m2r (alpha, mm0); /* 00000FFF -> mm0 */ 300 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */
301 punpcklbw_r2r (mm0, mm0); /* 00FFFFFF -> mm0 */ 301 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */
302 pand_r2r (mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ 302 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */
303 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ 303 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */
304 movd_m2r (df->Amask, mm7); /* dst alpha mask */ 304 movd_m2r(df->Amask, mm7); /* dst alpha mask */
305 punpckldq_r2r (mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ 305 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */
306 306
307 while (height--) { 307 while (height--) {
308 /* *INDENT-OFF* */ 308 /* *INDENT-OFF* */
309 DUFFS_LOOP_DOUBLE2({ 309 DUFFS_LOOP_DOUBLE2({
310 /* One Pixel Blend */ 310 /* One Pixel Blend */
355 }, width); 355 }, width);
356 /* *INDENT-ON* */ 356 /* *INDENT-ON* */
357 srcp += srcskip; 357 srcp += srcskip;
358 dstp += dstskip; 358 dstp += dstskip;
359 } 359 }
360 emms (); 360 emms();
361 } 361 }
362 } 362 }
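
For other alpha values the code above widens each channel into a 16-bit lane and applies the usual d + ((s - d) * a >> 8) blend, then forces the destination's alpha bits on via Amask. A hypothetical scalar equivalent (not part of the file), assuming the R, G and B channels occupy the three low bytes:

    #include <stdint.h>

    /* Per-channel surface-alpha blend: d' = d + ((s - d) * a >> 8);
       amask is ORed in afterwards so the destination ends up opaque,
       mirroring the mm7/dsta step in the MMX code. */
    static uint32_t blend_surface_alpha(uint32_t s, uint32_t d,
                                        unsigned a, uint32_t amask)
    {
        uint32_t out = 0;
        int shift;
        for (shift = 0; shift < 24; shift += 8) {
            int32_t sc = (int32_t)((s >> shift) & 0xff);
            int32_t dc = (int32_t)((d >> shift) & 0xff);
            out |= (uint32_t)((dc + (((sc - dc) * (int32_t)a) >> 8)) & 0xff) << shift;
        }
        return out | amask;
    }
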
363 363
364 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 364 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
365 static void 365 static void
366 BlitRGBtoRGBPixelAlphaMMX (SDL_BlitInfo * info) 366 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
367 { 367 {
368 int width = info->d_width; 368 int width = info->d_width;
369 int height = info->d_height; 369 int height = info->d_height;
370 Uint32 *srcp = (Uint32 *) info->s_pixels; 370 Uint32 *srcp = (Uint32 *) info->s_pixels;
371 int srcskip = info->s_skip >> 2; 371 int srcskip = info->s_skip >> 2;
372 Uint32 *dstp = (Uint32 *) info->d_pixels; 372 Uint32 *dstp = (Uint32 *) info->d_pixels;
373 int dstskip = info->d_skip >> 2; 373 int dstskip = info->d_skip >> 2;
374 SDL_PixelFormat *sf = info->src; 374 SDL_PixelFormat *sf = info->src;
375 Uint32 amask = sf->Amask; 375 Uint32 amask = sf->Amask;
376 376
377 pxor_r2r (mm6, mm6); /* 0 -> mm6 */ 377 pxor_r2r(mm6, mm6); /* 0 -> mm6 */
378 /* form multiplication mask */ 378 /* form multiplication mask */
379 movd_m2r (sf->Amask, mm7); /* 0000F000 -> mm7 */ 379 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */
380 punpcklbw_r2r (mm7, mm7); /* FF000000 -> mm7 */ 380 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */
381 pcmpeqb_r2r (mm0, mm0); /* FFFFFFFF -> mm0 */ 381 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */
382 movq_r2r (mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ 382 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */
383 pxor_r2r (mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ 383 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */
384 /* form channel masks */ 384 /* form channel masks */
385 movq_r2r (mm7, mm0); /* 00FFFFFF -> mm0 */ 385 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */
386 packsswb_r2r (mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ 386 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */
387 packsswb_r2r (mm6, mm3); /* 0000FFFF -> mm3 */ 387 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */
388 pxor_r2r (mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ 388 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */
389 /* get alpha channel shift */ 389 /* get alpha channel shift */
390 movd_m2r (sf->Ashift, mm5); /* Ashift -> mm5 */ 390 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */
391 391
392 while (height--) { 392 while (height--) {
393 /* *INDENT-OFF* */ 393 /* *INDENT-OFF* */
394 DUFFS_LOOP4({ 394 DUFFS_LOOP4({
395 Uint32 alpha = *srcp & amask; 395 Uint32 alpha = *srcp & amask;
437 }, width); 437 }, width);
438 /* *INDENT-ON* */ 438 /* *INDENT-ON* */
439 srcp += srcskip; 439 srcp += srcskip;
440 dstp += dstskip; 440 dstp += dstskip;
441 } 441 }
442 emms (); 442 emms();
443 } 443 }
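
In the per-pixel variant the blend factor is each source pixel's own alpha byte, and (as the AltiVec pixel-alpha blitters later in the file spell out) the destination keeps its existing alpha. A hedged scalar sketch assuming plain ARGB8888 on both sides; the MMX code instead derives its masks from sf->Amask and sf->Ashift:

    #include <stdint.h>

    /* Blend R, G, B with the source pixel's alpha and preserve the
       destination alpha byte. */
    static uint32_t blend_pixel_alpha(uint32_t s, uint32_t d)
    {
        uint32_t a = s >> 24;
        uint32_t out = d & 0xff000000;   /* keep destination alpha */
        int shift;
        for (shift = 0; shift < 24; shift += 8) {
            int32_t sc = (int32_t)((s >> shift) & 0xff);
            int32_t dc = (int32_t)((d >> shift) & 0xff);
            out |= (uint32_t)((dc + (((sc - dc) * (int32_t)a) >> 8)) & 0xff) << shift;
        }
        return out;
    }
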
444 444
445 /* End GCC_ASMBLIT */ 445 /* End GCC_ASMBLIT */
446 446
447 #elif MSVC_ASMBLIT 447 #elif MSVC_ASMBLIT
448 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 448 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
449 static void 449 static void
450 BlitRGBtoRGBSurfaceAlpha128MMX (SDL_BlitInfo * info) 450 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info)
451 { 451 {
452 int width = info->d_width; 452 int width = info->d_width;
453 int height = info->d_height; 453 int height = info->d_height;
454 Uint32 *srcp = (Uint32 *) info->s_pixels; 454 Uint32 *srcp = (Uint32 *) info->s_pixels;
455 int srcskip = info->s_skip >> 2; 455 int srcskip = info->s_skip >> 2;
457 int dstskip = info->d_skip >> 2; 457 int dstskip = info->d_skip >> 2;
458 Uint32 dalpha = info->dst->Amask; 458 Uint32 dalpha = info->dst->Amask;
459 459
460 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; 460 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
461 461
462 hmask = _mm_set_pi32 (0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ 462 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */
463 lmask = _mm_set_pi32 (0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ 463 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */
464 dsta = _mm_set_pi32 (dalpha, dalpha); /* dst alpha mask -> dsta */ 464 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
465 465
466 while (height--) { 466 while (height--) {
467 int n = width; 467 int n = width;
468 if (n & 1) { 468 if (n & 1) {
469 Uint32 s = *srcp++; 469 Uint32 s = *srcp++;
478 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 478 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
479 479
480 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ 480 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
481 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 481 src2 = src1; /* 2 x src -> src2(ARGBARGB) */
482 482
483 dst2 = _mm_and_si64 (dst2, hmask); /* dst & mask -> dst2 */ 483 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */
484 src2 = _mm_and_si64 (src2, hmask); /* src & mask -> src2 */ 484 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */
485 src2 = _mm_add_pi32 (src2, dst2); /* dst2 + src2 -> src2 */ 485 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */
486 src2 = _mm_srli_pi32 (src2, 1); /* src2 >> 1 -> src2 */ 486 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */
487 487
488 dst1 = _mm_and_si64 (dst1, src1); /* src & dst -> dst1 */ 488 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */
489 dst1 = _mm_and_si64 (dst1, lmask); /* dst1 & !mask -> dst1 */ 489 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */
490 dst1 = _mm_add_pi32 (dst1, src2); /* src2 + dst1 -> dst1 */ 490 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */
491 dst1 = _mm_or_si64 (dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ 491 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */
492 492
493 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */ 493 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */
494 dstp += 2; 494 dstp += 2;
495 srcp += 2; 495 srcp += 2;
496 } 496 }
497 497
498 srcp += srcskip; 498 srcp += srcskip;
499 dstp += dstskip; 499 dstp += dstskip;
500 } 500 }
501 _mm_empty (); 501 _mm_empty();
502 } 502 }
503 503
504 /* fast RGB888->(A)RGB888 blending with surface alpha */ 504 /* fast RGB888->(A)RGB888 blending with surface alpha */
505 static void 505 static void
506 BlitRGBtoRGBSurfaceAlphaMMX (SDL_BlitInfo * info) 506 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info)
507 { 507 {
508 SDL_PixelFormat *df = info->dst; 508 SDL_PixelFormat *df = info->dst;
509 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; 509 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask;
510 unsigned alpha = info->src->alpha; 510 unsigned alpha = info->src->alpha;
511 511
512 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 512 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
513 /* only call a128 version when R,G,B occupy lower bits */ 513 /* only call a128 version when R,G,B occupy lower bits */
514 BlitRGBtoRGBSurfaceAlpha128MMX (info); 514 BlitRGBtoRGBSurfaceAlpha128MMX(info);
515 } else { 515 } else {
516 int width = info->d_width; 516 int width = info->d_width;
517 int height = info->d_height; 517 int height = info->d_height;
518 Uint32 *srcp = (Uint32 *) info->s_pixels; 518 Uint32 *srcp = (Uint32 *) info->s_pixels;
519 int srcskip = info->s_skip >> 2; 519 int srcskip = info->s_skip >> 2;
522 Uint32 dalpha = df->Amask; 522 Uint32 dalpha = df->Amask;
523 Uint32 amult; 523 Uint32 amult;
524 524
525 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; 525 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
526 526
527 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */ 527 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
528 /* form the alpha mult */ 528 /* form the alpha mult */
529 amult = alpha | (alpha << 8); 529 amult = alpha | (alpha << 8);
530 amult = amult | (amult << 16); 530 amult = amult | (amult << 16);
531 chanmask = 531 chanmask =
532 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> 532 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->
533 Bshift); 533 Bshift);
534 mm_alpha = _mm_set_pi32 (0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ 534 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */
535 mm_alpha = _mm_unpacklo_pi8 (mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ 535 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */
536 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ 536 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */
537 dsta = _mm_set_pi32 (dalpha, dalpha); /* dst alpha mask -> dsta */ 537 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
538 538
539 while (height--) { 539 while (height--) {
540 int n = width; 540 int n = width;
541 if (n & 1) { 541 if (n & 1) {
542 /* One Pixel Blend */ 542 /* One Pixel Blend */
543 src2 = _mm_cvtsi32_si64 (*srcp); /* src(ARGB) -> src2 (0000ARGB) */ 543 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */
544 src2 = _mm_unpacklo_pi8 (src2, mm_zero); /* 0A0R0G0B -> src2 */ 544 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */
545 545
546 dst1 = _mm_cvtsi32_si64 (*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */ 546 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
547 dst1 = _mm_unpacklo_pi8 (dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 547 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
548 548
549 src2 = _mm_sub_pi16 (src2, dst1); /* src2 - dst2 -> src2 */ 549 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */
550 src2 = _mm_mullo_pi16 (src2, mm_alpha); /* src2 * alpha -> src2 */ 550 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
551 src2 = _mm_srli_pi16 (src2, 8); /* src2 >> 8 -> src2 */ 551 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
552 dst1 = _mm_add_pi8 (src2, dst1); /* src2 + dst1 -> dst1 */ 552 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */
553 553
554 dst1 = _mm_packs_pu16 (dst1, mm_zero); /* 0000ARGB -> dst1 */ 554 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */
555 dst1 = _mm_or_si64 (dst1, dsta); /* dsta | dst1 -> dst1 */ 555 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
556 *dstp = _mm_cvtsi64_si32 (dst1); /* dst1 -> pixel */ 556 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
557 557
558 ++srcp; 558 ++srcp;
559 ++dstp; 559 ++dstp;
560 560
561 n--; 561 n--;
563 563
564 for (n >>= 1; n > 0; --n) { 564 for (n >>= 1; n > 0; --n) {
565 /* Two Pixels Blend */ 565 /* Two Pixels Blend */
566 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ 566 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
567 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 567 src2 = src1; /* 2 x src -> src2(ARGBARGB) */
568 src1 = _mm_unpacklo_pi8 (src1, mm_zero); /* low - 0A0R0G0B -> src1 */ 568 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */
569 src2 = _mm_unpackhi_pi8 (src2, mm_zero); /* high - 0A0R0G0B -> src2 */ 569 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */
570 570
571 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */ 571 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
572 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 572 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
573 dst1 = _mm_unpacklo_pi8 (dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ 573 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */
574 dst2 = _mm_unpackhi_pi8 (dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ 574 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */
575 575
576 src1 = _mm_sub_pi16 (src1, dst1); /* src1 - dst1 -> src1 */ 576 src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */
577 src1 = _mm_mullo_pi16 (src1, mm_alpha); /* src1 * alpha -> src1 */ 577 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */
578 src1 = _mm_srli_pi16 (src1, 8); /* src1 >> 8 -> src1 */ 578 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */
579 dst1 = _mm_add_pi8 (src1, dst1); /* src1 + dst1(dst1) -> dst1 */ 579 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */
580 580
581 src2 = _mm_sub_pi16 (src2, dst2); /* src2 - dst2 -> src2 */ 581 src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */
582 src2 = _mm_mullo_pi16 (src2, mm_alpha); /* src2 * alpha -> src2 */ 582 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
583 src2 = _mm_srli_pi16 (src2, 8); /* src2 >> 8 -> src2 */ 583 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
584 dst2 = _mm_add_pi8 (src2, dst2); /* src2 + dst2(dst2) -> dst2 */ 584 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */
585 585
586 dst1 = _mm_packs_pu16 (dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ 586 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */
587 dst1 = _mm_or_si64 (dst1, dsta); /* dsta | dst1 -> dst1 */ 587 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
588 588
589 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */ 589 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */
590 590
591 srcp += 2; 591 srcp += 2;
592 dstp += 2; 592 dstp += 2;
593 } 593 }
594 srcp += srcskip; 594 srcp += srcskip;
595 dstp += dstskip; 595 dstp += dstskip;
596 } 596 }
597 _mm_empty (); 597 _mm_empty();
598 } 598 }
599 } 599 }
600 600
601 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 601 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
602 static void 602 static void
603 BlitRGBtoRGBPixelAlphaMMX (SDL_BlitInfo * info) 603 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
604 { 604 {
605 int width = info->d_width; 605 int width = info->d_width;
606 int height = info->d_height; 606 int height = info->d_height;
607 Uint32 *srcp = (Uint32 *) info->s_pixels; 607 Uint32 *srcp = (Uint32 *) info->s_pixels;
608 int srcskip = info->s_skip >> 2; 608 int srcskip = info->s_skip >> 2;
614 Uint32 ashift = sf->Ashift; 614 Uint32 ashift = sf->Ashift;
615 Uint64 multmask; 615 Uint64 multmask;
616 616
617 __m64 src1, dst1, mm_alpha, mm_zero, dmask; 617 __m64 src1, dst1, mm_alpha, mm_zero, dmask;
618 618
619 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */ 619 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
620 multmask = ~(0xFFFFi64 << (ashift * 2)); 620 multmask = ~(0xFFFFi64 << (ashift * 2));
621 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ 621 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */
622 622
623 while (height--) { 623 while (height--) {
624 /* *INDENT-OFF* */ 624 /* *INDENT-OFF* */
656 }, width); 656 }, width);
657 /* *INDENT-ON* */ 657 /* *INDENT-ON* */
658 srcp += srcskip; 658 srcp += srcskip;
659 dstp += dstskip; 659 dstp += dstskip;
660 } 660 }
661 _mm_empty (); 661 _mm_empty();
662 } 662 }
663 663
664 /* End MSVC_ASMBLIT */ 664 /* End MSVC_ASMBLIT */
665 665
666 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 666 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
734 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ 734 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
735 } while (0) 735 } while (0)
736 736
737 /* Calculate the permute vector used for 32->32 swizzling */ 737 /* Calculate the permute vector used for 32->32 swizzling */
738 static vector unsigned char 738 static vector unsigned char
739 calc_swizzle32 (const SDL_PixelFormat * srcfmt, 739 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
740 const SDL_PixelFormat * dstfmt)
741 { 740 {
742 /* 741 /*
743 * We have to assume that the bits that aren't used by other 742 * We have to assume that the bits that aren't used by other
744 * colors is alpha, and it's one complete byte, since some formats 743 * colors is alpha, and it's one complete byte, since some formats
745 * leave alpha with a zero mask, but we should still swizzle the bits. 744 * leave alpha with a zero mask, but we should still swizzle the bits.
756 srcfmt = &default_pixel_format; 755 srcfmt = &default_pixel_format;
757 } 756 }
758 if (!dstfmt) { 757 if (!dstfmt) {
759 dstfmt = &default_pixel_format; 758 dstfmt = &default_pixel_format;
760 } 759 }
761 const vector unsigned char plus = VECUINT8_LITERAL 760 const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
762 (0x00, 0x00, 0x00, 0x00, 761 0x04, 0x04, 0x04, 0x04,
763 0x04, 0x04, 0x04, 0x04, 762 0x08, 0x08, 0x08, 0x08,
764 0x08, 0x08, 0x08, 0x08, 763 0x0C, 0x0C, 0x0C,
765 0x0C, 0x0C, 0x0C, 0x0C); 764 0x0C);
766 vector unsigned char vswiz; 765 vector unsigned char vswiz;
767 vector unsigned int srcvec; 766 vector unsigned int srcvec;
768 #define RESHIFT(X) (3 - ((X) >> 3)) 767 #define RESHIFT(X) (3 - ((X) >> 3))
769 Uint32 rmask = RESHIFT (srcfmt->Rshift) << (dstfmt->Rshift); 768 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
770 Uint32 gmask = RESHIFT (srcfmt->Gshift) << (dstfmt->Gshift); 769 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
771 Uint32 bmask = RESHIFT (srcfmt->Bshift) << (dstfmt->Bshift); 770 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
772 Uint32 amask; 771 Uint32 amask;
773 /* Use zero for alpha if either surface doesn't have alpha */ 772 /* Use zero for alpha if either surface doesn't have alpha */
774 if (dstfmt->Amask) { 773 if (dstfmt->Amask) {
775 amask = 774 amask =
776 ((srcfmt->Amask) ? RESHIFT (srcfmt->Ashift) : 0x10) << (dstfmt-> 775 ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->
777 Ashift); 776 Ashift);
778 } else { 777 } else {
779 amask = 778 amask =
780 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 779 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
781 0xFFFFFFFF); 780 0xFFFFFFFF);
782 } 781 }
783 #undef RESHIFT 782 #undef RESHIFT
784 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask); 783 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
785 vswiz = vec_add (plus, (vector unsigned char) vec_splat (srcvec, 0)); 784 vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
786 return (vswiz); 785 return (vswiz);
787 } 786 }
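
calc_swizzle32 builds, for every destination byte position, the index of the source byte that vec_perm should fetch, so a single permute re-orders the channels of four pixels at once. A hypothetical one-pixel scalar analogue of that re-ordering (alpha handling elided; the real routine substitutes a constant when either format lacks an alpha mask):

    #include <stdint.h>

    /* Move each 8-bit channel from its source shift to its destination
       shift; the AltiVec permute vector encodes the same mapping. */
    static uint32_t swizzle32_scalar(uint32_t px,
                                     int sR, int sG, int sB,
                                     int dR, int dG, int dB)
    {
        return (((px >> sR) & 0xff) << dR) |
               (((px >> sG) & 0xff) << dG) |
               (((px >> sB) & 0xff) << dB);
    }
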
788 787
789 static void 788 static void
790 Blit32to565PixelAlphaAltivec (SDL_BlitInfo * info) 789 Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
791 { 790 {
792 int height = info->d_height; 791 int height = info->d_height;
793 Uint8 *src = (Uint8 *) info->s_pixels; 792 Uint8 *src = (Uint8 *) info->s_pixels;
794 int srcskip = info->s_skip; 793 int srcskip = info->s_skip;
795 Uint8 *dst = (Uint8 *) info->d_pixels; 794 Uint8 *dst = (Uint8 *) info->d_pixels;
796 int dstskip = info->d_skip; 795 int dstskip = info->d_skip;
797 SDL_PixelFormat *srcfmt = info->src; 796 SDL_PixelFormat *srcfmt = info->src;
798 797
799 vector unsigned char v0 = vec_splat_u8 (0); 798 vector unsigned char v0 = vec_splat_u8(0);
800 vector unsigned short v8_16 = vec_splat_u16 (8); 799 vector unsigned short v8_16 = vec_splat_u16(8);
801 vector unsigned short v1_16 = vec_splat_u16 (1); 800 vector unsigned short v1_16 = vec_splat_u16(1);
802 vector unsigned short v2_16 = vec_splat_u16 (2); 801 vector unsigned short v2_16 = vec_splat_u16(2);
803 vector unsigned short v3_16 = vec_splat_u16 (3); 802 vector unsigned short v3_16 = vec_splat_u16(3);
804 vector unsigned int v8_32 = vec_splat_u32 (8); 803 vector unsigned int v8_32 = vec_splat_u32(8);
805 vector unsigned int v16_32 = vec_add (v8_32, v8_32); 804 vector unsigned int v16_32 = vec_add(v8_32, v8_32);
806 vector unsigned short v3f = 805 vector unsigned short v3f =
807 VECUINT16_LITERAL (0x003f, 0x003f, 0x003f, 0x003f, 806 VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
808 0x003f, 0x003f, 0x003f, 0x003f); 807 0x003f, 0x003f, 0x003f, 0x003f);
809 vector unsigned short vfc = 808 vector unsigned short vfc =
810 VECUINT16_LITERAL (0x00fc, 0x00fc, 0x00fc, 0x00fc, 809 VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
811 0x00fc, 0x00fc, 0x00fc, 0x00fc); 810 0x00fc, 0x00fc, 0x00fc, 0x00fc);
812 811
813 /* 812 /*
814 0x10 - 0x1f is the alpha 813 0x10 - 0x1f is the alpha
815 0x00 - 0x0e evens are the red 814 0x00 - 0x0e evens are the red
816 0x01 - 0x0f odds are zero 815 0x01 - 0x0f odds are zero
817 */ 816 */
818 vector unsigned char vredalpha1 = 817 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
819 VECUINT8_LITERAL (0x10, 0x00, 0x01, 0x01, 818 0x10, 0x02, 0x01, 0x01,
820 0x10, 0x02, 0x01, 0x01, 819 0x10, 0x04, 0x01, 0x01,
821 0x10, 0x04, 0x01, 0x01, 820 0x10, 0x06, 0x01,
822 0x10, 0x06, 0x01, 0x01); 821 0x01);
823 vector unsigned char vredalpha2 = 822 vector unsigned char vredalpha2 =
824 (vector unsigned char) (vec_add ((vector unsigned int) vredalpha1, 823 (vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
825 vec_sl (v8_32, v16_32)) 824 vec_sl(v8_32, v16_32))
826 ); 825 );
827 /* 826 /*
828 0x00 - 0x0f is ARxx ARxx ARxx ARxx 827 0x00 - 0x0f is ARxx ARxx ARxx ARxx
829 0x11 - 0x0f odds are blue 828 0x11 - 0x0f odds are blue
830 */ 829 */
831 vector unsigned char vblue1 = VECUINT8_LITERAL (0x00, 0x01, 0x02, 0x11, 830 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
832 0x04, 0x05, 0x06, 0x13, 831 0x04, 0x05, 0x06, 0x13,
833 0x08, 0x09, 0x0a, 0x15, 832 0x08, 0x09, 0x0a, 0x15,
834 0x0c, 0x0d, 0x0e, 0x17); 833 0x0c, 0x0d, 0x0e, 0x17);
835 vector unsigned char vblue2 = 834 vector unsigned char vblue2 =
836 (vector unsigned char) (vec_add ((vector unsigned int) vblue1, v8_32) 835 (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32)
837 ); 836 );
838 /* 837 /*
839 0x00 - 0x0f is ARxB ARxB ARxB ARxB 838 0x00 - 0x0f is ARxB ARxB ARxB ARxB
840 0x10 - 0x0e evens are green 839 0x10 - 0x0e evens are green
841 */ 840 */
842 vector unsigned char vgreen1 = VECUINT8_LITERAL (0x00, 0x01, 0x10, 0x03, 841 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
843 0x04, 0x05, 0x12, 0x07, 842 0x04, 0x05, 0x12, 0x07,
844 0x08, 0x09, 0x14, 0x0b, 843 0x08, 0x09, 0x14, 0x0b,
845 0x0c, 0x0d, 0x16, 0x0f); 844 0x0c, 0x0d, 0x16, 0x0f);
846 vector unsigned char vgreen2 = 845 vector unsigned char vgreen2 =
847 (vector unsigned 846 (vector unsigned
848 char) (vec_add ((vector unsigned int) vgreen1, vec_sl (v8_32, v8_32)) 847 char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32))
849 ); 848 );
850 vector unsigned char vgmerge = VECUINT8_LITERAL (0x00, 0x02, 0x00, 0x06, 849 vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
851 0x00, 0x0a, 0x00, 0x0e, 850 0x00, 0x0a, 0x00, 0x0e,
852 0x00, 0x12, 0x00, 0x16, 851 0x00, 0x12, 0x00, 0x16,
853 0x00, 0x1a, 0x00, 0x1e); 852 0x00, 0x1a, 0x00, 0x1e);
854 vector unsigned char mergePermute = VEC_MERGE_PERMUTE (); 853 vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
855 vector unsigned char vpermute = calc_swizzle32 (srcfmt, NULL); 854 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
856 vector unsigned char valphaPermute = 855 vector unsigned char valphaPermute =
857 vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC)); 856 vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
858 857
859 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8 (-7); 858 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
860 vf800 = vec_sl (vf800, vec_splat_u16 (8)); 859 vf800 = vec_sl(vf800, vec_splat_u16(8));
861 860
862 while (height--) { 861 while (height--) {
863 int extrawidth; 862 int extrawidth;
864 vector unsigned char valigner; 863 vector unsigned char valigner;
865 vector unsigned char vsrc; 864 vector unsigned char vsrc;
883 } \ 882 } \
884 src += 4; \ 883 src += 4; \
885 dst += 2; \ 884 dst += 2; \
886 widthvar--; \ 885 widthvar--; \
887 } 886 }
888 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dst)) && (width), width); 887 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
889 extrawidth = (width % 8); 888 extrawidth = (width % 8);
890 valigner = VEC_ALIGNER (src); 889 valigner = VEC_ALIGNER(src);
891 vsrc = (vector unsigned char) vec_ld (0, src); 890 vsrc = (vector unsigned char) vec_ld(0, src);
892 width -= extrawidth; 891 width -= extrawidth;
893 while (width) { 892 while (width) {
894 vector unsigned char valpha; 893 vector unsigned char valpha;
895 vector unsigned char vsrc1, vsrc2; 894 vector unsigned char vsrc1, vsrc2;
896 vector unsigned char vdst1, vdst2; 895 vector unsigned char vdst1, vdst2;
897 vector unsigned short vR, vG, vB; 896 vector unsigned short vR, vG, vB;
898 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; 897 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
899 898
900 /* Load 8 pixels from src as ARGB */ 899 /* Load 8 pixels from src as ARGB */
901 voverflow = (vector unsigned char) vec_ld (15, src); 900 voverflow = (vector unsigned char) vec_ld(15, src);
902 vsrc = vec_perm (vsrc, voverflow, valigner); 901 vsrc = vec_perm(vsrc, voverflow, valigner);
903 vsrc1 = vec_perm (vsrc, vsrc, vpermute); 902 vsrc1 = vec_perm(vsrc, vsrc, vpermute);
904 src += 16; 903 src += 16;
905 vsrc = (vector unsigned char) vec_ld (15, src); 904 vsrc = (vector unsigned char) vec_ld(15, src);
906 voverflow = vec_perm (voverflow, vsrc, valigner); 905 voverflow = vec_perm(voverflow, vsrc, valigner);
907 vsrc2 = vec_perm (voverflow, voverflow, vpermute); 906 vsrc2 = vec_perm(voverflow, voverflow, vpermute);
908 src += 16; 907 src += 16;
909 908
910 /* Load 8 pixels from dst as XRGB */ 909 /* Load 8 pixels from dst as XRGB */
911 voverflow = vec_ld (0, dst); 910 voverflow = vec_ld(0, dst);
912 vR = vec_and ((vector unsigned short) voverflow, vf800); 911 vR = vec_and((vector unsigned short) voverflow, vf800);
913 vB = vec_sl ((vector unsigned short) voverflow, v3_16); 912 vB = vec_sl((vector unsigned short) voverflow, v3_16);
914 vG = vec_sl (vB, v2_16); 913 vG = vec_sl(vB, v2_16);
915 vdst1 = 914 vdst1 =
916 (vector unsigned char) vec_perm ((vector unsigned char) vR, 915 (vector unsigned char) vec_perm((vector unsigned char) vR,
917 (vector unsigned char) vR, 916 (vector unsigned char) vR,
918 vredalpha1); 917 vredalpha1);
919 vdst1 = vec_perm (vdst1, (vector unsigned char) vB, vblue1); 918 vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
920 vdst1 = vec_perm (vdst1, (vector unsigned char) vG, vgreen1); 919 vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
921 vdst2 = 920 vdst2 =
922 (vector unsigned char) vec_perm ((vector unsigned char) vR, 921 (vector unsigned char) vec_perm((vector unsigned char) vR,
923 (vector unsigned char) vR, 922 (vector unsigned char) vR,
924 vredalpha2); 923 vredalpha2);
925 vdst2 = vec_perm (vdst2, (vector unsigned char) vB, vblue2); 924 vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
926 vdst2 = vec_perm (vdst2, (vector unsigned char) vG, vgreen2); 925 vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
927 926
928 /* Alpha blend 8 pixels as ARGB */ 927 /* Alpha blend 8 pixels as ARGB */
929 valpha = vec_perm (vsrc1, v0, valphaPermute); 928 valpha = vec_perm(vsrc1, v0, valphaPermute);
930 VEC_MULTIPLY_ALPHA (vsrc1, vdst1, valpha, mergePermute, v1_16, 929 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16,
931 v8_16); 930 v8_16);
932 valpha = vec_perm (vsrc2, v0, valphaPermute); 931 valpha = vec_perm(vsrc2, v0, valphaPermute);
933 VEC_MULTIPLY_ALPHA (vsrc2, vdst2, valpha, mergePermute, v1_16, 932 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16,
934 v8_16); 933 v8_16);
935 934
936 /* Convert 8 pixels to 565 */ 935 /* Convert 8 pixels to 565 */
937 vpixel = (vector unsigned short) vec_packpx ((vector unsigned int) 936 vpixel = (vector unsigned short) vec_packpx((vector unsigned int)
938 vdst1, 937 vdst1,
939 (vector unsigned int) 938 (vector unsigned int)
940 vdst2); 939 vdst2);
941 vgpixel = 940 vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge);
942 (vector unsigned short) vec_perm (vdst1, vdst2, vgmerge); 941 vgpixel = vec_and(vgpixel, vfc);
943 vgpixel = vec_and (vgpixel, vfc); 942 vgpixel = vec_sl(vgpixel, v3_16);
944 vgpixel = vec_sl (vgpixel, v3_16); 943 vrpixel = vec_sl(vpixel, v1_16);
945 vrpixel = vec_sl (vpixel, v1_16); 944 vrpixel = vec_and(vrpixel, vf800);
946 vrpixel = vec_and (vrpixel, vf800); 945 vbpixel = vec_and(vpixel, v3f);
947 vbpixel = vec_and (vpixel, v3f);
948 vdst1 = 946 vdst1 =
949 vec_or ((vector unsigned char) vrpixel, 947 vec_or((vector unsigned char) vrpixel,
950 (vector unsigned char) vgpixel); 948 (vector unsigned char) vgpixel);
951 vdst1 = vec_or (vdst1, (vector unsigned char) vbpixel); 949 vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel);
952 950
953 /* Store 8 pixels */ 951 /* Store 8 pixels */
954 vec_st (vdst1, 0, dst); 952 vec_st(vdst1, 0, dst);
955 953
956 width -= 8; 954 width -= 8;
957 dst += 16; 955 dst += 16;
958 } 956 }
959 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 957 ONE_PIXEL_BLEND((extrawidth), extrawidth);
960 #undef ONE_PIXEL_BLEND 958 #undef ONE_PIXEL_BLEND
961 src += srcskip; 959 src += srcskip;
962 dst += dstskip; 960 dst += dstskip;
963 } 961 }
964 } 962 }
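
Blit32to565PixelAlphaAltivec first expands the destination's RGB565 pixels to 8-bit channels (the vf800/v3_16/v2_16 steps), blends in ARGB space, and then re-packs to 565. The per-pixel re-pack, shown here as a hypothetical scalar helper, keeps only the top 5/6/5 bits of each blended channel:

    #include <stdint.h>

    /* Pack blended 8-bit channels back into RGB565. */
    static uint16_t pack_rgb565(uint8_t r, uint8_t g, uint8_t b)
    {
        return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
    }
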
965 963
966 static void 964 static void
967 Blit32to32SurfaceAlphaKeyAltivec (SDL_BlitInfo * info) 965 Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
968 { 966 {
969 unsigned alpha = info->src->alpha; 967 unsigned alpha = info->src->alpha;
970 int height = info->d_height; 968 int height = info->d_height;
971 Uint32 *srcp = (Uint32 *) info->s_pixels; 969 Uint32 *srcp = (Uint32 *) info->s_pixels;
972 int srcskip = info->s_skip >> 2; 970 int srcskip = info->s_skip >> 2;
989 vector unsigned short v1; 987 vector unsigned short v1;
990 vector unsigned short v8; 988 vector unsigned short v8;
991 vector unsigned int vckey; 989 vector unsigned int vckey;
992 vector unsigned int vrgbmask; 990 vector unsigned int vrgbmask;
993 991
994 mergePermute = VEC_MERGE_PERMUTE (); 992 mergePermute = VEC_MERGE_PERMUTE();
995 v0 = vec_splat_u8 (0); 993 v0 = vec_splat_u8(0);
996 v1 = vec_splat_u16 (1); 994 v1 = vec_splat_u16(1);
997 v8 = vec_splat_u16 (8); 995 v8 = vec_splat_u16(8);
998 996
999 /* set the alpha to 255 on the destination surf */ 997 /* set the alpha to 255 on the destination surf */
1000 valphamask = VEC_ALPHA_MASK (); 998 valphamask = VEC_ALPHA_MASK();
1001 999
1002 vsrcPermute = calc_swizzle32 (srcfmt, NULL); 1000 vsrcPermute = calc_swizzle32(srcfmt, NULL);
1003 vdstPermute = calc_swizzle32 (NULL, dstfmt); 1001 vdstPermute = calc_swizzle32(NULL, dstfmt);
1004 vsdstPermute = calc_swizzle32 (dstfmt, NULL); 1002 vsdstPermute = calc_swizzle32(dstfmt, NULL);
1005 1003
1006 /* set a vector full of alpha and 255-alpha */ 1004 /* set a vector full of alpha and 255-alpha */
1007 ((unsigned char *) &valpha)[0] = alpha; 1005 ((unsigned char *) &valpha)[0] = alpha;
1008 valpha = vec_splat (valpha, 0); 1006 valpha = vec_splat(valpha, 0);
1009 vbits = (vector unsigned char) vec_splat_s8 (-1); 1007 vbits = (vector unsigned char) vec_splat_s8(-1);
1010 1008
1011 ckey &= rgbmask; 1009 ckey &= rgbmask;
1012 ((unsigned int *) (char *) &vckey)[0] = ckey; 1010 ((unsigned int *) (char *) &vckey)[0] = ckey;
1013 vckey = vec_splat (vckey, 0); 1011 vckey = vec_splat(vckey, 0);
1014 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask; 1012 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
1015 vrgbmask = vec_splat (vrgbmask, 0); 1013 vrgbmask = vec_splat(vrgbmask, 0);
1016 1014
1017 while (height--) { 1015 while (height--) {
1018 int width = info->d_width; 1016 int width = info->d_width;
1019 #define ONE_PIXEL_BLEND(condition, widthvar) \ 1017 #define ONE_PIXEL_BLEND(condition, widthvar) \
1020 while (condition) { \ 1018 while (condition) { \
1029 } \ 1027 } \
1030 dstp++; \ 1028 dstp++; \
1031 srcp++; \ 1029 srcp++; \
1032 widthvar--; \ 1030 widthvar--; \
1033 } 1031 }
1034 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); 1032 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
1035 if (width > 0) { 1033 if (width > 0) {
1036 int extrawidth = (width % 4); 1034 int extrawidth = (width % 4);
1037 vector unsigned char valigner = VEC_ALIGNER (srcp); 1035 vector unsigned char valigner = VEC_ALIGNER(srcp);
1038 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); 1036 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1039 width -= extrawidth; 1037 width -= extrawidth;
1040 while (width) { 1038 while (width) {
1041 vector unsigned char vsel; 1039 vector unsigned char vsel;
1042 vector unsigned char voverflow; 1040 vector unsigned char voverflow;
1043 vector unsigned char vd; 1041 vector unsigned char vd;
1044 vector unsigned char vd_orig; 1042 vector unsigned char vd_orig;
1045 1043
1046 /* s = *srcp */ 1044 /* s = *srcp */
1047 voverflow = (vector unsigned char) vec_ld (15, srcp); 1045 voverflow = (vector unsigned char) vec_ld(15, srcp);
1048 vs = vec_perm (vs, voverflow, valigner); 1046 vs = vec_perm(vs, voverflow, valigner);
1049 1047
1050 /* vsel is set for items that match the key */ 1048 /* vsel is set for items that match the key */
1051 vsel = 1049 vsel =
1052 (vector unsigned char) vec_and ((vector unsigned int) vs, 1050 (vector unsigned char) vec_and((vector unsigned int) vs,
1053 vrgbmask); 1051 vrgbmask);
1054 vsel = (vector unsigned char) vec_cmpeq ((vector unsigned int) 1052 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int)
1055 vsel, vckey); 1053 vsel, vckey);
1056 1054
1057 /* permute to source format */ 1055 /* permute to source format */
1058 vs = vec_perm (vs, valpha, vsrcPermute); 1056 vs = vec_perm(vs, valpha, vsrcPermute);
1059 1057
1060 /* d = *dstp */ 1058 /* d = *dstp */
1061 vd = (vector unsigned char) vec_ld (0, dstp); 1059 vd = (vector unsigned char) vec_ld(0, dstp);
1062 vd_orig = vd = vec_perm (vd, v0, vsdstPermute); 1060 vd_orig = vd = vec_perm(vd, v0, vsdstPermute);
1063 1061
1064 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); 1062 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1065 1063
1066 /* set the alpha channel to full on */ 1064 /* set the alpha channel to full on */
1067 vd = vec_or (vd, valphamask); 1065 vd = vec_or(vd, valphamask);
1068 1066
1069 /* mask out color key */ 1067 /* mask out color key */
1070 vd = vec_sel (vd, vd_orig, vsel); 1068 vd = vec_sel(vd, vd_orig, vsel);
1071 1069
1072 /* permute to dest format */ 1070 /* permute to dest format */
1073 vd = vec_perm (vd, vbits, vdstPermute); 1071 vd = vec_perm(vd, vbits, vdstPermute);
1074 1072
1075 /* *dstp = res */ 1073 /* *dstp = res */
1076 vec_st ((vector unsigned int) vd, 0, dstp); 1074 vec_st((vector unsigned int) vd, 0, dstp);
1077 1075
1078 srcp += 4; 1076 srcp += 4;
1079 dstp += 4; 1077 dstp += 4;
1080 width -= 4; 1078 width -= 4;
1081 vs = voverflow; 1079 vs = voverflow;
1082 } 1080 }
1083 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 1081 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1084 } 1082 }
1085 #undef ONE_PIXEL_BLEND 1083 #undef ONE_PIXEL_BLEND
1086 1084
1087 srcp += srcskip; 1085 srcp += srcskip;
1088 dstp += dstskip; 1086 dstp += dstskip;
1089 } 1087 }
1090 } 1088 }
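
The colorkey variant blends every pixel but then uses vec_cmpeq and vec_sel so that source pixels whose RGB bits match the key leave the destination untouched. A hedged scalar sketch of that final select (hypothetical helper, assuming the blended value has already been computed):

    #include <stdint.h>

    /* Keyed source pixels keep the original destination value;
       everything else takes the blended result. */
    static uint32_t apply_colorkey(uint32_t s, uint32_t blended,
                                   uint32_t d_orig,
                                   uint32_t ckey, uint32_t rgbmask)
    {
        return ((s & rgbmask) == ckey) ? d_orig : blended;
    }
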
1091 1089
1092 1090
1093 static void 1091 static void
1094 Blit32to32PixelAlphaAltivec (SDL_BlitInfo * info) 1092 Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
1095 { 1093 {
1096 int width = info->d_width; 1094 int width = info->d_width;
1097 int height = info->d_height; 1095 int height = info->d_height;
1098 Uint32 *srcp = (Uint32 *) info->s_pixels; 1096 Uint32 *srcp = (Uint32 *) info->s_pixels;
1099 int srcskip = info->s_skip >> 2; 1097 int srcskip = info->s_skip >> 2;
1110 vector unsigned char vpixelmask; 1108 vector unsigned char vpixelmask;
1111 vector unsigned char v0; 1109 vector unsigned char v0;
1112 vector unsigned short v1; 1110 vector unsigned short v1;
1113 vector unsigned short v8; 1111 vector unsigned short v8;
1114 1112
1115 v0 = vec_splat_u8 (0); 1113 v0 = vec_splat_u8(0);
1116 v1 = vec_splat_u16 (1); 1114 v1 = vec_splat_u16(1);
1117 v8 = vec_splat_u16 (8); 1115 v8 = vec_splat_u16(8);
1118 mergePermute = VEC_MERGE_PERMUTE (); 1116 mergePermute = VEC_MERGE_PERMUTE();
1119 valphamask = VEC_ALPHA_MASK (); 1117 valphamask = VEC_ALPHA_MASK();
1120 valphaPermute = vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC)); 1118 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
1121 vpixelmask = vec_nor (valphamask, v0); 1119 vpixelmask = vec_nor(valphamask, v0);
1122 vsrcPermute = calc_swizzle32 (srcfmt, NULL); 1120 vsrcPermute = calc_swizzle32(srcfmt, NULL);
1123 vdstPermute = calc_swizzle32 (NULL, dstfmt); 1121 vdstPermute = calc_swizzle32(NULL, dstfmt);
1124 vsdstPermute = calc_swizzle32 (dstfmt, NULL); 1122 vsdstPermute = calc_swizzle32(dstfmt, NULL);
1125 1123
1126 while (height--) { 1124 while (height--) {
1127 width = info->d_width; 1125 width = info->d_width;
1128 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1126 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1129 Uint32 Pixel; \ 1127 Uint32 Pixel; \
1136 } \ 1134 } \
1137 ++srcp; \ 1135 ++srcp; \
1138 ++dstp; \ 1136 ++dstp; \
1139 widthvar--; \ 1137 widthvar--; \
1140 } 1138 }
1141 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); 1139 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
1142 if (width > 0) { 1140 if (width > 0) {
1143 /* vsrcPermute */ 1141 /* vsrcPermute */
1144 /* vdstPermute */ 1142 /* vdstPermute */
1145 int extrawidth = (width % 4); 1143 int extrawidth = (width % 4);
1146 vector unsigned char valigner = VEC_ALIGNER (srcp); 1144 vector unsigned char valigner = VEC_ALIGNER(srcp);
1147 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); 1145 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1148 width -= extrawidth; 1146 width -= extrawidth;
1149 while (width) { 1147 while (width) {
1150 vector unsigned char voverflow; 1148 vector unsigned char voverflow;
1151 vector unsigned char vd; 1149 vector unsigned char vd;
1152 vector unsigned char valpha; 1150 vector unsigned char valpha;
1153 vector unsigned char vdstalpha; 1151 vector unsigned char vdstalpha;
1154 /* s = *srcp */ 1152 /* s = *srcp */
1155 voverflow = (vector unsigned char) vec_ld (15, srcp); 1153 voverflow = (vector unsigned char) vec_ld(15, srcp);
1156 vs = vec_perm (vs, voverflow, valigner); 1154 vs = vec_perm(vs, voverflow, valigner);
1157 vs = vec_perm (vs, v0, vsrcPermute); 1155 vs = vec_perm(vs, v0, vsrcPermute);
1158 1156
1159 valpha = vec_perm (vs, v0, valphaPermute); 1157 valpha = vec_perm(vs, v0, valphaPermute);
1160 1158
1161 /* d = *dstp */ 1159 /* d = *dstp */
1162 vd = (vector unsigned char) vec_ld (0, dstp); 1160 vd = (vector unsigned char) vec_ld(0, dstp);
1163 vd = vec_perm (vd, v0, vsdstPermute); 1161 vd = vec_perm(vd, v0, vsdstPermute);
1164 vdstalpha = vec_and (vd, valphamask); 1162 vdstalpha = vec_and(vd, valphamask);
1165 1163
1166 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); 1164 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1167 1165
1168 /* set the alpha to the dest alpha */ 1166 /* set the alpha to the dest alpha */
1169 vd = vec_and (vd, vpixelmask); 1167 vd = vec_and(vd, vpixelmask);
1170 vd = vec_or (vd, vdstalpha); 1168 vd = vec_or(vd, vdstalpha);
1171 vd = vec_perm (vd, v0, vdstPermute); 1169 vd = vec_perm(vd, v0, vdstPermute);
1172 1170
1173 /* *dstp = res */ 1171 /* *dstp = res */
1174 vec_st ((vector unsigned int) vd, 0, dstp); 1172 vec_st((vector unsigned int) vd, 0, dstp);
1175 1173
1176 srcp += 4; 1174 srcp += 4;
1177 dstp += 4; 1175 dstp += 4;
1178 width -= 4; 1176 width -= 4;
1179 vs = voverflow; 1177 vs = voverflow;
1180 1178
1181 } 1179 }
1182 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 1180 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1183 } 1181 }
1184 srcp += srcskip; 1182 srcp += srcskip;
1185 dstp += dstskip; 1183 dstp += dstskip;
1186 #undef ONE_PIXEL_BLEND 1184 #undef ONE_PIXEL_BLEND
1187 } 1185 }
1188 } 1186 }
1189 1187
1190 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 1188 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
1191 static void 1189 static void
1192 BlitRGBtoRGBPixelAlphaAltivec (SDL_BlitInfo * info) 1190 BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
1193 { 1191 {
1194 int width = info->d_width; 1192 int width = info->d_width;
1195 int height = info->d_height; 1193 int height = info->d_height;
1196 Uint32 *srcp = (Uint32 *) info->s_pixels; 1194 Uint32 *srcp = (Uint32 *) info->s_pixels;
1197 int srcskip = info->s_skip >> 2; 1195 int srcskip = info->s_skip >> 2;
1202 vector unsigned char valphamask; 1200 vector unsigned char valphamask;
1203 vector unsigned char vpixelmask; 1201 vector unsigned char vpixelmask;
1204 vector unsigned char v0; 1202 vector unsigned char v0;
1205 vector unsigned short v1; 1203 vector unsigned short v1;
1206 vector unsigned short v8; 1204 vector unsigned short v8;
1207 v0 = vec_splat_u8 (0); 1205 v0 = vec_splat_u8(0);
1208 v1 = vec_splat_u16 (1); 1206 v1 = vec_splat_u16(1);
1209 v8 = vec_splat_u16 (8); 1207 v8 = vec_splat_u16(8);
1210 mergePermute = VEC_MERGE_PERMUTE (); 1208 mergePermute = VEC_MERGE_PERMUTE();
1211 valphamask = VEC_ALPHA_MASK (); 1209 valphamask = VEC_ALPHA_MASK();
1212 valphaPermute = vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC)); 1210 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
1213 1211
1214 1212
1215 vpixelmask = vec_nor (valphamask, v0); 1213 vpixelmask = vec_nor(valphamask, v0);
1216 while (height--) { 1214 while (height--) {
1217 width = info->d_width; 1215 width = info->d_width;
1218 #define ONE_PIXEL_BLEND(condition, widthvar) \ 1216 #define ONE_PIXEL_BLEND(condition, widthvar) \
1219 while ((condition)) { \ 1217 while ((condition)) { \
1220 Uint32 dalpha; \ 1218 Uint32 dalpha; \
1240 } \ 1238 } \
1241 ++srcp; \ 1239 ++srcp; \
1242 ++dstp; \ 1240 ++dstp; \
1243 widthvar--; \ 1241 widthvar--; \
1244 } 1242 }
1245 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); 1243 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
1246 if (width > 0) { 1244 if (width > 0) {
1247 int extrawidth = (width % 4); 1245 int extrawidth = (width % 4);
1248 vector unsigned char valigner = VEC_ALIGNER (srcp); 1246 vector unsigned char valigner = VEC_ALIGNER(srcp);
1249 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); 1247 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1250 width -= extrawidth; 1248 width -= extrawidth;
1251 while (width) { 1249 while (width) {
1252 vector unsigned char voverflow; 1250 vector unsigned char voverflow;
1253 vector unsigned char vd; 1251 vector unsigned char vd;
1254 vector unsigned char valpha; 1252 vector unsigned char valpha;
1255 vector unsigned char vdstalpha; 1253 vector unsigned char vdstalpha;
1256 /* s = *srcp */ 1254 /* s = *srcp */
1257 voverflow = (vector unsigned char) vec_ld (15, srcp); 1255 voverflow = (vector unsigned char) vec_ld(15, srcp);
1258 vs = vec_perm (vs, voverflow, valigner); 1256 vs = vec_perm(vs, voverflow, valigner);
1259 1257
1260 valpha = vec_perm (vs, v0, valphaPermute); 1258 valpha = vec_perm(vs, v0, valphaPermute);
1261 1259
1262 /* d = *dstp */ 1260 /* d = *dstp */
1263 vd = (vector unsigned char) vec_ld (0, dstp); 1261 vd = (vector unsigned char) vec_ld(0, dstp);
1264 vdstalpha = vec_and (vd, valphamask); 1262 vdstalpha = vec_and(vd, valphamask);
1265 1263
1266 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); 1264 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1267 1265
1268 /* set the alpha to the dest alpha */ 1266 /* set the alpha to the dest alpha */
1269 vd = vec_and (vd, vpixelmask); 1267 vd = vec_and(vd, vpixelmask);
1270 vd = vec_or (vd, vdstalpha); 1268 vd = vec_or(vd, vdstalpha);
1271 1269
1272 /* *dstp = res */ 1270 /* *dstp = res */
1273 vec_st ((vector unsigned int) vd, 0, dstp); 1271 vec_st((vector unsigned int) vd, 0, dstp);
1274 1272
1275 srcp += 4; 1273 srcp += 4;
1276 dstp += 4; 1274 dstp += 4;
1277 width -= 4; 1275 width -= 4;
1278 vs = voverflow; 1276 vs = voverflow;
1279 } 1277 }
1280 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 1278 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1281 } 1279 }
1282 srcp += srcskip; 1280 srcp += srcskip;
1283 dstp += dstskip; 1281 dstp += dstskip;
1284 } 1282 }
1285 #undef ONE_PIXEL_BLEND 1283 #undef ONE_PIXEL_BLEND
1286 } 1284 }
1287 1285
1288 static void 1286 static void
1289 Blit32to32SurfaceAlphaAltivec (SDL_BlitInfo * info) 1287 Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
1290 { 1288 {
1291 /* XXX : 6 */ 1289 /* XXX : 6 */
1292 unsigned alpha = info->src->alpha; 1290 unsigned alpha = info->src->alpha;
1293 int height = info->d_height; 1291 int height = info->d_height;
1294 Uint32 *srcp = (Uint32 *) info->s_pixels; 1292 Uint32 *srcp = (Uint32 *) info->s_pixels;
1307 vector unsigned char valphamask; 1305 vector unsigned char valphamask;
1308 vector unsigned char vbits; 1306 vector unsigned char vbits;
1309 vector unsigned short v1; 1307 vector unsigned short v1;
1310 vector unsigned short v8; 1308 vector unsigned short v8;
1311 1309
1312 mergePermute = VEC_MERGE_PERMUTE (); 1310 mergePermute = VEC_MERGE_PERMUTE();
1313 v1 = vec_splat_u16 (1); 1311 v1 = vec_splat_u16(1);
1314 v8 = vec_splat_u16 (8); 1312 v8 = vec_splat_u16(8);
1315 1313
1316 /* set the alpha to 255 on the destination surf */ 1314 /* set the alpha to 255 on the destination surf */
1317 valphamask = VEC_ALPHA_MASK (); 1315 valphamask = VEC_ALPHA_MASK();
1318 1316
1319 vsrcPermute = calc_swizzle32 (srcfmt, NULL); 1317 vsrcPermute = calc_swizzle32(srcfmt, NULL);
1320 vdstPermute = calc_swizzle32 (NULL, dstfmt); 1318 vdstPermute = calc_swizzle32(NULL, dstfmt);
1321 vsdstPermute = calc_swizzle32 (dstfmt, NULL); 1319 vsdstPermute = calc_swizzle32(dstfmt, NULL);
1322 1320
1323 /* set a vector full of alpha and 255-alpha */ 1321 /* set a vector full of alpha and 255-alpha */
1324 ((unsigned char *) &valpha)[0] = alpha; 1322 ((unsigned char *) &valpha)[0] = alpha;
1325 valpha = vec_splat (valpha, 0); 1323 valpha = vec_splat(valpha, 0);
1326 vbits = (vector unsigned char) vec_splat_s8 (-1); 1324 vbits = (vector unsigned char) vec_splat_s8(-1);
1327 1325
1328 while (height--) { 1326 while (height--) {
1329 int width = info->d_width; 1327 int width = info->d_width;
1330 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1328 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1331 Uint32 Pixel; \ 1329 Uint32 Pixel; \
1336 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ 1334 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
1337 ++srcp; \ 1335 ++srcp; \
1338 ++dstp; \ 1336 ++dstp; \
1339 widthvar--; \ 1337 widthvar--; \
1340 } 1338 }
1341 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); 1339 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
1342 if (width > 0) { 1340 if (width > 0) {
1343 int extrawidth = (width % 4); 1341 int extrawidth = (width % 4);
1344 vector unsigned char valigner = vec_lvsl (0, srcp); 1342 vector unsigned char valigner = vec_lvsl(0, srcp);
1345 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); 1343 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1346 width -= extrawidth; 1344 width -= extrawidth;
1347 while (width) { 1345 while (width) {
1348 vector unsigned char voverflow; 1346 vector unsigned char voverflow;
1349 vector unsigned char vd; 1347 vector unsigned char vd;
1350 1348
1351 /* s = *srcp */ 1349 /* s = *srcp */
1352 voverflow = (vector unsigned char) vec_ld (15, srcp); 1350 voverflow = (vector unsigned char) vec_ld(15, srcp);
1353 vs = vec_perm (vs, voverflow, valigner); 1351 vs = vec_perm(vs, voverflow, valigner);
1354 vs = vec_perm (vs, valpha, vsrcPermute); 1352 vs = vec_perm(vs, valpha, vsrcPermute);
1355 1353
1356 /* d = *dstp */ 1354 /* d = *dstp */
1357 vd = (vector unsigned char) vec_ld (0, dstp); 1355 vd = (vector unsigned char) vec_ld(0, dstp);
1358 vd = vec_perm (vd, vd, vsdstPermute); 1356 vd = vec_perm(vd, vd, vsdstPermute);
1359 1357
1360 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); 1358 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1361 1359
1362 /* set the alpha channel to full on */ 1360 /* set the alpha channel to full on */
1363 vd = vec_or (vd, valphamask); 1361 vd = vec_or(vd, valphamask);
1364 vd = vec_perm (vd, vbits, vdstPermute); 1362 vd = vec_perm(vd, vbits, vdstPermute);
1365 1363
1366 /* *dstp = res */ 1364 /* *dstp = res */
1367 vec_st ((vector unsigned int) vd, 0, dstp); 1365 vec_st((vector unsigned int) vd, 0, dstp);
1368 1366
1369 srcp += 4; 1367 srcp += 4;
1370 dstp += 4; 1368 dstp += 4;
1371 width -= 4; 1369 width -= 4;
1372 vs = voverflow; 1370 vs = voverflow;
1373 } 1371 }
1374 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 1372 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1375 } 1373 }
1376 #undef ONE_PIXEL_BLEND 1374 #undef ONE_PIXEL_BLEND
1377 1375
1378 srcp += srcskip; 1376 srcp += srcskip;
1379 dstp += dstskip; 1377 dstp += dstskip;
1382 } 1380 }
1383 1381
1384 1382
1385 /* fast RGB888->(A)RGB888 blending */ 1383 /* fast RGB888->(A)RGB888 blending */
1386 static void 1384 static void
1387 BlitRGBtoRGBSurfaceAlphaAltivec (SDL_BlitInfo * info) 1385 BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
1388 { 1386 {
1389 unsigned alpha = info->src->alpha; 1387 unsigned alpha = info->src->alpha;
1390 int height = info->d_height; 1388 int height = info->d_height;
1391 Uint32 *srcp = (Uint32 *) info->s_pixels; 1389 Uint32 *srcp = (Uint32 *) info->s_pixels;
1392 int srcskip = info->s_skip >> 2; 1390 int srcskip = info->s_skip >> 2;
1396 vector unsigned char valpha; 1394 vector unsigned char valpha;
1397 vector unsigned char valphamask; 1395 vector unsigned char valphamask;
1398 vector unsigned short v1; 1396 vector unsigned short v1;
1399 vector unsigned short v8; 1397 vector unsigned short v8;
1400 1398
1401 mergePermute = VEC_MERGE_PERMUTE (); 1399 mergePermute = VEC_MERGE_PERMUTE();
1402 v1 = vec_splat_u16 (1); 1400 v1 = vec_splat_u16(1);
1403 v8 = vec_splat_u16 (8); 1401 v8 = vec_splat_u16(8);
1404 1402
1405 /* set the alpha to 255 on the destination surf */ 1403 /* set the alpha to 255 on the destination surf */
1406 valphamask = VEC_ALPHA_MASK (); 1404 valphamask = VEC_ALPHA_MASK();
1407 1405
1408 /* set a vector full of alpha and 255-alpha */ 1406 /* set a vector full of alpha and 255-alpha */
1409 ((unsigned char *) &valpha)[0] = alpha; 1407 ((unsigned char *) &valpha)[0] = alpha;
1410 valpha = vec_splat (valpha, 0); 1408 valpha = vec_splat(valpha, 0);
1411 1409
1412 while (height--) { 1410 while (height--) {
1413 int width = info->d_width; 1411 int width = info->d_width;
1414 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1412 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1415 Uint32 s = *srcp; \ 1413 Uint32 s = *srcp; \
1424 *dstp = d1 | d | 0xff000000; \ 1422 *dstp = d1 | d | 0xff000000; \
1425 ++srcp; \ 1423 ++srcp; \
1426 ++dstp; \ 1424 ++dstp; \
1427 widthvar--; \ 1425 widthvar--; \
1428 } 1426 }
1429 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); 1427 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
1430 if (width > 0) { 1428 if (width > 0) {
1431 int extrawidth = (width % 4); 1429 int extrawidth = (width % 4);
1432 vector unsigned char valigner = VEC_ALIGNER (srcp); 1430 vector unsigned char valigner = VEC_ALIGNER(srcp);
1433 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); 1431 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1434 width -= extrawidth; 1432 width -= extrawidth;
1435 while (width) { 1433 while (width) {
1436 vector unsigned char voverflow; 1434 vector unsigned char voverflow;
1437 vector unsigned char vd; 1435 vector unsigned char vd;
1438 1436
1439 /* s = *srcp */ 1437 /* s = *srcp */
1440 voverflow = (vector unsigned char) vec_ld (15, srcp); 1438 voverflow = (vector unsigned char) vec_ld(15, srcp);
1441 vs = vec_perm (vs, voverflow, valigner); 1439 vs = vec_perm(vs, voverflow, valigner);
1442 1440
1443 /* d = *dstp */ 1441 /* d = *dstp */
1444 vd = (vector unsigned char) vec_ld (0, dstp); 1442 vd = (vector unsigned char) vec_ld(0, dstp);
1445 1443
1446 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); 1444 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1447 1445
1448 /* set the alpha channel to full on */ 1446 /* set the alpha channel to full on */
1449 vd = vec_or (vd, valphamask); 1447 vd = vec_or(vd, valphamask);
1450 1448
1451 /* *dstp = res */ 1449 /* *dstp = res */
1452 vec_st ((vector unsigned int) vd, 0, dstp); 1450 vec_st((vector unsigned int) vd, 0, dstp);
1453 1451
1454 srcp += 4; 1452 srcp += 4;
1455 dstp += 4; 1453 dstp += 4;
1456 width -= 4; 1454 width -= 4;
1457 vs = voverflow; 1455 vs = voverflow;
1458 } 1456 }
1459 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 1457 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1460 } 1458 }
1461 #undef ONE_PIXEL_BLEND 1459 #undef ONE_PIXEL_BLEND
1462 1460
1463 srcp += srcskip; 1461 srcp += srcskip;
1464 dstp += dstskip; 1462 dstp += dstskip;
1470 #endif 1468 #endif
1471 #endif /* SDL_ALTIVEC_BLITTERS */ 1469 #endif /* SDL_ALTIVEC_BLITTERS */
1472 1470
1473 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 1471 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
1474 static void 1472 static void
1475 BlitRGBtoRGBSurfaceAlpha128 (SDL_BlitInfo * info) 1473 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
1476 { 1474 {
1477 int width = info->d_width; 1475 int width = info->d_width;
1478 int height = info->d_height; 1476 int height = info->d_height;
1479 Uint32 *srcp = (Uint32 *) info->s_pixels; 1477 Uint32 *srcp = (Uint32 *) info->s_pixels;
1480 int srcskip = info->s_skip >> 2; 1478 int srcskip = info->s_skip >> 2;
1495 } 1493 }
1496 } 1494 }
1497 1495
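The alpha == 128 special case above needs no multiplies at all: averaging two pixels can be done on the whole 32-bit word as long as per-channel carries are kept out of the neighbouring channel. The loop body is elided by this hunk, but it presumably amounts to the following standalone sketch (hypothetical name):

#include <stdint.h>

/* 50% blend of two xRGB8888 pixels.  Masking with 0x00fefefe clears the
   low bit of every channel, so the sum of the two masked pixels can only
   carry into a bit the mask already zeroed; shifting right by one then
   yields the per-channel average.  (s & d & 0x00010101) adds back the
   low bit both inputs shared, and the final OR forces opaque alpha. */
static uint32_t blend_rgb888_half(uint32_t s, uint32_t d)
{
    return ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
            + (s & d & 0x00010101)) | 0xff000000;
}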
1498 /* fast RGB888->(A)RGB888 blending with surface alpha */ 1496 /* fast RGB888->(A)RGB888 blending with surface alpha */
1499 static void 1497 static void
1500 BlitRGBtoRGBSurfaceAlpha (SDL_BlitInfo * info) 1498 BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo * info)
1501 { 1499 {
1502 unsigned alpha = info->src->alpha; 1500 unsigned alpha = info->src->alpha;
1503 if (alpha == 128) { 1501 if (alpha == 128) {
1504 BlitRGBtoRGBSurfaceAlpha128 (info); 1502 BlitRGBtoRGBSurfaceAlpha128(info);
1505 } else { 1503 } else {
1506 int width = info->d_width; 1504 int width = info->d_width;
1507 int height = info->d_height; 1505 int height = info->d_height;
1508 Uint32 *srcp = (Uint32 *) info->s_pixels; 1506 Uint32 *srcp = (Uint32 *) info->s_pixels;
1509 int srcskip = info->s_skip >> 2; 1507 int srcskip = info->s_skip >> 2;
1567 } 1565 }
1568 } 1566 }
1569 1567
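For an arbitrary per-surface alpha the elided 32-bit loop body presumably uses the usual two-channels-per-multiply arrangement: red and blue sit 16 bits apart in one word, leaving 8 bits of headroom each, green is handled in a second word, and dst + ((src - dst) * alpha >> 8) is applied to both. A standalone sketch (hypothetical name):

#include <stdint.h>

/* per-surface alpha blend of one xRGB8888 pixel, 0 <= alpha <= 255 */
static uint32_t blend_rgb888(uint32_t s, uint32_t d, uint32_t alpha)
{
    uint32_t s1 = s & 0x00ff00ff;   /* red and blue lanes */
    uint32_t d1 = d & 0x00ff00ff;
    d1 = (d1 + (((s1 - d1) * alpha) >> 8)) & 0x00ff00ff;

    s &= 0x0000ff00;                /* green lane */
    d &= 0x0000ff00;
    d = (d + (((s - d) * alpha) >> 8)) & 0x0000ff00;

    return d1 | d | 0xff000000;     /* force opaque destination alpha */
}

Note that >> 8 only approximates / 255, so alpha = 255 does not produce an exact copy; that is why the pixel-alpha paths below treat fully opaque pixels as plain copies.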
1570 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 1568 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
1571 static void 1569 static void
1572 BlitRGBtoRGBPixelAlpha (SDL_BlitInfo * info) 1570 BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info)
1573 { 1571 {
1574 int width = info->d_width; 1572 int width = info->d_width;
1575 int height = info->d_height; 1573 int height = info->d_height;
1576 Uint32 *srcp = (Uint32 *) info->s_pixels; 1574 Uint32 *srcp = (Uint32 *) info->s_pixels;
1577 int srcskip = info->s_skip >> 2; 1575 int srcskip = info->s_skip >> 2;
1620 } 1618 }
1621 1619
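The per-pixel variant reads the alpha out of every ARGB source pixel and, because of the >> 8 approximation, presumably short-circuits the fully transparent and fully opaque cases before falling back to the same arithmetic. A sketch assuming ARGB8888 on both sides (hypothetical name):

#include <stdint.h>

static void blend_row_pixel_alpha(uint32_t *dstp, const uint32_t *srcp, int width)
{
    while (width--) {
        uint32_t s = *srcp++;
        uint32_t alpha = s >> 24;                    /* per-pixel alpha */

        if (alpha == 255) {
            *dstp = (s & 0x00ffffff) | 0xff000000;   /* opaque: plain copy */
        } else if (alpha) {
            uint32_t d = *dstp;
            uint32_t s1 = s & 0x00ff00ff, d1 = d & 0x00ff00ff;
            d1 = (d1 + (((s1 - d1) * alpha) >> 8)) & 0x00ff00ff;
            s &= 0x0000ff00;
            d &= 0x0000ff00;
            d = (d + (((s - d) * alpha) >> 8)) & 0x0000ff00;
            *dstp = d1 | d | 0xff000000;
        }
        /* alpha == 0: destination pixel is left untouched */
        ++dstp;
    }
}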
1622 #if GCC_ASMBLIT 1620 #if GCC_ASMBLIT
1623 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 1621 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
1624 inline static void 1622 inline static void
1625 BlitRGBtoRGBPixelAlphaMMX3DNOW (SDL_BlitInfo * info) 1623 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
1626 { 1624 {
1627 int width = info->d_width; 1625 int width = info->d_width;
1628 int height = info->d_height; 1626 int height = info->d_height;
1629 Uint32 *srcp = (Uint32 *) info->s_pixels; 1627 Uint32 *srcp = (Uint32 *) info->s_pixels;
1630 int srcskip = info->s_skip >> 2; 1628 int srcskip = info->s_skip >> 2;
1631 Uint32 *dstp = (Uint32 *) info->d_pixels; 1629 Uint32 *dstp = (Uint32 *) info->d_pixels;
1632 int dstskip = info->d_skip >> 2; 1630 int dstskip = info->d_skip >> 2;
1633 SDL_PixelFormat *sf = info->src; 1631 SDL_PixelFormat *sf = info->src;
1634 Uint32 amask = sf->Amask; 1632 Uint32 amask = sf->Amask;
1635 1633
1636 __asm__ ( 1634 __asm__(
1637 /* make mm6 all zeros. */ 1635 /* make mm6 all zeros. */
1638 "pxor %%mm6, %%mm6\n" 1636 "pxor %%mm6, %%mm6\n"
1639 /* Make a mask to preserve the alpha. */ 1637 /* Make a mask to preserve the alpha. */
1640 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ 1638 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */
1641 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ 1639 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */
1642 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ 1640 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */
1643 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ 1641 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */
1644 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ 1642 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */
1645 /* form channel masks */ 1643 /* form channel masks */
1646 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ 1644 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */
1647 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ 1645 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */
1648 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ 1646 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */
1649 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ 1647 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */
1650 /* get alpha channel shift */ 1648 /* get alpha channel shift */
1651 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ 1649 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */
1652 : /* nothing */ : "m" (sf->Amask), "m" (sf->Ashift)); 1650 : /* nothing */ : "m"(sf->Amask), "m"(sf->Ashift));
1653 1651
1654 while (height--) { 1652 while (height--) {
1655 1653
1656 /* *INDENT-OFF* */ 1654 /* *INDENT-OFF* */
1657 DUFFS_LOOP4({ 1655 DUFFS_LOOP4({
1728 /* *INDENT-ON* */ 1726 /* *INDENT-ON* */
1729 srcp += srcskip; 1727 srcp += srcskip;
1730 dstp += dstskip; 1728 dstp += dstskip;
1731 } 1729 }
1732 1730
1733 __asm__ ("emms\n":); 1731 __asm__("emms\n":);

1734 } 1732 }
1735 1733
1736 /* End GCC_ASMBLIT */ 1734 /* End GCC_ASMBLIT */
1737 1735
1738 #elif MSVC_ASMBLIT 1736 #elif MSVC_ASMBLIT
1739 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 1737 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
1740 static void 1738 static void
1741 BlitRGBtoRGBPixelAlphaMMX3DNOW (SDL_BlitInfo * info) 1739 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
1742 { 1740 {
1743 int width = info->d_width; 1741 int width = info->d_width;
1744 int height = info->d_height; 1742 int height = info->d_height;
1745 Uint32 *srcp = (Uint32 *) info->s_pixels; 1743 Uint32 *srcp = (Uint32 *) info->s_pixels;
1746 int srcskip = info->s_skip >> 2; 1744 int srcskip = info->s_skip >> 2;
1752 Uint32 ashift = sf->Ashift; 1750 Uint32 ashift = sf->Ashift;
1753 Uint64 multmask; 1751 Uint64 multmask;
1754 1752
1755 __m64 src1, dst1, mm_alpha, mm_zero, dmask; 1753 __m64 src1, dst1, mm_alpha, mm_zero, dmask;
1756 1754
1757 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */ 1755 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
1758 multmask = ~(0xFFFFi64 << (ashift * 2)); 1756 multmask = ~(0xFFFFi64 << (ashift * 2));
1759 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ 1757 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */
1760 1758
1761 while (height--) { 1759 while (height--) {
1762 /* *INDENT-OFF* */ 1760 /* *INDENT-OFF* */
1799 }, width); 1797 }, width);
1800 /* *INDENT-ON* */ 1798 /* *INDENT-ON* */
1801 srcp += srcskip; 1799 srcp += srcskip;
1802 dstp += dstskip; 1800 dstp += dstskip;
1803 } 1801 }
1804 _mm_empty (); 1802 _mm_empty();
1805 } 1803 }
1806 1804
1807 /* End MSVC_ASMBLIT */ 1805 /* End MSVC_ASMBLIT */
1808 1806
1809 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 1807 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
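In the MSVC path above, multmask = ~(0xFFFFi64 << (ashift * 2)) builds the mask that keeps the destination alpha from being scaled: once a pixel has been unpacked from bytes to 16-bit words for the multiply, the alpha channel occupies a 16-bit lane starting at twice its byte shift. A portable sketch of the same computation (hypothetical name, plain uint64_t instead of the MSVC i64 suffix):

#include <stdint.h>

/* mask clearing the 16-bit alpha lane of a pixel unpacked byte->word;
   ashift is the alpha shift in the packed pixel (24 for ARGB8888) */
static uint64_t alpha_lane_mask(uint32_t ashift)
{
    return ~((uint64_t) 0xFFFF << (ashift * 2));
}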
1818 #define BLEND2x16_50(d, s, mask) \ 1816 #define BLEND2x16_50(d, s, mask) \
1819 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ 1817 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \
1820 + (s & d & (~(mask | mask << 16)))) 1818 + (s & d & (~(mask | mask << 16))))
1821 1819
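BLEND2x16_50 averages two 16-bit pixels packed into one 32-bit word; BLEND16_50, defined just above this hunk, is presumably the single-pixel form of the same idea. The mask argument is the pixel format with the lowest bit of every channel cleared (0xf7de for RGB565, 0xfbde for RGB555), which stops the right shift from smearing one channel's low bit into the top of the channel below it. A sketch of the single-pixel form (hypothetical name):

#include <stdint.h>

/* 50% blend of two 16-bit pixels: the mask clears each channel's low bit
   so the >> 1 cannot push it into the neighbouring channel; the halved
   channels then sum without overflow, and (s & d & ~mask) adds back the
   low bits both pixels had set */
static uint16_t blend16_50(uint16_t d, uint16_t s, uint16_t mask)
{
    return (uint16_t) (((s & mask) >> 1) + ((d & mask) >> 1)
                       + (s & d & (uint16_t) ~mask));
}

/* e.g. blend16_50(d, s, 0xf7de) for RGB565, blend16_50(d, s, 0xfbde) for RGB555 */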
1822 static void 1820 static void
1823 Blit16to16SurfaceAlpha128 (SDL_BlitInfo * info, Uint16 mask) 1821 Blit16to16SurfaceAlpha128(SDL_BlitInfo * info, Uint16 mask)
1824 { 1822 {
1825 int width = info->d_width; 1823 int width = info->d_width;
1826 int height = info->d_height; 1824 int height = info->d_height;
1827 Uint16 *srcp = (Uint16 *) info->s_pixels; 1825 Uint16 *srcp = (Uint16 *) info->s_pixels;
1828 int srcskip = info->s_skip >> 1; 1826 int srcskip = info->s_skip >> 1;
1840 int w = width; 1838 int w = width;
1841 1839
1842 /* handle odd destination */ 1840 /* handle odd destination */
1843 if ((uintptr_t) dstp & 2) { 1841 if ((uintptr_t) dstp & 2) {
1844 Uint16 d = *dstp, s = *srcp; 1842 Uint16 d = *dstp, s = *srcp;
1845 *dstp = BLEND16_50 (d, s, mask); 1843 *dstp = BLEND16_50(d, s, mask);
1846 dstp++; 1844 dstp++;
1847 srcp++; 1845 srcp++;
1848 w--; 1846 w--;
1849 } 1847 }
1850 srcp++; /* srcp is now 32-bit aligned */ 1848 srcp++; /* srcp is now 32-bit aligned */
1860 s = (prev_sw << 16) + (sw >> 16); 1858 s = (prev_sw << 16) + (sw >> 16);
1861 #else 1859 #else
1862 s = (prev_sw >> 16) + (sw << 16); 1860 s = (prev_sw >> 16) + (sw << 16);
1863 #endif 1861 #endif
1864 prev_sw = sw; 1862 prev_sw = sw;
1865 *(Uint32 *) dstp = BLEND2x16_50 (dw, s, mask); 1863 *(Uint32 *) dstp = BLEND2x16_50(dw, s, mask);
1866 dstp += 2; 1864 dstp += 2;
1867 srcp += 2; 1865 srcp += 2;
1868 w -= 2; 1866 w -= 2;
1869 } 1867 }
1870 1868
1874 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 1872 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
1875 s = (Uint16) prev_sw; 1873 s = (Uint16) prev_sw;
1876 #else 1874 #else
1877 s = (Uint16) (prev_sw >> 16); 1875 s = (Uint16) (prev_sw >> 16);
1878 #endif 1876 #endif
1879 *dstp = BLEND16_50 (d, s, mask); 1877 *dstp = BLEND16_50(d, s, mask);
1880 srcp++; 1878 srcp++;
1881 dstp++; 1879 dstp++;
1882 } 1880 }
1883 srcp += srcskip - 1; 1881 srcp += srcskip - 1;
1884 dstp += dstskip; 1882 dstp += dstskip;
1887 int w = width; 1885 int w = width;
1888 1886
1889 /* first odd pixel? */ 1887 /* first odd pixel? */
1890 if ((uintptr_t) srcp & 2) { 1888 if ((uintptr_t) srcp & 2) {
1891 Uint16 d = *dstp, s = *srcp; 1889 Uint16 d = *dstp, s = *srcp;
1892 *dstp = BLEND16_50 (d, s, mask); 1890 *dstp = BLEND16_50(d, s, mask);
1893 srcp++; 1891 srcp++;
1894 dstp++; 1892 dstp++;
1895 w--; 1893 w--;
1896 } 1894 }
1897 /* srcp and dstp are now 32-bit aligned */ 1895 /* srcp and dstp are now 32-bit aligned */
1898 1896
1899 while (w > 1) { 1897 while (w > 1) {
1900 Uint32 sw = *(Uint32 *) srcp; 1898 Uint32 sw = *(Uint32 *) srcp;
1901 Uint32 dw = *(Uint32 *) dstp; 1899 Uint32 dw = *(Uint32 *) dstp;
1902 *(Uint32 *) dstp = BLEND2x16_50 (dw, sw, mask); 1900 *(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask);
1903 srcp += 2; 1901 srcp += 2;
1904 dstp += 2; 1902 dstp += 2;
1905 w -= 2; 1903 w -= 2;
1906 } 1904 }
1907 1905
1908 /* last odd pixel? */ 1906 /* last odd pixel? */
1909 if (w) { 1907 if (w) {
1910 Uint16 d = *dstp, s = *srcp; 1908 Uint16 d = *dstp, s = *srcp;
1911 *dstp = BLEND16_50 (d, s, mask); 1909 *dstp = BLEND16_50(d, s, mask);
1912 srcp++; 1910 srcp++;
1913 dstp++; 1911 dstp++;
1914 } 1912 }
1915 srcp += srcskip; 1913 srcp += srcskip;
1916 dstp += dstskip; 1914 dstp += dstskip;
1919 } 1917 }
1920 1918
1921 #if GCC_ASMBLIT 1919 #if GCC_ASMBLIT
1922 /* fast RGB565->RGB565 blending with surface alpha */ 1920 /* fast RGB565->RGB565 blending with surface alpha */
1923 static void 1921 static void
1924 Blit565to565SurfaceAlphaMMX (SDL_BlitInfo * info) 1922 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info)
1925 { 1923 {
1926 unsigned alpha = info->src->alpha; /* per-surface alpha */ 1924 unsigned alpha = info->src->alpha; /* per-surface alpha */
1927 if (alpha == 128) { 1925 if (alpha == 128) {
1928 Blit16to16SurfaceAlpha128 (info, 0xf7de); 1926 Blit16to16SurfaceAlpha128(info, 0xf7de);
1929 } else { 1927 } else {
1930 int width = info->d_width; 1928 int width = info->d_width;
1931 int height = info->d_height; 1929 int height = info->d_height;
1932 Uint16 *srcp = (Uint16 *) info->s_pixels; 1930 Uint16 *srcp = (Uint16 *) info->s_pixels;
1933 int srcskip = info->s_skip >> 1; 1931 int srcskip = info->s_skip >> 1;
1938 1936
1939 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ 1937 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
1940 *(Uint64 *) load = alpha; 1938 *(Uint64 *) load = alpha;
1941 alpha >>= 3; /* downscale alpha to 5 bits */ 1939 alpha >>= 3; /* downscale alpha to 5 bits */
1942 1940
1943 movq_m2r (*load, mm0); /* alpha(0000000A) -> mm0 */ 1941 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */
1944 punpcklwd_r2r (mm0, mm0); /* 00000A0A -> mm0 */ 1942 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */
1945 punpcklwd_r2r (mm0, mm0); /* 0A0A0A0A -> mm0 */ 1943 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */
1946 /* position alpha to allow for mullo and mulhi on diff channels 1944 /* position alpha to allow for mullo and mulhi on diff channels
1947 to reduce the number of operations */ 1945 to reduce the number of operations */
1948 psllq_i2r (3, mm0); 1946 psllq_i2r(3, mm0);
1949 1947
1950 /* Setup the 565 color channel masks */ 1948 /* Setup the 565 color channel masks */
1951 *(Uint64 *) load = 0x07E007E007E007E0ULL; 1949 *(Uint64 *) load = 0x07E007E007E007E0ULL;
1952 movq_m2r (*load, mm4); /* MASKGREEN -> mm4 */ 1950 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */
1953 *(Uint64 *) load = 0x001F001F001F001FULL; 1951 *(Uint64 *) load = 0x001F001F001F001FULL;
1954 movq_m2r (*load, mm7); /* MASKBLUE -> mm7 */ 1952 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */
1955 while (height--) { 1953 while (height--) {
1956 /* *INDENT-OFF* */ 1954 /* *INDENT-OFF* */
1957 DUFFS_LOOP_QUATRO2( 1955 DUFFS_LOOP_QUATRO2(
1958 { 1956 {
1959 s = *srcp++; 1957 s = *srcp++;
2055 }, width); 2053 }, width);
2056 /* *INDENT-ON* */ 2054 /* *INDENT-ON* */
2057 srcp += srcskip; 2055 srcp += srcskip;
2058 dstp += dstskip; 2056 dstp += dstskip;
2059 } 2057 }
2060 emms (); 2058 emms();
2061 } 2059 }
2062 } 2060 }
2063 2061
2064 /* fast RGB555->RGB555 blending with surface alpha */ 2062 /* fast RGB555->RGB555 blending with surface alpha */
2065 static void 2063 static void
2066 Blit555to555SurfaceAlphaMMX (SDL_BlitInfo * info) 2064 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info)
2067 { 2065 {
2068 unsigned alpha = info->src->alpha; /* per-surface alpha */ 2066 unsigned alpha = info->src->alpha; /* per-surface alpha */
2069 if (alpha == 128) { 2067 if (alpha == 128) {
2070 Blit16to16SurfaceAlpha128 (info, 0xfbde); 2068 Blit16to16SurfaceAlpha128(info, 0xfbde);
2071 } else { 2069 } else {
2072 int width = info->d_width; 2070 int width = info->d_width;
2073 int height = info->d_height; 2071 int height = info->d_height;
2074 Uint16 *srcp = (Uint16 *) info->s_pixels; 2072 Uint16 *srcp = (Uint16 *) info->s_pixels;
2075 int srcskip = info->s_skip >> 1; 2073 int srcskip = info->s_skip >> 1;
2080 2078
2081 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ 2079 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2082 *(Uint64 *) load = alpha; 2080 *(Uint64 *) load = alpha;
2083 alpha >>= 3; /* downscale alpha to 5 bits */ 2081 alpha >>= 3; /* downscale alpha to 5 bits */
2084 2082
2085 movq_m2r (*load, mm0); /* alpha(0000000A) -> mm0 */ 2083 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */
2086 punpcklwd_r2r (mm0, mm0); /* 00000A0A -> mm0 */ 2084 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */
2087 punpcklwd_r2r (mm0, mm0); /* 0A0A0A0A -> mm0 */ 2085 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */
2088 /* position alpha to allow for mullo and mulhi on diff channels 2086 /* position alpha to allow for mullo and mulhi on diff channels
2089 to reduce the number of operations */ 2087 to reduce the number of operations */
2090 psllq_i2r (3, mm0); 2088 psllq_i2r(3, mm0);
2091 2089
2092 /* Setup the 555 color channel masks */ 2090 /* Setup the 555 color channel masks */
2093 *(Uint64 *) load = 0x03E003E003E003E0ULL; 2091 *(Uint64 *) load = 0x03E003E003E003E0ULL;
2094 movq_m2r (*load, mm4); /* MASKGREEN -> mm4 */ 2092 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */
2095 *(Uint64 *) load = 0x001F001F001F001FULL; 2093 *(Uint64 *) load = 0x001F001F001F001FULL;
2096 movq_m2r (*load, mm7); /* MASKBLUE -> mm7 */ 2094 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */
2097 while (height--) { 2095 while (height--) {
2098 /* *INDENT-OFF* */ 2096 /* *INDENT-OFF* */
2099 DUFFS_LOOP_QUATRO2( 2097 DUFFS_LOOP_QUATRO2(
2100 { 2098 {
2101 s = *srcp++; 2099 s = *srcp++;
2202 }, width); 2200 }, width);
2203 /* *INDENT-ON* */ 2201 /* *INDENT-ON* */
2204 srcp += srcskip; 2202 srcp += srcskip;
2205 dstp += dstskip; 2203 dstp += dstskip;
2206 } 2204 }
2207 emms (); 2205 emms();
2208 } 2206 }
2209 } 2207 }
2210 2208
2211 /* End GCC_ASMBLIT */ 2209 /* End GCC_ASMBLIT */
2212 2210
2213 #elif MSVC_ASMBLIT 2211 #elif MSVC_ASMBLIT
2214 /* fast RGB565->RGB565 blending with surface alpha */ 2212 /* fast RGB565->RGB565 blending with surface alpha */
2215 static void 2213 static void
2216 Blit565to565SurfaceAlphaMMX (SDL_BlitInfo * info) 2214 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info)
2217 { 2215 {
2218 unsigned alpha = info->src->alpha; 2216 unsigned alpha = info->src->alpha;
2219 if (alpha == 128) { 2217 if (alpha == 128) {
2220 Blit16to16SurfaceAlpha128 (info, 0xf7de); 2218 Blit16to16SurfaceAlpha128(info, 0xf7de);
2221 } else { 2219 } else {
2222 int width = info->d_width; 2220 int width = info->d_width;
2223 int height = info->d_height; 2221 int height = info->d_height;
2224 Uint16 *srcp = (Uint16 *) info->s_pixels; 2222 Uint16 *srcp = (Uint16 *) info->s_pixels;
2225 int srcskip = info->s_skip >> 1; 2223 int srcskip = info->s_skip >> 1;
2228 Uint32 s, d; 2226 Uint32 s, d;
2229 2227
2230 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; 2228 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
2231 2229
2232 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ 2230 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2233 mm_alpha = _mm_set_pi32 (0, alpha); /* 0000000A -> mm_alpha */ 2231 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
2234 alpha >>= 3; /* downscale alpha to 5 bits */ 2232 alpha >>= 3; /* downscale alpha to 5 bits */
2235 2233
2236 mm_alpha = _mm_unpacklo_pi16 (mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 2234 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
2237 mm_alpha = _mm_unpacklo_pi32 (mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 2235 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
2238 /* position alpha to allow for mullo and mulhi on diff channels 2236 /* position alpha to allow for mullo and mulhi on diff channels
2239 to reduce the number of operations */ 2237 to reduce the number of operations */
2240 mm_alpha = _mm_slli_si64 (mm_alpha, 3); 2238 mm_alpha = _mm_slli_si64(mm_alpha, 3);
2241 2239
2242 /* Setup the 565 color channel masks */ 2240 /* Setup the 565 color channel masks */
2243 gmask = _mm_set_pi32 (0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ 2241 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */
2244 bmask = _mm_set_pi32 (0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 2242 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
2245 2243
2246 while (height--) { 2244 while (height--) {
2247 /* *INDENT-OFF* */ 2245 /* *INDENT-OFF* */
2248 DUFFS_LOOP_QUATRO2( 2246 DUFFS_LOOP_QUATRO2(
2249 { 2247 {
2342 }, width); 2340 }, width);
2343 /* *INDENT-ON* */ 2341 /* *INDENT-ON* */
2344 srcp += srcskip; 2342 srcp += srcskip;
2345 dstp += dstskip; 2343 dstp += dstskip;
2346 } 2344 }
2347 _mm_empty (); 2345 _mm_empty();
2348 } 2346 }
2349 } 2347 }
2350 2348
2351 /* fast RGB555->RGB555 blending with surface alpha */ 2349 /* fast RGB555->RGB555 blending with surface alpha */
2352 static void 2350 static void
2353 Blit555to555SurfaceAlphaMMX (SDL_BlitInfo * info) 2351 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info)
2354 { 2352 {
2355 unsigned alpha = info->src->alpha; 2353 unsigned alpha = info->src->alpha;
2356 if (alpha == 128) { 2354 if (alpha == 128) {
2357 Blit16to16SurfaceAlpha128 (info, 0xfbde); 2355 Blit16to16SurfaceAlpha128(info, 0xfbde);
2358 } else { 2356 } else {
2359 int width = info->d_width; 2357 int width = info->d_width;
2360 int height = info->d_height; 2358 int height = info->d_height;
2361 Uint16 *srcp = (Uint16 *) info->s_pixels; 2359 Uint16 *srcp = (Uint16 *) info->s_pixels;
2362 int srcskip = info->s_skip >> 1; 2360 int srcskip = info->s_skip >> 1;
2365 Uint32 s, d; 2363 Uint32 s, d;
2366 2364
2367 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; 2365 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
2368 2366
2369 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ 2367 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2370 mm_alpha = _mm_set_pi32 (0, alpha); /* 0000000A -> mm_alpha */ 2368 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
2371 alpha >>= 3; /* downscale alpha to 5 bits */ 2369 alpha >>= 3; /* downscale alpha to 5 bits */
2372 2370
2373 mm_alpha = _mm_unpacklo_pi16 (mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 2371 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
2374 mm_alpha = _mm_unpacklo_pi32 (mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 2372 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
2375 /* position alpha to allow for mullo and mulhi on diff channels 2373 /* position alpha to allow for mullo and mulhi on diff channels
2376 to reduce the number of operations */ 2374 to reduce the number of operations */
2377 mm_alpha = _mm_slli_si64 (mm_alpha, 3); 2375 mm_alpha = _mm_slli_si64(mm_alpha, 3);
2378 2376
2379 /* Setup the 555 color channel masks */ 2377 /* Setup the 555 color channel masks */
2380 rmask = _mm_set_pi32 (0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ 2378 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */
2381 gmask = _mm_set_pi32 (0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ 2379 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */
2382 bmask = _mm_set_pi32 (0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 2380 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
2383 2381
2384 while (height--) { 2382 while (height--) {
2385 /* *INDENT-OFF* */ 2383 /* *INDENT-OFF* */
2386 DUFFS_LOOP_QUATRO2( 2384 DUFFS_LOOP_QUATRO2(
2387 { 2385 {
2480 }, width); 2478 }, width);
2481 /* *INDENT-ON* */ 2479 /* *INDENT-ON* */
2482 srcp += srcskip; 2480 srcp += srcskip;
2483 dstp += dstskip; 2481 dstp += dstskip;
2484 } 2482 }
2485 _mm_empty (); 2483 _mm_empty();
2486 } 2484 }
2487 } 2485 }
2488 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 2486 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
2489 2487
2490 /* fast RGB565->RGB565 blending with surface alpha */ 2488 /* fast RGB565->RGB565 blending with surface alpha */
2491 static void 2489 static void
2492 Blit565to565SurfaceAlpha (SDL_BlitInfo * info) 2490 Blit565to565SurfaceAlpha(SDL_BlitInfo * info)
2493 { 2491 {
2494 unsigned alpha = info->src->alpha; 2492 unsigned alpha = info->src->alpha;
2495 if (alpha == 128) { 2493 if (alpha == 128) {
2496 Blit16to16SurfaceAlpha128 (info, 0xf7de); 2494 Blit16to16SurfaceAlpha128(info, 0xf7de);
2497 } else { 2495 } else {
2498 int width = info->d_width; 2496 int width = info->d_width;
2499 int height = info->d_height; 2497 int height = info->d_height;
2500 Uint16 *srcp = (Uint16 *) info->s_pixels; 2498 Uint16 *srcp = (Uint16 *) info->s_pixels;
2501 int srcskip = info->s_skip >> 1; 2499 int srcskip = info->s_skip >> 1;
2526 } 2524 }
2527 } 2525 }
2528 2526
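The scalar 565 path reduces the per-surface alpha to 5 bits (alpha >>= 3) and then, in the loop body elided here, presumably spreads each pixel across a 32-bit word so that a single multiply scales all three channels at once: green is parked in the upper half-word, red and blue stay in the lower one, and every field is separated by at least five clear bits. A standalone sketch (hypothetical name):

#include <stdint.h>

/* blend two RGB565 pixels with a 5-bit alpha (0..31) */
static uint16_t blend565(uint16_t dpix, uint16_t spix, uint32_t alpha5)
{
    uint32_t s = spix, d = dpix;

    /* interleave: 00000GGGGGG00000RRRRR000000BBBBB (mask 0x07e0f81f) */
    s = (s | (s << 16)) & 0x07e0f81f;
    d = (d | (d << 16)) & 0x07e0f81f;

    d += ((s - d) * alpha5) >> 5;   /* one multiply blends all channels */
    d &= 0x07e0f81f;

    return (uint16_t) (d | (d >> 16));   /* fold green back down */
}

/* the RGB555 path uses the same idea with the mask 0x03e07c1f */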
2529 /* fast RGB555->RGB555 blending with surface alpha */ 2527 /* fast RGB555->RGB555 blending with surface alpha */
2530 static void 2528 static void
2531 Blit555to555SurfaceAlpha (SDL_BlitInfo * info) 2529 Blit555to555SurfaceAlpha(SDL_BlitInfo * info)
2532 { 2530 {
2533 unsigned alpha = info->src->alpha; /* per-surface alpha */ 2531 unsigned alpha = info->src->alpha; /* per-surface alpha */
2534 if (alpha == 128) { 2532 if (alpha == 128) {
2535 Blit16to16SurfaceAlpha128 (info, 0xfbde); 2533 Blit16to16SurfaceAlpha128(info, 0xfbde);
2536 } else { 2534 } else {
2537 int width = info->d_width; 2535 int width = info->d_width;
2538 int height = info->d_height; 2536 int height = info->d_height;
2539 Uint16 *srcp = (Uint16 *) info->s_pixels; 2537 Uint16 *srcp = (Uint16 *) info->s_pixels;
2540 int srcskip = info->s_skip >> 1; 2538 int srcskip = info->s_skip >> 1;
2565 } 2563 }
2566 } 2564 }
2567 2565
2568 /* fast ARGB8888->RGB565 blending with pixel alpha */ 2566 /* fast ARGB8888->RGB565 blending with pixel alpha */
2569 static void 2567 static void
2570 BlitARGBto565PixelAlpha (SDL_BlitInfo * info) 2568 BlitARGBto565PixelAlpha(SDL_BlitInfo * info)
2571 { 2569 {
2572 int width = info->d_width; 2570 int width = info->d_width;
2573 int height = info->d_height; 2571 int height = info->d_height;
2574 Uint32 *srcp = (Uint32 *) info->s_pixels; 2572 Uint32 *srcp = (Uint32 *) info->s_pixels;
2575 int srcskip = info->s_skip >> 2; 2573 int srcskip = info->s_skip >> 2;
2611 } 2609 }
2612 } 2610 }
2613 2611
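BlitARGBto565PixelAlpha additionally has to repack the 8:8:8 source colour into 5:6:5 and shrink its 8-bit alpha to 5 bits (presumably s >> 27) before it can blend with the interleaved-field trick above. A sketch of just the repacking step (hypothetical name):

#include <stdint.h>

/* take the top 5/6/5 bits of each 8-bit channel of an ARGB8888 pixel */
static uint16_t argb8888_to_rgb565(uint32_t s)
{
    return (uint16_t) (((s >> 8) & 0xf800)     /* R: bits 23-19 -> 15-11 */
                     | ((s >> 5) & 0x07e0)     /* G: bits 15-10 -> 10-5  */
                     | ((s >> 3) & 0x001f));   /* B: bits  7-3  ->  4-0  */
}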
2614 /* fast ARGB8888->RGB555 blending with pixel alpha */ 2612 /* fast ARGB8888->RGB555 blending with pixel alpha */
2615 static void 2613 static void
2616 BlitARGBto555PixelAlpha (SDL_BlitInfo * info) 2614 BlitARGBto555PixelAlpha(SDL_BlitInfo * info)
2617 { 2615 {
2618 int width = info->d_width; 2616 int width = info->d_width;
2619 int height = info->d_height; 2617 int height = info->d_height;
2620 Uint32 *srcp = (Uint32 *) info->s_pixels; 2618 Uint32 *srcp = (Uint32 *) info->s_pixels;
2621 int srcskip = info->s_skip >> 2; 2619 int srcskip = info->s_skip >> 2;
2658 } 2656 }
2659 } 2657 }
2660 2658
2661 /* General (slow) N->N blending with per-surface alpha */ 2659 /* General (slow) N->N blending with per-surface alpha */
2662 static void 2660 static void
2663 BlitNtoNSurfaceAlpha (SDL_BlitInfo * info) 2661 BlitNtoNSurfaceAlpha(SDL_BlitInfo * info)
2664 { 2662 {
2665 int width = info->d_width; 2663 int width = info->d_width;
2666 int height = info->d_height; 2664 int height = info->d_height;
2667 Uint8 *src = info->s_pixels; 2665 Uint8 *src = info->s_pixels;
2668 int srcskip = info->s_skip; 2666 int srcskip = info->s_skip;
2702 } 2700 }
2703 } 2701 }
2704 2702
2705 /* General (slow) colorkeyed N->N blending with per-surface alpha */ 2703 /* General (slow) colorkeyed N->N blending with per-surface alpha */
2706 static void 2704 static void
2707 BlitNtoNSurfaceAlphaKey (SDL_BlitInfo * info) 2705 BlitNtoNSurfaceAlphaKey(SDL_BlitInfo * info)
2708 { 2706 {
2709 int width = info->d_width; 2707 int width = info->d_width;
2710 int height = info->d_height; 2708 int height = info->d_height;
2711 Uint8 *src = info->s_pixels; 2709 Uint8 *src = info->s_pixels;
2712 int srcskip = info->s_skip; 2710 int srcskip = info->s_skip;
2748 } 2746 }
2749 } 2747 }
2750 2748
2751 /* General (slow) N->N blending with pixel alpha */ 2749 /* General (slow) N->N blending with pixel alpha */
2752 static void 2750 static void
2753 BlitNtoNPixelAlpha (SDL_BlitInfo * info) 2751 BlitNtoNPixelAlpha(SDL_BlitInfo * info)
2754 { 2752 {
2755 int width = info->d_width; 2753 int width = info->d_width;
2756 int height = info->d_height; 2754 int height = info->d_height;
2757 Uint8 *src = info->s_pixels; 2755 Uint8 *src = info->s_pixels;
2758 int srcskip = info->s_skip; 2756 int srcskip = info->s_skip;
2802 } 2800 }
2803 } 2801 }
2804 2802
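The general BlitNtoN* blitters cannot assume any channel layout, so each pixel is decoded through the source SDL_PixelFormat and re-encoded through the destination one; the file does this with SDL's internal pixel-access macros, but the round trip can be sketched with the public pixel-format helpers (illustrative only, not the code the blitter actually runs):

#include "SDL.h"

/* blend one already-fetched source pixel onto one destination pixel,
   going through both pixel formats; sa is the source pixel's alpha */
static Uint32 blend_generic(Uint32 spix, SDL_PixelFormat *srcfmt,
                            Uint32 dpix, SDL_PixelFormat *dstfmt)
{
    Uint8 sr, sg, sb, sa, dr, dg, db, da;

    SDL_GetRGBA(spix, srcfmt, &sr, &sg, &sb, &sa);
    SDL_GetRGBA(dpix, dstfmt, &dr, &dg, &db, &da);

    /* /255 keeps this sketch exact; the real blitters use a >>8 shortcut */
    dr = (Uint8) (dr + (sr - dr) * sa / 255);
    dg = (Uint8) (dg + (sg - dg) * sa / 255);
    db = (Uint8) (db + (sb - db) * sa / 255);

    return SDL_MapRGBA(dstfmt, dr, dg, db, da);
}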
2805 2803
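SDL_CalculateAlphaBlit below is not called by applications directly: it runs when a surface's blit map is built, and the blitter it returns is what SDL_BlitSurface ends up invoking. A minimal usage sketch in the classic 1.2-style API (image.bmp is a placeholder; which blitter actually runs depends on the two surface formats and on the CPU features probed below):

#include "SDL.h"

int main(int argc, char *argv[])
{
    SDL_Surface *screen, *bmp;

    if (SDL_Init(SDL_INIT_VIDEO) < 0)
        return 1;

    screen = SDL_SetVideoMode(640, 480, 32, SDL_SWSURFACE);
    bmp = SDL_LoadBMP("image.bmp");              /* placeholder asset */
    if (screen && bmp) {
        /* per-surface alpha of 128 steers 32-bit blits toward the
           alpha == 128 special cases above */
        SDL_SetAlpha(bmp, SDL_SRCALPHA, 128);
        SDL_BlitSurface(bmp, NULL, screen, NULL);
        SDL_Flip(screen);
        SDL_Delay(2000);
    }
    if (bmp)
        SDL_FreeSurface(bmp);
    SDL_Quit();
    return 0;
}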
2806 SDL_loblit 2804 SDL_loblit
2807 SDL_CalculateAlphaBlit (SDL_Surface * surface, int blit_index) 2805 SDL_CalculateAlphaBlit(SDL_Surface * surface, int blit_index)
2808 { 2806 {
2809 SDL_PixelFormat *sf = surface->format; 2807 SDL_PixelFormat *sf = surface->format;
2810 SDL_PixelFormat *df = surface->map->dst->format; 2808 SDL_PixelFormat *df = surface->map->dst->format;
2811 2809
2812 if (sf->Amask == 0) { 2810 if (sf->Amask == 0) {
2815 return BlitNto1SurfaceAlphaKey; 2813 return BlitNto1SurfaceAlphaKey;
2816 else 2814 else
2817 #if SDL_ALTIVEC_BLITTERS 2815 #if SDL_ALTIVEC_BLITTERS
2818 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && 2816 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
2819 !(surface->map->dst->flags & SDL_HWSURFACE) 2817 !(surface->map->dst->flags & SDL_HWSURFACE)
2820 && SDL_HasAltiVec ()) 2818 && SDL_HasAltiVec())
2821 return Blit32to32SurfaceAlphaKeyAltivec; 2819 return Blit32to32SurfaceAlphaKeyAltivec;
2822 else 2820 else
2823 #endif 2821 #endif
2824 return BlitNtoNSurfaceAlphaKey; 2822 return BlitNtoNSurfaceAlphaKey;
2825 } else { 2823 } else {
2830 2828
2831 case 2: 2829 case 2:
2832 if (surface->map->identity) { 2830 if (surface->map->identity) {
2833 if (df->Gmask == 0x7e0) { 2831 if (df->Gmask == 0x7e0) {
2834 #if MMX_ASMBLIT 2832 #if MMX_ASMBLIT
2835 if (SDL_HasMMX ()) 2833 if (SDL_HasMMX())
2836 return Blit565to565SurfaceAlphaMMX; 2834 return Blit565to565SurfaceAlphaMMX;
2837 else 2835 else
2838 #endif 2836 #endif
2839 return Blit565to565SurfaceAlpha; 2837 return Blit565to565SurfaceAlpha;
2840 } else if (df->Gmask == 0x3e0) { 2838 } else if (df->Gmask == 0x3e0) {
2841 #if MMX_ASMBLIT 2839 #if MMX_ASMBLIT
2842 if (SDL_HasMMX ()) 2840 if (SDL_HasMMX())
2843 return Blit555to555SurfaceAlphaMMX; 2841 return Blit555to555SurfaceAlphaMMX;
2844 else 2842 else
2845 #endif 2843 #endif
2846 return Blit555to555SurfaceAlpha; 2844 return Blit555to555SurfaceAlpha;
2847 } 2845 }
2853 && sf->Gmask == df->Gmask 2851 && sf->Gmask == df->Gmask
2854 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { 2852 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
2855 #if MMX_ASMBLIT 2853 #if MMX_ASMBLIT
2856 if (sf->Rshift % 8 == 0 2854 if (sf->Rshift % 8 == 0
2857 && sf->Gshift % 8 == 0 2855 && sf->Gshift % 8 == 0
2858 && sf->Bshift % 8 == 0 && SDL_HasMMX ()) 2856 && sf->Bshift % 8 == 0 && SDL_HasMMX())
2859 return BlitRGBtoRGBSurfaceAlphaMMX; 2857 return BlitRGBtoRGBSurfaceAlphaMMX;
2860 #endif 2858 #endif
2861 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) { 2859 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
2862 #if SDL_ALTIVEC_BLITTERS 2860 #if SDL_ALTIVEC_BLITTERS
2863 if (!(surface->map->dst->flags & SDL_HWSURFACE) 2861 if (!(surface->map->dst->flags & SDL_HWSURFACE)
2864 && SDL_HasAltiVec ()) 2862 && SDL_HasAltiVec())
2865 return BlitRGBtoRGBSurfaceAlphaAltivec; 2863 return BlitRGBtoRGBSurfaceAlphaAltivec;
2866 #endif 2864 #endif
2867 return BlitRGBtoRGBSurfaceAlpha; 2865 return BlitRGBtoRGBSurfaceAlpha;
2868 } 2866 }
2869 } 2867 }
2870 #if SDL_ALTIVEC_BLITTERS 2868 #if SDL_ALTIVEC_BLITTERS
2871 if ((sf->BytesPerPixel == 4) && 2869 if ((sf->BytesPerPixel == 4) &&
2872 !(surface->map->dst->flags & SDL_HWSURFACE) 2870 !(surface->map->dst->flags & SDL_HWSURFACE)
2873 && SDL_HasAltiVec ()) 2871 && SDL_HasAltiVec())
2874 return Blit32to32SurfaceAlphaAltivec; 2872 return Blit32to32SurfaceAlphaAltivec;
2875 else 2873 else
2876 #endif 2874 #endif
2877 return BlitNtoNSurfaceAlpha; 2875 return BlitNtoNSurfaceAlpha;
2878 2876
2890 case 2: 2888 case 2:
2891 #if SDL_ALTIVEC_BLITTERS 2889 #if SDL_ALTIVEC_BLITTERS
2892 if (sf->BytesPerPixel == 4 2890 if (sf->BytesPerPixel == 4
2893 && !(surface->map->dst->flags & SDL_HWSURFACE) 2891 && !(surface->map->dst->flags & SDL_HWSURFACE)
2894 && df->Gmask == 0x7e0 && df->Bmask == 0x1f 2892 && df->Gmask == 0x7e0 && df->Bmask == 0x1f
2895 && SDL_HasAltiVec ()) 2893 && SDL_HasAltiVec())
2896 return Blit32to565PixelAlphaAltivec; 2894 return Blit32to565PixelAlphaAltivec;
2897 else 2895 else
2898 #endif 2896 #endif
2899 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 2897 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
2900 && sf->Gmask == 0xff00 2898 && sf->Gmask == 0xff00
2914 #if MMX_ASMBLIT 2912 #if MMX_ASMBLIT
2915 if (sf->Rshift % 8 == 0 2913 if (sf->Rshift % 8 == 0
2916 && sf->Gshift % 8 == 0 2914 && sf->Gshift % 8 == 0
2917 && sf->Bshift % 8 == 0 2915 && sf->Bshift % 8 == 0
2918 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { 2916 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
2919 if (SDL_Has3DNow ()) 2917 if (SDL_Has3DNow())
2920 return BlitRGBtoRGBPixelAlphaMMX3DNOW; 2918 return BlitRGBtoRGBPixelAlphaMMX3DNOW;
2921 if (SDL_HasMMX ()) 2919 if (SDL_HasMMX())
2922 return BlitRGBtoRGBPixelAlphaMMX; 2920 return BlitRGBtoRGBPixelAlphaMMX;
2923 } 2921 }
2924 #endif 2922 #endif
2925 if (sf->Amask == 0xff000000) { 2923 if (sf->Amask == 0xff000000) {
2926 #if SDL_ALTIVEC_BLITTERS 2924 #if SDL_ALTIVEC_BLITTERS
2927 if (!(surface->map->dst->flags & SDL_HWSURFACE) 2925 if (!(surface->map->dst->flags & SDL_HWSURFACE)
2928 && SDL_HasAltiVec ()) 2926 && SDL_HasAltiVec())
2929 return BlitRGBtoRGBPixelAlphaAltivec; 2927 return BlitRGBtoRGBPixelAlphaAltivec;
2930 #endif 2928 #endif
2931 return BlitRGBtoRGBPixelAlpha; 2929 return BlitRGBtoRGBPixelAlpha;
2932 } 2930 }
2933 } 2931 }
2934 #if SDL_ALTIVEC_BLITTERS 2932 #if SDL_ALTIVEC_BLITTERS
2935 if (sf->Amask && sf->BytesPerPixel == 4 && 2933 if (sf->Amask && sf->BytesPerPixel == 4 &&
2936 !(surface->map->dst->flags & SDL_HWSURFACE) 2934 !(surface->map->dst->flags & SDL_HWSURFACE)
2937 && SDL_HasAltiVec ()) 2935 && SDL_HasAltiVec())
2938 return Blit32to32PixelAlphaAltivec; 2936 return Blit32to32PixelAlphaAltivec;
2939 else 2937 else
2940 #endif 2938 #endif
2941 return BlitNtoNPixelAlpha; 2939 return BlitNtoNPixelAlpha;
2942 2940