Mercurial > sdl-ios-xcode
comparison src/video/SDL_blit_A.c @ 1668:4da1ee79c9af SDL-1.3
more tweaking indent options
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 29 May 2006 04:04:35 +0000 |
parents | 782fd950bd46 |
children | a1ebb17f9c52 |
comparison
equal
deleted
inserted
replaced
1667:1fddae038bc8 | 1668:4da1ee79c9af |
---|---|
45 | 45 |
46 /* Functions to perform alpha blended blitting */ | 46 /* Functions to perform alpha blended blitting */ |
47 | 47 |
48 /* N->1 blending with per-surface alpha */ | 48 /* N->1 blending with per-surface alpha */ |
49 static void | 49 static void |
50 BlitNto1SurfaceAlpha (SDL_BlitInfo * info) | 50 BlitNto1SurfaceAlpha(SDL_BlitInfo * info) |
51 { | 51 { |
52 int width = info->d_width; | 52 int width = info->d_width; |
53 int height = info->d_height; | 53 int height = info->d_height; |
54 Uint8 *src = info->s_pixels; | 54 Uint8 *src = info->s_pixels; |
55 int srcskip = info->s_skip; | 55 int srcskip = info->s_skip; |
101 } | 101 } |
102 } | 102 } |
103 | 103 |
104 /* N->1 blending with pixel alpha */ | 104 /* N->1 blending with pixel alpha */ |
105 static void | 105 static void |
106 BlitNto1PixelAlpha (SDL_BlitInfo * info) | 106 BlitNto1PixelAlpha(SDL_BlitInfo * info) |
107 { | 107 { |
108 int width = info->d_width; | 108 int width = info->d_width; |
109 int height = info->d_height; | 109 int height = info->d_height; |
110 Uint8 *src = info->s_pixels; | 110 Uint8 *src = info->s_pixels; |
111 int srcskip = info->s_skip; | 111 int srcskip = info->s_skip; |
157 } | 157 } |
158 } | 158 } |
159 | 159 |
160 /* colorkeyed N->1 blending with per-surface alpha */ | 160 /* colorkeyed N->1 blending with per-surface alpha */ |
161 static void | 161 static void |
162 BlitNto1SurfaceAlphaKey (SDL_BlitInfo * info) | 162 BlitNto1SurfaceAlphaKey(SDL_BlitInfo * info) |
163 { | 163 { |
164 int width = info->d_width; | 164 int width = info->d_width; |
165 int height = info->d_height; | 165 int height = info->d_height; |
166 Uint8 *src = info->s_pixels; | 166 Uint8 *src = info->s_pixels; |
167 int srcskip = info->s_skip; | 167 int srcskip = info->s_skip; |
217 } | 217 } |
218 | 218 |
219 #if GCC_ASMBLIT | 219 #if GCC_ASMBLIT |
220 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 220 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
221 static void | 221 static void |
222 BlitRGBtoRGBSurfaceAlpha128MMX (SDL_BlitInfo * info) | 222 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info) |
223 { | 223 { |
224 int width = info->d_width; | 224 int width = info->d_width; |
225 int height = info->d_height; | 225 int height = info->d_height; |
226 Uint32 *srcp = (Uint32 *) info->s_pixels; | 226 Uint32 *srcp = (Uint32 *) info->s_pixels; |
227 int srcskip = info->s_skip >> 2; | 227 int srcskip = info->s_skip >> 2; |
229 int dstskip = info->d_skip >> 2; | 229 int dstskip = info->d_skip >> 2; |
230 Uint32 dalpha = info->dst->Amask; | 230 Uint32 dalpha = info->dst->Amask; |
231 Uint8 load[8]; | 231 Uint8 load[8]; |
232 | 232 |
233 *(Uint64 *) load = 0x00fefefe00fefefeULL; /* alpha128 mask */ | 233 *(Uint64 *) load = 0x00fefefe00fefefeULL; /* alpha128 mask */ |
234 movq_m2r (*load, mm4); /* alpha128 mask -> mm4 */ | 234 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ |
235 *(Uint64 *) load = 0x0001010100010101ULL; /* !alpha128 mask */ | 235 *(Uint64 *) load = 0x0001010100010101ULL; /* !alpha128 mask */ |
236 movq_m2r (*load, mm3); /* !alpha128 mask -> mm3 */ | 236 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ |
237 movd_m2r (dalpha, mm7); /* dst alpha mask */ | 237 movd_m2r(dalpha, mm7); /* dst alpha mask */ |
238 punpckldq_r2r (mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | 238 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ |
239 while (height--) { | 239 while (height--) { |
240 /* *INDENT-OFF* */ | 240 /* *INDENT-OFF* */ |
241 DUFFS_LOOP_DOUBLE2( | 241 DUFFS_LOOP_DOUBLE2( |
242 { | 242 { |
243 Uint32 s = *srcp++; | 243 Uint32 s = *srcp++; |
266 }, width); | 266 }, width); |
267 /* *INDENT-ON* */ | 267 /* *INDENT-ON* */ |
268 srcp += srcskip; | 268 srcp += srcskip; |
269 dstp += dstskip; | 269 dstp += dstskip; |
270 } | 270 } |
271 emms (); | 271 emms(); |
272 } | 272 } |
273 | 273 |
274 /* fast RGB888->(A)RGB888 blending with surface alpha */ | 274 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
275 static void | 275 static void |
276 BlitRGBtoRGBSurfaceAlphaMMX (SDL_BlitInfo * info) | 276 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info) |
277 { | 277 { |
278 SDL_PixelFormat *df = info->dst; | 278 SDL_PixelFormat *df = info->dst; |
279 unsigned alpha = info->src->alpha; | 279 unsigned alpha = info->src->alpha; |
280 | 280 |
281 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | 281 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { |
282 /* only call a128 version when R,G,B occupy lower bits */ | 282 /* only call a128 version when R,G,B occupy lower bits */ |
283 BlitRGBtoRGBSurfaceAlpha128MMX (info); | 283 BlitRGBtoRGBSurfaceAlpha128MMX(info); |
284 } else { | 284 } else { |
285 int width = info->d_width; | 285 int width = info->d_width; |
286 int height = info->d_height; | 286 int height = info->d_height; |
287 Uint32 *srcp = (Uint32 *) info->s_pixels; | 287 Uint32 *srcp = (Uint32 *) info->s_pixels; |
288 int srcskip = info->s_skip >> 2; | 288 int srcskip = info->s_skip >> 2; |
289 Uint32 *dstp = (Uint32 *) info->d_pixels; | 289 Uint32 *dstp = (Uint32 *) info->d_pixels; |
290 int dstskip = info->d_skip >> 2; | 290 int dstskip = info->d_skip >> 2; |
291 | 291 |
292 pxor_r2r (mm5, mm5); /* 0 -> mm5 */ | 292 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ |
293 /* form the alpha mult */ | 293 /* form the alpha mult */ |
294 movd_m2r (alpha, mm4); /* 0000000A -> mm4 */ | 294 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ |
295 punpcklwd_r2r (mm4, mm4); /* 00000A0A -> mm4 */ | 295 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ |
296 punpckldq_r2r (mm4, mm4); /* 0A0A0A0A -> mm4 */ | 296 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ |
297 alpha = | 297 alpha = |
298 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> | 298 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> |
299 Bshift); | 299 Bshift); |
300 movd_m2r (alpha, mm0); /* 00000FFF -> mm0 */ | 300 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ |
301 punpcklbw_r2r (mm0, mm0); /* 00FFFFFF -> mm0 */ | 301 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ |
302 pand_r2r (mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ | 302 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ |
303 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ | 303 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ |
304 movd_m2r (df->Amask, mm7); /* dst alpha mask */ | 304 movd_m2r(df->Amask, mm7); /* dst alpha mask */ |
305 punpckldq_r2r (mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | 305 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ |
306 | 306 |
307 while (height--) { | 307 while (height--) { |
308 /* *INDENT-OFF* */ | 308 /* *INDENT-OFF* */ |
309 DUFFS_LOOP_DOUBLE2({ | 309 DUFFS_LOOP_DOUBLE2({ |
310 /* One Pixel Blend */ | 310 /* One Pixel Blend */ |
355 }, width); | 355 }, width); |
356 /* *INDENT-ON* */ | 356 /* *INDENT-ON* */ |
357 srcp += srcskip; | 357 srcp += srcskip; |
358 dstp += dstskip; | 358 dstp += dstskip; |
359 } | 359 } |
360 emms (); | 360 emms(); |
361 } | 361 } |
362 } | 362 } |
363 | 363 |
364 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | 364 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
365 static void | 365 static void |
366 BlitRGBtoRGBPixelAlphaMMX (SDL_BlitInfo * info) | 366 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info) |
367 { | 367 { |
368 int width = info->d_width; | 368 int width = info->d_width; |
369 int height = info->d_height; | 369 int height = info->d_height; |
370 Uint32 *srcp = (Uint32 *) info->s_pixels; | 370 Uint32 *srcp = (Uint32 *) info->s_pixels; |
371 int srcskip = info->s_skip >> 2; | 371 int srcskip = info->s_skip >> 2; |
372 Uint32 *dstp = (Uint32 *) info->d_pixels; | 372 Uint32 *dstp = (Uint32 *) info->d_pixels; |
373 int dstskip = info->d_skip >> 2; | 373 int dstskip = info->d_skip >> 2; |
374 SDL_PixelFormat *sf = info->src; | 374 SDL_PixelFormat *sf = info->src; |
375 Uint32 amask = sf->Amask; | 375 Uint32 amask = sf->Amask; |
376 | 376 |
377 pxor_r2r (mm6, mm6); /* 0 -> mm6 */ | 377 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ |
378 /* form multiplication mask */ | 378 /* form multiplication mask */ |
379 movd_m2r (sf->Amask, mm7); /* 0000F000 -> mm7 */ | 379 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ |
380 punpcklbw_r2r (mm7, mm7); /* FF000000 -> mm7 */ | 380 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ |
381 pcmpeqb_r2r (mm0, mm0); /* FFFFFFFF -> mm0 */ | 381 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ |
382 movq_r2r (mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ | 382 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ |
383 pxor_r2r (mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ | 383 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ |
384 /* form channel masks */ | 384 /* form channel masks */ |
385 movq_r2r (mm7, mm0); /* 00FFFFFF -> mm0 */ | 385 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ |
386 packsswb_r2r (mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ | 386 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ |
387 packsswb_r2r (mm6, mm3); /* 0000FFFF -> mm3 */ | 387 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ |
388 pxor_r2r (mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ | 388 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ |
389 /* get alpha channel shift */ | 389 /* get alpha channel shift */ |
390 movd_m2r (sf->Ashift, mm5); /* Ashift -> mm5 */ | 390 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */ |
391 | 391 |
392 while (height--) { | 392 while (height--) { |
393 /* *INDENT-OFF* */ | 393 /* *INDENT-OFF* */ |
394 DUFFS_LOOP4({ | 394 DUFFS_LOOP4({ |
395 Uint32 alpha = *srcp & amask; | 395 Uint32 alpha = *srcp & amask; |
437 }, width); | 437 }, width); |
438 /* *INDENT-ON* */ | 438 /* *INDENT-ON* */ |
439 srcp += srcskip; | 439 srcp += srcskip; |
440 dstp += dstskip; | 440 dstp += dstskip; |
441 } | 441 } |
442 emms (); | 442 emms(); |
443 } | 443 } |
444 | 444 |
445 /* End GCC_ASMBLIT */ | 445 /* End GCC_ASMBLIT */ |
446 | 446 |
447 #elif MSVC_ASMBLIT | 447 #elif MSVC_ASMBLIT |
448 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 448 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
449 static void | 449 static void |
450 BlitRGBtoRGBSurfaceAlpha128MMX (SDL_BlitInfo * info) | 450 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info) |
451 { | 451 { |
452 int width = info->d_width; | 452 int width = info->d_width; |
453 int height = info->d_height; | 453 int height = info->d_height; |
454 Uint32 *srcp = (Uint32 *) info->s_pixels; | 454 Uint32 *srcp = (Uint32 *) info->s_pixels; |
455 int srcskip = info->s_skip >> 2; | 455 int srcskip = info->s_skip >> 2; |
457 int dstskip = info->d_skip >> 2; | 457 int dstskip = info->d_skip >> 2; |
458 Uint32 dalpha = info->dst->Amask; | 458 Uint32 dalpha = info->dst->Amask; |
459 | 459 |
460 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; | 460 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; |
461 | 461 |
462 hmask = _mm_set_pi32 (0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ | 462 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ |
463 lmask = _mm_set_pi32 (0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ | 463 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ |
464 dsta = _mm_set_pi32 (dalpha, dalpha); /* dst alpha mask -> dsta */ | 464 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ |
465 | 465 |
466 while (height--) { | 466 while (height--) { |
467 int n = width; | 467 int n = width; |
468 if (n & 1) { | 468 if (n & 1) { |
469 Uint32 s = *srcp++; | 469 Uint32 s = *srcp++; |
478 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | 478 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ |
479 | 479 |
480 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ | 480 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ |
481 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | 481 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ |
482 | 482 |
483 dst2 = _mm_and_si64 (dst2, hmask); /* dst & mask -> dst2 */ | 483 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ |
484 src2 = _mm_and_si64 (src2, hmask); /* src & mask -> src2 */ | 484 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ |
485 src2 = _mm_add_pi32 (src2, dst2); /* dst2 + src2 -> src2 */ | 485 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ |
486 src2 = _mm_srli_pi32 (src2, 1); /* src2 >> 1 -> src2 */ | 486 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ |
487 | 487 |
488 dst1 = _mm_and_si64 (dst1, src1); /* src & dst -> dst1 */ | 488 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ |
489 dst1 = _mm_and_si64 (dst1, lmask); /* dst1 & !mask -> dst1 */ | 489 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ |
490 dst1 = _mm_add_pi32 (dst1, src2); /* src2 + dst1 -> dst1 */ | 490 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ |
491 dst1 = _mm_or_si64 (dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ | 491 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ |
492 | 492 |
493 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */ | 493 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */ |
494 dstp += 2; | 494 dstp += 2; |
495 srcp += 2; | 495 srcp += 2; |
496 } | 496 } |
497 | 497 |
498 srcp += srcskip; | 498 srcp += srcskip; |
499 dstp += dstskip; | 499 dstp += dstskip; |
500 } | 500 } |
501 _mm_empty (); | 501 _mm_empty(); |
502 } | 502 } |
503 | 503 |
504 /* fast RGB888->(A)RGB888 blending with surface alpha */ | 504 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
505 static void | 505 static void |
506 BlitRGBtoRGBSurfaceAlphaMMX (SDL_BlitInfo * info) | 506 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info) |
507 { | 507 { |
508 SDL_PixelFormat *df = info->dst; | 508 SDL_PixelFormat *df = info->dst; |
509 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; | 509 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; |
510 unsigned alpha = info->src->alpha; | 510 unsigned alpha = info->src->alpha; |
511 | 511 |
512 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | 512 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { |
513 /* only call a128 version when R,G,B occupy lower bits */ | 513 /* only call a128 version when R,G,B occupy lower bits */ |
514 BlitRGBtoRGBSurfaceAlpha128MMX (info); | 514 BlitRGBtoRGBSurfaceAlpha128MMX(info); |
515 } else { | 515 } else { |
516 int width = info->d_width; | 516 int width = info->d_width; |
517 int height = info->d_height; | 517 int height = info->d_height; |
518 Uint32 *srcp = (Uint32 *) info->s_pixels; | 518 Uint32 *srcp = (Uint32 *) info->s_pixels; |
519 int srcskip = info->s_skip >> 2; | 519 int srcskip = info->s_skip >> 2; |
522 Uint32 dalpha = df->Amask; | 522 Uint32 dalpha = df->Amask; |
523 Uint32 amult; | 523 Uint32 amult; |
524 | 524 |
525 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; | 525 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; |
526 | 526 |
527 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */ | 527 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
528 /* form the alpha mult */ | 528 /* form the alpha mult */ |
529 amult = alpha | (alpha << 8); | 529 amult = alpha | (alpha << 8); |
530 amult = amult | (amult << 16); | 530 amult = amult | (amult << 16); |
531 chanmask = | 531 chanmask = |
532 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> | 532 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> |
533 Bshift); | 533 Bshift); |
534 mm_alpha = _mm_set_pi32 (0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ | 534 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ |
535 mm_alpha = _mm_unpacklo_pi8 (mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ | 535 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ |
536 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ | 536 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ |
537 dsta = _mm_set_pi32 (dalpha, dalpha); /* dst alpha mask -> dsta */ | 537 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ |
538 | 538 |
539 while (height--) { | 539 while (height--) { |
540 int n = width; | 540 int n = width; |
541 if (n & 1) { | 541 if (n & 1) { |
542 /* One Pixel Blend */ | 542 /* One Pixel Blend */ |
543 src2 = _mm_cvtsi32_si64 (*srcp); /* src(ARGB) -> src2 (0000ARGB) */ | 543 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */ |
544 src2 = _mm_unpacklo_pi8 (src2, mm_zero); /* 0A0R0G0B -> src2 */ | 544 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ |
545 | 545 |
546 dst1 = _mm_cvtsi32_si64 (*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */ | 546 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */ |
547 dst1 = _mm_unpacklo_pi8 (dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | 547 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ |
548 | 548 |
549 src2 = _mm_sub_pi16 (src2, dst1); /* src2 - dst2 -> src2 */ | 549 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ |
550 src2 = _mm_mullo_pi16 (src2, mm_alpha); /* src2 * alpha -> src2 */ | 550 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ |
551 src2 = _mm_srli_pi16 (src2, 8); /* src2 >> 8 -> src2 */ | 551 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ |
552 dst1 = _mm_add_pi8 (src2, dst1); /* src2 + dst1 -> dst1 */ | 552 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ |
553 | 553 |
554 dst1 = _mm_packs_pu16 (dst1, mm_zero); /* 0000ARGB -> dst1 */ | 554 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ |
555 dst1 = _mm_or_si64 (dst1, dsta); /* dsta | dst1 -> dst1 */ | 555 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ |
556 *dstp = _mm_cvtsi64_si32 (dst1); /* dst1 -> pixel */ | 556 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ |
557 | 557 |
558 ++srcp; | 558 ++srcp; |
559 ++dstp; | 559 ++dstp; |
560 | 560 |
561 n--; | 561 n--; |
563 | 563 |
564 for (n >>= 1; n > 0; --n) { | 564 for (n >>= 1; n > 0; --n) { |
565 /* Two Pixels Blend */ | 565 /* Two Pixels Blend */ |
566 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ | 566 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ |
567 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | 567 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ |
568 src1 = _mm_unpacklo_pi8 (src1, mm_zero); /* low - 0A0R0G0B -> src1 */ | 568 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ |
569 src2 = _mm_unpackhi_pi8 (src2, mm_zero); /* high - 0A0R0G0B -> src2 */ | 569 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ |
570 | 570 |
571 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */ | 571 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */ |
572 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | 572 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ |
573 dst1 = _mm_unpacklo_pi8 (dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ | 573 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ |
574 dst2 = _mm_unpackhi_pi8 (dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ | 574 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ |
575 | 575 |
576 src1 = _mm_sub_pi16 (src1, dst1); /* src1 - dst1 -> src1 */ | 576 src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */ |
577 src1 = _mm_mullo_pi16 (src1, mm_alpha); /* src1 * alpha -> src1 */ | 577 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ |
578 src1 = _mm_srli_pi16 (src1, 8); /* src1 >> 8 -> src1 */ | 578 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ |
579 dst1 = _mm_add_pi8 (src1, dst1); /* src1 + dst1(dst1) -> dst1 */ | 579 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ |
580 | 580 |
581 src2 = _mm_sub_pi16 (src2, dst2); /* src2 - dst2 -> src2 */ | 581 src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */ |
582 src2 = _mm_mullo_pi16 (src2, mm_alpha); /* src2 * alpha -> src2 */ | 582 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ |
583 src2 = _mm_srli_pi16 (src2, 8); /* src2 >> 8 -> src2 */ | 583 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ |
584 dst2 = _mm_add_pi8 (src2, dst2); /* src2 + dst2(dst2) -> dst2 */ | 584 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ |
585 | 585 |
586 dst1 = _mm_packs_pu16 (dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ | 586 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ |
587 dst1 = _mm_or_si64 (dst1, dsta); /* dsta | dst1 -> dst1 */ | 587 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ |
588 | 588 |
589 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */ | 589 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */ |
590 | 590 |
591 srcp += 2; | 591 srcp += 2; |
592 dstp += 2; | 592 dstp += 2; |
593 } | 593 } |
594 srcp += srcskip; | 594 srcp += srcskip; |
595 dstp += dstskip; | 595 dstp += dstskip; |
596 } | 596 } |
597 _mm_empty (); | 597 _mm_empty(); |
598 } | 598 } |
599 } | 599 } |
600 | 600 |
601 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | 601 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
602 static void | 602 static void |
603 BlitRGBtoRGBPixelAlphaMMX (SDL_BlitInfo * info) | 603 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info) |
604 { | 604 { |
605 int width = info->d_width; | 605 int width = info->d_width; |
606 int height = info->d_height; | 606 int height = info->d_height; |
607 Uint32 *srcp = (Uint32 *) info->s_pixels; | 607 Uint32 *srcp = (Uint32 *) info->s_pixels; |
608 int srcskip = info->s_skip >> 2; | 608 int srcskip = info->s_skip >> 2; |
614 Uint32 ashift = sf->Ashift; | 614 Uint32 ashift = sf->Ashift; |
615 Uint64 multmask; | 615 Uint64 multmask; |
616 | 616 |
617 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | 617 __m64 src1, dst1, mm_alpha, mm_zero, dmask; |
618 | 618 |
619 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */ | 619 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
 620 multmask = ~(0xFFFFi64 << (ashift * 2)); | 620 multmask = ~(0xFFFFi64 << (ashift * 2)); |
621 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ | 621 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ |
622 | 622 |
623 while (height--) { | 623 while (height--) { |
624 /* *INDENT-OFF* */ | 624 /* *INDENT-OFF* */ |
656 }, width); | 656 }, width); |
657 /* *INDENT-ON* */ | 657 /* *INDENT-ON* */ |
658 srcp += srcskip; | 658 srcp += srcskip; |
659 dstp += dstskip; | 659 dstp += dstskip; |
660 } | 660 } |
661 _mm_empty (); | 661 _mm_empty(); |
662 } | 662 } |
663 | 663 |
664 /* End MSVC_ASMBLIT */ | 664 /* End MSVC_ASMBLIT */ |
665 | 665 |
666 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | 666 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ |
734 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ | 734 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ |
735 } while (0) | 735 } while (0) |
736 | 736 |
737 /* Calculate the permute vector used for 32->32 swizzling */ | 737 /* Calculate the permute vector used for 32->32 swizzling */ |
738 static vector unsigned char | 738 static vector unsigned char |
739 calc_swizzle32 (const SDL_PixelFormat * srcfmt, | 739 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt) |
740 const SDL_PixelFormat * dstfmt) | |
741 { | 740 { |
742 /* | 741 /* |
743 * We have to assume that the bits that aren't used by other | 742 * We have to assume that the bits that aren't used by other |
744 * colors is alpha, and it's one complete byte, since some formats | 743 * colors is alpha, and it's one complete byte, since some formats |
745 * leave alpha with a zero mask, but we should still swizzle the bits. | 744 * leave alpha with a zero mask, but we should still swizzle the bits. |
756 srcfmt = &default_pixel_format; | 755 srcfmt = &default_pixel_format; |
757 } | 756 } |
758 if (!dstfmt) { | 757 if (!dstfmt) { |
759 dstfmt = &default_pixel_format; | 758 dstfmt = &default_pixel_format; |
760 } | 759 } |
761 const vector unsigned char plus = VECUINT8_LITERAL | 760 const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00, |
762 (0x00, 0x00, 0x00, 0x00, | 761 0x04, 0x04, 0x04, 0x04, |
763 0x04, 0x04, 0x04, 0x04, | 762 0x08, 0x08, 0x08, 0x08, |
764 0x08, 0x08, 0x08, 0x08, | 763 0x0C, 0x0C, 0x0C, |
765 0x0C, 0x0C, 0x0C, 0x0C); | 764 0x0C); |
766 vector unsigned char vswiz; | 765 vector unsigned char vswiz; |
767 vector unsigned int srcvec; | 766 vector unsigned int srcvec; |
768 #define RESHIFT(X) (3 - ((X) >> 3)) | 767 #define RESHIFT(X) (3 - ((X) >> 3)) |
769 Uint32 rmask = RESHIFT (srcfmt->Rshift) << (dstfmt->Rshift); | 768 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); |
770 Uint32 gmask = RESHIFT (srcfmt->Gshift) << (dstfmt->Gshift); | 769 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); |
771 Uint32 bmask = RESHIFT (srcfmt->Bshift) << (dstfmt->Bshift); | 770 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); |
772 Uint32 amask; | 771 Uint32 amask; |
773 /* Use zero for alpha if either surface doesn't have alpha */ | 772 /* Use zero for alpha if either surface doesn't have alpha */ |
774 if (dstfmt->Amask) { | 773 if (dstfmt->Amask) { |
775 amask = | 774 amask = |
776 ((srcfmt->Amask) ? RESHIFT (srcfmt->Ashift) : 0x10) << (dstfmt-> | 775 ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt-> |
777 Ashift); | 776 Ashift); |
778 } else { | 777 } else { |
779 amask = | 778 amask = |
780 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ | 779 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ |
781 0xFFFFFFFF); | 780 0xFFFFFFFF); |
782 } | 781 } |
783 #undef RESHIFT | 782 #undef RESHIFT |
784 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask); | 783 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask); |
785 vswiz = vec_add (plus, (vector unsigned char) vec_splat (srcvec, 0)); | 784 vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0)); |
786 return (vswiz); | 785 return (vswiz); |
787 } | 786 } |
788 | 787 |
789 static void | 788 static void |
790 Blit32to565PixelAlphaAltivec (SDL_BlitInfo * info) | 789 Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info) |
791 { | 790 { |
792 int height = info->d_height; | 791 int height = info->d_height; |
793 Uint8 *src = (Uint8 *) info->s_pixels; | 792 Uint8 *src = (Uint8 *) info->s_pixels; |
794 int srcskip = info->s_skip; | 793 int srcskip = info->s_skip; |
795 Uint8 *dst = (Uint8 *) info->d_pixels; | 794 Uint8 *dst = (Uint8 *) info->d_pixels; |
796 int dstskip = info->d_skip; | 795 int dstskip = info->d_skip; |
797 SDL_PixelFormat *srcfmt = info->src; | 796 SDL_PixelFormat *srcfmt = info->src; |
798 | 797 |
799 vector unsigned char v0 = vec_splat_u8 (0); | 798 vector unsigned char v0 = vec_splat_u8(0); |
800 vector unsigned short v8_16 = vec_splat_u16 (8); | 799 vector unsigned short v8_16 = vec_splat_u16(8); |
801 vector unsigned short v1_16 = vec_splat_u16 (1); | 800 vector unsigned short v1_16 = vec_splat_u16(1); |
802 vector unsigned short v2_16 = vec_splat_u16 (2); | 801 vector unsigned short v2_16 = vec_splat_u16(2); |
803 vector unsigned short v3_16 = vec_splat_u16 (3); | 802 vector unsigned short v3_16 = vec_splat_u16(3); |
804 vector unsigned int v8_32 = vec_splat_u32 (8); | 803 vector unsigned int v8_32 = vec_splat_u32(8); |
805 vector unsigned int v16_32 = vec_add (v8_32, v8_32); | 804 vector unsigned int v16_32 = vec_add(v8_32, v8_32); |
806 vector unsigned short v3f = | 805 vector unsigned short v3f = |
807 VECUINT16_LITERAL (0x003f, 0x003f, 0x003f, 0x003f, | 806 VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f, |
808 0x003f, 0x003f, 0x003f, 0x003f); | 807 0x003f, 0x003f, 0x003f, 0x003f); |
809 vector unsigned short vfc = | 808 vector unsigned short vfc = |
810 VECUINT16_LITERAL (0x00fc, 0x00fc, 0x00fc, 0x00fc, | 809 VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc, |
811 0x00fc, 0x00fc, 0x00fc, 0x00fc); | 810 0x00fc, 0x00fc, 0x00fc, 0x00fc); |
812 | 811 |
813 /* | 812 /* |
814 0x10 - 0x1f is the alpha | 813 0x10 - 0x1f is the alpha |
815 0x00 - 0x0e evens are the red | 814 0x00 - 0x0e evens are the red |
816 0x01 - 0x0f odds are zero | 815 0x01 - 0x0f odds are zero |
817 */ | 816 */ |
818 vector unsigned char vredalpha1 = | 817 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01, |
819 VECUINT8_LITERAL (0x10, 0x00, 0x01, 0x01, | 818 0x10, 0x02, 0x01, 0x01, |
820 0x10, 0x02, 0x01, 0x01, | 819 0x10, 0x04, 0x01, 0x01, |
821 0x10, 0x04, 0x01, 0x01, | 820 0x10, 0x06, 0x01, |
822 0x10, 0x06, 0x01, 0x01); | 821 0x01); |
823 vector unsigned char vredalpha2 = | 822 vector unsigned char vredalpha2 = |
824 (vector unsigned char) (vec_add ((vector unsigned int) vredalpha1, | 823 (vector unsigned char) (vec_add((vector unsigned int) vredalpha1, |
825 vec_sl (v8_32, v16_32)) | 824 vec_sl(v8_32, v16_32)) |
826 ); | 825 ); |
827 /* | 826 /* |
828 0x00 - 0x0f is ARxx ARxx ARxx ARxx | 827 0x00 - 0x0f is ARxx ARxx ARxx ARxx |
829 0x11 - 0x0f odds are blue | 828 0x11 - 0x0f odds are blue |
830 */ | 829 */ |
831 vector unsigned char vblue1 = VECUINT8_LITERAL (0x00, 0x01, 0x02, 0x11, | 830 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11, |
832 0x04, 0x05, 0x06, 0x13, | 831 0x04, 0x05, 0x06, 0x13, |
833 0x08, 0x09, 0x0a, 0x15, | 832 0x08, 0x09, 0x0a, 0x15, |
834 0x0c, 0x0d, 0x0e, 0x17); | 833 0x0c, 0x0d, 0x0e, 0x17); |
835 vector unsigned char vblue2 = | 834 vector unsigned char vblue2 = |
836 (vector unsigned char) (vec_add ((vector unsigned int) vblue1, v8_32) | 835 (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32) |
837 ); | 836 ); |
838 /* | 837 /* |
839 0x00 - 0x0f is ARxB ARxB ARxB ARxB | 838 0x00 - 0x0f is ARxB ARxB ARxB ARxB |
840 0x10 - 0x0e evens are green | 839 0x10 - 0x0e evens are green |
841 */ | 840 */ |
842 vector unsigned char vgreen1 = VECUINT8_LITERAL (0x00, 0x01, 0x10, 0x03, | 841 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03, |
843 0x04, 0x05, 0x12, 0x07, | 842 0x04, 0x05, 0x12, 0x07, |
844 0x08, 0x09, 0x14, 0x0b, | 843 0x08, 0x09, 0x14, 0x0b, |
845 0x0c, 0x0d, 0x16, 0x0f); | 844 0x0c, 0x0d, 0x16, 0x0f); |
846 vector unsigned char vgreen2 = | 845 vector unsigned char vgreen2 = |
847 (vector unsigned | 846 (vector unsigned |
848 char) (vec_add ((vector unsigned int) vgreen1, vec_sl (v8_32, v8_32)) | 847 char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32)) |
849 ); | 848 ); |
850 vector unsigned char vgmerge = VECUINT8_LITERAL (0x00, 0x02, 0x00, 0x06, | 849 vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06, |
851 0x00, 0x0a, 0x00, 0x0e, | 850 0x00, 0x0a, 0x00, 0x0e, |
852 0x00, 0x12, 0x00, 0x16, | 851 0x00, 0x12, 0x00, 0x16, |
853 0x00, 0x1a, 0x00, 0x1e); | 852 0x00, 0x1a, 0x00, 0x1e); |
854 vector unsigned char mergePermute = VEC_MERGE_PERMUTE (); | 853 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); |
855 vector unsigned char vpermute = calc_swizzle32 (srcfmt, NULL); | 854 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); |
856 vector unsigned char valphaPermute = | 855 vector unsigned char valphaPermute = |
857 vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC)); | 856 vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); |
858 | 857 |
859 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8 (-7); | 858 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7); |
860 vf800 = vec_sl (vf800, vec_splat_u16 (8)); | 859 vf800 = vec_sl(vf800, vec_splat_u16(8)); |
861 | 860 |
862 while (height--) { | 861 while (height--) { |
863 int extrawidth; | 862 int extrawidth; |
864 vector unsigned char valigner; | 863 vector unsigned char valigner; |
865 vector unsigned char vsrc; | 864 vector unsigned char vsrc; |
883 } \ | 882 } \ |
884 src += 4; \ | 883 src += 4; \ |
885 dst += 2; \ | 884 dst += 2; \ |
886 widthvar--; \ | 885 widthvar--; \ |
887 } | 886 } |
888 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dst)) && (width), width); | 887 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); |
889 extrawidth = (width % 8); | 888 extrawidth = (width % 8); |
890 valigner = VEC_ALIGNER (src); | 889 valigner = VEC_ALIGNER(src); |
891 vsrc = (vector unsigned char) vec_ld (0, src); | 890 vsrc = (vector unsigned char) vec_ld(0, src); |
892 width -= extrawidth; | 891 width -= extrawidth; |
893 while (width) { | 892 while (width) { |
894 vector unsigned char valpha; | 893 vector unsigned char valpha; |
895 vector unsigned char vsrc1, vsrc2; | 894 vector unsigned char vsrc1, vsrc2; |
896 vector unsigned char vdst1, vdst2; | 895 vector unsigned char vdst1, vdst2; |
897 vector unsigned short vR, vG, vB; | 896 vector unsigned short vR, vG, vB; |
898 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; | 897 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; |
899 | 898 |
900 /* Load 8 pixels from src as ARGB */ | 899 /* Load 8 pixels from src as ARGB */ |
901 voverflow = (vector unsigned char) vec_ld (15, src); | 900 voverflow = (vector unsigned char) vec_ld(15, src); |
902 vsrc = vec_perm (vsrc, voverflow, valigner); | 901 vsrc = vec_perm(vsrc, voverflow, valigner); |
903 vsrc1 = vec_perm (vsrc, vsrc, vpermute); | 902 vsrc1 = vec_perm(vsrc, vsrc, vpermute); |
904 src += 16; | 903 src += 16; |
905 vsrc = (vector unsigned char) vec_ld (15, src); | 904 vsrc = (vector unsigned char) vec_ld(15, src); |
906 voverflow = vec_perm (voverflow, vsrc, valigner); | 905 voverflow = vec_perm(voverflow, vsrc, valigner); |
907 vsrc2 = vec_perm (voverflow, voverflow, vpermute); | 906 vsrc2 = vec_perm(voverflow, voverflow, vpermute); |
908 src += 16; | 907 src += 16; |
909 | 908 |
910 /* Load 8 pixels from dst as XRGB */ | 909 /* Load 8 pixels from dst as XRGB */ |
911 voverflow = vec_ld (0, dst); | 910 voverflow = vec_ld(0, dst); |
912 vR = vec_and ((vector unsigned short) voverflow, vf800); | 911 vR = vec_and((vector unsigned short) voverflow, vf800); |
913 vB = vec_sl ((vector unsigned short) voverflow, v3_16); | 912 vB = vec_sl((vector unsigned short) voverflow, v3_16); |
914 vG = vec_sl (vB, v2_16); | 913 vG = vec_sl(vB, v2_16); |
915 vdst1 = | 914 vdst1 = |
916 (vector unsigned char) vec_perm ((vector unsigned char) vR, | 915 (vector unsigned char) vec_perm((vector unsigned char) vR, |
917 (vector unsigned char) vR, | 916 (vector unsigned char) vR, |
918 vredalpha1); | 917 vredalpha1); |
919 vdst1 = vec_perm (vdst1, (vector unsigned char) vB, vblue1); | 918 vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1); |
920 vdst1 = vec_perm (vdst1, (vector unsigned char) vG, vgreen1); | 919 vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1); |
921 vdst2 = | 920 vdst2 = |
922 (vector unsigned char) vec_perm ((vector unsigned char) vR, | 921 (vector unsigned char) vec_perm((vector unsigned char) vR, |
923 (vector unsigned char) vR, | 922 (vector unsigned char) vR, |
924 vredalpha2); | 923 vredalpha2); |
925 vdst2 = vec_perm (vdst2, (vector unsigned char) vB, vblue2); | 924 vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2); |
926 vdst2 = vec_perm (vdst2, (vector unsigned char) vG, vgreen2); | 925 vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2); |
927 | 926 |
928 /* Alpha blend 8 pixels as ARGB */ | 927 /* Alpha blend 8 pixels as ARGB */ |
929 valpha = vec_perm (vsrc1, v0, valphaPermute); | 928 valpha = vec_perm(vsrc1, v0, valphaPermute); |
930 VEC_MULTIPLY_ALPHA (vsrc1, vdst1, valpha, mergePermute, v1_16, | 929 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, |
931 v8_16); | 930 v8_16); |
932 valpha = vec_perm (vsrc2, v0, valphaPermute); | 931 valpha = vec_perm(vsrc2, v0, valphaPermute); |
933 VEC_MULTIPLY_ALPHA (vsrc2, vdst2, valpha, mergePermute, v1_16, | 932 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, |
934 v8_16); | 933 v8_16); |
935 | 934 |
936 /* Convert 8 pixels to 565 */ | 935 /* Convert 8 pixels to 565 */ |
937 vpixel = (vector unsigned short) vec_packpx ((vector unsigned int) | 936 vpixel = (vector unsigned short) vec_packpx((vector unsigned int) |
938 vdst1, | 937 vdst1, |
939 (vector unsigned int) | 938 (vector unsigned int) |
940 vdst2); | 939 vdst2); |
941 vgpixel = | 940 vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge); |
942 (vector unsigned short) vec_perm (vdst1, vdst2, vgmerge); | 941 vgpixel = vec_and(vgpixel, vfc); |
943 vgpixel = vec_and (vgpixel, vfc); | 942 vgpixel = vec_sl(vgpixel, v3_16); |
944 vgpixel = vec_sl (vgpixel, v3_16); | 943 vrpixel = vec_sl(vpixel, v1_16); |
945 vrpixel = vec_sl (vpixel, v1_16); | 944 vrpixel = vec_and(vrpixel, vf800); |
946 vrpixel = vec_and (vrpixel, vf800); | 945 vbpixel = vec_and(vpixel, v3f); |
947 vbpixel = vec_and (vpixel, v3f); | |
948 vdst1 = | 946 vdst1 = |
949 vec_or ((vector unsigned char) vrpixel, | 947 vec_or((vector unsigned char) vrpixel, |
950 (vector unsigned char) vgpixel); | 948 (vector unsigned char) vgpixel); |
951 vdst1 = vec_or (vdst1, (vector unsigned char) vbpixel); | 949 vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel); |
952 | 950 |
953 /* Store 8 pixels */ | 951 /* Store 8 pixels */ |
954 vec_st (vdst1, 0, dst); | 952 vec_st(vdst1, 0, dst); |
955 | 953 |
956 width -= 8; | 954 width -= 8; |
957 dst += 16; | 955 dst += 16; |
958 } | 956 } |
959 ONE_PIXEL_BLEND ((extrawidth), extrawidth); | 957 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
960 #undef ONE_PIXEL_BLEND | 958 #undef ONE_PIXEL_BLEND |
961 src += srcskip; | 959 src += srcskip; |
962 dst += dstskip; | 960 dst += dstskip; |
963 } | 961 } |
964 } | 962 } |
965 | 963 |
966 static void | 964 static void |
967 Blit32to32SurfaceAlphaKeyAltivec (SDL_BlitInfo * info) | 965 Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info) |
968 { | 966 { |
969 unsigned alpha = info->src->alpha; | 967 unsigned alpha = info->src->alpha; |
970 int height = info->d_height; | 968 int height = info->d_height; |
971 Uint32 *srcp = (Uint32 *) info->s_pixels; | 969 Uint32 *srcp = (Uint32 *) info->s_pixels; |
972 int srcskip = info->s_skip >> 2; | 970 int srcskip = info->s_skip >> 2; |
989 vector unsigned short v1; | 987 vector unsigned short v1; |
990 vector unsigned short v8; | 988 vector unsigned short v8; |
991 vector unsigned int vckey; | 989 vector unsigned int vckey; |
992 vector unsigned int vrgbmask; | 990 vector unsigned int vrgbmask; |
993 | 991 |
994 mergePermute = VEC_MERGE_PERMUTE (); | 992 mergePermute = VEC_MERGE_PERMUTE(); |
995 v0 = vec_splat_u8 (0); | 993 v0 = vec_splat_u8(0); |
996 v1 = vec_splat_u16 (1); | 994 v1 = vec_splat_u16(1); |
997 v8 = vec_splat_u16 (8); | 995 v8 = vec_splat_u16(8); |
998 | 996 |
999 /* set the alpha to 255 on the destination surf */ | 997 /* set the alpha to 255 on the destination surf */ |
1000 valphamask = VEC_ALPHA_MASK (); | 998 valphamask = VEC_ALPHA_MASK(); |
1001 | 999 |
1002 vsrcPermute = calc_swizzle32 (srcfmt, NULL); | 1000 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
1003 vdstPermute = calc_swizzle32 (NULL, dstfmt); | 1001 vdstPermute = calc_swizzle32(NULL, dstfmt); |
1004 vsdstPermute = calc_swizzle32 (dstfmt, NULL); | 1002 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
1005 | 1003 |
1006 /* set a vector full of alpha and 255-alpha */ | 1004 /* set a vector full of alpha and 255-alpha */ |
1007 ((unsigned char *) &valpha)[0] = alpha; | 1005 ((unsigned char *) &valpha)[0] = alpha; |
1008 valpha = vec_splat (valpha, 0); | 1006 valpha = vec_splat(valpha, 0); |
1009 vbits = (vector unsigned char) vec_splat_s8 (-1); | 1007 vbits = (vector unsigned char) vec_splat_s8(-1); |
1010 | 1008 |
1011 ckey &= rgbmask; | 1009 ckey &= rgbmask; |
1012 ((unsigned int *) (char *) &vckey)[0] = ckey; | 1010 ((unsigned int *) (char *) &vckey)[0] = ckey; |
1013 vckey = vec_splat (vckey, 0); | 1011 vckey = vec_splat(vckey, 0); |
1014 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask; | 1012 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask; |
1015 vrgbmask = vec_splat (vrgbmask, 0); | 1013 vrgbmask = vec_splat(vrgbmask, 0); |
1016 | 1014 |
1017 while (height--) { | 1015 while (height--) { |
1018 int width = info->d_width; | 1016 int width = info->d_width; |
1019 #define ONE_PIXEL_BLEND(condition, widthvar) \ | 1017 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
1020 while (condition) { \ | 1018 while (condition) { \ |
1029 } \ | 1027 } \ |
1030 dstp++; \ | 1028 dstp++; \ |
1031 srcp++; \ | 1029 srcp++; \ |
1032 widthvar--; \ | 1030 widthvar--; \ |
1033 } | 1031 } |
1034 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); | 1032 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
1035 if (width > 0) { | 1033 if (width > 0) { |
1036 int extrawidth = (width % 4); | 1034 int extrawidth = (width % 4); |
1037 vector unsigned char valigner = VEC_ALIGNER (srcp); | 1035 vector unsigned char valigner = VEC_ALIGNER(srcp); |
1038 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); | 1036 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1039 width -= extrawidth; | 1037 width -= extrawidth; |
1040 while (width) { | 1038 while (width) { |
1041 vector unsigned char vsel; | 1039 vector unsigned char vsel; |
1042 vector unsigned char voverflow; | 1040 vector unsigned char voverflow; |
1043 vector unsigned char vd; | 1041 vector unsigned char vd; |
1044 vector unsigned char vd_orig; | 1042 vector unsigned char vd_orig; |
1045 | 1043 |
1046 /* s = *srcp */ | 1044 /* s = *srcp */ |
1047 voverflow = (vector unsigned char) vec_ld (15, srcp); | 1045 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1048 vs = vec_perm (vs, voverflow, valigner); | 1046 vs = vec_perm(vs, voverflow, valigner); |
1049 | 1047 |
1050 /* vsel is set for items that match the key */ | 1048 /* vsel is set for items that match the key */ |
1051 vsel = | 1049 vsel = |
1052 (vector unsigned char) vec_and ((vector unsigned int) vs, | 1050 (vector unsigned char) vec_and((vector unsigned int) vs, |
1053 vrgbmask); | 1051 vrgbmask); |
1054 vsel = (vector unsigned char) vec_cmpeq ((vector unsigned int) | 1052 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int) |
1055 vsel, vckey); | 1053 vsel, vckey); |
1056 | 1054 |
1057 /* permute to source format */ | 1055 /* permute to source format */ |
1058 vs = vec_perm (vs, valpha, vsrcPermute); | 1056 vs = vec_perm(vs, valpha, vsrcPermute); |
1059 | 1057 |
1060 /* d = *dstp */ | 1058 /* d = *dstp */ |
1061 vd = (vector unsigned char) vec_ld (0, dstp); | 1059 vd = (vector unsigned char) vec_ld(0, dstp); |
1062 vd_orig = vd = vec_perm (vd, v0, vsdstPermute); | 1060 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); |
1063 | 1061 |
1064 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); | 1062 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1065 | 1063 |
1066 /* set the alpha channel to full on */ | 1064 /* set the alpha channel to full on */ |
1067 vd = vec_or (vd, valphamask); | 1065 vd = vec_or(vd, valphamask); |
1068 | 1066 |
1069 /* mask out color key */ | 1067 /* mask out color key */ |
1070 vd = vec_sel (vd, vd_orig, vsel); | 1068 vd = vec_sel(vd, vd_orig, vsel); |
1071 | 1069 |
1072 /* permute to dest format */ | 1070 /* permute to dest format */ |
1073 vd = vec_perm (vd, vbits, vdstPermute); | 1071 vd = vec_perm(vd, vbits, vdstPermute); |
1074 | 1072 |
1075 /* *dstp = res */ | 1073 /* *dstp = res */ |
1076 vec_st ((vector unsigned int) vd, 0, dstp); | 1074 vec_st((vector unsigned int) vd, 0, dstp); |
1077 | 1075 |
1078 srcp += 4; | 1076 srcp += 4; |
1079 dstp += 4; | 1077 dstp += 4; |
1080 width -= 4; | 1078 width -= 4; |
1081 vs = voverflow; | 1079 vs = voverflow; |
1082 } | 1080 } |
1083 ONE_PIXEL_BLEND ((extrawidth), extrawidth); | 1081 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1084 } | 1082 } |
1085 #undef ONE_PIXEL_BLEND | 1083 #undef ONE_PIXEL_BLEND |
1086 | 1084 |
1087 srcp += srcskip; | 1085 srcp += srcskip; |
1088 dstp += dstskip; | 1086 dstp += dstskip; |
1089 } | 1087 } |
1090 } | 1088 } |
1091 | 1089 |
1092 | 1090 |
1093 static void | 1091 static void |
1094 Blit32to32PixelAlphaAltivec (SDL_BlitInfo * info) | 1092 Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info) |
1095 { | 1093 { |
1096 int width = info->d_width; | 1094 int width = info->d_width; |
1097 int height = info->d_height; | 1095 int height = info->d_height; |
1098 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1096 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1099 int srcskip = info->s_skip >> 2; | 1097 int srcskip = info->s_skip >> 2; |
1110 vector unsigned char vpixelmask; | 1108 vector unsigned char vpixelmask; |
1111 vector unsigned char v0; | 1109 vector unsigned char v0; |
1112 vector unsigned short v1; | 1110 vector unsigned short v1; |
1113 vector unsigned short v8; | 1111 vector unsigned short v8; |
1114 | 1112 |
1115 v0 = vec_splat_u8 (0); | 1113 v0 = vec_splat_u8(0); |
1116 v1 = vec_splat_u16 (1); | 1114 v1 = vec_splat_u16(1); |
1117 v8 = vec_splat_u16 (8); | 1115 v8 = vec_splat_u16(8); |
1118 mergePermute = VEC_MERGE_PERMUTE (); | 1116 mergePermute = VEC_MERGE_PERMUTE(); |
1119 valphamask = VEC_ALPHA_MASK (); | 1117 valphamask = VEC_ALPHA_MASK(); |
1120 valphaPermute = vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC)); | 1118 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); |
1121 vpixelmask = vec_nor (valphamask, v0); | 1119 vpixelmask = vec_nor(valphamask, v0); |
1122 vsrcPermute = calc_swizzle32 (srcfmt, NULL); | 1120 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
1123 vdstPermute = calc_swizzle32 (NULL, dstfmt); | 1121 vdstPermute = calc_swizzle32(NULL, dstfmt); |
1124 vsdstPermute = calc_swizzle32 (dstfmt, NULL); | 1122 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
1125 | 1123 |
1126 while (height--) { | 1124 while (height--) { |
1127 width = info->d_width; | 1125 width = info->d_width; |
1128 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | 1126 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1129 Uint32 Pixel; \ | 1127 Uint32 Pixel; \ |
1136 } \ | 1134 } \ |
1137 ++srcp; \ | 1135 ++srcp; \ |
1138 ++dstp; \ | 1136 ++dstp; \ |
1139 widthvar--; \ | 1137 widthvar--; \ |
1140 } | 1138 } |
1141 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); | 1139 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
1142 if (width > 0) { | 1140 if (width > 0) { |
1143 /* vsrcPermute */ | 1141 /* vsrcPermute */ |
1144 /* vdstPermute */ | 1142 /* vdstPermute */ |
1145 int extrawidth = (width % 4); | 1143 int extrawidth = (width % 4); |
1146 vector unsigned char valigner = VEC_ALIGNER (srcp); | 1144 vector unsigned char valigner = VEC_ALIGNER(srcp); |
1147 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); | 1145 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1148 width -= extrawidth; | 1146 width -= extrawidth; |
1149 while (width) { | 1147 while (width) { |
1150 vector unsigned char voverflow; | 1148 vector unsigned char voverflow; |
1151 vector unsigned char vd; | 1149 vector unsigned char vd; |
1152 vector unsigned char valpha; | 1150 vector unsigned char valpha; |
1153 vector unsigned char vdstalpha; | 1151 vector unsigned char vdstalpha; |
1154 /* s = *srcp */ | 1152 /* s = *srcp */ |
1155 voverflow = (vector unsigned char) vec_ld (15, srcp); | 1153 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1156 vs = vec_perm (vs, voverflow, valigner); | 1154 vs = vec_perm(vs, voverflow, valigner); |
1157 vs = vec_perm (vs, v0, vsrcPermute); | 1155 vs = vec_perm(vs, v0, vsrcPermute); |
1158 | 1156 |
1159 valpha = vec_perm (vs, v0, valphaPermute); | 1157 valpha = vec_perm(vs, v0, valphaPermute); |
1160 | 1158 |
1161 /* d = *dstp */ | 1159 /* d = *dstp */ |
1162 vd = (vector unsigned char) vec_ld (0, dstp); | 1160 vd = (vector unsigned char) vec_ld(0, dstp); |
1163 vd = vec_perm (vd, v0, vsdstPermute); | 1161 vd = vec_perm(vd, v0, vsdstPermute); |
1164 vdstalpha = vec_and (vd, valphamask); | 1162 vdstalpha = vec_and(vd, valphamask); |
1165 | 1163 |
1166 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); | 1164 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1167 | 1165 |
1168 /* set the alpha to the dest alpha */ | 1166 /* set the alpha to the dest alpha */ |
1169 vd = vec_and (vd, vpixelmask); | 1167 vd = vec_and(vd, vpixelmask); |
1170 vd = vec_or (vd, vdstalpha); | 1168 vd = vec_or(vd, vdstalpha); |
1171 vd = vec_perm (vd, v0, vdstPermute); | 1169 vd = vec_perm(vd, v0, vdstPermute); |
1172 | 1170 |
1173 /* *dstp = res */ | 1171 /* *dstp = res */ |
1174 vec_st ((vector unsigned int) vd, 0, dstp); | 1172 vec_st((vector unsigned int) vd, 0, dstp); |
1175 | 1173 |
1176 srcp += 4; | 1174 srcp += 4; |
1177 dstp += 4; | 1175 dstp += 4; |
1178 width -= 4; | 1176 width -= 4; |
1179 vs = voverflow; | 1177 vs = voverflow; |
1180 | 1178 |
1181 } | 1179 } |
1182 ONE_PIXEL_BLEND ((extrawidth), extrawidth); | 1180 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1183 } | 1181 } |
1184 srcp += srcskip; | 1182 srcp += srcskip; |
1185 dstp += dstskip; | 1183 dstp += dstskip; |
1186 #undef ONE_PIXEL_BLEND | 1184 #undef ONE_PIXEL_BLEND |
1187 } | 1185 } |
1188 } | 1186 } |
1189 | 1187 |
1190 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | 1188 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
1191 static void | 1189 static void |
1192 BlitRGBtoRGBPixelAlphaAltivec (SDL_BlitInfo * info) | 1190 BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info) |
1193 { | 1191 { |
1194 int width = info->d_width; | 1192 int width = info->d_width; |
1195 int height = info->d_height; | 1193 int height = info->d_height; |
1196 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1194 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1197 int srcskip = info->s_skip >> 2; | 1195 int srcskip = info->s_skip >> 2; |
1202 vector unsigned char valphamask; | 1200 vector unsigned char valphamask; |
1203 vector unsigned char vpixelmask; | 1201 vector unsigned char vpixelmask; |
1204 vector unsigned char v0; | 1202 vector unsigned char v0; |
1205 vector unsigned short v1; | 1203 vector unsigned short v1; |
1206 vector unsigned short v8; | 1204 vector unsigned short v8; |
1207 v0 = vec_splat_u8 (0); | 1205 v0 = vec_splat_u8(0); |
1208 v1 = vec_splat_u16 (1); | 1206 v1 = vec_splat_u16(1); |
1209 v8 = vec_splat_u16 (8); | 1207 v8 = vec_splat_u16(8); |
1210 mergePermute = VEC_MERGE_PERMUTE (); | 1208 mergePermute = VEC_MERGE_PERMUTE(); |
1211 valphamask = VEC_ALPHA_MASK (); | 1209 valphamask = VEC_ALPHA_MASK(); |
1212 valphaPermute = vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC)); | 1210 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); |
1213 | 1211 |
1214 | 1212 |
1215 vpixelmask = vec_nor (valphamask, v0); | 1213 vpixelmask = vec_nor(valphamask, v0); |
1216 while (height--) { | 1214 while (height--) { |
1217 width = info->d_width; | 1215 width = info->d_width; |
1218 #define ONE_PIXEL_BLEND(condition, widthvar) \ | 1216 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
1219 while ((condition)) { \ | 1217 while ((condition)) { \ |
1220 Uint32 dalpha; \ | 1218 Uint32 dalpha; \ |
1240 } \ | 1238 } \ |
1241 ++srcp; \ | 1239 ++srcp; \ |
1242 ++dstp; \ | 1240 ++dstp; \ |
1243 widthvar--; \ | 1241 widthvar--; \ |
1244 } | 1242 } |
1245 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); | 1243 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
1246 if (width > 0) { | 1244 if (width > 0) { |
1247 int extrawidth = (width % 4); | 1245 int extrawidth = (width % 4); |
1248 vector unsigned char valigner = VEC_ALIGNER (srcp); | 1246 vector unsigned char valigner = VEC_ALIGNER(srcp); |
1249 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); | 1247 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1250 width -= extrawidth; | 1248 width -= extrawidth; |
1251 while (width) { | 1249 while (width) { |
1252 vector unsigned char voverflow; | 1250 vector unsigned char voverflow; |
1253 vector unsigned char vd; | 1251 vector unsigned char vd; |
1254 vector unsigned char valpha; | 1252 vector unsigned char valpha; |
1255 vector unsigned char vdstalpha; | 1253 vector unsigned char vdstalpha; |
1256 /* s = *srcp */ | 1254 /* s = *srcp */ |
1257 voverflow = (vector unsigned char) vec_ld (15, srcp); | 1255 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1258 vs = vec_perm (vs, voverflow, valigner); | 1256 vs = vec_perm(vs, voverflow, valigner); |
1259 | 1257 |
1260 valpha = vec_perm (vs, v0, valphaPermute); | 1258 valpha = vec_perm(vs, v0, valphaPermute); |
1261 | 1259 |
1262 /* d = *dstp */ | 1260 /* d = *dstp */ |
1263 vd = (vector unsigned char) vec_ld (0, dstp); | 1261 vd = (vector unsigned char) vec_ld(0, dstp); |
1264 vdstalpha = vec_and (vd, valphamask); | 1262 vdstalpha = vec_and(vd, valphamask); |
1265 | 1263 |
1266 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); | 1264 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1267 | 1265 |
1268 /* set the alpha to the dest alpha */ | 1266 /* set the alpha to the dest alpha */ |
1269 vd = vec_and (vd, vpixelmask); | 1267 vd = vec_and(vd, vpixelmask); |
1270 vd = vec_or (vd, vdstalpha); | 1268 vd = vec_or(vd, vdstalpha); |
1271 | 1269 |
1272 /* *dstp = res */ | 1270 /* *dstp = res */ |
1273 vec_st ((vector unsigned int) vd, 0, dstp); | 1271 vec_st((vector unsigned int) vd, 0, dstp); |
1274 | 1272 |
1275 srcp += 4; | 1273 srcp += 4; |
1276 dstp += 4; | 1274 dstp += 4; |
1277 width -= 4; | 1275 width -= 4; |
1278 vs = voverflow; | 1276 vs = voverflow; |
1279 } | 1277 } |
1280 ONE_PIXEL_BLEND ((extrawidth), extrawidth); | 1278 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1281 } | 1279 } |
1282 srcp += srcskip; | 1280 srcp += srcskip; |
1283 dstp += dstskip; | 1281 dstp += dstskip; |
1284 } | 1282 } |
1285 #undef ONE_PIXEL_BLEND | 1283 #undef ONE_PIXEL_BLEND |
1286 } | 1284 } |
1287 | 1285 |
1288 static void | 1286 static void |
1289 Blit32to32SurfaceAlphaAltivec (SDL_BlitInfo * info) | 1287 Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info) |
1290 { | 1288 { |
1291 /* XXX : 6 */ | 1289 /* XXX : 6 */ |
1292 unsigned alpha = info->src->alpha; | 1290 unsigned alpha = info->src->alpha; |
1293 int height = info->d_height; | 1291 int height = info->d_height; |
1294 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1292 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1307 vector unsigned char valphamask; | 1305 vector unsigned char valphamask; |
1308 vector unsigned char vbits; | 1306 vector unsigned char vbits; |
1309 vector unsigned short v1; | 1307 vector unsigned short v1; |
1310 vector unsigned short v8; | 1308 vector unsigned short v8; |
1311 | 1309 |
1312 mergePermute = VEC_MERGE_PERMUTE (); | 1310 mergePermute = VEC_MERGE_PERMUTE(); |
1313 v1 = vec_splat_u16 (1); | 1311 v1 = vec_splat_u16(1); |
1314 v8 = vec_splat_u16 (8); | 1312 v8 = vec_splat_u16(8); |
1315 | 1313 |
1316 /* set the alpha to 255 on the destination surf */ | 1314 /* set the alpha to 255 on the destination surf */ |
1317 valphamask = VEC_ALPHA_MASK (); | 1315 valphamask = VEC_ALPHA_MASK(); |
1318 | 1316 |
1319 vsrcPermute = calc_swizzle32 (srcfmt, NULL); | 1317 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
1320 vdstPermute = calc_swizzle32 (NULL, dstfmt); | 1318 vdstPermute = calc_swizzle32(NULL, dstfmt); |
1321 vsdstPermute = calc_swizzle32 (dstfmt, NULL); | 1319 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
1322 | 1320 |
1323 /* set a vector full of alpha and 255-alpha */ | 1321 /* set a vector full of alpha and 255-alpha */ |
1324 ((unsigned char *) &valpha)[0] = alpha; | 1322 ((unsigned char *) &valpha)[0] = alpha; |
1325 valpha = vec_splat (valpha, 0); | 1323 valpha = vec_splat(valpha, 0); |
1326 vbits = (vector unsigned char) vec_splat_s8 (-1); | 1324 vbits = (vector unsigned char) vec_splat_s8(-1); |
1327 | 1325 |
1328 while (height--) { | 1326 while (height--) { |
1329 int width = info->d_width; | 1327 int width = info->d_width; |
1330 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | 1328 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1331 Uint32 Pixel; \ | 1329 Uint32 Pixel; \ |
1336 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ | 1334 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
1337 ++srcp; \ | 1335 ++srcp; \ |
1338 ++dstp; \ | 1336 ++dstp; \ |
1339 widthvar--; \ | 1337 widthvar--; \ |
1340 } | 1338 } |
1341 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); | 1339 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
1342 if (width > 0) { | 1340 if (width > 0) { |
1343 int extrawidth = (width % 4); | 1341 int extrawidth = (width % 4); |
1344 vector unsigned char valigner = vec_lvsl (0, srcp); | 1342 vector unsigned char valigner = vec_lvsl(0, srcp); |
1345 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); | 1343 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1346 width -= extrawidth; | 1344 width -= extrawidth; |
1347 while (width) { | 1345 while (width) { |
1348 vector unsigned char voverflow; | 1346 vector unsigned char voverflow; |
1349 vector unsigned char vd; | 1347 vector unsigned char vd; |
1350 | 1348 |
1351 /* s = *srcp */ | 1349 /* s = *srcp */ |
1352 voverflow = (vector unsigned char) vec_ld (15, srcp); | 1350 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1353 vs = vec_perm (vs, voverflow, valigner); | 1351 vs = vec_perm(vs, voverflow, valigner); |
1354 vs = vec_perm (vs, valpha, vsrcPermute); | 1352 vs = vec_perm(vs, valpha, vsrcPermute); |
1355 | 1353 |
1356 /* d = *dstp */ | 1354 /* d = *dstp */ |
1357 vd = (vector unsigned char) vec_ld (0, dstp); | 1355 vd = (vector unsigned char) vec_ld(0, dstp); |
1358 vd = vec_perm (vd, vd, vsdstPermute); | 1356 vd = vec_perm(vd, vd, vsdstPermute); |
1359 | 1357 |
1360 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); | 1358 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1361 | 1359 |
1362 /* set the alpha channel to full on */ | 1360 /* set the alpha channel to full on */ |
1363 vd = vec_or (vd, valphamask); | 1361 vd = vec_or(vd, valphamask); |
1364 vd = vec_perm (vd, vbits, vdstPermute); | 1362 vd = vec_perm(vd, vbits, vdstPermute); |
1365 | 1363 |
1366 /* *dstp = res */ | 1364 /* *dstp = res */ |
1367 vec_st ((vector unsigned int) vd, 0, dstp); | 1365 vec_st((vector unsigned int) vd, 0, dstp); |
1368 | 1366 |
1369 srcp += 4; | 1367 srcp += 4; |
1370 dstp += 4; | 1368 dstp += 4; |
1371 width -= 4; | 1369 width -= 4; |
1372 vs = voverflow; | 1370 vs = voverflow; |
1373 } | 1371 } |
1374 ONE_PIXEL_BLEND ((extrawidth), extrawidth); | 1372 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1375 } | 1373 } |
1376 #undef ONE_PIXEL_BLEND | 1374 #undef ONE_PIXEL_BLEND |
1377 | 1375 |
1378 srcp += srcskip; | 1376 srcp += srcskip; |
1379 dstp += dstskip; | 1377 dstp += dstskip; |
1382 } | 1380 } |
1383 | 1381 |
1384 | 1382 |
1385 /* fast RGB888->(A)RGB888 blending */ | 1383 /* fast RGB888->(A)RGB888 blending */ |
1386 static void | 1384 static void |
1387 BlitRGBtoRGBSurfaceAlphaAltivec (SDL_BlitInfo * info) | 1385 BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info) |
1388 { | 1386 { |
1389 unsigned alpha = info->src->alpha; | 1387 unsigned alpha = info->src->alpha; |
1390 int height = info->d_height; | 1388 int height = info->d_height; |
1391 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1389 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1392 int srcskip = info->s_skip >> 2; | 1390 int srcskip = info->s_skip >> 2; |
1396 vector unsigned char valpha; | 1394 vector unsigned char valpha; |
1397 vector unsigned char valphamask; | 1395 vector unsigned char valphamask; |
1398 vector unsigned short v1; | 1396 vector unsigned short v1; |
1399 vector unsigned short v8; | 1397 vector unsigned short v8; |
1400 | 1398 |
1401 mergePermute = VEC_MERGE_PERMUTE (); | 1399 mergePermute = VEC_MERGE_PERMUTE(); |
1402 v1 = vec_splat_u16 (1); | 1400 v1 = vec_splat_u16(1); |
1403 v8 = vec_splat_u16 (8); | 1401 v8 = vec_splat_u16(8); |
1404 | 1402 |
1405 /* set the alpha to 255 on the destination surf */ | 1403 /* set the alpha to 255 on the destination surf */ |
1406 valphamask = VEC_ALPHA_MASK (); | 1404 valphamask = VEC_ALPHA_MASK(); |
1407 | 1405 |
1408 /* set a vector full of alpha and 255-alpha */ | 1406 /* set a vector full of alpha and 255-alpha */ |
1409 ((unsigned char *) &valpha)[0] = alpha; | 1407 ((unsigned char *) &valpha)[0] = alpha; |
1410 valpha = vec_splat (valpha, 0); | 1408 valpha = vec_splat(valpha, 0); |
1411 | 1409 |
1412 while (height--) { | 1410 while (height--) { |
1413 int width = info->d_width; | 1411 int width = info->d_width; |
1414 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | 1412 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1415 Uint32 s = *srcp; \ | 1413 Uint32 s = *srcp; \ |
1424 *dstp = d1 | d | 0xff000000; \ | 1422 *dstp = d1 | d | 0xff000000; \ |
1425 ++srcp; \ | 1423 ++srcp; \ |
1426 ++dstp; \ | 1424 ++dstp; \ |
1427 widthvar--; \ | 1425 widthvar--; \ |
1428 } | 1426 } |
1429 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); | 1427 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
1430 if (width > 0) { | 1428 if (width > 0) { |
1431 int extrawidth = (width % 4); | 1429 int extrawidth = (width % 4); |
1432 vector unsigned char valigner = VEC_ALIGNER (srcp); | 1430 vector unsigned char valigner = VEC_ALIGNER(srcp); |
1433 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp); | 1431 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1434 width -= extrawidth; | 1432 width -= extrawidth; |
1435 while (width) { | 1433 while (width) { |
1436 vector unsigned char voverflow; | 1434 vector unsigned char voverflow; |
1437 vector unsigned char vd; | 1435 vector unsigned char vd; |
1438 | 1436 |
1439 /* s = *srcp */ | 1437 /* s = *srcp */ |
1440 voverflow = (vector unsigned char) vec_ld (15, srcp); | 1438 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1441 vs = vec_perm (vs, voverflow, valigner); | 1439 vs = vec_perm(vs, voverflow, valigner); |
1442 | 1440 |
1443 /* d = *dstp */ | 1441 /* d = *dstp */ |
1444 vd = (vector unsigned char) vec_ld (0, dstp); | 1442 vd = (vector unsigned char) vec_ld(0, dstp); |
1445 | 1443 |
1446 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8); | 1444 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1447 | 1445 |
1448 /* set the alpha channel to full on */ | 1446 /* set the alpha channel to full on */ |
1449 vd = vec_or (vd, valphamask); | 1447 vd = vec_or(vd, valphamask); |
1450 | 1448 |
1451 /* *dstp = res */ | 1449 /* *dstp = res */ |
1452 vec_st ((vector unsigned int) vd, 0, dstp); | 1450 vec_st((vector unsigned int) vd, 0, dstp); |
1453 | 1451 |
1454 srcp += 4; | 1452 srcp += 4; |
1455 dstp += 4; | 1453 dstp += 4; |
1456 width -= 4; | 1454 width -= 4; |
1457 vs = voverflow; | 1455 vs = voverflow; |
1458 } | 1456 } |
1459 ONE_PIXEL_BLEND ((extrawidth), extrawidth); | 1457 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1460 } | 1458 } |
1461 #undef ONE_PIXEL_BLEND | 1459 #undef ONE_PIXEL_BLEND |
1462 | 1460 |
1463 srcp += srcskip; | 1461 srcp += srcskip; |
1464 dstp += dstskip; | 1462 dstp += dstskip; |
1470 #endif | 1468 #endif |
1471 #endif /* SDL_ALTIVEC_BLITTERS */ | 1469 #endif /* SDL_ALTIVEC_BLITTERS */ |
1472 | 1470 |
1473 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 1471 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
1474 static void | 1472 static void |
1475 BlitRGBtoRGBSurfaceAlpha128 (SDL_BlitInfo * info) | 1473 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info) |
1476 { | 1474 { |
1477 int width = info->d_width; | 1475 int width = info->d_width; |
1478 int height = info->d_height; | 1476 int height = info->d_height; |
1479 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1477 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1480 int srcskip = info->s_skip >> 2; | 1478 int srcskip = info->s_skip >> 2; |
1495 } | 1493 } |
1496 } | 1494 } |
1497 | 1495 |
1498 /* fast RGB888->(A)RGB888 blending with surface alpha */ | 1496 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
1499 static void | 1497 static void |
1500 BlitRGBtoRGBSurfaceAlpha (SDL_BlitInfo * info) | 1498 BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo * info) |
1501 { | 1499 { |
1502 unsigned alpha = info->src->alpha; | 1500 unsigned alpha = info->src->alpha; |
1503 if (alpha == 128) { | 1501 if (alpha == 128) { |
1504 BlitRGBtoRGBSurfaceAlpha128 (info); | 1502 BlitRGBtoRGBSurfaceAlpha128(info); |
1505 } else { | 1503 } else { |
1506 int width = info->d_width; | 1504 int width = info->d_width; |
1507 int height = info->d_height; | 1505 int height = info->d_height; |
1508 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1506 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1509 int srcskip = info->s_skip >> 2; | 1507 int srcskip = info->s_skip >> 2; |
1567 } | 1565 } |
1568 } | 1566 } |
1569 | 1567 |
1570 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | 1568 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
1571 static void | 1569 static void |
1572 BlitRGBtoRGBPixelAlpha (SDL_BlitInfo * info) | 1570 BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info) |
1573 { | 1571 { |
1574 int width = info->d_width; | 1572 int width = info->d_width; |
1575 int height = info->d_height; | 1573 int height = info->d_height; |
1576 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1574 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1577 int srcskip = info->s_skip >> 2; | 1575 int srcskip = info->s_skip >> 2; |
1620 } | 1618 } |
1621 | 1619 |
1622 #if GCC_ASMBLIT | 1620 #if GCC_ASMBLIT |
1623 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | 1621 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ |
1624 inline static void | 1622 inline static void |
1625 BlitRGBtoRGBPixelAlphaMMX3DNOW (SDL_BlitInfo * info) | 1623 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) |
1626 { | 1624 { |
1627 int width = info->d_width; | 1625 int width = info->d_width; |
1628 int height = info->d_height; | 1626 int height = info->d_height; |
1629 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1627 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1630 int srcskip = info->s_skip >> 2; | 1628 int srcskip = info->s_skip >> 2; |
1631 Uint32 *dstp = (Uint32 *) info->d_pixels; | 1629 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1632 int dstskip = info->d_skip >> 2; | 1630 int dstskip = info->d_skip >> 2; |
1633 SDL_PixelFormat *sf = info->src; | 1631 SDL_PixelFormat *sf = info->src; |
1634 Uint32 amask = sf->Amask; | 1632 Uint32 amask = sf->Amask; |
1635 | 1633 |
1636 __asm__ ( | 1634 __asm__( |
1637 /* make mm6 all zeros. */ | 1635 /* make mm6 all zeros. */ |
1638 "pxor %%mm6, %%mm6\n" | 1636 "pxor %%mm6, %%mm6\n" |
1639 /* Make a mask to preserve the alpha. */ | 1637 /* Make a mask to preserve the alpha. */ |
1640 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ | 1638 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ |
1641 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ | 1639 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ |
1642 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ | 1640 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ |
1643 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ | 1641 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ |
1644 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ | 1642 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ |
1645 /* form channel masks */ | 1643 /* form channel masks */ |
1646 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ | 1644 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ |
1647 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ | 1645 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ |
1648 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ | 1646 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ |
1649 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ | 1647 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ |
1650 /* get alpha channel shift */ | 1648 /* get alpha channel shift */ |
1651 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ | 1649 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ |
1652 : /* nothing */ : "m" (sf->Amask), "m" (sf->Ashift)); | 1650 : /* nothing */ : "m"(sf->Amask), "m"(sf->Ashift)); |
1653 | 1651 |
1654 while (height--) { | 1652 while (height--) { |
1655 | 1653 |
1656 /* *INDENT-OFF* */ | 1654 /* *INDENT-OFF* */ |
1657 DUFFS_LOOP4({ | 1655 DUFFS_LOOP4({ |
1728 /* *INDENT-ON* */ | 1726 /* *INDENT-ON* */ |
1729 srcp += srcskip; | 1727 srcp += srcskip; |
1730 dstp += dstskip; | 1728 dstp += dstskip; |
1731 } | 1729 } |
1732 | 1730 |
1733 __asm__ ("emms\n":); | 1731 __asm__("emms\n":); |
1734 } | 1732 } |
1735 | 1733 |
1736 /* End GCC_ASMBLIT*/ | 1734 /* End GCC_ASMBLIT*/ |
1737 | 1735 |
1738 #elif MSVC_ASMBLIT | 1736 #elif MSVC_ASMBLIT |
1739 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | 1737 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ |
1740 static void | 1738 static void |
1741 BlitRGBtoRGBPixelAlphaMMX3DNOW (SDL_BlitInfo * info) | 1739 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) |
1742 { | 1740 { |
1743 int width = info->d_width; | 1741 int width = info->d_width; |
1744 int height = info->d_height; | 1742 int height = info->d_height; |
1745 Uint32 *srcp = (Uint32 *) info->s_pixels; | 1743 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1746 int srcskip = info->s_skip >> 2; | 1744 int srcskip = info->s_skip >> 2; |
1752 Uint32 ashift = sf->Ashift; | 1750 Uint32 ashift = sf->Ashift; |
1753 Uint64 multmask; | 1751 Uint64 multmask; |
1754 | 1752 |
1755 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | 1753 __m64 src1, dst1, mm_alpha, mm_zero, dmask; |
1756 | 1754 |
1757 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */ | 1755 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
1758 multmask = ~(0xFFFFi 64 << (ashift * 2)); | 1756 multmask = ~(0xFFFFi 64 << (ashift * 2)); |
1759 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ | 1757 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ |
1760 | 1758 |
1761 while (height--) { | 1759 while (height--) { |
1762 /* *INDENT-OFF* */ | 1760 /* *INDENT-OFF* */ |
1799 }, width); | 1797 }, width); |
1800 /* *INDENT-ON* */ | 1798 /* *INDENT-ON* */ |
1801 srcp += srcskip; | 1799 srcp += srcskip; |
1802 dstp += dstskip; | 1800 dstp += dstskip; |
1803 } | 1801 } |
1804 _mm_empty (); | 1802 _mm_empty(); |
1805 } | 1803 } |
1806 | 1804 |
1807 /* End MSVC_ASMBLIT */ | 1805 /* End MSVC_ASMBLIT */ |
1808 | 1806 |
1809 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | 1807 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ |
1818 #define BLEND2x16_50(d, s, mask) \ | 1816 #define BLEND2x16_50(d, s, mask) \ |
1819 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ | 1817 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ |
1820 + (s & d & (~(mask | mask << 16)))) | 1818 + (s & d & (~(mask | mask << 16)))) |
1821 | 1819 |
1822 static void | 1820 static void |
1823 Blit16to16SurfaceAlpha128 (SDL_BlitInfo * info, Uint16 mask) | 1821 Blit16to16SurfaceAlpha128(SDL_BlitInfo * info, Uint16 mask) |
1824 { | 1822 { |
1825 int width = info->d_width; | 1823 int width = info->d_width; |
1826 int height = info->d_height; | 1824 int height = info->d_height; |
1827 Uint16 *srcp = (Uint16 *) info->s_pixels; | 1825 Uint16 *srcp = (Uint16 *) info->s_pixels; |
1828 int srcskip = info->s_skip >> 1; | 1826 int srcskip = info->s_skip >> 1; |
1840 int w = width; | 1838 int w = width; |
1841 | 1839 |
1842 /* handle odd destination */ | 1840 /* handle odd destination */ |
1843 if ((uintptr_t) dstp & 2) { | 1841 if ((uintptr_t) dstp & 2) { |
1844 Uint16 d = *dstp, s = *srcp; | 1842 Uint16 d = *dstp, s = *srcp; |
1845 *dstp = BLEND16_50 (d, s, mask); | 1843 *dstp = BLEND16_50(d, s, mask); |
1846 dstp++; | 1844 dstp++; |
1847 srcp++; | 1845 srcp++; |
1848 w--; | 1846 w--; |
1849 } | 1847 } |
1850 srcp++; /* srcp is now 32-bit aligned */ | 1848 srcp++; /* srcp is now 32-bit aligned */ |
1860 s = (prev_sw << 16) + (sw >> 16); | 1858 s = (prev_sw << 16) + (sw >> 16); |
1861 #else | 1859 #else |
1862 s = (prev_sw >> 16) + (sw << 16); | 1860 s = (prev_sw >> 16) + (sw << 16); |
1863 #endif | 1861 #endif |
1864 prev_sw = sw; | 1862 prev_sw = sw; |
1865 *(Uint32 *) dstp = BLEND2x16_50 (dw, s, mask); | 1863 *(Uint32 *) dstp = BLEND2x16_50(dw, s, mask); |
1866 dstp += 2; | 1864 dstp += 2; |
1867 srcp += 2; | 1865 srcp += 2; |
1868 w -= 2; | 1866 w -= 2; |
1869 } | 1867 } |
1870 | 1868 |
1874 #if SDL_BYTEORDER == SDL_BIG_ENDIAN | 1872 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
1875 s = (Uint16) prev_sw; | 1873 s = (Uint16) prev_sw; |
1876 #else | 1874 #else |
1877 s = (Uint16) (prev_sw >> 16); | 1875 s = (Uint16) (prev_sw >> 16); |
1878 #endif | 1876 #endif |
1879 *dstp = BLEND16_50 (d, s, mask); | 1877 *dstp = BLEND16_50(d, s, mask); |
1880 srcp++; | 1878 srcp++; |
1881 dstp++; | 1879 dstp++; |
1882 } | 1880 } |
1883 srcp += srcskip - 1; | 1881 srcp += srcskip - 1; |
1884 dstp += dstskip; | 1882 dstp += dstskip; |
1887 int w = width; | 1885 int w = width; |
1888 | 1886 |
1889 /* first odd pixel? */ | 1887 /* first odd pixel? */ |
1890 if ((uintptr_t) srcp & 2) { | 1888 if ((uintptr_t) srcp & 2) { |
1891 Uint16 d = *dstp, s = *srcp; | 1889 Uint16 d = *dstp, s = *srcp; |
1892 *dstp = BLEND16_50 (d, s, mask); | 1890 *dstp = BLEND16_50(d, s, mask); |
1893 srcp++; | 1891 srcp++; |
1894 dstp++; | 1892 dstp++; |
1895 w--; | 1893 w--; |
1896 } | 1894 } |
1897 /* srcp and dstp are now 32-bit aligned */ | 1895 /* srcp and dstp are now 32-bit aligned */ |
1898 | 1896 |
1899 while (w > 1) { | 1897 while (w > 1) { |
1900 Uint32 sw = *(Uint32 *) srcp; | 1898 Uint32 sw = *(Uint32 *) srcp; |
1901 Uint32 dw = *(Uint32 *) dstp; | 1899 Uint32 dw = *(Uint32 *) dstp; |
1902 *(Uint32 *) dstp = BLEND2x16_50 (dw, sw, mask); | 1900 *(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask); |
1903 srcp += 2; | 1901 srcp += 2; |
1904 dstp += 2; | 1902 dstp += 2; |
1905 w -= 2; | 1903 w -= 2; |
1906 } | 1904 } |
1907 | 1905 |
1908 /* last odd pixel? */ | 1906 /* last odd pixel? */ |
1909 if (w) { | 1907 if (w) { |
1910 Uint16 d = *dstp, s = *srcp; | 1908 Uint16 d = *dstp, s = *srcp; |
1911 *dstp = BLEND16_50 (d, s, mask); | 1909 *dstp = BLEND16_50(d, s, mask); |
1912 srcp++; | 1910 srcp++; |
1913 dstp++; | 1911 dstp++; |
1914 } | 1912 } |
1915 srcp += srcskip; | 1913 srcp += srcskip; |
1916 dstp += dstskip; | 1914 dstp += dstskip; |
1919 } | 1917 } |
1920 | 1918 |
1921 #if GCC_ASMBLIT | 1919 #if GCC_ASMBLIT |
1922 /* fast RGB565->RGB565 blending with surface alpha */ | 1920 /* fast RGB565->RGB565 blending with surface alpha */ |
1923 static void | 1921 static void |
1924 Blit565to565SurfaceAlphaMMX (SDL_BlitInfo * info) | 1922 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info) |
1925 { | 1923 { |
1926 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ | 1924 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
1927 if (alpha == 128) { | 1925 if (alpha == 128) { |
1928 Blit16to16SurfaceAlpha128 (info, 0xf7de); | 1926 Blit16to16SurfaceAlpha128(info, 0xf7de); |
1929 } else { | 1927 } else { |
1930 int width = info->d_width; | 1928 int width = info->d_width; |
1931 int height = info->d_height; | 1929 int height = info->d_height; |
1932 Uint16 *srcp = (Uint16 *) info->s_pixels; | 1930 Uint16 *srcp = (Uint16 *) info->s_pixels; |
1933 int srcskip = info->s_skip >> 1; | 1931 int srcskip = info->s_skip >> 1; |
1938 | 1936 |
1939 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ | 1937 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ |
1940 *(Uint64 *) load = alpha; | 1938 *(Uint64 *) load = alpha; |
1941 alpha >>= 3; /* downscale alpha to 5 bits */ | 1939 alpha >>= 3; /* downscale alpha to 5 bits */ |
1942 | 1940 |
1943 movq_m2r (*load, mm0); /* alpha(0000000A) -> mm0 */ | 1941 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
1944 punpcklwd_r2r (mm0, mm0); /* 00000A0A -> mm0 */ | 1942 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ |
1945 punpcklwd_r2r (mm0, mm0); /* 0A0A0A0A -> mm0 */ | 1943 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ |
1946 /* position alpha to allow for mullo and mulhi on diff channels | 1944 /* position alpha to allow for mullo and mulhi on diff channels |
1947 to reduce the number of operations */ | 1945 to reduce the number of operations */ |
1948 psllq_i2r (3, mm0); | 1946 psllq_i2r(3, mm0); |
1949 | 1947 |
1950 /* Setup the 565 color channel masks */ | 1948 /* Setup the 565 color channel masks */ |
1951 *(Uint64 *) load = 0x07E007E007E007E0ULL; | 1949 *(Uint64 *) load = 0x07E007E007E007E0ULL; |
1952 movq_m2r (*load, mm4); /* MASKGREEN -> mm4 */ | 1950 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
1953 *(Uint64 *) load = 0x001F001F001F001FULL; | 1951 *(Uint64 *) load = 0x001F001F001F001FULL; |
1954 movq_m2r (*load, mm7); /* MASKBLUE -> mm7 */ | 1952 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
1955 while (height--) { | 1953 while (height--) { |
1956 /* *INDENT-OFF* */ | 1954 /* *INDENT-OFF* */ |
1957 DUFFS_LOOP_QUATRO2( | 1955 DUFFS_LOOP_QUATRO2( |
1958 { | 1956 { |
1959 s = *srcp++; | 1957 s = *srcp++; |
2055 }, width); | 2053 }, width); |
2056 /* *INDENT-ON* */ | 2054 /* *INDENT-ON* */ |
2057 srcp += srcskip; | 2055 srcp += srcskip; |
2058 dstp += dstskip; | 2056 dstp += dstskip; |
2059 } | 2057 } |
2060 emms (); | 2058 emms(); |
2061 } | 2059 } |
2062 } | 2060 } |
2063 | 2061 |
2064 /* fast RGB555->RGB555 blending with surface alpha */ | 2062 /* fast RGB555->RGB555 blending with surface alpha */ |
2065 static void | 2063 static void |
2066 Blit555to555SurfaceAlphaMMX (SDL_BlitInfo * info) | 2064 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info) |
2067 { | 2065 { |
2068 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ | 2066 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
2069 if (alpha == 128) { | 2067 if (alpha == 128) { |
2070 Blit16to16SurfaceAlpha128 (info, 0xfbde); | 2068 Blit16to16SurfaceAlpha128(info, 0xfbde); |
2071 } else { | 2069 } else { |
2072 int width = info->d_width; | 2070 int width = info->d_width; |
2073 int height = info->d_height; | 2071 int height = info->d_height; |
2074 Uint16 *srcp = (Uint16 *) info->s_pixels; | 2072 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2075 int srcskip = info->s_skip >> 1; | 2073 int srcskip = info->s_skip >> 1; |
2080 | 2078 |
2081 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ | 2079 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ |
2082 *(Uint64 *) load = alpha; | 2080 *(Uint64 *) load = alpha; |
2083 alpha >>= 3; /* downscale alpha to 5 bits */ | 2081 alpha >>= 3; /* downscale alpha to 5 bits */ |
2084 | 2082 |
2085 movq_m2r (*load, mm0); /* alpha(0000000A) -> mm0 */ | 2083 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
2086 punpcklwd_r2r (mm0, mm0); /* 00000A0A -> mm0 */ | 2084 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ |
2087 punpcklwd_r2r (mm0, mm0); /* 0A0A0A0A -> mm0 */ | 2085 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ |
2088 /* position alpha to allow for mullo and mulhi on diff channels | 2086 /* position alpha to allow for mullo and mulhi on diff channels |
2089 to reduce the number of operations */ | 2087 to reduce the number of operations */ |
2090 psllq_i2r (3, mm0); | 2088 psllq_i2r(3, mm0); |
2091 | 2089 |
2092 /* Setup the 555 color channel masks */ | 2090 /* Setup the 555 color channel masks */ |
2093 *(Uint64 *) load = 0x03E003E003E003E0ULL; | 2091 *(Uint64 *) load = 0x03E003E003E003E0ULL; |
2094 movq_m2r (*load, mm4); /* MASKGREEN -> mm4 */ | 2092 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
2095 *(Uint64 *) load = 0x001F001F001F001FULL; | 2093 *(Uint64 *) load = 0x001F001F001F001FULL; |
2096 movq_m2r (*load, mm7); /* MASKBLUE -> mm7 */ | 2094 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
2097 while (height--) { | 2095 while (height--) { |
2098 /* *INDENT-OFF* */ | 2096 /* *INDENT-OFF* */ |
2099 DUFFS_LOOP_QUATRO2( | 2097 DUFFS_LOOP_QUATRO2( |
2100 { | 2098 { |
2101 s = *srcp++; | 2099 s = *srcp++; |
2202 }, width); | 2200 }, width); |
2203 /* *INDENT-ON* */ | 2201 /* *INDENT-ON* */ |
2204 srcp += srcskip; | 2202 srcp += srcskip; |
2205 dstp += dstskip; | 2203 dstp += dstskip; |
2206 } | 2204 } |
2207 emms (); | 2205 emms(); |
2208 } | 2206 } |
2209 } | 2207 } |
2210 | 2208 |
2211 /* End GCC_ASMBLIT */ | 2209 /* End GCC_ASMBLIT */ |
2212 | 2210 |
2213 #elif MSVC_ASMBLIT | 2211 #elif MSVC_ASMBLIT |
2214 /* fast RGB565->RGB565 blending with surface alpha */ | 2212 /* fast RGB565->RGB565 blending with surface alpha */ |
2215 static void | 2213 static void |
2216 Blit565to565SurfaceAlphaMMX (SDL_BlitInfo * info) | 2214 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info) |
2217 { | 2215 { |
2218 unsigned alpha = info->src->alpha; | 2216 unsigned alpha = info->src->alpha; |
2219 if (alpha == 128) { | 2217 if (alpha == 128) { |
2220 Blit16to16SurfaceAlpha128 (info, 0xf7de); | 2218 Blit16to16SurfaceAlpha128(info, 0xf7de); |
2221 } else { | 2219 } else { |
2222 int width = info->d_width; | 2220 int width = info->d_width; |
2223 int height = info->d_height; | 2221 int height = info->d_height; |
2224 Uint16 *srcp = (Uint16 *) info->s_pixels; | 2222 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2225 int srcskip = info->s_skip >> 1; | 2223 int srcskip = info->s_skip >> 1; |
2228 Uint32 s, d; | 2226 Uint32 s, d; |
2229 | 2227 |
2230 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; | 2228 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; |
2231 | 2229 |
2232 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ | 2230 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ |
2233 mm_alpha = _mm_set_pi32 (0, alpha); /* 0000000A -> mm_alpha */ | 2231 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ |
2234 alpha >>= 3; /* downscale alpha to 5 bits */ | 2232 alpha >>= 3; /* downscale alpha to 5 bits */ |
2235 | 2233 |
2236 mm_alpha = _mm_unpacklo_pi16 (mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | 2234 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ |
2237 mm_alpha = _mm_unpacklo_pi32 (mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | 2235 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ |
2238 /* position alpha to allow for mullo and mulhi on diff channels | 2236 /* position alpha to allow for mullo and mulhi on diff channels |
2239 to reduce the number of operations */ | 2237 to reduce the number of operations */ |
2240 mm_alpha = _mm_slli_si64 (mm_alpha, 3); | 2238 mm_alpha = _mm_slli_si64(mm_alpha, 3); |
2241 | 2239 |
2242 /* Setup the 565 color channel masks */ | 2240 /* Setup the 565 color channel masks */ |
2243 gmask = _mm_set_pi32 (0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ | 2241 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ |
2244 bmask = _mm_set_pi32 (0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | 2242 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ |
2245 | 2243 |
2246 while (height--) { | 2244 while (height--) { |
2247 /* *INDENT-OFF* */ | 2245 /* *INDENT-OFF* */ |
2248 DUFFS_LOOP_QUATRO2( | 2246 DUFFS_LOOP_QUATRO2( |
2249 { | 2247 { |
2342 }, width); | 2340 }, width); |
2343 /* *INDENT-ON* */ | 2341 /* *INDENT-ON* */ |
2344 srcp += srcskip; | 2342 srcp += srcskip; |
2345 dstp += dstskip; | 2343 dstp += dstskip; |
2346 } | 2344 } |
2347 _mm_empty (); | 2345 _mm_empty(); |
2348 } | 2346 } |
2349 } | 2347 } |
2350 | 2348 |
2351 /* fast RGB555->RGB555 blending with surface alpha */ | 2349 /* fast RGB555->RGB555 blending with surface alpha */ |
2352 static void | 2350 static void |
2353 Blit555to555SurfaceAlphaMMX (SDL_BlitInfo * info) | 2351 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info) |
2354 { | 2352 { |
2355 unsigned alpha = info->src->alpha; | 2353 unsigned alpha = info->src->alpha; |
2356 if (alpha == 128) { | 2354 if (alpha == 128) { |
2357 Blit16to16SurfaceAlpha128 (info, 0xfbde); | 2355 Blit16to16SurfaceAlpha128(info, 0xfbde); |
2358 } else { | 2356 } else { |
2359 int width = info->d_width; | 2357 int width = info->d_width; |
2360 int height = info->d_height; | 2358 int height = info->d_height; |
2361 Uint16 *srcp = (Uint16 *) info->s_pixels; | 2359 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2362 int srcskip = info->s_skip >> 1; | 2360 int srcskip = info->s_skip >> 1; |
2365 Uint32 s, d; | 2363 Uint32 s, d; |
2366 | 2364 |
2367 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; | 2365 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; |
2368 | 2366 |
2369 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ | 2367 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ |
2370 mm_alpha = _mm_set_pi32 (0, alpha); /* 0000000A -> mm_alpha */ | 2368 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ |
2371 alpha >>= 3; /* downscale alpha to 5 bits */ | 2369 alpha >>= 3; /* downscale alpha to 5 bits */ |
2372 | 2370 |
2373 mm_alpha = _mm_unpacklo_pi16 (mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | 2371 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ |
2374 mm_alpha = _mm_unpacklo_pi32 (mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | 2372 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ |
2375 /* position alpha to allow for mullo and mulhi on diff channels | 2373 /* position alpha to allow for mullo and mulhi on diff channels |
2376 to reduce the number of operations */ | 2374 to reduce the number of operations */ |
2377 mm_alpha = _mm_slli_si64 (mm_alpha, 3); | 2375 mm_alpha = _mm_slli_si64(mm_alpha, 3); |
2378 | 2376 |
2379 /* Setup the 555 color channel masks */ | 2377 /* Setup the 555 color channel masks */ |
2380 rmask = _mm_set_pi32 (0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ | 2378 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ |
2381 gmask = _mm_set_pi32 (0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ | 2379 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ |
2382 bmask = _mm_set_pi32 (0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | 2380 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ |
2383 | 2381 |
2384 while (height--) { | 2382 while (height--) { |
2385 /* *INDENT-OFF* */ | 2383 /* *INDENT-OFF* */ |
2386 DUFFS_LOOP_QUATRO2( | 2384 DUFFS_LOOP_QUATRO2( |
2387 { | 2385 { |
2480 }, width); | 2478 }, width); |
2481 /* *INDENT-ON* */ | 2479 /* *INDENT-ON* */ |
2482 srcp += srcskip; | 2480 srcp += srcskip; |
2483 dstp += dstskip; | 2481 dstp += dstskip; |
2484 } | 2482 } |
2485 _mm_empty (); | 2483 _mm_empty(); |
2486 } | 2484 } |
2487 } | 2485 } |
2488 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | 2486 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ |
2489 | 2487 |
2490 /* fast RGB565->RGB565 blending with surface alpha */ | 2488 /* fast RGB565->RGB565 blending with surface alpha */ |
2491 static void | 2489 static void |
2492 Blit565to565SurfaceAlpha (SDL_BlitInfo * info) | 2490 Blit565to565SurfaceAlpha(SDL_BlitInfo * info) |
2493 { | 2491 { |
2494 unsigned alpha = info->src->alpha; | 2492 unsigned alpha = info->src->alpha; |
2495 if (alpha == 128) { | 2493 if (alpha == 128) { |
2496 Blit16to16SurfaceAlpha128 (info, 0xf7de); | 2494 Blit16to16SurfaceAlpha128(info, 0xf7de); |
2497 } else { | 2495 } else { |
2498 int width = info->d_width; | 2496 int width = info->d_width; |
2499 int height = info->d_height; | 2497 int height = info->d_height; |
2500 Uint16 *srcp = (Uint16 *) info->s_pixels; | 2498 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2501 int srcskip = info->s_skip >> 1; | 2499 int srcskip = info->s_skip >> 1; |
2526 } | 2524 } |
2527 } | 2525 } |
2528 | 2526 |
2529 /* fast RGB555->RGB555 blending with surface alpha */ | 2527 /* fast RGB555->RGB555 blending with surface alpha */ |
2530 static void | 2528 static void |
2531 Blit555to555SurfaceAlpha (SDL_BlitInfo * info) | 2529 Blit555to555SurfaceAlpha(SDL_BlitInfo * info) |
2532 { | 2530 { |
2533 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ | 2531 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
2534 if (alpha == 128) { | 2532 if (alpha == 128) { |
2535 Blit16to16SurfaceAlpha128 (info, 0xfbde); | 2533 Blit16to16SurfaceAlpha128(info, 0xfbde); |
2536 } else { | 2534 } else { |
2537 int width = info->d_width; | 2535 int width = info->d_width; |
2538 int height = info->d_height; | 2536 int height = info->d_height; |
2539 Uint16 *srcp = (Uint16 *) info->s_pixels; | 2537 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2540 int srcskip = info->s_skip >> 1; | 2538 int srcskip = info->s_skip >> 1; |
2565 } | 2563 } |
2566 } | 2564 } |
2567 | 2565 |
2568 /* fast ARGB8888->RGB565 blending with pixel alpha */ | 2566 /* fast ARGB8888->RGB565 blending with pixel alpha */ |
2569 static void | 2567 static void |
2570 BlitARGBto565PixelAlpha (SDL_BlitInfo * info) | 2568 BlitARGBto565PixelAlpha(SDL_BlitInfo * info) |
2571 { | 2569 { |
2572 int width = info->d_width; | 2570 int width = info->d_width; |
2573 int height = info->d_height; | 2571 int height = info->d_height; |
2574 Uint32 *srcp = (Uint32 *) info->s_pixels; | 2572 Uint32 *srcp = (Uint32 *) info->s_pixels; |
2575 int srcskip = info->s_skip >> 2; | 2573 int srcskip = info->s_skip >> 2; |
2611 } | 2609 } |
2612 } | 2610 } |
2613 | 2611 |
2614 /* fast ARGB8888->RGB555 blending with pixel alpha */ | 2612 /* fast ARGB8888->RGB555 blending with pixel alpha */ |
2615 static void | 2613 static void |
2616 BlitARGBto555PixelAlpha (SDL_BlitInfo * info) | 2614 BlitARGBto555PixelAlpha(SDL_BlitInfo * info) |
2617 { | 2615 { |
2618 int width = info->d_width; | 2616 int width = info->d_width; |
2619 int height = info->d_height; | 2617 int height = info->d_height; |
2620 Uint32 *srcp = (Uint32 *) info->s_pixels; | 2618 Uint32 *srcp = (Uint32 *) info->s_pixels; |
2621 int srcskip = info->s_skip >> 2; | 2619 int srcskip = info->s_skip >> 2; |
2658 } | 2656 } |
2659 } | 2657 } |
2660 | 2658 |
2661 /* General (slow) N->N blending with per-surface alpha */ | 2659 /* General (slow) N->N blending with per-surface alpha */ |
2662 static void | 2660 static void |
2663 BlitNtoNSurfaceAlpha (SDL_BlitInfo * info) | 2661 BlitNtoNSurfaceAlpha(SDL_BlitInfo * info) |
2664 { | 2662 { |
2665 int width = info->d_width; | 2663 int width = info->d_width; |
2666 int height = info->d_height; | 2664 int height = info->d_height; |
2667 Uint8 *src = info->s_pixels; | 2665 Uint8 *src = info->s_pixels; |
2668 int srcskip = info->s_skip; | 2666 int srcskip = info->s_skip; |
2702 } | 2700 } |
2703 } | 2701 } |
2704 | 2702 |
2705 /* General (slow) colorkeyed N->N blending with per-surface alpha */ | 2703 /* General (slow) colorkeyed N->N blending with per-surface alpha */ |
2706 static void | 2704 static void |
2707 BlitNtoNSurfaceAlphaKey (SDL_BlitInfo * info) | 2705 BlitNtoNSurfaceAlphaKey(SDL_BlitInfo * info) |
2708 { | 2706 { |
2709 int width = info->d_width; | 2707 int width = info->d_width; |
2710 int height = info->d_height; | 2708 int height = info->d_height; |
2711 Uint8 *src = info->s_pixels; | 2709 Uint8 *src = info->s_pixels; |
2712 int srcskip = info->s_skip; | 2710 int srcskip = info->s_skip; |
2748 } | 2746 } |
2749 } | 2747 } |
2750 | 2748 |
2751 /* General (slow) N->N blending with pixel alpha */ | 2749 /* General (slow) N->N blending with pixel alpha */ |
2752 static void | 2750 static void |
2753 BlitNtoNPixelAlpha (SDL_BlitInfo * info) | 2751 BlitNtoNPixelAlpha(SDL_BlitInfo * info) |
2754 { | 2752 { |
2755 int width = info->d_width; | 2753 int width = info->d_width; |
2756 int height = info->d_height; | 2754 int height = info->d_height; |
2757 Uint8 *src = info->s_pixels; | 2755 Uint8 *src = info->s_pixels; |
2758 int srcskip = info->s_skip; | 2756 int srcskip = info->s_skip; |
2802 } | 2800 } |
2803 } | 2801 } |
2804 | 2802 |
2805 | 2803 |
2806 SDL_loblit | 2804 SDL_loblit |
2807 SDL_CalculateAlphaBlit (SDL_Surface * surface, int blit_index) | 2805 SDL_CalculateAlphaBlit(SDL_Surface * surface, int blit_index) |
2808 { | 2806 { |
2809 SDL_PixelFormat *sf = surface->format; | 2807 SDL_PixelFormat *sf = surface->format; |
2810 SDL_PixelFormat *df = surface->map->dst->format; | 2808 SDL_PixelFormat *df = surface->map->dst->format; |
2811 | 2809 |
2812 if (sf->Amask == 0) { | 2810 if (sf->Amask == 0) { |
2815 return BlitNto1SurfaceAlphaKey; | 2813 return BlitNto1SurfaceAlphaKey; |
2816 else | 2814 else |
2817 #if SDL_ALTIVEC_BLITTERS | 2815 #if SDL_ALTIVEC_BLITTERS |
2818 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && | 2816 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && |
2819 !(surface->map->dst->flags & SDL_HWSURFACE) | 2817 !(surface->map->dst->flags & SDL_HWSURFACE) |
2820 && SDL_HasAltiVec ()) | 2818 && SDL_HasAltiVec()) |
2821 return Blit32to32SurfaceAlphaKeyAltivec; | 2819 return Blit32to32SurfaceAlphaKeyAltivec; |
2822 else | 2820 else |
2823 #endif | 2821 #endif |
2824 return BlitNtoNSurfaceAlphaKey; | 2822 return BlitNtoNSurfaceAlphaKey; |
2825 } else { | 2823 } else { |
2830 | 2828 |
2831 case 2: | 2829 case 2: |
2832 if (surface->map->identity) { | 2830 if (surface->map->identity) { |
2833 if (df->Gmask == 0x7e0) { | 2831 if (df->Gmask == 0x7e0) { |
2834 #if MMX_ASMBLIT | 2832 #if MMX_ASMBLIT |
2835 if (SDL_HasMMX ()) | 2833 if (SDL_HasMMX()) |
2836 return Blit565to565SurfaceAlphaMMX; | 2834 return Blit565to565SurfaceAlphaMMX; |
2837 else | 2835 else |
2838 #endif | 2836 #endif |
2839 return Blit565to565SurfaceAlpha; | 2837 return Blit565to565SurfaceAlpha; |
2840 } else if (df->Gmask == 0x3e0) { | 2838 } else if (df->Gmask == 0x3e0) { |
2841 #if MMX_ASMBLIT | 2839 #if MMX_ASMBLIT |
2842 if (SDL_HasMMX ()) | 2840 if (SDL_HasMMX()) |
2843 return Blit555to555SurfaceAlphaMMX; | 2841 return Blit555to555SurfaceAlphaMMX; |
2844 else | 2842 else |
2845 #endif | 2843 #endif |
2846 return Blit555to555SurfaceAlpha; | 2844 return Blit555to555SurfaceAlpha; |
2847 } | 2845 } |
2853 && sf->Gmask == df->Gmask | 2851 && sf->Gmask == df->Gmask |
2854 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { | 2852 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { |
2855 #if MMX_ASMBLIT | 2853 #if MMX_ASMBLIT |
2856 if (sf->Rshift % 8 == 0 | 2854 if (sf->Rshift % 8 == 0 |
2857 && sf->Gshift % 8 == 0 | 2855 && sf->Gshift % 8 == 0 |
2858 && sf->Bshift % 8 == 0 && SDL_HasMMX ()) | 2856 && sf->Bshift % 8 == 0 && SDL_HasMMX()) |
2859 return BlitRGBtoRGBSurfaceAlphaMMX; | 2857 return BlitRGBtoRGBSurfaceAlphaMMX; |
2860 #endif | 2858 #endif |
2861 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) { | 2859 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) { |
2862 #if SDL_ALTIVEC_BLITTERS | 2860 #if SDL_ALTIVEC_BLITTERS |
2863 if (!(surface->map->dst->flags & SDL_HWSURFACE) | 2861 if (!(surface->map->dst->flags & SDL_HWSURFACE) |
2864 && SDL_HasAltiVec ()) | 2862 && SDL_HasAltiVec()) |
2865 return BlitRGBtoRGBSurfaceAlphaAltivec; | 2863 return BlitRGBtoRGBSurfaceAlphaAltivec; |
2866 #endif | 2864 #endif |
2867 return BlitRGBtoRGBSurfaceAlpha; | 2865 return BlitRGBtoRGBSurfaceAlpha; |
2868 } | 2866 } |
2869 } | 2867 } |
2870 #if SDL_ALTIVEC_BLITTERS | 2868 #if SDL_ALTIVEC_BLITTERS |
2871 if ((sf->BytesPerPixel == 4) && | 2869 if ((sf->BytesPerPixel == 4) && |
2872 !(surface->map->dst->flags & SDL_HWSURFACE) | 2870 !(surface->map->dst->flags & SDL_HWSURFACE) |
2873 && SDL_HasAltiVec ()) | 2871 && SDL_HasAltiVec()) |
2874 return Blit32to32SurfaceAlphaAltivec; | 2872 return Blit32to32SurfaceAlphaAltivec; |
2875 else | 2873 else |
2876 #endif | 2874 #endif |
2877 return BlitNtoNSurfaceAlpha; | 2875 return BlitNtoNSurfaceAlpha; |
2878 | 2876 |
2890 case 2: | 2888 case 2: |
2891 #if SDL_ALTIVEC_BLITTERS | 2889 #if SDL_ALTIVEC_BLITTERS |
2892 if (sf->BytesPerPixel == 4 | 2890 if (sf->BytesPerPixel == 4 |
2893 && !(surface->map->dst->flags & SDL_HWSURFACE) | 2891 && !(surface->map->dst->flags & SDL_HWSURFACE) |
2894 && df->Gmask == 0x7e0 && df->Bmask == 0x1f | 2892 && df->Gmask == 0x7e0 && df->Bmask == 0x1f |
2895 && SDL_HasAltiVec ()) | 2893 && SDL_HasAltiVec()) |
2896 return Blit32to565PixelAlphaAltivec; | 2894 return Blit32to565PixelAlphaAltivec; |
2897 else | 2895 else |
2898 #endif | 2896 #endif |
2899 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 | 2897 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 |
2900 && sf->Gmask == 0xff00 | 2898 && sf->Gmask == 0xff00 |
2914 #if MMX_ASMBLIT | 2912 #if MMX_ASMBLIT |
2915 if (sf->Rshift % 8 == 0 | 2913 if (sf->Rshift % 8 == 0 |
2916 && sf->Gshift % 8 == 0 | 2914 && sf->Gshift % 8 == 0 |
2917 && sf->Bshift % 8 == 0 | 2915 && sf->Bshift % 8 == 0 |
2918 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { | 2916 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { |
2919 if (SDL_Has3DNow ()) | 2917 if (SDL_Has3DNow()) |
2920 return BlitRGBtoRGBPixelAlphaMMX3DNOW; | 2918 return BlitRGBtoRGBPixelAlphaMMX3DNOW; |
2921 if (SDL_HasMMX ()) | 2919 if (SDL_HasMMX()) |
2922 return BlitRGBtoRGBPixelAlphaMMX; | 2920 return BlitRGBtoRGBPixelAlphaMMX; |
2923 } | 2921 } |
2924 #endif | 2922 #endif |
2925 if (sf->Amask == 0xff000000) { | 2923 if (sf->Amask == 0xff000000) { |
2926 #if SDL_ALTIVEC_BLITTERS | 2924 #if SDL_ALTIVEC_BLITTERS |
2927 if (!(surface->map->dst->flags & SDL_HWSURFACE) | 2925 if (!(surface->map->dst->flags & SDL_HWSURFACE) |
2928 && SDL_HasAltiVec ()) | 2926 && SDL_HasAltiVec()) |
2929 return BlitRGBtoRGBPixelAlphaAltivec; | 2927 return BlitRGBtoRGBPixelAlphaAltivec; |
2930 #endif | 2928 #endif |
2931 return BlitRGBtoRGBPixelAlpha; | 2929 return BlitRGBtoRGBPixelAlpha; |
2932 } | 2930 } |
2933 } | 2931 } |
2934 #if SDL_ALTIVEC_BLITTERS | 2932 #if SDL_ALTIVEC_BLITTERS |
2935 if (sf->Amask && sf->BytesPerPixel == 4 && | 2933 if (sf->Amask && sf->BytesPerPixel == 4 && |
2936 !(surface->map->dst->flags & SDL_HWSURFACE) | 2934 !(surface->map->dst->flags & SDL_HWSURFACE) |
2937 && SDL_HasAltiVec ()) | 2935 && SDL_HasAltiVec()) |
2938 return Blit32to32PixelAlphaAltivec; | 2936 return Blit32to32PixelAlphaAltivec; |
2939 else | 2937 else |
2940 #endif | 2938 #endif |
2941 return BlitNtoNPixelAlpha; | 2939 return BlitNtoNPixelAlpha; |
2942 | 2940 |