comparison src/video/SDL_blit_A.c @ 1895:c121d94672cb

SDL 1.2 is moving to a branch, and SDL 1.3 is becoming the head.
author Sam Lantinga <slouken@libsdl.org>
date Mon, 10 Jul 2006 21:04:37 +0000
parents 398ac0f88e4d
children eb5aedc79992
comparison
equal deleted inserted replaced
1894:c69cee13dd76 1895:c121d94672cb
44 #endif 44 #endif
45 45
46 /* Functions to perform alpha blended blitting */ 46 /* Functions to perform alpha blended blitting */
47 47
48 /* N->1 blending with per-surface alpha */ 48 /* N->1 blending with per-surface alpha */
49 static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) 49 static void
50 { 50 BlitNto1SurfaceAlpha(SDL_BlitInfo * info)
51 int width = info->d_width; 51 {
52 int height = info->d_height; 52 int width = info->d_width;
53 Uint8 *src = info->s_pixels; 53 int height = info->d_height;
54 int srcskip = info->s_skip; 54 Uint8 *src = info->s_pixels;
55 Uint8 *dst = info->d_pixels; 55 int srcskip = info->s_skip;
56 int dstskip = info->d_skip; 56 Uint8 *dst = info->d_pixels;
57 Uint8 *palmap = info->table; 57 int dstskip = info->d_skip;
58 SDL_PixelFormat *srcfmt = info->src; 58 Uint8 *palmap = info->table;
59 SDL_PixelFormat *dstfmt = info->dst; 59 SDL_PixelFormat *srcfmt = info->src;
60 int srcbpp = srcfmt->BytesPerPixel; 60 SDL_PixelFormat *dstfmt = info->dst;
61 61 int srcbpp = srcfmt->BytesPerPixel;
62 const unsigned A = srcfmt->alpha; 62
63 63 const unsigned A = srcfmt->alpha;
64 while ( height-- ) { 64
65 while (height--) {
66 /* *INDENT-OFF* */
65 DUFFS_LOOP4( 67 DUFFS_LOOP4(
66 { 68 {
67 Uint32 Pixel; 69 Uint32 Pixel;
68 unsigned sR; 70 unsigned sR;
69 unsigned sG; 71 unsigned sG;
91 } 93 }
92 dst++; 94 dst++;
93 src += srcbpp; 95 src += srcbpp;
94 }, 96 },
95 width); 97 width);
96 src += srcskip; 98 /* *INDENT-ON* */
97 dst += dstskip; 99 src += srcskip;
98 } 100 dst += dstskip;
101 }
99 } 102 }
100 103
101 /* N->1 blending with pixel alpha */ 104 /* N->1 blending with pixel alpha */
102 static void BlitNto1PixelAlpha(SDL_BlitInfo *info) 105 static void
103 { 106 BlitNto1PixelAlpha(SDL_BlitInfo * info)
104 int width = info->d_width; 107 {
105 int height = info->d_height; 108 int width = info->d_width;
106 Uint8 *src = info->s_pixels; 109 int height = info->d_height;
107 int srcskip = info->s_skip; 110 Uint8 *src = info->s_pixels;
108 Uint8 *dst = info->d_pixels; 111 int srcskip = info->s_skip;
109 int dstskip = info->d_skip; 112 Uint8 *dst = info->d_pixels;
110 Uint8 *palmap = info->table; 113 int dstskip = info->d_skip;
111 SDL_PixelFormat *srcfmt = info->src; 114 Uint8 *palmap = info->table;
112 SDL_PixelFormat *dstfmt = info->dst; 115 SDL_PixelFormat *srcfmt = info->src;
113 int srcbpp = srcfmt->BytesPerPixel; 116 SDL_PixelFormat *dstfmt = info->dst;
114 117 int srcbpp = srcfmt->BytesPerPixel;
115 /* FIXME: fix alpha bit field expansion here too? */ 118
116 while ( height-- ) { 119 /* FIXME: fix alpha bit field expansion here too? */
120 while (height--) {
121 /* *INDENT-OFF* */
117 DUFFS_LOOP4( 122 DUFFS_LOOP4(
118 { 123 {
119 Uint32 Pixel; 124 Uint32 Pixel;
120 unsigned sR; 125 unsigned sR;
121 unsigned sG; 126 unsigned sG;
144 } 149 }
145 dst++; 150 dst++;
146 src += srcbpp; 151 src += srcbpp;
147 }, 152 },
148 width); 153 width);
149 src += srcskip; 154 /* *INDENT-ON* */
150 dst += dstskip; 155 src += srcskip;
151 } 156 dst += dstskip;
157 }
152 } 158 }
153 159
154 /* colorkeyed N->1 blending with per-surface alpha */ 160 /* colorkeyed N->1 blending with per-surface alpha */
155 static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) 161 static void
156 { 162 BlitNto1SurfaceAlphaKey(SDL_BlitInfo * info)
157 int width = info->d_width; 163 {
158 int height = info->d_height; 164 int width = info->d_width;
159 Uint8 *src = info->s_pixels; 165 int height = info->d_height;
160 int srcskip = info->s_skip; 166 Uint8 *src = info->s_pixels;
161 Uint8 *dst = info->d_pixels; 167 int srcskip = info->s_skip;
162 int dstskip = info->d_skip; 168 Uint8 *dst = info->d_pixels;
163 Uint8 *palmap = info->table; 169 int dstskip = info->d_skip;
164 SDL_PixelFormat *srcfmt = info->src; 170 Uint8 *palmap = info->table;
165 SDL_PixelFormat *dstfmt = info->dst; 171 SDL_PixelFormat *srcfmt = info->src;
166 int srcbpp = srcfmt->BytesPerPixel; 172 SDL_PixelFormat *dstfmt = info->dst;
167 Uint32 ckey = srcfmt->colorkey; 173 int srcbpp = srcfmt->BytesPerPixel;
168 174 Uint32 ckey = srcfmt->colorkey;
169 const int A = srcfmt->alpha; 175
170 176 const int A = srcfmt->alpha;
171 while ( height-- ) { 177
178 while (height--) {
179 /* *INDENT-OFF* */
172 DUFFS_LOOP( 180 DUFFS_LOOP(
173 { 181 {
174 Uint32 Pixel; 182 Uint32 Pixel;
175 unsigned sR; 183 unsigned sR;
176 unsigned sG; 184 unsigned sG;
200 } 208 }
201 dst++; 209 dst++;
202 src += srcbpp; 210 src += srcbpp;
203 }, 211 },
204 width); 212 width);
205 src += srcskip; 213 /* *INDENT-ON* */
206 dst += dstskip; 214 src += srcskip;
207 } 215 dst += dstskip;
216 }
208 } 217 }
209 218
210 #if GCC_ASMBLIT 219 #if GCC_ASMBLIT
211 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 220 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
212 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) 221 static void
213 { 222 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info)
214 int width = info->d_width; 223 {
215 int height = info->d_height; 224 int width = info->d_width;
216 Uint32 *srcp = (Uint32 *)info->s_pixels; 225 int height = info->d_height;
217 int srcskip = info->s_skip >> 2; 226 Uint32 *srcp = (Uint32 *) info->s_pixels;
218 Uint32 *dstp = (Uint32 *)info->d_pixels; 227 int srcskip = info->s_skip >> 2;
219 int dstskip = info->d_skip >> 2; 228 Uint32 *dstp = (Uint32 *) info->d_pixels;
220 Uint32 dalpha = info->dst->Amask; 229 int dstskip = info->d_skip >> 2;
221 Uint8 load[8]; 230 Uint32 dalpha = info->dst->Amask;
222 231 Uint8 load[8];
223 *(Uint64 *)load = 0x00fefefe00fefefeULL;/* alpha128 mask */ 232
224 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ 233 *(Uint64 *) load = 0x00fefefe00fefefeULL; /* alpha128 mask */
225 *(Uint64 *)load = 0x0001010100010101ULL;/* !alpha128 mask */ 234 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */
226 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ 235 *(Uint64 *) load = 0x0001010100010101ULL; /* !alpha128 mask */
227 movd_m2r(dalpha, mm7); /* dst alpha mask */ 236 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */
228 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ 237 movd_m2r(dalpha, mm7); /* dst alpha mask */
229 while(height--) { 238 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */
239 while (height--) {
240 /* *INDENT-OFF* */
230 DUFFS_LOOP_DOUBLE2( 241 DUFFS_LOOP_DOUBLE2(
231 { 242 {
232 Uint32 s = *srcp++; 243 Uint32 s = *srcp++;
233 Uint32 d = *dstp; 244 Uint32 d = *dstp;
234 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 245 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
251 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ 262 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */
252 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ 263 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */
253 dstp += 2; 264 dstp += 2;
254 srcp += 2; 265 srcp += 2;
255 }, width); 266 }, width);
256 srcp += srcskip; 267 /* *INDENT-ON* */
257 dstp += dstskip; 268 srcp += srcskip;
258 } 269 dstp += dstskip;
259 emms(); 270 }
271 emms();
260 } 272 }
261 273
262 /* fast RGB888->(A)RGB888 blending with surface alpha */ 274 /* fast RGB888->(A)RGB888 blending with surface alpha */
263 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) 275 static void
264 { 276 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info)
265 SDL_PixelFormat* df = info->dst; 277 {
266 unsigned alpha = info->src->alpha; 278 SDL_PixelFormat *df = info->dst;
267 279 unsigned alpha = info->src->alpha;
268 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 280
269 /* only call a128 version when R,G,B occupy lower bits */ 281 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
270 BlitRGBtoRGBSurfaceAlpha128MMX(info); 282 /* only call a128 version when R,G,B occupy lower bits */
271 } else { 283 BlitRGBtoRGBSurfaceAlpha128MMX(info);
272 int width = info->d_width; 284 } else {
273 int height = info->d_height; 285 int width = info->d_width;
274 Uint32 *srcp = (Uint32 *)info->s_pixels; 286 int height = info->d_height;
275 int srcskip = info->s_skip >> 2; 287 Uint32 *srcp = (Uint32 *) info->s_pixels;
276 Uint32 *dstp = (Uint32 *)info->d_pixels; 288 int srcskip = info->s_skip >> 2;
277 int dstskip = info->d_skip >> 2; 289 Uint32 *dstp = (Uint32 *) info->d_pixels;
278 290 int dstskip = info->d_skip >> 2;
279 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ 291
280 /* form the alpha mult */ 292 pxor_r2r(mm5, mm5); /* 0 -> mm5 */
281 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ 293 /* form the alpha mult */
282 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ 294 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */
283 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ 295 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */
284 alpha = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); 296 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */
285 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ 297 alpha =
286 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ 298 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->
287 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ 299 Bshift);
288 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ 300 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */
289 movd_m2r(df->Amask, mm7); /* dst alpha mask */ 301 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */
290 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ 302 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */
291 303 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */
292 while(height--) { 304 movd_m2r(df->Amask, mm7); /* dst alpha mask */
305 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */
306
307 while (height--) {
308 /* *INDENT-OFF* */
293 DUFFS_LOOP_DOUBLE2({ 309 DUFFS_LOOP_DOUBLE2({
294 /* One Pixel Blend */ 310 /* One Pixel Blend */
295 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ 311 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/
296 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ 312 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/
297 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ 313 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */
335 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ 351 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */
336 352
337 srcp += 2; 353 srcp += 2;
338 dstp += 2; 354 dstp += 2;
339 }, width); 355 }, width);
340 srcp += srcskip; 356 /* *INDENT-ON* */
341 dstp += dstskip; 357 srcp += srcskip;
342 } 358 dstp += dstskip;
343 emms(); 359 }
344 } 360 emms();
361 }
345 } 362 }
346 363
347 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 364 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
348 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) 365 static void
349 { 366 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
350 int width = info->d_width; 367 {
351 int height = info->d_height; 368 int width = info->d_width;
352 Uint32 *srcp = (Uint32 *)info->s_pixels; 369 int height = info->d_height;
353 int srcskip = info->s_skip >> 2; 370 Uint32 *srcp = (Uint32 *) info->s_pixels;
354 Uint32 *dstp = (Uint32 *)info->d_pixels; 371 int srcskip = info->s_skip >> 2;
355 int dstskip = info->d_skip >> 2; 372 Uint32 *dstp = (Uint32 *) info->d_pixels;
356 SDL_PixelFormat* sf = info->src; 373 int dstskip = info->d_skip >> 2;
357 Uint32 amask = sf->Amask; 374 SDL_PixelFormat *sf = info->src;
358 375 Uint32 amask = sf->Amask;
359 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ 376
360 /* form multiplication mask */ 377 pxor_r2r(mm6, mm6); /* 0 -> mm6 */
361 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ 378 /* form multiplication mask */
362 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ 379 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */
363 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ 380 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */
364 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ 381 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */
365 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ 382 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */
366 /* form channel masks */ 383 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */
367 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ 384 /* form channel masks */
368 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ 385 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */
369 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ 386 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */
370 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ 387 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */
371 /* get alpha channel shift */ 388 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */
372 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */ 389 /* get alpha channel shift */
373 390 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */
374 while(height--) { 391
392 while (height--) {
393 /* *INDENT-OFF* */
375 DUFFS_LOOP4({ 394 DUFFS_LOOP4({
376 Uint32 alpha = *srcp & amask; 395 Uint32 alpha = *srcp & amask;
377 /* FIXME: Here we special-case opaque alpha since the 396 /* FIXME: Here we special-case opaque alpha since the
378 compositioning used (>>8 instead of /255) doesn't handle 397 compositioning used (>>8 instead of /255) doesn't handle
379 it correctly. Also special-case alpha=0 for speed? 398 it correctly. Also special-case alpha=0 for speed?
414 movd_r2m(mm2, *dstp);/* mm2 -> dst */ 433 movd_r2m(mm2, *dstp);/* mm2 -> dst */
415 } 434 }
416 ++srcp; 435 ++srcp;
417 ++dstp; 436 ++dstp;
418 }, width); 437 }, width);
419 srcp += srcskip; 438 /* *INDENT-ON* */
420 dstp += dstskip; 439 srcp += srcskip;
421 } 440 dstp += dstskip;
422 emms(); 441 }
423 } 442 emms();
443 }
444
424 /* End GCC_ASMBLIT */ 445 /* End GCC_ASMBLIT */
425 446
426 #elif MSVC_ASMBLIT 447 #elif MSVC_ASMBLIT
427 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 448 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
428 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) 449 static void
429 { 450 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info)
430 int width = info->d_width; 451 {
431 int height = info->d_height; 452 int width = info->d_width;
432 Uint32 *srcp = (Uint32 *)info->s_pixels; 453 int height = info->d_height;
433 int srcskip = info->s_skip >> 2; 454 Uint32 *srcp = (Uint32 *) info->s_pixels;
434 Uint32 *dstp = (Uint32 *)info->d_pixels; 455 int srcskip = info->s_skip >> 2;
435 int dstskip = info->d_skip >> 2; 456 Uint32 *dstp = (Uint32 *) info->d_pixels;
436 Uint32 dalpha = info->dst->Amask; 457 int dstskip = info->d_skip >> 2;
437 458 Uint32 dalpha = info->dst->Amask;
438 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; 459
439 460 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
440 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ 461
441 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ 462 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */
442 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ 463 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */
443 464 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
444 while (height--) { 465
445 int n = width; 466 while (height--) {
446 if ( n & 1 ) { 467 int n = width;
447 Uint32 s = *srcp++; 468 if (n & 1) {
448 Uint32 d = *dstp; 469 Uint32 s = *srcp++;
449 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 470 Uint32 d = *dstp;
450 + (s & d & 0x00010101)) | dalpha; 471 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
451 n--; 472 + (s & d & 0x00010101)) | dalpha;
452 } 473 n--;
453 474 }
454 for (n >>= 1; n > 0; --n) { 475
455 dst1 = *(__m64*)dstp; /* 2 x dst -> dst1(ARGBARGB) */ 476 for (n >>= 1; n > 0; --n) {
456 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 477 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
457 478 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
458 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB) */ 479
459 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 480 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
460 481 src2 = src1; /* 2 x src -> src2(ARGBARGB) */
461 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ 482
462 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ 483 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */
463 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ 484 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */
464 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ 485 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */
465 486 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */
466 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ 487
467 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ 488 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */
468 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ 489 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */
469 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ 490 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */
470 491 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */
471 *(__m64*)dstp = dst1; /* dst1 -> 2 x dst pixels */ 492
472 dstp += 2; 493 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */
473 srcp += 2; 494 dstp += 2;
474 } 495 srcp += 2;
475 496 }
476 srcp += srcskip; 497
477 dstp += dstskip; 498 srcp += srcskip;
478 } 499 dstp += dstskip;
479 _mm_empty(); 500 }
501 _mm_empty();
480 } 502 }
481 503
482 /* fast RGB888->(A)RGB888 blending with surface alpha */ 504 /* fast RGB888->(A)RGB888 blending with surface alpha */
483 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) 505 static void
484 { 506 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info)
485 SDL_PixelFormat* df = info->dst; 507 {
486 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; 508 SDL_PixelFormat *df = info->dst;
487 unsigned alpha = info->src->alpha; 509 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask;
488 510 unsigned alpha = info->src->alpha;
489 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 511
490 /* only call a128 version when R,G,B occupy lower bits */ 512 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
491 BlitRGBtoRGBSurfaceAlpha128MMX(info); 513 /* only call a128 version when R,G,B occupy lower bits */
492 } else { 514 BlitRGBtoRGBSurfaceAlpha128MMX(info);
493 int width = info->d_width; 515 } else {
494 int height = info->d_height; 516 int width = info->d_width;
495 Uint32 *srcp = (Uint32 *)info->s_pixels; 517 int height = info->d_height;
496 int srcskip = info->s_skip >> 2; 518 Uint32 *srcp = (Uint32 *) info->s_pixels;
497 Uint32 *dstp = (Uint32 *)info->d_pixels; 519 int srcskip = info->s_skip >> 2;
498 int dstskip = info->d_skip >> 2; 520 Uint32 *dstp = (Uint32 *) info->d_pixels;
499 Uint32 dalpha = df->Amask; 521 int dstskip = info->d_skip >> 2;
500 Uint32 amult; 522 Uint32 dalpha = df->Amask;
501 523 Uint32 amult;
502 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; 524
503 525 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
504 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 526
505 /* form the alpha mult */ 527 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
506 amult = alpha | (alpha << 8); 528 /* form the alpha mult */
507 amult = amult | (amult << 16); 529 amult = alpha | (alpha << 8);
508 chanmask = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); 530 amult = amult | (amult << 16);
509 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ 531 chanmask =
510 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ 532 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->
511 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ 533 Bshift);
512 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ 534 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */
513 535 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */
514 while (height--) { 536 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */
515 int n = width; 537 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
516 if (n & 1) { 538
517 /* One Pixel Blend */ 539 while (height--) {
518 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB)*/ 540 int n = width;
519 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ 541 if (n & 1) {
520 542 /* One Pixel Blend */
521 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ 543 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */
522 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 544 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */
523 545
524 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ 546 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
525 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 547 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
526 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ 548
527 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ 549 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */
528 550 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
529 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ 551 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
530 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ 552 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */
531 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 553
532 554 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */
533 ++srcp; 555 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
534 ++dstp; 556 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
535 557
536 n--; 558 ++srcp;
537 } 559 ++dstp;
538 560
539 for (n >>= 1; n > 0; --n) { 561 n--;
540 /* Two Pixels Blend */ 562 }
541 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB)*/ 563
542 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 564 for (n >>= 1; n > 0; --n) {
543 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ 565 /* Two Pixels Blend */
544 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ 566 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
545 567 src2 = src1; /* 2 x src -> src2(ARGBARGB) */
546 dst1 = *(__m64*)dstp;/* 2 x dst -> dst1(ARGBARGB) */ 568 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */
547 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 569 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */
548 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ 570
549 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ 571 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
550 572 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
551 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ 573 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */
552 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ 574 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */
553 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ 575
554 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ 576 src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */
555 577 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */
556 src2 = _mm_sub_pi16(src2, dst2);/* src2 - dst2 -> src2 */ 578 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */
557 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 579 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */
558 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ 580
559 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ 581 src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */
560 582 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
561 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ 583 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
562 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ 584 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */
563 585
564 *(__m64*)dstp = dst1; /* dst1 -> 2 x pixel */ 586 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */
565 587 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
566 srcp += 2; 588
567 dstp += 2; 589 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */
568 } 590
569 srcp += srcskip; 591 srcp += 2;
570 dstp += dstskip; 592 dstp += 2;
571 } 593 }
572 _mm_empty(); 594 srcp += srcskip;
573 } 595 dstp += dstskip;
596 }
597 _mm_empty();
598 }
574 } 599 }
575 600
576 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 601 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
577 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) 602 static void
578 { 603 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
579 int width = info->d_width; 604 {
580 int height = info->d_height; 605 int width = info->d_width;
581 Uint32 *srcp = (Uint32 *)info->s_pixels; 606 int height = info->d_height;
582 int srcskip = info->s_skip >> 2; 607 Uint32 *srcp = (Uint32 *) info->s_pixels;
583 Uint32 *dstp = (Uint32 *)info->d_pixels; 608 int srcskip = info->s_skip >> 2;
584 int dstskip = info->d_skip >> 2; 609 Uint32 *dstp = (Uint32 *) info->d_pixels;
585 SDL_PixelFormat* sf = info->src; 610 int dstskip = info->d_skip >> 2;
586 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; 611 SDL_PixelFormat *sf = info->src;
587 Uint32 amask = sf->Amask; 612 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
588 Uint32 ashift = sf->Ashift; 613 Uint32 amask = sf->Amask;
589 Uint64 multmask; 614 Uint32 ashift = sf->Ashift;
590 615 Uint64 multmask;
591 __m64 src1, dst1, mm_alpha, mm_zero, dmask; 616
592 617 __m64 src1, dst1, mm_alpha, mm_zero, dmask;
593 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 618
594 multmask = ~(0xFFFFi64 << (ashift * 2)); 619 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
595 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ 620 /* *INDENT-OFF* */
596 621 multmask = ~(0xFFFFI64 << (ashift * 2));
597 while(height--) { 622 /* *INDENT-ON* */
623 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */
624
625 while (height--) {
626 /* *INDENT-OFF* */
598 DUFFS_LOOP4({ 627 DUFFS_LOOP4({
599 Uint32 alpha = *srcp & amask; 628 Uint32 alpha = *srcp & amask;
600 if (alpha == 0) { 629 if (alpha == 0) {
601 /* do nothing */ 630 /* do nothing */
602 } else if (alpha == amask) { 631 } else if (alpha == amask) {
625 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 654 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
626 } 655 }
627 ++srcp; 656 ++srcp;
628 ++dstp; 657 ++dstp;
629 }, width); 658 }, width);
630 srcp += srcskip; 659 /* *INDENT-ON* */
631 dstp += dstskip; 660 srcp += srcskip;
632 } 661 dstp += dstskip;
633 _mm_empty(); 662 }
634 } 663 _mm_empty();
664 }
665
635 /* End MSVC_ASMBLIT */ 666 /* End MSVC_ASMBLIT */
636 667
637 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 668 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
638 669
639 #if SDL_ALTIVEC_BLITTERS 670 #if SDL_ALTIVEC_BLITTERS
644 #include <altivec.h> 675 #include <altivec.h>
645 #endif 676 #endif
646 #include <assert.h> 677 #include <assert.h>
647 678
648 #if (defined(__MACOSX__) && (__GNUC__ < 4)) 679 #if (defined(__MACOSX__) && (__GNUC__ < 4))
649 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ 680 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
650 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) 681 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
651 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ 682 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
652 (vector unsigned short) ( a,b,c,d,e,f,g,h ) 683 (vector unsigned short) ( a,b,c,d,e,f,g,h )
653 #else 684 #else
654 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ 685 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
655 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } 686 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
656 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ 687 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
657 (vector unsigned short) { a,b,c,d,e,f,g,h } 688 (vector unsigned short) { a,b,c,d,e,f,g,h }
658 #endif 689 #endif
659 690
660 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) 691 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
661 #define VECPRINT(msg, v) do { \ 692 #define VECPRINT(msg, v) do { \
676 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) 707 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
677 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ 708 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
678 ? vec_lvsl(0, src) \ 709 ? vec_lvsl(0, src) \
679 : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) 710 : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
680 711
681 712
682 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ 713 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
683 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ 714 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
684 vector unsigned short vtemp1 = vec_mule(vs, valpha); \ 715 vector unsigned short vtemp1 = vec_mule(vs, valpha); \
685 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ 716 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
686 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ 717 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
702 vtemp4 = vec_sr(vtemp2, v8_16); \ 733 vtemp4 = vec_sr(vtemp2, v8_16); \
703 vtemp2 = vec_add(vtemp2, vtemp4); \ 734 vtemp2 = vec_add(vtemp2, vtemp4); \
704 /* (>>8) and get ARGBARGBARGBARGB */ \ 735 /* (>>8) and get ARGBARGBARGBARGB */ \
705 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ 736 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
706 } while (0) 737 } while (0)
707 738
708 /* Calculate the permute vector used for 32->32 swizzling */ 739 /* Calculate the permute vector used for 32->32 swizzling */
709 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, 740 static vector unsigned char
710 const SDL_PixelFormat *dstfmt) 741 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
711 { 742 {
712 /* 743 /*
713 * We have to assume that the bits that aren't used by other 744 * We have to assume that the bits that aren't used by other
714 * colors is alpha, and it's one complete byte, since some formats 745 * colors is alpha, and it's one complete byte, since some formats
715 * leave alpha with a zero mask, but we should still swizzle the bits. 746 * leave alpha with a zero mask, but we should still swizzle the bits.
718 const static struct SDL_PixelFormat default_pixel_format = { 749 const static struct SDL_PixelFormat default_pixel_format = {
719 NULL, 0, 0, 750 NULL, 0, 0,
720 0, 0, 0, 0, 751 0, 0, 0, 0,
721 16, 8, 0, 24, 752 16, 8, 0, 24,
722 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, 753 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
723 0, 0}; 754 0, 0
755 };
724 if (!srcfmt) { 756 if (!srcfmt) {
725 srcfmt = &default_pixel_format; 757 srcfmt = &default_pixel_format;
726 } 758 }
727 if (!dstfmt) { 759 if (!dstfmt) {
728 dstfmt = &default_pixel_format; 760 dstfmt = &default_pixel_format;
729 } 761 }
730 const vector unsigned char plus = VECUINT8_LITERAL 762 const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
731 ( 0x00, 0x00, 0x00, 0x00, 763 0x04, 0x04, 0x04, 0x04,
732 0x04, 0x04, 0x04, 0x04, 764 0x08, 0x08, 0x08, 0x08,
733 0x08, 0x08, 0x08, 0x08, 765 0x0C, 0x0C, 0x0C,
734 0x0C, 0x0C, 0x0C, 0x0C ); 766 0x0C);
735 vector unsigned char vswiz; 767 vector unsigned char vswiz;
736 vector unsigned int srcvec; 768 vector unsigned int srcvec;
737 #define RESHIFT(X) (3 - ((X) >> 3)) 769 #define RESHIFT(X) (3 - ((X) >> 3))
738 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); 770 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
739 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); 771 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
740 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); 772 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
741 Uint32 amask; 773 Uint32 amask;
742 /* Use zero for alpha if either surface doesn't have alpha */ 774 /* Use zero for alpha if either surface doesn't have alpha */
743 if (dstfmt->Amask) { 775 if (dstfmt->Amask) {
744 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); 776 amask =
777 ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->
778 Ashift);
745 } else { 779 } else {
746 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); 780 amask =
747 } 781 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
748 #undef RESHIFT 782 0xFFFFFFFF);
749 ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); 783 }
750 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); 784 #undef RESHIFT
751 return(vswiz); 785 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
752 } 786 vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
753 787 return (vswiz);
754 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) 788 }
789
790 static void
791 Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
755 { 792 {
756 int height = info->d_height; 793 int height = info->d_height;
757 Uint8 *src = (Uint8 *)info->s_pixels; 794 Uint8 *src = (Uint8 *) info->s_pixels;
758 int srcskip = info->s_skip; 795 int srcskip = info->s_skip;
759 Uint8 *dst = (Uint8 *)info->d_pixels; 796 Uint8 *dst = (Uint8 *) info->d_pixels;
760 int dstskip = info->d_skip; 797 int dstskip = info->d_skip;
761 SDL_PixelFormat *srcfmt = info->src; 798 SDL_PixelFormat *srcfmt = info->src;
762 799
763 vector unsigned char v0 = vec_splat_u8(0); 800 vector unsigned char v0 = vec_splat_u8(0);
764 vector unsigned short v8_16 = vec_splat_u16(8); 801 vector unsigned short v8_16 = vec_splat_u16(8);
765 vector unsigned short v1_16 = vec_splat_u16(1); 802 vector unsigned short v1_16 = vec_splat_u16(1);
766 vector unsigned short v2_16 = vec_splat_u16(2); 803 vector unsigned short v2_16 = vec_splat_u16(2);
767 vector unsigned short v3_16 = vec_splat_u16(3); 804 vector unsigned short v3_16 = vec_splat_u16(3);
768 vector unsigned int v8_32 = vec_splat_u32(8); 805 vector unsigned int v8_32 = vec_splat_u32(8);
769 vector unsigned int v16_32 = vec_add(v8_32, v8_32); 806 vector unsigned int v16_32 = vec_add(v8_32, v8_32);
770 vector unsigned short v3f = VECUINT16_LITERAL( 807 vector unsigned short v3f =
771 0x003f, 0x003f, 0x003f, 0x003f, 808 VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
772 0x003f, 0x003f, 0x003f, 0x003f); 809 0x003f, 0x003f, 0x003f, 0x003f);
773 vector unsigned short vfc = VECUINT16_LITERAL( 810 vector unsigned short vfc =
774 0x00fc, 0x00fc, 0x00fc, 0x00fc, 811 VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
775 0x00fc, 0x00fc, 0x00fc, 0x00fc); 812 0x00fc, 0x00fc, 0x00fc, 0x00fc);
776 813
777 /* 814 /*
778 0x10 - 0x1f is the alpha 815 0x10 - 0x1f is the alpha
779 0x00 - 0x0e evens are the red 816 0x00 - 0x0e evens are the red
780 0x01 - 0x0f odds are zero 817 0x01 - 0x0f odds are zero
781 */ 818 */
782 vector unsigned char vredalpha1 = VECUINT8_LITERAL( 819 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
783 0x10, 0x00, 0x01, 0x01, 820 0x10, 0x02, 0x01, 0x01,
784 0x10, 0x02, 0x01, 0x01, 821 0x10, 0x04, 0x01, 0x01,
785 0x10, 0x04, 0x01, 0x01, 822 0x10, 0x06, 0x01,
786 0x10, 0x06, 0x01, 0x01 823 0x01);
787 ); 824 vector unsigned char vredalpha2 =
788 vector unsigned char vredalpha2 = (vector unsigned char)( 825 (vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
789 vec_add((vector unsigned int)vredalpha1, vec_sl(v8_32, v16_32)) 826 vec_sl(v8_32, v16_32))
790 ); 827 );
791 /* 828 /*
792 0x00 - 0x0f is ARxx ARxx ARxx ARxx 829 0x00 - 0x0f is ARxx ARxx ARxx ARxx
793 0x11 - 0x0f odds are blue 830 0x11 - 0x0f odds are blue
794 */ 831 */
795 vector unsigned char vblue1 = VECUINT8_LITERAL( 832 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
796 0x00, 0x01, 0x02, 0x11, 833 0x04, 0x05, 0x06, 0x13,
797 0x04, 0x05, 0x06, 0x13, 834 0x08, 0x09, 0x0a, 0x15,
798 0x08, 0x09, 0x0a, 0x15, 835 0x0c, 0x0d, 0x0e, 0x17);
799 0x0c, 0x0d, 0x0e, 0x17 836 vector unsigned char vblue2 =
800 ); 837 (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32)
801 vector unsigned char vblue2 = (vector unsigned char)( 838 );
802 vec_add((vector unsigned int)vblue1, v8_32)
803 );
804 /* 839 /*
805 0x00 - 0x0f is ARxB ARxB ARxB ARxB 840 0x00 - 0x0f is ARxB ARxB ARxB ARxB
806 0x10 - 0x0e evens are green 841 0x10 - 0x0e evens are green
807 */ 842 */
808 vector unsigned char vgreen1 = VECUINT8_LITERAL( 843 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
809 0x00, 0x01, 0x10, 0x03, 844 0x04, 0x05, 0x12, 0x07,
810 0x04, 0x05, 0x12, 0x07, 845 0x08, 0x09, 0x14, 0x0b,
811 0x08, 0x09, 0x14, 0x0b, 846 0x0c, 0x0d, 0x16, 0x0f);
812 0x0c, 0x0d, 0x16, 0x0f 847 vector unsigned char vgreen2 =
813 ); 848 (vector unsigned
814 vector unsigned char vgreen2 = (vector unsigned char)( 849 char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32))
815 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) 850 );
816 ); 851 vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
817 vector unsigned char vgmerge = VECUINT8_LITERAL( 852 0x00, 0x0a, 0x00, 0x0e,
818 0x00, 0x02, 0x00, 0x06, 853 0x00, 0x12, 0x00, 0x16,
819 0x00, 0x0a, 0x00, 0x0e, 854 0x00, 0x1a, 0x00, 0x1e);
820 0x00, 0x12, 0x00, 0x16,
821 0x00, 0x1a, 0x00, 0x1e);
822 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); 855 vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
823 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); 856 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
824 vector unsigned char valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 857 vector unsigned char valphaPermute =
825 858 vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
826 vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7); 859
860 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
827 vf800 = vec_sl(vf800, vec_splat_u16(8)); 861 vf800 = vec_sl(vf800, vec_splat_u16(8));
828 862
829 while(height--) { 863 while (height--) {
830 int extrawidth; 864 int extrawidth;
831 vector unsigned char valigner; 865 vector unsigned char valigner;
832 vector unsigned char vsrc; 866 vector unsigned char vsrc;
833 vector unsigned char voverflow; 867 vector unsigned char voverflow;
834 int width = info->d_width; 868 int width = info->d_width;
853 widthvar--; \ 887 widthvar--; \
854 } 888 }
855 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); 889 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
856 extrawidth = (width % 8); 890 extrawidth = (width % 8);
857 valigner = VEC_ALIGNER(src); 891 valigner = VEC_ALIGNER(src);
858 vsrc = (vector unsigned char)vec_ld(0, src); 892 vsrc = (vector unsigned char) vec_ld(0, src);
859 width -= extrawidth; 893 width -= extrawidth;
860 while (width) { 894 while (width) {
861 vector unsigned char valpha; 895 vector unsigned char valpha;
862 vector unsigned char vsrc1, vsrc2; 896 vector unsigned char vsrc1, vsrc2;
863 vector unsigned char vdst1, vdst2; 897 vector unsigned char vdst1, vdst2;
864 vector unsigned short vR, vG, vB; 898 vector unsigned short vR, vG, vB;
865 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; 899 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
866 900
867 /* Load 8 pixels from src as ARGB */ 901 /* Load 8 pixels from src as ARGB */
868 voverflow = (vector unsigned char)vec_ld(15, src); 902 voverflow = (vector unsigned char) vec_ld(15, src);
869 vsrc = vec_perm(vsrc, voverflow, valigner); 903 vsrc = vec_perm(vsrc, voverflow, valigner);
870 vsrc1 = vec_perm(vsrc, vsrc, vpermute); 904 vsrc1 = vec_perm(vsrc, vsrc, vpermute);
871 src += 16; 905 src += 16;
872 vsrc = (vector unsigned char)vec_ld(15, src); 906 vsrc = (vector unsigned char) vec_ld(15, src);
873 voverflow = vec_perm(voverflow, vsrc, valigner); 907 voverflow = vec_perm(voverflow, vsrc, valigner);
874 vsrc2 = vec_perm(voverflow, voverflow, vpermute); 908 vsrc2 = vec_perm(voverflow, voverflow, vpermute);
875 src += 16; 909 src += 16;
876 910
877 /* Load 8 pixels from dst as XRGB */ 911 /* Load 8 pixels from dst as XRGB */
878 voverflow = vec_ld(0, dst); 912 voverflow = vec_ld(0, dst);
879 vR = vec_and((vector unsigned short)voverflow, vf800); 913 vR = vec_and((vector unsigned short) voverflow, vf800);
880 vB = vec_sl((vector unsigned short)voverflow, v3_16); 914 vB = vec_sl((vector unsigned short) voverflow, v3_16);
881 vG = vec_sl(vB, v2_16); 915 vG = vec_sl(vB, v2_16);
882 vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha1); 916 vdst1 =
883 vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1); 917 (vector unsigned char) vec_perm((vector unsigned char) vR,
884 vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1); 918 (vector unsigned char) vR,
885 vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha2); 919 vredalpha1);
886 vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2); 920 vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
887 vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2); 921 vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
922 vdst2 =
923 (vector unsigned char) vec_perm((vector unsigned char) vR,
924 (vector unsigned char) vR,
925 vredalpha2);
926 vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
927 vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
888 928
889 /* Alpha blend 8 pixels as ARGB */ 929 /* Alpha blend 8 pixels as ARGB */
890 valpha = vec_perm(vsrc1, v0, valphaPermute); 930 valpha = vec_perm(vsrc1, v0, valphaPermute);
891 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, v8_16); 931 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16,
932 v8_16);
892 valpha = vec_perm(vsrc2, v0, valphaPermute); 933 valpha = vec_perm(vsrc2, v0, valphaPermute);
893 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, v8_16); 934 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16,
935 v8_16);
894 936
895 /* Convert 8 pixels to 565 */ 937 /* Convert 8 pixels to 565 */
896 vpixel = (vector unsigned short)vec_packpx((vector unsigned int)vdst1, (vector unsigned int)vdst2); 938 vpixel = (vector unsigned short) vec_packpx((vector unsigned int)
897 vgpixel = (vector unsigned short)vec_perm(vdst1, vdst2, vgmerge); 939 vdst1,
940 (vector unsigned int)
941 vdst2);
942 vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge);
898 vgpixel = vec_and(vgpixel, vfc); 943 vgpixel = vec_and(vgpixel, vfc);
899 vgpixel = vec_sl(vgpixel, v3_16); 944 vgpixel = vec_sl(vgpixel, v3_16);
900 vrpixel = vec_sl(vpixel, v1_16); 945 vrpixel = vec_sl(vpixel, v1_16);
901 vrpixel = vec_and(vrpixel, vf800); 946 vrpixel = vec_and(vrpixel, vf800);
902 vbpixel = vec_and(vpixel, v3f); 947 vbpixel = vec_and(vpixel, v3f);
903 vdst1 = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel); 948 vdst1 =
904 vdst1 = vec_or(vdst1, (vector unsigned char)vbpixel); 949 vec_or((vector unsigned char) vrpixel,
905 950 (vector unsigned char) vgpixel);
951 vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel);
952
906 /* Store 8 pixels */ 953 /* Store 8 pixels */
907 vec_st(vdst1, 0, dst); 954 vec_st(vdst1, 0, dst);
908 955
909 width -= 8; 956 width -= 8;
910 dst += 16; 957 dst += 16;
914 src += srcskip; 961 src += srcskip;
915 dst += dstskip; 962 dst += dstskip;
916 } 963 }
917 } 964 }
918 965
919 static void Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo *info) 966 static void
967 Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
920 { 968 {
921 unsigned alpha = info->src->alpha; 969 unsigned alpha = info->src->alpha;
922 int height = info->d_height; 970 int height = info->d_height;
923 Uint32 *srcp = (Uint32 *)info->s_pixels; 971 Uint32 *srcp = (Uint32 *) info->s_pixels;
924 int srcskip = info->s_skip >> 2; 972 int srcskip = info->s_skip >> 2;
925 Uint32 *dstp = (Uint32 *)info->d_pixels; 973 Uint32 *dstp = (Uint32 *) info->d_pixels;
926 int dstskip = info->d_skip >> 2; 974 int dstskip = info->d_skip >> 2;
927 SDL_PixelFormat *srcfmt = info->src; 975 SDL_PixelFormat *srcfmt = info->src;
928 SDL_PixelFormat *dstfmt = info->dst; 976 SDL_PixelFormat *dstfmt = info->dst;
929 unsigned sA = srcfmt->alpha; 977 unsigned sA = srcfmt->alpha;
930 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 978 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
954 vsrcPermute = calc_swizzle32(srcfmt, NULL); 1002 vsrcPermute = calc_swizzle32(srcfmt, NULL);
955 vdstPermute = calc_swizzle32(NULL, dstfmt); 1003 vdstPermute = calc_swizzle32(NULL, dstfmt);
956 vsdstPermute = calc_swizzle32(dstfmt, NULL); 1004 vsdstPermute = calc_swizzle32(dstfmt, NULL);
957 1005
958 /* set a vector full of alpha and 255-alpha */ 1006 /* set a vector full of alpha and 255-alpha */
959 ((unsigned char *)&valpha)[0] = alpha; 1007 ((unsigned char *) &valpha)[0] = alpha;
960 valpha = vec_splat(valpha, 0); 1008 valpha = vec_splat(valpha, 0);
961 vbits = (vector unsigned char)vec_splat_s8(-1); 1009 vbits = (vector unsigned char) vec_splat_s8(-1);
962 1010
963 ckey &= rgbmask; 1011 ckey &= rgbmask;
964 ((unsigned int *)(char*)&vckey)[0] = ckey; 1012 ((unsigned int *) (char *) &vckey)[0] = ckey;
965 vckey = vec_splat(vckey, 0); 1013 vckey = vec_splat(vckey, 0);
966 ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; 1014 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
967 vrgbmask = vec_splat(vrgbmask, 0); 1015 vrgbmask = vec_splat(vrgbmask, 0);
968 1016
969 while(height--) { 1017 while (height--) {
970 int width = info->d_width; 1018 int width = info->d_width;
971 #define ONE_PIXEL_BLEND(condition, widthvar) \ 1019 #define ONE_PIXEL_BLEND(condition, widthvar) \
972 while (condition) { \ 1020 while (condition) { \
973 Uint32 Pixel; \ 1021 Uint32 Pixel; \
974 unsigned sR, sG, sB, dR, dG, dB; \ 1022 unsigned sR, sG, sB, dR, dG, dB; \
985 } 1033 }
986 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1034 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
987 if (width > 0) { 1035 if (width > 0) {
988 int extrawidth = (width % 4); 1036 int extrawidth = (width % 4);
989 vector unsigned char valigner = VEC_ALIGNER(srcp); 1037 vector unsigned char valigner = VEC_ALIGNER(srcp);
990 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1038 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
991 width -= extrawidth; 1039 width -= extrawidth;
992 while (width) { 1040 while (width) {
993 vector unsigned char vsel; 1041 vector unsigned char vsel;
994 vector unsigned char voverflow; 1042 vector unsigned char voverflow;
995 vector unsigned char vd; 1043 vector unsigned char vd;
996 vector unsigned char vd_orig; 1044 vector unsigned char vd_orig;
997 1045
998 /* s = *srcp */ 1046 /* s = *srcp */
999 voverflow = (vector unsigned char)vec_ld(15, srcp); 1047 voverflow = (vector unsigned char) vec_ld(15, srcp);
1000 vs = vec_perm(vs, voverflow, valigner); 1048 vs = vec_perm(vs, voverflow, valigner);
1001 1049
1002 /* vsel is set for items that match the key */ 1050 /* vsel is set for items that match the key */
1003 vsel = (vector unsigned char)vec_and((vector unsigned int)vs, vrgbmask); 1051 vsel =
1004 vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); 1052 (vector unsigned char) vec_and((vector unsigned int) vs,
1053 vrgbmask);
1054 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int)
1055 vsel, vckey);
1005 1056
1006 /* permute to source format */ 1057 /* permute to source format */
1007 vs = vec_perm(vs, valpha, vsrcPermute); 1058 vs = vec_perm(vs, valpha, vsrcPermute);
1008 1059
1009 /* d = *dstp */ 1060 /* d = *dstp */
1010 vd = (vector unsigned char)vec_ld(0, dstp); 1061 vd = (vector unsigned char) vec_ld(0, dstp);
1011 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); 1062 vd_orig = vd = vec_perm(vd, v0, vsdstPermute);
1012 1063
1013 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1064 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1014 1065
1015 /* set the alpha channel to full on */ 1066 /* set the alpha channel to full on */
1016 vd = vec_or(vd, valphamask); 1067 vd = vec_or(vd, valphamask);
1017 1068
1018 /* mask out color key */ 1069 /* mask out color key */
1019 vd = vec_sel(vd, vd_orig, vsel); 1070 vd = vec_sel(vd, vd_orig, vsel);
1020 1071
1021 /* permute to dest format */ 1072 /* permute to dest format */
1022 vd = vec_perm(vd, vbits, vdstPermute); 1073 vd = vec_perm(vd, vbits, vdstPermute);
1023 1074
1024 /* *dstp = res */ 1075 /* *dstp = res */
1025 vec_st((vector unsigned int)vd, 0, dstp); 1076 vec_st((vector unsigned int) vd, 0, dstp);
1026 1077
1027 srcp += 4; 1078 srcp += 4;
1028 dstp += 4; 1079 dstp += 4;
1029 width -= 4; 1080 width -= 4;
1030 vs = voverflow; 1081 vs = voverflow;
1031 } 1082 }
1032 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1083 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1033 } 1084 }
1034 #undef ONE_PIXEL_BLEND 1085 #undef ONE_PIXEL_BLEND
1035 1086
1036 srcp += srcskip; 1087 srcp += srcskip;
1037 dstp += dstskip; 1088 dstp += dstskip;
1038 } 1089 }
1039 } 1090 }
1040 1091
1041 1092
1042 static void Blit32to32PixelAlphaAltivec(SDL_BlitInfo *info) 1093 static void
1094 Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
1043 { 1095 {
1044 int width = info->d_width; 1096 int width = info->d_width;
1045 int height = info->d_height; 1097 int height = info->d_height;
1046 Uint32 *srcp = (Uint32 *)info->s_pixels; 1098 Uint32 *srcp = (Uint32 *) info->s_pixels;
1047 int srcskip = info->s_skip >> 2; 1099 int srcskip = info->s_skip >> 2;
1048 Uint32 *dstp = (Uint32 *)info->d_pixels; 1100 Uint32 *dstp = (Uint32 *) info->d_pixels;
1049 int dstskip = info->d_skip >> 2; 1101 int dstskip = info->d_skip >> 2;
1050 SDL_PixelFormat *srcfmt = info->src; 1102 SDL_PixelFormat *srcfmt = info->src;
1051 SDL_PixelFormat *dstfmt = info->dst; 1103 SDL_PixelFormat *dstfmt = info->dst;
1052 vector unsigned char mergePermute; 1104 vector unsigned char mergePermute;
1053 vector unsigned char valphaPermute; 1105 vector unsigned char valphaPermute;
1063 v0 = vec_splat_u8(0); 1115 v0 = vec_splat_u8(0);
1064 v1 = vec_splat_u16(1); 1116 v1 = vec_splat_u16(1);
1065 v8 = vec_splat_u16(8); 1117 v8 = vec_splat_u16(8);
1066 mergePermute = VEC_MERGE_PERMUTE(); 1118 mergePermute = VEC_MERGE_PERMUTE();
1067 valphamask = VEC_ALPHA_MASK(); 1119 valphamask = VEC_ALPHA_MASK();
1068 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 1120 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
1069 vpixelmask = vec_nor(valphamask, v0); 1121 vpixelmask = vec_nor(valphamask, v0);
1070 vsrcPermute = calc_swizzle32(srcfmt, NULL); 1122 vsrcPermute = calc_swizzle32(srcfmt, NULL);
1071 vdstPermute = calc_swizzle32(NULL, dstfmt); 1123 vdstPermute = calc_swizzle32(NULL, dstfmt);
1072 vsdstPermute = calc_swizzle32(dstfmt, NULL); 1124 vsdstPermute = calc_swizzle32(dstfmt, NULL);
1073 1125
1074 while ( height-- ) { 1126 while (height--) {
1075 width = info->d_width; 1127 width = info->d_width;
1076 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1128 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1077 Uint32 Pixel; \ 1129 Uint32 Pixel; \
1078 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ 1130 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
1079 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ 1131 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
1090 if (width > 0) { 1142 if (width > 0) {
1091 /* vsrcPermute */ 1143 /* vsrcPermute */
1092 /* vdstPermute */ 1144 /* vdstPermute */
1093 int extrawidth = (width % 4); 1145 int extrawidth = (width % 4);
1094 vector unsigned char valigner = VEC_ALIGNER(srcp); 1146 vector unsigned char valigner = VEC_ALIGNER(srcp);
1095 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1147 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1096 width -= extrawidth; 1148 width -= extrawidth;
1097 while (width) { 1149 while (width) {
1098 vector unsigned char voverflow; 1150 vector unsigned char voverflow;
1099 vector unsigned char vd; 1151 vector unsigned char vd;
1100 vector unsigned char valpha; 1152 vector unsigned char valpha;
1101 vector unsigned char vdstalpha; 1153 vector unsigned char vdstalpha;
1102 /* s = *srcp */ 1154 /* s = *srcp */
1103 voverflow = (vector unsigned char)vec_ld(15, srcp); 1155 voverflow = (vector unsigned char) vec_ld(15, srcp);
1104 vs = vec_perm(vs, voverflow, valigner); 1156 vs = vec_perm(vs, voverflow, valigner);
1105 vs = vec_perm(vs, v0, vsrcPermute); 1157 vs = vec_perm(vs, v0, vsrcPermute);
1106 1158
1107 valpha = vec_perm(vs, v0, valphaPermute); 1159 valpha = vec_perm(vs, v0, valphaPermute);
1108 1160
1109 /* d = *dstp */ 1161 /* d = *dstp */
1110 vd = (vector unsigned char)vec_ld(0, dstp); 1162 vd = (vector unsigned char) vec_ld(0, dstp);
1111 vd = vec_perm(vd, v0, vsdstPermute); 1163 vd = vec_perm(vd, v0, vsdstPermute);
1112 vdstalpha = vec_and(vd, valphamask); 1164 vdstalpha = vec_and(vd, valphamask);
1113 1165
1114 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1166 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1115 1167
1117 vd = vec_and(vd, vpixelmask); 1169 vd = vec_and(vd, vpixelmask);
1118 vd = vec_or(vd, vdstalpha); 1170 vd = vec_or(vd, vdstalpha);
1119 vd = vec_perm(vd, v0, vdstPermute); 1171 vd = vec_perm(vd, v0, vdstPermute);
1120 1172
1121 /* *dstp = res */ 1173 /* *dstp = res */
1122 vec_st((vector unsigned int)vd, 0, dstp); 1174 vec_st((vector unsigned int) vd, 0, dstp);
1123 1175
1124 srcp += 4; 1176 srcp += 4;
1125 dstp += 4; 1177 dstp += 4;
1126 width -= 4; 1178 width -= 4;
1127 vs = voverflow; 1179 vs = voverflow;
1128 1180
1129 } 1181 }
1130 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1182 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1131 } 1183 }
1132 srcp += srcskip; 1184 srcp += srcskip;
1133 dstp += dstskip; 1185 dstp += dstskip;
1134 #undef ONE_PIXEL_BLEND 1186 #undef ONE_PIXEL_BLEND
1135 } 1187 }
1136 } 1188 }
1137 1189
1138 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 1190 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
1139 static void BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo *info) 1191 static void
1140 { 1192 BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
1141 int width = info->d_width; 1193 {
1142 int height = info->d_height; 1194 int width = info->d_width;
1143 Uint32 *srcp = (Uint32 *)info->s_pixels; 1195 int height = info->d_height;
1144 int srcskip = info->s_skip >> 2; 1196 Uint32 *srcp = (Uint32 *) info->s_pixels;
1145 Uint32 *dstp = (Uint32 *)info->d_pixels; 1197 int srcskip = info->s_skip >> 2;
1146 int dstskip = info->d_skip >> 2; 1198 Uint32 *dstp = (Uint32 *) info->d_pixels;
1199 int dstskip = info->d_skip >> 2;
1147 vector unsigned char mergePermute; 1200 vector unsigned char mergePermute;
1148 vector unsigned char valphaPermute; 1201 vector unsigned char valphaPermute;
1149 vector unsigned char valphamask; 1202 vector unsigned char valphamask;
1150 vector unsigned char vpixelmask; 1203 vector unsigned char vpixelmask;
1151 vector unsigned char v0; 1204 vector unsigned char v0;
1154 v0 = vec_splat_u8(0); 1207 v0 = vec_splat_u8(0);
1155 v1 = vec_splat_u16(1); 1208 v1 = vec_splat_u16(1);
1156 v8 = vec_splat_u16(8); 1209 v8 = vec_splat_u16(8);
1157 mergePermute = VEC_MERGE_PERMUTE(); 1210 mergePermute = VEC_MERGE_PERMUTE();
1158 valphamask = VEC_ALPHA_MASK(); 1211 valphamask = VEC_ALPHA_MASK();
1159 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 1212 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
1160 1213
1161 1214
1162 vpixelmask = vec_nor(valphamask, v0); 1215 vpixelmask = vec_nor(valphamask, v0);
1163 while(height--) { 1216 while (height--) {
1164 width = info->d_width; 1217 width = info->d_width;
1165 #define ONE_PIXEL_BLEND(condition, widthvar) \ 1218 #define ONE_PIXEL_BLEND(condition, widthvar) \
1166 while ((condition)) { \ 1219 while ((condition)) { \
1167 Uint32 dalpha; \ 1220 Uint32 dalpha; \
1168 Uint32 d; \ 1221 Uint32 d; \
1191 } 1244 }
1192 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1245 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
1193 if (width > 0) { 1246 if (width > 0) {
1194 int extrawidth = (width % 4); 1247 int extrawidth = (width % 4);
1195 vector unsigned char valigner = VEC_ALIGNER(srcp); 1248 vector unsigned char valigner = VEC_ALIGNER(srcp);
1196 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1249 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1197 width -= extrawidth; 1250 width -= extrawidth;
1198 while (width) { 1251 while (width) {
1199 vector unsigned char voverflow; 1252 vector unsigned char voverflow;
1200 vector unsigned char vd; 1253 vector unsigned char vd;
1201 vector unsigned char valpha; 1254 vector unsigned char valpha;
1202 vector unsigned char vdstalpha; 1255 vector unsigned char vdstalpha;
1203 /* s = *srcp */ 1256 /* s = *srcp */
1204 voverflow = (vector unsigned char)vec_ld(15, srcp); 1257 voverflow = (vector unsigned char) vec_ld(15, srcp);
1205 vs = vec_perm(vs, voverflow, valigner); 1258 vs = vec_perm(vs, voverflow, valigner);
1206 1259
1207 valpha = vec_perm(vs, v0, valphaPermute); 1260 valpha = vec_perm(vs, v0, valphaPermute);
1208 1261
1209 /* d = *dstp */ 1262 /* d = *dstp */
1210 vd = (vector unsigned char)vec_ld(0, dstp); 1263 vd = (vector unsigned char) vec_ld(0, dstp);
1211 vdstalpha = vec_and(vd, valphamask); 1264 vdstalpha = vec_and(vd, valphamask);
1212 1265
1213 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1266 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1214 1267
1215 /* set the alpha to the dest alpha */ 1268 /* set the alpha to the dest alpha */
1216 vd = vec_and(vd, vpixelmask); 1269 vd = vec_and(vd, vpixelmask);
1217 vd = vec_or(vd, vdstalpha); 1270 vd = vec_or(vd, vdstalpha);
1218 1271
1219 /* *dstp = res */ 1272 /* *dstp = res */
1220 vec_st((vector unsigned int)vd, 0, dstp); 1273 vec_st((vector unsigned int) vd, 0, dstp);
1221 1274
1222 srcp += 4; 1275 srcp += 4;
1223 dstp += 4; 1276 dstp += 4;
1224 width -= 4; 1277 width -= 4;
1225 vs = voverflow; 1278 vs = voverflow;
1226 } 1279 }
1227 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1280 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1228 } 1281 }
1229 srcp += srcskip; 1282 srcp += srcskip;
1230 dstp += dstskip; 1283 dstp += dstskip;
1231 } 1284 }
1232 #undef ONE_PIXEL_BLEND 1285 #undef ONE_PIXEL_BLEND
1233 } 1286 }
1234 1287
1235 static void Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo *info) 1288 static void
1289 Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
1236 { 1290 {
1237 /* XXX : 6 */ 1291 /* XXX : 6 */
1238 unsigned alpha = info->src->alpha; 1292 unsigned alpha = info->src->alpha;
1239 int height = info->d_height; 1293 int height = info->d_height;
1240 Uint32 *srcp = (Uint32 *)info->s_pixels; 1294 Uint32 *srcp = (Uint32 *) info->s_pixels;
1241 int srcskip = info->s_skip >> 2; 1295 int srcskip = info->s_skip >> 2;
1242 Uint32 *dstp = (Uint32 *)info->d_pixels; 1296 Uint32 *dstp = (Uint32 *) info->d_pixels;
1243 int dstskip = info->d_skip >> 2; 1297 int dstskip = info->d_skip >> 2;
1244 SDL_PixelFormat *srcfmt = info->src; 1298 SDL_PixelFormat *srcfmt = info->src;
1245 SDL_PixelFormat *dstfmt = info->dst; 1299 SDL_PixelFormat *dstfmt = info->dst;
1246 unsigned sA = srcfmt->alpha; 1300 unsigned sA = srcfmt->alpha;
1247 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 1301 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
1248 vector unsigned char mergePermute; 1302 vector unsigned char mergePermute;
1249 vector unsigned char vsrcPermute; 1303 vector unsigned char vsrcPermute;
1250 vector unsigned char vdstPermute; 1304 vector unsigned char vdstPermute;
1251 vector unsigned char vsdstPermute; 1305 vector unsigned char vsdstPermute;
1252 vector unsigned char valpha; 1306 vector unsigned char valpha;
1265 vsrcPermute = calc_swizzle32(srcfmt, NULL); 1319 vsrcPermute = calc_swizzle32(srcfmt, NULL);
1266 vdstPermute = calc_swizzle32(NULL, dstfmt); 1320 vdstPermute = calc_swizzle32(NULL, dstfmt);
1267 vsdstPermute = calc_swizzle32(dstfmt, NULL); 1321 vsdstPermute = calc_swizzle32(dstfmt, NULL);
1268 1322
1269 /* set a vector full of alpha and 255-alpha */ 1323 /* set a vector full of alpha and 255-alpha */
1270 ((unsigned char *)&valpha)[0] = alpha; 1324 ((unsigned char *) &valpha)[0] = alpha;
1271 valpha = vec_splat(valpha, 0); 1325 valpha = vec_splat(valpha, 0);
1272 vbits = (vector unsigned char)vec_splat_s8(-1); 1326 vbits = (vector unsigned char) vec_splat_s8(-1);
1273 1327
1274 while(height--) { 1328 while (height--) {
1275 int width = info->d_width; 1329 int width = info->d_width;
1276 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1330 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1277 Uint32 Pixel; \ 1331 Uint32 Pixel; \
1278 unsigned sR, sG, sB, dR, dG, dB; \ 1332 unsigned sR, sG, sB, dR, dG, dB; \
1279 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ 1333 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
1286 } 1340 }
1287 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1341 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
1288 if (width > 0) { 1342 if (width > 0) {
1289 int extrawidth = (width % 4); 1343 int extrawidth = (width % 4);
1290 vector unsigned char valigner = vec_lvsl(0, srcp); 1344 vector unsigned char valigner = vec_lvsl(0, srcp);
1291 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1345 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1292 width -= extrawidth; 1346 width -= extrawidth;
1293 while (width) { 1347 while (width) {
1294 vector unsigned char voverflow; 1348 vector unsigned char voverflow;
1295 vector unsigned char vd; 1349 vector unsigned char vd;
1296 1350
1297 /* s = *srcp */ 1351 /* s = *srcp */
1298 voverflow = (vector unsigned char)vec_ld(15, srcp); 1352 voverflow = (vector unsigned char) vec_ld(15, srcp);
1299 vs = vec_perm(vs, voverflow, valigner); 1353 vs = vec_perm(vs, voverflow, valigner);
1300 vs = vec_perm(vs, valpha, vsrcPermute); 1354 vs = vec_perm(vs, valpha, vsrcPermute);
1301 1355
1302 /* d = *dstp */ 1356 /* d = *dstp */
1303 vd = (vector unsigned char)vec_ld(0, dstp); 1357 vd = (vector unsigned char) vec_ld(0, dstp);
1304 vd = vec_perm(vd, vd, vsdstPermute); 1358 vd = vec_perm(vd, vd, vsdstPermute);
1305 1359
1306 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1360 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1307 1361
1308 /* set the alpha channel to full on */ 1362 /* set the alpha channel to full on */
1309 vd = vec_or(vd, valphamask); 1363 vd = vec_or(vd, valphamask);
1310 vd = vec_perm(vd, vbits, vdstPermute); 1364 vd = vec_perm(vd, vbits, vdstPermute);
1311 1365
1312 /* *dstp = res */ 1366 /* *dstp = res */
1313 vec_st((vector unsigned int)vd, 0, dstp); 1367 vec_st((vector unsigned int) vd, 0, dstp);
1314 1368
1315 srcp += 4; 1369 srcp += 4;
1316 dstp += 4; 1370 dstp += 4;
1317 width -= 4; 1371 width -= 4;
1318 vs = voverflow; 1372 vs = voverflow;
1319 } 1373 }
1320 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1374 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1321 } 1375 }
1322 #undef ONE_PIXEL_BLEND 1376 #undef ONE_PIXEL_BLEND
1323 1377
1324 srcp += srcskip; 1378 srcp += srcskip;
1325 dstp += dstskip; 1379 dstp += dstskip;
1326 } 1380 }
1327 1381
1328 } 1382 }
1329 1383
1330 1384
1331 /* fast RGB888->(A)RGB888 blending */ 1385 /* fast RGB888->(A)RGB888 blending */
1332 static void BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo *info) 1386 static void
1333 { 1387 BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
1334 unsigned alpha = info->src->alpha; 1388 {
1389 unsigned alpha = info->src->alpha;
1335 int height = info->d_height; 1390 int height = info->d_height;
1336 Uint32 *srcp = (Uint32 *)info->s_pixels; 1391 Uint32 *srcp = (Uint32 *) info->s_pixels;
1337 int srcskip = info->s_skip >> 2; 1392 int srcskip = info->s_skip >> 2;
1338 Uint32 *dstp = (Uint32 *)info->d_pixels; 1393 Uint32 *dstp = (Uint32 *) info->d_pixels;
1339 int dstskip = info->d_skip >> 2; 1394 int dstskip = info->d_skip >> 2;
1340 vector unsigned char mergePermute; 1395 vector unsigned char mergePermute;
1341 vector unsigned char valpha; 1396 vector unsigned char valpha;
1342 vector unsigned char valphamask; 1397 vector unsigned char valphamask;
1343 vector unsigned short v1; 1398 vector unsigned short v1;
1349 1404
1350 /* set the alpha to 255 on the destination surf */ 1405 /* set the alpha to 255 on the destination surf */
1351 valphamask = VEC_ALPHA_MASK(); 1406 valphamask = VEC_ALPHA_MASK();
1352 1407
1353 /* set a vector full of alpha and 255-alpha */ 1408 /* set a vector full of alpha and 255-alpha */
1354 ((unsigned char *)&valpha)[0] = alpha; 1409 ((unsigned char *) &valpha)[0] = alpha;
1355 valpha = vec_splat(valpha, 0); 1410 valpha = vec_splat(valpha, 0);
1356 1411
1357 while(height--) { 1412 while (height--) {
1358 int width = info->d_width; 1413 int width = info->d_width;
1359 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1414 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1360 Uint32 s = *srcp; \ 1415 Uint32 s = *srcp; \
1361 Uint32 d = *dstp; \ 1416 Uint32 d = *dstp; \
1362 Uint32 s1 = s & 0xff00ff; \ 1417 Uint32 s1 = s & 0xff00ff; \
1373 } 1428 }
1374 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1429 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
1375 if (width > 0) { 1430 if (width > 0) {
1376 int extrawidth = (width % 4); 1431 int extrawidth = (width % 4);
1377 vector unsigned char valigner = VEC_ALIGNER(srcp); 1432 vector unsigned char valigner = VEC_ALIGNER(srcp);
1378 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1433 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
1379 width -= extrawidth; 1434 width -= extrawidth;
1380 while (width) { 1435 while (width) {
1381 vector unsigned char voverflow; 1436 vector unsigned char voverflow;
1382 vector unsigned char vd; 1437 vector unsigned char vd;
1383 1438
1384 /* s = *srcp */ 1439 /* s = *srcp */
1385 voverflow = (vector unsigned char)vec_ld(15, srcp); 1440 voverflow = (vector unsigned char) vec_ld(15, srcp);
1386 vs = vec_perm(vs, voverflow, valigner); 1441 vs = vec_perm(vs, voverflow, valigner);
1387 1442
1388 /* d = *dstp */ 1443 /* d = *dstp */
1389 vd = (vector unsigned char)vec_ld(0, dstp); 1444 vd = (vector unsigned char) vec_ld(0, dstp);
1390 1445
1391 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1446 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
1392 1447
1393 /* set the alpha channel to full on */ 1448 /* set the alpha channel to full on */
1394 vd = vec_or(vd, valphamask); 1449 vd = vec_or(vd, valphamask);
1395 1450
1396 /* *dstp = res */ 1451 /* *dstp = res */
1397 vec_st((vector unsigned int)vd, 0, dstp); 1452 vec_st((vector unsigned int) vd, 0, dstp);
1398 1453
1399 srcp += 4; 1454 srcp += 4;
1400 dstp += 4; 1455 dstp += 4;
1401 width -= 4; 1456 width -= 4;
1402 vs = voverflow; 1457 vs = voverflow;
1403 } 1458 }
1404 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1459 ONE_PIXEL_BLEND((extrawidth), extrawidth);
1405 } 1460 }
1406 #undef ONE_PIXEL_BLEND 1461 #undef ONE_PIXEL_BLEND
1407 1462
1408 srcp += srcskip; 1463 srcp += srcskip;
1409 dstp += dstskip; 1464 dstp += dstskip;
1410 } 1465 }
1411 } 1466 }
1467
1412 #if __MWERKS__ 1468 #if __MWERKS__
1413 #pragma altivec_model off 1469 #pragma altivec_model off
1414 #endif 1470 #endif
1415 #endif /* SDL_ALTIVEC_BLITTERS */ 1471 #endif /* SDL_ALTIVEC_BLITTERS */
1416 1472
1417 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 1473 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
1418 static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) 1474 static void
1419 { 1475 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
1420 int width = info->d_width; 1476 {
1421 int height = info->d_height; 1477 int width = info->d_width;
1422 Uint32 *srcp = (Uint32 *)info->s_pixels; 1478 int height = info->d_height;
1423 int srcskip = info->s_skip >> 2; 1479 Uint32 *srcp = (Uint32 *) info->s_pixels;
1424 Uint32 *dstp = (Uint32 *)info->d_pixels; 1480 int srcskip = info->s_skip >> 2;
1425 int dstskip = info->d_skip >> 2; 1481 Uint32 *dstp = (Uint32 *) info->d_pixels;
1426 1482 int dstskip = info->d_skip >> 2;
1427 while(height--) { 1483
1484 while (height--) {
1485 /* *INDENT-OFF* */
1428 DUFFS_LOOP4({ 1486 DUFFS_LOOP4({
1429 Uint32 s = *srcp++; 1487 Uint32 s = *srcp++;
1430 Uint32 d = *dstp; 1488 Uint32 d = *dstp;
1431 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 1489 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
1432 + (s & d & 0x00010101)) | 0xff000000; 1490 + (s & d & 0x00010101)) | 0xff000000;
1433 }, width); 1491 }, width);
1434 srcp += srcskip; 1492 /* *INDENT-ON* */
1435 dstp += dstskip; 1493 srcp += srcskip;
1436 } 1494 dstp += dstskip;
1495 }
1437 } 1496 }
1438 1497
1439 /* fast RGB888->(A)RGB888 blending with surface alpha */ 1498 /* fast RGB888->(A)RGB888 blending with surface alpha */
1440 static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) 1499 static void
1441 { 1500 BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo * info)
1442 unsigned alpha = info->src->alpha; 1501 {
1443 if(alpha == 128) { 1502 unsigned alpha = info->src->alpha;
1444 BlitRGBtoRGBSurfaceAlpha128(info); 1503 if (alpha == 128) {
1445 } else { 1504 BlitRGBtoRGBSurfaceAlpha128(info);
1446 int width = info->d_width; 1505 } else {
1447 int height = info->d_height; 1506 int width = info->d_width;
1448 Uint32 *srcp = (Uint32 *)info->s_pixels; 1507 int height = info->d_height;
1449 int srcskip = info->s_skip >> 2; 1508 Uint32 *srcp = (Uint32 *) info->s_pixels;
1450 Uint32 *dstp = (Uint32 *)info->d_pixels; 1509 int srcskip = info->s_skip >> 2;
1451 int dstskip = info->d_skip >> 2; 1510 Uint32 *dstp = (Uint32 *) info->d_pixels;
1452 Uint32 s; 1511 int dstskip = info->d_skip >> 2;
1453 Uint32 d; 1512 Uint32 s;
1454 Uint32 s1; 1513 Uint32 d;
1455 Uint32 d1; 1514 Uint32 s1;
1456 1515 Uint32 d1;
1457 while(height--) { 1516
1517 while (height--) {
1518 /* *INDENT-OFF* */
1458 DUFFS_LOOP_DOUBLE2({ 1519 DUFFS_LOOP_DOUBLE2({
1459 /* One Pixel Blend */ 1520 /* One Pixel Blend */
1460 s = *srcp; 1521 s = *srcp;
1461 d = *dstp; 1522 d = *dstp;
1462 s1 = s & 0xff00ff; 1523 s1 = s & 0xff00ff;
1497 1558
1498 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000; 1559 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000;
1499 ++srcp; 1560 ++srcp;
1500 ++dstp; 1561 ++dstp;
1501 }, width); 1562 }, width);
1502 srcp += srcskip; 1563 /* *INDENT-ON* */
1503 dstp += dstskip; 1564 srcp += srcskip;
1504 } 1565 dstp += dstskip;
1505 } 1566 }
1567 }
1506 } 1568 }
1507 1569
1508 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 1570 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
1509 static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info) 1571 static void
1510 { 1572 BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info)
1511 int width = info->d_width; 1573 {
1512 int height = info->d_height; 1574 int width = info->d_width;
1513 Uint32 *srcp = (Uint32 *)info->s_pixels; 1575 int height = info->d_height;
1514 int srcskip = info->s_skip >> 2; 1576 Uint32 *srcp = (Uint32 *) info->s_pixels;
1515 Uint32 *dstp = (Uint32 *)info->d_pixels; 1577 int srcskip = info->s_skip >> 2;
1516 int dstskip = info->d_skip >> 2; 1578 Uint32 *dstp = (Uint32 *) info->d_pixels;
1517 1579 int dstskip = info->d_skip >> 2;
1518 while(height--) { 1580
1581 while (height--) {
1582 /* *INDENT-OFF* */
1519 DUFFS_LOOP4({ 1583 DUFFS_LOOP4({
1520 Uint32 dalpha; 1584 Uint32 dalpha;
1521 Uint32 d; 1585 Uint32 d;
1522 Uint32 s1; 1586 Uint32 s1;
1523 Uint32 d1; 1587 Uint32 d1;
1547 } 1611 }
1548 } 1612 }
1549 ++srcp; 1613 ++srcp;
1550 ++dstp; 1614 ++dstp;
1551 }, width); 1615 }, width);
1552 srcp += srcskip; 1616 /* *INDENT-ON* */
1553 dstp += dstskip; 1617 srcp += srcskip;
1554 } 1618 dstp += dstskip;
1619 }
1555 } 1620 }
1556 1621
1557 #if GCC_ASMBLIT 1622 #if GCC_ASMBLIT
1558 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 1623 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
1559 inline static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) 1624 inline static void
1560 { 1625 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
1561 int width = info->d_width; 1626 {
1562 int height = info->d_height; 1627 int width = info->d_width;
1563 Uint32 *srcp = (Uint32 *)info->s_pixels; 1628 int height = info->d_height;
1564 int srcskip = info->s_skip >> 2; 1629 Uint32 *srcp = (Uint32 *) info->s_pixels;
1565 Uint32 *dstp = (Uint32 *)info->d_pixels; 1630 int srcskip = info->s_skip >> 2;
1566 int dstskip = info->d_skip >> 2; 1631 Uint32 *dstp = (Uint32 *) info->d_pixels;
1567 SDL_PixelFormat* sf = info->src; 1632 int dstskip = info->d_skip >> 2;
1568 Uint32 amask = sf->Amask; 1633 SDL_PixelFormat *sf = info->src;
1569 1634 Uint32 amask = sf->Amask;
1570 __asm__ ( 1635
1571 /* make mm6 all zeros. */ 1636 __asm__(
1572 "pxor %%mm6, %%mm6\n" 1637 /* make mm6 all zeros. */
1573 1638 "pxor %%mm6, %%mm6\n"
1574 /* Make a mask to preserve the alpha. */ 1639 /* Make a mask to preserve the alpha. */
1575 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ 1640 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */
1576 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ 1641 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */
1577 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ 1642 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */
1578 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ 1643 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */
1579 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ 1644 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */
1580 1645 /* form channel masks */
1581 /* form channel masks */ 1646 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */
1582 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ 1647 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */
1583 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ 1648 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */
1584 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ 1649 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */
1585 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ 1650 /* get alpha channel shift */
1586 1651 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */
1587 /* get alpha channel shift */ 1652 : /* nothing */ : "m"(sf->Amask), "m"(sf->Ashift));
1588 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ 1653
1589 1654 while (height--) {
1590 : /* nothing */ : "m" (sf->Amask), "m" (sf->Ashift) ); 1655
1591 1656 /* *INDENT-OFF* */
1592 while(height--) {
1593
1594 DUFFS_LOOP4({ 1657 DUFFS_LOOP4({
1595 Uint32 alpha; 1658 Uint32 alpha;
1596 1659
1597 __asm__ ( 1660 __asm__ (
1598 "prefetch 64(%0)\n" 1661 "prefetch 64(%0)\n"
1660 1723
1661 } 1724 }
1662 ++srcp; 1725 ++srcp;
1663 ++dstp; 1726 ++dstp;
1664 }, width); 1727 }, width);
1665 srcp += srcskip; 1728 /* *INDENT-ON* */
1666 dstp += dstskip; 1729 srcp += srcskip;
1667 } 1730 dstp += dstskip;
1668 1731 }
1669 __asm__ ( 1732
1670 "emms\n" 1733 __asm__("emms\n":);
1671 : ); 1734 }
1672 } 1735
1673 /* End GCC_ASMBLIT*/ 1736 /* End GCC_ASMBLIT*/
1674 1737
1675 #elif MSVC_ASMBLIT 1738 #elif MSVC_ASMBLIT
1676 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 1739 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
1677 static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) 1740 static void
1678 { 1741 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
1679 int width = info->d_width; 1742 {
1680 int height = info->d_height; 1743 int width = info->d_width;
1681 Uint32 *srcp = (Uint32 *)info->s_pixels; 1744 int height = info->d_height;
1682 int srcskip = info->s_skip >> 2; 1745 Uint32 *srcp = (Uint32 *) info->s_pixels;
1683 Uint32 *dstp = (Uint32 *)info->d_pixels; 1746 int srcskip = info->s_skip >> 2;
1684 int dstskip = info->d_skip >> 2; 1747 Uint32 *dstp = (Uint32 *) info->d_pixels;
1685 SDL_PixelFormat* sf = info->src; 1748 int dstskip = info->d_skip >> 2;
1686 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; 1749 SDL_PixelFormat *sf = info->src;
1687 Uint32 amask = sf->Amask; 1750 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
1688 Uint32 ashift = sf->Ashift; 1751 Uint32 amask = sf->Amask;
1689 Uint64 multmask; 1752 Uint32 ashift = sf->Ashift;
1690 1753 Uint64 multmask;
1691 __m64 src1, dst1, mm_alpha, mm_zero, dmask; 1754
1692 1755 __m64 src1, dst1, mm_alpha, mm_zero, dmask;
1693 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 1756
1694 multmask = ~(0xFFFFi64 << (ashift * 2)); 1757 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
1695 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ 1758 /* *INDENT-OFF* */
1696 1759 multmask = ~(0xFFFFI64 << (ashift * 2));
1697 while(height--) { 1760 /* *INDENT-ON* */
1761 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */
1762
1763 while (height--) {
1764 /* *INDENT-OFF* */
1698 DUFFS_LOOP4({ 1765 DUFFS_LOOP4({
1699 Uint32 alpha; 1766 Uint32 alpha;
1700 1767
1701 _m_prefetch(srcp + 16); 1768 _m_prefetch(srcp + 16);
1702 _m_prefetch(dstp + 16); 1769 _m_prefetch(dstp + 16);
1730 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 1797 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
1731 } 1798 }
1732 ++srcp; 1799 ++srcp;
1733 ++dstp; 1800 ++dstp;
1734 }, width); 1801 }, width);
1735 srcp += srcskip; 1802 /* *INDENT-ON* */
1736 dstp += dstskip; 1803 srcp += srcskip;
1737 } 1804 dstp += dstskip;
1738 _mm_empty(); 1805 }
1739 } 1806 _mm_empty();
1807 }
1808
1740 /* End MSVC_ASMBLIT */ 1809 /* End MSVC_ASMBLIT */
1741 1810
1742 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 1811 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
1743 1812
1744 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ 1813 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
1750 /* blend two 16 bit pixels at 50% */ 1819 /* blend two 16 bit pixels at 50% */
1751 #define BLEND2x16_50(d, s, mask) \ 1820 #define BLEND2x16_50(d, s, mask) \
1752 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ 1821 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \
1753 + (s & d & (~(mask | mask << 16)))) 1822 + (s & d & (~(mask | mask << 16))))
1754 1823
1755 static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask) 1824 static void
1756 { 1825 Blit16to16SurfaceAlpha128(SDL_BlitInfo * info, Uint16 mask)
1757 int width = info->d_width; 1826 {
1758 int height = info->d_height; 1827 int width = info->d_width;
1759 Uint16 *srcp = (Uint16 *)info->s_pixels; 1828 int height = info->d_height;
1760 int srcskip = info->s_skip >> 1; 1829 Uint16 *srcp = (Uint16 *) info->s_pixels;
1761 Uint16 *dstp = (Uint16 *)info->d_pixels; 1830 int srcskip = info->s_skip >> 1;
1762 int dstskip = info->d_skip >> 1; 1831 Uint16 *dstp = (Uint16 *) info->d_pixels;
1763 1832 int dstskip = info->d_skip >> 1;
1764 while(height--) { 1833
1765 if(((uintptr_t)srcp ^ (uintptr_t)dstp) & 2) { 1834 while (height--) {
1766 /* 1835 if (((uintptr_t) srcp ^ (uintptr_t) dstp) & 2) {
1767 * Source and destination not aligned, pipeline it. 1836 /*
1768 * This is mostly a win for big blits but no loss for 1837 * Source and destination not aligned, pipeline it.
1769 * small ones 1838 * This is mostly a win for big blits but no loss for
1770 */ 1839 * small ones
1771 Uint32 prev_sw; 1840 */
1772 int w = width; 1841 Uint32 prev_sw;
1773 1842 int w = width;
1774 /* handle odd destination */ 1843
1775 if((uintptr_t)dstp & 2) { 1844 /* handle odd destination */
1776 Uint16 d = *dstp, s = *srcp; 1845 if ((uintptr_t) dstp & 2) {
1777 *dstp = BLEND16_50(d, s, mask); 1846 Uint16 d = *dstp, s = *srcp;
1778 dstp++; 1847 *dstp = BLEND16_50(d, s, mask);
1779 srcp++; 1848 dstp++;
1780 w--; 1849 srcp++;
1781 } 1850 w--;
1782 srcp++; /* srcp is now 32-bit aligned */ 1851 }
1783 1852 srcp++; /* srcp is now 32-bit aligned */
1784 /* bootstrap pipeline with first halfword */ 1853
1785 prev_sw = ((Uint32 *)srcp)[-1]; 1854 /* bootstrap pipeline with first halfword */
1786 1855 prev_sw = ((Uint32 *) srcp)[-1];
1787 while(w > 1) { 1856
1788 Uint32 sw, dw, s; 1857 while (w > 1) {
1789 sw = *(Uint32 *)srcp; 1858 Uint32 sw, dw, s;
1790 dw = *(Uint32 *)dstp; 1859 sw = *(Uint32 *) srcp;
1860 dw = *(Uint32 *) dstp;
1791 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 1861 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
1792 s = (prev_sw << 16) + (sw >> 16); 1862 s = (prev_sw << 16) + (sw >> 16);
1793 #else 1863 #else
1794 s = (prev_sw >> 16) + (sw << 16); 1864 s = (prev_sw >> 16) + (sw << 16);
1795 #endif 1865 #endif
1796 prev_sw = sw; 1866 prev_sw = sw;
1797 *(Uint32 *)dstp = BLEND2x16_50(dw, s, mask); 1867 *(Uint32 *) dstp = BLEND2x16_50(dw, s, mask);
1798 dstp += 2; 1868 dstp += 2;
1799 srcp += 2; 1869 srcp += 2;
1800 w -= 2; 1870 w -= 2;
1801 } 1871 }
1802 1872
1803 /* final pixel if any */ 1873 /* final pixel if any */
1804 if(w) { 1874 if (w) {
1805 Uint16 d = *dstp, s; 1875 Uint16 d = *dstp, s;
1806 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 1876 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
1807 s = (Uint16)prev_sw; 1877 s = (Uint16) prev_sw;
1808 #else 1878 #else
1809 s = (Uint16)(prev_sw >> 16); 1879 s = (Uint16) (prev_sw >> 16);
1810 #endif 1880 #endif
1811 *dstp = BLEND16_50(d, s, mask); 1881 *dstp = BLEND16_50(d, s, mask);
1812 srcp++; 1882 srcp++;
1813 dstp++; 1883 dstp++;
1814 } 1884 }
1815 srcp += srcskip - 1; 1885 srcp += srcskip - 1;
1816 dstp += dstskip; 1886 dstp += dstskip;
1817 } else { 1887 } else {
1818 /* source and destination are aligned */ 1888 /* source and destination are aligned */
1819 int w = width; 1889 int w = width;
1820 1890
1821 /* first odd pixel? */ 1891 /* first odd pixel? */
1822 if((uintptr_t)srcp & 2) { 1892 if ((uintptr_t) srcp & 2) {
1823 Uint16 d = *dstp, s = *srcp; 1893 Uint16 d = *dstp, s = *srcp;
1824 *dstp = BLEND16_50(d, s, mask); 1894 *dstp = BLEND16_50(d, s, mask);
1825 srcp++; 1895 srcp++;
1826 dstp++; 1896 dstp++;
1827 w--; 1897 w--;
1828 } 1898 }
1829 /* srcp and dstp are now 32-bit aligned */ 1899 /* srcp and dstp are now 32-bit aligned */
1830 1900
1831 while(w > 1) { 1901 while (w > 1) {
1832 Uint32 sw = *(Uint32 *)srcp; 1902 Uint32 sw = *(Uint32 *) srcp;
1833 Uint32 dw = *(Uint32 *)dstp; 1903 Uint32 dw = *(Uint32 *) dstp;
1834 *(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask); 1904 *(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask);
1835 srcp += 2; 1905 srcp += 2;
1836 dstp += 2; 1906 dstp += 2;
1837 w -= 2; 1907 w -= 2;
1838 } 1908 }
1839 1909
1840 /* last odd pixel? */ 1910 /* last odd pixel? */
1841 if(w) { 1911 if (w) {
1842 Uint16 d = *dstp, s = *srcp; 1912 Uint16 d = *dstp, s = *srcp;
1843 *dstp = BLEND16_50(d, s, mask); 1913 *dstp = BLEND16_50(d, s, mask);
1844 srcp++; 1914 srcp++;
1845 dstp++; 1915 dstp++;
1846 } 1916 }
1847 srcp += srcskip; 1917 srcp += srcskip;
1848 dstp += dstskip; 1918 dstp += dstskip;
1849 } 1919 }
1850 } 1920 }
1851 } 1921 }
1852 1922
1853 #if GCC_ASMBLIT 1923 #if GCC_ASMBLIT
1854 /* fast RGB565->RGB565 blending with surface alpha */ 1924 /* fast RGB565->RGB565 blending with surface alpha */
1855 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) 1925 static void
1856 { 1926 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info)
1857 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 1927 {
1858 if(alpha == 128) { 1928 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */
1859 Blit16to16SurfaceAlpha128(info, 0xf7de); 1929 if (alpha == 128) {
1860 } else { 1930 Blit16to16SurfaceAlpha128(info, 0xf7de);
1861 int width = info->d_width; 1931 } else {
1862 int height = info->d_height; 1932 int width = info->d_width;
1863 Uint16 *srcp = (Uint16 *)info->s_pixels; 1933 int height = info->d_height;
1864 int srcskip = info->s_skip >> 1; 1934 Uint16 *srcp = (Uint16 *) info->s_pixels;
1865 Uint16 *dstp = (Uint16 *)info->d_pixels; 1935 int srcskip = info->s_skip >> 1;
1866 int dstskip = info->d_skip >> 1; 1936 Uint16 *dstp = (Uint16 *) info->d_pixels;
1867 Uint32 s, d; 1937 int dstskip = info->d_skip >> 1;
1868 Uint8 load[8]; 1938 Uint32 s, d;
1869 1939 Uint8 load[8];
1870 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 1940
1871 *(Uint64 *)load = alpha; 1941 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
1872 alpha >>= 3; /* downscale alpha to 5 bits */ 1942 *(Uint64 *) load = alpha;
1873 1943 alpha >>= 3; /* downscale alpha to 5 bits */
1874 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ 1944
1875 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ 1945 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */
1876 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ 1946 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */
1877 /* position alpha to allow for mullo and mulhi on diff channels 1947 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */
1878 to reduce the number of operations */ 1948 /* position alpha to allow for mullo and mulhi on diff channels
1879 psllq_i2r(3, mm0); 1949 to reduce the number of operations */
1880 1950 psllq_i2r(3, mm0);
1881 /* Setup the 565 color channel masks */ 1951
1882 *(Uint64 *)load = 0x07E007E007E007E0ULL; 1952 /* Setup the 565 color channel masks */
1883 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ 1953 *(Uint64 *) load = 0x07E007E007E007E0ULL;
1884 *(Uint64 *)load = 0x001F001F001F001FULL; 1954 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */
1885 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ 1955 *(Uint64 *) load = 0x001F001F001F001FULL;
1886 while(height--) { 1956 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */
1957 while (height--) {
1958 /* *INDENT-OFF* */
1887 DUFFS_LOOP_QUATRO2( 1959 DUFFS_LOOP_QUATRO2(
1888 { 1960 {
1889 s = *srcp++; 1961 s = *srcp++;
1890 d = *dstp; 1962 d = *dstp;
1891 /* 1963 /*
1981 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ 2053 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */
1982 2054
1983 srcp += 4; 2055 srcp += 4;
1984 dstp += 4; 2056 dstp += 4;
1985 }, width); 2057 }, width);
1986 srcp += srcskip; 2058 /* *INDENT-ON* */
1987 dstp += dstskip; 2059 srcp += srcskip;
1988 } 2060 dstp += dstskip;
1989 emms(); 2061 }
1990 } 2062 emms();
2063 }
1991 } 2064 }
1992 2065
1993 /* fast RGB555->RGB555 blending with surface alpha */ 2066 /* fast RGB555->RGB555 blending with surface alpha */
1994 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) 2067 static void
1995 { 2068 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info)
1996 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 2069 {
1997 if(alpha == 128) { 2070 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */
1998 Blit16to16SurfaceAlpha128(info, 0xfbde); 2071 if (alpha == 128) {
1999 } else { 2072 Blit16to16SurfaceAlpha128(info, 0xfbde);
2000 int width = info->d_width; 2073 } else {
2001 int height = info->d_height; 2074 int width = info->d_width;
2002 Uint16 *srcp = (Uint16 *)info->s_pixels; 2075 int height = info->d_height;
2003 int srcskip = info->s_skip >> 1; 2076 Uint16 *srcp = (Uint16 *) info->s_pixels;
2004 Uint16 *dstp = (Uint16 *)info->d_pixels; 2077 int srcskip = info->s_skip >> 1;
2005 int dstskip = info->d_skip >> 1; 2078 Uint16 *dstp = (Uint16 *) info->d_pixels;
2006 Uint32 s, d; 2079 int dstskip = info->d_skip >> 1;
2007 Uint8 load[8]; 2080 Uint32 s, d;
2008 2081 Uint8 load[8];
2009 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 2082
2010 *(Uint64 *)load = alpha; 2083 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2011 alpha >>= 3; /* downscale alpha to 5 bits */ 2084 *(Uint64 *) load = alpha;
2012 2085 alpha >>= 3; /* downscale alpha to 5 bits */
2013 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ 2086
2014 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ 2087 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */
2015 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ 2088 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */
2016 /* position alpha to allow for mullo and mulhi on diff channels 2089 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */
2017 to reduce the number of operations */ 2090 /* position alpha to allow for mullo and mulhi on diff channels
2018 psllq_i2r(3, mm0); 2091 to reduce the number of operations */
2019 2092 psllq_i2r(3, mm0);
2020 /* Setup the 555 color channel masks */ 2093
2021 *(Uint64 *)load = 0x03E003E003E003E0ULL; 2094 /* Setup the 555 color channel masks */
2022 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ 2095 *(Uint64 *) load = 0x03E003E003E003E0ULL;
2023 *(Uint64 *)load = 0x001F001F001F001FULL; 2096 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */
2024 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ 2097 *(Uint64 *) load = 0x001F001F001F001FULL;
2025 while(height--) { 2098 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */
2099 while (height--) {
2100 /* *INDENT-OFF* */
2026 DUFFS_LOOP_QUATRO2( 2101 DUFFS_LOOP_QUATRO2(
2027 { 2102 {
2028 s = *srcp++; 2103 s = *srcp++;
2029 d = *dstp; 2104 d = *dstp;
2030 /* 2105 /*
2124 2199
2125 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ 2200 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */
2126 2201
2127 srcp += 4; 2202 srcp += 4;
2128 dstp += 4; 2203 dstp += 4;
2129 }, width); 2204 }, width);
2130 srcp += srcskip; 2205 /* *INDENT-ON* */
2131 dstp += dstskip; 2206 srcp += srcskip;
2132 } 2207 dstp += dstskip;
2133 emms(); 2208 }
2134 } 2209 emms();
2135 } 2210 }
2211 }
2212
2136 /* End GCC_ASMBLIT */ 2213 /* End GCC_ASMBLIT */
2137 2214
2138 #elif MSVC_ASMBLIT 2215 #elif MSVC_ASMBLIT
2139 /* fast RGB565->RGB565 blending with surface alpha */ 2216 /* fast RGB565->RGB565 blending with surface alpha */
2140 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) 2217 static void
2141 { 2218 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info)
2142 unsigned alpha = info->src->alpha; 2219 {
2143 if(alpha == 128) { 2220 unsigned alpha = info->src->alpha;
2144 Blit16to16SurfaceAlpha128(info, 0xf7de); 2221 if (alpha == 128) {
2145 } else { 2222 Blit16to16SurfaceAlpha128(info, 0xf7de);
2146 int width = info->d_width; 2223 } else {
2147 int height = info->d_height; 2224 int width = info->d_width;
2148 Uint16 *srcp = (Uint16 *)info->s_pixels; 2225 int height = info->d_height;
2149 int srcskip = info->s_skip >> 1; 2226 Uint16 *srcp = (Uint16 *) info->s_pixels;
2150 Uint16 *dstp = (Uint16 *)info->d_pixels; 2227 int srcskip = info->s_skip >> 1;
2151 int dstskip = info->d_skip >> 1; 2228 Uint16 *dstp = (Uint16 *) info->d_pixels;
2152 Uint32 s, d; 2229 int dstskip = info->d_skip >> 1;
2153 2230 Uint32 s, d;
2154 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; 2231
2155 2232 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
2156 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 2233
2157 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ 2234 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2158 alpha >>= 3; /* downscale alpha to 5 bits */ 2235 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
2159 2236 alpha >>= 3; /* downscale alpha to 5 bits */
2160 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 2237
2161 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 2238 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
2162 /* position alpha to allow for mullo and mulhi on diff channels 2239 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
2163 to reduce the number of operations */ 2240 /* position alpha to allow for mullo and mulhi on diff channels
2164 mm_alpha = _mm_slli_si64(mm_alpha, 3); 2241 to reduce the number of operations */
2165 2242 mm_alpha = _mm_slli_si64(mm_alpha, 3);
2166 /* Setup the 565 color channel masks */ 2243
2167 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ 2244 /* Setup the 565 color channel masks */
2168 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 2245 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */
2169 2246 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
2170 while(height--) { 2247
2248 while (height--) {
2249 /* *INDENT-OFF* */
2171 DUFFS_LOOP_QUATRO2( 2250 DUFFS_LOOP_QUATRO2(
2172 { 2251 {
2173 s = *srcp++; 2252 s = *srcp++;
2174 d = *dstp; 2253 d = *dstp;
2175 /* 2254 /*
2260 2339
2261 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ 2340 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
2262 2341
2263 srcp += 4; 2342 srcp += 4;
2264 dstp += 4; 2343 dstp += 4;
2265 }, width); 2344 }, width);
2266 srcp += srcskip; 2345 /* *INDENT-ON* */
2267 dstp += dstskip; 2346 srcp += srcskip;
2268 } 2347 dstp += dstskip;
2269 _mm_empty(); 2348 }
2270 } 2349 _mm_empty();
2350 }
2271 } 2351 }
2272 2352
2273 /* fast RGB555->RGB555 blending with surface alpha */ 2353 /* fast RGB555->RGB555 blending with surface alpha */
2274 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) 2354 static void
2275 { 2355 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info)
2276 unsigned alpha = info->src->alpha; 2356 {
2277 if(alpha == 128) { 2357 unsigned alpha = info->src->alpha;
2278 Blit16to16SurfaceAlpha128(info, 0xfbde); 2358 if (alpha == 128) {
2279 } else { 2359 Blit16to16SurfaceAlpha128(info, 0xfbde);
2280 int width = info->d_width; 2360 } else {
2281 int height = info->d_height; 2361 int width = info->d_width;
2282 Uint16 *srcp = (Uint16 *)info->s_pixels; 2362 int height = info->d_height;
2283 int srcskip = info->s_skip >> 1; 2363 Uint16 *srcp = (Uint16 *) info->s_pixels;
2284 Uint16 *dstp = (Uint16 *)info->d_pixels; 2364 int srcskip = info->s_skip >> 1;
2285 int dstskip = info->d_skip >> 1; 2365 Uint16 *dstp = (Uint16 *) info->d_pixels;
2286 Uint32 s, d; 2366 int dstskip = info->d_skip >> 1;
2287 2367 Uint32 s, d;
2288 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; 2368
2289 2369 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
2290 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 2370
2291 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ 2371 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2292 alpha >>= 3; /* downscale alpha to 5 bits */ 2372 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
2293 2373 alpha >>= 3; /* downscale alpha to 5 bits */
2294 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 2374
2295 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 2375 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
2296 /* position alpha to allow for mullo and mulhi on diff channels 2376 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
2297 to reduce the number of operations */ 2377 /* position alpha to allow for mullo and mulhi on diff channels
2298 mm_alpha = _mm_slli_si64(mm_alpha, 3); 2378 to reduce the number of operations */
2299 2379 mm_alpha = _mm_slli_si64(mm_alpha, 3);
2300 /* Setup the 555 color channel masks */ 2380
2301 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ 2381 /* Setup the 555 color channel masks */
2302 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ 2382 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */
2303 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 2383 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */
2304 2384 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
2305 while(height--) { 2385
2386 while (height--) {
2387 /* *INDENT-OFF* */
2306 DUFFS_LOOP_QUATRO2( 2388 DUFFS_LOOP_QUATRO2(
2307 { 2389 {
2308 s = *srcp++; 2390 s = *srcp++;
2309 d = *dstp; 2391 d = *dstp;
2310 /* 2392 /*
2395 2477
2396 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ 2478 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
2397 2479
2398 srcp += 4; 2480 srcp += 4;
2399 dstp += 4; 2481 dstp += 4;
2400 }, width); 2482 }, width);
2401 srcp += srcskip; 2483 /* *INDENT-ON* */
2402 dstp += dstskip; 2484 srcp += srcskip;
2403 } 2485 dstp += dstskip;
2404 _mm_empty(); 2486 }
2405 } 2487 _mm_empty();
2488 }
2406 } 2489 }
2407 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 2490 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
2408 2491
2409 /* fast RGB565->RGB565 blending with surface alpha */ 2492 /* fast RGB565->RGB565 blending with surface alpha */
2410 static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) 2493 static void
2411 { 2494 Blit565to565SurfaceAlpha(SDL_BlitInfo * info)
2412 unsigned alpha = info->src->alpha; 2495 {
2413 if(alpha == 128) { 2496 unsigned alpha = info->src->alpha;
2414 Blit16to16SurfaceAlpha128(info, 0xf7de); 2497 if (alpha == 128) {
2415 } else { 2498 Blit16to16SurfaceAlpha128(info, 0xf7de);
2416 int width = info->d_width; 2499 } else {
2417 int height = info->d_height; 2500 int width = info->d_width;
2418 Uint16 *srcp = (Uint16 *)info->s_pixels; 2501 int height = info->d_height;
2419 int srcskip = info->s_skip >> 1; 2502 Uint16 *srcp = (Uint16 *) info->s_pixels;
2420 Uint16 *dstp = (Uint16 *)info->d_pixels; 2503 int srcskip = info->s_skip >> 1;
2421 int dstskip = info->d_skip >> 1; 2504 Uint16 *dstp = (Uint16 *) info->d_pixels;
2422 alpha >>= 3; /* downscale alpha to 5 bits */ 2505 int dstskip = info->d_skip >> 1;
2423 2506 alpha >>= 3; /* downscale alpha to 5 bits */
2424 while(height--) { 2507
2508 while (height--) {
2509 /* *INDENT-OFF* */
2425 DUFFS_LOOP4({ 2510 DUFFS_LOOP4({
2426 Uint32 s = *srcp++; 2511 Uint32 s = *srcp++;
2427 Uint32 d = *dstp; 2512 Uint32 d = *dstp;
2428 /* 2513 /*
2429 * shift out the middle component (green) to 2514 * shift out the middle component (green) to
2434 d = (d | d << 16) & 0x07e0f81f; 2519 d = (d | d << 16) & 0x07e0f81f;
2435 d += (s - d) * alpha >> 5; 2520 d += (s - d) * alpha >> 5;
2436 d &= 0x07e0f81f; 2521 d &= 0x07e0f81f;
2437 *dstp++ = (Uint16)(d | d >> 16); 2522 *dstp++ = (Uint16)(d | d >> 16);
2438 }, width); 2523 }, width);
2439 srcp += srcskip; 2524 /* *INDENT-ON* */
2440 dstp += dstskip; 2525 srcp += srcskip;
2441 } 2526 dstp += dstskip;
2442 } 2527 }
2528 }
2443 } 2529 }
2444 2530
2445 /* fast RGB555->RGB555 blending with surface alpha */ 2531 /* fast RGB555->RGB555 blending with surface alpha */
2446 static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) 2532 static void
2447 { 2533 Blit555to555SurfaceAlpha(SDL_BlitInfo * info)
2448 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 2534 {
2449 if(alpha == 128) { 2535 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */
2450 Blit16to16SurfaceAlpha128(info, 0xfbde); 2536 if (alpha == 128) {
2451 } else { 2537 Blit16to16SurfaceAlpha128(info, 0xfbde);
2452 int width = info->d_width; 2538 } else {
2453 int height = info->d_height; 2539 int width = info->d_width;
2454 Uint16 *srcp = (Uint16 *)info->s_pixels; 2540 int height = info->d_height;
2455 int srcskip = info->s_skip >> 1; 2541 Uint16 *srcp = (Uint16 *) info->s_pixels;
2456 Uint16 *dstp = (Uint16 *)info->d_pixels; 2542 int srcskip = info->s_skip >> 1;
2457 int dstskip = info->d_skip >> 1; 2543 Uint16 *dstp = (Uint16 *) info->d_pixels;
2458 alpha >>= 3; /* downscale alpha to 5 bits */ 2544 int dstskip = info->d_skip >> 1;
2459 2545 alpha >>= 3; /* downscale alpha to 5 bits */
2460 while(height--) { 2546
2547 while (height--) {
2548 /* *INDENT-OFF* */
2461 DUFFS_LOOP4({ 2549 DUFFS_LOOP4({
2462 Uint32 s = *srcp++; 2550 Uint32 s = *srcp++;
2463 Uint32 d = *dstp; 2551 Uint32 d = *dstp;
2464 /* 2552 /*
2465 * shift out the middle component (green) to 2553 * shift out the middle component (green) to
2470 d = (d | d << 16) & 0x03e07c1f; 2558 d = (d | d << 16) & 0x03e07c1f;
2471 d += (s - d) * alpha >> 5; 2559 d += (s - d) * alpha >> 5;
2472 d &= 0x03e07c1f; 2560 d &= 0x03e07c1f;
2473 *dstp++ = (Uint16)(d | d >> 16); 2561 *dstp++ = (Uint16)(d | d >> 16);
2474 }, width); 2562 }, width);
2475 srcp += srcskip; 2563 /* *INDENT-ON* */
2476 dstp += dstskip; 2564 srcp += srcskip;
2477 } 2565 dstp += dstskip;
2478 } 2566 }
2567 }
2479 } 2568 }
2480 2569
2481 /* fast ARGB8888->RGB565 blending with pixel alpha */ 2570 /* fast ARGB8888->RGB565 blending with pixel alpha */
2482 static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) 2571 static void
2483 { 2572 BlitARGBto565PixelAlpha(SDL_BlitInfo * info)
2484 int width = info->d_width; 2573 {
2485 int height = info->d_height; 2574 int width = info->d_width;
2486 Uint32 *srcp = (Uint32 *)info->s_pixels; 2575 int height = info->d_height;
2487 int srcskip = info->s_skip >> 2; 2576 Uint32 *srcp = (Uint32 *) info->s_pixels;
2488 Uint16 *dstp = (Uint16 *)info->d_pixels; 2577 int srcskip = info->s_skip >> 2;
2489 int dstskip = info->d_skip >> 1; 2578 Uint16 *dstp = (Uint16 *) info->d_pixels;
2490 2579 int dstskip = info->d_skip >> 1;
2491 while(height--) { 2580
2581 while (height--) {
2582 /* *INDENT-OFF* */
2492 DUFFS_LOOP4({ 2583 DUFFS_LOOP4({
2493 Uint32 s = *srcp; 2584 Uint32 s = *srcp;
2494 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ 2585 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */
2495 /* FIXME: Here we special-case opaque alpha since the 2586 /* FIXME: Here we special-case opaque alpha since the
2496 compositioning used (>>8 instead of /255) doesn't handle 2587 compositioning used (>>8 instead of /255) doesn't handle
2514 } 2605 }
2515 } 2606 }
2516 srcp++; 2607 srcp++;
2517 dstp++; 2608 dstp++;
2518 }, width); 2609 }, width);
2519 srcp += srcskip; 2610 /* *INDENT-ON* */
2520 dstp += dstskip; 2611 srcp += srcskip;
2521 } 2612 dstp += dstskip;
2613 }
2522 } 2614 }
2523 2615
2524 /* fast ARGB8888->RGB555 blending with pixel alpha */ 2616 /* fast ARGB8888->RGB555 blending with pixel alpha */
2525 static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) 2617 static void
2526 { 2618 BlitARGBto555PixelAlpha(SDL_BlitInfo * info)
2527 int width = info->d_width; 2619 {
2528 int height = info->d_height; 2620 int width = info->d_width;
2529 Uint32 *srcp = (Uint32 *)info->s_pixels; 2621 int height = info->d_height;
2530 int srcskip = info->s_skip >> 2; 2622 Uint32 *srcp = (Uint32 *) info->s_pixels;
2531 Uint16 *dstp = (Uint16 *)info->d_pixels; 2623 int srcskip = info->s_skip >> 2;
2532 int dstskip = info->d_skip >> 1; 2624 Uint16 *dstp = (Uint16 *) info->d_pixels;
2533 2625 int dstskip = info->d_skip >> 1;
2534 while(height--) { 2626
2627 while (height--) {
2628 /* *INDENT-OFF* */
2535 DUFFS_LOOP4({ 2629 DUFFS_LOOP4({
2536 unsigned alpha; 2630 unsigned alpha;
2537 Uint32 s = *srcp; 2631 Uint32 s = *srcp;
2538 alpha = s >> 27; /* downscale alpha to 5 bits */ 2632 alpha = s >> 27; /* downscale alpha to 5 bits */
2539 /* FIXME: Here we special-case opaque alpha since the 2633 /* FIXME: Here we special-case opaque alpha since the
2558 } 2652 }
2559 } 2653 }
2560 srcp++; 2654 srcp++;
2561 dstp++; 2655 dstp++;
2562 }, width); 2656 }, width);
2563 srcp += srcskip; 2657 /* *INDENT-ON* */
2564 dstp += dstskip; 2658 srcp += srcskip;
2565 } 2659 dstp += dstskip;
2660 }
2566 } 2661 }
2567 2662
2568 /* General (slow) N->N blending with per-surface alpha */ 2663 /* General (slow) N->N blending with per-surface alpha */
2569 static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) 2664 static void
2570 { 2665 BlitNtoNSurfaceAlpha(SDL_BlitInfo * info)
2571 int width = info->d_width; 2666 {
2572 int height = info->d_height; 2667 int width = info->d_width;
2573 Uint8 *src = info->s_pixels; 2668 int height = info->d_height;
2574 int srcskip = info->s_skip; 2669 Uint8 *src = info->s_pixels;
2575 Uint8 *dst = info->d_pixels; 2670 int srcskip = info->s_skip;
2576 int dstskip = info->d_skip; 2671 Uint8 *dst = info->d_pixels;
2577 SDL_PixelFormat *srcfmt = info->src; 2672 int dstskip = info->d_skip;
2578 SDL_PixelFormat *dstfmt = info->dst; 2673 SDL_PixelFormat *srcfmt = info->src;
2579 int srcbpp = srcfmt->BytesPerPixel; 2674 SDL_PixelFormat *dstfmt = info->dst;
2580 int dstbpp = dstfmt->BytesPerPixel; 2675 int srcbpp = srcfmt->BytesPerPixel;
2581 unsigned sA = srcfmt->alpha; 2676 int dstbpp = dstfmt->BytesPerPixel;
2582 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 2677 unsigned sA = srcfmt->alpha;
2583 2678 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
2584 if(sA) { 2679
2585 while ( height-- ) { 2680 if (sA) {
2681 while (height--) {
2682 /* *INDENT-OFF* */
2586 DUFFS_LOOP4( 2683 DUFFS_LOOP4(
2587 { 2684 {
2588 Uint32 Pixel; 2685 Uint32 Pixel;
2589 unsigned sR; 2686 unsigned sR;
2590 unsigned sG; 2687 unsigned sG;
2598 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 2695 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
2599 src += srcbpp; 2696 src += srcbpp;
2600 dst += dstbpp; 2697 dst += dstbpp;
2601 }, 2698 },
2602 width); 2699 width);
2603 src += srcskip; 2700 /* *INDENT-ON* */
2604 dst += dstskip; 2701 src += srcskip;
2605 } 2702 dst += dstskip;
2606 } 2703 }
2704 }
2607 } 2705 }
2608 2706
2609 /* General (slow) colorkeyed N->N blending with per-surface alpha */ 2707 /* General (slow) colorkeyed N->N blending with per-surface alpha */
2610 static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) 2708 static void
2611 { 2709 BlitNtoNSurfaceAlphaKey(SDL_BlitInfo * info)
2612 int width = info->d_width; 2710 {
2613 int height = info->d_height; 2711 int width = info->d_width;
2614 Uint8 *src = info->s_pixels; 2712 int height = info->d_height;
2615 int srcskip = info->s_skip; 2713 Uint8 *src = info->s_pixels;
2616 Uint8 *dst = info->d_pixels; 2714 int srcskip = info->s_skip;
2617 int dstskip = info->d_skip; 2715 Uint8 *dst = info->d_pixels;
2618 SDL_PixelFormat *srcfmt = info->src; 2716 int dstskip = info->d_skip;
2619 SDL_PixelFormat *dstfmt = info->dst; 2717 SDL_PixelFormat *srcfmt = info->src;
2620 Uint32 ckey = srcfmt->colorkey; 2718 SDL_PixelFormat *dstfmt = info->dst;
2621 int srcbpp = srcfmt->BytesPerPixel; 2719 Uint32 ckey = srcfmt->colorkey;
2622 int dstbpp = dstfmt->BytesPerPixel; 2720 int srcbpp = srcfmt->BytesPerPixel;
2623 unsigned sA = srcfmt->alpha; 2721 int dstbpp = dstfmt->BytesPerPixel;
2624 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 2722 unsigned sA = srcfmt->alpha;
2625 2723 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
2626 while ( height-- ) { 2724
2725 while (height--) {
2726 /* *INDENT-OFF* */
2627 DUFFS_LOOP4( 2727 DUFFS_LOOP4(
2628 { 2728 {
2629 Uint32 Pixel; 2729 Uint32 Pixel;
2630 unsigned sR; 2730 unsigned sR;
2631 unsigned sG; 2731 unsigned sG;
2642 } 2742 }
2643 src += srcbpp; 2743 src += srcbpp;
2644 dst += dstbpp; 2744 dst += dstbpp;
2645 }, 2745 },
2646 width); 2746 width);
2647 src += srcskip; 2747 /* *INDENT-ON* */
2648 dst += dstskip; 2748 src += srcskip;
2649 } 2749 dst += dstskip;
2750 }
2650 } 2751 }
2651 2752
2652 /* General (slow) N->N blending with pixel alpha */ 2753 /* General (slow) N->N blending with pixel alpha */
2653 static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) 2754 static void
2654 { 2755 BlitNtoNPixelAlpha(SDL_BlitInfo * info)
2655 int width = info->d_width; 2756 {
2656 int height = info->d_height; 2757 int width = info->d_width;
2657 Uint8 *src = info->s_pixels; 2758 int height = info->d_height;
2658 int srcskip = info->s_skip; 2759 Uint8 *src = info->s_pixels;
2659 Uint8 *dst = info->d_pixels; 2760 int srcskip = info->s_skip;
2660 int dstskip = info->d_skip; 2761 Uint8 *dst = info->d_pixels;
2661 SDL_PixelFormat *srcfmt = info->src; 2762 int dstskip = info->d_skip;
2662 SDL_PixelFormat *dstfmt = info->dst; 2763 SDL_PixelFormat *srcfmt = info->src;
2663 2764 SDL_PixelFormat *dstfmt = info->dst;
2664 int srcbpp; 2765
2665 int dstbpp; 2766 int srcbpp;
2666 2767 int dstbpp;
2667 /* Set up some basic variables */ 2768
2668 srcbpp = srcfmt->BytesPerPixel; 2769 /* Set up some basic variables */
2669 dstbpp = dstfmt->BytesPerPixel; 2770 srcbpp = srcfmt->BytesPerPixel;
2670 2771 dstbpp = dstfmt->BytesPerPixel;
2671 /* FIXME: for 8bpp source alpha, this doesn't get opaque values 2772
2672 quite right. for <8bpp source alpha, it gets them very wrong 2773 /* FIXME: for 8bpp source alpha, this doesn't get opaque values
2673 (check all macros!) 2774 quite right. for <8bpp source alpha, it gets them very wrong
2674 It is unclear whether there is a good general solution that doesn't 2775 (check all macros!)
2675 need a branch (or a divide). */ 2776 It is unclear whether there is a good general solution that doesn't
2676 while ( height-- ) { 2777 need a branch (or a divide). */
2778 while (height--) {
2779 /* *INDENT-OFF* */
2677 DUFFS_LOOP4( 2780 DUFFS_LOOP4(
2678 { 2781 {
2679 Uint32 Pixel; 2782 Uint32 Pixel;
2680 unsigned sR; 2783 unsigned sR;
2681 unsigned sG; 2784 unsigned sG;
2693 } 2796 }
2694 src += srcbpp; 2797 src += srcbpp;
2695 dst += dstbpp; 2798 dst += dstbpp;
2696 }, 2799 },
2697 width); 2800 width);
2698 src += srcskip; 2801 /* *INDENT-ON* */
2699 dst += dstskip; 2802 src += srcskip;
2700 } 2803 dst += dstskip;
2701 } 2804 }
2702 2805 }
2703 2806
2704 SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index) 2807
2808 SDL_loblit
2809 SDL_CalculateAlphaBlit(SDL_Surface * surface, int blit_index)
2705 { 2810 {
2706 SDL_PixelFormat *sf = surface->format; 2811 SDL_PixelFormat *sf = surface->format;
2707 SDL_PixelFormat *df = surface->map->dst->format; 2812 SDL_PixelFormat *df = surface->map->dst->format;
2708 2813
2709 if(sf->Amask == 0) { 2814 if (sf->Amask == 0) {
2710 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { 2815 if ((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) {
2711 if(df->BytesPerPixel == 1) 2816 if (df->BytesPerPixel == 1)
2712 return BlitNto1SurfaceAlphaKey; 2817 return BlitNto1SurfaceAlphaKey;
2713 else 2818 else
2714 #if SDL_ALTIVEC_BLITTERS 2819 #if SDL_ALTIVEC_BLITTERS
2715 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && 2820 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
2716 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 2821 !(surface->map->dst->flags & SDL_HWSURFACE)
2717 return Blit32to32SurfaceAlphaKeyAltivec; 2822 && SDL_HasAltiVec())
2718 else 2823 return Blit32to32SurfaceAlphaKeyAltivec;
2824 else
2719 #endif 2825 #endif
2720 return BlitNtoNSurfaceAlphaKey; 2826 return BlitNtoNSurfaceAlphaKey;
2721 } else { 2827 } else {
2722 /* Per-surface alpha blits */ 2828 /* Per-surface alpha blits */
2723 switch(df->BytesPerPixel) { 2829 switch (df->BytesPerPixel) {
2724 case 1: 2830 case 1:
2725 return BlitNto1SurfaceAlpha; 2831 return BlitNto1SurfaceAlpha;
2726 2832
2727 case 2: 2833 case 2:
2728 if(surface->map->identity) { 2834 if (surface->map->identity) {
2729 if(df->Gmask == 0x7e0) 2835 if (df->Gmask == 0x7e0) {
2730 {
2731 #if MMX_ASMBLIT 2836 #if MMX_ASMBLIT
2732 if(SDL_HasMMX()) 2837 if (SDL_HasMMX())
2733 return Blit565to565SurfaceAlphaMMX; 2838 return Blit565to565SurfaceAlphaMMX;
2734 else 2839 else
2735 #endif 2840 #endif
2736 return Blit565to565SurfaceAlpha; 2841 return Blit565to565SurfaceAlpha;
2737 } 2842 } else if (df->Gmask == 0x3e0) {
2738 else if(df->Gmask == 0x3e0)
2739 {
2740 #if MMX_ASMBLIT 2843 #if MMX_ASMBLIT
2741 if(SDL_HasMMX()) 2844 if (SDL_HasMMX())
2742 return Blit555to555SurfaceAlphaMMX; 2845 return Blit555to555SurfaceAlphaMMX;
2743 else 2846 else
2744 #endif 2847 #endif
2745 return Blit555to555SurfaceAlpha; 2848 return Blit555to555SurfaceAlpha;
2746 } 2849 }
2747 } 2850 }
2748 return BlitNtoNSurfaceAlpha; 2851 return BlitNtoNSurfaceAlpha;
2749 2852
2750 case 4: 2853 case 4:
2751 if(sf->Rmask == df->Rmask 2854 if (sf->Rmask == df->Rmask
2752 && sf->Gmask == df->Gmask 2855 && sf->Gmask == df->Gmask
2753 && sf->Bmask == df->Bmask 2856 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
2754 && sf->BytesPerPixel == 4)
2755 {
2756 #if MMX_ASMBLIT 2857 #if MMX_ASMBLIT
2757 if(sf->Rshift % 8 == 0 2858 if (sf->Rshift % 8 == 0
2758 && sf->Gshift % 8 == 0 2859 && sf->Gshift % 8 == 0
2759 && sf->Bshift % 8 == 0 2860 && sf->Bshift % 8 == 0 && SDL_HasMMX())
2760 && SDL_HasMMX()) 2861 return BlitRGBtoRGBSurfaceAlphaMMX;
2761 return BlitRGBtoRGBSurfaceAlphaMMX;
2762 #endif 2862 #endif
2763 if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) 2863 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
2764 {
2765 #if SDL_ALTIVEC_BLITTERS 2864 #if SDL_ALTIVEC_BLITTERS
2766 if(!(surface->map->dst->flags & SDL_HWSURFACE) 2865 if (!(surface->map->dst->flags & SDL_HWSURFACE)
2767 && SDL_HasAltiVec()) 2866 && SDL_HasAltiVec())
2768 return BlitRGBtoRGBSurfaceAlphaAltivec; 2867 return BlitRGBtoRGBSurfaceAlphaAltivec;
2769 #endif 2868 #endif
2770 return BlitRGBtoRGBSurfaceAlpha; 2869 return BlitRGBtoRGBSurfaceAlpha;
2771 } 2870 }
2772 } 2871 }
2773 #if SDL_ALTIVEC_BLITTERS 2872 #if SDL_ALTIVEC_BLITTERS
2774 if((sf->BytesPerPixel == 4) && 2873 if ((sf->BytesPerPixel == 4) &&
2775 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 2874 !(surface->map->dst->flags & SDL_HWSURFACE)
2776 return Blit32to32SurfaceAlphaAltivec; 2875 && SDL_HasAltiVec())
2777 else 2876 return Blit32to32SurfaceAlphaAltivec;
2877 else
2778 #endif 2878 #endif
2779 return BlitNtoNSurfaceAlpha; 2879 return BlitNtoNSurfaceAlpha;
2780 2880
2781 case 3: 2881 case 3:
2782 default: 2882 default:
2783 return BlitNtoNSurfaceAlpha; 2883 return BlitNtoNSurfaceAlpha;
2784 } 2884 }
2785 } 2885 }
2786 } else { 2886 } else {
2787 /* Per-pixel alpha blits */ 2887 /* Per-pixel alpha blits */
2788 switch(df->BytesPerPixel) { 2888 switch (df->BytesPerPixel) {
2789 case 1: 2889 case 1:
2790 return BlitNto1PixelAlpha; 2890 return BlitNto1PixelAlpha;
2791 2891
2792 case 2: 2892 case 2:
2793 #if SDL_ALTIVEC_BLITTERS 2893 #if SDL_ALTIVEC_BLITTERS
2794 if(sf->BytesPerPixel == 4 && !(surface->map->dst->flags & SDL_HWSURFACE) && 2894 if (sf->BytesPerPixel == 4
2795 df->Gmask == 0x7e0 && 2895 && !(surface->map->dst->flags & SDL_HWSURFACE)
2796 df->Bmask == 0x1f && SDL_HasAltiVec()) 2896 && df->Gmask == 0x7e0 && df->Bmask == 0x1f
2797 return Blit32to565PixelAlphaAltivec; 2897 && SDL_HasAltiVec())
2798 else 2898 return Blit32to565PixelAlphaAltivec;
2899 else
2799 #endif 2900 #endif
2800 if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 2901 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
2801 && sf->Gmask == 0xff00 2902 && sf->Gmask == 0xff00
2802 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) 2903 && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
2803 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { 2904 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
2804 if(df->Gmask == 0x7e0) 2905 if (df->Gmask == 0x7e0)
2805 return BlitARGBto565PixelAlpha; 2906 return BlitARGBto565PixelAlpha;
2806 else if(df->Gmask == 0x3e0) 2907 else if (df->Gmask == 0x3e0)
2807 return BlitARGBto555PixelAlpha; 2908 return BlitARGBto555PixelAlpha;
2808 } 2909 }
2809 return BlitNtoNPixelAlpha; 2910 return BlitNtoNPixelAlpha;
2810 2911
2811 case 4: 2912 case 4:
2812 if(sf->Rmask == df->Rmask 2913 if (sf->Rmask == df->Rmask
2813 && sf->Gmask == df->Gmask 2914 && sf->Gmask == df->Gmask
2814 && sf->Bmask == df->Bmask 2915 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
2815 && sf->BytesPerPixel == 4)
2816 {
2817 #if MMX_ASMBLIT 2916 #if MMX_ASMBLIT
2818 if(sf->Rshift % 8 == 0 2917 if (sf->Rshift % 8 == 0
2819 && sf->Gshift % 8 == 0 2918 && sf->Gshift % 8 == 0
2820 && sf->Bshift % 8 == 0 2919 && sf->Bshift % 8 == 0
2821 && sf->Ashift % 8 == 0 2920 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
2822 && sf->Aloss == 0) 2921 if (SDL_Has3DNow())
2823 { 2922 return BlitRGBtoRGBPixelAlphaMMX3DNOW;
2824 if(SDL_Has3DNow()) 2923 if (SDL_HasMMX())
2825 return BlitRGBtoRGBPixelAlphaMMX3DNOW; 2924 return BlitRGBtoRGBPixelAlphaMMX;
2826 if(SDL_HasMMX()) 2925 }
2827 return BlitRGBtoRGBPixelAlphaMMX;
2828 }
2829 #endif 2926 #endif
2830 if(sf->Amask == 0xff000000) 2927 if (sf->Amask == 0xff000000) {
2831 {
2832 #if SDL_ALTIVEC_BLITTERS 2928 #if SDL_ALTIVEC_BLITTERS
2833 if(!(surface->map->dst->flags & SDL_HWSURFACE) 2929 if (!(surface->map->dst->flags & SDL_HWSURFACE)
2834 && SDL_HasAltiVec()) 2930 && SDL_HasAltiVec())
2835 return BlitRGBtoRGBPixelAlphaAltivec; 2931 return BlitRGBtoRGBPixelAlphaAltivec;
2836 #endif 2932 #endif
2837 return BlitRGBtoRGBPixelAlpha; 2933 return BlitRGBtoRGBPixelAlpha;
2838 } 2934 }
2839 } 2935 }
2840 #if SDL_ALTIVEC_BLITTERS 2936 #if SDL_ALTIVEC_BLITTERS
2841 if (sf->Amask && sf->BytesPerPixel == 4 && 2937 if (sf->Amask && sf->BytesPerPixel == 4 &&
2842 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 2938 !(surface->map->dst->flags & SDL_HWSURFACE)
2843 return Blit32to32PixelAlphaAltivec; 2939 && SDL_HasAltiVec())
2844 else 2940 return Blit32to32PixelAlphaAltivec;
2941 else
2845 #endif 2942 #endif
2846 return BlitNtoNPixelAlpha; 2943 return BlitNtoNPixelAlpha;
2847 2944
2848 case 3: 2945 case 3:
2849 default: 2946 default:
2850 return BlitNtoNPixelAlpha; 2947 return BlitNtoNPixelAlpha;
2851 } 2948 }
2852 } 2949 }
2853 } 2950 }
2854 2951
2952 /* vi: set ts=4 sw=4 expandtab: */