comparison src/video/SDL_blit_A.c @ 1662:782fd950bd46 SDL-1.3

Revamp of the video system in progress - adding support for multiple displays, multiple windows, and a full video mode selection API.
WARNING: None of the video drivers have been updated for the new API yet! The API is still under design and very fluid.
The code is now run through a consistent indent format: indent -i4 -nut -nsc -br -ce
The headers are being converted to automatically generate doxygen documentation.
author Sam Lantinga <slouken@libsdl.org>
date Sun, 28 May 2006 13:04:16 +0000
parents 14717b52abc0
children 4da1ee79c9af
comparison: 1661:281d3f4870e5 vs 1662:782fd950bd46
44 #endif 44 #endif
45 45
46 /* Functions to perform alpha blended blitting */ 46 /* Functions to perform alpha blended blitting */
47 47
48 /* N->1 blending with per-surface alpha */ 48 /* N->1 blending with per-surface alpha */
49 static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) 49 static void
50 { 50 BlitNto1SurfaceAlpha (SDL_BlitInfo * info)
51 int width = info->d_width; 51 {
52 int height = info->d_height; 52 int width = info->d_width;
53 Uint8 *src = info->s_pixels; 53 int height = info->d_height;
54 int srcskip = info->s_skip; 54 Uint8 *src = info->s_pixels;
55 Uint8 *dst = info->d_pixels; 55 int srcskip = info->s_skip;
56 int dstskip = info->d_skip; 56 Uint8 *dst = info->d_pixels;
57 Uint8 *palmap = info->table; 57 int dstskip = info->d_skip;
58 SDL_PixelFormat *srcfmt = info->src; 58 Uint8 *palmap = info->table;
59 SDL_PixelFormat *dstfmt = info->dst; 59 SDL_PixelFormat *srcfmt = info->src;
60 int srcbpp = srcfmt->BytesPerPixel; 60 SDL_PixelFormat *dstfmt = info->dst;
61 61 int srcbpp = srcfmt->BytesPerPixel;
62 const unsigned A = srcfmt->alpha; 62
63 63 const unsigned A = srcfmt->alpha;
64 while ( height-- ) { 64
65 while (height--) {
66 /* *INDENT-OFF* */
65 DUFFS_LOOP4( 67 DUFFS_LOOP4(
66 { 68 {
67 Uint32 Pixel; 69 Uint32 Pixel;
68 unsigned sR; 70 unsigned sR;
69 unsigned sG; 71 unsigned sG;
91 } 93 }
92 dst++; 94 dst++;
93 src += srcbpp; 95 src += srcbpp;
94 }, 96 },
95 width); 97 width);
96 src += srcskip; 98 /* *INDENT-ON* */
97 dst += dstskip; 99 src += srcskip;
98 } 100 dst += dstskip;
101 }
99 } 102 }
100 103
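Note: the per-channel blend these N->1 routines perform (before the result is mapped through the palette table in info->table) uses the same (s - d) * alpha >> 8 approximation seen throughout this file. A minimal scalar sketch, with an illustrative helper name:

    #include <stdint.h>

    /* Sketch of the per-channel surface-alpha blend; not the actual
       SDL macro, just the arithmetic it performs. */
    static uint8_t blend_channel(uint8_t s, uint8_t d, unsigned a)
    {
        /* d + (s - d) * a / 256, computed in signed arithmetic */
        return (uint8_t) (d + ((((int) s - (int) d) * (int) a) >> 8));
    }
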
101 /* N->1 blending with pixel alpha */ 104 /* N->1 blending with pixel alpha */
102 static void BlitNto1PixelAlpha(SDL_BlitInfo *info) 105 static void
103 { 106 BlitNto1PixelAlpha (SDL_BlitInfo * info)
104 int width = info->d_width; 107 {
105 int height = info->d_height; 108 int width = info->d_width;
106 Uint8 *src = info->s_pixels; 109 int height = info->d_height;
107 int srcskip = info->s_skip; 110 Uint8 *src = info->s_pixels;
108 Uint8 *dst = info->d_pixels; 111 int srcskip = info->s_skip;
109 int dstskip = info->d_skip; 112 Uint8 *dst = info->d_pixels;
110 Uint8 *palmap = info->table; 113 int dstskip = info->d_skip;
111 SDL_PixelFormat *srcfmt = info->src; 114 Uint8 *palmap = info->table;
112 SDL_PixelFormat *dstfmt = info->dst; 115 SDL_PixelFormat *srcfmt = info->src;
113 int srcbpp = srcfmt->BytesPerPixel; 116 SDL_PixelFormat *dstfmt = info->dst;
114 117 int srcbpp = srcfmt->BytesPerPixel;
115 /* FIXME: fix alpha bit field expansion here too? */ 118
116 while ( height-- ) { 119 /* FIXME: fix alpha bit field expansion here too? */
120 while (height--) {
121 /* *INDENT-OFF* */
117 DUFFS_LOOP4( 122 DUFFS_LOOP4(
118 { 123 {
119 Uint32 Pixel; 124 Uint32 Pixel;
120 unsigned sR; 125 unsigned sR;
121 unsigned sG; 126 unsigned sG;
144 } 149 }
145 dst++; 150 dst++;
146 src += srcbpp; 151 src += srcbpp;
147 }, 152 },
148 width); 153 width);
149 src += srcskip; 154 /* *INDENT-ON* */
150 dst += dstskip; 155 src += srcskip;
151 } 156 dst += dstskip;
157 }
152 } 158 }
153 159
154 /* colorkeyed N->1 blending with per-surface alpha */ 160 /* colorkeyed N->1 blending with per-surface alpha */
155 static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) 161 static void
156 { 162 BlitNto1SurfaceAlphaKey (SDL_BlitInfo * info)
157 int width = info->d_width; 163 {
158 int height = info->d_height; 164 int width = info->d_width;
159 Uint8 *src = info->s_pixels; 165 int height = info->d_height;
160 int srcskip = info->s_skip; 166 Uint8 *src = info->s_pixels;
161 Uint8 *dst = info->d_pixels; 167 int srcskip = info->s_skip;
162 int dstskip = info->d_skip; 168 Uint8 *dst = info->d_pixels;
163 Uint8 *palmap = info->table; 169 int dstskip = info->d_skip;
164 SDL_PixelFormat *srcfmt = info->src; 170 Uint8 *palmap = info->table;
165 SDL_PixelFormat *dstfmt = info->dst; 171 SDL_PixelFormat *srcfmt = info->src;
166 int srcbpp = srcfmt->BytesPerPixel; 172 SDL_PixelFormat *dstfmt = info->dst;
167 Uint32 ckey = srcfmt->colorkey; 173 int srcbpp = srcfmt->BytesPerPixel;
168 174 Uint32 ckey = srcfmt->colorkey;
169 const int A = srcfmt->alpha; 175
170 176 const int A = srcfmt->alpha;
171 while ( height-- ) { 177
178 while (height--) {
179 /* *INDENT-OFF* */
172 DUFFS_LOOP( 180 DUFFS_LOOP(
173 { 181 {
174 Uint32 Pixel; 182 Uint32 Pixel;
175 unsigned sR; 183 unsigned sR;
176 unsigned sG; 184 unsigned sG;
200 } 208 }
201 dst++; 209 dst++;
202 src += srcbpp; 210 src += srcbpp;
203 }, 211 },
204 width); 212 width);
205 src += srcskip; 213 /* *INDENT-ON* */
206 dst += dstskip; 214 src += srcskip;
207 } 215 dst += dstskip;
216 }
208 } 217 }
209 218
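The keyed routine above targets an 8-bit destination through a palette map; the same idea in a simpler 32-bit ARGB setting (hypothetical helper, assuming the RGB channels occupy 0x00ffffff) looks roughly like this: pixels matching the source colorkey are left untouched, everything else is blended with the constant surface alpha.

    #include <stdint.h>

    static void blend_row_keyed(const uint32_t *src, uint32_t *dst, int w,
                                uint32_t ckey, uint32_t rgbmask, unsigned a)
    {
        int i, shift;
        for (i = 0; i < w; ++i) {
            uint32_t s = src[i], d = dst[i], out;
            if ((s & rgbmask) == ckey)
                continue;                   /* colorkey: leave dst alone */
            out = d & ~rgbmask;             /* keep the destination alpha byte */
            for (shift = 0; shift < 24; shift += 8) {   /* B, G and R channels */
                int sc = (int) ((s >> shift) & 0xff);
                int dc = (int) ((d >> shift) & 0xff);
                out |= (uint32_t) ((dc + (((sc - dc) * (int) a) >> 8)) & 0xff) << shift;
            }
            dst[i] = out;
        }
    }
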
210 #if GCC_ASMBLIT 219 #if GCC_ASMBLIT
211 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 220 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
212 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) 221 static void
213 { 222 BlitRGBtoRGBSurfaceAlpha128MMX (SDL_BlitInfo * info)
214 int width = info->d_width; 223 {
215 int height = info->d_height; 224 int width = info->d_width;
216 Uint32 *srcp = (Uint32 *)info->s_pixels; 225 int height = info->d_height;
217 int srcskip = info->s_skip >> 2; 226 Uint32 *srcp = (Uint32 *) info->s_pixels;
218 Uint32 *dstp = (Uint32 *)info->d_pixels; 227 int srcskip = info->s_skip >> 2;
219 int dstskip = info->d_skip >> 2; 228 Uint32 *dstp = (Uint32 *) info->d_pixels;
220 Uint32 dalpha = info->dst->Amask; 229 int dstskip = info->d_skip >> 2;
221 Uint8 load[8]; 230 Uint32 dalpha = info->dst->Amask;
222 231 Uint8 load[8];
223 *(Uint64 *)load = 0x00fefefe00fefefeULL;/* alpha128 mask */ 232
224 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ 233 *(Uint64 *) load = 0x00fefefe00fefefeULL; /* alpha128 mask */
225 *(Uint64 *)load = 0x0001010100010101ULL;/* !alpha128 mask */ 234 movq_m2r (*load, mm4); /* alpha128 mask -> mm4 */
226 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ 235 *(Uint64 *) load = 0x0001010100010101ULL; /* !alpha128 mask */
227 movd_m2r(dalpha, mm7); /* dst alpha mask */ 236 movq_m2r (*load, mm3); /* !alpha128 mask -> mm3 */
228 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ 237 movd_m2r (dalpha, mm7); /* dst alpha mask */
229 while(height--) { 238 punpckldq_r2r (mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */
239 while (height--) {
240 /* *INDENT-OFF* */
230 DUFFS_LOOP_DOUBLE2( 241 DUFFS_LOOP_DOUBLE2(
231 { 242 {
232 Uint32 s = *srcp++; 243 Uint32 s = *srcp++;
233 Uint32 d = *dstp; 244 Uint32 d = *dstp;
234 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 245 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
251 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ 262 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */
252 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ 263 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */
253 dstp += 2; 264 dstp += 2;
254 srcp += 2; 265 srcp += 2;
255 }, width); 266 }, width);
256 srcp += srcskip; 267 /* *INDENT-ON* */
257 dstp += dstskip; 268 srcp += srcskip;
258 } 269 dstp += dstskip;
259 emms(); 270 }
271 emms ();
260 } 272 }
261 273
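The alpha==128 special case above relies on averaging all three channels in one 32-bit operation, as the scalar fallback inside the loop shows. Clearing the low bit of every channel (0x00fefefe) keeps the per-channel sums from carrying into the neighbouring channel, and the (s & d & 0x00010101) term restores the rounding bit when both inputs had it set. The same arithmetic as a standalone helper:

    #include <stdint.h>

    static uint32_t average_argb(uint32_t s, uint32_t d, uint32_t dst_amask)
    {
        return ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
                + (s & d & 0x00010101)) | dst_amask;
    }
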
262 /* fast RGB888->(A)RGB888 blending with surface alpha */ 274 /* fast RGB888->(A)RGB888 blending with surface alpha */
263 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) 275 static void
264 { 276 BlitRGBtoRGBSurfaceAlphaMMX (SDL_BlitInfo * info)
265 SDL_PixelFormat* df = info->dst; 277 {
266 unsigned alpha = info->src->alpha; 278 SDL_PixelFormat *df = info->dst;
267 279 unsigned alpha = info->src->alpha;
268 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 280
269 /* only call a128 version when R,G,B occupy lower bits */ 281 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
270 BlitRGBtoRGBSurfaceAlpha128MMX(info); 282 /* only call a128 version when R,G,B occupy lower bits */
271 } else { 283 BlitRGBtoRGBSurfaceAlpha128MMX (info);
272 int width = info->d_width; 284 } else {
273 int height = info->d_height; 285 int width = info->d_width;
274 Uint32 *srcp = (Uint32 *)info->s_pixels; 286 int height = info->d_height;
275 int srcskip = info->s_skip >> 2; 287 Uint32 *srcp = (Uint32 *) info->s_pixels;
276 Uint32 *dstp = (Uint32 *)info->d_pixels; 288 int srcskip = info->s_skip >> 2;
277 int dstskip = info->d_skip >> 2; 289 Uint32 *dstp = (Uint32 *) info->d_pixels;
278 290 int dstskip = info->d_skip >> 2;
279 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ 291
280 /* form the alpha mult */ 292 pxor_r2r (mm5, mm5); /* 0 -> mm5 */
281 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ 293 /* form the alpha mult */
282 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ 294 movd_m2r (alpha, mm4); /* 0000000A -> mm4 */
283 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ 295 punpcklwd_r2r (mm4, mm4); /* 00000A0A -> mm4 */
284 alpha = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); 296 punpckldq_r2r (mm4, mm4); /* 0A0A0A0A -> mm4 */
285 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ 297 alpha =
286 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ 298 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->
287 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ 299 Bshift);
288 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ 300 movd_m2r (alpha, mm0); /* 00000FFF -> mm0 */
289 movd_m2r(df->Amask, mm7); /* dst alpha mask */ 301 punpcklbw_r2r (mm0, mm0); /* 00FFFFFF -> mm0 */
290 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ 302 pand_r2r (mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */
291 303 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */
292 while(height--) { 304 movd_m2r (df->Amask, mm7); /* dst alpha mask */
305 punpckldq_r2r (mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */
306
307 while (height--) {
308 /* *INDENT-OFF* */
293 DUFFS_LOOP_DOUBLE2({ 309 DUFFS_LOOP_DOUBLE2({
294 /* One Pixel Blend */ 310 /* One Pixel Blend */
295 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ 311 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/
296 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ 312 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/
297 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ 313 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */
335 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ 351 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */
336 352
337 srcp += 2; 353 srcp += 2;
338 dstp += 2; 354 dstp += 2;
339 }, width); 355 }, width);
340 srcp += srcskip; 356 /* *INDENT-ON* */
341 dstp += dstskip; 357 srcp += srcskip;
342 } 358 dstp += dstskip;
343 emms(); 359 }
344 } 360 emms ();
361 }
345 } 362 }
346 363
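Before the MMX loop above runs, the 8-bit surface alpha is replicated into every byte of a multiplier word and the destination's alpha lane is masked out, so only R, G and B get scaled; full alpha is OR'd back in afterwards via df->Amask. A scalar sketch of that multiplier construction (illustrative helper, mirroring the amult/chanmask setup in these functions):

    #include <stdint.h>

    static uint32_t make_alpha_mult(unsigned alpha, int rshift, int gshift, int bshift)
    {
        uint32_t amult, chanmask;
        amult = alpha | (alpha << 8);
        amult |= amult << 16;                 /* replicate alpha into every byte */
        chanmask = (0xffu << rshift) | (0xffu << gshift) | (0xffu << bshift);
        return amult & chanmask;              /* drop the alpha lane, keep R, G, B */
    }
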
347 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 364 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
348 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) 365 static void
349 { 366 BlitRGBtoRGBPixelAlphaMMX (SDL_BlitInfo * info)
350 int width = info->d_width; 367 {
351 int height = info->d_height; 368 int width = info->d_width;
352 Uint32 *srcp = (Uint32 *)info->s_pixels; 369 int height = info->d_height;
353 int srcskip = info->s_skip >> 2; 370 Uint32 *srcp = (Uint32 *) info->s_pixels;
354 Uint32 *dstp = (Uint32 *)info->d_pixels; 371 int srcskip = info->s_skip >> 2;
355 int dstskip = info->d_skip >> 2; 372 Uint32 *dstp = (Uint32 *) info->d_pixels;
356 SDL_PixelFormat* sf = info->src; 373 int dstskip = info->d_skip >> 2;
357 Uint32 amask = sf->Amask; 374 SDL_PixelFormat *sf = info->src;
358 375 Uint32 amask = sf->Amask;
359 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ 376
360 /* form multiplication mask */ 377 pxor_r2r (mm6, mm6); /* 0 -> mm6 */
361 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ 378 /* form multiplication mask */
362 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ 379 movd_m2r (sf->Amask, mm7); /* 0000F000 -> mm7 */
363 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ 380 punpcklbw_r2r (mm7, mm7); /* FF000000 -> mm7 */
364 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ 381 pcmpeqb_r2r (mm0, mm0); /* FFFFFFFF -> mm0 */
365 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ 382 movq_r2r (mm0, mm3); /* FFFFFFFF -> mm3 (for later) */
366 /* form channel masks */ 383 pxor_r2r (mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */
367 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ 384 /* form channel masks */
368 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ 385 movq_r2r (mm7, mm0); /* 00FFFFFF -> mm0 */
369 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ 386 packsswb_r2r (mm6, mm0); /* 00000FFF -> mm0 (channel mask) */
370 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ 387 packsswb_r2r (mm6, mm3); /* 0000FFFF -> mm3 */
371 /* get alpha channel shift */ 388 pxor_r2r (mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */
372 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */ 389 /* get alpha channel shift */
373 390 movd_m2r (sf->Ashift, mm5); /* Ashift -> mm5 */
374 while(height--) { 391
392 while (height--) {
393 /* *INDENT-OFF* */
375 DUFFS_LOOP4({ 394 DUFFS_LOOP4({
376 Uint32 alpha = *srcp & amask; 395 Uint32 alpha = *srcp & amask;
377 /* FIXME: Here we special-case opaque alpha since the 396 /* FIXME: Here we special-case opaque alpha since the
378 compositioning used (>>8 instead of /255) doesn't handle 397 compositioning used (>>8 instead of /255) doesn't handle
379 it correctly. Also special-case alpha=0 for speed? 398 it correctly. Also special-case alpha=0 for speed?
414 movd_r2m(mm2, *dstp);/* mm2 -> dst */ 433 movd_r2m(mm2, *dstp);/* mm2 -> dst */
415 } 434 }
416 ++srcp; 435 ++srcp;
417 ++dstp; 436 ++dstp;
418 }, width); 437 }, width);
419 srcp += srcskip; 438 /* *INDENT-ON* */
420 dstp += dstskip; 439 srcp += srcskip;
421 } 440 dstp += dstskip;
422 emms(); 441 }
423 } 442 emms ();
443 }
444
424 /* End GCC_ASMBLIT */ 445 /* End GCC_ASMBLIT */
425 446
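The per-pixel-alpha loop above approximates c * a / 255 with (c * a) >> 8, which is why the FIXME special-cases opaque alpha: with the shift, a fully opaque source would not quite replace the destination. A sketch of the two forms for one channel:

    #include <stdint.h>

    static uint8_t blend_exact(uint8_t s, uint8_t d, unsigned a)
    {
        return (uint8_t) ((s * a + d * (255 - a)) / 255);
    }

    static uint8_t blend_shift8(uint8_t s, uint8_t d, unsigned a)
    {
        /* cheaper, but blend_shift8(s, d, 255) != s in general */
        return (uint8_t) (d + ((((int) s - (int) d) * (int) a) >> 8));
    }
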
426 #elif MSVC_ASMBLIT 447 #elif MSVC_ASMBLIT
427 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 448 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
428 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) 449 static void
429 { 450 BlitRGBtoRGBSurfaceAlpha128MMX (SDL_BlitInfo * info)
430 int width = info->d_width; 451 {
431 int height = info->d_height; 452 int width = info->d_width;
432 Uint32 *srcp = (Uint32 *)info->s_pixels; 453 int height = info->d_height;
433 int srcskip = info->s_skip >> 2; 454 Uint32 *srcp = (Uint32 *) info->s_pixels;
434 Uint32 *dstp = (Uint32 *)info->d_pixels; 455 int srcskip = info->s_skip >> 2;
435 int dstskip = info->d_skip >> 2; 456 Uint32 *dstp = (Uint32 *) info->d_pixels;
436 Uint32 dalpha = info->dst->Amask; 457 int dstskip = info->d_skip >> 2;
437 458 Uint32 dalpha = info->dst->Amask;
438 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; 459
439 460 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
440 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ 461
441 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ 462 hmask = _mm_set_pi32 (0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */
442 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ 463 lmask = _mm_set_pi32 (0x00010101, 0x00010101); /* !alpha128 mask -> lmask */
443 464 dsta = _mm_set_pi32 (dalpha, dalpha); /* dst alpha mask -> dsta */
444 while (height--) { 465
445 int n = width; 466 while (height--) {
446 if ( n & 1 ) { 467 int n = width;
447 Uint32 s = *srcp++; 468 if (n & 1) {
448 Uint32 d = *dstp; 469 Uint32 s = *srcp++;
449 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 470 Uint32 d = *dstp;
450 + (s & d & 0x00010101)) | dalpha; 471 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
451 n--; 472 + (s & d & 0x00010101)) | dalpha;
452 } 473 n--;
453 474 }
454 for (n >>= 1; n > 0; --n) { 475
455 dst1 = *(__m64*)dstp; /* 2 x dst -> dst1(ARGBARGB) */ 476 for (n >>= 1; n > 0; --n) {
456 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 477 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
457 478 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
458 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB) */ 479
459 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 480 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
460 481 src2 = src1; /* 2 x src -> src2(ARGBARGB) */
461 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ 482
462 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ 483 dst2 = _mm_and_si64 (dst2, hmask); /* dst & mask -> dst2 */
463 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ 484 src2 = _mm_and_si64 (src2, hmask); /* src & mask -> src2 */
464 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ 485 src2 = _mm_add_pi32 (src2, dst2); /* dst2 + src2 -> src2 */
465 486 src2 = _mm_srli_pi32 (src2, 1); /* src2 >> 1 -> src2 */
466 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ 487
467 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ 488 dst1 = _mm_and_si64 (dst1, src1); /* src & dst -> dst1 */
468 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ 489 dst1 = _mm_and_si64 (dst1, lmask); /* dst1 & !mask -> dst1 */
469 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ 490 dst1 = _mm_add_pi32 (dst1, src2); /* src2 + dst1 -> dst1 */
470 491 dst1 = _mm_or_si64 (dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */
471 *(__m64*)dstp = dst1; /* dst1 -> 2 x dst pixels */ 492
472 dstp += 2; 493 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */
473 srcp += 2; 494 dstp += 2;
474 } 495 srcp += 2;
475 496 }
476 srcp += srcskip; 497
477 dstp += dstskip; 498 srcp += srcskip;
478 } 499 dstp += dstskip;
479 _mm_empty(); 500 }
501 _mm_empty ();
480 } 502 }
481 503
482 /* fast RGB888->(A)RGB888 blending with surface alpha */ 504 /* fast RGB888->(A)RGB888 blending with surface alpha */
483 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) 505 static void
484 { 506 BlitRGBtoRGBSurfaceAlphaMMX (SDL_BlitInfo * info)
485 SDL_PixelFormat* df = info->dst; 507 {
486 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; 508 SDL_PixelFormat *df = info->dst;
487 unsigned alpha = info->src->alpha; 509 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask;
488 510 unsigned alpha = info->src->alpha;
489 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 511
490 /* only call a128 version when R,G,B occupy lower bits */ 512 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
491 BlitRGBtoRGBSurfaceAlpha128MMX(info); 513 /* only call a128 version when R,G,B occupy lower bits */
492 } else { 514 BlitRGBtoRGBSurfaceAlpha128MMX (info);
493 int width = info->d_width; 515 } else {
494 int height = info->d_height; 516 int width = info->d_width;
495 Uint32 *srcp = (Uint32 *)info->s_pixels; 517 int height = info->d_height;
496 int srcskip = info->s_skip >> 2; 518 Uint32 *srcp = (Uint32 *) info->s_pixels;
497 Uint32 *dstp = (Uint32 *)info->d_pixels; 519 int srcskip = info->s_skip >> 2;
498 int dstskip = info->d_skip >> 2; 520 Uint32 *dstp = (Uint32 *) info->d_pixels;
499 Uint32 dalpha = df->Amask; 521 int dstskip = info->d_skip >> 2;
500 Uint32 amult; 522 Uint32 dalpha = df->Amask;
501 523 Uint32 amult;
502 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; 524
503 525 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
504 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 526
505 /* form the alpha mult */ 527 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */
506 amult = alpha | (alpha << 8); 528 /* form the alpha mult */
507 amult = amult | (amult << 16); 529 amult = alpha | (alpha << 8);
508 chanmask = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); 530 amult = amult | (amult << 16);
509 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ 531 chanmask =
510 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ 532 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->
511 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ 533 Bshift);
512 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ 534 mm_alpha = _mm_set_pi32 (0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */
513 535 mm_alpha = _mm_unpacklo_pi8 (mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */
514 while (height--) { 536 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */
515 int n = width; 537 dsta = _mm_set_pi32 (dalpha, dalpha); /* dst alpha mask -> dsta */
516 if (n & 1) { 538
517 /* One Pixel Blend */ 539 while (height--) {
518 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB)*/ 540 int n = width;
519 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ 541 if (n & 1) {
520 542 /* One Pixel Blend */
521 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ 543 src2 = _mm_cvtsi32_si64 (*srcp); /* src(ARGB) -> src2 (0000ARGB) */
522 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 544 src2 = _mm_unpacklo_pi8 (src2, mm_zero); /* 0A0R0G0B -> src2 */
523 545
524 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ 546 dst1 = _mm_cvtsi32_si64 (*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
525 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 547 dst1 = _mm_unpacklo_pi8 (dst1, mm_zero); /* 0A0R0G0B -> dst1 */
526 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ 548
527 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ 549 src2 = _mm_sub_pi16 (src2, dst1); /* src2 - dst2 -> src2 */
528 550 src2 = _mm_mullo_pi16 (src2, mm_alpha); /* src2 * alpha -> src2 */
529 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ 551 src2 = _mm_srli_pi16 (src2, 8); /* src2 >> 8 -> src2 */
530 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ 552 dst1 = _mm_add_pi8 (src2, dst1); /* src2 + dst1 -> dst1 */
531 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 553
532 554 dst1 = _mm_packs_pu16 (dst1, mm_zero); /* 0000ARGB -> dst1 */
533 ++srcp; 555 dst1 = _mm_or_si64 (dst1, dsta); /* dsta | dst1 -> dst1 */
534 ++dstp; 556 *dstp = _mm_cvtsi64_si32 (dst1); /* dst1 -> pixel */
535 557
536 n--; 558 ++srcp;
537 } 559 ++dstp;
538 560
539 for (n >>= 1; n > 0; --n) { 561 n--;
540 /* Two Pixels Blend */ 562 }
541 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB)*/ 563
542 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 564 for (n >>= 1; n > 0; --n) {
543 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ 565 /* Two Pixels Blend */
544 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ 566 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
545 567 src2 = src1; /* 2 x src -> src2(ARGBARGB) */
546 dst1 = *(__m64*)dstp;/* 2 x dst -> dst1(ARGBARGB) */ 568 src1 = _mm_unpacklo_pi8 (src1, mm_zero); /* low - 0A0R0G0B -> src1 */
547 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 569 src2 = _mm_unpackhi_pi8 (src2, mm_zero); /* high - 0A0R0G0B -> src2 */
548 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ 570
549 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ 571 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
550 572 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
551 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ 573 dst1 = _mm_unpacklo_pi8 (dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */
552 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ 574 dst2 = _mm_unpackhi_pi8 (dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */
553 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ 575
554 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ 576 src1 = _mm_sub_pi16 (src1, dst1); /* src1 - dst1 -> src1 */
555 577 src1 = _mm_mullo_pi16 (src1, mm_alpha); /* src1 * alpha -> src1 */
556 src2 = _mm_sub_pi16(src2, dst2);/* src2 - dst2 -> src2 */ 578 src1 = _mm_srli_pi16 (src1, 8); /* src1 >> 8 -> src1 */
557 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 579 dst1 = _mm_add_pi8 (src1, dst1); /* src1 + dst1(dst1) -> dst1 */
558 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ 580
559 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ 581 src2 = _mm_sub_pi16 (src2, dst2); /* src2 - dst2 -> src2 */
560 582 src2 = _mm_mullo_pi16 (src2, mm_alpha); /* src2 * alpha -> src2 */
561 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ 583 src2 = _mm_srli_pi16 (src2, 8); /* src2 >> 8 -> src2 */
562 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ 584 dst2 = _mm_add_pi8 (src2, dst2); /* src2 + dst2(dst2) -> dst2 */
563 585
564 *(__m64*)dstp = dst1; /* dst1 -> 2 x pixel */ 586 dst1 = _mm_packs_pu16 (dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */
565 587 dst1 = _mm_or_si64 (dst1, dsta); /* dsta | dst1 -> dst1 */
566 srcp += 2; 588
567 dstp += 2; 589 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */
568 } 590
569 srcp += srcskip; 591 srcp += 2;
570 dstp += dstskip; 592 dstp += 2;
571 } 593 }
572 _mm_empty(); 594 srcp += srcskip;
573 } 595 dstp += dstskip;
596 }
597 _mm_empty ();
598 }
574 } 599 }
575 600
576 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 601 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
577 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) 602 static void
578 { 603 BlitRGBtoRGBPixelAlphaMMX (SDL_BlitInfo * info)
579 int width = info->d_width; 604 {
580 int height = info->d_height; 605 int width = info->d_width;
581 Uint32 *srcp = (Uint32 *)info->s_pixels; 606 int height = info->d_height;
582 int srcskip = info->s_skip >> 2; 607 Uint32 *srcp = (Uint32 *) info->s_pixels;
583 Uint32 *dstp = (Uint32 *)info->d_pixels; 608 int srcskip = info->s_skip >> 2;
584 int dstskip = info->d_skip >> 2; 609 Uint32 *dstp = (Uint32 *) info->d_pixels;
585 SDL_PixelFormat* sf = info->src; 610 int dstskip = info->d_skip >> 2;
586 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; 611 SDL_PixelFormat *sf = info->src;
587 Uint32 amask = sf->Amask; 612 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
588 Uint32 ashift = sf->Ashift; 613 Uint32 amask = sf->Amask;
589 Uint64 multmask; 614 Uint32 ashift = sf->Ashift;
590 615 Uint64 multmask;
591 __m64 src1, dst1, mm_alpha, mm_zero, dmask; 616
592 617 __m64 src1, dst1, mm_alpha, mm_zero, dmask;
593 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 618
594 multmask = ~(0xFFFFi64 << (ashift * 2)); 619 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */
595 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ 620 multmask = ~(0xFFFFi 64 << (ashift * 2));
596 621 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */
597 while(height--) { 622
623 while (height--) {
624 /* *INDENT-OFF* */
598 DUFFS_LOOP4({ 625 DUFFS_LOOP4({
599 Uint32 alpha = *srcp & amask; 626 Uint32 alpha = *srcp & amask;
600 if (alpha == 0) { 627 if (alpha == 0) {
601 /* do nothing */ 628 /* do nothing */
602 } else if (alpha == amask) { 629 } else if (alpha == amask) {
625 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 652 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
626 } 653 }
627 ++srcp; 654 ++srcp;
628 ++dstp; 655 ++dstp;
629 }, width); 656 }, width);
630 srcp += srcskip; 657 /* *INDENT-ON* */
631 dstp += dstskip; 658 srcp += srcskip;
632 } 659 dstp += dstskip;
633 _mm_empty(); 660 }
634 } 661 _mm_empty ();
662 }
663
635 /* End MSVC_ASMBLIT */ 664 /* End MSVC_ASMBLIT */
636 665
637 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 666 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
638 667
639 #if SDL_ALTIVEC_BLITTERS 668 #if SDL_ALTIVEC_BLITTERS
644 #include <altivec.h> 673 #include <altivec.h>
645 #endif 674 #endif
646 #include <assert.h> 675 #include <assert.h>
647 676
648 #if (defined(__MACOSX__) && (__GNUC__ < 4)) 677 #if (defined(__MACOSX__) && (__GNUC__ < 4))
649 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ 678 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
650 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) 679 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
651 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ 680 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
652 (vector unsigned short) ( a,b,c,d,e,f,g,h ) 681 (vector unsigned short) ( a,b,c,d,e,f,g,h )
653 #else 682 #else
654 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ 683 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
655 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } 684 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
656 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ 685 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
657 (vector unsigned short) { a,b,c,d,e,f,g,h } 686 (vector unsigned short) { a,b,c,d,e,f,g,h }
658 #endif 687 #endif
659 688
660 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) 689 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
661 #define VECPRINT(msg, v) do { \ 690 #define VECPRINT(msg, v) do { \
676 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) 705 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
677 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ 706 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
678 ? vec_lvsl(0, src) \ 707 ? vec_lvsl(0, src) \
679 : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) 708 : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
680 709
681 710
682 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ 711 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
683 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ 712 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
684 vector unsigned short vtemp1 = vec_mule(vs, valpha); \ 713 vector unsigned short vtemp1 = vec_mule(vs, valpha); \
685 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ 714 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
686 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ 715 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
702 vtemp4 = vec_sr(vtemp2, v8_16); \ 731 vtemp4 = vec_sr(vtemp2, v8_16); \
703 vtemp2 = vec_add(vtemp2, vtemp4); \ 732 vtemp2 = vec_add(vtemp2, vtemp4); \
704 /* (>>8) and get ARGBARGBARGBARGB */ \ 733 /* (>>8) and get ARGBARGBARGBARGB */ \
705 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ 734 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
706 } while (0) 735 } while (0)
707 736
708 /* Calculate the permute vector used for 32->32 swizzling */ 737 /* Calculate the permute vector used for 32->32 swizzling */
709 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, 738 static vector unsigned char
710 const SDL_PixelFormat *dstfmt) 739 calc_swizzle32 (const SDL_PixelFormat * srcfmt,
740 const SDL_PixelFormat * dstfmt)
711 { 741 {
712 /* 742 /*
713 * We have to assume that the bits that aren't used by other 743 * We have to assume that the bits that aren't used by other
714 * colors is alpha, and it's one complete byte, since some formats 744 * colors is alpha, and it's one complete byte, since some formats
715 * leave alpha with a zero mask, but we should still swizzle the bits. 745 * leave alpha with a zero mask, but we should still swizzle the bits.
718 const static struct SDL_PixelFormat default_pixel_format = { 748 const static struct SDL_PixelFormat default_pixel_format = {
719 NULL, 0, 0, 749 NULL, 0, 0,
720 0, 0, 0, 0, 750 0, 0, 0, 0,
721 16, 8, 0, 24, 751 16, 8, 0, 24,
722 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, 752 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
723 0, 0}; 753 0, 0
754 };
724 if (!srcfmt) { 755 if (!srcfmt) {
725 srcfmt = &default_pixel_format; 756 srcfmt = &default_pixel_format;
726 } 757 }
727 if (!dstfmt) { 758 if (!dstfmt) {
728 dstfmt = &default_pixel_format; 759 dstfmt = &default_pixel_format;
729 } 760 }
730 const vector unsigned char plus = VECUINT8_LITERAL 761 const vector unsigned char plus = VECUINT8_LITERAL
731 ( 0x00, 0x00, 0x00, 0x00, 762 (0x00, 0x00, 0x00, 0x00,
732 0x04, 0x04, 0x04, 0x04, 763 0x04, 0x04, 0x04, 0x04,
733 0x08, 0x08, 0x08, 0x08, 764 0x08, 0x08, 0x08, 0x08,
734 0x0C, 0x0C, 0x0C, 0x0C ); 765 0x0C, 0x0C, 0x0C, 0x0C);
735 vector unsigned char vswiz; 766 vector unsigned char vswiz;
736 vector unsigned int srcvec; 767 vector unsigned int srcvec;
737 #define RESHIFT(X) (3 - ((X) >> 3)) 768 #define RESHIFT(X) (3 - ((X) >> 3))
738 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); 769 Uint32 rmask = RESHIFT (srcfmt->Rshift) << (dstfmt->Rshift);
739 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); 770 Uint32 gmask = RESHIFT (srcfmt->Gshift) << (dstfmt->Gshift);
740 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); 771 Uint32 bmask = RESHIFT (srcfmt->Bshift) << (dstfmt->Bshift);
741 Uint32 amask; 772 Uint32 amask;
742 /* Use zero for alpha if either surface doesn't have alpha */ 773 /* Use zero for alpha if either surface doesn't have alpha */
743 if (dstfmt->Amask) { 774 if (dstfmt->Amask) {
744 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); 775 amask =
776 ((srcfmt->Amask) ? RESHIFT (srcfmt->Ashift) : 0x10) << (dstfmt->
777 Ashift);
745 } else { 778 } else {
746 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); 779 amask =
747 } 780 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
748 #undef RESHIFT 781 0xFFFFFFFF);
749 ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); 782 }
750 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); 783 #undef RESHIFT
751 return(vswiz); 784 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
752 } 785 vswiz = vec_add (plus, (vector unsigned char) vec_splat (srcvec, 0));
753 786 return (vswiz);
754 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) 787 }
788
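calc_swizzle32 builds its permute vector from a single 32-bit index word: RESHIFT turns a channel shift (0, 8, 16 or 24 bits) into a big-endian byte index (3, 2, 1 or 0), and that index is placed at the destination channel's position. The word is then splatted across the vector and offset by the "plus" constant to cover four pixels. A scalar model of the index-word math only (not AltiVec code):

    #include <stdint.h>

    #define RESHIFT(x) (3 - ((x) >> 3))   /* channel shift (bits) -> byte index */

    /* For each destination byte position, record which source byte to fetch. */
    static uint32_t swizzle_index_word(int rsrc, int gsrc, int bsrc,
                                       int rdst, int gdst, int bdst)
    {
        return ((uint32_t) RESHIFT(rsrc) << rdst)
             | ((uint32_t) RESHIFT(gsrc) << gdst)
             | ((uint32_t) RESHIFT(bsrc) << bdst);
    }
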
789 static void
790 Blit32to565PixelAlphaAltivec (SDL_BlitInfo * info)
755 { 791 {
756 int height = info->d_height; 792 int height = info->d_height;
757 Uint8 *src = (Uint8 *)info->s_pixels; 793 Uint8 *src = (Uint8 *) info->s_pixels;
758 int srcskip = info->s_skip; 794 int srcskip = info->s_skip;
759 Uint8 *dst = (Uint8 *)info->d_pixels; 795 Uint8 *dst = (Uint8 *) info->d_pixels;
760 int dstskip = info->d_skip; 796 int dstskip = info->d_skip;
761 SDL_PixelFormat *srcfmt = info->src; 797 SDL_PixelFormat *srcfmt = info->src;
762 798
763 vector unsigned char v0 = vec_splat_u8(0); 799 vector unsigned char v0 = vec_splat_u8 (0);
764 vector unsigned short v8_16 = vec_splat_u16(8); 800 vector unsigned short v8_16 = vec_splat_u16 (8);
765 vector unsigned short v1_16 = vec_splat_u16(1); 801 vector unsigned short v1_16 = vec_splat_u16 (1);
766 vector unsigned short v2_16 = vec_splat_u16(2); 802 vector unsigned short v2_16 = vec_splat_u16 (2);
767 vector unsigned short v3_16 = vec_splat_u16(3); 803 vector unsigned short v3_16 = vec_splat_u16 (3);
768 vector unsigned int v8_32 = vec_splat_u32(8); 804 vector unsigned int v8_32 = vec_splat_u32 (8);
769 vector unsigned int v16_32 = vec_add(v8_32, v8_32); 805 vector unsigned int v16_32 = vec_add (v8_32, v8_32);
770 vector unsigned short v3f = VECUINT16_LITERAL( 806 vector unsigned short v3f =
771 0x003f, 0x003f, 0x003f, 0x003f, 807 VECUINT16_LITERAL (0x003f, 0x003f, 0x003f, 0x003f,
772 0x003f, 0x003f, 0x003f, 0x003f); 808 0x003f, 0x003f, 0x003f, 0x003f);
773 vector unsigned short vfc = VECUINT16_LITERAL( 809 vector unsigned short vfc =
774 0x00fc, 0x00fc, 0x00fc, 0x00fc, 810 VECUINT16_LITERAL (0x00fc, 0x00fc, 0x00fc, 0x00fc,
775 0x00fc, 0x00fc, 0x00fc, 0x00fc); 811 0x00fc, 0x00fc, 0x00fc, 0x00fc);
776 812
777 /* 813 /*
778 0x10 - 0x1f is the alpha 814 0x10 - 0x1f is the alpha
779 0x00 - 0x0e evens are the red 815 0x00 - 0x0e evens are the red
780 0x01 - 0x0f odds are zero 816 0x01 - 0x0f odds are zero
781 */ 817 */
782 vector unsigned char vredalpha1 = VECUINT8_LITERAL( 818 vector unsigned char vredalpha1 =
783 0x10, 0x00, 0x01, 0x01, 819 VECUINT8_LITERAL (0x10, 0x00, 0x01, 0x01,
784 0x10, 0x02, 0x01, 0x01, 820 0x10, 0x02, 0x01, 0x01,
785 0x10, 0x04, 0x01, 0x01, 821 0x10, 0x04, 0x01, 0x01,
786 0x10, 0x06, 0x01, 0x01 822 0x10, 0x06, 0x01, 0x01);
787 ); 823 vector unsigned char vredalpha2 =
788 vector unsigned char vredalpha2 = (vector unsigned char)( 824 (vector unsigned char) (vec_add ((vector unsigned int) vredalpha1,
789 vec_add((vector unsigned int)vredalpha1, vec_sl(v8_32, v16_32)) 825 vec_sl (v8_32, v16_32))
790 ); 826 );
791 /* 827 /*
792 0x00 - 0x0f is ARxx ARxx ARxx ARxx 828 0x00 - 0x0f is ARxx ARxx ARxx ARxx
793 0x11 - 0x0f odds are blue 829 0x11 - 0x0f odds are blue
794 */ 830 */
795 vector unsigned char vblue1 = VECUINT8_LITERAL( 831 vector unsigned char vblue1 = VECUINT8_LITERAL (0x00, 0x01, 0x02, 0x11,
796 0x00, 0x01, 0x02, 0x11, 832 0x04, 0x05, 0x06, 0x13,
797 0x04, 0x05, 0x06, 0x13, 833 0x08, 0x09, 0x0a, 0x15,
798 0x08, 0x09, 0x0a, 0x15, 834 0x0c, 0x0d, 0x0e, 0x17);
799 0x0c, 0x0d, 0x0e, 0x17 835 vector unsigned char vblue2 =
800 ); 836 (vector unsigned char) (vec_add ((vector unsigned int) vblue1, v8_32)
801 vector unsigned char vblue2 = (vector unsigned char)( 837 );
802 vec_add((vector unsigned int)vblue1, v8_32)
803 );
804 /* 838 /*
805 0x00 - 0x0f is ARxB ARxB ARxB ARxB 839 0x00 - 0x0f is ARxB ARxB ARxB ARxB
806 0x10 - 0x0e evens are green 840 0x10 - 0x0e evens are green
807 */ 841 */
808 vector unsigned char vgreen1 = VECUINT8_LITERAL( 842 vector unsigned char vgreen1 = VECUINT8_LITERAL (0x00, 0x01, 0x10, 0x03,
809 0x00, 0x01, 0x10, 0x03, 843 0x04, 0x05, 0x12, 0x07,
810 0x04, 0x05, 0x12, 0x07, 844 0x08, 0x09, 0x14, 0x0b,
811 0x08, 0x09, 0x14, 0x0b, 845 0x0c, 0x0d, 0x16, 0x0f);
812 0x0c, 0x0d, 0x16, 0x0f 846 vector unsigned char vgreen2 =
813 ); 847 (vector unsigned
814 vector unsigned char vgreen2 = (vector unsigned char)( 848 char) (vec_add ((vector unsigned int) vgreen1, vec_sl (v8_32, v8_32))
815 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) 849 );
816 ); 850 vector unsigned char vgmerge = VECUINT8_LITERAL (0x00, 0x02, 0x00, 0x06,
817 vector unsigned char vgmerge = VECUINT8_LITERAL( 851 0x00, 0x0a, 0x00, 0x0e,
818 0x00, 0x02, 0x00, 0x06, 852 0x00, 0x12, 0x00, 0x16,
819 0x00, 0x0a, 0x00, 0x0e, 853 0x00, 0x1a, 0x00, 0x1e);
820 0x00, 0x12, 0x00, 0x16, 854 vector unsigned char mergePermute = VEC_MERGE_PERMUTE ();
821 0x00, 0x1a, 0x00, 0x1e); 855 vector unsigned char vpermute = calc_swizzle32 (srcfmt, NULL);
822 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); 856 vector unsigned char valphaPermute =
823 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); 857 vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC));
824 vector unsigned char valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 858
825 859 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8 (-7);
826 vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7); 860 vf800 = vec_sl (vf800, vec_splat_u16 (8));
827 vf800 = vec_sl(vf800, vec_splat_u16(8)); 861
828 862 while (height--) {
829 while(height--) {
830 int extrawidth; 863 int extrawidth;
831 vector unsigned char valigner; 864 vector unsigned char valigner;
832 vector unsigned char vsrc; 865 vector unsigned char vsrc;
833 vector unsigned char voverflow; 866 vector unsigned char voverflow;
834 int width = info->d_width; 867 int width = info->d_width;
850 } \ 883 } \
851 src += 4; \ 884 src += 4; \
852 dst += 2; \ 885 dst += 2; \
853 widthvar--; \ 886 widthvar--; \
854 } 887 }
855 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); 888 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dst)) && (width), width);
856 extrawidth = (width % 8); 889 extrawidth = (width % 8);
857 valigner = VEC_ALIGNER(src); 890 valigner = VEC_ALIGNER (src);
858 vsrc = (vector unsigned char)vec_ld(0, src); 891 vsrc = (vector unsigned char) vec_ld (0, src);
859 width -= extrawidth; 892 width -= extrawidth;
860 while (width) { 893 while (width) {
861 vector unsigned char valpha; 894 vector unsigned char valpha;
862 vector unsigned char vsrc1, vsrc2; 895 vector unsigned char vsrc1, vsrc2;
863 vector unsigned char vdst1, vdst2; 896 vector unsigned char vdst1, vdst2;
864 vector unsigned short vR, vG, vB; 897 vector unsigned short vR, vG, vB;
865 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; 898 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
866 899
867 /* Load 8 pixels from src as ARGB */ 900 /* Load 8 pixels from src as ARGB */
868 voverflow = (vector unsigned char)vec_ld(15, src); 901 voverflow = (vector unsigned char) vec_ld (15, src);
869 vsrc = vec_perm(vsrc, voverflow, valigner); 902 vsrc = vec_perm (vsrc, voverflow, valigner);
870 vsrc1 = vec_perm(vsrc, vsrc, vpermute); 903 vsrc1 = vec_perm (vsrc, vsrc, vpermute);
871 src += 16; 904 src += 16;
872 vsrc = (vector unsigned char)vec_ld(15, src); 905 vsrc = (vector unsigned char) vec_ld (15, src);
873 voverflow = vec_perm(voverflow, vsrc, valigner); 906 voverflow = vec_perm (voverflow, vsrc, valigner);
874 vsrc2 = vec_perm(voverflow, voverflow, vpermute); 907 vsrc2 = vec_perm (voverflow, voverflow, vpermute);
875 src += 16; 908 src += 16;
876 909
877 /* Load 8 pixels from dst as XRGB */ 910 /* Load 8 pixels from dst as XRGB */
878 voverflow = vec_ld(0, dst); 911 voverflow = vec_ld (0, dst);
879 vR = vec_and((vector unsigned short)voverflow, vf800); 912 vR = vec_and ((vector unsigned short) voverflow, vf800);
880 vB = vec_sl((vector unsigned short)voverflow, v3_16); 913 vB = vec_sl ((vector unsigned short) voverflow, v3_16);
881 vG = vec_sl(vB, v2_16); 914 vG = vec_sl (vB, v2_16);
882 vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha1); 915 vdst1 =
883 vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1); 916 (vector unsigned char) vec_perm ((vector unsigned char) vR,
884 vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1); 917 (vector unsigned char) vR,
885 vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha2); 918 vredalpha1);
886 vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2); 919 vdst1 = vec_perm (vdst1, (vector unsigned char) vB, vblue1);
887 vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2); 920 vdst1 = vec_perm (vdst1, (vector unsigned char) vG, vgreen1);
921 vdst2 =
922 (vector unsigned char) vec_perm ((vector unsigned char) vR,
923 (vector unsigned char) vR,
924 vredalpha2);
925 vdst2 = vec_perm (vdst2, (vector unsigned char) vB, vblue2);
926 vdst2 = vec_perm (vdst2, (vector unsigned char) vG, vgreen2);
888 927
889 /* Alpha blend 8 pixels as ARGB */ 928 /* Alpha blend 8 pixels as ARGB */
890 valpha = vec_perm(vsrc1, v0, valphaPermute); 929 valpha = vec_perm (vsrc1, v0, valphaPermute);
891 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, v8_16); 930 VEC_MULTIPLY_ALPHA (vsrc1, vdst1, valpha, mergePermute, v1_16,
892 valpha = vec_perm(vsrc2, v0, valphaPermute); 931 v8_16);
893 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, v8_16); 932 valpha = vec_perm (vsrc2, v0, valphaPermute);
933 VEC_MULTIPLY_ALPHA (vsrc2, vdst2, valpha, mergePermute, v1_16,
934 v8_16);
894 935
895 /* Convert 8 pixels to 565 */ 936 /* Convert 8 pixels to 565 */
896 vpixel = (vector unsigned short)vec_packpx((vector unsigned int)vdst1, (vector unsigned int)vdst2); 937 vpixel = (vector unsigned short) vec_packpx ((vector unsigned int)
897 vgpixel = (vector unsigned short)vec_perm(vdst1, vdst2, vgmerge); 938 vdst1,
898 vgpixel = vec_and(vgpixel, vfc); 939 (vector unsigned int)
899 vgpixel = vec_sl(vgpixel, v3_16); 940 vdst2);
900 vrpixel = vec_sl(vpixel, v1_16); 941 vgpixel =
901 vrpixel = vec_and(vrpixel, vf800); 942 (vector unsigned short) vec_perm (vdst1, vdst2, vgmerge);
902 vbpixel = vec_and(vpixel, v3f); 943 vgpixel = vec_and (vgpixel, vfc);
903 vdst1 = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel); 944 vgpixel = vec_sl (vgpixel, v3_16);
904 vdst1 = vec_or(vdst1, (vector unsigned char)vbpixel); 945 vrpixel = vec_sl (vpixel, v1_16);
905 946 vrpixel = vec_and (vrpixel, vf800);
947 vbpixel = vec_and (vpixel, v3f);
948 vdst1 =
949 vec_or ((vector unsigned char) vrpixel,
950 (vector unsigned char) vgpixel);
951 vdst1 = vec_or (vdst1, (vector unsigned char) vbpixel);
952
906 /* Store 8 pixels */ 953 /* Store 8 pixels */
907 vec_st(vdst1, 0, dst); 954 vec_st (vdst1, 0, dst);
908 955
909 width -= 8; 956 width -= 8;
910 dst += 16; 957 dst += 16;
911 } 958 }
912 ONE_PIXEL_BLEND((extrawidth), extrawidth); 959 ONE_PIXEL_BLEND ((extrawidth), extrawidth);
913 #undef ONE_PIXEL_BLEND 960 #undef ONE_PIXEL_BLEND
914 src += srcskip; 961 src += srcskip;
915 dst += dstskip; 962 dst += dstskip;
916 } 963 }
917 } 964 }
918 965
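After blending in 8 bits per channel, Blit32to565PixelAlphaAltivec packs the result down to RGB565 (5 bits red, 6 green, 5 blue). A scalar equivalent of that final packing step, as an illustrative helper:

    #include <stdint.h>

    static uint16_t pack_565(uint8_t r, uint8_t g, uint8_t b)
    {
        return (uint16_t) (((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
    }
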
919 static void Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo *info) 966 static void
967 Blit32to32SurfaceAlphaKeyAltivec (SDL_BlitInfo * info)
920 { 968 {
921 unsigned alpha = info->src->alpha; 969 unsigned alpha = info->src->alpha;
922 int height = info->d_height; 970 int height = info->d_height;
923 Uint32 *srcp = (Uint32 *)info->s_pixels; 971 Uint32 *srcp = (Uint32 *) info->s_pixels;
924 int srcskip = info->s_skip >> 2; 972 int srcskip = info->s_skip >> 2;
925 Uint32 *dstp = (Uint32 *)info->d_pixels; 973 Uint32 *dstp = (Uint32 *) info->d_pixels;
926 int dstskip = info->d_skip >> 2; 974 int dstskip = info->d_skip >> 2;
927 SDL_PixelFormat *srcfmt = info->src; 975 SDL_PixelFormat *srcfmt = info->src;
928 SDL_PixelFormat *dstfmt = info->dst; 976 SDL_PixelFormat *dstfmt = info->dst;
929 unsigned sA = srcfmt->alpha; 977 unsigned sA = srcfmt->alpha;
930 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 978 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
941 vector unsigned short v1; 989 vector unsigned short v1;
942 vector unsigned short v8; 990 vector unsigned short v8;
943 vector unsigned int vckey; 991 vector unsigned int vckey;
944 vector unsigned int vrgbmask; 992 vector unsigned int vrgbmask;
945 993
946 mergePermute = VEC_MERGE_PERMUTE(); 994 mergePermute = VEC_MERGE_PERMUTE ();
947 v0 = vec_splat_u8(0); 995 v0 = vec_splat_u8 (0);
948 v1 = vec_splat_u16(1); 996 v1 = vec_splat_u16 (1);
949 v8 = vec_splat_u16(8); 997 v8 = vec_splat_u16 (8);
950 998
951 /* set the alpha to 255 on the destination surf */ 999 /* set the alpha to 255 on the destination surf */
952 valphamask = VEC_ALPHA_MASK(); 1000 valphamask = VEC_ALPHA_MASK ();
953 1001
954 vsrcPermute = calc_swizzle32(srcfmt, NULL); 1002 vsrcPermute = calc_swizzle32 (srcfmt, NULL);
955 vdstPermute = calc_swizzle32(NULL, dstfmt); 1003 vdstPermute = calc_swizzle32 (NULL, dstfmt);
956 vsdstPermute = calc_swizzle32(dstfmt, NULL); 1004 vsdstPermute = calc_swizzle32 (dstfmt, NULL);
957 1005
958 /* set a vector full of alpha and 255-alpha */ 1006 /* set a vector full of alpha and 255-alpha */
959 ((unsigned char *)&valpha)[0] = alpha; 1007 ((unsigned char *) &valpha)[0] = alpha;
960 valpha = vec_splat(valpha, 0); 1008 valpha = vec_splat (valpha, 0);
961 vbits = (vector unsigned char)vec_splat_s8(-1); 1009 vbits = (vector unsigned char) vec_splat_s8 (-1);
962 1010
963 ckey &= rgbmask; 1011 ckey &= rgbmask;
964 ((unsigned int *)(char*)&vckey)[0] = ckey; 1012 ((unsigned int *) (char *) &vckey)[0] = ckey;
965 vckey = vec_splat(vckey, 0); 1013 vckey = vec_splat (vckey, 0);
966 ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; 1014 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
967 vrgbmask = vec_splat(vrgbmask, 0); 1015 vrgbmask = vec_splat (vrgbmask, 0);
968 1016
969 while(height--) { 1017 while (height--) {
970 int width = info->d_width; 1018 int width = info->d_width;
971 #define ONE_PIXEL_BLEND(condition, widthvar) \ 1019 #define ONE_PIXEL_BLEND(condition, widthvar) \
972 while (condition) { \ 1020 while (condition) { \
973 Uint32 Pixel; \ 1021 Uint32 Pixel; \
974 unsigned sR, sG, sB, dR, dG, dB; \ 1022 unsigned sR, sG, sB, dR, dG, dB; \
981 } \ 1029 } \
982 dstp++; \ 1030 dstp++; \
983 srcp++; \ 1031 srcp++; \
984 widthvar--; \ 1032 widthvar--; \
985 } 1033 }
986 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1034 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width);
987 if (width > 0) { 1035 if (width > 0) {
988 int extrawidth = (width % 4); 1036 int extrawidth = (width % 4);
989 vector unsigned char valigner = VEC_ALIGNER(srcp); 1037 vector unsigned char valigner = VEC_ALIGNER (srcp);
990 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1038 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp);
991 width -= extrawidth; 1039 width -= extrawidth;
992 while (width) { 1040 while (width) {
993 vector unsigned char vsel; 1041 vector unsigned char vsel;
994 vector unsigned char voverflow; 1042 vector unsigned char voverflow;
995 vector unsigned char vd; 1043 vector unsigned char vd;
996 vector unsigned char vd_orig; 1044 vector unsigned char vd_orig;
997 1045
998 /* s = *srcp */ 1046 /* s = *srcp */
999 voverflow = (vector unsigned char)vec_ld(15, srcp); 1047 voverflow = (vector unsigned char) vec_ld (15, srcp);
1000 vs = vec_perm(vs, voverflow, valigner); 1048 vs = vec_perm (vs, voverflow, valigner);
1001 1049
1002 /* vsel is set for items that match the key */ 1050 /* vsel is set for items that match the key */
1003 vsel = (vector unsigned char)vec_and((vector unsigned int)vs, vrgbmask); 1051 vsel =
1004 vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); 1052 (vector unsigned char) vec_and ((vector unsigned int) vs,
1053 vrgbmask);
1054 vsel = (vector unsigned char) vec_cmpeq ((vector unsigned int)
1055 vsel, vckey);
1005 1056
1006 /* permute to source format */ 1057 /* permute to source format */
1007 vs = vec_perm(vs, valpha, vsrcPermute); 1058 vs = vec_perm (vs, valpha, vsrcPermute);
1008 1059
1009 /* d = *dstp */ 1060 /* d = *dstp */
1010 vd = (vector unsigned char)vec_ld(0, dstp); 1061 vd = (vector unsigned char) vec_ld (0, dstp);
1011 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); 1062 vd_orig = vd = vec_perm (vd, v0, vsdstPermute);
1012 1063
1013 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1064 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8);
1014 1065
1015 /* set the alpha channel to full on */ 1066 /* set the alpha channel to full on */
1016 vd = vec_or(vd, valphamask); 1067 vd = vec_or (vd, valphamask);
1017 1068
1018 /* mask out color key */ 1069 /* mask out color key */
1019 vd = vec_sel(vd, vd_orig, vsel); 1070 vd = vec_sel (vd, vd_orig, vsel);
1020 1071
1021 /* permute to dest format */ 1072 /* permute to dest format */
1022 vd = vec_perm(vd, vbits, vdstPermute); 1073 vd = vec_perm (vd, vbits, vdstPermute);
1023 1074
1024 /* *dstp = res */ 1075 /* *dstp = res */
1025 vec_st((vector unsigned int)vd, 0, dstp); 1076 vec_st ((vector unsigned int) vd, 0, dstp);
1026 1077
1027 srcp += 4; 1078 srcp += 4;
1028 dstp += 4; 1079 dstp += 4;
1029 width -= 4; 1080 width -= 4;
1030 vs = voverflow; 1081 vs = voverflow;
1031 } 1082 }
1032 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1083 ONE_PIXEL_BLEND ((extrawidth), extrawidth);
1033 } 1084 }
1034 #undef ONE_PIXEL_BLEND 1085 #undef ONE_PIXEL_BLEND
1035 1086
1036 srcp += srcskip; 1087 srcp += srcskip;
1037 dstp += dstskip; 1088 dstp += dstskip;
1038 } 1089 }
1039 } 1090 }
1040 1091
1041 1092
1042 static void Blit32to32PixelAlphaAltivec(SDL_BlitInfo *info) 1093 static void
1094 Blit32to32PixelAlphaAltivec (SDL_BlitInfo * info)
1043 { 1095 {
1044 int width = info->d_width; 1096 int width = info->d_width;
1045 int height = info->d_height; 1097 int height = info->d_height;
1046 Uint32 *srcp = (Uint32 *)info->s_pixels; 1098 Uint32 *srcp = (Uint32 *) info->s_pixels;
1047 int srcskip = info->s_skip >> 2; 1099 int srcskip = info->s_skip >> 2;
1048 Uint32 *dstp = (Uint32 *)info->d_pixels; 1100 Uint32 *dstp = (Uint32 *) info->d_pixels;
1049 int dstskip = info->d_skip >> 2; 1101 int dstskip = info->d_skip >> 2;
1050 SDL_PixelFormat *srcfmt = info->src; 1102 SDL_PixelFormat *srcfmt = info->src;
1051 SDL_PixelFormat *dstfmt = info->dst; 1103 SDL_PixelFormat *dstfmt = info->dst;
1052 vector unsigned char mergePermute; 1104 vector unsigned char mergePermute;
1053 vector unsigned char valphaPermute; 1105 vector unsigned char valphaPermute;
1058 vector unsigned char vpixelmask; 1110 vector unsigned char vpixelmask;
1059 vector unsigned char v0; 1111 vector unsigned char v0;
1060 vector unsigned short v1; 1112 vector unsigned short v1;
1061 vector unsigned short v8; 1113 vector unsigned short v8;
1062 1114
1063 v0 = vec_splat_u8(0); 1115 v0 = vec_splat_u8 (0);
1064 v1 = vec_splat_u16(1); 1116 v1 = vec_splat_u16 (1);
1065 v8 = vec_splat_u16(8); 1117 v8 = vec_splat_u16 (8);
1066 mergePermute = VEC_MERGE_PERMUTE(); 1118 mergePermute = VEC_MERGE_PERMUTE ();
1067 valphamask = VEC_ALPHA_MASK(); 1119 valphamask = VEC_ALPHA_MASK ();
1068 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 1120 valphaPermute = vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC));
1069 vpixelmask = vec_nor(valphamask, v0); 1121 vpixelmask = vec_nor (valphamask, v0);
1070 vsrcPermute = calc_swizzle32(srcfmt, NULL); 1122 vsrcPermute = calc_swizzle32 (srcfmt, NULL);
1071 vdstPermute = calc_swizzle32(NULL, dstfmt); 1123 vdstPermute = calc_swizzle32 (NULL, dstfmt);
1072 vsdstPermute = calc_swizzle32(dstfmt, NULL); 1124 vsdstPermute = calc_swizzle32 (dstfmt, NULL);
1073 1125
1074 while ( height-- ) { 1126 while (height--) {
1075 width = info->d_width; 1127 width = info->d_width;
1076 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1128 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1077 Uint32 Pixel; \ 1129 Uint32 Pixel; \
1078 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ 1130 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
1079 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ 1131 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
1084 } \ 1136 } \
1085 ++srcp; \ 1137 ++srcp; \
1086 ++dstp; \ 1138 ++dstp; \
1087 widthvar--; \ 1139 widthvar--; \
1088 } 1140 }
1089 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1141 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width);
1090 if (width > 0) { 1142 if (width > 0) {
1091 /* vsrcPermute */ 1143 /* vsrcPermute */
1092 /* vdstPermute */ 1144 /* vdstPermute */
1093 int extrawidth = (width % 4); 1145 int extrawidth = (width % 4);
1094 vector unsigned char valigner = VEC_ALIGNER(srcp); 1146 vector unsigned char valigner = VEC_ALIGNER (srcp);
1095 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1147 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp);
1096 width -= extrawidth; 1148 width -= extrawidth;
1097 while (width) { 1149 while (width) {
1098 vector unsigned char voverflow; 1150 vector unsigned char voverflow;
1099 vector unsigned char vd; 1151 vector unsigned char vd;
1100 vector unsigned char valpha; 1152 vector unsigned char valpha;
1101 vector unsigned char vdstalpha; 1153 vector unsigned char vdstalpha;
1102 /* s = *srcp */ 1154 /* s = *srcp */
1103 voverflow = (vector unsigned char)vec_ld(15, srcp); 1155 voverflow = (vector unsigned char) vec_ld (15, srcp);
1104 vs = vec_perm(vs, voverflow, valigner); 1156 vs = vec_perm (vs, voverflow, valigner);
1105 vs = vec_perm(vs, v0, vsrcPermute); 1157 vs = vec_perm (vs, v0, vsrcPermute);
1106 1158
1107 valpha = vec_perm(vs, v0, valphaPermute); 1159 valpha = vec_perm (vs, v0, valphaPermute);
1108 1160
1109 /* d = *dstp */ 1161 /* d = *dstp */
1110 vd = (vector unsigned char)vec_ld(0, dstp); 1162 vd = (vector unsigned char) vec_ld (0, dstp);
1111 vd = vec_perm(vd, v0, vsdstPermute); 1163 vd = vec_perm (vd, v0, vsdstPermute);
1112 vdstalpha = vec_and(vd, valphamask); 1164 vdstalpha = vec_and (vd, valphamask);
1113 1165
1114 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1166 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8);
1115 1167
1116 /* set the alpha to the dest alpha */ 1168 /* set the alpha to the dest alpha */
1117 vd = vec_and(vd, vpixelmask); 1169 vd = vec_and (vd, vpixelmask);
1118 vd = vec_or(vd, vdstalpha); 1170 vd = vec_or (vd, vdstalpha);
1119 vd = vec_perm(vd, v0, vdstPermute); 1171 vd = vec_perm (vd, v0, vdstPermute);
1120 1172
1121 /* *dstp = res */ 1173 /* *dstp = res */
1122 vec_st((vector unsigned int)vd, 0, dstp); 1174 vec_st ((vector unsigned int) vd, 0, dstp);
1123 1175
1124 srcp += 4; 1176 srcp += 4;
1125 dstp += 4; 1177 dstp += 4;
1126 width -= 4; 1178 width -= 4;
1127 vs = voverflow; 1179 vs = voverflow;
1128 1180
1129 } 1181 }
1130 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1182 ONE_PIXEL_BLEND ((extrawidth), extrawidth);
1131 } 1183 }
1132 srcp += srcskip; 1184 srcp += srcskip;
1133 dstp += dstskip; 1185 dstp += dstskip;
1134 #undef ONE_PIXEL_BLEND 1186 #undef ONE_PIXEL_BLEND
1135 } 1187 }
1136 } 1188 }
1137 1189
1138 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 1190 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
1139 static void BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo *info) 1191 static void
1140 { 1192 BlitRGBtoRGBPixelAlphaAltivec (SDL_BlitInfo * info)
1141 int width = info->d_width; 1193 {
1142 int height = info->d_height; 1194 int width = info->d_width;
1143 Uint32 *srcp = (Uint32 *)info->s_pixels; 1195 int height = info->d_height;
1144 int srcskip = info->s_skip >> 2; 1196 Uint32 *srcp = (Uint32 *) info->s_pixels;
1145 Uint32 *dstp = (Uint32 *)info->d_pixels; 1197 int srcskip = info->s_skip >> 2;
1146 int dstskip = info->d_skip >> 2; 1198 Uint32 *dstp = (Uint32 *) info->d_pixels;
1199 int dstskip = info->d_skip >> 2;
1147 vector unsigned char mergePermute; 1200 vector unsigned char mergePermute;
1148 vector unsigned char valphaPermute; 1201 vector unsigned char valphaPermute;
1149 vector unsigned char valphamask; 1202 vector unsigned char valphamask;
1150 vector unsigned char vpixelmask; 1203 vector unsigned char vpixelmask;
1151 vector unsigned char v0; 1204 vector unsigned char v0;
1152 vector unsigned short v1; 1205 vector unsigned short v1;
1153 vector unsigned short v8; 1206 vector unsigned short v8;
1154 v0 = vec_splat_u8(0); 1207 v0 = vec_splat_u8 (0);
1155 v1 = vec_splat_u16(1); 1208 v1 = vec_splat_u16 (1);
1156 v8 = vec_splat_u16(8); 1209 v8 = vec_splat_u16 (8);
1157 mergePermute = VEC_MERGE_PERMUTE(); 1210 mergePermute = VEC_MERGE_PERMUTE ();
1158 valphamask = VEC_ALPHA_MASK(); 1211 valphamask = VEC_ALPHA_MASK ();
1159 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 1212 valphaPermute = vec_and (vec_lvsl (0, (int *) NULL), vec_splat_u8 (0xC));
1160 1213
1161 1214
1162 vpixelmask = vec_nor(valphamask, v0); 1215 vpixelmask = vec_nor (valphamask, v0);
1163 while(height--) { 1216 while (height--) {
1164 width = info->d_width; 1217 width = info->d_width;
1165 #define ONE_PIXEL_BLEND(condition, widthvar) \ 1218 #define ONE_PIXEL_BLEND(condition, widthvar) \
1166 while ((condition)) { \ 1219 while ((condition)) { \
1167 Uint32 dalpha; \ 1220 Uint32 dalpha; \
1168 Uint32 d; \ 1221 Uint32 d; \
1187 } \ 1240 } \
1188 ++srcp; \ 1241 ++srcp; \
1189 ++dstp; \ 1242 ++dstp; \
1190 widthvar--; \ 1243 widthvar--; \
1191 } 1244 }
1192 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1245 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width);
1193 if (width > 0) { 1246 if (width > 0) {
1194 int extrawidth = (width % 4); 1247 int extrawidth = (width % 4);
1195 vector unsigned char valigner = VEC_ALIGNER(srcp); 1248 vector unsigned char valigner = VEC_ALIGNER (srcp);
1196 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1249 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp);
1197 width -= extrawidth; 1250 width -= extrawidth;
1198 while (width) { 1251 while (width) {
1199 vector unsigned char voverflow; 1252 vector unsigned char voverflow;
1200 vector unsigned char vd; 1253 vector unsigned char vd;
1201 vector unsigned char valpha; 1254 vector unsigned char valpha;
1202 vector unsigned char vdstalpha; 1255 vector unsigned char vdstalpha;
1203 /* s = *srcp */ 1256 /* s = *srcp */
1204 voverflow = (vector unsigned char)vec_ld(15, srcp); 1257 voverflow = (vector unsigned char) vec_ld (15, srcp);
1205 vs = vec_perm(vs, voverflow, valigner); 1258 vs = vec_perm (vs, voverflow, valigner);
1206 1259
1207 valpha = vec_perm(vs, v0, valphaPermute); 1260 valpha = vec_perm (vs, v0, valphaPermute);
1208 1261
1209 /* d = *dstp */ 1262 /* d = *dstp */
1210 vd = (vector unsigned char)vec_ld(0, dstp); 1263 vd = (vector unsigned char) vec_ld (0, dstp);
1211 vdstalpha = vec_and(vd, valphamask); 1264 vdstalpha = vec_and (vd, valphamask);
1212 1265
1213 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1266 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8);
1214 1267
1215 /* set the alpha to the dest alpha */ 1268 /* set the alpha to the dest alpha */
1216 vd = vec_and(vd, vpixelmask); 1269 vd = vec_and (vd, vpixelmask);
1217 vd = vec_or(vd, vdstalpha); 1270 vd = vec_or (vd, vdstalpha);
1218 1271
1219 /* *dstp = res */ 1272 /* *dstp = res */
1220 vec_st((vector unsigned int)vd, 0, dstp); 1273 vec_st ((vector unsigned int) vd, 0, dstp);
1221 1274
1222 srcp += 4; 1275 srcp += 4;
1223 dstp += 4; 1276 dstp += 4;
1224 width -= 4; 1277 width -= 4;
1225 vs = voverflow; 1278 vs = voverflow;
1226 } 1279 }
1227 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1280 ONE_PIXEL_BLEND ((extrawidth), extrawidth);
1228 } 1281 }
1229 srcp += srcskip; 1282 srcp += srcskip;
1230 dstp += dstskip; 1283 dstp += dstskip;
1231 } 1284 }
1232 #undef ONE_PIXEL_BLEND 1285 #undef ONE_PIXEL_BLEND
1233 } 1286 }
1234 1287
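All of the Altivec blitters in this file share one row layout: blend single pixels until dstp reaches 16-byte alignment, run the vector body four pixels at a time, then finish the 0-3 leftover pixels counted in extrawidth. A scalar sketch of that head/body/tail shape follows; it is not SDL code, and blend_pixel, blend_four and blit_row are made-up names standing in for the ONE_PIXEL_BLEND macro and for the vec_ld/vec_perm/VEC_MULTIPLY_ALPHA/vec_st body.

    #include <stdint.h>
    #include <stdio.h>

    static void blend_pixel(uint32_t *dst, const uint32_t *src)
    {
        /* stand-in for ONE_PIXEL_BLEND: a simple 50% average */
        *dst = ((((*src & 0x00fefefe) + (*dst & 0x00fefefe)) >> 1)
                + (*src & *dst & 0x00010101)) | 0xff000000;
    }

    static void blend_four(uint32_t *dst, const uint32_t *src)
    {
        int i;                      /* stand-in for the 4-pixel vector body */
        for (i = 0; i < 4; i++)
            blend_pixel(dst + i, src + i);
    }

    static void blit_row(uint32_t *dstp, const uint32_t *srcp, int width)
    {
        while (((uintptr_t) dstp & 15) && width) {  /* head: align the vector store */
            blend_pixel(dstp++, srcp++);
            width--;
        }
        if (width > 0) {
            int extrawidth = width % 4;             /* pixels left over for the tail */
            width -= extrawidth;
            while (width) {                         /* body: 4 pixels per iteration */
                blend_four(dstp, srcp);
                dstp += 4;
                srcp += 4;
                width -= 4;
            }
            while (extrawidth--)                    /* tail: 0-3 leftover pixels */
                blend_pixel(dstp++, srcp++);
        }
    }

    int main(void)
    {
        uint32_t src[8], dst[8];
        int i;
        for (i = 0; i < 8; i++) {
            src[i] = 0x00ffffff;
            dst[i] = 0x00000000;
        }
        blit_row(dst, src, 8);
        printf("%08x\n", dst[7]);                   /* prints ff7f7f7f */
        return 0;
    }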
1235 static void Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo *info) 1288 static void
1289 Blit32to32SurfaceAlphaAltivec (SDL_BlitInfo * info)
1236 { 1290 {
1237 /* XXX : 6 */ 1291 /* XXX : 6 */
1238 unsigned alpha = info->src->alpha; 1292 unsigned alpha = info->src->alpha;
1239 int height = info->d_height; 1293 int height = info->d_height;
1240 Uint32 *srcp = (Uint32 *)info->s_pixels; 1294 Uint32 *srcp = (Uint32 *) info->s_pixels;
1241 int srcskip = info->s_skip >> 2; 1295 int srcskip = info->s_skip >> 2;
1242 Uint32 *dstp = (Uint32 *)info->d_pixels; 1296 Uint32 *dstp = (Uint32 *) info->d_pixels;
1243 int dstskip = info->d_skip >> 2; 1297 int dstskip = info->d_skip >> 2;
1244 SDL_PixelFormat *srcfmt = info->src; 1298 SDL_PixelFormat *srcfmt = info->src;
1245 SDL_PixelFormat *dstfmt = info->dst; 1299 SDL_PixelFormat *dstfmt = info->dst;
1246 unsigned sA = srcfmt->alpha; 1300 unsigned sA = srcfmt->alpha;
1247 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 1301 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
1248 vector unsigned char mergePermute; 1302 vector unsigned char mergePermute;
1249 vector unsigned char vsrcPermute; 1303 vector unsigned char vsrcPermute;
1250 vector unsigned char vdstPermute; 1304 vector unsigned char vdstPermute;
1251 vector unsigned char vsdstPermute; 1305 vector unsigned char vsdstPermute;
1252 vector unsigned char valpha; 1306 vector unsigned char valpha;
1253 vector unsigned char valphamask; 1307 vector unsigned char valphamask;
1254 vector unsigned char vbits; 1308 vector unsigned char vbits;
1255 vector unsigned short v1; 1309 vector unsigned short v1;
1256 vector unsigned short v8; 1310 vector unsigned short v8;
1257 1311
1258 mergePermute = VEC_MERGE_PERMUTE(); 1312 mergePermute = VEC_MERGE_PERMUTE ();
1259 v1 = vec_splat_u16(1); 1313 v1 = vec_splat_u16 (1);
1260 v8 = vec_splat_u16(8); 1314 v8 = vec_splat_u16 (8);
1261 1315
1262 /* set the alpha to 255 on the destination surf */ 1316 /* set the alpha to 255 on the destination surf */
1263 valphamask = VEC_ALPHA_MASK(); 1317 valphamask = VEC_ALPHA_MASK ();
1264 1318
1265 vsrcPermute = calc_swizzle32(srcfmt, NULL); 1319 vsrcPermute = calc_swizzle32 (srcfmt, NULL);
1266 vdstPermute = calc_swizzle32(NULL, dstfmt); 1320 vdstPermute = calc_swizzle32 (NULL, dstfmt);
1267 vsdstPermute = calc_swizzle32(dstfmt, NULL); 1321 vsdstPermute = calc_swizzle32 (dstfmt, NULL);
1268 1322
1269 /* set a vector full of alpha and 255-alpha */ 1323 /* set a vector full of alpha and 255-alpha */
1270 ((unsigned char *)&valpha)[0] = alpha; 1324 ((unsigned char *) &valpha)[0] = alpha;
1271 valpha = vec_splat(valpha, 0); 1325 valpha = vec_splat (valpha, 0);
1272 vbits = (vector unsigned char)vec_splat_s8(-1); 1326 vbits = (vector unsigned char) vec_splat_s8 (-1);
1273 1327
1274 while(height--) { 1328 while (height--) {
1275 int width = info->d_width; 1329 int width = info->d_width;
1276 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1330 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1277 Uint32 Pixel; \ 1331 Uint32 Pixel; \
1278 unsigned sR, sG, sB, dR, dG, dB; \ 1332 unsigned sR, sG, sB, dR, dG, dB; \
1279 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ 1333 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
1282 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ 1336 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
1283 ++srcp; \ 1337 ++srcp; \
1284 ++dstp; \ 1338 ++dstp; \
1285 widthvar--; \ 1339 widthvar--; \
1286 } 1340 }
1287 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1341 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width);
1288 if (width > 0) { 1342 if (width > 0) {
1289 int extrawidth = (width % 4); 1343 int extrawidth = (width % 4);
1290 vector unsigned char valigner = vec_lvsl(0, srcp); 1344 vector unsigned char valigner = vec_lvsl (0, srcp);
1291 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1345 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp);
1292 width -= extrawidth; 1346 width -= extrawidth;
1293 while (width) { 1347 while (width) {
1294 vector unsigned char voverflow; 1348 vector unsigned char voverflow;
1295 vector unsigned char vd; 1349 vector unsigned char vd;
1296 1350
1297 /* s = *srcp */ 1351 /* s = *srcp */
1298 voverflow = (vector unsigned char)vec_ld(15, srcp); 1352 voverflow = (vector unsigned char) vec_ld (15, srcp);
1299 vs = vec_perm(vs, voverflow, valigner); 1353 vs = vec_perm (vs, voverflow, valigner);
1300 vs = vec_perm(vs, valpha, vsrcPermute); 1354 vs = vec_perm (vs, valpha, vsrcPermute);
1301 1355
1302 /* d = *dstp */ 1356 /* d = *dstp */
1303 vd = (vector unsigned char)vec_ld(0, dstp); 1357 vd = (vector unsigned char) vec_ld (0, dstp);
1304 vd = vec_perm(vd, vd, vsdstPermute); 1358 vd = vec_perm (vd, vd, vsdstPermute);
1305 1359
1306 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1360 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8);
1307 1361
1308 /* set the alpha channel to full on */ 1362 /* set the alpha channel to full on */
1309 vd = vec_or(vd, valphamask); 1363 vd = vec_or (vd, valphamask);
1310 vd = vec_perm(vd, vbits, vdstPermute); 1364 vd = vec_perm (vd, vbits, vdstPermute);
1311 1365
1312 /* *dstp = res */ 1366 /* *dstp = res */
1313 vec_st((vector unsigned int)vd, 0, dstp); 1367 vec_st ((vector unsigned int) vd, 0, dstp);
1314 1368
1315 srcp += 4; 1369 srcp += 4;
1316 dstp += 4; 1370 dstp += 4;
1317 width -= 4; 1371 width -= 4;
1318 vs = voverflow; 1372 vs = voverflow;
1319 } 1373 }
1320 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1374 ONE_PIXEL_BLEND ((extrawidth), extrawidth);
1321 } 1375 }
1322 #undef ONE_PIXEL_BLEND 1376 #undef ONE_PIXEL_BLEND
1323 1377
1324 srcp += srcskip; 1378 srcp += srcskip;
1325 dstp += dstskip; 1379 dstp += dstskip;
1326 } 1380 }
1327 1381
1328 } 1382 }
1329 1383
1330 1384
1331 /* fast RGB888->(A)RGB888 blending */ 1385 /* fast RGB888->(A)RGB888 blending */
1332 static void BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo *info) 1386 static void
1333 { 1387 BlitRGBtoRGBSurfaceAlphaAltivec (SDL_BlitInfo * info)
1334 unsigned alpha = info->src->alpha; 1388 {
1389 unsigned alpha = info->src->alpha;
1335 int height = info->d_height; 1390 int height = info->d_height;
1336 Uint32 *srcp = (Uint32 *)info->s_pixels; 1391 Uint32 *srcp = (Uint32 *) info->s_pixels;
1337 int srcskip = info->s_skip >> 2; 1392 int srcskip = info->s_skip >> 2;
1338 Uint32 *dstp = (Uint32 *)info->d_pixels; 1393 Uint32 *dstp = (Uint32 *) info->d_pixels;
1339 int dstskip = info->d_skip >> 2; 1394 int dstskip = info->d_skip >> 2;
1340 vector unsigned char mergePermute; 1395 vector unsigned char mergePermute;
1341 vector unsigned char valpha; 1396 vector unsigned char valpha;
1342 vector unsigned char valphamask; 1397 vector unsigned char valphamask;
1343 vector unsigned short v1; 1398 vector unsigned short v1;
1344 vector unsigned short v8; 1399 vector unsigned short v8;
1345 1400
1346 mergePermute = VEC_MERGE_PERMUTE(); 1401 mergePermute = VEC_MERGE_PERMUTE ();
1347 v1 = vec_splat_u16(1); 1402 v1 = vec_splat_u16 (1);
1348 v8 = vec_splat_u16(8); 1403 v8 = vec_splat_u16 (8);
1349 1404
1350 /* set the alpha to 255 on the destination surf */ 1405 /* set the alpha to 255 on the destination surf */
1351 valphamask = VEC_ALPHA_MASK(); 1406 valphamask = VEC_ALPHA_MASK ();
1352 1407
1353 /* set a vector full of alpha and 255-alpha */ 1408 /* set a vector full of alpha and 255-alpha */
1354 ((unsigned char *)&valpha)[0] = alpha; 1409 ((unsigned char *) &valpha)[0] = alpha;
1355 valpha = vec_splat(valpha, 0); 1410 valpha = vec_splat (valpha, 0);
1356 1411
1357 while(height--) { 1412 while (height--) {
1358 int width = info->d_width; 1413 int width = info->d_width;
1359 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1414 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1360 Uint32 s = *srcp; \ 1415 Uint32 s = *srcp; \
1361 Uint32 d = *dstp; \ 1416 Uint32 d = *dstp; \
1362 Uint32 s1 = s & 0xff00ff; \ 1417 Uint32 s1 = s & 0xff00ff; \
1369 *dstp = d1 | d | 0xff000000; \ 1424 *dstp = d1 | d | 0xff000000; \
1370 ++srcp; \ 1425 ++srcp; \
1371 ++dstp; \ 1426 ++dstp; \
1372 widthvar--; \ 1427 widthvar--; \
1373 } 1428 }
1374 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 1429 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width);
1375 if (width > 0) { 1430 if (width > 0) {
1376 int extrawidth = (width % 4); 1431 int extrawidth = (width % 4);
1377 vector unsigned char valigner = VEC_ALIGNER(srcp); 1432 vector unsigned char valigner = VEC_ALIGNER (srcp);
1378 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 1433 vector unsigned char vs = (vector unsigned char) vec_ld (0, srcp);
1379 width -= extrawidth; 1434 width -= extrawidth;
1380 while (width) { 1435 while (width) {
1381 vector unsigned char voverflow; 1436 vector unsigned char voverflow;
1382 vector unsigned char vd; 1437 vector unsigned char vd;
1383 1438
1384 /* s = *srcp */ 1439 /* s = *srcp */
1385 voverflow = (vector unsigned char)vec_ld(15, srcp); 1440 voverflow = (vector unsigned char) vec_ld (15, srcp);
1386 vs = vec_perm(vs, voverflow, valigner); 1441 vs = vec_perm (vs, voverflow, valigner);
1387 1442
1388 /* d = *dstp */ 1443 /* d = *dstp */
1389 vd = (vector unsigned char)vec_ld(0, dstp); 1444 vd = (vector unsigned char) vec_ld (0, dstp);
1390 1445
1391 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 1446 VEC_MULTIPLY_ALPHA (vs, vd, valpha, mergePermute, v1, v8);
1392 1447
1393 /* set the alpha channel to full on */ 1448 /* set the alpha channel to full on */
1394 vd = vec_or(vd, valphamask); 1449 vd = vec_or (vd, valphamask);
1395 1450
1396 /* *dstp = res */ 1451 /* *dstp = res */
1397 vec_st((vector unsigned int)vd, 0, dstp); 1452 vec_st ((vector unsigned int) vd, 0, dstp);
1398 1453
1399 srcp += 4; 1454 srcp += 4;
1400 dstp += 4; 1455 dstp += 4;
1401 width -= 4; 1456 width -= 4;
1402 vs = voverflow; 1457 vs = voverflow;
1403 } 1458 }
1404 ONE_PIXEL_BLEND((extrawidth), extrawidth); 1459 ONE_PIXEL_BLEND ((extrawidth), extrawidth);
1405 } 1460 }
1406 #undef ONE_PIXEL_BLEND 1461 #undef ONE_PIXEL_BLEND
1407 1462
1408 srcp += srcskip; 1463 srcp += srcskip;
1409 dstp += dstskip; 1464 dstp += dstskip;
1410 } 1465 }
1411 } 1466 }
1467
1412 #if __MWERKS__ 1468 #if __MWERKS__
1413 #pragma altivec_model off 1469 #pragma altivec_model off
1414 #endif 1470 #endif
1415 #endif /* SDL_ALTIVEC_BLITTERS */ 1471 #endif /* SDL_ALTIVEC_BLITTERS */
1416 1472
1417 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 1473 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
1418 static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) 1474 static void
1419 { 1475 BlitRGBtoRGBSurfaceAlpha128 (SDL_BlitInfo * info)
1420 int width = info->d_width; 1476 {
1421 int height = info->d_height; 1477 int width = info->d_width;
1422 Uint32 *srcp = (Uint32 *)info->s_pixels; 1478 int height = info->d_height;
1423 int srcskip = info->s_skip >> 2; 1479 Uint32 *srcp = (Uint32 *) info->s_pixels;
1424 Uint32 *dstp = (Uint32 *)info->d_pixels; 1480 int srcskip = info->s_skip >> 2;
1425 int dstskip = info->d_skip >> 2; 1481 Uint32 *dstp = (Uint32 *) info->d_pixels;
1426 1482 int dstskip = info->d_skip >> 2;
1427 while(height--) { 1483
1484 while (height--) {
1485 /* *INDENT-OFF* */
1428 DUFFS_LOOP4({ 1486 DUFFS_LOOP4({
1429 Uint32 s = *srcp++; 1487 Uint32 s = *srcp++;
1430 Uint32 d = *dstp; 1488 Uint32 d = *dstp;
1431 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 1489 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
1432 + (s & d & 0x00010101)) | 0xff000000; 1490 + (s & d & 0x00010101)) | 0xff000000;
1433 }, width); 1491 }, width);
1434 srcp += srcskip; 1492 /* *INDENT-ON* */
1435 dstp += dstskip; 1493 srcp += srcskip;
1436 } 1494 dstp += dstskip;
1495 }
1437 } 1496 }
1438 1497
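BlitRGBtoRGBSurfaceAlpha128 uses a branch-free 50% average: masking with 0x00fefefe clears the low bit of every byte, so a single 32-bit add can halve all three channels without carries crossing channel boundaries, and the s & d & 0x00010101 term restores the unit lost when both low bits were set. A minimal standalone check against a per-channel reference (not SDL code; the helper names are made up):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t avg50_packed(uint32_t s, uint32_t d)
    {
        return ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
                + (s & d & 0x00010101)) | 0xff000000;
    }

    static uint32_t avg50_reference(uint32_t s, uint32_t d)
    {
        uint32_t r = 0;
        int shift;
        for (shift = 0; shift < 24; shift += 8) {
            uint32_t sc = (s >> shift) & 0xff;
            uint32_t dc = (d >> shift) & 0xff;
            r |= ((sc + dc) >> 1) << shift;     /* floor of the per-channel mean */
        }
        return r | 0xff000000;                  /* destination alpha forced opaque */
    }

    int main(void)
    {
        uint32_t s = 0x00ff8001, d = 0x00103070;
        printf("packed=%08x reference=%08x\n",  /* both print ff875838 */
               avg50_packed(s, d), avg50_reference(s, d));
        return 0;
    }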
1439 /* fast RGB888->(A)RGB888 blending with surface alpha */ 1498 /* fast RGB888->(A)RGB888 blending with surface alpha */
1440 static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) 1499 static void
1441 { 1500 BlitRGBtoRGBSurfaceAlpha (SDL_BlitInfo * info)
1442 unsigned alpha = info->src->alpha; 1501 {
1443 if(alpha == 128) { 1502 unsigned alpha = info->src->alpha;
1444 BlitRGBtoRGBSurfaceAlpha128(info); 1503 if (alpha == 128) {
1445 } else { 1504 BlitRGBtoRGBSurfaceAlpha128 (info);
1446 int width = info->d_width; 1505 } else {
1447 int height = info->d_height; 1506 int width = info->d_width;
1448 Uint32 *srcp = (Uint32 *)info->s_pixels; 1507 int height = info->d_height;
1449 int srcskip = info->s_skip >> 2; 1508 Uint32 *srcp = (Uint32 *) info->s_pixels;
1450 Uint32 *dstp = (Uint32 *)info->d_pixels; 1509 int srcskip = info->s_skip >> 2;
1451 int dstskip = info->d_skip >> 2; 1510 Uint32 *dstp = (Uint32 *) info->d_pixels;
1452 Uint32 s; 1511 int dstskip = info->d_skip >> 2;
1453 Uint32 d; 1512 Uint32 s;
1454 Uint32 s1; 1513 Uint32 d;
1455 Uint32 d1; 1514 Uint32 s1;
1456 1515 Uint32 d1;
1457 while(height--) { 1516
1517 while (height--) {
1518 /* *INDENT-OFF* */
1458 DUFFS_LOOP_DOUBLE2({ 1519 DUFFS_LOOP_DOUBLE2({
1459 /* One Pixel Blend */ 1520 /* One Pixel Blend */
1460 s = *srcp; 1521 s = *srcp;
1461 d = *dstp; 1522 d = *dstp;
1462 s1 = s & 0xff00ff; 1523 s1 = s & 0xff00ff;
1497 1558
1498 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000; 1559 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000;
1499 ++srcp; 1560 ++srcp;
1500 ++dstp; 1561 ++dstp;
1501 }, width); 1562 }, width);
1502 srcp += srcskip; 1563 /* *INDENT-ON* */
1503 dstp += dstskip; 1564 srcp += srcskip;
1504 } 1565 dstp += dstskip;
1505 } 1566 }
1567 }
1506 } 1568 }
1507 1569
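The non-128 path interpolates each channel as d + ((s - d) * alpha >> 8), packing red and blue into the 0x00ff00ff lanes so one 32-bit multiply serves both, with green multiplied separately; the >> 8 is a cheap stand-in for dividing by 255. The same arithmetic written one channel at a time, as a standalone sketch (not SDL code):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t blend_surface_alpha(uint32_t s, uint32_t d, unsigned alpha)
    {
        uint32_t out = 0xff000000;              /* destination alpha forced opaque */
        int shift;
        for (shift = 0; shift < 24; shift += 8) {
            unsigned sc = (s >> shift) & 0xff;
            unsigned dc = (d >> shift) & 0xff;
            dc = dc + ((sc - dc) * alpha >> 8); /* same formula, one channel at a time */
            out |= (dc & 0xff) << shift;
        }
        return out;
    }

    int main(void)
    {
        /* a red-ish source over a blue-ish destination at surface alpha 64 */
        printf("%08x\n", blend_surface_alpha(0x00c03010, 0x00102080, 64));
        return 0;                               /* prints ff3c2464 */
    }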
1508 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ 1570 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
1509 static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info) 1571 static void
1510 { 1572 BlitRGBtoRGBPixelAlpha (SDL_BlitInfo * info)
1511 int width = info->d_width; 1573 {
1512 int height = info->d_height; 1574 int width = info->d_width;
1513 Uint32 *srcp = (Uint32 *)info->s_pixels; 1575 int height = info->d_height;
1514 int srcskip = info->s_skip >> 2; 1576 Uint32 *srcp = (Uint32 *) info->s_pixels;
1515 Uint32 *dstp = (Uint32 *)info->d_pixels; 1577 int srcskip = info->s_skip >> 2;
1516 int dstskip = info->d_skip >> 2; 1578 Uint32 *dstp = (Uint32 *) info->d_pixels;
1517 1579 int dstskip = info->d_skip >> 2;
1518 while(height--) { 1580
1581 while (height--) {
1582 /* *INDENT-OFF* */
1519 DUFFS_LOOP4({ 1583 DUFFS_LOOP4({
1520 Uint32 dalpha; 1584 Uint32 dalpha;
1521 Uint32 d; 1585 Uint32 d;
1522 Uint32 s1; 1586 Uint32 s1;
1523 Uint32 d1; 1587 Uint32 d1;
1547 } 1611 }
1548 } 1612 }
1549 ++srcp; 1613 ++srcp;
1550 ++dstp; 1614 ++dstp;
1551 }, width); 1615 }, width);
1552 srcp += srcskip; 1616 /* *INDENT-ON* */
1553 dstp += dstskip; 1617 srcp += srcskip;
1554 } 1618 dstp += dstskip;
1619 }
1555 } 1620 }
1556 1621
1557 #if GCC_ASMBLIT 1622 #if GCC_ASMBLIT
1558 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 1623 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
1559 inline static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) 1624 inline static void
1560 { 1625 BlitRGBtoRGBPixelAlphaMMX3DNOW (SDL_BlitInfo * info)
1561 int width = info->d_width; 1626 {
1562 int height = info->d_height; 1627 int width = info->d_width;
1563 Uint32 *srcp = (Uint32 *)info->s_pixels; 1628 int height = info->d_height;
1564 int srcskip = info->s_skip >> 2; 1629 Uint32 *srcp = (Uint32 *) info->s_pixels;
1565 Uint32 *dstp = (Uint32 *)info->d_pixels; 1630 int srcskip = info->s_skip >> 2;
1566 int dstskip = info->d_skip >> 2; 1631 Uint32 *dstp = (Uint32 *) info->d_pixels;
1567 SDL_PixelFormat* sf = info->src; 1632 int dstskip = info->d_skip >> 2;
1568 Uint32 amask = sf->Amask; 1633 SDL_PixelFormat *sf = info->src;
1569 1634 Uint32 amask = sf->Amask;
1570 __asm__ ( 1635
1571 /* make mm6 all zeros. */ 1636 __asm__ (
1572 "pxor %%mm6, %%mm6\n" 1637 /* make mm6 all zeros. */
1573 1638 "pxor %%mm6, %%mm6\n"
1574 /* Make a mask to preserve the alpha. */ 1639 /* Make a mask to preserve the alpha. */
1575 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ 1640 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */
1576 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ 1641 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */
1577 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ 1642 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */
1578 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ 1643 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */
1579 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ 1644 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */
1580 1645 /* form channel masks */
1581 /* form channel masks */ 1646 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */
1582 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ 1647 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */
1583 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ 1648 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */
1584 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ 1649 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */
1585 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ 1650 /* get alpha channel shift */
1586 1651 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */
1587 /* get alpha channel shift */ 1652 : /* nothing */ : "m" (sf->Amask), "m" (sf->Ashift));
1588 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ 1653
1589 1654 while (height--) {
1590 : /* nothing */ : "m" (sf->Amask), "m" (sf->Ashift) ); 1655
1591 1656 /* *INDENT-OFF* */
1592 while(height--) {
1593
1594 DUFFS_LOOP4({ 1657 DUFFS_LOOP4({
1595 Uint32 alpha; 1658 Uint32 alpha;
1596 1659
1597 __asm__ ( 1660 __asm__ (
1598 "prefetch 64(%0)\n" 1661 "prefetch 64(%0)\n"
1660 1723
1661 } 1724 }
1662 ++srcp; 1725 ++srcp;
1663 ++dstp; 1726 ++dstp;
1664 }, width); 1727 }, width);
1665 srcp += srcskip; 1728 /* *INDENT-ON* */
1666 dstp += dstskip; 1729 srcp += srcskip;
1667 } 1730 dstp += dstskip;
1668 1731 }
1669 __asm__ ( 1732
1670 "emms\n" 1733 __asm__ ("emms\n":);
1671 : ); 1734 }
1672 } 1735
1673 /* End GCC_ASMBLIT*/ 1736 /* End GCC_ASMBLIT*/
1674 1737
1675 #elif MSVC_ASMBLIT 1738 #elif MSVC_ASMBLIT
1676 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 1739 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
1677 static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) 1740 static void
1678 { 1741 BlitRGBtoRGBPixelAlphaMMX3DNOW (SDL_BlitInfo * info)
1679 int width = info->d_width; 1742 {
1680 int height = info->d_height; 1743 int width = info->d_width;
1681 Uint32 *srcp = (Uint32 *)info->s_pixels; 1744 int height = info->d_height;
1682 int srcskip = info->s_skip >> 2; 1745 Uint32 *srcp = (Uint32 *) info->s_pixels;
1683 Uint32 *dstp = (Uint32 *)info->d_pixels; 1746 int srcskip = info->s_skip >> 2;
1684 int dstskip = info->d_skip >> 2; 1747 Uint32 *dstp = (Uint32 *) info->d_pixels;
1685 SDL_PixelFormat* sf = info->src; 1748 int dstskip = info->d_skip >> 2;
1686 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; 1749 SDL_PixelFormat *sf = info->src;
1687 Uint32 amask = sf->Amask; 1750 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
1688 Uint32 ashift = sf->Ashift; 1751 Uint32 amask = sf->Amask;
1689 Uint64 multmask; 1752 Uint32 ashift = sf->Ashift;
1690 1753 Uint64 multmask;
1691 __m64 src1, dst1, mm_alpha, mm_zero, dmask; 1754
1692 1755 __m64 src1, dst1, mm_alpha, mm_zero, dmask;
1693 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 1756
1694 multmask = ~(0xFFFFi64 << (ashift * 2)); 1757 mm_zero = _mm_setzero_si64 (); /* 0 -> mm_zero */
1695 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ 1758 multmask = ~(0xFFFFi 64 << (ashift * 2));
1696 1759 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */
1697 while(height--) { 1760
1761 while (height--) {
1762 /* *INDENT-OFF* */
1698 DUFFS_LOOP4({ 1763 DUFFS_LOOP4({
1699 Uint32 alpha; 1764 Uint32 alpha;
1700 1765
1701 _m_prefetch(srcp + 16); 1766 _m_prefetch(srcp + 16);
1702 _m_prefetch(dstp + 16); 1767 _m_prefetch(dstp + 16);
1730 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 1795 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
1731 } 1796 }
1732 ++srcp; 1797 ++srcp;
1733 ++dstp; 1798 ++dstp;
1734 }, width); 1799 }, width);
1735 srcp += srcskip; 1800 /* *INDENT-ON* */
1736 dstp += dstskip; 1801 srcp += srcskip;
1737 } 1802 dstp += dstskip;
1738 _mm_empty(); 1803 }
1739 } 1804 _mm_empty ();
1805 }
1806
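The multmask/dmask setup in both MMX pixel-alpha blitters builds a 64-bit mask that keeps every 16-bit lane except the alpha lane: channels are unpacked from 8 to 16 bits, so the alpha lane begins at bit Ashift * 2. A portable-C sketch of the same value (assuming an ARGB8888 source with Ashift = 24; the MSVC build spells the constant with the i64 suffix rather than ULL):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned ashift = 24;                               /* assumed ARGB8888 layout */
        uint64_t multmask = ~(0xFFFFULL << (ashift * 2));   /* keep R, G, B lanes only */
        printf("%016llx\n", (unsigned long long) multmask); /* prints 0000ffffffffffff */
        return 0;
    }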
1740 /* End MSVC_ASMBLIT */ 1807 /* End MSVC_ASMBLIT */
1741 1808
1742 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 1809 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
1743 1810
1744 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ 1811 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
1750 /* blend two 16 bit pixels at 50% */ 1817 /* blend two 16 bit pixels at 50% */
1751 #define BLEND2x16_50(d, s, mask) \ 1818 #define BLEND2x16_50(d, s, mask) \
1752 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ 1819 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \
1753 + (s & d & (~(mask | mask << 16)))) 1820 + (s & d & (~(mask | mask << 16))))
1754 1821
1755 static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask) 1822 static void
1756 { 1823 Blit16to16SurfaceAlpha128 (SDL_BlitInfo * info, Uint16 mask)
1757 int width = info->d_width; 1824 {
1758 int height = info->d_height; 1825 int width = info->d_width;
1759 Uint16 *srcp = (Uint16 *)info->s_pixels; 1826 int height = info->d_height;
1760 int srcskip = info->s_skip >> 1; 1827 Uint16 *srcp = (Uint16 *) info->s_pixels;
1761 Uint16 *dstp = (Uint16 *)info->d_pixels; 1828 int srcskip = info->s_skip >> 1;
1762 int dstskip = info->d_skip >> 1; 1829 Uint16 *dstp = (Uint16 *) info->d_pixels;
1763 1830 int dstskip = info->d_skip >> 1;
1764 while(height--) { 1831
1765 if(((uintptr_t)srcp ^ (uintptr_t)dstp) & 2) { 1832 while (height--) {
1766 /* 1833 if (((uintptr_t) srcp ^ (uintptr_t) dstp) & 2) {
1767 * Source and destination not aligned, pipeline it. 1834 /*
1768 * This is mostly a win for big blits but no loss for 1835 * Source and destination not aligned, pipeline it.
1769 * small ones 1836 * This is mostly a win for big blits but no loss for
1770 */ 1837 * small ones
1771 Uint32 prev_sw; 1838 */
1772 int w = width; 1839 Uint32 prev_sw;
1773 1840 int w = width;
1774 /* handle odd destination */ 1841
1775 if((uintptr_t)dstp & 2) { 1842 /* handle odd destination */
1776 Uint16 d = *dstp, s = *srcp; 1843 if ((uintptr_t) dstp & 2) {
1777 *dstp = BLEND16_50(d, s, mask); 1844 Uint16 d = *dstp, s = *srcp;
1778 dstp++; 1845 *dstp = BLEND16_50 (d, s, mask);
1779 srcp++; 1846 dstp++;
1780 w--; 1847 srcp++;
1781 } 1848 w--;
1782 srcp++; /* srcp is now 32-bit aligned */ 1849 }
1783 1850 srcp++; /* srcp is now 32-bit aligned */
1784 /* bootstrap pipeline with first halfword */ 1851
1785 prev_sw = ((Uint32 *)srcp)[-1]; 1852 /* bootstrap pipeline with first halfword */
1786 1853 prev_sw = ((Uint32 *) srcp)[-1];
1787 while(w > 1) { 1854
1788 Uint32 sw, dw, s; 1855 while (w > 1) {
1789 sw = *(Uint32 *)srcp; 1856 Uint32 sw, dw, s;
1790 dw = *(Uint32 *)dstp; 1857 sw = *(Uint32 *) srcp;
1858 dw = *(Uint32 *) dstp;
1791 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 1859 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
1792 s = (prev_sw << 16) + (sw >> 16); 1860 s = (prev_sw << 16) + (sw >> 16);
1793 #else 1861 #else
1794 s = (prev_sw >> 16) + (sw << 16); 1862 s = (prev_sw >> 16) + (sw << 16);
1795 #endif 1863 #endif
1796 prev_sw = sw; 1864 prev_sw = sw;
1797 *(Uint32 *)dstp = BLEND2x16_50(dw, s, mask); 1865 *(Uint32 *) dstp = BLEND2x16_50 (dw, s, mask);
1798 dstp += 2; 1866 dstp += 2;
1799 srcp += 2; 1867 srcp += 2;
1800 w -= 2; 1868 w -= 2;
1801 } 1869 }
1802 1870
1803 /* final pixel if any */ 1871 /* final pixel if any */
1804 if(w) { 1872 if (w) {
1805 Uint16 d = *dstp, s; 1873 Uint16 d = *dstp, s;
1806 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 1874 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
1807 s = (Uint16)prev_sw; 1875 s = (Uint16) prev_sw;
1808 #else 1876 #else
1809 s = (Uint16)(prev_sw >> 16); 1877 s = (Uint16) (prev_sw >> 16);
1810 #endif 1878 #endif
1811 *dstp = BLEND16_50(d, s, mask); 1879 *dstp = BLEND16_50 (d, s, mask);
1812 srcp++; 1880 srcp++;
1813 dstp++; 1881 dstp++;
1814 } 1882 }
1815 srcp += srcskip - 1; 1883 srcp += srcskip - 1;
1816 dstp += dstskip; 1884 dstp += dstskip;
1817 } else { 1885 } else {
1818 /* source and destination are aligned */ 1886 /* source and destination are aligned */
1819 int w = width; 1887 int w = width;
1820 1888
1821 /* first odd pixel? */ 1889 /* first odd pixel? */
1822 if((uintptr_t)srcp & 2) { 1890 if ((uintptr_t) srcp & 2) {
1823 Uint16 d = *dstp, s = *srcp; 1891 Uint16 d = *dstp, s = *srcp;
1824 *dstp = BLEND16_50(d, s, mask); 1892 *dstp = BLEND16_50 (d, s, mask);
1825 srcp++; 1893 srcp++;
1826 dstp++; 1894 dstp++;
1827 w--; 1895 w--;
1828 } 1896 }
1829 /* srcp and dstp are now 32-bit aligned */ 1897 /* srcp and dstp are now 32-bit aligned */
1830 1898
1831 while(w > 1) { 1899 while (w > 1) {
1832 Uint32 sw = *(Uint32 *)srcp; 1900 Uint32 sw = *(Uint32 *) srcp;
1833 Uint32 dw = *(Uint32 *)dstp; 1901 Uint32 dw = *(Uint32 *) dstp;
1834 *(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask); 1902 *(Uint32 *) dstp = BLEND2x16_50 (dw, sw, mask);
1835 srcp += 2; 1903 srcp += 2;
1836 dstp += 2; 1904 dstp += 2;
1837 w -= 2; 1905 w -= 2;
1838 } 1906 }
1839 1907
1840 /* last odd pixel? */ 1908 /* last odd pixel? */
1841 if(w) { 1909 if (w) {
1842 Uint16 d = *dstp, s = *srcp; 1910 Uint16 d = *dstp, s = *srcp;
1843 *dstp = BLEND16_50(d, s, mask); 1911 *dstp = BLEND16_50 (d, s, mask);
1844 srcp++; 1912 srcp++;
1845 dstp++; 1913 dstp++;
1846 } 1914 }
1847 srcp += srcskip; 1915 srcp += srcskip;
1848 dstp += dstskip; 1916 dstp += dstskip;
1849 } 1917 }
1850 } 1918 }
1851 } 1919 }
1852 1920
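BLEND2x16_50 averages two RGB565 pixels held in one 32-bit word: the 0xf7de mask clears the low bit of every 5/6/5 field, so the two shifted halves add without carries leaking between fields or between the two pixels, and s & d & ~mask restores the unit lost when both low bits were 1. A minimal standalone check (not SDL code; the helper names are made up):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t blend2x565_50(uint32_t d, uint32_t s)
    {
        const uint32_t mask = 0xf7def7de;   /* 0xf7de replicated for both pixels */
        return ((s & mask) >> 1) + ((d & mask) >> 1) + (s & d & ~mask);
    }

    static uint16_t blend565_50_reference(uint16_t s, uint16_t d)
    {
        unsigned r = (((s >> 11) & 0x1f) + ((d >> 11) & 0x1f)) >> 1;
        unsigned g = (((s >> 5) & 0x3f) + ((d >> 5) & 0x3f)) >> 1;
        unsigned b = ((s & 0x1f) + (d & 0x1f)) >> 1;
        return (uint16_t) ((r << 11) | (g << 5) | b);
    }

    int main(void)
    {
        uint16_t s = 0xf81f, d = 0x07e0;    /* magenta blended over green */
        uint32_t sp = (uint32_t) s << 16 | s;
        uint32_t dp = (uint32_t) d << 16 | d;
        printf("packed=%08x reference=%04x\n",  /* prints 7bef7bef and 7bef */
               blend2x565_50(dp, sp), blend565_50_reference(s, d));
        return 0;
    }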
1853 #if GCC_ASMBLIT 1921 #if GCC_ASMBLIT
1854 /* fast RGB565->RGB565 blending with surface alpha */ 1922 /* fast RGB565->RGB565 blending with surface alpha */
1855 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) 1923 static void
1856 { 1924 Blit565to565SurfaceAlphaMMX (SDL_BlitInfo * info)
1857 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 1925 {
1858 if(alpha == 128) { 1926 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */
1859 Blit16to16SurfaceAlpha128(info, 0xf7de); 1927 if (alpha == 128) {
1860 } else { 1928 Blit16to16SurfaceAlpha128 (info, 0xf7de);
1861 int width = info->d_width; 1929 } else {
1862 int height = info->d_height; 1930 int width = info->d_width;
1863 Uint16 *srcp = (Uint16 *)info->s_pixels; 1931 int height = info->d_height;
1864 int srcskip = info->s_skip >> 1; 1932 Uint16 *srcp = (Uint16 *) info->s_pixels;
1865 Uint16 *dstp = (Uint16 *)info->d_pixels; 1933 int srcskip = info->s_skip >> 1;
1866 int dstskip = info->d_skip >> 1; 1934 Uint16 *dstp = (Uint16 *) info->d_pixels;
1867 Uint32 s, d; 1935 int dstskip = info->d_skip >> 1;
1868 Uint8 load[8]; 1936 Uint32 s, d;
1869 1937 Uint8 load[8];
1870 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 1938
1871 *(Uint64 *)load = alpha; 1939 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
1872 alpha >>= 3; /* downscale alpha to 5 bits */ 1940 *(Uint64 *) load = alpha;
1873 1941 alpha >>= 3; /* downscale alpha to 5 bits */
1874 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ 1942
1875 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ 1943 movq_m2r (*load, mm0); /* alpha(0000000A) -> mm0 */
1876 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ 1944 punpcklwd_r2r (mm0, mm0); /* 00000A0A -> mm0 */
1877 /* position alpha to allow for mullo and mulhi on diff channels 1945 punpcklwd_r2r (mm0, mm0); /* 0A0A0A0A -> mm0 */
1878 to reduce the number of operations */ 1946 /* position alpha to allow for mullo and mulhi on diff channels
1879 psllq_i2r(3, mm0); 1947 to reduce the number of operations */
1880 1948 psllq_i2r (3, mm0);
1881 /* Setup the 565 color channel masks */ 1949
1882 *(Uint64 *)load = 0x07E007E007E007E0ULL; 1950 /* Setup the 565 color channel masks */
1883 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ 1951 *(Uint64 *) load = 0x07E007E007E007E0ULL;
1884 *(Uint64 *)load = 0x001F001F001F001FULL; 1952 movq_m2r (*load, mm4); /* MASKGREEN -> mm4 */
1885 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ 1953 *(Uint64 *) load = 0x001F001F001F001FULL;
1886 while(height--) { 1954 movq_m2r (*load, mm7); /* MASKBLUE -> mm7 */
1955 while (height--) {
1956 /* *INDENT-OFF* */
1887 DUFFS_LOOP_QUATRO2( 1957 DUFFS_LOOP_QUATRO2(
1888 { 1958 {
1889 s = *srcp++; 1959 s = *srcp++;
1890 d = *dstp; 1960 d = *dstp;
1891 /* 1961 /*
1981 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ 2051 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */
1982 2052
1983 srcp += 4; 2053 srcp += 4;
1984 dstp += 4; 2054 dstp += 4;
1985 }, width); 2055 }, width);
1986 srcp += srcskip; 2056 /* *INDENT-ON* */
1987 dstp += dstskip; 2057 srcp += srcskip;
1988 } 2058 dstp += dstskip;
1989 emms(); 2059 }
1990 } 2060 emms ();
2061 }
1991 } 2062 }
1992 2063
1993 /* fast RGB555->RGB555 blending with surface alpha */ 2064 /* fast RGB555->RGB555 blending with surface alpha */
1994 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) 2065 static void
1995 { 2066 Blit555to555SurfaceAlphaMMX (SDL_BlitInfo * info)
1996 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 2067 {
1997 if(alpha == 128) { 2068 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */
1998 Blit16to16SurfaceAlpha128(info, 0xfbde); 2069 if (alpha == 128) {
1999 } else { 2070 Blit16to16SurfaceAlpha128 (info, 0xfbde);
2000 int width = info->d_width; 2071 } else {
2001 int height = info->d_height; 2072 int width = info->d_width;
2002 Uint16 *srcp = (Uint16 *)info->s_pixels; 2073 int height = info->d_height;
2003 int srcskip = info->s_skip >> 1; 2074 Uint16 *srcp = (Uint16 *) info->s_pixels;
2004 Uint16 *dstp = (Uint16 *)info->d_pixels; 2075 int srcskip = info->s_skip >> 1;
2005 int dstskip = info->d_skip >> 1; 2076 Uint16 *dstp = (Uint16 *) info->d_pixels;
2006 Uint32 s, d; 2077 int dstskip = info->d_skip >> 1;
2007 Uint8 load[8]; 2078 Uint32 s, d;
2008 2079 Uint8 load[8];
2009 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 2080
2010 *(Uint64 *)load = alpha; 2081 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2011 alpha >>= 3; /* downscale alpha to 5 bits */ 2082 *(Uint64 *) load = alpha;
2012 2083 alpha >>= 3; /* downscale alpha to 5 bits */
2013 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ 2084
2014 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ 2085 movq_m2r (*load, mm0); /* alpha(0000000A) -> mm0 */
2015 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ 2086 punpcklwd_r2r (mm0, mm0); /* 00000A0A -> mm0 */
2016 /* position alpha to allow for mullo and mulhi on diff channels 2087 punpcklwd_r2r (mm0, mm0); /* 0A0A0A0A -> mm0 */
2017 to reduce the number of operations */ 2088 /* position alpha to allow for mullo and mulhi on diff channels
2018 psllq_i2r(3, mm0); 2089 to reduce the number of operations */
2019 2090 psllq_i2r (3, mm0);
2020 /* Setup the 555 color channel masks */ 2091
2021 *(Uint64 *)load = 0x03E003E003E003E0ULL; 2092 /* Setup the 555 color channel masks */
2022 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ 2093 *(Uint64 *) load = 0x03E003E003E003E0ULL;
2023 *(Uint64 *)load = 0x001F001F001F001FULL; 2094 movq_m2r (*load, mm4); /* MASKGREEN -> mm4 */
2024 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ 2095 *(Uint64 *) load = 0x001F001F001F001FULL;
2025 while(height--) { 2096 movq_m2r (*load, mm7); /* MASKBLUE -> mm7 */
2097 while (height--) {
2098 /* *INDENT-OFF* */
2026 DUFFS_LOOP_QUATRO2( 2099 DUFFS_LOOP_QUATRO2(
2027 { 2100 {
2028 s = *srcp++; 2101 s = *srcp++;
2029 d = *dstp; 2102 d = *dstp;
2030 /* 2103 /*
2124 2197
2125 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ 2198 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */
2126 2199
2127 srcp += 4; 2200 srcp += 4;
2128 dstp += 4; 2201 dstp += 4;
2129 }, width); 2202 }, width);
2130 srcp += srcskip; 2203 /* *INDENT-ON* */
2131 dstp += dstskip; 2204 srcp += srcskip;
2132 } 2205 dstp += dstskip;
2133 emms(); 2206 }
2134 } 2207 emms ();
2135 } 2208 }
2209 }
2210
2136 /* End GCC_ASMBLIT */ 2211 /* End GCC_ASMBLIT */
2137 2212
2138 #elif MSVC_ASMBLIT 2213 #elif MSVC_ASMBLIT
2139 /* fast RGB565->RGB565 blending with surface alpha */ 2214 /* fast RGB565->RGB565 blending with surface alpha */
2140 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) 2215 static void
2141 { 2216 Blit565to565SurfaceAlphaMMX (SDL_BlitInfo * info)
2142 unsigned alpha = info->src->alpha; 2217 {
2143 if(alpha == 128) { 2218 unsigned alpha = info->src->alpha;
2144 Blit16to16SurfaceAlpha128(info, 0xf7de); 2219 if (alpha == 128) {
2145 } else { 2220 Blit16to16SurfaceAlpha128 (info, 0xf7de);
2146 int width = info->d_width; 2221 } else {
2147 int height = info->d_height; 2222 int width = info->d_width;
2148 Uint16 *srcp = (Uint16 *)info->s_pixels; 2223 int height = info->d_height;
2149 int srcskip = info->s_skip >> 1; 2224 Uint16 *srcp = (Uint16 *) info->s_pixels;
2150 Uint16 *dstp = (Uint16 *)info->d_pixels; 2225 int srcskip = info->s_skip >> 1;
2151 int dstskip = info->d_skip >> 1; 2226 Uint16 *dstp = (Uint16 *) info->d_pixels;
2152 Uint32 s, d; 2227 int dstskip = info->d_skip >> 1;
2153 2228 Uint32 s, d;
2154 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; 2229
2155 2230 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
2156 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 2231
2157 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ 2232 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2158 alpha >>= 3; /* downscale alpha to 5 bits */ 2233 mm_alpha = _mm_set_pi32 (0, alpha); /* 0000000A -> mm_alpha */
2159 2234 alpha >>= 3; /* downscale alpha to 5 bits */
2160 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 2235
2161 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 2236 mm_alpha = _mm_unpacklo_pi16 (mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
2162 /* position alpha to allow for mullo and mulhi on diff channels 2237 mm_alpha = _mm_unpacklo_pi32 (mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
2163 to reduce the number of operations */ 2238 /* position alpha to allow for mullo and mulhi on diff channels
2164 mm_alpha = _mm_slli_si64(mm_alpha, 3); 2239 to reduce the number of operations */
2165 2240 mm_alpha = _mm_slli_si64 (mm_alpha, 3);
2166 /* Setup the 565 color channel masks */ 2241
2167 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ 2242 /* Setup the 565 color channel masks */
2168 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 2243 gmask = _mm_set_pi32 (0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */
2169 2244 bmask = _mm_set_pi32 (0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
2170 while(height--) { 2245
2246 while (height--) {
2247 /* *INDENT-OFF* */
2171 DUFFS_LOOP_QUATRO2( 2248 DUFFS_LOOP_QUATRO2(
2172 { 2249 {
2173 s = *srcp++; 2250 s = *srcp++;
2174 d = *dstp; 2251 d = *dstp;
2175 /* 2252 /*
2260 2337
2261 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ 2338 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
2262 2339
2263 srcp += 4; 2340 srcp += 4;
2264 dstp += 4; 2341 dstp += 4;
2265 }, width); 2342 }, width);
2266 srcp += srcskip; 2343 /* *INDENT-ON* */
2267 dstp += dstskip; 2344 srcp += srcskip;
2268 } 2345 dstp += dstskip;
2269 _mm_empty(); 2346 }
2270 } 2347 _mm_empty ();
2348 }
2271 } 2349 }
2272 2350
2273 /* fast RGB555->RGB555 blending with surface alpha */ 2351 /* fast RGB555->RGB555 blending with surface alpha */
2274 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) 2352 static void
2275 { 2353 Blit555to555SurfaceAlphaMMX (SDL_BlitInfo * info)
2276 unsigned alpha = info->src->alpha; 2354 {
2277 if(alpha == 128) { 2355 unsigned alpha = info->src->alpha;
2278 Blit16to16SurfaceAlpha128(info, 0xfbde); 2356 if (alpha == 128) {
2279 } else { 2357 Blit16to16SurfaceAlpha128 (info, 0xfbde);
2280 int width = info->d_width; 2358 } else {
2281 int height = info->d_height; 2359 int width = info->d_width;
2282 Uint16 *srcp = (Uint16 *)info->s_pixels; 2360 int height = info->d_height;
2283 int srcskip = info->s_skip >> 1; 2361 Uint16 *srcp = (Uint16 *) info->s_pixels;
2284 Uint16 *dstp = (Uint16 *)info->d_pixels; 2362 int srcskip = info->s_skip >> 1;
2285 int dstskip = info->d_skip >> 1; 2363 Uint16 *dstp = (Uint16 *) info->d_pixels;
2286 Uint32 s, d; 2364 int dstskip = info->d_skip >> 1;
2287 2365 Uint32 s, d;
2288 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; 2366
2289 2367 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
2290 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 2368
2291 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ 2369 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
2292 alpha >>= 3; /* downscale alpha to 5 bits */ 2370 mm_alpha = _mm_set_pi32 (0, alpha); /* 0000000A -> mm_alpha */
2293 2371 alpha >>= 3; /* downscale alpha to 5 bits */
2294 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 2372
2295 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 2373 mm_alpha = _mm_unpacklo_pi16 (mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
2296 /* position alpha to allow for mullo and mulhi on diff channels 2374 mm_alpha = _mm_unpacklo_pi32 (mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
2297 to reduce the number of operations */ 2375 /* position alpha to allow for mullo and mulhi on diff channels
2298 mm_alpha = _mm_slli_si64(mm_alpha, 3); 2376 to reduce the number of operations */
2299 2377 mm_alpha = _mm_slli_si64 (mm_alpha, 3);
2300 /* Setup the 555 color channel masks */ 2378
2301 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ 2379 /* Setup the 555 color channel masks */
2302 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ 2380 rmask = _mm_set_pi32 (0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */
2303 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 2381 gmask = _mm_set_pi32 (0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */
2304 2382 bmask = _mm_set_pi32 (0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
2305 while(height--) { 2383
2384 while (height--) {
2385 /* *INDENT-OFF* */
2306 DUFFS_LOOP_QUATRO2( 2386 DUFFS_LOOP_QUATRO2(
2307 { 2387 {
2308 s = *srcp++; 2388 s = *srcp++;
2309 d = *dstp; 2389 d = *dstp;
2310 /* 2390 /*
2395 2475
2396 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ 2476 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
2397 2477
2398 srcp += 4; 2478 srcp += 4;
2399 dstp += 4; 2479 dstp += 4;
2400 }, width); 2480 }, width);
2401 srcp += srcskip; 2481 /* *INDENT-ON* */
2402 dstp += dstskip; 2482 srcp += srcskip;
2403 } 2483 dstp += dstskip;
2404 _mm_empty(); 2484 }
2405 } 2485 _mm_empty ();
2486 }
2406 } 2487 }
2407 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 2488 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */
2408 2489
2409 /* fast RGB565->RGB565 blending with surface alpha */ 2490 /* fast RGB565->RGB565 blending with surface alpha */
2410 static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) 2491 static void
2411 { 2492 Blit565to565SurfaceAlpha (SDL_BlitInfo * info)
2412 unsigned alpha = info->src->alpha; 2493 {
2413 if(alpha == 128) { 2494 unsigned alpha = info->src->alpha;
2414 Blit16to16SurfaceAlpha128(info, 0xf7de); 2495 if (alpha == 128) {
2415 } else { 2496 Blit16to16SurfaceAlpha128 (info, 0xf7de);
2416 int width = info->d_width; 2497 } else {
2417 int height = info->d_height; 2498 int width = info->d_width;
2418 Uint16 *srcp = (Uint16 *)info->s_pixels; 2499 int height = info->d_height;
2419 int srcskip = info->s_skip >> 1; 2500 Uint16 *srcp = (Uint16 *) info->s_pixels;
2420 Uint16 *dstp = (Uint16 *)info->d_pixels; 2501 int srcskip = info->s_skip >> 1;
2421 int dstskip = info->d_skip >> 1; 2502 Uint16 *dstp = (Uint16 *) info->d_pixels;
2422 alpha >>= 3; /* downscale alpha to 5 bits */ 2503 int dstskip = info->d_skip >> 1;
2423 2504 alpha >>= 3; /* downscale alpha to 5 bits */
2424 while(height--) { 2505
2506 while (height--) {
2507 /* *INDENT-OFF* */
2425 DUFFS_LOOP4({ 2508 DUFFS_LOOP4({
2426 Uint32 s = *srcp++; 2509 Uint32 s = *srcp++;
2427 Uint32 d = *dstp; 2510 Uint32 d = *dstp;
2428 /* 2511 /*
2429 * shift out the middle component (green) to 2512 * shift out the middle component (green) to
2434 d = (d | d << 16) & 0x07e0f81f; 2517 d = (d | d << 16) & 0x07e0f81f;
2435 d += (s - d) * alpha >> 5; 2518 d += (s - d) * alpha >> 5;
2436 d &= 0x07e0f81f; 2519 d &= 0x07e0f81f;
2437 *dstp++ = (Uint16)(d | d >> 16); 2520 *dstp++ = (Uint16)(d | d >> 16);
2438 }, width); 2521 }, width);
2439 srcp += srcskip; 2522 /* *INDENT-ON* */
2440 dstp += dstskip; 2523 srcp += srcskip;
2441 } 2524 dstp += dstskip;
2442 } 2525 }
2526 }
2443 } 2527 }
2444 2528
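Blit565to565SurfaceAlpha spreads each 565 pixel across a 32-bit word (blue in bits 0-4, red in 11-15, green in 21-26) so that a single multiply by the 5-bit alpha interpolates all three fields without the products colliding, then folds the result back with d | d >> 16. A standalone sketch of one pixel (not SDL code):

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t blend565(uint16_t sp, uint16_t dp, unsigned alpha5)
    {
        uint32_t s = (sp | (uint32_t) sp << 16) & 0x07e0f81f;
        uint32_t d = (dp | (uint32_t) dp << 16) & 0x07e0f81f;
        d += (s - d) * alpha5 >> 5;         /* one multiply covers R, G and B */
        d &= 0x07e0f81f;
        return (uint16_t) (d | d >> 16);    /* fold the fields back to 16 bits */
    }

    int main(void)
    {
        /* alpha5 is the 8-bit surface alpha downscaled with >> 3 */
        printf("%04x\n", blend565(0xffff, 0x0000, 16));   /* ~50%: prints 7bef */
        return 0;
    }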
2445 /* fast RGB555->RGB555 blending with surface alpha */ 2529 /* fast RGB555->RGB555 blending with surface alpha */
2446 static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) 2530 static void
2447 { 2531 Blit555to555SurfaceAlpha (SDL_BlitInfo * info)
2448 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 2532 {
2449 if(alpha == 128) { 2533 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */
2450 Blit16to16SurfaceAlpha128(info, 0xfbde); 2534 if (alpha == 128) {
2451 } else { 2535 Blit16to16SurfaceAlpha128 (info, 0xfbde);
2452 int width = info->d_width; 2536 } else {
2453 int height = info->d_height; 2537 int width = info->d_width;
2454 Uint16 *srcp = (Uint16 *)info->s_pixels; 2538 int height = info->d_height;
2455 int srcskip = info->s_skip >> 1; 2539 Uint16 *srcp = (Uint16 *) info->s_pixels;
2456 Uint16 *dstp = (Uint16 *)info->d_pixels; 2540 int srcskip = info->s_skip >> 1;
2457 int dstskip = info->d_skip >> 1; 2541 Uint16 *dstp = (Uint16 *) info->d_pixels;
2458 alpha >>= 3; /* downscale alpha to 5 bits */ 2542 int dstskip = info->d_skip >> 1;
2459 2543 alpha >>= 3; /* downscale alpha to 5 bits */
2460 while(height--) { 2544
2545 while (height--) {
2546 /* *INDENT-OFF* */
2461 DUFFS_LOOP4({ 2547 DUFFS_LOOP4({
2462 Uint32 s = *srcp++; 2548 Uint32 s = *srcp++;
2463 Uint32 d = *dstp; 2549 Uint32 d = *dstp;
2464 /* 2550 /*
2465 * shift out the middle component (green) to 2551 * shift out the middle component (green) to
2470 d = (d | d << 16) & 0x03e07c1f; 2556 d = (d | d << 16) & 0x03e07c1f;
2471 d += (s - d) * alpha >> 5; 2557 d += (s - d) * alpha >> 5;
2472 d &= 0x03e07c1f; 2558 d &= 0x03e07c1f;
2473 *dstp++ = (Uint16)(d | d >> 16); 2559 *dstp++ = (Uint16)(d | d >> 16);
2474 }, width); 2560 }, width);
2475 srcp += srcskip; 2561 /* *INDENT-ON* */
2476 dstp += dstskip; 2562 srcp += srcskip;
2477 } 2563 dstp += dstskip;
2478 } 2564 }
2565 }
2479 } 2566 }
2480 2567
2481 /* fast ARGB8888->RGB565 blending with pixel alpha */ 2568 /* fast ARGB8888->RGB565 blending with pixel alpha */
2482 static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) 2569 static void
2483 { 2570 BlitARGBto565PixelAlpha (SDL_BlitInfo * info)
2484 int width = info->d_width; 2571 {
2485 int height = info->d_height; 2572 int width = info->d_width;
2486 Uint32 *srcp = (Uint32 *)info->s_pixels; 2573 int height = info->d_height;
2487 int srcskip = info->s_skip >> 2; 2574 Uint32 *srcp = (Uint32 *) info->s_pixels;
2488 Uint16 *dstp = (Uint16 *)info->d_pixels; 2575 int srcskip = info->s_skip >> 2;
2489 int dstskip = info->d_skip >> 1; 2576 Uint16 *dstp = (Uint16 *) info->d_pixels;
2490 2577 int dstskip = info->d_skip >> 1;
2491 while(height--) { 2578
2579 while (height--) {
2580 /* *INDENT-OFF* */
2492 DUFFS_LOOP4({ 2581 DUFFS_LOOP4({
2493 Uint32 s = *srcp; 2582 Uint32 s = *srcp;
2494 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ 2583 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */
2495 /* FIXME: Here we special-case opaque alpha since the 2584 /* FIXME: Here we special-case opaque alpha since the
2496 compositioning used (>>8 instead of /255) doesn't handle 2585 compositioning used (>>8 instead of /255) doesn't handle
2514 } 2603 }
2515 } 2604 }
2516 srcp++; 2605 srcp++;
2517 dstp++; 2606 dstp++;
2518 }, width); 2607 }, width);
2519 srcp += srcskip; 2608 /* *INDENT-ON* */
2520 dstp += dstskip; 2609 srcp += srcskip;
2521 } 2610 dstp += dstskip;
2611 }
2522 } 2612 }
2523 2613
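The FIXME above refers to the >> 8 shortcut: dividing by 256 instead of 255 means even a fully opaque source pixel does not reproduce itself exactly, which is why the loop special-cases opaque alpha. A two-line illustration (not SDL code):

    #include <stdio.h>

    int main(void)
    {
        unsigned s = 255, d = 0, alpha = 255;            /* fully opaque source */
        unsigned blended = d + ((s - d) * alpha >> 8);   /* 255 * 255 >> 8 */
        printf("%u\n", blended);                         /* prints 254, not 255 */
        return 0;
    }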
2524 /* fast ARGB8888->RGB555 blending with pixel alpha */ 2614 /* fast ARGB8888->RGB555 blending with pixel alpha */
2525 static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) 2615 static void
2526 { 2616 BlitARGBto555PixelAlpha (SDL_BlitInfo * info)
2527 int width = info->d_width; 2617 {
2528 int height = info->d_height; 2618 int width = info->d_width;
2529 Uint32 *srcp = (Uint32 *)info->s_pixels; 2619 int height = info->d_height;
2530 int srcskip = info->s_skip >> 2; 2620 Uint32 *srcp = (Uint32 *) info->s_pixels;
2531 Uint16 *dstp = (Uint16 *)info->d_pixels; 2621 int srcskip = info->s_skip >> 2;
2532 int dstskip = info->d_skip >> 1; 2622 Uint16 *dstp = (Uint16 *) info->d_pixels;
2533 2623 int dstskip = info->d_skip >> 1;
2534 while(height--) { 2624
2625 while (height--) {
2626 /* *INDENT-OFF* */
2535 DUFFS_LOOP4({ 2627 DUFFS_LOOP4({
2536 unsigned alpha; 2628 unsigned alpha;
2537 Uint32 s = *srcp; 2629 Uint32 s = *srcp;
2538 alpha = s >> 27; /* downscale alpha to 5 bits */ 2630 alpha = s >> 27; /* downscale alpha to 5 bits */
2539 /* FIXME: Here we special-case opaque alpha since the 2631 /* FIXME: Here we special-case opaque alpha since the
2558 } 2650 }
2559 } 2651 }
2560 srcp++; 2652 srcp++;
2561 dstp++; 2653 dstp++;
2562 }, width); 2654 }, width);
2563 srcp += srcskip; 2655 /* *INDENT-ON* */
2564 dstp += dstskip; 2656 srcp += srcskip;
2565 } 2657 dstp += dstskip;
2658 }
2566 } 2659 }
2567 2660
2568 /* General (slow) N->N blending with per-surface alpha */ 2661 /* General (slow) N->N blending with per-surface alpha */
2569 static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) 2662 static void
2570 { 2663 BlitNtoNSurfaceAlpha (SDL_BlitInfo * info)
2571 int width = info->d_width; 2664 {
2572 int height = info->d_height; 2665 int width = info->d_width;
2573 Uint8 *src = info->s_pixels; 2666 int height = info->d_height;
2574 int srcskip = info->s_skip; 2667 Uint8 *src = info->s_pixels;
2575 Uint8 *dst = info->d_pixels; 2668 int srcskip = info->s_skip;
2576 int dstskip = info->d_skip; 2669 Uint8 *dst = info->d_pixels;
2577 SDL_PixelFormat *srcfmt = info->src; 2670 int dstskip = info->d_skip;
2578 SDL_PixelFormat *dstfmt = info->dst; 2671 SDL_PixelFormat *srcfmt = info->src;
2579 int srcbpp = srcfmt->BytesPerPixel; 2672 SDL_PixelFormat *dstfmt = info->dst;
2580 int dstbpp = dstfmt->BytesPerPixel; 2673 int srcbpp = srcfmt->BytesPerPixel;
2581 unsigned sA = srcfmt->alpha; 2674 int dstbpp = dstfmt->BytesPerPixel;
2582 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 2675 unsigned sA = srcfmt->alpha;
2583 2676 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
2584 if(sA) { 2677
2585 while ( height-- ) { 2678 if (sA) {
2679 while (height--) {
2680 /* *INDENT-OFF* */
2586 DUFFS_LOOP4( 2681 DUFFS_LOOP4(
2587 { 2682 {
2588 Uint32 Pixel; 2683 Uint32 Pixel;
2589 unsigned sR; 2684 unsigned sR;
2590 unsigned sG; 2685 unsigned sG;
2598 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 2693 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
2599 src += srcbpp; 2694 src += srcbpp;
2600 dst += dstbpp; 2695 dst += dstbpp;
2601 }, 2696 },
2602 width); 2697 width);
2603 src += srcskip; 2698 /* *INDENT-ON* */
2604 dst += dstskip; 2699 src += srcskip;
2605 } 2700 dst += dstskip;
2606 } 2701 }
2702 }
2607 } 2703 }
2608 2704
2609 /* General (slow) colorkeyed N->N blending with per-surface alpha */ 2705 /* General (slow) colorkeyed N->N blending with per-surface alpha */
2610 static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) 2706 static void
2611 { 2707 BlitNtoNSurfaceAlphaKey (SDL_BlitInfo * info)
2612 int width = info->d_width; 2708 {
2613 int height = info->d_height; 2709 int width = info->d_width;
2614 Uint8 *src = info->s_pixels; 2710 int height = info->d_height;
2615 int srcskip = info->s_skip; 2711 Uint8 *src = info->s_pixels;
2616 Uint8 *dst = info->d_pixels; 2712 int srcskip = info->s_skip;
2617 int dstskip = info->d_skip; 2713 Uint8 *dst = info->d_pixels;
2618 SDL_PixelFormat *srcfmt = info->src; 2714 int dstskip = info->d_skip;
2619 SDL_PixelFormat *dstfmt = info->dst; 2715 SDL_PixelFormat *srcfmt = info->src;
2620 Uint32 ckey = srcfmt->colorkey; 2716 SDL_PixelFormat *dstfmt = info->dst;
2621 int srcbpp = srcfmt->BytesPerPixel; 2717 Uint32 ckey = srcfmt->colorkey;
2622 int dstbpp = dstfmt->BytesPerPixel; 2718 int srcbpp = srcfmt->BytesPerPixel;
2623 unsigned sA = srcfmt->alpha; 2719 int dstbpp = dstfmt->BytesPerPixel;
2624 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 2720 unsigned sA = srcfmt->alpha;
2625 2721 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
2626 while ( height-- ) { 2722
2723 while (height--) {
2724 /* *INDENT-OFF* */
2627 DUFFS_LOOP4( 2725 DUFFS_LOOP4(
2628 { 2726 {
2629 Uint32 Pixel; 2727 Uint32 Pixel;
2630 unsigned sR; 2728 unsigned sR;
2631 unsigned sG; 2729 unsigned sG;
2642 } 2740 }
2643 src += srcbpp; 2741 src += srcbpp;
2644 dst += dstbpp; 2742 dst += dstbpp;
2645 }, 2743 },
2646 width); 2744 width);
2647 src += srcskip; 2745 /* *INDENT-ON* */
2648 dst += dstskip; 2746 src += srcskip;
2649 } 2747 dst += dstskip;
2748 }
2650 } 2749 }
2651 2750
2652 /* General (slow) N->N blending with pixel alpha */ 2751 /* General (slow) N->N blending with pixel alpha */
2653 static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) 2752 static void
2654 { 2753 BlitNtoNPixelAlpha (SDL_BlitInfo * info)
2655 int width = info->d_width; 2754 {
2656 int height = info->d_height; 2755 int width = info->d_width;
2657 Uint8 *src = info->s_pixels; 2756 int height = info->d_height;
2658 int srcskip = info->s_skip; 2757 Uint8 *src = info->s_pixels;
2659 Uint8 *dst = info->d_pixels; 2758 int srcskip = info->s_skip;
2660 int dstskip = info->d_skip; 2759 Uint8 *dst = info->d_pixels;
2661 SDL_PixelFormat *srcfmt = info->src; 2760 int dstskip = info->d_skip;
2662 SDL_PixelFormat *dstfmt = info->dst; 2761 SDL_PixelFormat *srcfmt = info->src;
2663 2762 SDL_PixelFormat *dstfmt = info->dst;
2664 int srcbpp; 2763
2665 int dstbpp; 2764 int srcbpp;
2666 2765 int dstbpp;
2667 /* Set up some basic variables */ 2766
2668 srcbpp = srcfmt->BytesPerPixel; 2767 /* Set up some basic variables */
2669 dstbpp = dstfmt->BytesPerPixel; 2768 srcbpp = srcfmt->BytesPerPixel;
2670 2769 dstbpp = dstfmt->BytesPerPixel;
2671 /* FIXME: for 8bpp source alpha, this doesn't get opaque values 2770
2672 quite right. for <8bpp source alpha, it gets them very wrong 2771 /* FIXME: for 8bpp source alpha, this doesn't get opaque values
2673 (check all macros!) 2772 quite right. for <8bpp source alpha, it gets them very wrong
2674 It is unclear whether there is a good general solution that doesn't 2773 (check all macros!)
2675 need a branch (or a divide). */ 2774 It is unclear whether there is a good general solution that doesn't
2676 while ( height-- ) { 2775 need a branch (or a divide). */
2776 while (height--) {
2777 /* *INDENT-OFF* */
2677 DUFFS_LOOP4( 2778 DUFFS_LOOP4(
2678 { 2779 {
2679 Uint32 Pixel; 2780 Uint32 Pixel;
2680 unsigned sR; 2781 unsigned sR;
2681 unsigned sG; 2782 unsigned sG;
2693 } 2794 }
2694 src += srcbpp; 2795 src += srcbpp;
2695 dst += dstbpp; 2796 dst += dstbpp;
2696 }, 2797 },
2697 width); 2798 width);
2698 src += srcskip; 2799 /* *INDENT-ON* */
2699 dst += dstskip; 2800 src += srcskip;
2700 } 2801 dst += dstskip;
2701 } 2802 }
2702 2803 }
2703 2804
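The FIXME at the top of BlitNtoNPixelAlpha is about how a per-pixel alpha field narrower than 8 bits is widened before it reaches the blend. One reading of it, sketched stand-alone (both helper names are hypothetical, not SDL macros): if the field is expanded with a plain left shift, its maximum value never reaches 255, so a nominally opaque pixel is still blended slightly, and the narrower the field the worse it gets.

    /* 'bits' is the width of the alpha field, 1..8.
       A 4-bit alpha of 0xF becomes 0xF0 (240), not 0xFF, under a plain shift. */
    static unsigned
    expand_alpha_shift(unsigned a, int bits)
    {
        return a << (8 - bits);
    }

    /* Replicating the field's high bits into the vacated low bits maps the
       maximum value to exactly 255 and zero to zero; fields narrower than
       four bits need the replication repeated. */
    static unsigned
    expand_alpha_replicate(unsigned a, int bits)
    {
        unsigned v = a << (8 - bits);
        int fill = 8 - bits;
        while (fill > 0) {
            v |= v >> bits;
            fill -= bits;
        }
        return v;
    }

With the shift-only expansion, a source pixel that is opaque in a 4-bit-alpha format is blended at only 240/256, which fits the comment's "for <8bpp source alpha, it gets them very wrong".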
2704 SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index) 2805
2806 SDL_loblit
2807 SDL_CalculateAlphaBlit (SDL_Surface * surface, int blit_index)
2705 { 2808 {
2706 SDL_PixelFormat *sf = surface->format; 2809 SDL_PixelFormat *sf = surface->format;
2707 SDL_PixelFormat *df = surface->map->dst->format; 2810 SDL_PixelFormat *df = surface->map->dst->format;
2708 2811
2709 if(sf->Amask == 0) { 2812 if (sf->Amask == 0) {
2710 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { 2813 if ((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) {
2711 if(df->BytesPerPixel == 1) 2814 if (df->BytesPerPixel == 1)
2712 return BlitNto1SurfaceAlphaKey; 2815 return BlitNto1SurfaceAlphaKey;
2713 else 2816 else
2714 #if SDL_ALTIVEC_BLITTERS 2817 #if SDL_ALTIVEC_BLITTERS
2715 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && 2818 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
2716 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 2819 !(surface->map->dst->flags & SDL_HWSURFACE)
2717 return Blit32to32SurfaceAlphaKeyAltivec; 2820 && SDL_HasAltiVec ())
2718 else 2821 return Blit32to32SurfaceAlphaKeyAltivec;
2822 else
2719 #endif 2823 #endif
2720 return BlitNtoNSurfaceAlphaKey; 2824 return BlitNtoNSurfaceAlphaKey;
2721 } else { 2825 } else {
2722 /* Per-surface alpha blits */ 2826 /* Per-surface alpha blits */
2723 switch(df->BytesPerPixel) { 2827 switch (df->BytesPerPixel) {
2724 case 1: 2828 case 1:
2725 return BlitNto1SurfaceAlpha; 2829 return BlitNto1SurfaceAlpha;
2726 2830
2727 case 2: 2831 case 2:
2728 if(surface->map->identity) { 2832 if (surface->map->identity) {
2729 if(df->Gmask == 0x7e0) 2833 if (df->Gmask == 0x7e0) {
2730 {
2731 #if MMX_ASMBLIT 2834 #if MMX_ASMBLIT
2732 if(SDL_HasMMX()) 2835 if (SDL_HasMMX ())
2733 return Blit565to565SurfaceAlphaMMX; 2836 return Blit565to565SurfaceAlphaMMX;
2734 else 2837 else
2735 #endif 2838 #endif
2736 return Blit565to565SurfaceAlpha; 2839 return Blit565to565SurfaceAlpha;
2737 } 2840 } else if (df->Gmask == 0x3e0) {
2738 else if(df->Gmask == 0x3e0)
2739 {
2740 #if MMX_ASMBLIT 2841 #if MMX_ASMBLIT
2741 if(SDL_HasMMX()) 2842 if (SDL_HasMMX ())
2742 return Blit555to555SurfaceAlphaMMX; 2843 return Blit555to555SurfaceAlphaMMX;
2743 else 2844 else
2744 #endif 2845 #endif
2745 return Blit555to555SurfaceAlpha; 2846 return Blit555to555SurfaceAlpha;
2746 } 2847 }
2747 } 2848 }
2748 return BlitNtoNSurfaceAlpha; 2849 return BlitNtoNSurfaceAlpha;
2749 2850
2750 case 4: 2851 case 4:
2751 if(sf->Rmask == df->Rmask 2852 if (sf->Rmask == df->Rmask
2752 && sf->Gmask == df->Gmask 2853 && sf->Gmask == df->Gmask
2753 && sf->Bmask == df->Bmask 2854 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
2754 && sf->BytesPerPixel == 4)
2755 {
2756 #if MMX_ASMBLIT 2855 #if MMX_ASMBLIT
2757 if(sf->Rshift % 8 == 0 2856 if (sf->Rshift % 8 == 0
2758 && sf->Gshift % 8 == 0 2857 && sf->Gshift % 8 == 0
2759 && sf->Bshift % 8 == 0 2858 && sf->Bshift % 8 == 0 && SDL_HasMMX ())
2760 && SDL_HasMMX()) 2859 return BlitRGBtoRGBSurfaceAlphaMMX;
2761 return BlitRGBtoRGBSurfaceAlphaMMX;
2762 #endif 2860 #endif
2763 if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) 2861 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
2764 {
2765 #if SDL_ALTIVEC_BLITTERS 2862 #if SDL_ALTIVEC_BLITTERS
2766 if(!(surface->map->dst->flags & SDL_HWSURFACE) 2863 if (!(surface->map->dst->flags & SDL_HWSURFACE)
2767 && SDL_HasAltiVec()) 2864 && SDL_HasAltiVec ())
2768 return BlitRGBtoRGBSurfaceAlphaAltivec; 2865 return BlitRGBtoRGBSurfaceAlphaAltivec;
2769 #endif 2866 #endif
2770 return BlitRGBtoRGBSurfaceAlpha; 2867 return BlitRGBtoRGBSurfaceAlpha;
2771 } 2868 }
2772 } 2869 }
2773 #if SDL_ALTIVEC_BLITTERS 2870 #if SDL_ALTIVEC_BLITTERS
2774 if((sf->BytesPerPixel == 4) && 2871 if ((sf->BytesPerPixel == 4) &&
2775 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 2872 !(surface->map->dst->flags & SDL_HWSURFACE)
2776 return Blit32to32SurfaceAlphaAltivec; 2873 && SDL_HasAltiVec ())
2777 else 2874 return Blit32to32SurfaceAlphaAltivec;
2875 else
2778 #endif 2876 #endif
2779 return BlitNtoNSurfaceAlpha; 2877 return BlitNtoNSurfaceAlpha;
2780 2878
2781 case 3: 2879 case 3:
2782 default: 2880 default:
2783 return BlitNtoNSurfaceAlpha; 2881 return BlitNtoNSurfaceAlpha;
2784 } 2882 }
2785 } 2883 }
2786 } else { 2884 } else {
2787 /* Per-pixel alpha blits */ 2885 /* Per-pixel alpha blits */
2788 switch(df->BytesPerPixel) { 2886 switch (df->BytesPerPixel) {
2789 case 1: 2887 case 1:
2790 return BlitNto1PixelAlpha; 2888 return BlitNto1PixelAlpha;
2791 2889
2792 case 2: 2890 case 2:
2793 #if SDL_ALTIVEC_BLITTERS 2891 #if SDL_ALTIVEC_BLITTERS
2794 if(sf->BytesPerPixel == 4 && !(surface->map->dst->flags & SDL_HWSURFACE) && 2892 if (sf->BytesPerPixel == 4
2795 df->Gmask == 0x7e0 && 2893 && !(surface->map->dst->flags & SDL_HWSURFACE)
2796 df->Bmask == 0x1f && SDL_HasAltiVec()) 2894 && df->Gmask == 0x7e0 && df->Bmask == 0x1f
2797 return Blit32to565PixelAlphaAltivec; 2895 && SDL_HasAltiVec ())
2798 else 2896 return Blit32to565PixelAlphaAltivec;
2897 else
2799 #endif 2898 #endif
2800 if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 2899 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
2801 && sf->Gmask == 0xff00 2900 && sf->Gmask == 0xff00
2802 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) 2901 && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
2803 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { 2902 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
2804 if(df->Gmask == 0x7e0) 2903 if (df->Gmask == 0x7e0)
2805 return BlitARGBto565PixelAlpha; 2904 return BlitARGBto565PixelAlpha;
2806 else if(df->Gmask == 0x3e0) 2905 else if (df->Gmask == 0x3e0)
2807 return BlitARGBto555PixelAlpha; 2906 return BlitARGBto555PixelAlpha;
2808 } 2907 }
2809 return BlitNtoNPixelAlpha; 2908 return BlitNtoNPixelAlpha;
2810 2909
2811 case 4: 2910 case 4:
2812 if(sf->Rmask == df->Rmask 2911 if (sf->Rmask == df->Rmask
2813 && sf->Gmask == df->Gmask 2912 && sf->Gmask == df->Gmask
2814 && sf->Bmask == df->Bmask 2913 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
2815 && sf->BytesPerPixel == 4)
2816 {
2817 #if MMX_ASMBLIT 2914 #if MMX_ASMBLIT
2818 if(sf->Rshift % 8 == 0 2915 if (sf->Rshift % 8 == 0
2819 && sf->Gshift % 8 == 0 2916 && sf->Gshift % 8 == 0
2820 && sf->Bshift % 8 == 0 2917 && sf->Bshift % 8 == 0
2821 && sf->Ashift % 8 == 0 2918 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
2822 && sf->Aloss == 0) 2919 if (SDL_Has3DNow ())
2823 { 2920 return BlitRGBtoRGBPixelAlphaMMX3DNOW;
2824 if(SDL_Has3DNow()) 2921 if (SDL_HasMMX ())
2825 return BlitRGBtoRGBPixelAlphaMMX3DNOW; 2922 return BlitRGBtoRGBPixelAlphaMMX;
2826 if(SDL_HasMMX()) 2923 }
2827 return BlitRGBtoRGBPixelAlphaMMX;
2828 }
2829 #endif 2924 #endif
2830 if(sf->Amask == 0xff000000) 2925 if (sf->Amask == 0xff000000) {
2831 {
2832 #if SDL_ALTIVEC_BLITTERS 2926 #if SDL_ALTIVEC_BLITTERS
2833 if(!(surface->map->dst->flags & SDL_HWSURFACE) 2927 if (!(surface->map->dst->flags & SDL_HWSURFACE)
2834 && SDL_HasAltiVec()) 2928 && SDL_HasAltiVec ())
2835 return BlitRGBtoRGBPixelAlphaAltivec; 2929 return BlitRGBtoRGBPixelAlphaAltivec;
2836 #endif 2930 #endif
2837 return BlitRGBtoRGBPixelAlpha; 2931 return BlitRGBtoRGBPixelAlpha;
2838 } 2932 }
2839 } 2933 }
2840 #if SDL_ALTIVEC_BLITTERS 2934 #if SDL_ALTIVEC_BLITTERS
2841 if (sf->Amask && sf->BytesPerPixel == 4 && 2935 if (sf->Amask && sf->BytesPerPixel == 4 &&
2842 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 2936 !(surface->map->dst->flags & SDL_HWSURFACE)
2843 return Blit32to32PixelAlphaAltivec; 2937 && SDL_HasAltiVec ())
2844 else 2938 return Blit32to32PixelAlphaAltivec;
2939 else
2845 #endif 2940 #endif
2846 return BlitNtoNPixelAlpha; 2941 return BlitNtoNPixelAlpha;
2847 2942
2848 case 3: 2943 case 3:
2849 default: 2944 default:
2850 return BlitNtoNPixelAlpha; 2945 return BlitNtoNPixelAlpha;
2851 } 2946 }
2852 } 2947 }
2853 } 2948 }
2854 2949
2950 /* vi: set ts=4 sw=4 expandtab: */
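Taken as a whole, SDL_CalculateAlphaBlit above splits first on whether the source format carries an alpha channel (per-pixel alpha) or only a per-surface alpha, then on the colorkey flag and the destination depth, with MMX/3DNow!/AltiVec fast paths layered on top. A condensed, illustrative-only view of that decision tree (pick_alpha_blitter is not an SDL function; the strings name the blitters selected above, and the CPU-specific and exact-mask refinements are dropped):

    /* Hypothetical summary of the selection logic above.  565-vs-555 mask
       checks, identity-map checks, and CPU-specific fast paths are omitted. */
    static const char *
    pick_alpha_blitter(unsigned src_Amask, int src_has_colorkey, int dst_bpp)
    {
        if (src_Amask == 0) {                    /* per-surface alpha */
            if (src_has_colorkey)
                return dst_bpp == 1 ? "BlitNto1SurfaceAlphaKey"
                                    : "BlitNtoNSurfaceAlphaKey";
            switch (dst_bpp) {
            case 1:  return "BlitNto1SurfaceAlpha";
            case 2:  return "Blit565to565SurfaceAlpha or BlitNtoNSurfaceAlpha";
            case 4:  return "BlitRGBtoRGBSurfaceAlpha or BlitNtoNSurfaceAlpha";
            default: return "BlitNtoNSurfaceAlpha";   /* 24 bpp and others */
            }
        } else {                                 /* per-pixel alpha */
            switch (dst_bpp) {
            case 1:  return "BlitNto1PixelAlpha";
            case 2:  return "BlitARGBto565PixelAlpha or BlitNtoNPixelAlpha";
            case 4:  return "BlitRGBtoRGBPixelAlpha or BlitNtoNPixelAlpha";
            default: return "BlitNtoNPixelAlpha";
            }
        }
    }

This routine is consulted when the surface's blit mapping is built; the SDL_loblit it returns is what the software blit path later calls for each blit, so the cost of all these checks is paid once per mapping rather than once per pixel.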