Mercurial > sdl-ios-xcode
comparison src/video/SDL_blit_A.c @ 1895:c121d94672cb
SDL 1.2 is moving to a branch, and SDL 1.3 is becoming the head.
field | value |
---|---|
author | Sam Lantinga <slouken@libsdl.org> |
date | Mon, 10 Jul 2006 21:04:37 +0000 |
parents | 398ac0f88e4d |
children | eb5aedc79992 |
Comparison legend: equal, deleted, inserted, replaced

1894:c69cee13dd76 | 1895:c121d94672cb |
---|---|
44 #endif | 44 #endif |
45 | 45 |
46 /* Functions to perform alpha blended blitting */ | 46 /* Functions to perform alpha blended blitting */ |
47 | 47 |
48 /* N->1 blending with per-surface alpha */ | 48 /* N->1 blending with per-surface alpha */ |
49 static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) | 49 static void |
50 { | 50 BlitNto1SurfaceAlpha(SDL_BlitInfo * info) |
51 int width = info->d_width; | 51 { |
52 int height = info->d_height; | 52 int width = info->d_width; |
53 Uint8 *src = info->s_pixels; | 53 int height = info->d_height; |
54 int srcskip = info->s_skip; | 54 Uint8 *src = info->s_pixels; |
55 Uint8 *dst = info->d_pixels; | 55 int srcskip = info->s_skip; |
56 int dstskip = info->d_skip; | 56 Uint8 *dst = info->d_pixels; |
57 Uint8 *palmap = info->table; | 57 int dstskip = info->d_skip; |
58 SDL_PixelFormat *srcfmt = info->src; | 58 Uint8 *palmap = info->table; |
59 SDL_PixelFormat *dstfmt = info->dst; | 59 SDL_PixelFormat *srcfmt = info->src; |
60 int srcbpp = srcfmt->BytesPerPixel; | 60 SDL_PixelFormat *dstfmt = info->dst; |
61 | 61 int srcbpp = srcfmt->BytesPerPixel; |
62 const unsigned A = srcfmt->alpha; | 62 |
63 | 63 const unsigned A = srcfmt->alpha; |
64 while ( height-- ) { | 64 |
65 while (height--) { | |
66 /* *INDENT-OFF* */ | |
65 DUFFS_LOOP4( | 67 DUFFS_LOOP4( |
66 { | 68 { |
67 Uint32 Pixel; | 69 Uint32 Pixel; |
68 unsigned sR; | 70 unsigned sR; |
69 unsigned sG; | 71 unsigned sG; |
91 } | 93 } |
92 dst++; | 94 dst++; |
93 src += srcbpp; | 95 src += srcbpp; |
94 }, | 96 }, |
95 width); | 97 width); |
96 src += srcskip; | 98 /* *INDENT-ON* */ |
97 dst += dstskip; | 99 src += srcskip; |
98 } | 100 dst += dstskip; |
101 } | |
99 } | 102 } |
100 | 103 |
101 /* N->1 blending with pixel alpha */ | 104 /* N->1 blending with pixel alpha */ |
102 static void BlitNto1PixelAlpha(SDL_BlitInfo *info) | 105 static void |
103 { | 106 BlitNto1PixelAlpha(SDL_BlitInfo * info) |
104 int width = info->d_width; | 107 { |
105 int height = info->d_height; | 108 int width = info->d_width; |
106 Uint8 *src = info->s_pixels; | 109 int height = info->d_height; |
107 int srcskip = info->s_skip; | 110 Uint8 *src = info->s_pixels; |
108 Uint8 *dst = info->d_pixels; | 111 int srcskip = info->s_skip; |
109 int dstskip = info->d_skip; | 112 Uint8 *dst = info->d_pixels; |
110 Uint8 *palmap = info->table; | 113 int dstskip = info->d_skip; |
111 SDL_PixelFormat *srcfmt = info->src; | 114 Uint8 *palmap = info->table; |
112 SDL_PixelFormat *dstfmt = info->dst; | 115 SDL_PixelFormat *srcfmt = info->src; |
113 int srcbpp = srcfmt->BytesPerPixel; | 116 SDL_PixelFormat *dstfmt = info->dst; |
114 | 117 int srcbpp = srcfmt->BytesPerPixel; |
115 /* FIXME: fix alpha bit field expansion here too? */ | 118 |
116 while ( height-- ) { | 119 /* FIXME: fix alpha bit field expansion here too? */ |
120 while (height--) { | |
121 /* *INDENT-OFF* */ | |
117 DUFFS_LOOP4( | 122 DUFFS_LOOP4( |
118 { | 123 { |
119 Uint32 Pixel; | 124 Uint32 Pixel; |
120 unsigned sR; | 125 unsigned sR; |
121 unsigned sG; | 126 unsigned sG; |
144 } | 149 } |
145 dst++; | 150 dst++; |
146 src += srcbpp; | 151 src += srcbpp; |
147 }, | 152 }, |
148 width); | 153 width); |
149 src += srcskip; | 154 /* *INDENT-ON* */ |
150 dst += dstskip; | 155 src += srcskip; |
151 } | 156 dst += dstskip; |
157 } | |
152 } | 158 } |
153 | 159 |
154 /* colorkeyed N->1 blending with per-surface alpha */ | 160 /* colorkeyed N->1 blending with per-surface alpha */ |
155 static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) | 161 static void |
156 { | 162 BlitNto1SurfaceAlphaKey(SDL_BlitInfo * info) |
157 int width = info->d_width; | 163 { |
158 int height = info->d_height; | 164 int width = info->d_width; |
159 Uint8 *src = info->s_pixels; | 165 int height = info->d_height; |
160 int srcskip = info->s_skip; | 166 Uint8 *src = info->s_pixels; |
161 Uint8 *dst = info->d_pixels; | 167 int srcskip = info->s_skip; |
162 int dstskip = info->d_skip; | 168 Uint8 *dst = info->d_pixels; |
163 Uint8 *palmap = info->table; | 169 int dstskip = info->d_skip; |
164 SDL_PixelFormat *srcfmt = info->src; | 170 Uint8 *palmap = info->table; |
165 SDL_PixelFormat *dstfmt = info->dst; | 171 SDL_PixelFormat *srcfmt = info->src; |
166 int srcbpp = srcfmt->BytesPerPixel; | 172 SDL_PixelFormat *dstfmt = info->dst; |
167 Uint32 ckey = srcfmt->colorkey; | 173 int srcbpp = srcfmt->BytesPerPixel; |
168 | 174 Uint32 ckey = srcfmt->colorkey; |
169 const int A = srcfmt->alpha; | 175 |
170 | 176 const int A = srcfmt->alpha; |
171 while ( height-- ) { | 177 |
178 while (height--) { | |
179 /* *INDENT-OFF* */ | |
172 DUFFS_LOOP( | 180 DUFFS_LOOP( |
173 { | 181 { |
174 Uint32 Pixel; | 182 Uint32 Pixel; |
175 unsigned sR; | 183 unsigned sR; |
176 unsigned sG; | 184 unsigned sG; |
200 } | 208 } |
201 dst++; | 209 dst++; |
202 src += srcbpp; | 210 src += srcbpp; |
203 }, | 211 }, |
204 width); | 212 width); |
205 src += srcskip; | 213 /* *INDENT-ON* */ |
206 dst += dstskip; | 214 src += srcskip; |
207 } | 215 dst += dstskip; |
216 } | |
208 } | 217 } |
209 | 218 |
210 #if GCC_ASMBLIT | 219 #if GCC_ASMBLIT |
211 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 220 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
212 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) | 221 static void |
213 { | 222 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info) |
214 int width = info->d_width; | 223 { |
215 int height = info->d_height; | 224 int width = info->d_width; |
216 Uint32 *srcp = (Uint32 *)info->s_pixels; | 225 int height = info->d_height; |
217 int srcskip = info->s_skip >> 2; | 226 Uint32 *srcp = (Uint32 *) info->s_pixels; |
218 Uint32 *dstp = (Uint32 *)info->d_pixels; | 227 int srcskip = info->s_skip >> 2; |
219 int dstskip = info->d_skip >> 2; | 228 Uint32 *dstp = (Uint32 *) info->d_pixels; |
220 Uint32 dalpha = info->dst->Amask; | 229 int dstskip = info->d_skip >> 2; |
221 Uint8 load[8]; | 230 Uint32 dalpha = info->dst->Amask; |
222 | 231 Uint8 load[8]; |
223 *(Uint64 *)load = 0x00fefefe00fefefeULL;/* alpha128 mask */ | 232 |
224 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ | 233 *(Uint64 *) load = 0x00fefefe00fefefeULL; /* alpha128 mask */ |
225 *(Uint64 *)load = 0x0001010100010101ULL;/* !alpha128 mask */ | 234 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ |
226 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ | 235 *(Uint64 *) load = 0x0001010100010101ULL; /* !alpha128 mask */ |
227 movd_m2r(dalpha, mm7); /* dst alpha mask */ | 236 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ |
228 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | 237 movd_m2r(dalpha, mm7); /* dst alpha mask */ |
229 while(height--) { | 238 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ |
239 while (height--) { | |
240 /* *INDENT-OFF* */ | |
230 DUFFS_LOOP_DOUBLE2( | 241 DUFFS_LOOP_DOUBLE2( |
231 { | 242 { |
232 Uint32 s = *srcp++; | 243 Uint32 s = *srcp++; |
233 Uint32 d = *dstp; | 244 Uint32 d = *dstp; |
234 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | 245 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) |
251 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | 262 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ |
252 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ | 263 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ |
253 dstp += 2; | 264 dstp += 2; |
254 srcp += 2; | 265 srcp += 2; |
255 }, width); | 266 }, width); |
256 srcp += srcskip; | 267 /* *INDENT-ON* */ |
257 dstp += dstskip; | 268 srcp += srcskip; |
258 } | 269 dstp += dstskip; |
259 emms(); | 270 } |
271 emms(); | |
260 } | 272 } |
261 | 273 |
262 /* fast RGB888->(A)RGB888 blending with surface alpha */ | 274 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
263 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) | 275 static void |
264 { | 276 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info) |
265 SDL_PixelFormat* df = info->dst; | 277 { |
266 unsigned alpha = info->src->alpha; | 278 SDL_PixelFormat *df = info->dst; |
267 | 279 unsigned alpha = info->src->alpha; |
268 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | 280 |
269 /* only call a128 version when R,G,B occupy lower bits */ | 281 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { |
270 BlitRGBtoRGBSurfaceAlpha128MMX(info); | 282 /* only call a128 version when R,G,B occupy lower bits */ |
271 } else { | 283 BlitRGBtoRGBSurfaceAlpha128MMX(info); |
272 int width = info->d_width; | 284 } else { |
273 int height = info->d_height; | 285 int width = info->d_width; |
274 Uint32 *srcp = (Uint32 *)info->s_pixels; | 286 int height = info->d_height; |
275 int srcskip = info->s_skip >> 2; | 287 Uint32 *srcp = (Uint32 *) info->s_pixels; |
276 Uint32 *dstp = (Uint32 *)info->d_pixels; | 288 int srcskip = info->s_skip >> 2; |
277 int dstskip = info->d_skip >> 2; | 289 Uint32 *dstp = (Uint32 *) info->d_pixels; |
278 | 290 int dstskip = info->d_skip >> 2; |
279 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ | 291 |
280 /* form the alpha mult */ | 292 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ |
281 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ | 293 /* form the alpha mult */ |
282 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | 294 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ |
283 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | 295 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ |
284 alpha = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); | 296 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ |
285 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ | 297 alpha = |
286 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ | 298 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> |
287 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ | 299 Bshift); |
288 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ | 300 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ |
289 movd_m2r(df->Amask, mm7); /* dst alpha mask */ | 301 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ |
290 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | 302 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ |
291 | 303 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ |
292 while(height--) { | 304 movd_m2r(df->Amask, mm7); /* dst alpha mask */ |
305 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
306 | |
307 while (height--) { | |
308 /* *INDENT-OFF* */ | |
293 DUFFS_LOOP_DOUBLE2({ | 309 DUFFS_LOOP_DOUBLE2({ |
294 /* One Pixel Blend */ | 310 /* One Pixel Blend */ |
295 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | 311 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ |
296 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | 312 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ |
297 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ | 313 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ |
335 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ | 351 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ |
336 | 352 |
337 srcp += 2; | 353 srcp += 2; |
338 dstp += 2; | 354 dstp += 2; |
339 }, width); | 355 }, width); |
340 srcp += srcskip; | 356 /* *INDENT-ON* */ |
341 dstp += dstskip; | 357 srcp += srcskip; |
342 } | 358 dstp += dstskip; |
343 emms(); | 359 } |
344 } | 360 emms(); |
361 } | |
345 } | 362 } |
346 | 363 |
347 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | 364 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
348 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) | 365 static void |
349 { | 366 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info) |
350 int width = info->d_width; | 367 { |
351 int height = info->d_height; | 368 int width = info->d_width; |
352 Uint32 *srcp = (Uint32 *)info->s_pixels; | 369 int height = info->d_height; |
353 int srcskip = info->s_skip >> 2; | 370 Uint32 *srcp = (Uint32 *) info->s_pixels; |
354 Uint32 *dstp = (Uint32 *)info->d_pixels; | 371 int srcskip = info->s_skip >> 2; |
355 int dstskip = info->d_skip >> 2; | 372 Uint32 *dstp = (Uint32 *) info->d_pixels; |
356 SDL_PixelFormat* sf = info->src; | 373 int dstskip = info->d_skip >> 2; |
357 Uint32 amask = sf->Amask; | 374 SDL_PixelFormat *sf = info->src; |
358 | 375 Uint32 amask = sf->Amask; |
359 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ | 376 |
360 /* form multiplication mask */ | 377 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ |
361 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ | 378 /* form multiplication mask */ |
362 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ | 379 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ |
363 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ | 380 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ |
364 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ | 381 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ |
365 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ | 382 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ |
366 /* form channel masks */ | 383 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ |
367 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ | 384 /* form channel masks */ |
368 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ | 385 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ |
369 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ | 386 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ |
370 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ | 387 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ |
371 /* get alpha channel shift */ | 388 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ |
372 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */ | 389 /* get alpha channel shift */ |
373 | 390 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */ |
374 while(height--) { | 391 |
392 while (height--) { | |
393 /* *INDENT-OFF* */ | |
375 DUFFS_LOOP4({ | 394 DUFFS_LOOP4({ |
376 Uint32 alpha = *srcp & amask; | 395 Uint32 alpha = *srcp & amask; |
377 /* FIXME: Here we special-case opaque alpha since the | 396 /* FIXME: Here we special-case opaque alpha since the |
378 compositioning used (>>8 instead of /255) doesn't handle | 397 compositioning used (>>8 instead of /255) doesn't handle |
379 it correctly. Also special-case alpha=0 for speed? | 398 it correctly. Also special-case alpha=0 for speed? |
414 movd_r2m(mm2, *dstp);/* mm2 -> dst */ | 433 movd_r2m(mm2, *dstp);/* mm2 -> dst */ |
415 } | 434 } |
416 ++srcp; | 435 ++srcp; |
417 ++dstp; | 436 ++dstp; |
418 }, width); | 437 }, width); |
419 srcp += srcskip; | 438 /* *INDENT-ON* */ |
420 dstp += dstskip; | 439 srcp += srcskip; |
421 } | 440 dstp += dstskip; |
422 emms(); | 441 } |
423 } | 442 emms(); |
443 } | |
444 | |
424 /* End GCC_ASMBLIT */ | 445 /* End GCC_ASMBLIT */ |
425 | 446 |
426 #elif MSVC_ASMBLIT | 447 #elif MSVC_ASMBLIT |
427 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 448 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
428 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) | 449 static void |
429 { | 450 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info) |
430 int width = info->d_width; | 451 { |
431 int height = info->d_height; | 452 int width = info->d_width; |
432 Uint32 *srcp = (Uint32 *)info->s_pixels; | 453 int height = info->d_height; |
433 int srcskip = info->s_skip >> 2; | 454 Uint32 *srcp = (Uint32 *) info->s_pixels; |
434 Uint32 *dstp = (Uint32 *)info->d_pixels; | 455 int srcskip = info->s_skip >> 2; |
435 int dstskip = info->d_skip >> 2; | 456 Uint32 *dstp = (Uint32 *) info->d_pixels; |
436 Uint32 dalpha = info->dst->Amask; | 457 int dstskip = info->d_skip >> 2; |
437 | 458 Uint32 dalpha = info->dst->Amask; |
438 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; | 459 |
439 | 460 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; |
440 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ | 461 |
441 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ | 462 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ |
442 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ | 463 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ |
443 | 464 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ |
444 while (height--) { | 465 |
445 int n = width; | 466 while (height--) { |
446 if ( n & 1 ) { | 467 int n = width; |
447 Uint32 s = *srcp++; | 468 if (n & 1) { |
448 Uint32 d = *dstp; | 469 Uint32 s = *srcp++; |
449 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | 470 Uint32 d = *dstp; |
450 + (s & d & 0x00010101)) | dalpha; | 471 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) |
451 n--; | 472 + (s & d & 0x00010101)) | dalpha; |
452 } | 473 n--; |
453 | 474 } |
454 for (n >>= 1; n > 0; --n) { | 475 |
455 dst1 = *(__m64*)dstp; /* 2 x dst -> dst1(ARGBARGB) */ | 476 for (n >>= 1; n > 0; --n) { |
456 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | 477 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */ |
457 | 478 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ |
458 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB) */ | 479 |
459 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | 480 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ |
460 | 481 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ |
461 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ | 482 |
462 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ | 483 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ |
463 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ | 484 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ |
464 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ | 485 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ |
465 | 486 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ |
466 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ | 487 |
467 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ | 488 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ |
468 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ | 489 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ |
469 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ | 490 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ |
470 | 491 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ |
471 *(__m64*)dstp = dst1; /* dst1 -> 2 x dst pixels */ | 492 |
472 dstp += 2; | 493 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */ |
473 srcp += 2; | 494 dstp += 2; |
474 } | 495 srcp += 2; |
475 | 496 } |
476 srcp += srcskip; | 497 |
477 dstp += dstskip; | 498 srcp += srcskip; |
478 } | 499 dstp += dstskip; |
479 _mm_empty(); | 500 } |
501 _mm_empty(); | |
480 } | 502 } |
481 | 503 |
482 /* fast RGB888->(A)RGB888 blending with surface alpha */ | 504 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
483 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) | 505 static void |
484 { | 506 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info) |
485 SDL_PixelFormat* df = info->dst; | 507 { |
486 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; | 508 SDL_PixelFormat *df = info->dst; |
487 unsigned alpha = info->src->alpha; | 509 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; |
488 | 510 unsigned alpha = info->src->alpha; |
489 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | 511 |
490 /* only call a128 version when R,G,B occupy lower bits */ | 512 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { |
491 BlitRGBtoRGBSurfaceAlpha128MMX(info); | 513 /* only call a128 version when R,G,B occupy lower bits */ |
492 } else { | 514 BlitRGBtoRGBSurfaceAlpha128MMX(info); |
493 int width = info->d_width; | 515 } else { |
494 int height = info->d_height; | 516 int width = info->d_width; |
495 Uint32 *srcp = (Uint32 *)info->s_pixels; | 517 int height = info->d_height; |
496 int srcskip = info->s_skip >> 2; | 518 Uint32 *srcp = (Uint32 *) info->s_pixels; |
497 Uint32 *dstp = (Uint32 *)info->d_pixels; | 519 int srcskip = info->s_skip >> 2; |
498 int dstskip = info->d_skip >> 2; | 520 Uint32 *dstp = (Uint32 *) info->d_pixels; |
499 Uint32 dalpha = df->Amask; | 521 int dstskip = info->d_skip >> 2; |
500 Uint32 amult; | 522 Uint32 dalpha = df->Amask; |
501 | 523 Uint32 amult; |
502 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; | 524 |
503 | 525 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; |
504 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | 526 |
505 /* form the alpha mult */ | 527 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
506 amult = alpha | (alpha << 8); | 528 /* form the alpha mult */ |
507 amult = amult | (amult << 16); | 529 amult = alpha | (alpha << 8); |
508 chanmask = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); | 530 amult = amult | (amult << 16); |
509 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ | 531 chanmask = |
510 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ | 532 (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df-> |
511 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ | 533 Bshift); |
512 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ | 534 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ |
513 | 535 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ |
514 while (height--) { | 536 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ |
515 int n = width; | 537 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ |
516 if (n & 1) { | 538 |
517 /* One Pixel Blend */ | 539 while (height--) { |
518 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB)*/ | 540 int n = width; |
519 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ | 541 if (n & 1) { |
520 | 542 /* One Pixel Blend */ |
521 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | 543 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */ |
522 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | 544 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ |
523 | 545 |
524 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ | 546 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */ |
525 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | 547 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ |
526 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ | 548 |
527 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ | 549 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ |
528 | 550 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ |
529 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | 551 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ |
530 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ | 552 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ |
531 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | 553 |
532 | 554 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ |
533 ++srcp; | 555 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ |
534 ++dstp; | 556 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ |
535 | 557 |
536 n--; | 558 ++srcp; |
537 } | 559 ++dstp; |
538 | 560 |
539 for (n >>= 1; n > 0; --n) { | 561 n--; |
540 /* Two Pixels Blend */ | 562 } |
541 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB)*/ | 563 |
542 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | 564 for (n >>= 1; n > 0; --n) { |
543 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ | 565 /* Two Pixels Blend */ |
544 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ | 566 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ |
545 | 567 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ |
546 dst1 = *(__m64*)dstp;/* 2 x dst -> dst1(ARGBARGB) */ | 568 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ |
547 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | 569 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ |
548 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ | 570 |
549 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ | 571 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */ |
550 | 572 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ |
551 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ | 573 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ |
552 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ | 574 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ |
553 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ | 575 |
554 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ | 576 src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */ |
555 | 577 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ |
556 src2 = _mm_sub_pi16(src2, dst2);/* src2 - dst2 -> src2 */ | 578 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ |
557 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | 579 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ |
558 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ | 580 |
559 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ | 581 src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */ |
560 | 582 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ |
561 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ | 583 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ |
562 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ | 584 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ |
563 | 585 |
564 *(__m64*)dstp = dst1; /* dst1 -> 2 x pixel */ | 586 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ |
565 | 587 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ |
566 srcp += 2; | 588 |
567 dstp += 2; | 589 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */ |
568 } | 590 |
569 srcp += srcskip; | 591 srcp += 2; |
570 dstp += dstskip; | 592 dstp += 2; |
571 } | 593 } |
572 _mm_empty(); | 594 srcp += srcskip; |
573 } | 595 dstp += dstskip; |
596 } | |
597 _mm_empty(); | |
598 } | |
574 } | 599 } |
575 | 600 |
576 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | 601 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
577 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) | 602 static void |
578 { | 603 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info) |
579 int width = info->d_width; | 604 { |
580 int height = info->d_height; | 605 int width = info->d_width; |
581 Uint32 *srcp = (Uint32 *)info->s_pixels; | 606 int height = info->d_height; |
582 int srcskip = info->s_skip >> 2; | 607 Uint32 *srcp = (Uint32 *) info->s_pixels; |
583 Uint32 *dstp = (Uint32 *)info->d_pixels; | 608 int srcskip = info->s_skip >> 2; |
584 int dstskip = info->d_skip >> 2; | 609 Uint32 *dstp = (Uint32 *) info->d_pixels; |
585 SDL_PixelFormat* sf = info->src; | 610 int dstskip = info->d_skip >> 2; |
586 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | 611 SDL_PixelFormat *sf = info->src; |
587 Uint32 amask = sf->Amask; | 612 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; |
588 Uint32 ashift = sf->Ashift; | 613 Uint32 amask = sf->Amask; |
589 Uint64 multmask; | 614 Uint32 ashift = sf->Ashift; |
590 | 615 Uint64 multmask; |
591 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | 616 |
592 | 617 __m64 src1, dst1, mm_alpha, mm_zero, dmask; |
593 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | 618 |
594 multmask = ~(0xFFFFi64 << (ashift * 2)); | 619 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
595 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ | 620 /* *INDENT-OFF* */ |
596 | 621 multmask = ~(0xFFFFI64 << (ashift * 2)); |
597 while(height--) { | 622 /* *INDENT-ON* */ |
623 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ | |
624 | |
625 while (height--) { | |
626 /* *INDENT-OFF* */ | |
598 DUFFS_LOOP4({ | 627 DUFFS_LOOP4({ |
599 Uint32 alpha = *srcp & amask; | 628 Uint32 alpha = *srcp & amask; |
600 if (alpha == 0) { | 629 if (alpha == 0) { |
601 /* do nothing */ | 630 /* do nothing */ |
602 } else if (alpha == amask) { | 631 } else if (alpha == amask) { |
625 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | 654 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ |
626 } | 655 } |
627 ++srcp; | 656 ++srcp; |
628 ++dstp; | 657 ++dstp; |
629 }, width); | 658 }, width); |
630 srcp += srcskip; | 659 /* *INDENT-ON* */ |
631 dstp += dstskip; | 660 srcp += srcskip; |
632 } | 661 dstp += dstskip; |
633 _mm_empty(); | 662 } |
634 } | 663 _mm_empty(); |
664 } | |
665 | |
635 /* End MSVC_ASMBLIT */ | 666 /* End MSVC_ASMBLIT */ |
636 | 667 |
637 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | 668 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ |
638 | 669 |
639 #if SDL_ALTIVEC_BLITTERS | 670 #if SDL_ALTIVEC_BLITTERS |
644 #include <altivec.h> | 675 #include <altivec.h> |
645 #endif | 676 #endif |
646 #include <assert.h> | 677 #include <assert.h> |
647 | 678 |
648 #if (defined(__MACOSX__) && (__GNUC__ < 4)) | 679 #if (defined(__MACOSX__) && (__GNUC__ < 4)) |
649 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ | 680 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ |
650 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) | 681 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) |
651 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ | 682 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ |
652 (vector unsigned short) ( a,b,c,d,e,f,g,h ) | 683 (vector unsigned short) ( a,b,c,d,e,f,g,h ) |
653 #else | 684 #else |
654 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ | 685 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ |
655 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } | 686 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } |
656 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ | 687 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ |
657 (vector unsigned short) { a,b,c,d,e,f,g,h } | 688 (vector unsigned short) { a,b,c,d,e,f,g,h } |
658 #endif | 689 #endif |
659 | 690 |
660 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) | 691 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) |
661 #define VECPRINT(msg, v) do { \ | 692 #define VECPRINT(msg, v) do { \ |
676 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) | 707 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) |
677 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ | 708 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ |
678 ? vec_lvsl(0, src) \ | 709 ? vec_lvsl(0, src) \ |
679 : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) | 710 : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) |
680 | 711 |
681 | 712 |
682 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ | 713 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ |
683 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ | 714 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ |
684 vector unsigned short vtemp1 = vec_mule(vs, valpha); \ | 715 vector unsigned short vtemp1 = vec_mule(vs, valpha); \ |
685 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ | 716 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ |
686 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ | 717 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ |
702 vtemp4 = vec_sr(vtemp2, v8_16); \ | 733 vtemp4 = vec_sr(vtemp2, v8_16); \ |
703 vtemp2 = vec_add(vtemp2, vtemp4); \ | 734 vtemp2 = vec_add(vtemp2, vtemp4); \ |
704 /* (>>8) and get ARGBARGBARGBARGB */ \ | 735 /* (>>8) and get ARGBARGBARGBARGB */ \ |
705 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ | 736 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ |
706 } while (0) | 737 } while (0) |
707 | 738 |
708 /* Calculate the permute vector used for 32->32 swizzling */ | 739 /* Calculate the permute vector used for 32->32 swizzling */ |
709 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, | 740 static vector unsigned char |
710 const SDL_PixelFormat *dstfmt) | 741 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt) |
711 { | 742 { |
712 /* | 743 /* |
713 * We have to assume that the bits that aren't used by other | 744 * We have to assume that the bits that aren't used by other |
714 * colors is alpha, and it's one complete byte, since some formats | 745 * colors is alpha, and it's one complete byte, since some formats |
715 * leave alpha with a zero mask, but we should still swizzle the bits. | 746 * leave alpha with a zero mask, but we should still swizzle the bits. |
718 const static struct SDL_PixelFormat default_pixel_format = { | 749 const static struct SDL_PixelFormat default_pixel_format = { |
719 NULL, 0, 0, | 750 NULL, 0, 0, |
720 0, 0, 0, 0, | 751 0, 0, 0, 0, |
721 16, 8, 0, 24, | 752 16, 8, 0, 24, |
722 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, | 753 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, |
723 0, 0}; | 754 0, 0 |
755 }; | |
724 if (!srcfmt) { | 756 if (!srcfmt) { |
725 srcfmt = &default_pixel_format; | 757 srcfmt = &default_pixel_format; |
726 } | 758 } |
727 if (!dstfmt) { | 759 if (!dstfmt) { |
728 dstfmt = &default_pixel_format; | 760 dstfmt = &default_pixel_format; |
729 } | 761 } |
730 const vector unsigned char plus = VECUINT8_LITERAL | 762 const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00, |
731 ( 0x00, 0x00, 0x00, 0x00, | 763 0x04, 0x04, 0x04, 0x04, |
732 0x04, 0x04, 0x04, 0x04, | 764 0x08, 0x08, 0x08, 0x08, |
733 0x08, 0x08, 0x08, 0x08, | 765 0x0C, 0x0C, 0x0C, |
734 0x0C, 0x0C, 0x0C, 0x0C ); | 766 0x0C); |
735 vector unsigned char vswiz; | 767 vector unsigned char vswiz; |
736 vector unsigned int srcvec; | 768 vector unsigned int srcvec; |
737 #define RESHIFT(X) (3 - ((X) >> 3)) | 769 #define RESHIFT(X) (3 - ((X) >> 3)) |
738 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); | 770 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); |
739 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); | 771 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); |
740 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); | 772 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); |
741 Uint32 amask; | 773 Uint32 amask; |
742 /* Use zero for alpha if either surface doesn't have alpha */ | 774 /* Use zero for alpha if either surface doesn't have alpha */ |
743 if (dstfmt->Amask) { | 775 if (dstfmt->Amask) { |
744 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); | 776 amask = |
777 ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt-> | |
778 Ashift); | |
745 } else { | 779 } else { |
746 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); | 780 amask = |
747 } | 781 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ |
748 #undef RESHIFT | 782 0xFFFFFFFF); |
749 ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); | 783 } |
750 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); | 784 #undef RESHIFT |
751 return(vswiz); | 785 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask); |
752 } | 786 vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0)); |
753 | 787 return (vswiz); |
754 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) | 788 } |
789 | |
790 static void | |
791 Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info) | |
755 { | 792 { |
756 int height = info->d_height; | 793 int height = info->d_height; |
757 Uint8 *src = (Uint8 *)info->s_pixels; | 794 Uint8 *src = (Uint8 *) info->s_pixels; |
758 int srcskip = info->s_skip; | 795 int srcskip = info->s_skip; |
759 Uint8 *dst = (Uint8 *)info->d_pixels; | 796 Uint8 *dst = (Uint8 *) info->d_pixels; |
760 int dstskip = info->d_skip; | 797 int dstskip = info->d_skip; |
761 SDL_PixelFormat *srcfmt = info->src; | 798 SDL_PixelFormat *srcfmt = info->src; |
762 | 799 |
763 vector unsigned char v0 = vec_splat_u8(0); | 800 vector unsigned char v0 = vec_splat_u8(0); |
764 vector unsigned short v8_16 = vec_splat_u16(8); | 801 vector unsigned short v8_16 = vec_splat_u16(8); |
765 vector unsigned short v1_16 = vec_splat_u16(1); | 802 vector unsigned short v1_16 = vec_splat_u16(1); |
766 vector unsigned short v2_16 = vec_splat_u16(2); | 803 vector unsigned short v2_16 = vec_splat_u16(2); |
767 vector unsigned short v3_16 = vec_splat_u16(3); | 804 vector unsigned short v3_16 = vec_splat_u16(3); |
768 vector unsigned int v8_32 = vec_splat_u32(8); | 805 vector unsigned int v8_32 = vec_splat_u32(8); |
769 vector unsigned int v16_32 = vec_add(v8_32, v8_32); | 806 vector unsigned int v16_32 = vec_add(v8_32, v8_32); |
770 vector unsigned short v3f = VECUINT16_LITERAL( | 807 vector unsigned short v3f = |
771 0x003f, 0x003f, 0x003f, 0x003f, | 808 VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f, |
772 0x003f, 0x003f, 0x003f, 0x003f); | 809 0x003f, 0x003f, 0x003f, 0x003f); |
773 vector unsigned short vfc = VECUINT16_LITERAL( | 810 vector unsigned short vfc = |
774 0x00fc, 0x00fc, 0x00fc, 0x00fc, | 811 VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc, |
775 0x00fc, 0x00fc, 0x00fc, 0x00fc); | 812 0x00fc, 0x00fc, 0x00fc, 0x00fc); |
776 | 813 |
777 /* | 814 /* |
778 0x10 - 0x1f is the alpha | 815 0x10 - 0x1f is the alpha |
779 0x00 - 0x0e evens are the red | 816 0x00 - 0x0e evens are the red |
780 0x01 - 0x0f odds are zero | 817 0x01 - 0x0f odds are zero |
781 */ | 818 */ |
782 vector unsigned char vredalpha1 = VECUINT8_LITERAL( | 819 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01, |
783 0x10, 0x00, 0x01, 0x01, | 820 0x10, 0x02, 0x01, 0x01, |
784 0x10, 0x02, 0x01, 0x01, | 821 0x10, 0x04, 0x01, 0x01, |
785 0x10, 0x04, 0x01, 0x01, | 822 0x10, 0x06, 0x01, |
786 0x10, 0x06, 0x01, 0x01 | 823 0x01); |
787 ); | 824 vector unsigned char vredalpha2 = |
788 vector unsigned char vredalpha2 = (vector unsigned char)( | 825 (vector unsigned char) (vec_add((vector unsigned int) vredalpha1, |
789 vec_add((vector unsigned int)vredalpha1, vec_sl(v8_32, v16_32)) | 826 vec_sl(v8_32, v16_32)) |
790 ); | 827 ); |
791 /* | 828 /* |
792 0x00 - 0x0f is ARxx ARxx ARxx ARxx | 829 0x00 - 0x0f is ARxx ARxx ARxx ARxx |
793 0x11 - 0x0f odds are blue | 830 0x11 - 0x0f odds are blue |
794 */ | 831 */ |
795 vector unsigned char vblue1 = VECUINT8_LITERAL( | 832 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11, |
796 0x00, 0x01, 0x02, 0x11, | 833 0x04, 0x05, 0x06, 0x13, |
797 0x04, 0x05, 0x06, 0x13, | 834 0x08, 0x09, 0x0a, 0x15, |
798 0x08, 0x09, 0x0a, 0x15, | 835 0x0c, 0x0d, 0x0e, 0x17); |
799 0x0c, 0x0d, 0x0e, 0x17 | 836 vector unsigned char vblue2 = |
800 ); | 837 (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32) |
801 vector unsigned char vblue2 = (vector unsigned char)( | 838 ); |
802 vec_add((vector unsigned int)vblue1, v8_32) | |
803 ); | |
804 /* | 839 /* |
805 0x00 - 0x0f is ARxB ARxB ARxB ARxB | 840 0x00 - 0x0f is ARxB ARxB ARxB ARxB |
806 0x10 - 0x0e evens are green | 841 0x10 - 0x0e evens are green |
807 */ | 842 */ |
808 vector unsigned char vgreen1 = VECUINT8_LITERAL( | 843 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03, |
809 0x00, 0x01, 0x10, 0x03, | 844 0x04, 0x05, 0x12, 0x07, |
810 0x04, 0x05, 0x12, 0x07, | 845 0x08, 0x09, 0x14, 0x0b, |
811 0x08, 0x09, 0x14, 0x0b, | 846 0x0c, 0x0d, 0x16, 0x0f); |
812 0x0c, 0x0d, 0x16, 0x0f | 847 vector unsigned char vgreen2 = |
813 ); | 848 (vector unsigned |
814 vector unsigned char vgreen2 = (vector unsigned char)( | 849 char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32)) |
815 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) | 850 ); |
816 ); | 851 vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06, |
817 vector unsigned char vgmerge = VECUINT8_LITERAL( | 852 0x00, 0x0a, 0x00, 0x0e, |
818 0x00, 0x02, 0x00, 0x06, | 853 0x00, 0x12, 0x00, 0x16, |
819 0x00, 0x0a, 0x00, 0x0e, | 854 0x00, 0x1a, 0x00, 0x1e); |
820 0x00, 0x12, 0x00, 0x16, | |
821 0x00, 0x1a, 0x00, 0x1e); | |
822 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); | 855 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); |
823 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); | 856 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); |
824 vector unsigned char valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); | 857 vector unsigned char valphaPermute = |
825 | 858 vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); |
826 vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7); | 859 |
860 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7); | |
827 vf800 = vec_sl(vf800, vec_splat_u16(8)); | 861 vf800 = vec_sl(vf800, vec_splat_u16(8)); |
828 | 862 |
829 while(height--) { | 863 while (height--) { |
830 int extrawidth; | 864 int extrawidth; |
831 vector unsigned char valigner; | 865 vector unsigned char valigner; |
832 vector unsigned char vsrc; | 866 vector unsigned char vsrc; |
833 vector unsigned char voverflow; | 867 vector unsigned char voverflow; |
834 int width = info->d_width; | 868 int width = info->d_width; |
853 widthvar--; \ | 887 widthvar--; \ |
854 } | 888 } |
855 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); | 889 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); |
856 extrawidth = (width % 8); | 890 extrawidth = (width % 8); |
857 valigner = VEC_ALIGNER(src); | 891 valigner = VEC_ALIGNER(src); |
858 vsrc = (vector unsigned char)vec_ld(0, src); | 892 vsrc = (vector unsigned char) vec_ld(0, src); |
859 width -= extrawidth; | 893 width -= extrawidth; |
860 while (width) { | 894 while (width) { |
861 vector unsigned char valpha; | 895 vector unsigned char valpha; |
862 vector unsigned char vsrc1, vsrc2; | 896 vector unsigned char vsrc1, vsrc2; |
863 vector unsigned char vdst1, vdst2; | 897 vector unsigned char vdst1, vdst2; |
864 vector unsigned short vR, vG, vB; | 898 vector unsigned short vR, vG, vB; |
865 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; | 899 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; |
866 | 900 |
867 /* Load 8 pixels from src as ARGB */ | 901 /* Load 8 pixels from src as ARGB */ |
868 voverflow = (vector unsigned char)vec_ld(15, src); | 902 voverflow = (vector unsigned char) vec_ld(15, src); |
869 vsrc = vec_perm(vsrc, voverflow, valigner); | 903 vsrc = vec_perm(vsrc, voverflow, valigner); |
870 vsrc1 = vec_perm(vsrc, vsrc, vpermute); | 904 vsrc1 = vec_perm(vsrc, vsrc, vpermute); |
871 src += 16; | 905 src += 16; |
872 vsrc = (vector unsigned char)vec_ld(15, src); | 906 vsrc = (vector unsigned char) vec_ld(15, src); |
873 voverflow = vec_perm(voverflow, vsrc, valigner); | 907 voverflow = vec_perm(voverflow, vsrc, valigner); |
874 vsrc2 = vec_perm(voverflow, voverflow, vpermute); | 908 vsrc2 = vec_perm(voverflow, voverflow, vpermute); |
875 src += 16; | 909 src += 16; |
876 | 910 |
877 /* Load 8 pixels from dst as XRGB */ | 911 /* Load 8 pixels from dst as XRGB */ |
878 voverflow = vec_ld(0, dst); | 912 voverflow = vec_ld(0, dst); |
879 vR = vec_and((vector unsigned short)voverflow, vf800); | 913 vR = vec_and((vector unsigned short) voverflow, vf800); |
880 vB = vec_sl((vector unsigned short)voverflow, v3_16); | 914 vB = vec_sl((vector unsigned short) voverflow, v3_16); |
881 vG = vec_sl(vB, v2_16); | 915 vG = vec_sl(vB, v2_16); |
882 vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha1); | 916 vdst1 = |
883 vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1); | 917 (vector unsigned char) vec_perm((vector unsigned char) vR, |
884 vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1); | 918 (vector unsigned char) vR, |
885 vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha2); | 919 vredalpha1); |
886 vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2); | 920 vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1); |
887 vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2); | 921 vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1); |
922 vdst2 = | |
923 (vector unsigned char) vec_perm((vector unsigned char) vR, | |
924 (vector unsigned char) vR, | |
925 vredalpha2); | |
926 vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2); | |
927 vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2); | |
888 | 928 |
889 /* Alpha blend 8 pixels as ARGB */ | 929 /* Alpha blend 8 pixels as ARGB */ |
890 valpha = vec_perm(vsrc1, v0, valphaPermute); | 930 valpha = vec_perm(vsrc1, v0, valphaPermute); |
891 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, v8_16); | 931 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, |
932 v8_16); | |
892 valpha = vec_perm(vsrc2, v0, valphaPermute); | 933 valpha = vec_perm(vsrc2, v0, valphaPermute); |
893 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, v8_16); | 934 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, |
935 v8_16); | |
894 | 936 |
895 /* Convert 8 pixels to 565 */ | 937 /* Convert 8 pixels to 565 */ |
896 vpixel = (vector unsigned short)vec_packpx((vector unsigned int)vdst1, (vector unsigned int)vdst2); | 938 vpixel = (vector unsigned short) vec_packpx((vector unsigned int) |
897 vgpixel = (vector unsigned short)vec_perm(vdst1, vdst2, vgmerge); | 939 vdst1, |
940 (vector unsigned int) | |
941 vdst2); | |
942 vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge); | |
898 vgpixel = vec_and(vgpixel, vfc); | 943 vgpixel = vec_and(vgpixel, vfc); |
899 vgpixel = vec_sl(vgpixel, v3_16); | 944 vgpixel = vec_sl(vgpixel, v3_16); |
900 vrpixel = vec_sl(vpixel, v1_16); | 945 vrpixel = vec_sl(vpixel, v1_16); |
901 vrpixel = vec_and(vrpixel, vf800); | 946 vrpixel = vec_and(vrpixel, vf800); |
902 vbpixel = vec_and(vpixel, v3f); | 947 vbpixel = vec_and(vpixel, v3f); |
903 vdst1 = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel); | 948 vdst1 = |
904 vdst1 = vec_or(vdst1, (vector unsigned char)vbpixel); | 949 vec_or((vector unsigned char) vrpixel, |
905 | 950 (vector unsigned char) vgpixel); |
951 vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel); | |
952 | |
906 /* Store 8 pixels */ | 953 /* Store 8 pixels */ |
907 vec_st(vdst1, 0, dst); | 954 vec_st(vdst1, 0, dst); |
908 | 955 |
909 width -= 8; | 956 width -= 8; |
910 dst += 16; | 957 dst += 16; |
914 src += srcskip; | 961 src += srcskip; |
915 dst += dstskip; | 962 dst += dstskip; |
916 } | 963 } |
917 } | 964 } |
918 | 965 |
919 static void Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo *info) | 966 static void |
967 Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info) | |
920 { | 968 { |
921 unsigned alpha = info->src->alpha; | 969 unsigned alpha = info->src->alpha; |
922 int height = info->d_height; | 970 int height = info->d_height; |
923 Uint32 *srcp = (Uint32 *)info->s_pixels; | 971 Uint32 *srcp = (Uint32 *) info->s_pixels; |
924 int srcskip = info->s_skip >> 2; | 972 int srcskip = info->s_skip >> 2; |
925 Uint32 *dstp = (Uint32 *)info->d_pixels; | 973 Uint32 *dstp = (Uint32 *) info->d_pixels; |
926 int dstskip = info->d_skip >> 2; | 974 int dstskip = info->d_skip >> 2; |
927 SDL_PixelFormat *srcfmt = info->src; | 975 SDL_PixelFormat *srcfmt = info->src; |
928 SDL_PixelFormat *dstfmt = info->dst; | 976 SDL_PixelFormat *dstfmt = info->dst; |
929 unsigned sA = srcfmt->alpha; | 977 unsigned sA = srcfmt->alpha; |
930 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | 978 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
954 vsrcPermute = calc_swizzle32(srcfmt, NULL); | 1002 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
955 vdstPermute = calc_swizzle32(NULL, dstfmt); | 1003 vdstPermute = calc_swizzle32(NULL, dstfmt); |
956 vsdstPermute = calc_swizzle32(dstfmt, NULL); | 1004 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
957 | 1005 |
958 /* set a vector full of alpha and 255-alpha */ | 1006 /* set a vector full of alpha and 255-alpha */ |
959 ((unsigned char *)&valpha)[0] = alpha; | 1007 ((unsigned char *) &valpha)[0] = alpha; |
960 valpha = vec_splat(valpha, 0); | 1008 valpha = vec_splat(valpha, 0); |
961 vbits = (vector unsigned char)vec_splat_s8(-1); | 1009 vbits = (vector unsigned char) vec_splat_s8(-1); |
962 | 1010 |
963 ckey &= rgbmask; | 1011 ckey &= rgbmask; |
964 ((unsigned int *)(char*)&vckey)[0] = ckey; | 1012 ((unsigned int *) (char *) &vckey)[0] = ckey; |
965 vckey = vec_splat(vckey, 0); | 1013 vckey = vec_splat(vckey, 0); |
966 ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; | 1014 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask; |
967 vrgbmask = vec_splat(vrgbmask, 0); | 1015 vrgbmask = vec_splat(vrgbmask, 0); |
968 | 1016 |
969 while(height--) { | 1017 while (height--) { |
970 int width = info->d_width; | 1018 int width = info->d_width; |
971 #define ONE_PIXEL_BLEND(condition, widthvar) \ | 1019 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
972 while (condition) { \ | 1020 while (condition) { \ |
973 Uint32 Pixel; \ | 1021 Uint32 Pixel; \ |
974 unsigned sR, sG, sB, dR, dG, dB; \ | 1022 unsigned sR, sG, sB, dR, dG, dB; \ |
985 } | 1033 } |
986 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | 1034 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
987 if (width > 0) { | 1035 if (width > 0) { |
988 int extrawidth = (width % 4); | 1036 int extrawidth = (width % 4); |
989 vector unsigned char valigner = VEC_ALIGNER(srcp); | 1037 vector unsigned char valigner = VEC_ALIGNER(srcp); |
990 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); | 1038 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
991 width -= extrawidth; | 1039 width -= extrawidth; |
992 while (width) { | 1040 while (width) { |
993 vector unsigned char vsel; | 1041 vector unsigned char vsel; |
994 vector unsigned char voverflow; | 1042 vector unsigned char voverflow; |
995 vector unsigned char vd; | 1043 vector unsigned char vd; |
996 vector unsigned char vd_orig; | 1044 vector unsigned char vd_orig; |
997 | 1045 |
998 /* s = *srcp */ | 1046 /* s = *srcp */ |
999 voverflow = (vector unsigned char)vec_ld(15, srcp); | 1047 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1000 vs = vec_perm(vs, voverflow, valigner); | 1048 vs = vec_perm(vs, voverflow, valigner); |
1001 | 1049 |
1002 /* vsel is set for items that match the key */ | 1050 /* vsel is set for items that match the key */ |
1003 vsel = (vector unsigned char)vec_and((vector unsigned int)vs, vrgbmask); | 1051 vsel = |
1004 vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); | 1052 (vector unsigned char) vec_and((vector unsigned int) vs, |
1053 vrgbmask); | |
1054 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int) | |
1055 vsel, vckey); | |
1005 | 1056 |
1006 /* permute to source format */ | 1057 /* permute to source format */ |
1007 vs = vec_perm(vs, valpha, vsrcPermute); | 1058 vs = vec_perm(vs, valpha, vsrcPermute); |
1008 | 1059 |
1009 /* d = *dstp */ | 1060 /* d = *dstp */ |
1010 vd = (vector unsigned char)vec_ld(0, dstp); | 1061 vd = (vector unsigned char) vec_ld(0, dstp); |
1011 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); | 1062 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); |
1012 | 1063 |
1013 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | 1064 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1014 | 1065 |
1015 /* set the alpha channel to full on */ | 1066 /* set the alpha channel to full on */ |
1016 vd = vec_or(vd, valphamask); | 1067 vd = vec_or(vd, valphamask); |
1017 | 1068 |
1018 /* mask out color key */ | 1069 /* mask out color key */ |
1019 vd = vec_sel(vd, vd_orig, vsel); | 1070 vd = vec_sel(vd, vd_orig, vsel); |
1020 | 1071 |
1021 /* permute to dest format */ | 1072 /* permute to dest format */ |
1022 vd = vec_perm(vd, vbits, vdstPermute); | 1073 vd = vec_perm(vd, vbits, vdstPermute); |
1023 | 1074 |
1024 /* *dstp = res */ | 1075 /* *dstp = res */ |
1025 vec_st((vector unsigned int)vd, 0, dstp); | 1076 vec_st((vector unsigned int) vd, 0, dstp); |
1026 | 1077 |
1027 srcp += 4; | 1078 srcp += 4; |
1028 dstp += 4; | 1079 dstp += 4; |
1029 width -= 4; | 1080 width -= 4; |
1030 vs = voverflow; | 1081 vs = voverflow; |
1031 } | 1082 } |
1032 ONE_PIXEL_BLEND((extrawidth), extrawidth); | 1083 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1033 } | 1084 } |
1034 #undef ONE_PIXEL_BLEND | 1085 #undef ONE_PIXEL_BLEND |
1035 | 1086 |
1036 srcp += srcskip; | 1087 srcp += srcskip; |
1037 dstp += dstskip; | 1088 dstp += dstskip; |
1038 } | 1089 } |
1039 } | 1090 } |
1040 | 1091 |
1041 | 1092 |
1042 static void Blit32to32PixelAlphaAltivec(SDL_BlitInfo *info) | 1093 static void |
1094 Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info) | |
1043 { | 1095 { |
1044 int width = info->d_width; | 1096 int width = info->d_width; |
1045 int height = info->d_height; | 1097 int height = info->d_height; |
1046 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1098 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1047 int srcskip = info->s_skip >> 2; | 1099 int srcskip = info->s_skip >> 2; |
1048 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1100 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1049 int dstskip = info->d_skip >> 2; | 1101 int dstskip = info->d_skip >> 2; |
1050 SDL_PixelFormat *srcfmt = info->src; | 1102 SDL_PixelFormat *srcfmt = info->src; |
1051 SDL_PixelFormat *dstfmt = info->dst; | 1103 SDL_PixelFormat *dstfmt = info->dst; |
1052 vector unsigned char mergePermute; | 1104 vector unsigned char mergePermute; |
1053 vector unsigned char valphaPermute; | 1105 vector unsigned char valphaPermute; |
1063 v0 = vec_splat_u8(0); | 1115 v0 = vec_splat_u8(0); |
1064 v1 = vec_splat_u16(1); | 1116 v1 = vec_splat_u16(1); |
1065 v8 = vec_splat_u16(8); | 1117 v8 = vec_splat_u16(8); |
1066 mergePermute = VEC_MERGE_PERMUTE(); | 1118 mergePermute = VEC_MERGE_PERMUTE(); |
1067 valphamask = VEC_ALPHA_MASK(); | 1119 valphamask = VEC_ALPHA_MASK(); |
1068 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); | 1120 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); |
1069 vpixelmask = vec_nor(valphamask, v0); | 1121 vpixelmask = vec_nor(valphamask, v0); |
1070 vsrcPermute = calc_swizzle32(srcfmt, NULL); | 1122 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
1071 vdstPermute = calc_swizzle32(NULL, dstfmt); | 1123 vdstPermute = calc_swizzle32(NULL, dstfmt); |
1072 vsdstPermute = calc_swizzle32(dstfmt, NULL); | 1124 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
1073 | 1125 |
1074 while ( height-- ) { | 1126 while (height--) { |
1075 width = info->d_width; | 1127 width = info->d_width; |
1076 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | 1128 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1077 Uint32 Pixel; \ | 1129 Uint32 Pixel; \ |
1078 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ | 1130 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ |
1079 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ | 1131 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
1090 if (width > 0) { | 1142 if (width > 0) { |
1091 /* vsrcPermute */ | 1143 /* vsrcPermute */ |
1092 /* vdstPermute */ | 1144 /* vdstPermute */ |
1093 int extrawidth = (width % 4); | 1145 int extrawidth = (width % 4); |
1094 vector unsigned char valigner = VEC_ALIGNER(srcp); | 1146 vector unsigned char valigner = VEC_ALIGNER(srcp); |
1095 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); | 1147 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1096 width -= extrawidth; | 1148 width -= extrawidth; |
1097 while (width) { | 1149 while (width) { |
1098 vector unsigned char voverflow; | 1150 vector unsigned char voverflow; |
1099 vector unsigned char vd; | 1151 vector unsigned char vd; |
1100 vector unsigned char valpha; | 1152 vector unsigned char valpha; |
1101 vector unsigned char vdstalpha; | 1153 vector unsigned char vdstalpha; |
1102 /* s = *srcp */ | 1154 /* s = *srcp */ |
1103 voverflow = (vector unsigned char)vec_ld(15, srcp); | 1155 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1104 vs = vec_perm(vs, voverflow, valigner); | 1156 vs = vec_perm(vs, voverflow, valigner); |
1105 vs = vec_perm(vs, v0, vsrcPermute); | 1157 vs = vec_perm(vs, v0, vsrcPermute); |
1106 | 1158 |
1107 valpha = vec_perm(vs, v0, valphaPermute); | 1159 valpha = vec_perm(vs, v0, valphaPermute); |
1108 | 1160 |
1109 /* d = *dstp */ | 1161 /* d = *dstp */ |
1110 vd = (vector unsigned char)vec_ld(0, dstp); | 1162 vd = (vector unsigned char) vec_ld(0, dstp); |
1111 vd = vec_perm(vd, v0, vsdstPermute); | 1163 vd = vec_perm(vd, v0, vsdstPermute); |
1112 vdstalpha = vec_and(vd, valphamask); | 1164 vdstalpha = vec_and(vd, valphamask); |
1113 | 1165 |
1114 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | 1166 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1115 | 1167 |
1117 vd = vec_and(vd, vpixelmask); | 1169 vd = vec_and(vd, vpixelmask); |
1118 vd = vec_or(vd, vdstalpha); | 1170 vd = vec_or(vd, vdstalpha); |
1119 vd = vec_perm(vd, v0, vdstPermute); | 1171 vd = vec_perm(vd, v0, vdstPermute); |
1120 | 1172 |
1121 /* *dstp = res */ | 1173 /* *dstp = res */ |
1122 vec_st((vector unsigned int)vd, 0, dstp); | 1174 vec_st((vector unsigned int) vd, 0, dstp); |
1123 | 1175 |
1124 srcp += 4; | 1176 srcp += 4; |
1125 dstp += 4; | 1177 dstp += 4; |
1126 width -= 4; | 1178 width -= 4; |
1127 vs = voverflow; | 1179 vs = voverflow; |
1128 | 1180 |
1129 } | 1181 } |
1130 ONE_PIXEL_BLEND((extrawidth), extrawidth); | 1182 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1131 } | 1183 } |
1132 srcp += srcskip; | 1184 srcp += srcskip; |
1133 dstp += dstskip; | 1185 dstp += dstskip; |
1134 #undef ONE_PIXEL_BLEND | 1186 #undef ONE_PIXEL_BLEND |
1135 } | 1187 } |
1136 } | 1188 } |
1137 | 1189 |
1138 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | 1190 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
1139 static void BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo *info) | 1191 static void |
1140 { | 1192 BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info) |
1141 int width = info->d_width; | 1193 { |
1142 int height = info->d_height; | 1194 int width = info->d_width; |
1143 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1195 int height = info->d_height; |
1144 int srcskip = info->s_skip >> 2; | 1196 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1145 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1197 int srcskip = info->s_skip >> 2; |
1146 int dstskip = info->d_skip >> 2; | 1198 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1199 int dstskip = info->d_skip >> 2; | |
1147 vector unsigned char mergePermute; | 1200 vector unsigned char mergePermute; |
1148 vector unsigned char valphaPermute; | 1201 vector unsigned char valphaPermute; |
1149 vector unsigned char valphamask; | 1202 vector unsigned char valphamask; |
1150 vector unsigned char vpixelmask; | 1203 vector unsigned char vpixelmask; |
1151 vector unsigned char v0; | 1204 vector unsigned char v0; |
1154 v0 = vec_splat_u8(0); | 1207 v0 = vec_splat_u8(0); |
1155 v1 = vec_splat_u16(1); | 1208 v1 = vec_splat_u16(1); |
1156 v8 = vec_splat_u16(8); | 1209 v8 = vec_splat_u16(8); |
1157 mergePermute = VEC_MERGE_PERMUTE(); | 1210 mergePermute = VEC_MERGE_PERMUTE(); |
1158 valphamask = VEC_ALPHA_MASK(); | 1211 valphamask = VEC_ALPHA_MASK(); |
1159 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); | 1212 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); |
1160 | 1213 |
1161 | 1214 |
1162 vpixelmask = vec_nor(valphamask, v0); | 1215 vpixelmask = vec_nor(valphamask, v0); |
1163 while(height--) { | 1216 while (height--) { |
1164 width = info->d_width; | 1217 width = info->d_width; |
1165 #define ONE_PIXEL_BLEND(condition, widthvar) \ | 1218 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
1166 while ((condition)) { \ | 1219 while ((condition)) { \ |
1167 Uint32 dalpha; \ | 1220 Uint32 dalpha; \ |
1168 Uint32 d; \ | 1221 Uint32 d; \ |
1191 } | 1244 } |
1192 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | 1245 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
1193 if (width > 0) { | 1246 if (width > 0) { |
1194 int extrawidth = (width % 4); | 1247 int extrawidth = (width % 4); |
1195 vector unsigned char valigner = VEC_ALIGNER(srcp); | 1248 vector unsigned char valigner = VEC_ALIGNER(srcp); |
1196 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); | 1249 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1197 width -= extrawidth; | 1250 width -= extrawidth; |
1198 while (width) { | 1251 while (width) { |
1199 vector unsigned char voverflow; | 1252 vector unsigned char voverflow; |
1200 vector unsigned char vd; | 1253 vector unsigned char vd; |
1201 vector unsigned char valpha; | 1254 vector unsigned char valpha; |
1202 vector unsigned char vdstalpha; | 1255 vector unsigned char vdstalpha; |
1203 /* s = *srcp */ | 1256 /* s = *srcp */ |
1204 voverflow = (vector unsigned char)vec_ld(15, srcp); | 1257 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1205 vs = vec_perm(vs, voverflow, valigner); | 1258 vs = vec_perm(vs, voverflow, valigner); |
1206 | 1259 |
1207 valpha = vec_perm(vs, v0, valphaPermute); | 1260 valpha = vec_perm(vs, v0, valphaPermute); |
1208 | 1261 |
1209 /* d = *dstp */ | 1262 /* d = *dstp */ |
1210 vd = (vector unsigned char)vec_ld(0, dstp); | 1263 vd = (vector unsigned char) vec_ld(0, dstp); |
1211 vdstalpha = vec_and(vd, valphamask); | 1264 vdstalpha = vec_and(vd, valphamask); |
1212 | 1265 |
1213 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | 1266 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1214 | 1267 |
1215 /* set the alpha to the dest alpha */ | 1268 /* set the alpha to the dest alpha */ |
1216 vd = vec_and(vd, vpixelmask); | 1269 vd = vec_and(vd, vpixelmask); |
1217 vd = vec_or(vd, vdstalpha); | 1270 vd = vec_or(vd, vdstalpha); |
1218 | 1271 |
1219 /* *dstp = res */ | 1272 /* *dstp = res */ |
1220 vec_st((vector unsigned int)vd, 0, dstp); | 1273 vec_st((vector unsigned int) vd, 0, dstp); |
1221 | 1274 |
1222 srcp += 4; | 1275 srcp += 4; |
1223 dstp += 4; | 1276 dstp += 4; |
1224 width -= 4; | 1277 width -= 4; |
1225 vs = voverflow; | 1278 vs = voverflow; |
1226 } | 1279 } |
1227 ONE_PIXEL_BLEND((extrawidth), extrawidth); | 1280 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1228 } | 1281 } |
1229 srcp += srcskip; | 1282 srcp += srcskip; |
1230 dstp += dstskip; | 1283 dstp += dstskip; |
1231 } | 1284 } |
1232 #undef ONE_PIXEL_BLEND | 1285 #undef ONE_PIXEL_BLEND |
1233 } | 1286 } |
1234 | 1287 |
1235 static void Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo *info) | 1288 static void |
1289 Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info) | |
1236 { | 1290 { |
1237 /* XXX : 6 */ | 1291 /* XXX : 6 */ |
1238 unsigned alpha = info->src->alpha; | 1292 unsigned alpha = info->src->alpha; |
1239 int height = info->d_height; | 1293 int height = info->d_height; |
1240 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1294 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1241 int srcskip = info->s_skip >> 2; | 1295 int srcskip = info->s_skip >> 2; |
1242 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1296 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1243 int dstskip = info->d_skip >> 2; | 1297 int dstskip = info->d_skip >> 2; |
1244 SDL_PixelFormat *srcfmt = info->src; | 1298 SDL_PixelFormat *srcfmt = info->src; |
1245 SDL_PixelFormat *dstfmt = info->dst; | 1299 SDL_PixelFormat *dstfmt = info->dst; |
1246 unsigned sA = srcfmt->alpha; | 1300 unsigned sA = srcfmt->alpha; |
1247 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | 1301 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
1248 vector unsigned char mergePermute; | 1302 vector unsigned char mergePermute; |
1249 vector unsigned char vsrcPermute; | 1303 vector unsigned char vsrcPermute; |
1250 vector unsigned char vdstPermute; | 1304 vector unsigned char vdstPermute; |
1251 vector unsigned char vsdstPermute; | 1305 vector unsigned char vsdstPermute; |
1252 vector unsigned char valpha; | 1306 vector unsigned char valpha; |
1265 vsrcPermute = calc_swizzle32(srcfmt, NULL); | 1319 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
1266 vdstPermute = calc_swizzle32(NULL, dstfmt); | 1320 vdstPermute = calc_swizzle32(NULL, dstfmt); |
1267 vsdstPermute = calc_swizzle32(dstfmt, NULL); | 1321 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
1268 | 1322 |
1269 /* set a vector full of alpha and 255-alpha */ | 1323 /* set a vector full of alpha and 255-alpha */ |
1270 ((unsigned char *)&valpha)[0] = alpha; | 1324 ((unsigned char *) &valpha)[0] = alpha; |
1271 valpha = vec_splat(valpha, 0); | 1325 valpha = vec_splat(valpha, 0); |
1272 vbits = (vector unsigned char)vec_splat_s8(-1); | 1326 vbits = (vector unsigned char) vec_splat_s8(-1); |
1273 | 1327 |
1274 while(height--) { | 1328 while (height--) { |
1275 int width = info->d_width; | 1329 int width = info->d_width; |
1276 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | 1330 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1277 Uint32 Pixel; \ | 1331 Uint32 Pixel; \ |
1278 unsigned sR, sG, sB, dR, dG, dB; \ | 1332 unsigned sR, sG, sB, dR, dG, dB; \ |
1279 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ | 1333 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ |
1286 } | 1340 } |
1287 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | 1341 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
1288 if (width > 0) { | 1342 if (width > 0) { |
1289 int extrawidth = (width % 4); | 1343 int extrawidth = (width % 4); |
1290 vector unsigned char valigner = vec_lvsl(0, srcp); | 1344 vector unsigned char valigner = vec_lvsl(0, srcp); |
1291 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); | 1345 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1292 width -= extrawidth; | 1346 width -= extrawidth; |
1293 while (width) { | 1347 while (width) { |
1294 vector unsigned char voverflow; | 1348 vector unsigned char voverflow; |
1295 vector unsigned char vd; | 1349 vector unsigned char vd; |
1296 | 1350 |
1297 /* s = *srcp */ | 1351 /* s = *srcp */ |
1298 voverflow = (vector unsigned char)vec_ld(15, srcp); | 1352 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1299 vs = vec_perm(vs, voverflow, valigner); | 1353 vs = vec_perm(vs, voverflow, valigner); |
1300 vs = vec_perm(vs, valpha, vsrcPermute); | 1354 vs = vec_perm(vs, valpha, vsrcPermute); |
1301 | 1355 |
1302 /* d = *dstp */ | 1356 /* d = *dstp */ |
1303 vd = (vector unsigned char)vec_ld(0, dstp); | 1357 vd = (vector unsigned char) vec_ld(0, dstp); |
1304 vd = vec_perm(vd, vd, vsdstPermute); | 1358 vd = vec_perm(vd, vd, vsdstPermute); |
1305 | 1359 |
1306 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | 1360 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1307 | 1361 |
1308 /* set the alpha channel to full on */ | 1362 /* set the alpha channel to full on */ |
1309 vd = vec_or(vd, valphamask); | 1363 vd = vec_or(vd, valphamask); |
1310 vd = vec_perm(vd, vbits, vdstPermute); | 1364 vd = vec_perm(vd, vbits, vdstPermute); |
1311 | 1365 |
1312 /* *dstp = res */ | 1366 /* *dstp = res */ |
1313 vec_st((vector unsigned int)vd, 0, dstp); | 1367 vec_st((vector unsigned int) vd, 0, dstp); |
1314 | 1368 |
1315 srcp += 4; | 1369 srcp += 4; |
1316 dstp += 4; | 1370 dstp += 4; |
1317 width -= 4; | 1371 width -= 4; |
1318 vs = voverflow; | 1372 vs = voverflow; |
1319 } | 1373 } |
1320 ONE_PIXEL_BLEND((extrawidth), extrawidth); | 1374 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1321 } | 1375 } |
1322 #undef ONE_PIXEL_BLEND | 1376 #undef ONE_PIXEL_BLEND |
1323 | 1377 |
1324 srcp += srcskip; | 1378 srcp += srcskip; |
1325 dstp += dstskip; | 1379 dstp += dstskip; |
1326 } | 1380 } |
1327 | 1381 |
1328 } | 1382 } |
1329 | 1383 |
1330 | 1384 |
1331 /* fast RGB888->(A)RGB888 blending */ | 1385 /* fast RGB888->(A)RGB888 blending */ |
1332 static void BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo *info) | 1386 static void |
1333 { | 1387 BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info) |
1334 unsigned alpha = info->src->alpha; | 1388 { |
1389 unsigned alpha = info->src->alpha; | |
1335 int height = info->d_height; | 1390 int height = info->d_height; |
1336 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1391 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1337 int srcskip = info->s_skip >> 2; | 1392 int srcskip = info->s_skip >> 2; |
1338 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1393 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1339 int dstskip = info->d_skip >> 2; | 1394 int dstskip = info->d_skip >> 2; |
1340 vector unsigned char mergePermute; | 1395 vector unsigned char mergePermute; |
1341 vector unsigned char valpha; | 1396 vector unsigned char valpha; |
1342 vector unsigned char valphamask; | 1397 vector unsigned char valphamask; |
1343 vector unsigned short v1; | 1398 vector unsigned short v1; |
1349 | 1404 |
1350 /* set the alpha to 255 on the destination surf */ | 1405 /* set the alpha to 255 on the destination surf */ |
1351 valphamask = VEC_ALPHA_MASK(); | 1406 valphamask = VEC_ALPHA_MASK(); |
1352 | 1407 |
1353 /* set a vector full of alpha and 255-alpha */ | 1408 /* set a vector full of alpha and 255-alpha */ |
1354 ((unsigned char *)&valpha)[0] = alpha; | 1409 ((unsigned char *) &valpha)[0] = alpha; |
1355 valpha = vec_splat(valpha, 0); | 1410 valpha = vec_splat(valpha, 0); |
1356 | 1411 |
1357 while(height--) { | 1412 while (height--) { |
1358 int width = info->d_width; | 1413 int width = info->d_width; |
1359 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | 1414 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1360 Uint32 s = *srcp; \ | 1415 Uint32 s = *srcp; \ |
1361 Uint32 d = *dstp; \ | 1416 Uint32 d = *dstp; \ |
1362 Uint32 s1 = s & 0xff00ff; \ | 1417 Uint32 s1 = s & 0xff00ff; \ |
1373 } | 1428 } |
1374 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | 1429 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
1375 if (width > 0) { | 1430 if (width > 0) { |
1376 int extrawidth = (width % 4); | 1431 int extrawidth = (width % 4); |
1377 vector unsigned char valigner = VEC_ALIGNER(srcp); | 1432 vector unsigned char valigner = VEC_ALIGNER(srcp); |
1378 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); | 1433 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); |
1379 width -= extrawidth; | 1434 width -= extrawidth; |
1380 while (width) { | 1435 while (width) { |
1381 vector unsigned char voverflow; | 1436 vector unsigned char voverflow; |
1382 vector unsigned char vd; | 1437 vector unsigned char vd; |
1383 | 1438 |
1384 /* s = *srcp */ | 1439 /* s = *srcp */ |
1385 voverflow = (vector unsigned char)vec_ld(15, srcp); | 1440 voverflow = (vector unsigned char) vec_ld(15, srcp); |
1386 vs = vec_perm(vs, voverflow, valigner); | 1441 vs = vec_perm(vs, voverflow, valigner); |
1387 | 1442 |
1388 /* d = *dstp */ | 1443 /* d = *dstp */ |
1389 vd = (vector unsigned char)vec_ld(0, dstp); | 1444 vd = (vector unsigned char) vec_ld(0, dstp); |
1390 | 1445 |
1391 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | 1446 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
1392 | 1447 |
1393 /* set the alpha channel to full on */ | 1448 /* set the alpha channel to full on */ |
1394 vd = vec_or(vd, valphamask); | 1449 vd = vec_or(vd, valphamask); |
1395 | 1450 |
1396 /* *dstp = res */ | 1451 /* *dstp = res */ |
1397 vec_st((vector unsigned int)vd, 0, dstp); | 1452 vec_st((vector unsigned int) vd, 0, dstp); |
1398 | 1453 |
1399 srcp += 4; | 1454 srcp += 4; |
1400 dstp += 4; | 1455 dstp += 4; |
1401 width -= 4; | 1456 width -= 4; |
1402 vs = voverflow; | 1457 vs = voverflow; |
1403 } | 1458 } |
1404 ONE_PIXEL_BLEND((extrawidth), extrawidth); | 1459 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
1405 } | 1460 } |
1406 #undef ONE_PIXEL_BLEND | 1461 #undef ONE_PIXEL_BLEND |
1407 | 1462 |
1408 srcp += srcskip; | 1463 srcp += srcskip; |
1409 dstp += dstskip; | 1464 dstp += dstskip; |
1410 } | 1465 } |
1411 } | 1466 } |
1467 | |
1412 #if __MWERKS__ | 1468 #if __MWERKS__ |
1413 #pragma altivec_model off | 1469 #pragma altivec_model off |
1414 #endif | 1470 #endif |
1415 #endif /* SDL_ALTIVEC_BLITTERS */ | 1471 #endif /* SDL_ALTIVEC_BLITTERS */ |
1416 | 1472 |
1417 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 1473 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
1418 static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) | 1474 static void |
1419 { | 1475 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info) |
1420 int width = info->d_width; | 1476 { |
1421 int height = info->d_height; | 1477 int width = info->d_width; |
1422 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1478 int height = info->d_height; |
1423 int srcskip = info->s_skip >> 2; | 1479 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1424 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1480 int srcskip = info->s_skip >> 2; |
1425 int dstskip = info->d_skip >> 2; | 1481 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1426 | 1482 int dstskip = info->d_skip >> 2; |
1427 while(height--) { | 1483 |
1484 while (height--) { | |
1485 /* *INDENT-OFF* */ | |
1428 DUFFS_LOOP4({ | 1486 DUFFS_LOOP4({ |
1429 Uint32 s = *srcp++; | 1487 Uint32 s = *srcp++; |
1430 Uint32 d = *dstp; | 1488 Uint32 d = *dstp; |
1431 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | 1489 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) |
1432 + (s & d & 0x00010101)) | 0xff000000; | 1490 + (s & d & 0x00010101)) | 0xff000000; |
1433 }, width); | 1491 }, width); |
1434 srcp += srcskip; | 1492 /* *INDENT-ON* */ |
1435 dstp += dstskip; | 1493 srcp += srcskip; |
1436 } | 1494 dstp += dstskip; |
1495 } | |
1437 } | 1496 } |
1438 | 1497 |
1439 /* fast RGB888->(A)RGB888 blending with surface alpha */ | 1498 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
1440 static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) | 1499 static void |
1441 { | 1500 BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo * info) |
1442 unsigned alpha = info->src->alpha; | 1501 { |
1443 if(alpha == 128) { | 1502 unsigned alpha = info->src->alpha; |
1444 BlitRGBtoRGBSurfaceAlpha128(info); | 1503 if (alpha == 128) { |
1445 } else { | 1504 BlitRGBtoRGBSurfaceAlpha128(info); |
1446 int width = info->d_width; | 1505 } else { |
1447 int height = info->d_height; | 1506 int width = info->d_width; |
1448 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1507 int height = info->d_height; |
1449 int srcskip = info->s_skip >> 2; | 1508 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1450 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1509 int srcskip = info->s_skip >> 2; |
1451 int dstskip = info->d_skip >> 2; | 1510 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1452 Uint32 s; | 1511 int dstskip = info->d_skip >> 2; |
1453 Uint32 d; | 1512 Uint32 s; |
1454 Uint32 s1; | 1513 Uint32 d; |
1455 Uint32 d1; | 1514 Uint32 s1; |
1456 | 1515 Uint32 d1; |
1457 while(height--) { | 1516 |
1517 while (height--) { | |
1518 /* *INDENT-OFF* */ | |
1458 DUFFS_LOOP_DOUBLE2({ | 1519 DUFFS_LOOP_DOUBLE2({ |
1459 /* One Pixel Blend */ | 1520 /* One Pixel Blend */ |
1460 s = *srcp; | 1521 s = *srcp; |
1461 d = *dstp; | 1522 d = *dstp; |
1462 s1 = s & 0xff00ff; | 1523 s1 = s & 0xff00ff; |
1497 | 1558 |
1498 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000; | 1559 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000; |
1499 ++srcp; | 1560 ++srcp; |
1500 ++dstp; | 1561 ++dstp; |
1501 }, width); | 1562 }, width); |
1502 srcp += srcskip; | 1563 /* *INDENT-ON* */ |
1503 dstp += dstskip; | 1564 srcp += srcskip; |
1504 } | 1565 dstp += dstskip; |
1505 } | 1566 } |
1567 } | |
1506 } | 1568 } |
1507 | 1569 |
1508 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | 1570 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
1509 static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info) | 1571 static void |
1510 { | 1572 BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info) |
1511 int width = info->d_width; | 1573 { |
1512 int height = info->d_height; | 1574 int width = info->d_width; |
1513 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1575 int height = info->d_height; |
1514 int srcskip = info->s_skip >> 2; | 1576 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1515 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1577 int srcskip = info->s_skip >> 2; |
1516 int dstskip = info->d_skip >> 2; | 1578 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1517 | 1579 int dstskip = info->d_skip >> 2; |
1518 while(height--) { | 1580 |
1581 while (height--) { | |
1582 /* *INDENT-OFF* */ | |
1519 DUFFS_LOOP4({ | 1583 DUFFS_LOOP4({ |
1520 Uint32 dalpha; | 1584 Uint32 dalpha; |
1521 Uint32 d; | 1585 Uint32 d; |
1522 Uint32 s1; | 1586 Uint32 s1; |
1523 Uint32 d1; | 1587 Uint32 d1; |
1547 } | 1611 } |
1548 } | 1612 } |
1549 ++srcp; | 1613 ++srcp; |
1550 ++dstp; | 1614 ++dstp; |
1551 }, width); | 1615 }, width); |
1552 srcp += srcskip; | 1616 /* *INDENT-ON* */ |
1553 dstp += dstskip; | 1617 srcp += srcskip; |
1554 } | 1618 dstp += dstskip; |
1619 } | |
1555 } | 1620 } |
1556 | 1621 |
1557 #if GCC_ASMBLIT | 1622 #if GCC_ASMBLIT |
1558 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | 1623 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ |
1559 inline static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) | 1624 inline static void |
1560 { | 1625 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) |
1561 int width = info->d_width; | 1626 { |
1562 int height = info->d_height; | 1627 int width = info->d_width; |
1563 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1628 int height = info->d_height; |
1564 int srcskip = info->s_skip >> 2; | 1629 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1565 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1630 int srcskip = info->s_skip >> 2; |
1566 int dstskip = info->d_skip >> 2; | 1631 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1567 SDL_PixelFormat* sf = info->src; | 1632 int dstskip = info->d_skip >> 2; |
1568 Uint32 amask = sf->Amask; | 1633 SDL_PixelFormat *sf = info->src; |
1569 | 1634 Uint32 amask = sf->Amask; |
1570 __asm__ ( | 1635 |
1571 /* make mm6 all zeros. */ | 1636 __asm__( |
1572 "pxor %%mm6, %%mm6\n" | 1637 /* make mm6 all zeros. */ |
1573 | 1638 "pxor %%mm6, %%mm6\n" |
1574 /* Make a mask to preserve the alpha. */ | 1639 /* Make a mask to preserve the alpha. */ |
1575 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ | 1640 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ |
1576 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ | 1641 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ |
1577 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ | 1642 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ |
1578 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ | 1643 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ |
1579 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ | 1644 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ |
1580 | 1645 /* form channel masks */ |
1581 /* form channel masks */ | 1646 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ |
1582 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ | 1647 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ |
1583 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ | 1648 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ |
1584 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ | 1649 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ |
1585 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ | 1650 /* get alpha channel shift */ |
1586 | 1651 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ |
1587 /* get alpha channel shift */ | 1652 : /* nothing */ : "m"(sf->Amask), "m"(sf->Ashift)); |
1588 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ | 1653 |
1589 | 1654 while (height--) { |
1590 : /* nothing */ : "m" (sf->Amask), "m" (sf->Ashift) ); | 1655 |
1591 | 1656 /* *INDENT-OFF* */ |
1592 while(height--) { | |
1593 | |
1594 DUFFS_LOOP4({ | 1657 DUFFS_LOOP4({ |
1595 Uint32 alpha; | 1658 Uint32 alpha; |
1596 | 1659 |
1597 __asm__ ( | 1660 __asm__ ( |
1598 "prefetch 64(%0)\n" | 1661 "prefetch 64(%0)\n" |
1660 | 1723 |
1661 } | 1724 } |
1662 ++srcp; | 1725 ++srcp; |
1663 ++dstp; | 1726 ++dstp; |
1664 }, width); | 1727 }, width); |
1665 srcp += srcskip; | 1728 /* *INDENT-ON* */ |
1666 dstp += dstskip; | 1729 srcp += srcskip; |
1667 } | 1730 dstp += dstskip; |
1668 | 1731 } |
1669 __asm__ ( | 1732 |
1670 "emms\n" | 1733 __asm__("emms\n":); |
1671 : ); | 1734 } |
1672 } | 1735 |
1673 /* End GCC_ASMBLIT*/ | 1736 /* End GCC_ASMBLIT*/ |
1674 | 1737 |
1675 #elif MSVC_ASMBLIT | 1738 #elif MSVC_ASMBLIT |
1676 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | 1739 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ |
1677 static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) | 1740 static void |
1678 { | 1741 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) |
1679 int width = info->d_width; | 1742 { |
1680 int height = info->d_height; | 1743 int width = info->d_width; |
1681 Uint32 *srcp = (Uint32 *)info->s_pixels; | 1744 int height = info->d_height; |
1682 int srcskip = info->s_skip >> 2; | 1745 Uint32 *srcp = (Uint32 *) info->s_pixels; |
1683 Uint32 *dstp = (Uint32 *)info->d_pixels; | 1746 int srcskip = info->s_skip >> 2; |
1684 int dstskip = info->d_skip >> 2; | 1747 Uint32 *dstp = (Uint32 *) info->d_pixels; |
1685 SDL_PixelFormat* sf = info->src; | 1748 int dstskip = info->d_skip >> 2; |
1686 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | 1749 SDL_PixelFormat *sf = info->src; |
1687 Uint32 amask = sf->Amask; | 1750 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; |
1688 Uint32 ashift = sf->Ashift; | 1751 Uint32 amask = sf->Amask; |
1689 Uint64 multmask; | 1752 Uint32 ashift = sf->Ashift; |
1690 | 1753 Uint64 multmask; |
1691 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | 1754 |
1692 | 1755 __m64 src1, dst1, mm_alpha, mm_zero, dmask; |
1693 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | 1756 |
1694 multmask = ~(0xFFFFi64 << (ashift * 2)); | 1757 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ |
1695 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ | 1758 /* *INDENT-OFF* */ |
1696 | 1759 multmask = ~(0xFFFFI64 << (ashift * 2)); |
1697 while(height--) { | 1760 /* *INDENT-ON* */ |
1761 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ | |
1762 | |
1763 while (height--) { | |
1764 /* *INDENT-OFF* */ | |
1698 DUFFS_LOOP4({ | 1765 DUFFS_LOOP4({ |
1699 Uint32 alpha; | 1766 Uint32 alpha; |
1700 | 1767 |
1701 _m_prefetch(srcp + 16); | 1768 _m_prefetch(srcp + 16); |
1702 _m_prefetch(dstp + 16); | 1769 _m_prefetch(dstp + 16); |
1730 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | 1797 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ |
1731 } | 1798 } |
1732 ++srcp; | 1799 ++srcp; |
1733 ++dstp; | 1800 ++dstp; |
1734 }, width); | 1801 }, width); |
1735 srcp += srcskip; | 1802 /* *INDENT-ON* */ |
1736 dstp += dstskip; | 1803 srcp += srcskip; |
1737 } | 1804 dstp += dstskip; |
1738 _mm_empty(); | 1805 } |
1739 } | 1806 _mm_empty(); |
1807 } | |
1808 | |
1740 /* End MSVC_ASMBLIT */ | 1809 /* End MSVC_ASMBLIT */ |
1741 | 1810 |
1742 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | 1811 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ |
1743 | 1812 |
1744 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ | 1813 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ |
1750 /* blend two 16 bit pixels at 50% */ | 1819 /* blend two 16 bit pixels at 50% */ |
1751 #define BLEND2x16_50(d, s, mask) \ | 1820 #define BLEND2x16_50(d, s, mask) \ |
1752 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ | 1821 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ |
1753 + (s & d & (~(mask | mask << 16)))) | 1822 + (s & d & (~(mask | mask << 16)))) |
1754 | 1823 |
1755 static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask) | 1824 static void |
1756 { | 1825 Blit16to16SurfaceAlpha128(SDL_BlitInfo * info, Uint16 mask) |
1757 int width = info->d_width; | 1826 { |
1758 int height = info->d_height; | 1827 int width = info->d_width; |
1759 Uint16 *srcp = (Uint16 *)info->s_pixels; | 1828 int height = info->d_height; |
1760 int srcskip = info->s_skip >> 1; | 1829 Uint16 *srcp = (Uint16 *) info->s_pixels; |
1761 Uint16 *dstp = (Uint16 *)info->d_pixels; | 1830 int srcskip = info->s_skip >> 1; |
1762 int dstskip = info->d_skip >> 1; | 1831 Uint16 *dstp = (Uint16 *) info->d_pixels; |
1763 | 1832 int dstskip = info->d_skip >> 1; |
1764 while(height--) { | 1833 |
1765 if(((uintptr_t)srcp ^ (uintptr_t)dstp) & 2) { | 1834 while (height--) { |
1766 /* | 1835 if (((uintptr_t) srcp ^ (uintptr_t) dstp) & 2) { |
1767 * Source and destination not aligned, pipeline it. | 1836 /* |
1768 * This is mostly a win for big blits but no loss for | 1837 * Source and destination not aligned, pipeline it. |
1769 * small ones | 1838 * This is mostly a win for big blits but no loss for |
1770 */ | 1839 * small ones |
1771 Uint32 prev_sw; | 1840 */ |
1772 int w = width; | 1841 Uint32 prev_sw; |
1773 | 1842 int w = width; |
1774 /* handle odd destination */ | 1843 |
1775 if((uintptr_t)dstp & 2) { | 1844 /* handle odd destination */ |
1776 Uint16 d = *dstp, s = *srcp; | 1845 if ((uintptr_t) dstp & 2) { |
1777 *dstp = BLEND16_50(d, s, mask); | 1846 Uint16 d = *dstp, s = *srcp; |
1778 dstp++; | 1847 *dstp = BLEND16_50(d, s, mask); |
1779 srcp++; | 1848 dstp++; |
1780 w--; | 1849 srcp++; |
1781 } | 1850 w--; |
1782 srcp++; /* srcp is now 32-bit aligned */ | 1851 } |
1783 | 1852 srcp++; /* srcp is now 32-bit aligned */ |
1784 /* bootstrap pipeline with first halfword */ | 1853 |
1785 prev_sw = ((Uint32 *)srcp)[-1]; | 1854 /* bootstrap pipeline with first halfword */ |
1786 | 1855 prev_sw = ((Uint32 *) srcp)[-1]; |
1787 while(w > 1) { | 1856 |
1788 Uint32 sw, dw, s; | 1857 while (w > 1) { |
1789 sw = *(Uint32 *)srcp; | 1858 Uint32 sw, dw, s; |
1790 dw = *(Uint32 *)dstp; | 1859 sw = *(Uint32 *) srcp; |
1860 dw = *(Uint32 *) dstp; | |
1791 #if SDL_BYTEORDER == SDL_BIG_ENDIAN | 1861 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
1792 s = (prev_sw << 16) + (sw >> 16); | 1862 s = (prev_sw << 16) + (sw >> 16); |
1793 #else | 1863 #else |
1794 s = (prev_sw >> 16) + (sw << 16); | 1864 s = (prev_sw >> 16) + (sw << 16); |
1795 #endif | 1865 #endif |
1796 prev_sw = sw; | 1866 prev_sw = sw; |
1797 *(Uint32 *)dstp = BLEND2x16_50(dw, s, mask); | 1867 *(Uint32 *) dstp = BLEND2x16_50(dw, s, mask); |
1798 dstp += 2; | 1868 dstp += 2; |
1799 srcp += 2; | 1869 srcp += 2; |
1800 w -= 2; | 1870 w -= 2; |
1801 } | 1871 } |
1802 | 1872 |
1803 /* final pixel if any */ | 1873 /* final pixel if any */ |
1804 if(w) { | 1874 if (w) { |
1805 Uint16 d = *dstp, s; | 1875 Uint16 d = *dstp, s; |
1806 #if SDL_BYTEORDER == SDL_BIG_ENDIAN | 1876 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
1807 s = (Uint16)prev_sw; | 1877 s = (Uint16) prev_sw; |
1808 #else | 1878 #else |
1809 s = (Uint16)(prev_sw >> 16); | 1879 s = (Uint16) (prev_sw >> 16); |
1810 #endif | 1880 #endif |
1811 *dstp = BLEND16_50(d, s, mask); | 1881 *dstp = BLEND16_50(d, s, mask); |
1812 srcp++; | 1882 srcp++; |
1813 dstp++; | 1883 dstp++; |
1814 } | 1884 } |
1815 srcp += srcskip - 1; | 1885 srcp += srcskip - 1; |
1816 dstp += dstskip; | 1886 dstp += dstskip; |
1817 } else { | 1887 } else { |
1818 /* source and destination are aligned */ | 1888 /* source and destination are aligned */ |
1819 int w = width; | 1889 int w = width; |
1820 | 1890 |
1821 /* first odd pixel? */ | 1891 /* first odd pixel? */ |
1822 if((uintptr_t)srcp & 2) { | 1892 if ((uintptr_t) srcp & 2) { |
1823 Uint16 d = *dstp, s = *srcp; | 1893 Uint16 d = *dstp, s = *srcp; |
1824 *dstp = BLEND16_50(d, s, mask); | 1894 *dstp = BLEND16_50(d, s, mask); |
1825 srcp++; | 1895 srcp++; |
1826 dstp++; | 1896 dstp++; |
1827 w--; | 1897 w--; |
1828 } | 1898 } |
1829 /* srcp and dstp are now 32-bit aligned */ | 1899 /* srcp and dstp are now 32-bit aligned */ |
1830 | 1900 |
1831 while(w > 1) { | 1901 while (w > 1) { |
1832 Uint32 sw = *(Uint32 *)srcp; | 1902 Uint32 sw = *(Uint32 *) srcp; |
1833 Uint32 dw = *(Uint32 *)dstp; | 1903 Uint32 dw = *(Uint32 *) dstp; |
1834 *(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask); | 1904 *(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask); |
1835 srcp += 2; | 1905 srcp += 2; |
1836 dstp += 2; | 1906 dstp += 2; |
1837 w -= 2; | 1907 w -= 2; |
1838 } | 1908 } |
1839 | 1909 |
1840 /* last odd pixel? */ | 1910 /* last odd pixel? */ |
1841 if(w) { | 1911 if (w) { |
1842 Uint16 d = *dstp, s = *srcp; | 1912 Uint16 d = *dstp, s = *srcp; |
1843 *dstp = BLEND16_50(d, s, mask); | 1913 *dstp = BLEND16_50(d, s, mask); |
1844 srcp++; | 1914 srcp++; |
1845 dstp++; | 1915 dstp++; |
1846 } | 1916 } |
1847 srcp += srcskip; | 1917 srcp += srcskip; |
1848 dstp += dstskip; | 1918 dstp += dstskip; |
1849 } | 1919 } |
1850 } | 1920 } |
1851 } | 1921 } |
1852 | 1922 |
1853 #if GCC_ASMBLIT | 1923 #if GCC_ASMBLIT |
1854 /* fast RGB565->RGB565 blending with surface alpha */ | 1924 /* fast RGB565->RGB565 blending with surface alpha */ |
1855 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) | 1925 static void |
1856 { | 1926 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info) |
1857 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ | 1927 { |
1858 if(alpha == 128) { | 1928 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
1859 Blit16to16SurfaceAlpha128(info, 0xf7de); | 1929 if (alpha == 128) { |
1860 } else { | 1930 Blit16to16SurfaceAlpha128(info, 0xf7de); |
1861 int width = info->d_width; | 1931 } else { |
1862 int height = info->d_height; | 1932 int width = info->d_width; |
1863 Uint16 *srcp = (Uint16 *)info->s_pixels; | 1933 int height = info->d_height; |
1864 int srcskip = info->s_skip >> 1; | 1934 Uint16 *srcp = (Uint16 *) info->s_pixels; |
1865 Uint16 *dstp = (Uint16 *)info->d_pixels; | 1935 int srcskip = info->s_skip >> 1; |
1866 int dstskip = info->d_skip >> 1; | 1936 Uint16 *dstp = (Uint16 *) info->d_pixels; |
1867 Uint32 s, d; | 1937 int dstskip = info->d_skip >> 1; |
1868 Uint8 load[8]; | 1938 Uint32 s, d; |
1869 | 1939 Uint8 load[8]; |
1870 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | 1940 |
1871 *(Uint64 *)load = alpha; | 1941 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ |
1872 alpha >>= 3; /* downscale alpha to 5 bits */ | 1942 *(Uint64 *) load = alpha; |
1873 | 1943 alpha >>= 3; /* downscale alpha to 5 bits */ |
1874 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ | 1944 |
1875 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | 1945 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
1876 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | 1946 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ |
1877 /* position alpha to allow for mullo and mulhi on diff channels | 1947 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ |
1878 to reduce the number of operations */ | 1948 /* position alpha to allow for mullo and mulhi on diff channels |
1879 psllq_i2r(3, mm0); | 1949 to reduce the number of operations */ |
1880 | 1950 psllq_i2r(3, mm0); |
1881 /* Setup the 565 color channel masks */ | 1951 |
1882 *(Uint64 *)load = 0x07E007E007E007E0ULL; | 1952 /* Setup the 565 color channel masks */ |
1883 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ | 1953 *(Uint64 *) load = 0x07E007E007E007E0ULL; |
1884 *(Uint64 *)load = 0x001F001F001F001FULL; | 1954 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
1885 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ | 1955 *(Uint64 *) load = 0x001F001F001F001FULL; |
1886 while(height--) { | 1956 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
1957 while (height--) { | |
1958 /* *INDENT-OFF* */ | |
1887 DUFFS_LOOP_QUATRO2( | 1959 DUFFS_LOOP_QUATRO2( |
1888 { | 1960 { |
1889 s = *srcp++; | 1961 s = *srcp++; |
1890 d = *dstp; | 1962 d = *dstp; |
1891 /* | 1963 /* |
1981 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ | 2053 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ |
1982 | 2054 |
1983 srcp += 4; | 2055 srcp += 4; |
1984 dstp += 4; | 2056 dstp += 4; |
1985 }, width); | 2057 }, width); |
1986 srcp += srcskip; | 2058 /* *INDENT-ON* */ |
1987 dstp += dstskip; | 2059 srcp += srcskip; |
1988 } | 2060 dstp += dstskip; |
1989 emms(); | 2061 } |
1990 } | 2062 emms(); |
2063 } | |
1991 } | 2064 } |
1992 | 2065 |
1993 /* fast RGB555->RGB555 blending with surface alpha */ | 2066 /* fast RGB555->RGB555 blending with surface alpha */ |
1994 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) | 2067 static void |
1995 { | 2068 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info) |
1996 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ | 2069 { |
1997 if(alpha == 128) { | 2070 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
1998 Blit16to16SurfaceAlpha128(info, 0xfbde); | 2071 if (alpha == 128) { |
1999 } else { | 2072 Blit16to16SurfaceAlpha128(info, 0xfbde); |
2000 int width = info->d_width; | 2073 } else { |
2001 int height = info->d_height; | 2074 int width = info->d_width; |
2002 Uint16 *srcp = (Uint16 *)info->s_pixels; | 2075 int height = info->d_height; |
2003 int srcskip = info->s_skip >> 1; | 2076 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2004 Uint16 *dstp = (Uint16 *)info->d_pixels; | 2077 int srcskip = info->s_skip >> 1; |
2005 int dstskip = info->d_skip >> 1; | 2078 Uint16 *dstp = (Uint16 *) info->d_pixels; |
2006 Uint32 s, d; | 2079 int dstskip = info->d_skip >> 1; |
2007 Uint8 load[8]; | 2080 Uint32 s, d; |
2008 | 2081 Uint8 load[8]; |
2009 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | 2082 |
2010 *(Uint64 *)load = alpha; | 2083 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ |
2011 alpha >>= 3; /* downscale alpha to 5 bits */ | 2084 *(Uint64 *) load = alpha; |
2012 | 2085 alpha >>= 3; /* downscale alpha to 5 bits */ |
2013 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ | 2086 |
2014 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | 2087 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
2015 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | 2088 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ |
2016 /* position alpha to allow for mullo and mulhi on diff channels | 2089 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ |
2017 to reduce the number of operations */ | 2090 /* position alpha to allow for mullo and mulhi on diff channels |
2018 psllq_i2r(3, mm0); | 2091 to reduce the number of operations */ |
2019 | 2092 psllq_i2r(3, mm0); |
2020 /* Setup the 555 color channel masks */ | 2093 |
2021 *(Uint64 *)load = 0x03E003E003E003E0ULL; | 2094 /* Setup the 555 color channel masks */ |
2022 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ | 2095 *(Uint64 *) load = 0x03E003E003E003E0ULL; |
2023 *(Uint64 *)load = 0x001F001F001F001FULL; | 2096 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
2024 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ | 2097 *(Uint64 *) load = 0x001F001F001F001FULL; |
2025 while(height--) { | 2098 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
2099 while (height--) { | |
2100 /* *INDENT-OFF* */ | |
2026 DUFFS_LOOP_QUATRO2( | 2101 DUFFS_LOOP_QUATRO2( |
2027 { | 2102 { |
2028 s = *srcp++; | 2103 s = *srcp++; |
2029 d = *dstp; | 2104 d = *dstp; |
2030 /* | 2105 /* |
2124 | 2199 |
2125 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ | 2200 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ |
2126 | 2201 |
2127 srcp += 4; | 2202 srcp += 4; |
2128 dstp += 4; | 2203 dstp += 4; |
2129 }, width); | 2204 }, width); |
2130 srcp += srcskip; | 2205 /* *INDENT-ON* */ |
2131 dstp += dstskip; | 2206 srcp += srcskip; |
2132 } | 2207 dstp += dstskip; |
2133 emms(); | 2208 } |
2134 } | 2209 emms(); |
2135 } | 2210 } |
2211 } | |
2212 | |
2136 /* End GCC_ASMBLIT */ | 2213 /* End GCC_ASMBLIT */ |
2137 | 2214 |
2138 #elif MSVC_ASMBLIT | 2215 #elif MSVC_ASMBLIT |
2139 /* fast RGB565->RGB565 blending with surface alpha */ | 2216 /* fast RGB565->RGB565 blending with surface alpha */ |
2140 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) | 2217 static void |
2141 { | 2218 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info) |
2142 unsigned alpha = info->src->alpha; | 2219 { |
2143 if(alpha == 128) { | 2220 unsigned alpha = info->src->alpha; |
2144 Blit16to16SurfaceAlpha128(info, 0xf7de); | 2221 if (alpha == 128) { |
2145 } else { | 2222 Blit16to16SurfaceAlpha128(info, 0xf7de); |
2146 int width = info->d_width; | 2223 } else { |
2147 int height = info->d_height; | 2224 int width = info->d_width; |
2148 Uint16 *srcp = (Uint16 *)info->s_pixels; | 2225 int height = info->d_height; |
2149 int srcskip = info->s_skip >> 1; | 2226 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2150 Uint16 *dstp = (Uint16 *)info->d_pixels; | 2227 int srcskip = info->s_skip >> 1; |
2151 int dstskip = info->d_skip >> 1; | 2228 Uint16 *dstp = (Uint16 *) info->d_pixels; |
2152 Uint32 s, d; | 2229 int dstskip = info->d_skip >> 1; |
2153 | 2230 Uint32 s, d; |
2154 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; | 2231 |
2155 | 2232 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; |
2156 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | 2233 |
2157 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ | 2234 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ |
2158 alpha >>= 3; /* downscale alpha to 5 bits */ | 2235 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ |
2159 | 2236 alpha >>= 3; /* downscale alpha to 5 bits */ |
2160 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | 2237 |
2161 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | 2238 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ |
2162 /* position alpha to allow for mullo and mulhi on diff channels | 2239 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ |
2163 to reduce the number of operations */ | 2240 /* position alpha to allow for mullo and mulhi on diff channels |
2164 mm_alpha = _mm_slli_si64(mm_alpha, 3); | 2241 to reduce the number of operations */ |
2165 | 2242 mm_alpha = _mm_slli_si64(mm_alpha, 3); |
2166 /* Setup the 565 color channel masks */ | 2243 |
2167 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ | 2244 /* Setup the 565 color channel masks */ |
2168 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | 2245 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ |
2169 | 2246 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ |
2170 while(height--) { | 2247 |
2248 while (height--) { | |
2249 /* *INDENT-OFF* */ | |
2171 DUFFS_LOOP_QUATRO2( | 2250 DUFFS_LOOP_QUATRO2( |
2172 { | 2251 { |
2173 s = *srcp++; | 2252 s = *srcp++; |
2174 d = *dstp; | 2253 d = *dstp; |
2175 /* | 2254 /* |
2260 | 2339 |
2261 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ | 2340 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ |
2262 | 2341 |
2263 srcp += 4; | 2342 srcp += 4; |
2264 dstp += 4; | 2343 dstp += 4; |
2265 }, width); | 2344 }, width); |
2266 srcp += srcskip; | 2345 /* *INDENT-ON* */ |
2267 dstp += dstskip; | 2346 srcp += srcskip; |
2268 } | 2347 dstp += dstskip; |
2269 _mm_empty(); | 2348 } |
2270 } | 2349 _mm_empty(); |
2350 } | |
2271 } | 2351 } |
2272 | 2352 |
2273 /* fast RGB555->RGB555 blending with surface alpha */ | 2353 /* fast RGB555->RGB555 blending with surface alpha */ |
2274 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) | 2354 static void |
2275 { | 2355 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info) |
2276 unsigned alpha = info->src->alpha; | 2356 { |
2277 if(alpha == 128) { | 2357 unsigned alpha = info->src->alpha; |
2278 Blit16to16SurfaceAlpha128(info, 0xfbde); | 2358 if (alpha == 128) { |
2279 } else { | 2359 Blit16to16SurfaceAlpha128(info, 0xfbde); |
2280 int width = info->d_width; | 2360 } else { |
2281 int height = info->d_height; | 2361 int width = info->d_width; |
2282 Uint16 *srcp = (Uint16 *)info->s_pixels; | 2362 int height = info->d_height; |
2283 int srcskip = info->s_skip >> 1; | 2363 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2284 Uint16 *dstp = (Uint16 *)info->d_pixels; | 2364 int srcskip = info->s_skip >> 1; |
2285 int dstskip = info->d_skip >> 1; | 2365 Uint16 *dstp = (Uint16 *) info->d_pixels; |
2286 Uint32 s, d; | 2366 int dstskip = info->d_skip >> 1; |
2287 | 2367 Uint32 s, d; |
2288 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; | 2368 |
2289 | 2369 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; |
2290 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | 2370 |
2291 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ | 2371 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ |
2292 alpha >>= 3; /* downscale alpha to 5 bits */ | 2372 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ |
2293 | 2373 alpha >>= 3; /* downscale alpha to 5 bits */ |
2294 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | 2374 |
2295 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | 2375 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ |
2296 /* position alpha to allow for mullo and mulhi on diff channels | 2376 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ |
2297 to reduce the number of operations */ | 2377 /* position alpha to allow for mullo and mulhi on diff channels |
2298 mm_alpha = _mm_slli_si64(mm_alpha, 3); | 2378 to reduce the number of operations */ |
2299 | 2379 mm_alpha = _mm_slli_si64(mm_alpha, 3); |
2300 /* Setup the 555 color channel masks */ | 2380 |
2301 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ | 2381 /* Setup the 555 color channel masks */ |
2302 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ | 2382 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ |
2303 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | 2383 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ |
2304 | 2384 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ |
2305 while(height--) { | 2385 |
2386 while (height--) { | |
2387 /* *INDENT-OFF* */ | |
2306 DUFFS_LOOP_QUATRO2( | 2388 DUFFS_LOOP_QUATRO2( |
2307 { | 2389 { |
2308 s = *srcp++; | 2390 s = *srcp++; |
2309 d = *dstp; | 2391 d = *dstp; |
2310 /* | 2392 /* |
2395 | 2477 |
2396 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ | 2478 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ |
2397 | 2479 |
2398 srcp += 4; | 2480 srcp += 4; |
2399 dstp += 4; | 2481 dstp += 4; |
2400 }, width); | 2482 }, width); |
2401 srcp += srcskip; | 2483 /* *INDENT-ON* */ |
2402 dstp += dstskip; | 2484 srcp += srcskip; |
2403 } | 2485 dstp += dstskip; |
2404 _mm_empty(); | 2486 } |
2405 } | 2487 _mm_empty(); |
2488 } | |
2406 } | 2489 } |
2407 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | 2490 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ |
2408 | 2491 |
2409 /* fast RGB565->RGB565 blending with surface alpha */ | 2492 /* fast RGB565->RGB565 blending with surface alpha */ |
2410 static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) | 2493 static void |
2411 { | 2494 Blit565to565SurfaceAlpha(SDL_BlitInfo * info) |
2412 unsigned alpha = info->src->alpha; | 2495 { |
2413 if(alpha == 128) { | 2496 unsigned alpha = info->src->alpha; |
2414 Blit16to16SurfaceAlpha128(info, 0xf7de); | 2497 if (alpha == 128) { |
2415 } else { | 2498 Blit16to16SurfaceAlpha128(info, 0xf7de); |
2416 int width = info->d_width; | 2499 } else { |
2417 int height = info->d_height; | 2500 int width = info->d_width; |
2418 Uint16 *srcp = (Uint16 *)info->s_pixels; | 2501 int height = info->d_height; |
2419 int srcskip = info->s_skip >> 1; | 2502 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2420 Uint16 *dstp = (Uint16 *)info->d_pixels; | 2503 int srcskip = info->s_skip >> 1; |
2421 int dstskip = info->d_skip >> 1; | 2504 Uint16 *dstp = (Uint16 *) info->d_pixels; |
2422 alpha >>= 3; /* downscale alpha to 5 bits */ | 2505 int dstskip = info->d_skip >> 1; |
2423 | 2506 alpha >>= 3; /* downscale alpha to 5 bits */ |
2424 while(height--) { | 2507 |
2508 while (height--) { | |
2509 /* *INDENT-OFF* */ | |
2425 DUFFS_LOOP4({ | 2510 DUFFS_LOOP4({ |
2426 Uint32 s = *srcp++; | 2511 Uint32 s = *srcp++; |
2427 Uint32 d = *dstp; | 2512 Uint32 d = *dstp; |
2428 /* | 2513 /* |
2429 * shift out the middle component (green) to | 2514 * shift out the middle component (green) to |
2434 d = (d | d << 16) & 0x07e0f81f; | 2519 d = (d | d << 16) & 0x07e0f81f; |
2435 d += (s - d) * alpha >> 5; | 2520 d += (s - d) * alpha >> 5; |
2436 d &= 0x07e0f81f; | 2521 d &= 0x07e0f81f; |
2437 *dstp++ = (Uint16)(d | d >> 16); | 2522 *dstp++ = (Uint16)(d | d >> 16); |
2438 }, width); | 2523 }, width); |
2439 srcp += srcskip; | 2524 /* *INDENT-ON* */ |
2440 dstp += dstskip; | 2525 srcp += srcskip; |
2441 } | 2526 dstp += dstskip; |
2442 } | 2527 } |
2528 } | |
2443 } | 2529 } |
2444 | 2530 |
2445 /* fast RGB555->RGB555 blending with surface alpha */ | 2531 /* fast RGB555->RGB555 blending with surface alpha */ |
2446 static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) | 2532 static void |
2447 { | 2533 Blit555to555SurfaceAlpha(SDL_BlitInfo * info) |
2448 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ | 2534 { |
2449 if(alpha == 128) { | 2535 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
2450 Blit16to16SurfaceAlpha128(info, 0xfbde); | 2536 if (alpha == 128) { |
2451 } else { | 2537 Blit16to16SurfaceAlpha128(info, 0xfbde); |
2452 int width = info->d_width; | 2538 } else { |
2453 int height = info->d_height; | 2539 int width = info->d_width; |
2454 Uint16 *srcp = (Uint16 *)info->s_pixels; | 2540 int height = info->d_height; |
2455 int srcskip = info->s_skip >> 1; | 2541 Uint16 *srcp = (Uint16 *) info->s_pixels; |
2456 Uint16 *dstp = (Uint16 *)info->d_pixels; | 2542 int srcskip = info->s_skip >> 1; |
2457 int dstskip = info->d_skip >> 1; | 2543 Uint16 *dstp = (Uint16 *) info->d_pixels; |
2458 alpha >>= 3; /* downscale alpha to 5 bits */ | 2544 int dstskip = info->d_skip >> 1; |
2459 | 2545 alpha >>= 3; /* downscale alpha to 5 bits */ |
2460 while(height--) { | 2546 |
2547 while (height--) { | |
2548 /* *INDENT-OFF* */ | |
2461 DUFFS_LOOP4({ | 2549 DUFFS_LOOP4({ |
2462 Uint32 s = *srcp++; | 2550 Uint32 s = *srcp++; |
2463 Uint32 d = *dstp; | 2551 Uint32 d = *dstp; |
2464 /* | 2552 /* |
2465 * shift out the middle component (green) to | 2553 * shift out the middle component (green) to |
2470 d = (d | d << 16) & 0x03e07c1f; | 2558 d = (d | d << 16) & 0x03e07c1f; |
2471 d += (s - d) * alpha >> 5; | 2559 d += (s - d) * alpha >> 5; |
2472 d &= 0x03e07c1f; | 2560 d &= 0x03e07c1f; |
2473 *dstp++ = (Uint16)(d | d >> 16); | 2561 *dstp++ = (Uint16)(d | d >> 16); |
2474 }, width); | 2562 }, width); |
2475 srcp += srcskip; | 2563 /* *INDENT-ON* */ |
2476 dstp += dstskip; | 2564 srcp += srcskip; |
2477 } | 2565 dstp += dstskip; |
2478 } | 2566 } |
2567 } | |
2479 } | 2568 } |
2480 | 2569 |
2481 /* fast ARGB8888->RGB565 blending with pixel alpha */ | 2570 /* fast ARGB8888->RGB565 blending with pixel alpha */ |
2482 static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) | 2571 static void |
2483 { | 2572 BlitARGBto565PixelAlpha(SDL_BlitInfo * info) |
2484 int width = info->d_width; | 2573 { |
2485 int height = info->d_height; | 2574 int width = info->d_width; |
2486 Uint32 *srcp = (Uint32 *)info->s_pixels; | 2575 int height = info->d_height; |
2487 int srcskip = info->s_skip >> 2; | 2576 Uint32 *srcp = (Uint32 *) info->s_pixels; |
2488 Uint16 *dstp = (Uint16 *)info->d_pixels; | 2577 int srcskip = info->s_skip >> 2; |
2489 int dstskip = info->d_skip >> 1; | 2578 Uint16 *dstp = (Uint16 *) info->d_pixels; |
2490 | 2579 int dstskip = info->d_skip >> 1; |
2491 while(height--) { | 2580 |
2581 while (height--) { | |
2582 /* *INDENT-OFF* */ | |
2492 DUFFS_LOOP4({ | 2583 DUFFS_LOOP4({ |
2493 Uint32 s = *srcp; | 2584 Uint32 s = *srcp; |
2494 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ | 2585 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ |
2495 /* FIXME: Here we special-case opaque alpha since the | 2586 /* FIXME: Here we special-case opaque alpha since the |
2496 compositioning used (>>8 instead of /255) doesn't handle | 2587 compositioning used (>>8 instead of /255) doesn't handle |
2514 } | 2605 } |
2515 } | 2606 } |
2516 srcp++; | 2607 srcp++; |
2517 dstp++; | 2608 dstp++; |
2518 }, width); | 2609 }, width); |
2519 srcp += srcskip; | 2610 /* *INDENT-ON* */ |
2520 dstp += dstskip; | 2611 srcp += srcskip; |
2521 } | 2612 dstp += dstskip; |
2613 } | |
2522 } | 2614 } |
2523 | 2615 |
2524 /* fast ARGB8888->RGB555 blending with pixel alpha */ | 2616 /* fast ARGB8888->RGB555 blending with pixel alpha */ |
2525 static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) | 2617 static void |
2526 { | 2618 BlitARGBto555PixelAlpha(SDL_BlitInfo * info) |
2527 int width = info->d_width; | 2619 { |
2528 int height = info->d_height; | 2620 int width = info->d_width; |
2529 Uint32 *srcp = (Uint32 *)info->s_pixels; | 2621 int height = info->d_height; |
2530 int srcskip = info->s_skip >> 2; | 2622 Uint32 *srcp = (Uint32 *) info->s_pixels; |
2531 Uint16 *dstp = (Uint16 *)info->d_pixels; | 2623 int srcskip = info->s_skip >> 2; |
2532 int dstskip = info->d_skip >> 1; | 2624 Uint16 *dstp = (Uint16 *) info->d_pixels; |
2533 | 2625 int dstskip = info->d_skip >> 1; |
2534 while(height--) { | 2626 |
2627 while (height--) { | |
2628 /* *INDENT-OFF* */ | |
2535 DUFFS_LOOP4({ | 2629 DUFFS_LOOP4({ |
2536 unsigned alpha; | 2630 unsigned alpha; |
2537 Uint32 s = *srcp; | 2631 Uint32 s = *srcp; |
2538 alpha = s >> 27; /* downscale alpha to 5 bits */ | 2632 alpha = s >> 27; /* downscale alpha to 5 bits */ |
2539 /* FIXME: Here we special-case opaque alpha since the | 2633 /* FIXME: Here we special-case opaque alpha since the |
2558 } | 2652 } |
2559 } | 2653 } |
2560 srcp++; | 2654 srcp++; |
2561 dstp++; | 2655 dstp++; |
2562 }, width); | 2656 }, width); |
2563 srcp += srcskip; | 2657 /* *INDENT-ON* */ |
2564 dstp += dstskip; | 2658 srcp += srcskip; |
2565 } | 2659 dstp += dstskip; |
2660 } | |
2566 } | 2661 } |
2567 | 2662 |
2568 /* General (slow) N->N blending with per-surface alpha */ | 2663 /* General (slow) N->N blending with per-surface alpha */ |
2569 static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) | 2664 static void |
2570 { | 2665 BlitNtoNSurfaceAlpha(SDL_BlitInfo * info) |
2571 int width = info->d_width; | 2666 { |
2572 int height = info->d_height; | 2667 int width = info->d_width; |
2573 Uint8 *src = info->s_pixels; | 2668 int height = info->d_height; |
2574 int srcskip = info->s_skip; | 2669 Uint8 *src = info->s_pixels; |
2575 Uint8 *dst = info->d_pixels; | 2670 int srcskip = info->s_skip; |
2576 int dstskip = info->d_skip; | 2671 Uint8 *dst = info->d_pixels; |
2577 SDL_PixelFormat *srcfmt = info->src; | 2672 int dstskip = info->d_skip; |
2578 SDL_PixelFormat *dstfmt = info->dst; | 2673 SDL_PixelFormat *srcfmt = info->src; |
2579 int srcbpp = srcfmt->BytesPerPixel; | 2674 SDL_PixelFormat *dstfmt = info->dst; |
2580 int dstbpp = dstfmt->BytesPerPixel; | 2675 int srcbpp = srcfmt->BytesPerPixel; |
2581 unsigned sA = srcfmt->alpha; | 2676 int dstbpp = dstfmt->BytesPerPixel; |
2582 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | 2677 unsigned sA = srcfmt->alpha; |
2583 | 2678 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
2584 if(sA) { | 2679 |
2585 while ( height-- ) { | 2680 if (sA) { |
2681 while (height--) { | |
2682 /* *INDENT-OFF* */ | |
2586 DUFFS_LOOP4( | 2683 DUFFS_LOOP4( |
2587 { | 2684 { |
2588 Uint32 Pixel; | 2685 Uint32 Pixel; |
2589 unsigned sR; | 2686 unsigned sR; |
2590 unsigned sG; | 2687 unsigned sG; |
2598 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | 2695 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); |
2599 src += srcbpp; | 2696 src += srcbpp; |
2600 dst += dstbpp; | 2697 dst += dstbpp; |
2601 }, | 2698 }, |
2602 width); | 2699 width); |
2603 src += srcskip; | 2700 /* *INDENT-ON* */ |
2604 dst += dstskip; | 2701 src += srcskip; |
2605 } | 2702 dst += dstskip; |
2606 } | 2703 } |
2704 } | |
2607 } | 2705 } |
2608 | 2706 |
2609 /* General (slow) colorkeyed N->N blending with per-surface alpha */ | 2707 /* General (slow) colorkeyed N->N blending with per-surface alpha */ |
2610 static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) | 2708 static void |
2611 { | 2709 BlitNtoNSurfaceAlphaKey(SDL_BlitInfo * info) |
2612 int width = info->d_width; | 2710 { |
2613 int height = info->d_height; | 2711 int width = info->d_width; |
2614 Uint8 *src = info->s_pixels; | 2712 int height = info->d_height; |
2615 int srcskip = info->s_skip; | 2713 Uint8 *src = info->s_pixels; |
2616 Uint8 *dst = info->d_pixels; | 2714 int srcskip = info->s_skip; |
2617 int dstskip = info->d_skip; | 2715 Uint8 *dst = info->d_pixels; |
2618 SDL_PixelFormat *srcfmt = info->src; | 2716 int dstskip = info->d_skip; |
2619 SDL_PixelFormat *dstfmt = info->dst; | 2717 SDL_PixelFormat *srcfmt = info->src; |
2620 Uint32 ckey = srcfmt->colorkey; | 2718 SDL_PixelFormat *dstfmt = info->dst; |
2621 int srcbpp = srcfmt->BytesPerPixel; | 2719 Uint32 ckey = srcfmt->colorkey; |
2622 int dstbpp = dstfmt->BytesPerPixel; | 2720 int srcbpp = srcfmt->BytesPerPixel; |
2623 unsigned sA = srcfmt->alpha; | 2721 int dstbpp = dstfmt->BytesPerPixel; |
2624 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | 2722 unsigned sA = srcfmt->alpha; |
2625 | 2723 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
2626 while ( height-- ) { | 2724 |
2725 while (height--) { | |
2726 /* *INDENT-OFF* */ | |
2627 DUFFS_LOOP4( | 2727 DUFFS_LOOP4( |
2628 { | 2728 { |
2629 Uint32 Pixel; | 2729 Uint32 Pixel; |
2630 unsigned sR; | 2730 unsigned sR; |
2631 unsigned sG; | 2731 unsigned sG; |
2642 } | 2742 } |
2643 src += srcbpp; | 2743 src += srcbpp; |
2644 dst += dstbpp; | 2744 dst += dstbpp; |
2645 }, | 2745 }, |
2646 width); | 2746 width); |
2647 src += srcskip; | 2747 /* *INDENT-ON* */ |
2648 dst += dstskip; | 2748 src += srcskip; |
2649 } | 2749 dst += dstskip; |
2750 } | |
2650 } | 2751 } |
2651 | 2752 |
2652 /* General (slow) N->N blending with pixel alpha */ | 2753 /* General (slow) N->N blending with pixel alpha */ |
2653 static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) | 2754 static void |
2654 { | 2755 BlitNtoNPixelAlpha(SDL_BlitInfo * info) |
2655 int width = info->d_width; | 2756 { |
2656 int height = info->d_height; | 2757 int width = info->d_width; |
2657 Uint8 *src = info->s_pixels; | 2758 int height = info->d_height; |
2658 int srcskip = info->s_skip; | 2759 Uint8 *src = info->s_pixels; |
2659 Uint8 *dst = info->d_pixels; | 2760 int srcskip = info->s_skip; |
2660 int dstskip = info->d_skip; | 2761 Uint8 *dst = info->d_pixels; |
2661 SDL_PixelFormat *srcfmt = info->src; | 2762 int dstskip = info->d_skip; |
2662 SDL_PixelFormat *dstfmt = info->dst; | 2763 SDL_PixelFormat *srcfmt = info->src; |
2663 | 2764 SDL_PixelFormat *dstfmt = info->dst; |
2664 int srcbpp; | 2765 |
2665 int dstbpp; | 2766 int srcbpp; |
2666 | 2767 int dstbpp; |
2667 /* Set up some basic variables */ | 2768 |
2668 srcbpp = srcfmt->BytesPerPixel; | 2769 /* Set up some basic variables */ |
2669 dstbpp = dstfmt->BytesPerPixel; | 2770 srcbpp = srcfmt->BytesPerPixel; |
2670 | 2771 dstbpp = dstfmt->BytesPerPixel; |
2671 /* FIXME: for 8bpp source alpha, this doesn't get opaque values | 2772 |
2672 quite right. for <8bpp source alpha, it gets them very wrong | 2773 /* FIXME: for 8bpp source alpha, this doesn't get opaque values |
2673 (check all macros!) | 2774 quite right. for <8bpp source alpha, it gets them very wrong |
2674 It is unclear whether there is a good general solution that doesn't | 2775 (check all macros!) |
2675 need a branch (or a divide). */ | 2776 It is unclear whether there is a good general solution that doesn't |
2676 while ( height-- ) { | 2777 need a branch (or a divide). */ |
2778 while (height--) { | |
2779 /* *INDENT-OFF* */ | |
2677 DUFFS_LOOP4( | 2780 DUFFS_LOOP4( |
2678 { | 2781 { |
2679 Uint32 Pixel; | 2782 Uint32 Pixel; |
2680 unsigned sR; | 2783 unsigned sR; |
2681 unsigned sG; | 2784 unsigned sG; |
2693 } | 2796 } |
2694 src += srcbpp; | 2797 src += srcbpp; |
2695 dst += dstbpp; | 2798 dst += dstbpp; |
2696 }, | 2799 }, |
2697 width); | 2800 width); |
2698 src += srcskip; | 2801 /* *INDENT-ON* */ |
2699 dst += dstskip; | 2802 src += srcskip; |
2700 } | 2803 dst += dstskip; |
2701 } | 2804 } |
2702 | 2805 } |
2703 | 2806 |
2704 SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index) | 2807 |
2808 SDL_loblit | |
2809 SDL_CalculateAlphaBlit(SDL_Surface * surface, int blit_index) | |
2705 { | 2810 { |
2706 SDL_PixelFormat *sf = surface->format; | 2811 SDL_PixelFormat *sf = surface->format; |
2707 SDL_PixelFormat *df = surface->map->dst->format; | 2812 SDL_PixelFormat *df = surface->map->dst->format; |
2708 | 2813 |
2709 if(sf->Amask == 0) { | 2814 if (sf->Amask == 0) { |
2710 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { | 2815 if ((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { |
2711 if(df->BytesPerPixel == 1) | 2816 if (df->BytesPerPixel == 1) |
2712 return BlitNto1SurfaceAlphaKey; | 2817 return BlitNto1SurfaceAlphaKey; |
2713 else | 2818 else |
2714 #if SDL_ALTIVEC_BLITTERS | 2819 #if SDL_ALTIVEC_BLITTERS |
2715 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && | 2820 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && |
2716 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | 2821 !(surface->map->dst->flags & SDL_HWSURFACE) |
2717 return Blit32to32SurfaceAlphaKeyAltivec; | 2822 && SDL_HasAltiVec()) |
2718 else | 2823 return Blit32to32SurfaceAlphaKeyAltivec; |
2824 else | |
2719 #endif | 2825 #endif |
2720 return BlitNtoNSurfaceAlphaKey; | 2826 return BlitNtoNSurfaceAlphaKey; |
2721 } else { | 2827 } else { |
2722 /* Per-surface alpha blits */ | 2828 /* Per-surface alpha blits */ |
2723 switch(df->BytesPerPixel) { | 2829 switch (df->BytesPerPixel) { |
2724 case 1: | 2830 case 1: |
2725 return BlitNto1SurfaceAlpha; | 2831 return BlitNto1SurfaceAlpha; |
2726 | 2832 |
2727 case 2: | 2833 case 2: |
2728 if(surface->map->identity) { | 2834 if (surface->map->identity) { |
2729 if(df->Gmask == 0x7e0) | 2835 if (df->Gmask == 0x7e0) { |
2730 { | |
2731 #if MMX_ASMBLIT | 2836 #if MMX_ASMBLIT |
2732 if(SDL_HasMMX()) | 2837 if (SDL_HasMMX()) |
2733 return Blit565to565SurfaceAlphaMMX; | 2838 return Blit565to565SurfaceAlphaMMX; |
2734 else | 2839 else |
2735 #endif | 2840 #endif |
2736 return Blit565to565SurfaceAlpha; | 2841 return Blit565to565SurfaceAlpha; |
2737 } | 2842 } else if (df->Gmask == 0x3e0) { |
2738 else if(df->Gmask == 0x3e0) | |
2739 { | |
2740 #if MMX_ASMBLIT | 2843 #if MMX_ASMBLIT |
2741 if(SDL_HasMMX()) | 2844 if (SDL_HasMMX()) |
2742 return Blit555to555SurfaceAlphaMMX; | 2845 return Blit555to555SurfaceAlphaMMX; |
2743 else | 2846 else |
2744 #endif | 2847 #endif |
2745 return Blit555to555SurfaceAlpha; | 2848 return Blit555to555SurfaceAlpha; |
2746 } | 2849 } |
2747 } | 2850 } |
2748 return BlitNtoNSurfaceAlpha; | 2851 return BlitNtoNSurfaceAlpha; |
2749 | 2852 |
2750 case 4: | 2853 case 4: |
2751 if(sf->Rmask == df->Rmask | 2854 if (sf->Rmask == df->Rmask |
2752 && sf->Gmask == df->Gmask | 2855 && sf->Gmask == df->Gmask |
2753 && sf->Bmask == df->Bmask | 2856 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { |
2754 && sf->BytesPerPixel == 4) | |
2755 { | |
2756 #if MMX_ASMBLIT | 2857 #if MMX_ASMBLIT |
2757 if(sf->Rshift % 8 == 0 | 2858 if (sf->Rshift % 8 == 0 |
2758 && sf->Gshift % 8 == 0 | 2859 && sf->Gshift % 8 == 0 |
2759 && sf->Bshift % 8 == 0 | 2860 && sf->Bshift % 8 == 0 && SDL_HasMMX()) |
2760 && SDL_HasMMX()) | 2861 return BlitRGBtoRGBSurfaceAlphaMMX; |
2761 return BlitRGBtoRGBSurfaceAlphaMMX; | |
2762 #endif | 2862 #endif |
2763 if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) | 2863 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) { |
2764 { | |
2765 #if SDL_ALTIVEC_BLITTERS | 2864 #if SDL_ALTIVEC_BLITTERS |
2766 if(!(surface->map->dst->flags & SDL_HWSURFACE) | 2865 if (!(surface->map->dst->flags & SDL_HWSURFACE) |
2767 && SDL_HasAltiVec()) | 2866 && SDL_HasAltiVec()) |
2768 return BlitRGBtoRGBSurfaceAlphaAltivec; | 2867 return BlitRGBtoRGBSurfaceAlphaAltivec; |
2769 #endif | 2868 #endif |
2770 return BlitRGBtoRGBSurfaceAlpha; | 2869 return BlitRGBtoRGBSurfaceAlpha; |
2771 } | 2870 } |
2772 } | 2871 } |
2773 #if SDL_ALTIVEC_BLITTERS | 2872 #if SDL_ALTIVEC_BLITTERS |
2774 if((sf->BytesPerPixel == 4) && | 2873 if ((sf->BytesPerPixel == 4) && |
2775 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | 2874 !(surface->map->dst->flags & SDL_HWSURFACE) |
2776 return Blit32to32SurfaceAlphaAltivec; | 2875 && SDL_HasAltiVec()) |
2777 else | 2876 return Blit32to32SurfaceAlphaAltivec; |
2877 else | |
2778 #endif | 2878 #endif |
2779 return BlitNtoNSurfaceAlpha; | 2879 return BlitNtoNSurfaceAlpha; |
2780 | 2880 |
2781 case 3: | 2881 case 3: |
2782 default: | 2882 default: |
2783 return BlitNtoNSurfaceAlpha; | 2883 return BlitNtoNSurfaceAlpha; |
2784 } | 2884 } |
2785 } | 2885 } |
2786 } else { | 2886 } else { |
2787 /* Per-pixel alpha blits */ | 2887 /* Per-pixel alpha blits */ |
2788 switch(df->BytesPerPixel) { | 2888 switch (df->BytesPerPixel) { |
2789 case 1: | 2889 case 1: |
2790 return BlitNto1PixelAlpha; | 2890 return BlitNto1PixelAlpha; |
2791 | 2891 |
2792 case 2: | 2892 case 2: |
2793 #if SDL_ALTIVEC_BLITTERS | 2893 #if SDL_ALTIVEC_BLITTERS |
2794 if(sf->BytesPerPixel == 4 && !(surface->map->dst->flags & SDL_HWSURFACE) && | 2894 if (sf->BytesPerPixel == 4 |
2795 df->Gmask == 0x7e0 && | 2895 && !(surface->map->dst->flags & SDL_HWSURFACE) |
2796 df->Bmask == 0x1f && SDL_HasAltiVec()) | 2896 && df->Gmask == 0x7e0 && df->Bmask == 0x1f |
2797 return Blit32to565PixelAlphaAltivec; | 2897 && SDL_HasAltiVec()) |
2798 else | 2898 return Blit32to565PixelAlphaAltivec; |
2899 else | |
2799 #endif | 2900 #endif |
2800 if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 | 2901 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 |
2801 && sf->Gmask == 0xff00 | 2902 && sf->Gmask == 0xff00 |
2802 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) | 2903 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) |
2803 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { | 2904 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { |
2804 if(df->Gmask == 0x7e0) | 2905 if (df->Gmask == 0x7e0) |
2805 return BlitARGBto565PixelAlpha; | 2906 return BlitARGBto565PixelAlpha; |
2806 else if(df->Gmask == 0x3e0) | 2907 else if (df->Gmask == 0x3e0) |
2807 return BlitARGBto555PixelAlpha; | 2908 return BlitARGBto555PixelAlpha; |
2808 } | 2909 } |
2809 return BlitNtoNPixelAlpha; | 2910 return BlitNtoNPixelAlpha; |
2810 | 2911 |
2811 case 4: | 2912 case 4: |
2812 if(sf->Rmask == df->Rmask | 2913 if (sf->Rmask == df->Rmask |
2813 && sf->Gmask == df->Gmask | 2914 && sf->Gmask == df->Gmask |
2814 && sf->Bmask == df->Bmask | 2915 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { |
2815 && sf->BytesPerPixel == 4) | |
2816 { | |
2817 #if MMX_ASMBLIT | 2916 #if MMX_ASMBLIT |
2818 if(sf->Rshift % 8 == 0 | 2917 if (sf->Rshift % 8 == 0 |
2819 && sf->Gshift % 8 == 0 | 2918 && sf->Gshift % 8 == 0 |
2820 && sf->Bshift % 8 == 0 | 2919 && sf->Bshift % 8 == 0 |
2821 && sf->Ashift % 8 == 0 | 2920 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { |
2822 && sf->Aloss == 0) | 2921 if (SDL_Has3DNow()) |
2823 { | 2922 return BlitRGBtoRGBPixelAlphaMMX3DNOW; |
2824 if(SDL_Has3DNow()) | 2923 if (SDL_HasMMX()) |
2825 return BlitRGBtoRGBPixelAlphaMMX3DNOW; | 2924 return BlitRGBtoRGBPixelAlphaMMX; |
2826 if(SDL_HasMMX()) | 2925 } |
2827 return BlitRGBtoRGBPixelAlphaMMX; | |
2828 } | |
2829 #endif | 2926 #endif |
2830 if(sf->Amask == 0xff000000) | 2927 if (sf->Amask == 0xff000000) { |
2831 { | |
2832 #if SDL_ALTIVEC_BLITTERS | 2928 #if SDL_ALTIVEC_BLITTERS |
2833 if(!(surface->map->dst->flags & SDL_HWSURFACE) | 2929 if (!(surface->map->dst->flags & SDL_HWSURFACE) |
2834 && SDL_HasAltiVec()) | 2930 && SDL_HasAltiVec()) |
2835 return BlitRGBtoRGBPixelAlphaAltivec; | 2931 return BlitRGBtoRGBPixelAlphaAltivec; |
2836 #endif | 2932 #endif |
2837 return BlitRGBtoRGBPixelAlpha; | 2933 return BlitRGBtoRGBPixelAlpha; |
2838 } | 2934 } |
2839 } | 2935 } |
2840 #if SDL_ALTIVEC_BLITTERS | 2936 #if SDL_ALTIVEC_BLITTERS |
2841 if (sf->Amask && sf->BytesPerPixel == 4 && | 2937 if (sf->Amask && sf->BytesPerPixel == 4 && |
2842 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | 2938 !(surface->map->dst->flags & SDL_HWSURFACE) |
2843 return Blit32to32PixelAlphaAltivec; | 2939 && SDL_HasAltiVec()) |
2844 else | 2940 return Blit32to32PixelAlphaAltivec; |
2941 else | |
2845 #endif | 2942 #endif |
2846 return BlitNtoNPixelAlpha; | 2943 return BlitNtoNPixelAlpha; |
2847 | 2944 |
2848 case 3: | 2945 case 3: |
2849 default: | 2946 default: |
2850 return BlitNtoNPixelAlpha; | 2947 return BlitNtoNPixelAlpha; |
2851 } | 2948 } |
2852 } | 2949 } |
2853 } | 2950 } |
2854 | 2951 |
2952 /* vi: set ts=4 sw=4 expandtab: */ |