comparison src/video/SDL_blit_A.c @ 1162:2651158f59b8

Enable altivec blitters on PowerPC Linux, and some fixes for recent GCC versions.
author Ryan C. Gordon <icculus@icculus.org>
date Thu, 20 Oct 2005 06:55:26 +0000
parents ffaaf7ecf685
children 867f521591e5
1161:05d4b93b911e 1162:2651158f59b8
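
The "fixes for recent GCC versions" show up below as three recurring patterns: C99-style braced vector literals behind new VECUINT8_LITERAL/VECUINT16_LITERAL macros, (char *) detours on type-punning casts to quiet strict-aliasing warnings, and removal of cast-as-lvalue pointer bumps, which GCC 4 no longer accepts. A minimal sketch of that last pattern, assuming SDL's Uint32/Uint8 typedefs (the function name is illustrative):

    static void advance_one_pixel(Uint32 **dstp)
    {
        /* Old form, a GCC extension rejected by GCC 4
           ("invalid lvalue in assignment"):
               ((Uint8 *)*dstp) += 4;                        */
        ++*dstp;   /* a Uint32 pointer already steps 4 bytes */
    }
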
60 const unsigned A = srcfmt->alpha; 60 const unsigned A = srcfmt->alpha;
61 61
62 while ( height-- ) { 62 while ( height-- ) {
63 DUFFS_LOOP4( 63 DUFFS_LOOP4(
64 { 64 {
65 Uint32 pixel; 65 Uint32 Pixel;
66 unsigned sR; 66 unsigned sR;
67 unsigned sG; 67 unsigned sG;
68 unsigned sB; 68 unsigned sB;
69 unsigned dR; 69 unsigned dR;
70 unsigned dG; 70 unsigned dG;
71 unsigned dB; 71 unsigned dB;
72 DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB); 72 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
73 dR = dstfmt->palette->colors[*dst].r; 73 dR = dstfmt->palette->colors[*dst].r;
74 dG = dstfmt->palette->colors[*dst].g; 74 dG = dstfmt->palette->colors[*dst].g;
75 dB = dstfmt->palette->colors[*dst].b; 75 dB = dstfmt->palette->colors[*dst].b;
76 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); 76 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB);
77 dR &= 0xff; 77 dR &= 0xff;
112 112
113 /* FIXME: fix alpha bit field expansion here too? */ 113 /* FIXME: fix alpha bit field expansion here too? */
114 while ( height-- ) { 114 while ( height-- ) {
115 DUFFS_LOOP4( 115 DUFFS_LOOP4(
116 { 116 {
117 Uint32 pixel; 117 Uint32 Pixel;
118 unsigned sR; 118 unsigned sR;
119 unsigned sG; 119 unsigned sG;
120 unsigned sB; 120 unsigned sB;
121 unsigned sA; 121 unsigned sA;
122 unsigned dR; 122 unsigned dR;
123 unsigned dG; 123 unsigned dG;
124 unsigned dB; 124 unsigned dB;
125 DISEMBLE_RGBA(src,srcbpp,srcfmt,pixel,sR,sG,sB,sA); 125 DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA);
126 dR = dstfmt->palette->colors[*dst].r; 126 dR = dstfmt->palette->colors[*dst].r;
127 dG = dstfmt->palette->colors[*dst].g; 127 dG = dstfmt->palette->colors[*dst].g;
128 dB = dstfmt->palette->colors[*dst].b; 128 dB = dstfmt->palette->colors[*dst].b;
129 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); 129 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB);
130 dR &= 0xff; 130 dR &= 0xff;
167 const int A = srcfmt->alpha; 167 const int A = srcfmt->alpha;
168 168
169 while ( height-- ) { 169 while ( height-- ) {
170 DUFFS_LOOP( 170 DUFFS_LOOP(
171 { 171 {
172 Uint32 pixel; 172 Uint32 Pixel;
173 unsigned sR; 173 unsigned sR;
174 unsigned sG; 174 unsigned sG;
175 unsigned sB; 175 unsigned sB;
176 unsigned dR; 176 unsigned dR;
177 unsigned dG; 177 unsigned dG;
178 unsigned dB; 178 unsigned dB;
179 DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB); 179 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
180 if ( pixel != ckey ) { 180 if ( Pixel != ckey ) {
181 dR = dstfmt->palette->colors[*dst].r; 181 dR = dstfmt->palette->colors[*dst].r;
182 dG = dstfmt->palette->colors[*dst].g; 182 dG = dstfmt->palette->colors[*dst].g;
183 dB = dstfmt->palette->colors[*dst].b; 183 dB = dstfmt->palette->colors[*dst].b;
184 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); 184 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB);
185 dR &= 0xff; 185 dR &= 0xff;
296 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ 296 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */
297 paddw_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ 297 paddw_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */
298 pand_r2r(mm3, mm2); /* 0A0R0G0B -> mm2 */ 298 pand_r2r(mm3, mm2); /* 0A0R0G0B -> mm2 */
299 packuswb_r2r(mm2, mm2); /* ARGBARGB -> mm2 */ 299 packuswb_r2r(mm2, mm2); /* ARGBARGB -> mm2 */
300 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ 300 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */
301 movd_r2m(mm2, *dstp);/* mm2 -> pixel */ 301 movd_r2m(mm2, *dstp);/* mm2 -> Pixel */
302 ++srcp; 302 ++srcp;
303 ++dstp; 303 ++dstp;
304 },{ 304 },{
305 /* Two Pixels Blend */ 305 /* Two Pixels Blend */
306 movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ 306 movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/
332 packuswb_r2r(mm6, mm6); /* ARGBARGB -> mm6 */ 332 packuswb_r2r(mm6, mm6); /* ARGBARGB -> mm6 */
333 psrlq_i2r(32, mm2); /* mm2 >> 32 -> mm2 */ 333 psrlq_i2r(32, mm2); /* mm2 >> 32 -> mm2 */
334 psllq_i2r(32, mm6); /* mm6 << 32 -> mm6 */ 334 psllq_i2r(32, mm6); /* mm6 << 32 -> mm6 */
335 por_r2r(mm6, mm2); /* mm6 | mm2 -> mm2 */ 335 por_r2r(mm6, mm2); /* mm6 | mm2 -> mm2 */
336 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ 336 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */
337 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ 337 movq_r2m(mm2, *dstp);/* mm2 -> 2 x Pixel */
338 srcp += 2; 338 srcp += 2;
339 dstp += 2; 339 dstp += 2;
340 }, width); 340 }, width);
341 srcp += srcskip; 341 srcp += srcskip;
342 dstp += dstskip; 342 dstp += dstskip;
420 emms(); 420 emms();
421 } 421 }
422 #endif 422 #endif
423 423
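For reference, the packed-word sequence in the MMX hunk above (psrlw 8 / paddw / pand / packuswb) is the vectorized form of the same per-channel formula the scalar blitters' ALPHA_BLEND macro uses; the (src - dst) * alpha multiply presumably sits in the elided lines. A scalar sketch (function name illustrative, not from SDL):

    /* dst = dst + ((src - dst) * alpha >> 8), per 8-bit channel.
       Unsigned wraparound when src < dst is cancelled by the final
       & 0xff, just like the "dR &= 0xff" lines in the C blitters. */
    static unsigned blend_channel(unsigned src, unsigned dst, unsigned alpha)
    {
        return ((((src - dst) * alpha) >> 8) + dst) & 0xff;
    }
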
424 #ifdef USE_ALTIVEC_BLITTERS 424 #ifdef USE_ALTIVEC_BLITTERS
425 #include <altivec.h>
425 #include <assert.h> 426 #include <assert.h>
427
428 #if ((defined MACOSX) && (__GNUC__ < 4))
429 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
430 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
431 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
432 (vector unsigned short) ( a,b,c,d,e,f,g,h )
433 #else
434 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
435 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
436 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
437 (vector unsigned short) { a,b,c,d,e,f,g,h }
438 #endif
439
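The new macros exist because GCC 4 dropped Apple's parenthesized AltiVec literal syntax in favor of C99-style braced initializers; the #if keeps older Mac OS X compilers working while everything else gets the braced form. Call sites then use a single spelling, e.g. (the vector value here is illustrative):

    vector unsigned short half = VECUINT16_LITERAL(
        0x0080, 0x0080, 0x0080, 0x0080,
        0x0080, 0x0080, 0x0080, 0x0080);
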
426 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) 440 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
427 #define VECPRINT(msg, v) do { \ 441 #define VECPRINT(msg, v) do { \
428 vector unsigned int tmpvec = (vector unsigned int)(v); \ 442 vector unsigned int tmpvec = (vector unsigned int)(v); \
429 unsigned int *vp = (unsigned int *)&tmpvec; \ 443 unsigned int *vp = (unsigned int *)&tmpvec; \
430 printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \ 444 printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
491 srcfmt = &default_pixel_format; 505 srcfmt = &default_pixel_format;
492 } 506 }
493 if (!dstfmt) { 507 if (!dstfmt) {
494 dstfmt = &default_pixel_format; 508 dstfmt = &default_pixel_format;
495 } 509 }
496 vector unsigned char plus = (vector unsigned char) 510 vector unsigned char plus = VECUINT8_LITERAL
497 ( 0x00, 0x00, 0x00, 0x00, 511 ( 0x00, 0x00, 0x00, 0x00,
498 0x04, 0x04, 0x04, 0x04, 512 0x04, 0x04, 0x04, 0x04,
499 0x08, 0x08, 0x08, 0x08, 513 0x08, 0x08, 0x08, 0x08,
500 0x0C, 0x0C, 0x0C, 0x0C ); 514 0x0C, 0x0C, 0x0C, 0x0C );
501 vector unsigned char vswiz; 515 vector unsigned char vswiz;
510 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); 524 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
511 } else { 525 } else {
512 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); 526 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
513 } 527 }
514 #undef RESHIFT 528 #undef RESHIFT
515 ((unsigned int *)&srcvec)[0] = (rmask | gmask | bmask | amask); 529 ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
516 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); 530 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
517 return(vswiz); 531 return(vswiz);
518 } 532 }
519 533
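The (char *) detour in calc_swizzle32's store (and in the colorkey setup further down) addresses GCC 4's strict-aliasing diagnostics: writing through a plain (unsigned int *)&vector cast draws "dereferencing type-punned pointer will break strict-aliasing rules", while a cast routed through a character pointer does not. A sketch of the pattern (names illustrative):

    static void store_word(vector unsigned char *v, unsigned int value)
    {
        /* Direct cast warns at -O2 under GCC 4:
               ((unsigned int *)v)[0] = value;              */
        ((unsigned int *)(char *)v)[0] = value;  /* silent  */
        /* A union with vector and scalar members would be the
           stricter alternative. */
    }
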
520 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) 534 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info)
531 vector unsigned short v1_16 = vec_splat_u16(1); 545 vector unsigned short v1_16 = vec_splat_u16(1);
532 vector unsigned short v2_16 = vec_splat_u16(2); 546 vector unsigned short v2_16 = vec_splat_u16(2);
533 vector unsigned short v3_16 = vec_splat_u16(3); 547 vector unsigned short v3_16 = vec_splat_u16(3);
534 vector unsigned int v8_32 = vec_splat_u32(8); 548 vector unsigned int v8_32 = vec_splat_u32(8);
535 vector unsigned int v16_32 = vec_add(v8_32, v8_32); 549 vector unsigned int v16_32 = vec_add(v8_32, v8_32);
536 vector unsigned short v3f = (vector unsigned short)( 550 vector unsigned short v3f = VECUINT16_LITERAL(
537 0x003f, 0x003f, 0x003f, 0x003f, 551 0x003f, 0x003f, 0x003f, 0x003f,
538 0x003f, 0x003f, 0x003f, 0x003f); 552 0x003f, 0x003f, 0x003f, 0x003f);
539 vector unsigned short vfc = (vector unsigned short)( 553 vector unsigned short vfc = VECUINT16_LITERAL(
540 0x00fc, 0x00fc, 0x00fc, 0x00fc, 554 0x00fc, 0x00fc, 0x00fc, 0x00fc,
541 0x00fc, 0x00fc, 0x00fc, 0x00fc); 555 0x00fc, 0x00fc, 0x00fc, 0x00fc);
542 556
543 /* 557 /*
544 0x10 - 0x1f is the alpha 558 0x10 - 0x1f is the alpha
545 0x00 - 0x0e evens are the red 559 0x00 - 0x0e evens are the red
546 0x01 - 0x0f odds are zero 560 0x01 - 0x0f odds are zero
547 */ 561 */
548 vector unsigned char vredalpha1 = (vector unsigned char)( 562 vector unsigned char vredalpha1 = VECUINT8_LITERAL(
549 0x10, 0x00, 0x01, 0x01, 563 0x10, 0x00, 0x01, 0x01,
550 0x10, 0x02, 0x01, 0x01, 564 0x10, 0x02, 0x01, 0x01,
551 0x10, 0x04, 0x01, 0x01, 565 0x10, 0x04, 0x01, 0x01,
552 0x10, 0x06, 0x01, 0x01 566 0x10, 0x06, 0x01, 0x01
553 ); 567 );
556 ); 570 );
557 /* 571 /*
558 0x00 - 0x0f is ARxx ARxx ARxx ARxx 572 0x00 - 0x0f is ARxx ARxx ARxx ARxx
559 0x11 - 0x1f odds are blue 573 0x11 - 0x1f odds are blue
560 */ 574 */
561 vector unsigned char vblue1 = (vector unsigned char)( 575 vector unsigned char vblue1 = VECUINT8_LITERAL(
562 0x00, 0x01, 0x02, 0x11, 576 0x00, 0x01, 0x02, 0x11,
563 0x04, 0x05, 0x06, 0x13, 577 0x04, 0x05, 0x06, 0x13,
564 0x08, 0x09, 0x0a, 0x15, 578 0x08, 0x09, 0x0a, 0x15,
565 0x0c, 0x0d, 0x0e, 0x17 579 0x0c, 0x0d, 0x0e, 0x17
566 ); 580 );
569 ); 583 );
570 /* 584 /*
571 0x00 - 0x0f is ARxB ARxB ARxB ARxB 585 0x00 - 0x0f is ARxB ARxB ARxB ARxB
572 0x10 - 0x1e evens are green 586 0x10 - 0x1e evens are green
573 */ 587 */
574 vector unsigned char vgreen1 = (vector unsigned char)( 588 vector unsigned char vgreen1 = VECUINT8_LITERAL(
575 0x00, 0x01, 0x10, 0x03, 589 0x00, 0x01, 0x10, 0x03,
576 0x04, 0x05, 0x12, 0x07, 590 0x04, 0x05, 0x12, 0x07,
577 0x08, 0x09, 0x14, 0x0b, 591 0x08, 0x09, 0x14, 0x0b,
578 0x0c, 0x0d, 0x16, 0x0f 592 0x0c, 0x0d, 0x16, 0x0f
579 ); 593 );
580 vector unsigned char vgreen2 = (vector unsigned char)( 594 vector unsigned char vgreen2 = (vector unsigned char)(
581 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) 595 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32))
582 ); 596 );
583 vector unsigned char vgmerge = (vector unsigned char)( 597 vector unsigned char vgmerge = VECUINT8_LITERAL(
584 0x00, 0x02, 0x00, 0x06, 598 0x00, 0x02, 0x00, 0x06,
585 0x00, 0x0a, 0x00, 0x0e, 599 0x00, 0x0a, 0x00, 0x0e,
586 0x00, 0x12, 0x00, 0x16, 600 0x00, 0x12, 0x00, 0x16,
587 0x00, 0x1a, 0x00, 0x1e); 601 0x00, 0x1a, 0x00, 0x1e);
588 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); 602 vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
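
The layout comments above ("0x10 - 0x1f is the alpha", and so on) read the permute constants as vec_perm indices: vec_perm(a, b, p) fills each result byte from the 32-byte concatenation of a and b, with indices 0x00-0x0f selecting from a and 0x10-0x1f from b. A minimal sketch of that indexing (the vectors are illustrative):

    vector unsigned char a = vec_splat_u8(1);
    vector unsigned char b = vec_splat_u8(2);
    /* indices < 0x10 pick bytes of a, indices >= 0x10 pick bytes of b */
    vector unsigned char p = VECUINT8_LITERAL(
        0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
        0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17);
    vector unsigned char ab = vec_perm(a, b, p);  /* 1,2,1,2,... */
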
599 vector unsigned char voverflow; 613 vector unsigned char voverflow;
600 int width = info->d_width; 614 int width = info->d_width;
601 615
602 #define ONE_PIXEL_BLEND(condition, widthvar) \ 616 #define ONE_PIXEL_BLEND(condition, widthvar) \
603 while (condition) { \ 617 while (condition) { \
604 Uint32 pixel; \ 618 Uint32 Pixel; \
605 unsigned sR, sG, sB, dR, dG, dB, sA; \ 619 unsigned sR, sG, sB, dR, dG, dB, sA; \
606 DISEMBLE_RGBA(src, 4, srcfmt, pixel, sR, sG, sB, sA); \ 620 DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
607 if(sA) { \ 621 if(sA) { \
608 unsigned short dstpixel = *((unsigned short *)dst); \ 622 unsigned short dstpixel = *((unsigned short *)dst); \
609 dR = (dstpixel >> 8) & 0xf8; \ 623 dR = (dstpixel >> 8) & 0xf8; \
610 dG = (dstpixel >> 3) & 0xfc; \ 624 dG = (dstpixel >> 3) & 0xfc; \
611 dB = (dstpixel << 3) & 0xf8; \ 625 dB = (dstpixel << 3) & 0xf8; \
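
ONE_PIXEL_BLEND above widens a destination RGB565 pixel into 8-bit channels by shifting each field to the top of a byte; after ALPHA_BLEND, packing reverses those shifts. A round-trip sketch (helper name illustrative, not from SDL):

    /* Inverse of the unpack shown above:
       dR = (p >> 8) & 0xf8;  dG = (p >> 3) & 0xfc;  dB = (p << 3) & 0xf8;
       The low 3 (red/blue) or 2 (green) bits of each channel are lost. */
    static unsigned short pack565(unsigned r, unsigned g, unsigned b)
    {
        return (unsigned short)(((r & 0xf8) << 8) |
                                ((g & 0xfc) << 3) |
                                ( b          >> 3));
    }
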
725 ((unsigned char *)&valpha)[0] = alpha; 739 ((unsigned char *)&valpha)[0] = alpha;
726 valpha = vec_splat(valpha, 0); 740 valpha = vec_splat(valpha, 0);
727 vbits = (vector unsigned char)vec_splat_s8(-1); 741 vbits = (vector unsigned char)vec_splat_s8(-1);
728 742
729 ckey &= rgbmask; 743 ckey &= rgbmask;
730 ((unsigned int *)&vckey)[0] = ckey; 744 ((unsigned int *)(char*)&vckey)[0] = ckey;
731 vckey = vec_splat(vckey, 0); 745 vckey = vec_splat(vckey, 0);
732 ((unsigned int *)&vrgbmask)[0] = rgbmask; 746 ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
733 vrgbmask = vec_splat(vrgbmask, 0); 747 vrgbmask = vec_splat(vrgbmask, 0);
734 748
735 while(height--) { 749 while(height--) {
736 int width = info->d_width; 750 int width = info->d_width;
737 #define ONE_PIXEL_BLEND(condition, widthvar) \ 751 #define ONE_PIXEL_BLEND(condition, widthvar) \
738 while (condition) { \ 752 while (condition) { \
739 Uint32 pixel; \ 753 Uint32 Pixel; \
740 unsigned sR, sG, sB, dR, dG, dB; \ 754 unsigned sR, sG, sB, dR, dG, dB; \
741 RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, pixel); \ 755 RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \
742 if(sA && pixel != ckey) { \ 756 if(sA && Pixel != ckey) { \
743 RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); \ 757 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
744 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, pixel, dR, dG, dB); \ 758 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
745 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ 759 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
746 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ 760 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
747 } \ 761 } \
748 ((Uint8 *)dstp) += 4; \ 762 dstp++; \
749 ((Uint8 *)srcp) += 4; \ 763 srcp++; \
750 widthvar--; \ 764 widthvar--; \
751 } 765 }
752 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 766 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
753 if (width > 0) { 767 if (width > 0) {
754 int extrawidth = (width % 4); 768 int extrawidth = (width % 4);
838 vsdstPermute = calc_swizzle32(dstfmt, NULL); 852 vsdstPermute = calc_swizzle32(dstfmt, NULL);
839 853
840 while ( height-- ) { 854 while ( height-- ) {
841 width = info->d_width; 855 width = info->d_width;
842 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 856 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
843 Uint32 pixel; \ 857 Uint32 Pixel; \
844 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ 858 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
845 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, pixel, sR, sG, sB, sA); \ 859 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
846 if(sA) { \ 860 if(sA) { \
847 DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, pixel, dR, dG, dB, dA); \ 861 DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \
848 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ 862 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
849 ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ 863 ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \
850 } \ 864 } \
851 ++srcp; \ 865 ++srcp; \
852 ++dstp; \ 866 ++dstp; \
1038 vbits = (vector unsigned char)vec_splat_s8(-1); 1052 vbits = (vector unsigned char)vec_splat_s8(-1);
1039 1053
1040 while(height--) { 1054 while(height--) {
1041 int width = info->d_width; 1055 int width = info->d_width;
1042 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 1056 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
1043 Uint32 pixel; \ 1057 Uint32 Pixel; \
1044 unsigned sR, sG, sB, dR, dG, dB; \ 1058 unsigned sR, sG, sB, dR, dG, dB; \
1045 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, pixel, sR, sG, sB); \ 1059 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
1046 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, pixel, dR, dG, dB); \ 1060 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
1047 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ 1061 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
1048 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ 1062 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
1049 ++srcp; \ 1063 ++srcp; \
1050 ++dstp; \ 1064 ++dstp; \
1051 widthvar--; \ 1065 widthvar--; \
2000 2014
2001 if(sA) { 2015 if(sA) {
2002 while ( height-- ) { 2016 while ( height-- ) {
2003 DUFFS_LOOP4( 2017 DUFFS_LOOP4(
2004 { 2018 {
2005 Uint32 pixel; 2019 Uint32 Pixel;
2006 unsigned sR; 2020 unsigned sR;
2007 unsigned sG; 2021 unsigned sG;
2008 unsigned sB; 2022 unsigned sB;
2009 unsigned dR; 2023 unsigned dR;
2010 unsigned dG; 2024 unsigned dG;
2011 unsigned dB; 2025 unsigned dB;
2012 DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB); 2026 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
2013 DISEMBLE_RGB(dst, dstbpp, dstfmt, pixel, dR, dG, dB); 2027 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB);
2014 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); 2028 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB);
2015 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 2029 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
2016 src += srcbpp; 2030 src += srcbpp;
2017 dst += dstbpp; 2031 dst += dstbpp;
2018 }, 2032 },
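
DUFFS_LOOP4, used throughout these blitters, is SDL's Duff's-device macro: the pixel body is unrolled four times and a switch jumps into the middle of the first pass, so width % 4 needs no separate cleanup loop. The idea reduced to a sketch (not the macro's exact text; like the macro, it assumes width > 0):

    static void fill4(Uint8 *dst, int width, Uint8 value)
    {
        int n = (width + 3) / 4;
        switch (width & 3) {
        case 0: do { *dst++ = value;
        case 3:      *dst++ = value;
        case 2:      *dst++ = value;
        case 1:      *dst++ = value;
                } while (--n > 0);
        }
    }
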
2041 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 2055 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
2042 2056
2043 while ( height-- ) { 2057 while ( height-- ) {
2044 DUFFS_LOOP4( 2058 DUFFS_LOOP4(
2045 { 2059 {
2046 Uint32 pixel; 2060 Uint32 Pixel;
2047 unsigned sR; 2061 unsigned sR;
2048 unsigned sG; 2062 unsigned sG;
2049 unsigned sB; 2063 unsigned sB;
2050 unsigned dR; 2064 unsigned dR;
2051 unsigned dG; 2065 unsigned dG;
2052 unsigned dB; 2066 unsigned dB;
2053 RETRIEVE_RGB_PIXEL(src, srcbpp, pixel); 2067 RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
2054 if(sA && pixel != ckey) { 2068 if(sA && Pixel != ckey) {
2055 RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); 2069 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
2056 DISEMBLE_RGB(dst, dstbpp, dstfmt, pixel, dR, dG, dB); 2070 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB);
2057 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); 2071 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB);
2058 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 2072 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
2059 } 2073 }
2060 src += srcbpp; 2074 src += srcbpp;
2061 dst += dstbpp; 2075 dst += dstbpp;
2091 It is unclear whether there is a good general solution that doesn't 2105 It is unclear whether there is a good general solution that doesn't
2092 need a branch (or a divide). */ 2106 need a branch (or a divide). */
2093 while ( height-- ) { 2107 while ( height-- ) {
2094 DUFFS_LOOP4( 2108 DUFFS_LOOP4(
2095 { 2109 {
2096 Uint32 pixel; 2110 Uint32 Pixel;
2097 unsigned sR; 2111 unsigned sR;
2098 unsigned sG; 2112 unsigned sG;
2099 unsigned sB; 2113 unsigned sB;
2100 unsigned dR; 2114 unsigned dR;
2101 unsigned dG; 2115 unsigned dG;
2102 unsigned dB; 2116 unsigned dB;
2103 unsigned sA; 2117 unsigned sA;
2104 unsigned dA; 2118 unsigned dA;
2105 DISEMBLE_RGBA(src, srcbpp, srcfmt, pixel, sR, sG, sB, sA); 2119 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
2106 if(sA) { 2120 if(sA) {
2107 DISEMBLE_RGBA(dst, dstbpp, dstfmt, pixel, dR, dG, dB, dA); 2121 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
2108 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); 2122 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB);
2109 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 2123 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
2110 } 2124 }
2111 src += srcbpp; 2125 src += srcbpp;
2112 dst += dstbpp; 2126 dst += dstbpp;