Mercurial > sdl-ios-xcode
comparison src/video/SDL_blit_A.c @ 1162:2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
GCC versions.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Thu, 20 Oct 2005 06:55:26 +0000 |
parents | ffaaf7ecf685 |
children | 867f521591e5 |
comparison
equal
deleted
inserted
replaced
1161:05d4b93b911e | 1162:2651158f59b8 |
---|---|
60 const unsigned A = srcfmt->alpha; | 60 const unsigned A = srcfmt->alpha; |
61 | 61 |
62 while ( height-- ) { | 62 while ( height-- ) { |
63 DUFFS_LOOP4( | 63 DUFFS_LOOP4( |
64 { | 64 { |
65 Uint32 pixel; | 65 Uint32 Pixel; |
66 unsigned sR; | 66 unsigned sR; |
67 unsigned sG; | 67 unsigned sG; |
68 unsigned sB; | 68 unsigned sB; |
69 unsigned dR; | 69 unsigned dR; |
70 unsigned dG; | 70 unsigned dG; |
71 unsigned dB; | 71 unsigned dB; |
72 DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB); | 72 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
73 dR = dstfmt->palette->colors[*dst].r; | 73 dR = dstfmt->palette->colors[*dst].r; |
74 dG = dstfmt->palette->colors[*dst].g; | 74 dG = dstfmt->palette->colors[*dst].g; |
75 dB = dstfmt->palette->colors[*dst].b; | 75 dB = dstfmt->palette->colors[*dst].b; |
76 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); | 76 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); |
77 dR &= 0xff; | 77 dR &= 0xff; |
112 | 112 |
113 /* FIXME: fix alpha bit field expansion here too? */ | 113 /* FIXME: fix alpha bit field expansion here too? */ |
114 while ( height-- ) { | 114 while ( height-- ) { |
115 DUFFS_LOOP4( | 115 DUFFS_LOOP4( |
116 { | 116 { |
117 Uint32 pixel; | 117 Uint32 Pixel; |
118 unsigned sR; | 118 unsigned sR; |
119 unsigned sG; | 119 unsigned sG; |
120 unsigned sB; | 120 unsigned sB; |
121 unsigned sA; | 121 unsigned sA; |
122 unsigned dR; | 122 unsigned dR; |
123 unsigned dG; | 123 unsigned dG; |
124 unsigned dB; | 124 unsigned dB; |
125 DISEMBLE_RGBA(src,srcbpp,srcfmt,pixel,sR,sG,sB,sA); | 125 DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); |
126 dR = dstfmt->palette->colors[*dst].r; | 126 dR = dstfmt->palette->colors[*dst].r; |
127 dG = dstfmt->palette->colors[*dst].g; | 127 dG = dstfmt->palette->colors[*dst].g; |
128 dB = dstfmt->palette->colors[*dst].b; | 128 dB = dstfmt->palette->colors[*dst].b; |
129 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); | 129 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
130 dR &= 0xff; | 130 dR &= 0xff; |
167 const int A = srcfmt->alpha; | 167 const int A = srcfmt->alpha; |
168 | 168 |
169 while ( height-- ) { | 169 while ( height-- ) { |
170 DUFFS_LOOP( | 170 DUFFS_LOOP( |
171 { | 171 { |
172 Uint32 pixel; | 172 Uint32 Pixel; |
173 unsigned sR; | 173 unsigned sR; |
174 unsigned sG; | 174 unsigned sG; |
175 unsigned sB; | 175 unsigned sB; |
176 unsigned dR; | 176 unsigned dR; |
177 unsigned dG; | 177 unsigned dG; |
178 unsigned dB; | 178 unsigned dB; |
179 DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB); | 179 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
180 if ( pixel != ckey ) { | 180 if ( Pixel != ckey ) { |
181 dR = dstfmt->palette->colors[*dst].r; | 181 dR = dstfmt->palette->colors[*dst].r; |
182 dG = dstfmt->palette->colors[*dst].g; | 182 dG = dstfmt->palette->colors[*dst].g; |
183 dB = dstfmt->palette->colors[*dst].b; | 183 dB = dstfmt->palette->colors[*dst].b; |
184 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); | 184 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); |
185 dR &= 0xff; | 185 dR &= 0xff; |
296 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | 296 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ |
297 paddw_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | 297 paddw_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ |
298 pand_r2r(mm3, mm2); /* 0A0R0G0B -> mm2 */ | 298 pand_r2r(mm3, mm2); /* 0A0R0G0B -> mm2 */ |
299 packuswb_r2r(mm2, mm2); /* ARGBARGB -> mm2 */ | 299 packuswb_r2r(mm2, mm2); /* ARGBARGB -> mm2 */ |
300 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | 300 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ |
301 movd_r2m(mm2, *dstp);/* mm2 -> pixel */ | 301 movd_r2m(mm2, *dstp);/* mm2 -> Pixel */ |
302 ++srcp; | 302 ++srcp; |
303 ++dstp; | 303 ++dstp; |
304 },{ | 304 },{ |
305 /* Two Pixels Blend */ | 305 /* Two Pixels Blend */ |
306 movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ | 306 movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ |
332 packuswb_r2r(mm6, mm6); /* ARGBARGB -> mm6 */ | 332 packuswb_r2r(mm6, mm6); /* ARGBARGB -> mm6 */ |
333 psrlq_i2r(32, mm2); /* mm2 >> 32 -> mm2 */ | 333 psrlq_i2r(32, mm2); /* mm2 >> 32 -> mm2 */ |
334 psllq_i2r(32, mm6); /* mm6 << 32 -> mm6 */ | 334 psllq_i2r(32, mm6); /* mm6 << 32 -> mm6 */ |
335 por_r2r(mm6, mm2); /* mm6 | mm2 -> mm2 */ | 335 por_r2r(mm6, mm2); /* mm6 | mm2 -> mm2 */ |
336 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | 336 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ |
337 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ | 337 movq_r2m(mm2, *dstp);/* mm2 -> 2 x Pixel */ |
338 srcp += 2; | 338 srcp += 2; |
339 dstp += 2; | 339 dstp += 2; |
340 }, width); | 340 }, width); |
341 srcp += srcskip; | 341 srcp += srcskip; |
342 dstp += dstskip; | 342 dstp += dstskip; |
420 emms(); | 420 emms(); |
421 } | 421 } |
422 #endif | 422 #endif |
423 | 423 |
424 #ifdef USE_ALTIVEC_BLITTERS | 424 #ifdef USE_ALTIVEC_BLITTERS |
425 #include <altivec.h> | |
425 #include <assert.h> | 426 #include <assert.h> |
427 | |
428 #if ((defined MACOSX) && (__GNUC__ < 4)) | |
429 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ | |
430 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) | |
431 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ | |
432 (vector unsigned short) ( a,b,c,d,e,f,g,h ) | |
433 #else | |
434 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ | |
435 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } | |
436 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ | |
437 (vector unsigned short) { a,b,c,d,e,f,g,h } | |
438 #endif | |
439 | |
426 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) | 440 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) |
427 #define VECPRINT(msg, v) do { \ | 441 #define VECPRINT(msg, v) do { \ |
428 vector unsigned int tmpvec = (vector unsigned int)(v); \ | 442 vector unsigned int tmpvec = (vector unsigned int)(v); \ |
429 unsigned int *vp = (unsigned int *)&tmpvec; \ | 443 unsigned int *vp = (unsigned int *)&tmpvec; \ |
430 printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \ | 444 printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \ |
491 srcfmt = &default_pixel_format; | 505 srcfmt = &default_pixel_format; |
492 } | 506 } |
493 if (!dstfmt) { | 507 if (!dstfmt) { |
494 dstfmt = &default_pixel_format; | 508 dstfmt = &default_pixel_format; |
495 } | 509 } |
496 vector unsigned char plus = (vector unsigned char) | 510 vector unsigned char plus = VECUINT8_LITERAL |
497 ( 0x00, 0x00, 0x00, 0x00, | 511 ( 0x00, 0x00, 0x00, 0x00, |
498 0x04, 0x04, 0x04, 0x04, | 512 0x04, 0x04, 0x04, 0x04, |
499 0x08, 0x08, 0x08, 0x08, | 513 0x08, 0x08, 0x08, 0x08, |
500 0x0C, 0x0C, 0x0C, 0x0C ); | 514 0x0C, 0x0C, 0x0C, 0x0C ); |
501 vector unsigned char vswiz; | 515 vector unsigned char vswiz; |
510 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); | 524 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); |
511 } else { | 525 } else { |
512 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); | 526 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); |
513 } | 527 } |
514 #undef RESHIFT | 528 #undef RESHIFT |
515 ((unsigned int *)&srcvec)[0] = (rmask | gmask | bmask | amask); | 529 ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); |
516 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); | 530 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); |
517 return(vswiz); | 531 return(vswiz); |
518 } | 532 } |
519 | 533 |
520 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) | 534 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) |
531 vector unsigned short v1_16 = vec_splat_u16(1); | 545 vector unsigned short v1_16 = vec_splat_u16(1); |
532 vector unsigned short v2_16 = vec_splat_u16(2); | 546 vector unsigned short v2_16 = vec_splat_u16(2); |
533 vector unsigned short v3_16 = vec_splat_u16(3); | 547 vector unsigned short v3_16 = vec_splat_u16(3); |
534 vector unsigned int v8_32 = vec_splat_u32(8); | 548 vector unsigned int v8_32 = vec_splat_u32(8); |
535 vector unsigned int v16_32 = vec_add(v8_32, v8_32); | 549 vector unsigned int v16_32 = vec_add(v8_32, v8_32); |
536 vector unsigned short v3f = (vector unsigned short)( | 550 vector unsigned short v3f = VECUINT16_LITERAL( |
537 0x003f, 0x003f, 0x003f, 0x003f, | 551 0x003f, 0x003f, 0x003f, 0x003f, |
538 0x003f, 0x003f, 0x003f, 0x003f); | 552 0x003f, 0x003f, 0x003f, 0x003f); |
539 vector unsigned short vfc = (vector unsigned short)( | 553 vector unsigned short vfc = VECUINT16_LITERAL( |
540 0x00fc, 0x00fc, 0x00fc, 0x00fc, | 554 0x00fc, 0x00fc, 0x00fc, 0x00fc, |
541 0x00fc, 0x00fc, 0x00fc, 0x00fc); | 555 0x00fc, 0x00fc, 0x00fc, 0x00fc); |
542 | 556 |
543 /* | 557 /* |
544 0x10 - 0x1f is the alpha | 558 0x10 - 0x1f is the alpha |
545 0x00 - 0x0e evens are the red | 559 0x00 - 0x0e evens are the red |
546 0x01 - 0x0f odds are zero | 560 0x01 - 0x0f odds are zero |
547 */ | 561 */ |
548 vector unsigned char vredalpha1 = (vector unsigned char)( | 562 vector unsigned char vredalpha1 = VECUINT8_LITERAL( |
549 0x10, 0x00, 0x01, 0x01, | 563 0x10, 0x00, 0x01, 0x01, |
550 0x10, 0x02, 0x01, 0x01, | 564 0x10, 0x02, 0x01, 0x01, |
551 0x10, 0x04, 0x01, 0x01, | 565 0x10, 0x04, 0x01, 0x01, |
552 0x10, 0x06, 0x01, 0x01 | 566 0x10, 0x06, 0x01, 0x01 |
553 ); | 567 ); |
556 ); | 570 ); |
557 /* | 571 /* |
558 0x00 - 0x0f is ARxx ARxx ARxx ARxx | 572 0x00 - 0x0f is ARxx ARxx ARxx ARxx |
559 0x11 - 0x0f odds are blue | 573 0x11 - 0x0f odds are blue |
560 */ | 574 */ |
561 vector unsigned char vblue1 = (vector unsigned char)( | 575 vector unsigned char vblue1 = VECUINT8_LITERAL( |
562 0x00, 0x01, 0x02, 0x11, | 576 0x00, 0x01, 0x02, 0x11, |
563 0x04, 0x05, 0x06, 0x13, | 577 0x04, 0x05, 0x06, 0x13, |
564 0x08, 0x09, 0x0a, 0x15, | 578 0x08, 0x09, 0x0a, 0x15, |
565 0x0c, 0x0d, 0x0e, 0x17 | 579 0x0c, 0x0d, 0x0e, 0x17 |
566 ); | 580 ); |
569 ); | 583 ); |
570 /* | 584 /* |
571 0x00 - 0x0f is ARxB ARxB ARxB ARxB | 585 0x00 - 0x0f is ARxB ARxB ARxB ARxB |
572 0x10 - 0x0e evens are green | 586 0x10 - 0x0e evens are green |
573 */ | 587 */ |
574 vector unsigned char vgreen1 = (vector unsigned char)( | 588 vector unsigned char vgreen1 = VECUINT8_LITERAL( |
575 0x00, 0x01, 0x10, 0x03, | 589 0x00, 0x01, 0x10, 0x03, |
576 0x04, 0x05, 0x12, 0x07, | 590 0x04, 0x05, 0x12, 0x07, |
577 0x08, 0x09, 0x14, 0x0b, | 591 0x08, 0x09, 0x14, 0x0b, |
578 0x0c, 0x0d, 0x16, 0x0f | 592 0x0c, 0x0d, 0x16, 0x0f |
579 ); | 593 ); |
580 vector unsigned char vgreen2 = (vector unsigned char)( | 594 vector unsigned char vgreen2 = (vector unsigned char)( |
581 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) | 595 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) |
582 ); | 596 ); |
583 vector unsigned char vgmerge = (vector unsigned char)( | 597 vector unsigned char vgmerge = VECUINT8_LITERAL( |
584 0x00, 0x02, 0x00, 0x06, | 598 0x00, 0x02, 0x00, 0x06, |
585 0x00, 0x0a, 0x00, 0x0e, | 599 0x00, 0x0a, 0x00, 0x0e, |
586 0x00, 0x12, 0x00, 0x16, | 600 0x00, 0x12, 0x00, 0x16, |
587 0x00, 0x1a, 0x00, 0x1e); | 601 0x00, 0x1a, 0x00, 0x1e); |
588 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); | 602 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); |
599 vector unsigned char voverflow; | 613 vector unsigned char voverflow; |
600 int width = info->d_width; | 614 int width = info->d_width; |
601 | 615 |
602 #define ONE_PIXEL_BLEND(condition, widthvar) \ | 616 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
603 while (condition) { \ | 617 while (condition) { \ |
604 Uint32 pixel; \ | 618 Uint32 Pixel; \ |
605 unsigned sR, sG, sB, dR, dG, dB, sA; \ | 619 unsigned sR, sG, sB, dR, dG, dB, sA; \ |
606 DISEMBLE_RGBA(src, 4, srcfmt, pixel, sR, sG, sB, sA); \ | 620 DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
607 if(sA) { \ | 621 if(sA) { \ |
608 unsigned short dstpixel = *((unsigned short *)dst); \ | 622 unsigned short dstpixel = *((unsigned short *)dst); \ |
609 dR = (dstpixel >> 8) & 0xf8; \ | 623 dR = (dstpixel >> 8) & 0xf8; \ |
610 dG = (dstpixel >> 3) & 0xfc; \ | 624 dG = (dstpixel >> 3) & 0xfc; \ |
611 dB = (dstpixel << 3) & 0xf8; \ | 625 dB = (dstpixel << 3) & 0xf8; \ |
725 ((unsigned char *)&valpha)[0] = alpha; | 739 ((unsigned char *)&valpha)[0] = alpha; |
726 valpha = vec_splat(valpha, 0); | 740 valpha = vec_splat(valpha, 0); |
727 vbits = (vector unsigned char)vec_splat_s8(-1); | 741 vbits = (vector unsigned char)vec_splat_s8(-1); |
728 | 742 |
729 ckey &= rgbmask; | 743 ckey &= rgbmask; |
730 ((unsigned int *)&vckey)[0] = ckey; | 744 ((unsigned int *)(char*)&vckey)[0] = ckey; |
731 vckey = vec_splat(vckey, 0); | 745 vckey = vec_splat(vckey, 0); |
732 ((unsigned int *)&vrgbmask)[0] = rgbmask; | 746 ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; |
733 vrgbmask = vec_splat(vrgbmask, 0); | 747 vrgbmask = vec_splat(vrgbmask, 0); |
734 | 748 |
735 while(height--) { | 749 while(height--) { |
736 int width = info->d_width; | 750 int width = info->d_width; |
737 #define ONE_PIXEL_BLEND(condition, widthvar) \ | 751 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
738 while (condition) { \ | 752 while (condition) { \ |
739 Uint32 pixel; \ | 753 Uint32 Pixel; \ |
740 unsigned sR, sG, sB, dR, dG, dB; \ | 754 unsigned sR, sG, sB, dR, dG, dB; \ |
741 RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, pixel); \ | 755 RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \ |
742 if(sA && pixel != ckey) { \ | 756 if(sA && Pixel != ckey) { \ |
743 RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); \ | 757 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ |
744 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, pixel, dR, dG, dB); \ | 758 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
745 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ | 759 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
746 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ | 760 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
747 } \ | 761 } \ |
748 ((Uint8 *)dstp) += 4; \ | 762 dstp++; \ |
749 ((Uint8 *)srcp) += 4; \ | 763 srcp++; \ |
750 widthvar--; \ | 764 widthvar--; \ |
751 } | 765 } |
752 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | 766 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
753 if (width > 0) { | 767 if (width > 0) { |
754 int extrawidth = (width % 4); | 768 int extrawidth = (width % 4); |
838 vsdstPermute = calc_swizzle32(dstfmt, NULL); | 852 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
839 | 853 |
840 while ( height-- ) { | 854 while ( height-- ) { |
841 width = info->d_width; | 855 width = info->d_width; |
842 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | 856 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
843 Uint32 pixel; \ | 857 Uint32 Pixel; \ |
844 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ | 858 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ |
845 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, pixel, sR, sG, sB, sA); \ | 859 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
846 if(sA) { \ | 860 if(sA) { \ |
847 DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, pixel, dR, dG, dB, dA); \ | 861 DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \ |
848 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ | 862 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
849 ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ | 863 ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ |
850 } \ | 864 } \ |
851 ++srcp; \ | 865 ++srcp; \ |
852 ++dstp; \ | 866 ++dstp; \ |
1038 vbits = (vector unsigned char)vec_splat_s8(-1); | 1052 vbits = (vector unsigned char)vec_splat_s8(-1); |
1039 | 1053 |
1040 while(height--) { | 1054 while(height--) { |
1041 int width = info->d_width; | 1055 int width = info->d_width; |
1042 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | 1056 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1043 Uint32 pixel; \ | 1057 Uint32 Pixel; \ |
1044 unsigned sR, sG, sB, dR, dG, dB; \ | 1058 unsigned sR, sG, sB, dR, dG, dB; \ |
1045 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, pixel, sR, sG, sB); \ | 1059 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ |
1046 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, pixel, dR, dG, dB); \ | 1060 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
1047 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ | 1061 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
1048 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ | 1062 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
1049 ++srcp; \ | 1063 ++srcp; \ |
1050 ++dstp; \ | 1064 ++dstp; \ |
1051 widthvar--; \ | 1065 widthvar--; \ |
2000 | 2014 |
2001 if(sA) { | 2015 if(sA) { |
2002 while ( height-- ) { | 2016 while ( height-- ) { |
2003 DUFFS_LOOP4( | 2017 DUFFS_LOOP4( |
2004 { | 2018 { |
2005 Uint32 pixel; | 2019 Uint32 Pixel; |
2006 unsigned sR; | 2020 unsigned sR; |
2007 unsigned sG; | 2021 unsigned sG; |
2008 unsigned sB; | 2022 unsigned sB; |
2009 unsigned dR; | 2023 unsigned dR; |
2010 unsigned dG; | 2024 unsigned dG; |
2011 unsigned dB; | 2025 unsigned dB; |
2012 DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB); | 2026 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
2013 DISEMBLE_RGB(dst, dstbpp, dstfmt, pixel, dR, dG, dB); | 2027 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); |
2014 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); | 2028 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2015 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | 2029 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); |
2016 src += srcbpp; | 2030 src += srcbpp; |
2017 dst += dstbpp; | 2031 dst += dstbpp; |
2018 }, | 2032 }, |
2041 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | 2055 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
2042 | 2056 |
2043 while ( height-- ) { | 2057 while ( height-- ) { |
2044 DUFFS_LOOP4( | 2058 DUFFS_LOOP4( |
2045 { | 2059 { |
2046 Uint32 pixel; | 2060 Uint32 Pixel; |
2047 unsigned sR; | 2061 unsigned sR; |
2048 unsigned sG; | 2062 unsigned sG; |
2049 unsigned sB; | 2063 unsigned sB; |
2050 unsigned dR; | 2064 unsigned dR; |
2051 unsigned dG; | 2065 unsigned dG; |
2052 unsigned dB; | 2066 unsigned dB; |
2053 RETRIEVE_RGB_PIXEL(src, srcbpp, pixel); | 2067 RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel); |
2054 if(sA && pixel != ckey) { | 2068 if(sA && Pixel != ckey) { |
2055 RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); | 2069 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); |
2056 DISEMBLE_RGB(dst, dstbpp, dstfmt, pixel, dR, dG, dB); | 2070 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); |
2057 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); | 2071 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2058 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | 2072 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); |
2059 } | 2073 } |
2060 src += srcbpp; | 2074 src += srcbpp; |
2061 dst += dstbpp; | 2075 dst += dstbpp; |
2091 It is unclear whether there is a good general solution that doesn't | 2105 It is unclear whether there is a good general solution that doesn't |
2092 need a branch (or a divide). */ | 2106 need a branch (or a divide). */ |
2093 while ( height-- ) { | 2107 while ( height-- ) { |
2094 DUFFS_LOOP4( | 2108 DUFFS_LOOP4( |
2095 { | 2109 { |
2096 Uint32 pixel; | 2110 Uint32 Pixel; |
2097 unsigned sR; | 2111 unsigned sR; |
2098 unsigned sG; | 2112 unsigned sG; |
2099 unsigned sB; | 2113 unsigned sB; |
2100 unsigned dR; | 2114 unsigned dR; |
2101 unsigned dG; | 2115 unsigned dG; |
2102 unsigned dB; | 2116 unsigned dB; |
2103 unsigned sA; | 2117 unsigned sA; |
2104 unsigned dA; | 2118 unsigned dA; |
2105 DISEMBLE_RGBA(src, srcbpp, srcfmt, pixel, sR, sG, sB, sA); | 2119 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); |
2106 if(sA) { | 2120 if(sA) { |
2107 DISEMBLE_RGBA(dst, dstbpp, dstfmt, pixel, dR, dG, dB, dA); | 2121 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); |
2108 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); | 2122 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2109 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | 2123 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); |
2110 } | 2124 } |
2111 src += srcbpp; | 2125 src += srcbpp; |
2112 dst += dstbpp; | 2126 dst += dstbpp; |