sdl-ios-xcode (Mercurial): comparison of src/video/SDL_blit_A.c @ 5264:6a65c1fc07af
Updated CPU detection code for SSE3 and SSE4 and removed obsolete 3DNow! and Altivec support.
author   | Sam Lantinga <slouken@libsdl.org>
date     | Fri, 11 Feb 2011 14:51:04 -0800
parents  | f7b03b6838cb
children | b530ef003506
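The changeset drops the compile-time 3DNow! and AltiVec blitters shown below and leaves blitter choice to SDL's runtime CPU queries. As a hedged sketch of that dispatch pattern (SDL_HasMMX(), SDL_HasSSE3() and SDL_HasSSE41() are the SDL_cpuinfo.h queries; the helper name and its arguments are hypothetical, and SDL_BlitFunc is assumed from SDL's internal SDL_blit.h):

    #include "SDL_blit.h"      /* SDL_BlitInfo, SDL_BlitFunc (internal header) */
    #include "SDL_cpuinfo.h"   /* SDL_HasMMX(), SDL_HasSSE3(), SDL_HasSSE41() */

    /* Hypothetical helper: prefer the vectorized blitter when the CPU
       reports support for it, otherwise keep the portable C path. */
    static SDL_BlitFunc
    ChooseBlitFunc(SDL_BlitFunc c_func, SDL_BlitFunc mmx_func)
    {
        if (mmx_func != NULL && SDL_HasMMX()) {
            return mmx_func;    /* MMX path: 64-bit registers, 2 pixels per step */
        }
        return c_func;          /* always-available scalar fallback */
    }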
5263:f26314c20071 | 5264:6a65c1fc07af |
417 _mm_empty(); | 417 _mm_empty(); |
418 } | 418 } |
419 | 419 |
420 #endif /* __MMX__ */ | 420 #endif /* __MMX__ */ |
421 | 421 |
422 #if SDL_ALTIVEC_BLITTERS | |
423 #if __MWERKS__ | |
424 #pragma altivec_model on | |
425 #endif | |
426 #if HAVE_ALTIVEC_H | |
427 #include <altivec.h> | |
428 #endif | |
429 #include <assert.h> | |
430 | |
431 #if (defined(__MACOSX__) && (__GNUC__ < 4)) | |
432 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ | |
433 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) | |
434 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ | |
435 (vector unsigned short) ( a,b,c,d,e,f,g,h ) | |
436 #else | |
437 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ | |
438 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } | |
439 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ | |
440 (vector unsigned short) { a,b,c,d,e,f,g,h } | |
441 #endif | |
442 | |
443 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) | |
444 #define VECPRINT(msg, v) do { \ | |
445 vector unsigned int tmpvec = (vector unsigned int)(v); \ | |
446 unsigned int *vp = (unsigned int *)&tmpvec; \ | |
447 printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \ | |
448 } while (0) | |
449 | |
450 /* the permutation vector that takes the high bytes out of all the appropriate shorts | |
451 (vector unsigned char)( | |
452 0x00, 0x10, 0x02, 0x12, | |
453 0x04, 0x14, 0x06, 0x16, | |
454 0x08, 0x18, 0x0A, 0x1A, | |
455 0x0C, 0x1C, 0x0E, 0x1E ); | |
456 */ | |
457 #define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F))) | |
458 #define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12))) | |
459 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) | |
460 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ | |
461 ? vec_lvsl(0, src) \ | |
462 : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) | |
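UNALIGNED_PTR and VEC_ALIGNER above exist because AltiVec's vec_ld can only load from 16-byte-aligned addresses. The idiom they support, as a minimal standalone sketch (standard <altivec.h> intrinsics; the function name is illustrative):

    #include <altivec.h>

    /* Read 16 bytes from a possibly unaligned address: vec_ld() silently
       rounds the address down to 16-byte alignment, so two aligned loads
       straddle the data and vec_lvsl() builds the permute mask that shifts
       the wanted bytes into place. */
    static vector unsigned char
    load_unaligned16(const unsigned char *p)
    {
        vector unsigned char lo = vec_ld(0, p);    /* block containing p */
        vector unsigned char hi = vec_ld(15, p);   /* block containing p + 15 */
        return vec_perm(lo, hi, vec_lvsl(0, p));
    }

The main loops below use the same trick incrementally: each iteration loads only the next aligned block and reuses the previous one (vs/voverflow).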
463 | |
464 | |
465 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ | |
466 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ | |
467 vector unsigned short vtemp1 = vec_mule(vs, valpha); \ | |
468 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ | |
469 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ | |
470 /* valpha2 is 255-alpha */ \ | |
471 vector unsigned char valpha2 = vec_nor(valpha, valpha); \ | |
472 /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \ | |
473 vector unsigned short vtemp3 = vec_mule(vd, valpha2); \ | |
474 /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \ | |
475 vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \ | |
476 /* add source and dest */ \ | |
477 vtemp1 = vec_add(vtemp1, vtemp3); \ | |
478 vtemp2 = vec_add(vtemp2, vtemp4); \ | |
479 /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \ | |
480 vtemp1 = vec_add(vtemp1, v1_16); \ | |
481 vtemp3 = vec_sr(vtemp1, v8_16); \ | |
482 vtemp1 = vec_add(vtemp1, vtemp3); \ | |
483 /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \ | |
484 vtemp2 = vec_add(vtemp2, v1_16); \ | |
485 vtemp4 = vec_sr(vtemp2, v8_16); \ | |
486 vtemp2 = vec_add(vtemp2, vtemp4); \ | |
487 /* (>>8) and get ARGBARGBARGBARGB */ \ | |
488 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ | |
489 } while (0) | |
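VEC_MULTIPLY_ALPHA is the vector form of the per-channel blend d = (s*a + d*(255 - a)) / 255, with the division by 255 replaced by the standard shift approximation; the trailing >> 8 happens implicitly when mergePermute keeps only the high byte of each 16-bit lane. A scalar sketch of the same arithmetic (hypothetical helper, one 8-bit channel):

    /* Same math as the macro: x = s*a + d*(255-a) fits in 16 bits, and
       (x + 1 + ((x + 1) >> 8)) >> 8 closely approximates x / 255. */
    static unsigned char
    blend_channel(unsigned s, unsigned d, unsigned a)
    {
        unsigned x = s * a + d * (255 - a) + 1;   /* +1 is the v1_16 rounding term */
        return (unsigned char) ((x + (x >> 8)) >> 8);
    }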
490 | |
491 /* Calculate the permute vector used for 32->32 swizzling */ | |
492 static vector unsigned char | |
493 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt) | |
494 { | |
495 /* | |
496 * We have to assume that the bits that aren't used by the other | |
497 * colors are alpha, and that alpha is one complete byte, since some | |
498 * formats leave alpha with a zero mask, but we should still swizzle the bits. | |
499 */ | |
500 /* ARGB */ | |
501 const static struct SDL_PixelFormat default_pixel_format = { | |
502 NULL, 0, 0, | |
503 0, 0, 0, 0, | |
504 16, 8, 0, 24, | |
505 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 | |
506 }; | |
507 if (!srcfmt) { | |
508 srcfmt = &default_pixel_format; | |
509 } | |
510 if (!dstfmt) { | |
511 dstfmt = &default_pixel_format; | |
512 } | |
513 const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00, | |
514 0x04, 0x04, 0x04, 0x04, | |
515 0x08, 0x08, 0x08, 0x08, | |
516 0x0C, 0x0C, 0x0C, | |
517 0x0C); | |
518 vector unsigned char vswiz; | |
519 vector unsigned int srcvec; | |
520 #define RESHIFT(X) (3 - ((X) >> 3)) | |
521 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); | |
522 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); | |
523 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); | |
524 Uint32 amask; | |
525 /* Use zero for alpha if either surface doesn't have alpha */ | |
526 if (dstfmt->Amask) { | |
527 amask = | |
528 ((srcfmt->Amask) ? RESHIFT(srcfmt-> | |
529 Ashift) : 0x10) << (dstfmt->Ashift); | |
530 } else { | |
531 amask = | |
532 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ | |
533 0xFFFFFFFF); | |
534 } | |
535 #undef RESHIFT | |
536 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask); | |
537 vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0)); | |
538 return (vswiz); | |
539 } | |
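A worked instance of the index math in calc_swizzle32, for a hypothetical ARGB8888 source and ABGR8888 destination (big-endian byte order, as on PowerPC):

    #define RESHIFT(X) (3 - ((X) >> 3))   /* channel shift -> source byte index */
    /* src ARGB8888: Ashift=24 Rshift=16 Gshift=8 Bshift=0
       dst ABGR8888: Ashift=24 Bshift=16 Gshift=8 Rshift=0
       rmask = RESHIFT(16) << 0  = 0x00000001   (dst R <- src byte 1)
       gmask = RESHIFT(8)  << 8  = 0x00000200   (dst G <- src byte 2)
       bmask = RESHIFT(0)  << 16 = 0x00030000   (dst B <- src byte 3)
       amask = RESHIFT(24) << 24 = 0x00000000   (dst A <- src byte 0)
       OR'd: srcvec word = 0x00030201, i.e. permute indices {0,3,2,1};
       vec_splat() replicates the word and the 'plus' vector
       {0,0,0,0, 4,4,4,4, ...} rebases the indices for pixels 1-3. */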
540 | |
541 static void | |
542 Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info) | |
543 { | |
544 int height = info->dst_h; | |
545 Uint8 *src = (Uint8 *) info->src; | |
546 int srcskip = info->src_skip; | |
547 Uint8 *dst = (Uint8 *) info->dst; | |
548 int dstskip = info->dst_skip; | |
549 SDL_PixelFormat *srcfmt = info->src_fmt; | |
550 | |
551 vector unsigned char v0 = vec_splat_u8(0); | |
552 vector unsigned short v8_16 = vec_splat_u16(8); | |
553 vector unsigned short v1_16 = vec_splat_u16(1); | |
554 vector unsigned short v2_16 = vec_splat_u16(2); | |
555 vector unsigned short v3_16 = vec_splat_u16(3); | |
556 vector unsigned int v8_32 = vec_splat_u32(8); | |
557 vector unsigned int v16_32 = vec_add(v8_32, v8_32); | |
558 vector unsigned short v3f = | |
559 VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f, | |
560 0x003f, 0x003f, 0x003f, 0x003f); | |
561 vector unsigned short vfc = | |
562 VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc, | |
563 0x00fc, 0x00fc, 0x00fc, 0x00fc); | |
564 | |
565 /* | |
566 0x10 - 0x1f is the alpha | |
567 0x00 - 0x0e evens are the red | |
568 0x01 - 0x0f odds are zero | |
569 */ | |
570 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01, | |
571 0x10, 0x02, 0x01, 0x01, | |
572 0x10, 0x04, 0x01, 0x01, | |
573 0x10, 0x06, 0x01, | |
574 0x01); | |
575 vector unsigned char vredalpha2 = | |
576 (vector unsigned char) (vec_add((vector unsigned int) vredalpha1, | |
577 vec_sl(v8_32, v16_32)) | |
578 ); | |
579 /* | |
580 0x00 - 0x0f is ARxx ARxx ARxx ARxx | |
581 0x11 - 0x1f odds are blue | |
582 */ | |
583 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11, | |
584 0x04, 0x05, 0x06, 0x13, | |
585 0x08, 0x09, 0x0a, 0x15, | |
586 0x0c, 0x0d, 0x0e, 0x17); | |
587 vector unsigned char vblue2 = | |
588 (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32) | |
589 ); | |
590 /* | |
591 0x00 - 0x0f is ARxB ARxB ARxB ARxB | |
592 0x10 - 0x1e evens are green | |
593 */ | |
594 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03, | |
595 0x04, 0x05, 0x12, 0x07, | |
596 0x08, 0x09, 0x14, 0x0b, | |
597 0x0c, 0x0d, 0x16, 0x0f); | |
598 vector unsigned char vgreen2 = | |
599 (vector unsigned | |
600 char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32)) | |
601 ); | |
602 vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06, | |
603 0x00, 0x0a, 0x00, 0x0e, | |
604 0x00, 0x12, 0x00, 0x16, | |
605 0x00, 0x1a, 0x00, 0x1e); | |
606 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); | |
607 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); | |
608 vector unsigned char valphaPermute = | |
609 vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); | |
610 | |
611 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7); | |
612 vf800 = vec_sl(vf800, vec_splat_u16(8)); | |
613 | |
614 while (height--) { | |
615 int extrawidth; | |
616 vector unsigned char valigner; | |
617 vector unsigned char vsrc; | |
618 vector unsigned char voverflow; | |
619 int width = info->dst_w; | |
620 | |
621 #define ONE_PIXEL_BLEND(condition, widthvar) \ | |
622 while (condition) { \ | |
623 Uint32 Pixel; \ | |
624 unsigned sR, sG, sB, dR, dG, dB, sA; \ | |
625 DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \ | |
626 if(sA) { \ | |
627 unsigned short dstpixel = *((unsigned short *)dst); \ | |
628 dR = (dstpixel >> 8) & 0xf8; \ | |
629 dG = (dstpixel >> 3) & 0xfc; \ | |
630 dB = (dstpixel << 3) & 0xf8; \ | |
631 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ | |
632 *((unsigned short *)dst) = ( \ | |
633 ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \ | |
634 ); \ | |
635 } \ | |
636 src += 4; \ | |
637 dst += 2; \ | |
638 widthvar--; \ | |
639 } | |
640 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); | |
641 extrawidth = (width % 8); | |
642 valigner = VEC_ALIGNER(src); | |
643 vsrc = (vector unsigned char) vec_ld(0, src); | |
644 width -= extrawidth; | |
645 while (width) { | |
646 vector unsigned char valpha; | |
647 vector unsigned char vsrc1, vsrc2; | |
648 vector unsigned char vdst1, vdst2; | |
649 vector unsigned short vR, vG, vB; | |
650 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; | |
651 | |
652 /* Load 8 pixels from src as ARGB */ | |
653 voverflow = (vector unsigned char) vec_ld(15, src); | |
654 vsrc = vec_perm(vsrc, voverflow, valigner); | |
655 vsrc1 = vec_perm(vsrc, vsrc, vpermute); | |
656 src += 16; | |
657 vsrc = (vector unsigned char) vec_ld(15, src); | |
658 voverflow = vec_perm(voverflow, vsrc, valigner); | |
659 vsrc2 = vec_perm(voverflow, voverflow, vpermute); | |
660 src += 16; | |
661 | |
662 /* Load 8 pixels from dst as XRGB */ | |
663 voverflow = vec_ld(0, dst); | |
664 vR = vec_and((vector unsigned short) voverflow, vf800); | |
665 vB = vec_sl((vector unsigned short) voverflow, v3_16); | |
666 vG = vec_sl(vB, v2_16); | |
667 vdst1 = | |
668 (vector unsigned char) vec_perm((vector unsigned char) vR, | |
669 (vector unsigned char) vR, | |
670 vredalpha1); | |
671 vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1); | |
672 vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1); | |
673 vdst2 = | |
674 (vector unsigned char) vec_perm((vector unsigned char) vR, | |
675 (vector unsigned char) vR, | |
676 vredalpha2); | |
677 vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2); | |
678 vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2); | |
679 | |
680 /* Alpha blend 8 pixels as ARGB */ | |
681 valpha = vec_perm(vsrc1, v0, valphaPermute); | |
682 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, | |
683 v8_16); | |
684 valpha = vec_perm(vsrc2, v0, valphaPermute); | |
685 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, | |
686 v8_16); | |
687 | |
688 /* Convert 8 pixels to 565 */ | |
689 vpixel = (vector unsigned short) vec_packpx((vector unsigned int) | |
690 vdst1, | |
691 (vector unsigned int) | |
692 vdst2); | |
693 vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge); | |
694 vgpixel = vec_and(vgpixel, vfc); | |
695 vgpixel = vec_sl(vgpixel, v3_16); | |
696 vrpixel = vec_sl(vpixel, v1_16); | |
697 vrpixel = vec_and(vrpixel, vf800); | |
698 vbpixel = vec_and(vpixel, v3f); | |
699 vdst1 = | |
700 vec_or((vector unsigned char) vrpixel, | |
701 (vector unsigned char) vgpixel); | |
702 vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel); | |
703 | |
704 /* Store 8 pixels */ | |
705 vec_st(vdst1, 0, dst); | |
706 | |
707 width -= 8; | |
708 dst += 16; | |
709 } | |
710 ONE_PIXEL_BLEND((extrawidth), extrawidth); | |
711 #undef ONE_PIXEL_BLEND | |
712 src += srcskip; | |
713 dst += dstskip; | |
714 } | |
715 } | |
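For reference, the 565 pack/unpack that ONE_PIXEL_BLEND performs per pixel in the function above, as standalone scalar helpers (hypothetical names):

    /* RGB565: rrrrrggg gggbbbbb.  Unpack widens each field into the top
       bits of an 8-bit channel; pack truncates back to 5-6-5. */
    static void
    unpack565(unsigned short p, unsigned *r, unsigned *g, unsigned *b)
    {
        *r = (p >> 8) & 0xf8;
        *g = (p >> 3) & 0xfc;
        *b = (p << 3) & 0xf8;
    }

    static unsigned short
    pack565(unsigned r, unsigned g, unsigned b)
    {
        return (unsigned short) (((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3));
    }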
716 | |
717 static void | |
718 Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info) | |
719 { | |
720 int height = info->dst_h; | |
721 Uint32 *srcp = (Uint32 *) info->src; | |
722 int srcskip = info->src_skip >> 2; | |
723 Uint32 *dstp = (Uint32 *) info->dst; | |
724 int dstskip = info->dst_skip >> 2; | |
725 SDL_PixelFormat *srcfmt = info->src_fmt; | |
726 SDL_PixelFormat *dstfmt = info->dst_fmt; | |
727 unsigned sA = info->a; | |
728 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | |
729 Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; | |
730 Uint32 ckey = info->colorkey; | |
731 vector unsigned char mergePermute; | |
732 vector unsigned char vsrcPermute; | |
733 vector unsigned char vdstPermute; | |
734 vector unsigned char vsdstPermute; | |
735 vector unsigned char valpha; | |
736 vector unsigned char valphamask; | |
737 vector unsigned char vbits; | |
738 vector unsigned char v0; | |
739 vector unsigned short v1; | |
740 vector unsigned short v8; | |
741 vector unsigned int vckey; | |
742 vector unsigned int vrgbmask; | |
743 | |
744 mergePermute = VEC_MERGE_PERMUTE(); | |
745 v0 = vec_splat_u8(0); | |
746 v1 = vec_splat_u16(1); | |
747 v8 = vec_splat_u16(8); | |
748 | |
749 /* set the alpha to 255 on the destination surf */ | |
750 valphamask = VEC_ALPHA_MASK(); | |
751 | |
752 vsrcPermute = calc_swizzle32(srcfmt, NULL); | |
753 vdstPermute = calc_swizzle32(NULL, dstfmt); | |
754 vsdstPermute = calc_swizzle32(dstfmt, NULL); | |
755 | |
756 /* set a vector full of alpha and 255-alpha */ | |
757 ((unsigned char *) &valpha)[0] = sA; | |
758 valpha = vec_splat(valpha, 0); | |
759 vbits = (vector unsigned char) vec_splat_s8(-1); | |
760 | |
761 ckey &= rgbmask; | |
762 ((unsigned int *) (char *) &vckey)[0] = ckey; | |
763 vckey = vec_splat(vckey, 0); | |
764 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask; | |
765 vrgbmask = vec_splat(vrgbmask, 0); | |
766 | |
767 while (height--) { | |
768 int width = info->dst_w; | |
769 #define ONE_PIXEL_BLEND(condition, widthvar) \ | |
770 while (condition) { \ | |
771 Uint32 Pixel; \ | |
772 unsigned sR, sG, sB, dR, dG, dB; \ | |
773 RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \ | |
774 if(sA && Pixel != ckey) { \ | |
775 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ | |
776 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ | |
777 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ | |
778 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ | |
779 } \ | |
780 dstp++; \ | |
781 srcp++; \ | |
782 widthvar--; \ | |
783 } | |
784 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | |
785 if (width > 0) { | |
786 int extrawidth = (width % 4); | |
787 vector unsigned char valigner = VEC_ALIGNER(srcp); | |
788 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); | |
789 width -= extrawidth; | |
790 while (width) { | |
791 vector unsigned char vsel; | |
792 vector unsigned char voverflow; | |
793 vector unsigned char vd; | |
794 vector unsigned char vd_orig; | |
795 | |
796 /* s = *srcp */ | |
797 voverflow = (vector unsigned char) vec_ld(15, srcp); | |
798 vs = vec_perm(vs, voverflow, valigner); | |
799 | |
800 /* vsel is set for items that match the key */ | |
801 vsel = | |
802 (vector unsigned char) vec_and((vector unsigned int) vs, | |
803 vrgbmask); | |
804 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int) | |
805 vsel, vckey); | |
806 | |
807 /* permute to source format */ | |
808 vs = vec_perm(vs, valpha, vsrcPermute); | |
809 | |
810 /* d = *dstp */ | |
811 vd = (vector unsigned char) vec_ld(0, dstp); | |
812 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); | |
813 | |
814 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | |
815 | |
816 /* set the alpha channel to full on */ | |
817 vd = vec_or(vd, valphamask); | |
818 | |
819 /* mask out color key */ | |
820 vd = vec_sel(vd, vd_orig, vsel); | |
821 | |
822 /* permute to dest format */ | |
823 vd = vec_perm(vd, vbits, vdstPermute); | |
824 | |
825 /* *dstp = res */ | |
826 vec_st((vector unsigned int) vd, 0, dstp); | |
827 | |
828 srcp += 4; | |
829 dstp += 4; | |
830 width -= 4; | |
831 vs = voverflow; | |
832 } | |
833 ONE_PIXEL_BLEND((extrawidth), extrawidth); | |
834 } | |
835 #undef ONE_PIXEL_BLEND | |
836 | |
837 srcp += srcskip; | |
838 dstp += dstskip; | |
839 } | |
840 } | |
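The vec_cmpeq/vec_sel pair above is a branchless select: pixels whose RGB bits equal the key keep the original destination. A scalar analogue (illustrative helper; Uint32 is SDL's 32-bit type from SDL_stdinc.h):

    static Uint32
    apply_colorkey(Uint32 blended, Uint32 d_orig, Uint32 s,
                   Uint32 rgbmask, Uint32 ckey)
    {
        Uint32 keyed = ((s & rgbmask) == ckey) ? 0xFFFFFFFFu : 0u;
        return (blended & ~keyed) | (d_orig & keyed);  /* vec_sel(vd, vd_orig, vsel) */
    }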
841 | |
842 | |
843 static void | |
844 Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info) | |
845 { | |
846 int width = info->dst_w; | |
847 int height = info->dst_h; | |
848 Uint32 *srcp = (Uint32 *) info->src; | |
849 int srcskip = info->src_skip >> 2; | |
850 Uint32 *dstp = (Uint32 *) info->dst; | |
851 int dstskip = info->dst_skip >> 2; | |
852 SDL_PixelFormat *srcfmt = info->src_fmt; | |
853 SDL_PixelFormat *dstfmt = info->dst_fmt; | |
854 vector unsigned char mergePermute; | |
855 vector unsigned char valphaPermute; | |
856 vector unsigned char vsrcPermute; | |
857 vector unsigned char vdstPermute; | |
858 vector unsigned char vsdstPermute; | |
859 vector unsigned char valphamask; | |
860 vector unsigned char vpixelmask; | |
861 vector unsigned char v0; | |
862 vector unsigned short v1; | |
863 vector unsigned short v8; | |
864 | |
865 v0 = vec_splat_u8(0); | |
866 v1 = vec_splat_u16(1); | |
867 v8 = vec_splat_u16(8); | |
868 mergePermute = VEC_MERGE_PERMUTE(); | |
869 valphamask = VEC_ALPHA_MASK(); | |
870 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); | |
871 vpixelmask = vec_nor(valphamask, v0); | |
872 vsrcPermute = calc_swizzle32(srcfmt, NULL); | |
873 vdstPermute = calc_swizzle32(NULL, dstfmt); | |
874 vsdstPermute = calc_swizzle32(dstfmt, NULL); | |
875 | |
876 while (height--) { | |
877 width = info->dst_w; | |
878 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | |
879 Uint32 Pixel; \ | |
880 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ | |
881 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ | |
882 if(sA) { \ | |
883 DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \ | |
884 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ | |
885 ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ | |
886 } \ | |
887 ++srcp; \ | |
888 ++dstp; \ | |
889 widthvar--; \ | |
890 } | |
891 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | |
892 if (width > 0) { | |
893 /* vsrcPermute */ | |
894 /* vdstPermute */ | |
895 int extrawidth = (width % 4); | |
896 vector unsigned char valigner = VEC_ALIGNER(srcp); | |
897 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); | |
898 width -= extrawidth; | |
899 while (width) { | |
900 vector unsigned char voverflow; | |
901 vector unsigned char vd; | |
902 vector unsigned char valpha; | |
903 vector unsigned char vdstalpha; | |
904 /* s = *srcp */ | |
905 voverflow = (vector unsigned char) vec_ld(15, srcp); | |
906 vs = vec_perm(vs, voverflow, valigner); | |
907 vs = vec_perm(vs, v0, vsrcPermute); | |
908 | |
909 valpha = vec_perm(vs, v0, valphaPermute); | |
910 | |
911 /* d = *dstp */ | |
912 vd = (vector unsigned char) vec_ld(0, dstp); | |
913 vd = vec_perm(vd, v0, vsdstPermute); | |
914 vdstalpha = vec_and(vd, valphamask); | |
915 | |
916 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | |
917 | |
918 /* set the alpha to the dest alpha */ | |
919 vd = vec_and(vd, vpixelmask); | |
920 vd = vec_or(vd, vdstalpha); | |
921 vd = vec_perm(vd, v0, vdstPermute); | |
922 | |
923 /* *dstp = res */ | |
924 vec_st((vector unsigned int) vd, 0, dstp); | |
925 | |
926 srcp += 4; | |
927 dstp += 4; | |
928 width -= 4; | |
929 vs = voverflow; | |
930 | |
931 } | |
932 ONE_PIXEL_BLEND((extrawidth), extrawidth); | |
933 } | |
934 srcp += srcskip; | |
935 dstp += dstskip; | |
936 #undef ONE_PIXEL_BLEND | |
937 } | |
938 } | |
939 | |
940 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | |
941 static void | |
942 BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info) | |
943 { | |
944 int width = info->dst_w; | |
945 int height = info->dst_h; | |
946 Uint32 *srcp = (Uint32 *) info->src; | |
947 int srcskip = info->src_skip >> 2; | |
948 Uint32 *dstp = (Uint32 *) info->dst; | |
949 int dstskip = info->dst_skip >> 2; | |
950 vector unsigned char mergePermute; | |
951 vector unsigned char valphaPermute; | |
952 vector unsigned char valphamask; | |
953 vector unsigned char vpixelmask; | |
954 vector unsigned char v0; | |
955 vector unsigned short v1; | |
956 vector unsigned short v8; | |
957 v0 = vec_splat_u8(0); | |
958 v1 = vec_splat_u16(1); | |
959 v8 = vec_splat_u16(8); | |
960 mergePermute = VEC_MERGE_PERMUTE(); | |
961 valphamask = VEC_ALPHA_MASK(); | |
962 valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); | |
963 | |
964 | |
965 vpixelmask = vec_nor(valphamask, v0); | |
966 while (height--) { | |
967 width = info->dst_w; | |
968 #define ONE_PIXEL_BLEND(condition, widthvar) \ | |
969 while ((condition)) { \ | |
970 Uint32 dalpha; \ | |
971 Uint32 d; \ | |
972 Uint32 s1; \ | |
973 Uint32 d1; \ | |
974 Uint32 s = *srcp; \ | |
975 Uint32 alpha = s >> 24; \ | |
976 if(alpha) { \ | |
977 if(alpha == SDL_ALPHA_OPAQUE) { \ | |
978 *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \ | |
979 } else { \ | |
980 d = *dstp; \ | |
981 dalpha = d & 0xff000000; \ | |
982 s1 = s & 0xff00ff; \ | |
983 d1 = d & 0xff00ff; \ | |
984 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ | |
985 s &= 0xff00; \ | |
986 d &= 0xff00; \ | |
987 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ | |
988 *dstp = d1 | d | dalpha; \ | |
989 } \ | |
990 } \ | |
991 ++srcp; \ | |
992 ++dstp; \ | |
993 widthvar--; \ | |
994 } | |
995 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | |
996 if (width > 0) { | |
997 int extrawidth = (width % 4); | |
998 vector unsigned char valigner = VEC_ALIGNER(srcp); | |
999 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); | |
1000 width -= extrawidth; | |
1001 while (width) { | |
1002 vector unsigned char voverflow; | |
1003 vector unsigned char vd; | |
1004 vector unsigned char valpha; | |
1005 vector unsigned char vdstalpha; | |
1006 /* s = *srcp */ | |
1007 voverflow = (vector unsigned char) vec_ld(15, srcp); | |
1008 vs = vec_perm(vs, voverflow, valigner); | |
1009 | |
1010 valpha = vec_perm(vs, v0, valphaPermute); | |
1011 | |
1012 /* d = *dstp */ | |
1013 vd = (vector unsigned char) vec_ld(0, dstp); | |
1014 vdstalpha = vec_and(vd, valphamask); | |
1015 | |
1016 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | |
1017 | |
1018 /* set the alpha to the dest alpha */ | |
1019 vd = vec_and(vd, vpixelmask); | |
1020 vd = vec_or(vd, vdstalpha); | |
1021 | |
1022 /* *dstp = res */ | |
1023 vec_st((vector unsigned int) vd, 0, dstp); | |
1024 | |
1025 srcp += 4; | |
1026 dstp += 4; | |
1027 width -= 4; | |
1028 vs = voverflow; | |
1029 } | |
1030 ONE_PIXEL_BLEND((extrawidth), extrawidth); | |
1031 } | |
1032 srcp += srcskip; | |
1033 dstp += dstskip; | |
1034 } | |
1035 #undef ONE_PIXEL_BLEND | |
1036 } | |
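The scalar path in ONE_PIXEL_BLEND above relies on a classic packing trick: masking with 0x00ff00ff leaves red and blue 16 bits apart, so a single 32-bit multiply by alpha (at most 255) blends both channels at once, and the two's-complement wraparound in (s1 - d1) cancels out once d1 is added back and the result re-masked. Written out as a hedged standalone function:

    /* Same arithmetic as the macro: red+blue in one multiply, green in a
       second, destination alpha preserved.  alpha is 0..255. */
    static Uint32
    blend_pixel(Uint32 s, Uint32 d, Uint32 alpha)
    {
        Uint32 s1 = s & 0x00ff00ff, d1 = d & 0x00ff00ff;
        Uint32 sg = s & 0x0000ff00, dg = d & 0x0000ff00;
        d1 = (d1 + (((s1 - d1) * alpha) >> 8)) & 0x00ff00ff;
        dg = (dg + (((sg - dg) * alpha) >> 8)) & 0x0000ff00;
        return d1 | dg | (d & 0xff000000);
    }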
1037 | |
1038 static void | |
1039 Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info) | |
1040 { | |
1041 /* XXX : 6 */ | |
1042 int height = info->dst_h; | |
1043 Uint32 *srcp = (Uint32 *) info->src; | |
1044 int srcskip = info->src_skip >> 2; | |
1045 Uint32 *dstp = (Uint32 *) info->dst; | |
1046 int dstskip = info->dst_skip >> 2; | |
1047 SDL_PixelFormat *srcfmt = info->src_fmt; | |
1048 SDL_PixelFormat *dstfmt = info->dst_fmt; | |
1049 unsigned sA = info->a; | |
1050 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | |
1051 vector unsigned char mergePermute; | |
1052 vector unsigned char vsrcPermute; | |
1053 vector unsigned char vdstPermute; | |
1054 vector unsigned char vsdstPermute; | |
1055 vector unsigned char valpha; | |
1056 vector unsigned char valphamask; | |
1057 vector unsigned char vbits; | |
1058 vector unsigned short v1; | |
1059 vector unsigned short v8; | |
1060 | |
1061 mergePermute = VEC_MERGE_PERMUTE(); | |
1062 v1 = vec_splat_u16(1); | |
1063 v8 = vec_splat_u16(8); | |
1064 | |
1065 /* set the alpha to 255 on the destination surf */ | |
1066 valphamask = VEC_ALPHA_MASK(); | |
1067 | |
1068 vsrcPermute = calc_swizzle32(srcfmt, NULL); | |
1069 vdstPermute = calc_swizzle32(NULL, dstfmt); | |
1070 vsdstPermute = calc_swizzle32(dstfmt, NULL); | |
1071 | |
1072 /* set a vector full of alpha and 255-alpha */ | |
1073 ((unsigned char *) &valpha)[0] = sA; | |
1074 valpha = vec_splat(valpha, 0); | |
1075 vbits = (vector unsigned char) vec_splat_s8(-1); | |
1076 | |
1077 while (height--) { | |
1078 int width = info->dst_w; | |
1079 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | |
1080 Uint32 Pixel; \ | |
1081 unsigned sR, sG, sB, dR, dG, dB; \ | |
1082 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ | |
1083 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ | |
1084 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ | |
1085 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ | |
1086 ++srcp; \ | |
1087 ++dstp; \ | |
1088 widthvar--; \ | |
1089 } | |
1090 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | |
1091 if (width > 0) { | |
1092 int extrawidth = (width % 4); | |
1093 vector unsigned char valigner = VEC_ALIGNER(srcp); | |
1094 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); | |
1095 width -= extrawidth; | |
1096 while (width) { | |
1097 vector unsigned char voverflow; | |
1098 vector unsigned char vd; | |
1099 | |
1100 /* s = *srcp */ | |
1101 voverflow = (vector unsigned char) vec_ld(15, srcp); | |
1102 vs = vec_perm(vs, voverflow, valigner); | |
1103 vs = vec_perm(vs, valpha, vsrcPermute); | |
1104 | |
1105 /* d = *dstp */ | |
1106 vd = (vector unsigned char) vec_ld(0, dstp); | |
1107 vd = vec_perm(vd, vd, vsdstPermute); | |
1108 | |
1109 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | |
1110 | |
1111 /* set the alpha channel to full on */ | |
1112 vd = vec_or(vd, valphamask); | |
1113 vd = vec_perm(vd, vbits, vdstPermute); | |
1114 | |
1115 /* *dstp = res */ | |
1116 vec_st((vector unsigned int) vd, 0, dstp); | |
1117 | |
1118 srcp += 4; | |
1119 dstp += 4; | |
1120 width -= 4; | |
1121 vs = voverflow; | |
1122 } | |
1123 ONE_PIXEL_BLEND((extrawidth), extrawidth); | |
1124 } | |
1125 #undef ONE_PIXEL_BLEND | |
1126 | |
1127 srcp += srcskip; | |
1128 dstp += dstskip; | |
1129 } | |
1130 | |
1131 } | |
1132 | |
1133 | |
1134 /* fast RGB888->(A)RGB888 blending */ | |
1135 static void | |
1136 BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info) | |
1137 { | |
1138 unsigned alpha = info->a; | |
1139 int height = info->dst_h; | |
1140 Uint32 *srcp = (Uint32 *) info->src; | |
1141 int srcskip = info->src_skip >> 2; | |
1142 Uint32 *dstp = (Uint32 *) info->dst; | |
1143 int dstskip = info->dst_skip >> 2; | |
1144 vector unsigned char mergePermute; | |
1145 vector unsigned char valpha; | |
1146 vector unsigned char valphamask; | |
1147 vector unsigned short v1; | |
1148 vector unsigned short v8; | |
1149 | |
1150 mergePermute = VEC_MERGE_PERMUTE(); | |
1151 v1 = vec_splat_u16(1); | |
1152 v8 = vec_splat_u16(8); | |
1153 | |
1154 /* set the alpha to 255 on the destination surf */ | |
1155 valphamask = VEC_ALPHA_MASK(); | |
1156 | |
1157 /* set a vector full of alpha and 255-alpha */ | |
1158 ((unsigned char *) &valpha)[0] = alpha; | |
1159 valpha = vec_splat(valpha, 0); | |
1160 | |
1161 while (height--) { | |
1162 int width = info->dst_w; | |
1163 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ | |
1164 Uint32 s = *srcp; \ | |
1165 Uint32 d = *dstp; \ | |
1166 Uint32 s1 = s & 0xff00ff; \ | |
1167 Uint32 d1 = d & 0xff00ff; \ | |
1168 d1 = (d1 + ((s1 - d1) * alpha >> 8)) \ | |
1169 & 0xff00ff; \ | |
1170 s &= 0xff00; \ | |
1171 d &= 0xff00; \ | |
1172 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ | |
1173 *dstp = d1 | d | 0xff000000; \ | |
1174 ++srcp; \ | |
1175 ++dstp; \ | |
1176 widthvar--; \ | |
1177 } | |
1178 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); | |
1179 if (width > 0) { | |
1180 int extrawidth = (width % 4); | |
1181 vector unsigned char valigner = VEC_ALIGNER(srcp); | |
1182 vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); | |
1183 width -= extrawidth; | |
1184 while (width) { | |
1185 vector unsigned char voverflow; | |
1186 vector unsigned char vd; | |
1187 | |
1188 /* s = *srcp */ | |
1189 voverflow = (vector unsigned char) vec_ld(15, srcp); | |
1190 vs = vec_perm(vs, voverflow, valigner); | |
1191 | |
1192 /* d = *dstp */ | |
1193 vd = (vector unsigned char) vec_ld(0, dstp); | |
1194 | |
1195 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); | |
1196 | |
1197 /* set the alpha channel to full on */ | |
1198 vd = vec_or(vd, valphamask); | |
1199 | |
1200 /* *dstp = res */ | |
1201 vec_st((vector unsigned int) vd, 0, dstp); | |
1202 | |
1203 srcp += 4; | |
1204 dstp += 4; | |
1205 width -= 4; | |
1206 vs = voverflow; | |
1207 } | |
1208 ONE_PIXEL_BLEND((extrawidth), extrawidth); | |
1209 } | |
1210 #undef ONE_PIXEL_BLEND | |
1211 | |
1212 srcp += srcskip; | |
1213 dstp += dstskip; | |
1214 } | |
1215 } | |
1216 | |
1217 #if __MWERKS__ | |
1218 #pragma altivec_model off | |
1219 #endif | |
1220 #endif /* SDL_ALTIVEC_BLITTERS */ | |
1221 | |
1222 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | 422 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
1223 static void | 423 static void |
1224 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info) | 424 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info) |
1225 { | 425 { |
1226 int width = info->dst_w; | 426 int width = info->dst_w; |
1335 /* *INDENT-ON* */ | 535 /* *INDENT-ON* */ |
1336 srcp += srcskip; | 536 srcp += srcskip; |
1337 dstp += dstskip; | 537 dstp += dstskip; |
1338 } | 538 } |
1339 } | 539 } |
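The body of BlitRGBtoRGBSurfaceAlpha128 is elided by the diff view. The alpha=128 case is special because the blend collapses to a plain average, and the well-known idiom below (a reconstruction of the standard trick, not necessarily the elided code verbatim) averages all three channels of two ARGB pixels in one pass:

    /* Halve each channel with its low bit masked off so the shift cannot
       leak across channel boundaries, then add back the carry the two
       dropped low bits would have produced together. */
    Uint32 avg = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
               + (s & d & 0x00010101);
    *dstp = avg | 0xff000000;   /* destination treated as opaque */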
1340 | |
1341 #ifdef __3dNOW__ | |
1342 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | |
1343 static void | |
1344 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) | |
1345 { | |
1346 int width = info->dst_w; | |
1347 int height = info->dst_h; | |
1348 Uint32 *srcp = (Uint32 *) info->src; | |
1349 int srcskip = info->src_skip >> 2; | |
1350 Uint32 *dstp = (Uint32 *) info->dst; | |
1351 int dstskip = info->dst_skip >> 2; | |
1352 SDL_PixelFormat *sf = info->src_fmt; | |
1353 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | |
1354 Uint32 amask = sf->Amask; | |
1355 Uint32 ashift = sf->Ashift; | |
1356 Uint64 multmask; | |
1357 | |
1358 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | |
1359 | |
1360 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
1361 multmask = 0xFFFF; | |
1362 multmask <<= (ashift * 2); | |
1363 multmask = ~multmask; | |
1364 dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ | |
1365 | |
1366 while (height--) { | |
1367 /* *INDENT-OFF* */ | |
1368 DUFFS_LOOP4({ | |
1369 Uint32 alpha; | |
1370 | |
1371 _m_prefetch(srcp + 16); | |
1372 _m_prefetch(dstp + 16); | |
1373 | |
1374 alpha = *srcp & amask; | |
1375 if (alpha == 0) { | |
1376 /* do nothing */ | |
1377 } else if (alpha == amask) { | |
1378 /* copy RGB, keep dst alpha */ | |
1379 *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); | |
1380 } else { | |
1381 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ | |
1382 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ | |
1383 | |
1384 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
1385 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
1386 | |
1387 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ | |
1388 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ | |
1389 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
1390 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
1391 mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ | |
1392 | |
1393 /* blend */ | |
1394 src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */ | |
1395 src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */ | |
1396 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ | |
1397 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */ | |
1398 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
1399 | |
1400 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
1401 } | |
1402 ++srcp; | |
1403 ++dstp; | |
1404 }, width); | |
1405 /* *INDENT-ON* */ | |
1406 srcp += srcskip; | |
1407 dstp += dstskip; | |
1408 } | |
1409 _mm_empty(); | |
1410 } | |
1411 | |
1412 #endif /* __3dNOW__ */ | |
1413 | 540 |
1414 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ | 541 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ |
1415 | 542 |
1416 /* blend a single 16 bit pixel at 50% */ | 543 /* blend a single 16 bit pixel at 50% */ |
1417 #define BLEND16_50(d, s, mask) \ | 544 #define BLEND16_50(d, s, mask) \ |
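The rest of BLEND16_50 falls outside this hunk. The idea, sketched with a hypothetical macro name, is the same masked-average trick as the 32-bit alpha=128 case, applied to packed 16-bit pixels: mask clears the low bit of every field (0xf7de for 565, 0xfbde for 555) so the shift cannot cross field boundaries:

    /* Evaluate in 32-bit arithmetic; the AND term restores the rounding
       carry of each field's dropped low bit. */
    #define BLEND16_50_SKETCH(d, s, mask) \
        (((((s) & (mask)) + ((d) & (mask))) >> 1) + ((s) & (d) & ~(mask) & 0xffff))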
2128 switch (df->BytesPerPixel) { | 1255 switch (df->BytesPerPixel) { |
2129 case 1: | 1256 case 1: |
2130 return BlitNto1PixelAlpha; | 1257 return BlitNto1PixelAlpha; |
2131 | 1258 |
2132 case 2: | 1259 case 2: |
2133 #if SDL_ALTIVEC_BLITTERS | 1260 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 |
2134 if (sf->BytesPerPixel == 4 | 1261 && sf->Gmask == 0xff00 |
2135 && df->Gmask == 0x7e0 && df->Bmask == 0x1f | 1262 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) |
2136 && SDL_HasAltiVec()) | 1263 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { |
2137 return Blit32to565PixelAlphaAltivec; | |
2138 else | |
2139 #endif | |
2140 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 | |
2141 && sf->Gmask == 0xff00 | |
2142 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) | |
2143 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { | |
2144 if (df->Gmask == 0x7e0) | 1264 if (df->Gmask == 0x7e0) |
2145 return BlitARGBto565PixelAlpha; | 1265 return BlitARGBto565PixelAlpha; |
2146 else if (df->Gmask == 0x3e0) | 1266 else if (df->Gmask == 0x3e0) |
2147 return BlitARGBto555PixelAlpha; | 1267 return BlitARGBto555PixelAlpha; |
2148 } | 1268 } |
2150 | 1270 |
2151 case 4: | 1271 case 4: |
2152 if (sf->Rmask == df->Rmask | 1272 if (sf->Rmask == df->Rmask |
2153 && sf->Gmask == df->Gmask | 1273 && sf->Gmask == df->Gmask |
2154 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { | 1274 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { |
2155 #if defined(__MMX__) || defined(__3dNOW__) | 1275 #if defined(__MMX__) |
2156 if (sf->Rshift % 8 == 0 | 1276 if (sf->Rshift % 8 == 0 |
2157 && sf->Gshift % 8 == 0 | 1277 && sf->Gshift % 8 == 0 |
2158 && sf->Bshift % 8 == 0 | 1278 && sf->Bshift % 8 == 0 |
2159 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { | 1279 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { |
2160 #ifdef __3dNOW__ | |
2161 if (SDL_Has3DNow()) | |
2162 return BlitRGBtoRGBPixelAlphaMMX3DNOW; | |
2163 #endif | |
2164 #ifdef __MMX__ | |
2165 if (SDL_HasMMX()) | 1280 if (SDL_HasMMX()) |
2166 return BlitRGBtoRGBPixelAlphaMMX; | 1281 return BlitRGBtoRGBPixelAlphaMMX; |
2167 #endif | |
2168 } | 1282 } |
2169 #endif /* __MMX__ || __3dNOW__ */ | 1283 #endif /* __MMX__ */ |
2170 if (sf->Amask == 0xff000000) { | 1284 if (sf->Amask == 0xff000000) { |
2171 #if SDL_ALTIVEC_BLITTERS | |
2172 if (SDL_HasAltiVec()) | |
2173 return BlitRGBtoRGBPixelAlphaAltivec; | |
2174 #endif | |
2175 return BlitRGBtoRGBPixelAlpha; | 1285 return BlitRGBtoRGBPixelAlpha; |
2176 } | 1286 } |
2177 } | 1287 } |
2178 #if SDL_ALTIVEC_BLITTERS | 1288 return BlitNtoNPixelAlpha; |
2179 if (sf->Amask && sf->BytesPerPixel == 4 && SDL_HasAltiVec()) | |
2180 return Blit32to32PixelAlphaAltivec; | |
2181 else | |
2182 #endif | |
2183 return BlitNtoNPixelAlpha; | |
2184 | 1289 |
2185 case 3: | 1290 case 3: |
2186 default: | 1291 default: |
2187 return BlitNtoNPixelAlpha; | 1292 return BlitNtoNPixelAlpha; |
2188 } | 1293 } |
2224 && sf->Gshift % 8 == 0 | 1329 && sf->Gshift % 8 == 0 |
2225 && sf->Bshift % 8 == 0 && SDL_HasMMX()) | 1330 && sf->Bshift % 8 == 0 && SDL_HasMMX()) |
2226 return BlitRGBtoRGBSurfaceAlphaMMX; | 1331 return BlitRGBtoRGBSurfaceAlphaMMX; |
2227 #endif | 1332 #endif |
2228 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) { | 1333 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) { |
2229 #if SDL_ALTIVEC_BLITTERS | |
2230 if (SDL_HasAltiVec()) | |
2231 return BlitRGBtoRGBSurfaceAlphaAltivec; | |
2232 #endif | |
2233 return BlitRGBtoRGBSurfaceAlpha; | 1334 return BlitRGBtoRGBSurfaceAlpha; |
2234 } | 1335 } |
2235 } | 1336 } |
2236 #if SDL_ALTIVEC_BLITTERS | 1337 return BlitNtoNSurfaceAlpha; |
2237 if ((sf->BytesPerPixel == 4) && SDL_HasAltiVec()) | |
2238 return Blit32to32SurfaceAlphaAltivec; | |
2239 else | |
2240 #endif | |
2241 return BlitNtoNSurfaceAlpha; | |
2242 | 1338 |
2243 case 3: | 1339 case 3: |
2244 default: | 1340 default: |
2245 return BlitNtoNSurfaceAlpha; | 1341 return BlitNtoNSurfaceAlpha; |
2246 } | 1342 } |
2250 case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: | 1346 case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: |
2251 if (sf->Amask == 0) { | 1347 if (sf->Amask == 0) { |
2252 if (df->BytesPerPixel == 1) | 1348 if (df->BytesPerPixel == 1) |
2253 return BlitNto1SurfaceAlphaKey; | 1349 return BlitNto1SurfaceAlphaKey; |
2254 else | 1350 else |
2255 #if SDL_ALTIVEC_BLITTERS | |
2256 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && | |
2257 SDL_HasAltiVec()) | |
2258 return Blit32to32SurfaceAlphaKeyAltivec; | |
2259 else | |
2260 #endif | |
2261 return BlitNtoNSurfaceAlphaKey; | 1351 return BlitNtoNSurfaceAlphaKey; |
2262 } | 1352 } |
2263 break; | 1353 break; |
2264 } | 1354 } |
2265 | 1355 |