# HG changeset patch # User Sam Lantinga # Date 1297464664 28800 # Node ID 6a65c1fc07af8fb6912bc75d6851a76e79f525b6 # Parent f26314c200719c5a2104e392b7304880427a7d48 Updated CPU detection code for SSE3 and SSE4 and removed obsolete 3DNow! and Altivec support. diff -r f26314c20071 -r 6a65c1fc07af configure.in --- a/configure.in Fri Feb 11 14:42:58 2011 -0800 +++ b/configure.in Fri Feb 11 14:51:04 2011 -0800 @@ -501,33 +501,6 @@ fi fi - AC_ARG_ENABLE(3dnow, -AC_HELP_STRING([--enable-3dnow], [use MMX assembly routines [[default=yes]]]), - , enable_3dnow=yes) - if test x$enable_3dnow = xyes; then - save_CFLAGS="$CFLAGS" - have_gcc_3dnow=no - AC_MSG_CHECKING(for GCC -m3dnow option) - amd3dnow_CFLAGS="-m3dnow" - CFLAGS="$save_CFLAGS $amd3dnow_CFLAGS" - - AC_TRY_COMPILE([ - #include - #ifndef __3dNOW__ - #error Assembler CPP flag not enabled - #endif - ],[ - ],[ - have_gcc_3dnow=yes - ]) - AC_MSG_RESULT($have_gcc_3dnow) - CFLAGS="$save_CFLAGS" - - if test x$have_gcc_3dnow = xyes; then - EXTRA_CFLAGS="$EXTRA_CFLAGS $amd3dnow_CFLAGS" - fi - fi - AC_ARG_ENABLE(sse, AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]), , enable_sse=yes) @@ -599,82 +572,6 @@ EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS" fi fi - - AC_ARG_ENABLE(altivec, -AC_HELP_STRING([--enable-altivec], [use Altivec assembly routines [[default=yes]]]), - , enable_altivec=yes) - if test x$enable_altivec = xyes; then - save_CFLAGS="$CFLAGS" - have_gcc_altivec=no - have_altivec_h_hdr=no - altivec_CFLAGS="-maltivec" - CFLAGS="$save_CFLAGS $altivec_CFLAGS" - - AC_MSG_CHECKING(for Altivec with GCC altivec.h and -maltivec option) - AC_TRY_COMPILE([ - #include - vector unsigned int vzero() { - return vec_splat_u32(0); - } - ],[ - ],[ - have_gcc_altivec=yes - have_altivec_h_hdr=yes - ]) - AC_MSG_RESULT($have_gcc_altivec) - - if test x$have_gcc_altivec = xno; then - AC_MSG_CHECKING(for Altivec with GCC -maltivec option) - AC_TRY_COMPILE([ - vector unsigned int vzero() { - return vec_splat_u32(0); - } - ],[ - ],[ - have_gcc_altivec=yes - ]) - AC_MSG_RESULT($have_gcc_altivec) - fi - - if test x$have_gcc_altivec = xno; then - AC_MSG_CHECKING(for Altivec with GCC altivec.h and -faltivec option) - altivec_CFLAGS="-faltivec" - CFLAGS="$save_CFLAGS $altivec_CFLAGS" - AC_TRY_COMPILE([ - #include - vector unsigned int vzero() { - return vec_splat_u32(0); - } - ],[ - ],[ - have_gcc_altivec=yes - have_altivec_h_hdr=yes - ]) - AC_MSG_RESULT($have_gcc_altivec) - fi - - if test x$have_gcc_altivec = xno; then - AC_MSG_CHECKING(for Altivec with GCC -faltivec option) - AC_TRY_COMPILE([ - vector unsigned int vzero() { - return vec_splat_u32(0); - } - ],[ - ],[ - have_gcc_altivec=yes - ]) - AC_MSG_RESULT($have_gcc_altivec) - fi - CFLAGS="$save_CFLAGS" - - if test x$have_gcc_altivec = xyes; then - AC_DEFINE(SDL_ALTIVEC_BLITTERS) - if test x$have_altivec_h_hdr = xyes; then - AC_DEFINE(HAVE_ALTIVEC_H) - fi - EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS" - fi - fi fi dnl See if the OSS audio interface is supported diff -r f26314c20071 -r 6a65c1fc07af include/SDL_config.h.in --- a/include/SDL_config.h.in Fri Feb 11 14:42:58 2011 -0800 +++ b/include/SDL_config.h.in Fri Feb 11 14:51:04 2011 -0800 @@ -82,7 +82,6 @@ #undef HAVE_MATH_H #undef HAVE_ICONV_H #undef HAVE_SIGNAL_H -#undef HAVE_ALTIVEC_H /* C library functions */ #undef HAVE_MALLOC @@ -303,6 +302,5 @@ /* Enable assembly routines */ #undef SDL_ASSEMBLY_ROUTINES -#undef SDL_ALTIVEC_BLITTERS #endif /* _SDL_config_h */ diff -r f26314c20071 -r 6a65c1fc07af include/SDL_config_macosx.h --- a/include/SDL_config_macosx.h Fri Feb 11 14:42:58 2011 -0800 +++ b/include/SDL_config_macosx.h Fri Feb 11 14:51:04 2011 -0800 @@ -168,8 +168,5 @@ /* Enable assembly routines */ #define SDL_ASSEMBLY_ROUTINES 1 -#ifdef __ppc__ -#define SDL_ALTIVEC_BLITTERS 1 -#endif #endif /* _SDL_config_macosx_h */ diff -r f26314c20071 -r 6a65c1fc07af include/SDL_cpuinfo.h --- a/include/SDL_cpuinfo.h Fri Feb 11 14:42:58 2011 -0800 +++ b/include/SDL_cpuinfo.h Fri Feb 11 14:51:04 2011 -0800 @@ -70,21 +70,6 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void); /** - * This function returns true if the CPU has MMX Ext.\ features. - */ -extern DECLSPEC SDL_bool SDLCALL SDL_HasMMXExt(void); - -/** - * This function returns true if the CPU has 3DNow!\ features. - */ -extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNow(void); - -/** - * This function returns true if the CPU has 3DNow!\ Ext.\ features. - */ -extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNowExt(void); - -/** * This function returns true if the CPU has SSE features. */ extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE(void); @@ -95,9 +80,15 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE2(void); /** - * This function returns true if the CPU has AltiVec features. + * This function returns true if the CPU has SSE3 features. */ -extern DECLSPEC SDL_bool SDLCALL SDL_HasAltiVec(void); +extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE3(void); + +/** + * This function returns true if the CPU has SSE4 features. + */ +extern DECLSPEC SDL_bool SDLCALL SDL_HasSSE4(void); + /* Ends C function definitions when using C++ */ #ifdef __cplusplus diff -r f26314c20071 -r 6a65c1fc07af src/cpuinfo/SDL_cpuinfo.c --- a/src/cpuinfo/SDL_cpuinfo.c Fri Feb 11 14:42:58 2011 -0800 +++ b/src/cpuinfo/SDL_cpuinfo.c Fri Feb 11 14:51:04 2011 -0800 @@ -32,36 +32,17 @@ #include #include #endif -#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__)) -#include /* For AltiVec check */ -#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP -#include -#include -#endif #ifdef __WIN32__ #include "../core/windows/SDL_windows.h" #endif #define CPU_HAS_RDTSC 0x00000001 #define CPU_HAS_MMX 0x00000002 -#define CPU_HAS_MMXEXT 0x00000004 -#define CPU_HAS_3DNOW 0x00000010 -#define CPU_HAS_3DNOWEXT 0x00000020 -#define CPU_HAS_SSE 0x00000040 -#define CPU_HAS_SSE2 0x00000080 -#define CPU_HAS_ALTIVEC 0x00000100 +#define CPU_HAS_SSE 0x00000010 +#define CPU_HAS_SSE2 0x00000020 +#define CPU_HAS_SSE3 0x00000040 +#define CPU_HAS_SSE4 0x00000080 -#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ -/* This is the brute force way of detecting instruction sets... - the idea is borrowed from the libmpeg2 library - thanks! - */ -static jmp_buf jmpbuf; -static void -illegal_instruction(int sig) -{ - longjmp(jmpbuf, 1); -} -#endif /* HAVE_SETJMP */ static __inline__ int CPU_haveCPUID(void) @@ -202,20 +183,6 @@ } static __inline__ int -CPU_getCPUIDFeaturesExt(void) -{ - int features = 0; - int a, b, c, d; - - cpuid(0x80000000, a, b, c, d); - if (a >= 0x80000001) { - cpuid(0x80000001, a, b, c, d); - features = d; - } - return features; -} - -static __inline__ int CPU_haveRDTSC(void) { if (CPU_haveCPUID()) { @@ -234,33 +201,6 @@ } static __inline__ int -CPU_haveMMXExt(void) -{ - if (CPU_haveCPUID()) { - return (CPU_getCPUIDFeaturesExt() & 0x00400000); - } - return 0; -} - -static __inline__ int -CPU_have3DNow(void) -{ - if (CPU_haveCPUID()) { - return (CPU_getCPUIDFeaturesExt() & 0x80000000); - } - return 0; -} - -static __inline__ int -CPU_have3DNowExt(void) -{ - if (CPU_haveCPUID()) { - return (CPU_getCPUIDFeaturesExt() & 0x40000000); - } - return 0; -} - -static __inline__ int CPU_haveSSE(void) { if (CPU_haveCPUID()) { @@ -279,26 +219,33 @@ } static __inline__ int -CPU_haveAltiVec(void) +CPU_haveSSE3(void) { - volatile int altivec = 0; -#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__)) - int selectors[2] = { CTL_HW, HW_VECTORUNIT }; - int hasVectorUnit = 0; - size_t length = sizeof(hasVectorUnit); - int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); - if (0 == error) - altivec = (hasVectorUnit != 0); -#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP - void (*handler) (int sig); - handler = signal(SIGILL, illegal_instruction); - if (setjmp(jmpbuf) == 0) { - asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1)); - altivec = 1; + if (CPU_haveCPUID()) { + int a, b, c, d; + + cpuid(0, a, b, c, d); + if (a >= 1) { + cpuid(1, a, b, c, d); + return (c & 0x00000001); + } } - signal(SIGILL, handler); -#endif - return altivec; + return 0; +} + +static __inline__ int +CPU_haveSSE4(void) +{ + if (CPU_haveCPUID()) { + int a, b, c, d; + + cpuid(0, a, b, c, d); + if (a >= 1) { + cpuid(1, a, b, c, d); + return (c & 0x00000100); + } + } + return 0; } static int SDL_CPUCount = 0; @@ -471,23 +418,17 @@ if (CPU_haveMMX()) { SDL_CPUFeatures |= CPU_HAS_MMX; } - if (CPU_haveMMXExt()) { - SDL_CPUFeatures |= CPU_HAS_MMXEXT; - } - if (CPU_have3DNow()) { - SDL_CPUFeatures |= CPU_HAS_3DNOW; - } - if (CPU_have3DNowExt()) { - SDL_CPUFeatures |= CPU_HAS_3DNOWEXT; - } if (CPU_haveSSE()) { SDL_CPUFeatures |= CPU_HAS_SSE; } if (CPU_haveSSE2()) { SDL_CPUFeatures |= CPU_HAS_SSE2; } - if (CPU_haveAltiVec()) { - SDL_CPUFeatures |= CPU_HAS_ALTIVEC; + if (CPU_haveSSE3()) { + SDL_CPUFeatures |= CPU_HAS_SSE3; + } + if (CPU_haveSSE4()) { + SDL_CPUFeatures |= CPU_HAS_SSE4; } } return SDL_CPUFeatures; @@ -512,33 +453,6 @@ } SDL_bool -SDL_HasMMXExt(void) -{ - if (SDL_GetCPUFeatures() & CPU_HAS_MMXEXT) { - return SDL_TRUE; - } - return SDL_FALSE; -} - -SDL_bool -SDL_Has3DNow(void) -{ - if (SDL_GetCPUFeatures() & CPU_HAS_3DNOW) { - return SDL_TRUE; - } - return SDL_FALSE; -} - -SDL_bool -SDL_Has3DNowExt(void) -{ - if (SDL_GetCPUFeatures() & CPU_HAS_3DNOWEXT) { - return SDL_TRUE; - } - return SDL_FALSE; -} - -SDL_bool SDL_HasSSE(void) { if (SDL_GetCPUFeatures() & CPU_HAS_SSE) { @@ -557,9 +471,18 @@ } SDL_bool -SDL_HasAltiVec(void) +SDL_HasSSE3(void) { - if (SDL_GetCPUFeatures() & CPU_HAS_ALTIVEC) { + if (SDL_GetCPUFeatures() & CPU_HAS_SSE3) { + return SDL_TRUE; + } + return SDL_FALSE; +} + +SDL_bool +SDL_HasSSE4(void) +{ + if (SDL_GetCPUFeatures() & CPU_HAS_SSE4) { return SDL_TRUE; } return SDL_FALSE; @@ -578,12 +501,10 @@ printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize()); printf("RDTSC: %d\n", SDL_HasRDTSC()); printf("MMX: %d\n", SDL_HasMMX()); - printf("MMXExt: %d\n", SDL_HasMMXExt()); - printf("3DNow: %d\n", SDL_Has3DNow()); - printf("3DNowExt: %d\n", SDL_Has3DNowExt()); printf("SSE: %d\n", SDL_HasSSE()); printf("SSE2: %d\n", SDL_HasSSE2()); - printf("AltiVec: %d\n", SDL_HasAltiVec()); + printf("SSE3: %d\n", SDL_HasSSE3()); + printf("SSE4: %d\n", SDL_HasSSE4()); return 0; } diff -r f26314c20071 -r 6a65c1fc07af src/video/SDL_blit.c --- a/src/video/SDL_blit.c Fri Feb 11 14:42:58 2011 -0800 +++ b/src/video/SDL_blit.c Fri Feb 11 14:51:04 2011 -0800 @@ -100,30 +100,6 @@ return (okay ? 0 : -1); } -#ifdef __MACOSX__ -#include - -static SDL_bool -SDL_UseAltivecPrefetch() -{ - const char key[] = "hw.l3cachesize"; - u_int64_t result = 0; - size_t typeSize = sizeof(result); - - if (sysctlbyname(key, &result, &typeSize, NULL, 0) == 0 && result > 0) { - return SDL_TRUE; - } else { - return SDL_FALSE; - } -} -#else -static SDL_bool -SDL_UseAltivecPrefetch() -{ - /* Just guess G4 */ - return SDL_TRUE; -} -#endif /* __MACOSX__ */ static SDL_BlitFunc SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags, @@ -145,22 +121,12 @@ if (SDL_HasMMX()) { features |= SDL_CPU_MMX; } - if (SDL_Has3DNow()) { - features |= SDL_CPU_3DNOW; - } if (SDL_HasSSE()) { features |= SDL_CPU_SSE; } if (SDL_HasSSE2()) { features |= SDL_CPU_SSE2; } - if (SDL_HasAltiVec()) { - if (SDL_UseAltivecPrefetch()) { - features |= SDL_CPU_ALTIVEC_PREFETCH; - } else { - features |= SDL_CPU_ALTIVEC_NOPREFETCH; - } - } } } diff -r f26314c20071 -r 6a65c1fc07af src/video/SDL_blit.h --- a/src/video/SDL_blit.h Fri Feb 11 14:42:58 2011 -0800 +++ b/src/video/SDL_blit.h Fri Feb 11 14:51:04 2011 -0800 @@ -34,9 +34,6 @@ #ifdef __MMX__ #include #endif -#ifdef __3dNOW__ -#include -#endif #ifdef __SSE__ #include #endif @@ -65,11 +62,8 @@ /* SDL blit CPU flags */ #define SDL_CPU_ANY 0x00000000 #define SDL_CPU_MMX 0x00000001 -#define SDL_CPU_3DNOW 0x00000002 #define SDL_CPU_SSE 0x00000004 #define SDL_CPU_SSE2 0x00000008 -#define SDL_CPU_ALTIVEC_PREFETCH 0x00000010 -#define SDL_CPU_ALTIVEC_NOPREFETCH 0x00000020 typedef struct { diff -r f26314c20071 -r 6a65c1fc07af src/video/SDL_blit_A.c --- a/src/video/SDL_blit_A.c Fri Feb 11 14:42:58 2011 -0800 +++ b/src/video/SDL_blit_A.c Fri Feb 11 14:51:04 2011 -0800 @@ -419,806 +419,6 @@ #endif /* __MMX__ */ -#if SDL_ALTIVEC_BLITTERS -#if __MWERKS__ -#pragma altivec_model on -#endif -#if HAVE_ALTIVEC_H -#include -#endif -#include - -#if (defined(__MACOSX__) && (__GNUC__ < 4)) -#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ - (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) -#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ - (vector unsigned short) ( a,b,c,d,e,f,g,h ) -#else -#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ - (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } -#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ - (vector unsigned short) { a,b,c,d,e,f,g,h } -#endif - -#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) -#define VECPRINT(msg, v) do { \ - vector unsigned int tmpvec = (vector unsigned int)(v); \ - unsigned int *vp = (unsigned int *)&tmpvec; \ - printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \ -} while (0) - -/* the permuation vector that takes the high bytes out of all the appropriate shorts - (vector unsigned char)( - 0x00, 0x10, 0x02, 0x12, - 0x04, 0x14, 0x06, 0x16, - 0x08, 0x18, 0x0A, 0x1A, - 0x0C, 0x1C, 0x0E, 0x1E ); -*/ -#define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F))) -#define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12))) -#define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) -#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ - ? vec_lvsl(0, src) \ - : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) - - -#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ - /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ - vector unsigned short vtemp1 = vec_mule(vs, valpha); \ - /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ - vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ - /* valpha2 is 255-alpha */ \ - vector unsigned char valpha2 = vec_nor(valpha, valpha); \ - /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \ - vector unsigned short vtemp3 = vec_mule(vd, valpha2); \ - /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \ - vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \ - /* add source and dest */ \ - vtemp1 = vec_add(vtemp1, vtemp3); \ - vtemp2 = vec_add(vtemp2, vtemp4); \ - /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \ - vtemp1 = vec_add(vtemp1, v1_16); \ - vtemp3 = vec_sr(vtemp1, v8_16); \ - vtemp1 = vec_add(vtemp1, vtemp3); \ - /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \ - vtemp2 = vec_add(vtemp2, v1_16); \ - vtemp4 = vec_sr(vtemp2, v8_16); \ - vtemp2 = vec_add(vtemp2, vtemp4); \ - /* (>>8) and get ARGBARGBARGBARGB */ \ - vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ -} while (0) - -/* Calculate the permute vector used for 32->32 swizzling */ -static vector unsigned char -calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt) -{ - /* - * We have to assume that the bits that aren't used by other - * colors is alpha, and it's one complete byte, since some formats - * leave alpha with a zero mask, but we should still swizzle the bits. - */ - /* ARGB */ - const static struct SDL_PixelFormat default_pixel_format = { - NULL, 0, 0, - 0, 0, 0, 0, - 16, 8, 0, 24, - 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 - }; - if (!srcfmt) { - srcfmt = &default_pixel_format; - } - if (!dstfmt) { - dstfmt = &default_pixel_format; - } - const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00, - 0x04, 0x04, 0x04, 0x04, - 0x08, 0x08, 0x08, 0x08, - 0x0C, 0x0C, 0x0C, - 0x0C); - vector unsigned char vswiz; - vector unsigned int srcvec; -#define RESHIFT(X) (3 - ((X) >> 3)) - Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); - Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); - Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); - Uint32 amask; - /* Use zero for alpha if either surface doesn't have alpha */ - if (dstfmt->Amask) { - amask = - ((srcfmt->Amask) ? RESHIFT(srcfmt-> - Ashift) : 0x10) << (dstfmt->Ashift); - } else { - amask = - 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ - 0xFFFFFFFF); - } -#undef RESHIFT - ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask); - vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0)); - return (vswiz); -} - -static void -Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info) -{ - int height = info->dst_h; - Uint8 *src = (Uint8 *) info->src; - int srcskip = info->src_skip; - Uint8 *dst = (Uint8 *) info->dst; - int dstskip = info->dst_skip; - SDL_PixelFormat *srcfmt = info->src_fmt; - - vector unsigned char v0 = vec_splat_u8(0); - vector unsigned short v8_16 = vec_splat_u16(8); - vector unsigned short v1_16 = vec_splat_u16(1); - vector unsigned short v2_16 = vec_splat_u16(2); - vector unsigned short v3_16 = vec_splat_u16(3); - vector unsigned int v8_32 = vec_splat_u32(8); - vector unsigned int v16_32 = vec_add(v8_32, v8_32); - vector unsigned short v3f = - VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f, - 0x003f, 0x003f, 0x003f, 0x003f); - vector unsigned short vfc = - VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc, - 0x00fc, 0x00fc, 0x00fc, 0x00fc); - - /* - 0x10 - 0x1f is the alpha - 0x00 - 0x0e evens are the red - 0x01 - 0x0f odds are zero - */ - vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01, - 0x10, 0x02, 0x01, 0x01, - 0x10, 0x04, 0x01, 0x01, - 0x10, 0x06, 0x01, - 0x01); - vector unsigned char vredalpha2 = - (vector unsigned char) (vec_add((vector unsigned int) vredalpha1, - vec_sl(v8_32, v16_32)) - ); - /* - 0x00 - 0x0f is ARxx ARxx ARxx ARxx - 0x11 - 0x0f odds are blue - */ - vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11, - 0x04, 0x05, 0x06, 0x13, - 0x08, 0x09, 0x0a, 0x15, - 0x0c, 0x0d, 0x0e, 0x17); - vector unsigned char vblue2 = - (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32) - ); - /* - 0x00 - 0x0f is ARxB ARxB ARxB ARxB - 0x10 - 0x0e evens are green - */ - vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03, - 0x04, 0x05, 0x12, 0x07, - 0x08, 0x09, 0x14, 0x0b, - 0x0c, 0x0d, 0x16, 0x0f); - vector unsigned char vgreen2 = - (vector unsigned - char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32)) - ); - vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06, - 0x00, 0x0a, 0x00, 0x0e, - 0x00, 0x12, 0x00, 0x16, - 0x00, 0x1a, 0x00, 0x1e); - vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); - vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); - vector unsigned char valphaPermute = - vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); - - vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7); - vf800 = vec_sl(vf800, vec_splat_u16(8)); - - while (height--) { - int extrawidth; - vector unsigned char valigner; - vector unsigned char vsrc; - vector unsigned char voverflow; - int width = info->dst_w; - -#define ONE_PIXEL_BLEND(condition, widthvar) \ - while (condition) { \ - Uint32 Pixel; \ - unsigned sR, sG, sB, dR, dG, dB, sA; \ - DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \ - if(sA) { \ - unsigned short dstpixel = *((unsigned short *)dst); \ - dR = (dstpixel >> 8) & 0xf8; \ - dG = (dstpixel >> 3) & 0xfc; \ - dB = (dstpixel << 3) & 0xf8; \ - ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ - *((unsigned short *)dst) = ( \ - ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \ - ); \ - } \ - src += 4; \ - dst += 2; \ - widthvar--; \ - } - ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); - extrawidth = (width % 8); - valigner = VEC_ALIGNER(src); - vsrc = (vector unsigned char) vec_ld(0, src); - width -= extrawidth; - while (width) { - vector unsigned char valpha; - vector unsigned char vsrc1, vsrc2; - vector unsigned char vdst1, vdst2; - vector unsigned short vR, vG, vB; - vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; - - /* Load 8 pixels from src as ARGB */ - voverflow = (vector unsigned char) vec_ld(15, src); - vsrc = vec_perm(vsrc, voverflow, valigner); - vsrc1 = vec_perm(vsrc, vsrc, vpermute); - src += 16; - vsrc = (vector unsigned char) vec_ld(15, src); - voverflow = vec_perm(voverflow, vsrc, valigner); - vsrc2 = vec_perm(voverflow, voverflow, vpermute); - src += 16; - - /* Load 8 pixels from dst as XRGB */ - voverflow = vec_ld(0, dst); - vR = vec_and((vector unsigned short) voverflow, vf800); - vB = vec_sl((vector unsigned short) voverflow, v3_16); - vG = vec_sl(vB, v2_16); - vdst1 = - (vector unsigned char) vec_perm((vector unsigned char) vR, - (vector unsigned char) vR, - vredalpha1); - vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1); - vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1); - vdst2 = - (vector unsigned char) vec_perm((vector unsigned char) vR, - (vector unsigned char) vR, - vredalpha2); - vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2); - vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2); - - /* Alpha blend 8 pixels as ARGB */ - valpha = vec_perm(vsrc1, v0, valphaPermute); - VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, - v8_16); - valpha = vec_perm(vsrc2, v0, valphaPermute); - VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, - v8_16); - - /* Convert 8 pixels to 565 */ - vpixel = (vector unsigned short) vec_packpx((vector unsigned int) - vdst1, - (vector unsigned int) - vdst2); - vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge); - vgpixel = vec_and(vgpixel, vfc); - vgpixel = vec_sl(vgpixel, v3_16); - vrpixel = vec_sl(vpixel, v1_16); - vrpixel = vec_and(vrpixel, vf800); - vbpixel = vec_and(vpixel, v3f); - vdst1 = - vec_or((vector unsigned char) vrpixel, - (vector unsigned char) vgpixel); - vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel); - - /* Store 8 pixels */ - vec_st(vdst1, 0, dst); - - width -= 8; - dst += 16; - } - ONE_PIXEL_BLEND((extrawidth), extrawidth); -#undef ONE_PIXEL_BLEND - src += srcskip; - dst += dstskip; - } -} - -static void -Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info) -{ - int height = info->dst_h; - Uint32 *srcp = (Uint32 *) info->src; - int srcskip = info->src_skip >> 2; - Uint32 *dstp = (Uint32 *) info->dst; - int dstskip = info->dst_skip >> 2; - SDL_PixelFormat *srcfmt = info->src_fmt; - SDL_PixelFormat *dstfmt = info->dst_fmt; - unsigned sA = info->a; - unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; - Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; - Uint32 ckey = info->colorkey; - vector unsigned char mergePermute; - vector unsigned char vsrcPermute; - vector unsigned char vdstPermute; - vector unsigned char vsdstPermute; - vector unsigned char valpha; - vector unsigned char valphamask; - vector unsigned char vbits; - vector unsigned char v0; - vector unsigned short v1; - vector unsigned short v8; - vector unsigned int vckey; - vector unsigned int vrgbmask; - - mergePermute = VEC_MERGE_PERMUTE(); - v0 = vec_splat_u8(0); - v1 = vec_splat_u16(1); - v8 = vec_splat_u16(8); - - /* set the alpha to 255 on the destination surf */ - valphamask = VEC_ALPHA_MASK(); - - vsrcPermute = calc_swizzle32(srcfmt, NULL); - vdstPermute = calc_swizzle32(NULL, dstfmt); - vsdstPermute = calc_swizzle32(dstfmt, NULL); - - /* set a vector full of alpha and 255-alpha */ - ((unsigned char *) &valpha)[0] = sA; - valpha = vec_splat(valpha, 0); - vbits = (vector unsigned char) vec_splat_s8(-1); - - ckey &= rgbmask; - ((unsigned int *) (char *) &vckey)[0] = ckey; - vckey = vec_splat(vckey, 0); - ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask; - vrgbmask = vec_splat(vrgbmask, 0); - - while (height--) { - int width = info->dst_w; -#define ONE_PIXEL_BLEND(condition, widthvar) \ - while (condition) { \ - Uint32 Pixel; \ - unsigned sR, sG, sB, dR, dG, dB; \ - RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \ - if(sA && Pixel != ckey) { \ - RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ - DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ - ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ - ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ - } \ - dstp++; \ - srcp++; \ - widthvar--; \ - } - ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); - if (width > 0) { - int extrawidth = (width % 4); - vector unsigned char valigner = VEC_ALIGNER(srcp); - vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); - width -= extrawidth; - while (width) { - vector unsigned char vsel; - vector unsigned char voverflow; - vector unsigned char vd; - vector unsigned char vd_orig; - - /* s = *srcp */ - voverflow = (vector unsigned char) vec_ld(15, srcp); - vs = vec_perm(vs, voverflow, valigner); - - /* vsel is set for items that match the key */ - vsel = - (vector unsigned char) vec_and((vector unsigned int) vs, - vrgbmask); - vsel = (vector unsigned char) vec_cmpeq((vector unsigned int) - vsel, vckey); - - /* permute to source format */ - vs = vec_perm(vs, valpha, vsrcPermute); - - /* d = *dstp */ - vd = (vector unsigned char) vec_ld(0, dstp); - vd_orig = vd = vec_perm(vd, v0, vsdstPermute); - - VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); - - /* set the alpha channel to full on */ - vd = vec_or(vd, valphamask); - - /* mask out color key */ - vd = vec_sel(vd, vd_orig, vsel); - - /* permute to dest format */ - vd = vec_perm(vd, vbits, vdstPermute); - - /* *dstp = res */ - vec_st((vector unsigned int) vd, 0, dstp); - - srcp += 4; - dstp += 4; - width -= 4; - vs = voverflow; - } - ONE_PIXEL_BLEND((extrawidth), extrawidth); - } -#undef ONE_PIXEL_BLEND - - srcp += srcskip; - dstp += dstskip; - } -} - - -static void -Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info) -{ - int width = info->dst_w; - int height = info->dst_h; - Uint32 *srcp = (Uint32 *) info->src; - int srcskip = info->src_skip >> 2; - Uint32 *dstp = (Uint32 *) info->dst; - int dstskip = info->dst_skip >> 2; - SDL_PixelFormat *srcfmt = info->src_fmt; - SDL_PixelFormat *dstfmt = info->dst_fmt; - vector unsigned char mergePermute; - vector unsigned char valphaPermute; - vector unsigned char vsrcPermute; - vector unsigned char vdstPermute; - vector unsigned char vsdstPermute; - vector unsigned char valphamask; - vector unsigned char vpixelmask; - vector unsigned char v0; - vector unsigned short v1; - vector unsigned short v8; - - v0 = vec_splat_u8(0); - v1 = vec_splat_u16(1); - v8 = vec_splat_u16(8); - mergePermute = VEC_MERGE_PERMUTE(); - valphamask = VEC_ALPHA_MASK(); - valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); - vpixelmask = vec_nor(valphamask, v0); - vsrcPermute = calc_swizzle32(srcfmt, NULL); - vdstPermute = calc_swizzle32(NULL, dstfmt); - vsdstPermute = calc_swizzle32(dstfmt, NULL); - - while (height--) { - width = info->dst_w; -#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ - Uint32 Pixel; \ - unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ - DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ - if(sA) { \ - DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \ - ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ - ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ - } \ - ++srcp; \ - ++dstp; \ - widthvar--; \ - } - ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); - if (width > 0) { - /* vsrcPermute */ - /* vdstPermute */ - int extrawidth = (width % 4); - vector unsigned char valigner = VEC_ALIGNER(srcp); - vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); - width -= extrawidth; - while (width) { - vector unsigned char voverflow; - vector unsigned char vd; - vector unsigned char valpha; - vector unsigned char vdstalpha; - /* s = *srcp */ - voverflow = (vector unsigned char) vec_ld(15, srcp); - vs = vec_perm(vs, voverflow, valigner); - vs = vec_perm(vs, v0, vsrcPermute); - - valpha = vec_perm(vs, v0, valphaPermute); - - /* d = *dstp */ - vd = (vector unsigned char) vec_ld(0, dstp); - vd = vec_perm(vd, v0, vsdstPermute); - vdstalpha = vec_and(vd, valphamask); - - VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); - - /* set the alpha to the dest alpha */ - vd = vec_and(vd, vpixelmask); - vd = vec_or(vd, vdstalpha); - vd = vec_perm(vd, v0, vdstPermute); - - /* *dstp = res */ - vec_st((vector unsigned int) vd, 0, dstp); - - srcp += 4; - dstp += 4; - width -= 4; - vs = voverflow; - - } - ONE_PIXEL_BLEND((extrawidth), extrawidth); - } - srcp += srcskip; - dstp += dstskip; -#undef ONE_PIXEL_BLEND - } -} - -/* fast ARGB888->(A)RGB888 blending with pixel alpha */ -static void -BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info) -{ - int width = info->dst_w; - int height = info->dst_h; - Uint32 *srcp = (Uint32 *) info->src; - int srcskip = info->src_skip >> 2; - Uint32 *dstp = (Uint32 *) info->dst; - int dstskip = info->dst_skip >> 2; - vector unsigned char mergePermute; - vector unsigned char valphaPermute; - vector unsigned char valphamask; - vector unsigned char vpixelmask; - vector unsigned char v0; - vector unsigned short v1; - vector unsigned short v8; - v0 = vec_splat_u8(0); - v1 = vec_splat_u16(1); - v8 = vec_splat_u16(8); - mergePermute = VEC_MERGE_PERMUTE(); - valphamask = VEC_ALPHA_MASK(); - valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC)); - - - vpixelmask = vec_nor(valphamask, v0); - while (height--) { - width = info->dst_w; -#define ONE_PIXEL_BLEND(condition, widthvar) \ - while ((condition)) { \ - Uint32 dalpha; \ - Uint32 d; \ - Uint32 s1; \ - Uint32 d1; \ - Uint32 s = *srcp; \ - Uint32 alpha = s >> 24; \ - if(alpha) { \ - if(alpha == SDL_ALPHA_OPAQUE) { \ - *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \ - } else { \ - d = *dstp; \ - dalpha = d & 0xff000000; \ - s1 = s & 0xff00ff; \ - d1 = d & 0xff00ff; \ - d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ - s &= 0xff00; \ - d &= 0xff00; \ - d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ - *dstp = d1 | d | dalpha; \ - } \ - } \ - ++srcp; \ - ++dstp; \ - widthvar--; \ - } - ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); - if (width > 0) { - int extrawidth = (width % 4); - vector unsigned char valigner = VEC_ALIGNER(srcp); - vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); - width -= extrawidth; - while (width) { - vector unsigned char voverflow; - vector unsigned char vd; - vector unsigned char valpha; - vector unsigned char vdstalpha; - /* s = *srcp */ - voverflow = (vector unsigned char) vec_ld(15, srcp); - vs = vec_perm(vs, voverflow, valigner); - - valpha = vec_perm(vs, v0, valphaPermute); - - /* d = *dstp */ - vd = (vector unsigned char) vec_ld(0, dstp); - vdstalpha = vec_and(vd, valphamask); - - VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); - - /* set the alpha to the dest alpha */ - vd = vec_and(vd, vpixelmask); - vd = vec_or(vd, vdstalpha); - - /* *dstp = res */ - vec_st((vector unsigned int) vd, 0, dstp); - - srcp += 4; - dstp += 4; - width -= 4; - vs = voverflow; - } - ONE_PIXEL_BLEND((extrawidth), extrawidth); - } - srcp += srcskip; - dstp += dstskip; - } -#undef ONE_PIXEL_BLEND -} - -static void -Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info) -{ - /* XXX : 6 */ - int height = info->dst_h; - Uint32 *srcp = (Uint32 *) info->src; - int srcskip = info->src_skip >> 2; - Uint32 *dstp = (Uint32 *) info->dst; - int dstskip = info->dst_skip >> 2; - SDL_PixelFormat *srcfmt = info->src_fmt; - SDL_PixelFormat *dstfmt = info->dst_fmt; - unsigned sA = info->a; - unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; - vector unsigned char mergePermute; - vector unsigned char vsrcPermute; - vector unsigned char vdstPermute; - vector unsigned char vsdstPermute; - vector unsigned char valpha; - vector unsigned char valphamask; - vector unsigned char vbits; - vector unsigned short v1; - vector unsigned short v8; - - mergePermute = VEC_MERGE_PERMUTE(); - v1 = vec_splat_u16(1); - v8 = vec_splat_u16(8); - - /* set the alpha to 255 on the destination surf */ - valphamask = VEC_ALPHA_MASK(); - - vsrcPermute = calc_swizzle32(srcfmt, NULL); - vdstPermute = calc_swizzle32(NULL, dstfmt); - vsdstPermute = calc_swizzle32(dstfmt, NULL); - - /* set a vector full of alpha and 255-alpha */ - ((unsigned char *) &valpha)[0] = sA; - valpha = vec_splat(valpha, 0); - vbits = (vector unsigned char) vec_splat_s8(-1); - - while (height--) { - int width = info->dst_w; -#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ - Uint32 Pixel; \ - unsigned sR, sG, sB, dR, dG, dB; \ - DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ - DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ - ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ - ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ - ++srcp; \ - ++dstp; \ - widthvar--; \ - } - ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); - if (width > 0) { - int extrawidth = (width % 4); - vector unsigned char valigner = VEC_ALIGNER(srcp); - vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); - width -= extrawidth; - while (width) { - vector unsigned char voverflow; - vector unsigned char vd; - - /* s = *srcp */ - voverflow = (vector unsigned char) vec_ld(15, srcp); - vs = vec_perm(vs, voverflow, valigner); - vs = vec_perm(vs, valpha, vsrcPermute); - - /* d = *dstp */ - vd = (vector unsigned char) vec_ld(0, dstp); - vd = vec_perm(vd, vd, vsdstPermute); - - VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); - - /* set the alpha channel to full on */ - vd = vec_or(vd, valphamask); - vd = vec_perm(vd, vbits, vdstPermute); - - /* *dstp = res */ - vec_st((vector unsigned int) vd, 0, dstp); - - srcp += 4; - dstp += 4; - width -= 4; - vs = voverflow; - } - ONE_PIXEL_BLEND((extrawidth), extrawidth); - } -#undef ONE_PIXEL_BLEND - - srcp += srcskip; - dstp += dstskip; - } - -} - - -/* fast RGB888->(A)RGB888 blending */ -static void -BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info) -{ - unsigned alpha = info->a; - int height = info->dst_h; - Uint32 *srcp = (Uint32 *) info->src; - int srcskip = info->src_skip >> 2; - Uint32 *dstp = (Uint32 *) info->dst; - int dstskip = info->dst_skip >> 2; - vector unsigned char mergePermute; - vector unsigned char valpha; - vector unsigned char valphamask; - vector unsigned short v1; - vector unsigned short v8; - - mergePermute = VEC_MERGE_PERMUTE(); - v1 = vec_splat_u16(1); - v8 = vec_splat_u16(8); - - /* set the alpha to 255 on the destination surf */ - valphamask = VEC_ALPHA_MASK(); - - /* set a vector full of alpha and 255-alpha */ - ((unsigned char *) &valpha)[0] = alpha; - valpha = vec_splat(valpha, 0); - - while (height--) { - int width = info->dst_w; -#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ - Uint32 s = *srcp; \ - Uint32 d = *dstp; \ - Uint32 s1 = s & 0xff00ff; \ - Uint32 d1 = d & 0xff00ff; \ - d1 = (d1 + ((s1 - d1) * alpha >> 8)) \ - & 0xff00ff; \ - s &= 0xff00; \ - d &= 0xff00; \ - d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ - *dstp = d1 | d | 0xff000000; \ - ++srcp; \ - ++dstp; \ - widthvar--; \ - } - ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); - if (width > 0) { - int extrawidth = (width % 4); - vector unsigned char valigner = VEC_ALIGNER(srcp); - vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp); - width -= extrawidth; - while (width) { - vector unsigned char voverflow; - vector unsigned char vd; - - /* s = *srcp */ - voverflow = (vector unsigned char) vec_ld(15, srcp); - vs = vec_perm(vs, voverflow, valigner); - - /* d = *dstp */ - vd = (vector unsigned char) vec_ld(0, dstp); - - VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); - - /* set the alpha channel to full on */ - vd = vec_or(vd, valphamask); - - /* *dstp = res */ - vec_st((vector unsigned int) vd, 0, dstp); - - srcp += 4; - dstp += 4; - width -= 4; - vs = voverflow; - } - ONE_PIXEL_BLEND((extrawidth), extrawidth); - } -#undef ONE_PIXEL_BLEND - - srcp += srcskip; - dstp += dstskip; - } -} - -#if __MWERKS__ -#pragma altivec_model off -#endif -#endif /* SDL_ALTIVEC_BLITTERS */ - /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info) @@ -1338,79 +538,6 @@ } } -#ifdef __3dNOW__ -/* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ -static void -BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) -{ - int width = info->dst_w; - int height = info->dst_h; - Uint32 *srcp = (Uint32 *) info->src; - int srcskip = info->src_skip >> 2; - Uint32 *dstp = (Uint32 *) info->dst; - int dstskip = info->dst_skip >> 2; - SDL_PixelFormat *sf = info->src_fmt; - Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; - Uint32 amask = sf->Amask; - Uint32 ashift = sf->Ashift; - Uint64 multmask; - - __m64 src1, dst1, mm_alpha, mm_zero, dmask; - - mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ - multmask = 0xFFFF; - multmask <<= (ashift * 2); - multmask = ~multmask; - dmask = *(__m64 *) & multmask; /* dst alpha mask -> dmask */ - - while (height--) { - /* *INDENT-OFF* */ - DUFFS_LOOP4({ - Uint32 alpha; - - _m_prefetch(srcp + 16); - _m_prefetch(dstp + 16); - - alpha = *srcp & amask; - if (alpha == 0) { - /* do nothing */ - } else if (alpha == amask) { - /* copy RGB, keep dst alpha */ - *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); - } else { - src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ - src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ - - dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ - dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ - - mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ - mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ - mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ - mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ - mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ - - /* blend */ - src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */ - src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */ - src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ - dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */ - dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ - - *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ - } - ++srcp; - ++dstp; - }, width); - /* *INDENT-ON* */ - srcp += srcskip; - dstp += dstskip; - } - _mm_empty(); -} - -#endif /* __MMX__ */ - /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ /* blend a single 16 bit pixel at 50% */ @@ -2130,17 +1257,10 @@ return BlitNto1PixelAlpha; case 2: -#if SDL_ALTIVEC_BLITTERS - if (sf->BytesPerPixel == 4 - && df->Gmask == 0x7e0 && df->Bmask == 0x1f - && SDL_HasAltiVec()) - return Blit32to565PixelAlphaAltivec; - else -#endif - if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 - && sf->Gmask == 0xff00 - && ((sf->Rmask == 0xff && df->Rmask == 0x1f) - || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { + if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 + && sf->Gmask == 0xff00 + && ((sf->Rmask == 0xff && df->Rmask == 0x1f) + || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { if (df->Gmask == 0x7e0) return BlitARGBto565PixelAlpha; else if (df->Gmask == 0x3e0) @@ -2152,35 +1272,20 @@ if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { -#if defined(__MMX__) || defined(__3dNOW__) +#if defined(__MMX__) if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { -#ifdef __3dNOW__ - if (SDL_Has3DNow()) - return BlitRGBtoRGBPixelAlphaMMX3DNOW; -#endif -#ifdef __MMX__ if (SDL_HasMMX()) return BlitRGBtoRGBPixelAlphaMMX; -#endif } -#endif /* __MMX__ || __3dNOW__ */ +#endif /* __MMX__ */ if (sf->Amask == 0xff000000) { -#if SDL_ALTIVEC_BLITTERS - if (SDL_HasAltiVec()) - return BlitRGBtoRGBPixelAlphaAltivec; -#endif return BlitRGBtoRGBPixelAlpha; } } -#if SDL_ALTIVEC_BLITTERS - if (sf->Amask && sf->BytesPerPixel == 4 && SDL_HasAltiVec()) - return Blit32to32PixelAlphaAltivec; - else -#endif - return BlitNtoNPixelAlpha; + return BlitNtoNPixelAlpha; case 3: default: @@ -2226,19 +1331,10 @@ return BlitRGBtoRGBSurfaceAlphaMMX; #endif if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) { -#if SDL_ALTIVEC_BLITTERS - if (SDL_HasAltiVec()) - return BlitRGBtoRGBSurfaceAlphaAltivec; -#endif return BlitRGBtoRGBSurfaceAlpha; } } -#if SDL_ALTIVEC_BLITTERS - if ((sf->BytesPerPixel == 4) && SDL_HasAltiVec()) - return Blit32to32SurfaceAlphaAltivec; - else -#endif - return BlitNtoNSurfaceAlpha; + return BlitNtoNSurfaceAlpha; case 3: default: @@ -2252,12 +1348,6 @@ if (df->BytesPerPixel == 1) return BlitNto1SurfaceAlphaKey; else -#if SDL_ALTIVEC_BLITTERS - if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && - SDL_HasAltiVec()) - return Blit32to32SurfaceAlphaKeyAltivec; - else -#endif return BlitNtoNSurfaceAlphaKey; } break; diff -r f26314c20071 -r 6a65c1fc07af src/video/SDL_blit_N.c --- a/src/video/SDL_blit_N.c Fri Feb 11 14:42:58 2011 -0800 +++ b/src/video/SDL_blit_N.c Fri Feb 11 14:51:04 2011 -0800 @@ -28,846 +28,8 @@ /* Functions to blit from N-bit surfaces to other surfaces */ -#if SDL_ALTIVEC_BLITTERS -#if __MWERKS__ -#pragma altivec_model on -#endif -#ifdef HAVE_ALTIVEC_H -#include -#endif -#define assert(X) -#ifdef __MACOSX__ -#include -static size_t -GetL3CacheSize(void) -{ - const char key[] = "hw.l3cachesize"; - u_int64_t result = 0; - size_t typeSize = sizeof(result); - - - int err = sysctlbyname(key, &result, &typeSize, NULL, 0); - if (0 != err) - return 0; - - return result; -} -#else -static size_t -GetL3CacheSize(void) -{ - /* XXX: Just guess G4 */ - return 2097152; -} -#endif /* __MACOSX__ */ - -#if (defined(__MACOSX__) && (__GNUC__ < 4)) -#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ - (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) -#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ - (vector unsigned short) ( a,b,c,d,e,f,g,h ) -#else -#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ - (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } -#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ - (vector unsigned short) { a,b,c,d,e,f,g,h } -#endif - -#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) -#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \ - ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \ - 0x04+a, 0x04+b, 0x04+c, 0x04+d, \ - 0x08+a, 0x08+b, 0x08+c, 0x08+d, \ - 0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d ) - -#define MAKE8888(dstfmt, r, g, b, a) \ - ( ((r<Rshift)&dstfmt->Rmask) | \ - ((g<Gshift)&dstfmt->Gmask) | \ - ((b<Bshift)&dstfmt->Bmask) | \ - ((a<Ashift)&dstfmt->Amask) ) - -/* - * Data Stream Touch...Altivec cache prefetching. - * - * Don't use this on a G5...however, the speed boost is very significant - * on a G4. - */ -#define DST_CHAN_SRC 1 -#define DST_CHAN_DEST 2 - -/* macro to set DST control word value... */ -#define DST_CTRL(size, count, stride) \ - (((size) << 24) | ((count) << 16) | (stride)) - -#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ - ? vec_lvsl(0, src) \ - : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) - -/* Calculate the permute vector used for 32->32 swizzling */ -static vector unsigned char -calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt) -{ - /* - * We have to assume that the bits that aren't used by other - * colors is alpha, and it's one complete byte, since some formats - * leave alpha with a zero mask, but we should still swizzle the bits. - */ - /* ARGB */ - const static const struct SDL_PixelFormat default_pixel_format = { - NULL, 32, 4, - 0, 0, 0, 0, - 16, 8, 0, 24, - 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 - }; - if (!srcfmt) { - srcfmt = &default_pixel_format; - } - if (!dstfmt) { - dstfmt = &default_pixel_format; - } - const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00, - 0x04, 0x04, 0x04, 0x04, - 0x08, 0x08, 0x08, 0x08, - 0x0C, 0x0C, 0x0C, - 0x0C); - vector unsigned char vswiz; - vector unsigned int srcvec; -#define RESHIFT(X) (3 - ((X) >> 3)) - Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); - Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); - Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); - Uint32 amask; - /* Use zero for alpha if either surface doesn't have alpha */ - if (dstfmt->Amask) { - amask = - ((srcfmt->Amask) ? RESHIFT(srcfmt-> - Ashift) : 0x10) << (dstfmt->Ashift); - } else { - amask = - 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ - 0xFFFFFFFF); - } -#undef RESHIFT - ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask); - vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0)); - return (vswiz); -} - -static void Blit_RGB888_RGB565(SDL_BlitInfo * info); -static void -Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info) -{ - int height = info->dst_h; - Uint8 *src = (Uint8 *) info->src; - int srcskip = info->src_skip; - Uint8 *dst = (Uint8 *) info->dst; - int dstskip = info->dst_skip; - SDL_PixelFormat *srcfmt = info->src_fmt; - vector unsigned char valpha = vec_splat_u8(0); - vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); - vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06, - 0x00, 0x0a, 0x00, 0x0e, - 0x00, 0x12, 0x00, 0x16, - 0x00, 0x1a, 0x00, 0x1e); - vector unsigned short v1 = vec_splat_u16(1); - vector unsigned short v3 = vec_splat_u16(3); - vector unsigned short v3f = - VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f, - 0x003f, 0x003f, 0x003f, 0x003f); - vector unsigned short vfc = - VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc, - 0x00fc, 0x00fc, 0x00fc, 0x00fc); - vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7); - vf800 = vec_sl(vf800, vec_splat_u16(8)); - - while (height--) { - vector unsigned char valigner; - vector unsigned char voverflow; - vector unsigned char vsrc; - - int width = info->dst_w; - int extrawidth; - - /* do scalar until we can align... */ -#define ONE_PIXEL_BLEND(condition, widthvar) \ - while (condition) { \ - Uint32 Pixel; \ - unsigned sR, sG, sB, sA; \ - DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \ - sR, sG, sB, sA); \ - *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \ - ((sG << 3) & 0x000007E0) | \ - ((sB >> 3) & 0x0000001F)); \ - dst += 2; \ - src += 4; \ - widthvar--; \ - } - - ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width); - - /* After all that work, here's the vector part! */ - extrawidth = (width % 8); /* trailing unaligned stores */ - width -= extrawidth; - vsrc = vec_ld(0, src); - valigner = VEC_ALIGNER(src); - - while (width) { - vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; - vector unsigned int vsrc1, vsrc2; - vector unsigned char vdst; - - voverflow = vec_ld(15, src); - vsrc = vec_perm(vsrc, voverflow, valigner); - vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute); - src += 16; - vsrc = voverflow; - voverflow = vec_ld(15, src); - vsrc = vec_perm(vsrc, voverflow, valigner); - vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute); - /* 1555 */ - vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2); - vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge); - vgpixel = vec_and(vgpixel, vfc); - vgpixel = vec_sl(vgpixel, v3); - vrpixel = vec_sl(vpixel, v1); - vrpixel = vec_and(vrpixel, vf800); - vbpixel = vec_and(vpixel, v3f); - vdst = - vec_or((vector unsigned char) vrpixel, - (vector unsigned char) vgpixel); - /* 565 */ - vdst = vec_or(vdst, (vector unsigned char) vbpixel); - vec_st(vdst, 0, dst); - - width -= 8; - src += 16; - dst += 16; - vsrc = voverflow; - } - - assert(width == 0); - - /* do scalar until we can align... */ - ONE_PIXEL_BLEND((extrawidth), extrawidth); -#undef ONE_PIXEL_BLEND - - src += srcskip; /* move to next row, accounting for pitch. */ - dst += dstskip; - } - - -} - -static void -Blit_RGB565_32Altivec(SDL_BlitInfo * info) -{ - int height = info->dst_h; - Uint8 *src = (Uint8 *) info->src; - int srcskip = info->src_skip; - Uint8 *dst = (Uint8 *) info->dst; - int dstskip = info->dst_skip; - SDL_PixelFormat *srcfmt = info->src_fmt; - SDL_PixelFormat *dstfmt = info->dst_fmt; - unsigned alpha; - vector unsigned char valpha; - vector unsigned char vpermute; - vector unsigned short vf800; - vector unsigned int v8 = vec_splat_u32(8); - vector unsigned int v16 = vec_add(v8, v8); - vector unsigned short v2 = vec_splat_u16(2); - vector unsigned short v3 = vec_splat_u16(3); - /* - 0x10 - 0x1f is the alpha - 0x00 - 0x0e evens are the red - 0x01 - 0x0f odds are zero - */ - vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01, - 0x10, 0x02, 0x01, 0x01, - 0x10, 0x04, 0x01, 0x01, - 0x10, 0x06, 0x01, - 0x01); - vector unsigned char vredalpha2 = - (vector unsigned - char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16)) - ); - /* - 0x00 - 0x0f is ARxx ARxx ARxx ARxx - 0x11 - 0x0f odds are blue - */ - vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11, - 0x04, 0x05, 0x06, 0x13, - 0x08, 0x09, 0x0a, 0x15, - 0x0c, 0x0d, 0x0e, 0x17); - vector unsigned char vblue2 = - (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8) - ); - /* - 0x00 - 0x0f is ARxB ARxB ARxB ARxB - 0x10 - 0x0e evens are green - */ - vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03, - 0x04, 0x05, 0x12, 0x07, - 0x08, 0x09, 0x14, 0x0b, - 0x0c, 0x0d, 0x16, 0x0f); - vector unsigned char vgreen2 = - (vector unsigned - char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8)) - ); - - - assert(srcfmt->BytesPerPixel == 2); - assert(dstfmt->BytesPerPixel == 4); - - vf800 = (vector unsigned short) vec_splat_u8(-7); - vf800 = vec_sl(vf800, vec_splat_u16(8)); - - if (dstfmt->Amask && info->a) { - ((unsigned char *) &valpha)[0] = alpha = info->a; - valpha = vec_splat(valpha, 0); - } else { - alpha = 0; - valpha = vec_splat_u8(0); - } - - vpermute = calc_swizzle32(NULL, dstfmt); - while (height--) { - vector unsigned char valigner; - vector unsigned char voverflow; - vector unsigned char vsrc; - - int width = info->dst_w; - int extrawidth; - - /* do scalar until we can align... */ -#define ONE_PIXEL_BLEND(condition, widthvar) \ - while (condition) { \ - unsigned sR, sG, sB; \ - unsigned short Pixel = *((unsigned short *)src); \ - sR = (Pixel >> 8) & 0xf8; \ - sG = (Pixel >> 3) & 0xfc; \ - sB = (Pixel << 3) & 0xf8; \ - ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \ - src += 2; \ - dst += 4; \ - widthvar--; \ - } - ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width); - - /* After all that work, here's the vector part! */ - extrawidth = (width % 8); /* trailing unaligned stores */ - width -= extrawidth; - vsrc = vec_ld(0, src); - valigner = VEC_ALIGNER(src); - - while (width) { - vector unsigned short vR, vG, vB; - vector unsigned char vdst1, vdst2; - - voverflow = vec_ld(15, src); - vsrc = vec_perm(vsrc, voverflow, valigner); - - vR = vec_and((vector unsigned short) vsrc, vf800); - vB = vec_sl((vector unsigned short) vsrc, v3); - vG = vec_sl(vB, v2); - - vdst1 = - (vector unsigned char) vec_perm((vector unsigned char) vR, - valpha, vredalpha1); - vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1); - vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1); - vdst1 = vec_perm(vdst1, valpha, vpermute); - vec_st(vdst1, 0, dst); - - vdst2 = - (vector unsigned char) vec_perm((vector unsigned char) vR, - valpha, vredalpha2); - vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2); - vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2); - vdst2 = vec_perm(vdst2, valpha, vpermute); - vec_st(vdst2, 16, dst); - - width -= 8; - dst += 32; - src += 16; - vsrc = voverflow; - } - - assert(width == 0); - - - /* do scalar until we can align... */ - ONE_PIXEL_BLEND((extrawidth), extrawidth); -#undef ONE_PIXEL_BLEND - - src += srcskip; /* move to next row, accounting for pitch. */ - dst += dstskip; - } - -} - - -static void -Blit_RGB555_32Altivec(SDL_BlitInfo * info) -{ - int height = info->dst_h; - Uint8 *src = (Uint8 *) info->src; - int srcskip = info->src_skip; - Uint8 *dst = (Uint8 *) info->dst; - int dstskip = info->dst_skip; - SDL_PixelFormat *srcfmt = info->src_fmt; - SDL_PixelFormat *dstfmt = info->dst_fmt; - unsigned alpha; - vector unsigned char valpha; - vector unsigned char vpermute; - vector unsigned short vf800; - vector unsigned int v8 = vec_splat_u32(8); - vector unsigned int v16 = vec_add(v8, v8); - vector unsigned short v1 = vec_splat_u16(1); - vector unsigned short v3 = vec_splat_u16(3); - /* - 0x10 - 0x1f is the alpha - 0x00 - 0x0e evens are the red - 0x01 - 0x0f odds are zero - */ - vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01, - 0x10, 0x02, 0x01, 0x01, - 0x10, 0x04, 0x01, 0x01, - 0x10, 0x06, 0x01, - 0x01); - vector unsigned char vredalpha2 = - (vector unsigned - char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16)) - ); - /* - 0x00 - 0x0f is ARxx ARxx ARxx ARxx - 0x11 - 0x0f odds are blue - */ - vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11, - 0x04, 0x05, 0x06, 0x13, - 0x08, 0x09, 0x0a, 0x15, - 0x0c, 0x0d, 0x0e, 0x17); - vector unsigned char vblue2 = - (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8) - ); - /* - 0x00 - 0x0f is ARxB ARxB ARxB ARxB - 0x10 - 0x0e evens are green - */ - vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03, - 0x04, 0x05, 0x12, 0x07, - 0x08, 0x09, 0x14, 0x0b, - 0x0c, 0x0d, 0x16, 0x0f); - vector unsigned char vgreen2 = - (vector unsigned - char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8)) - ); - - - assert(srcfmt->BytesPerPixel == 2); - assert(dstfmt->BytesPerPixel == 4); - - vf800 = (vector unsigned short) vec_splat_u8(-7); - vf800 = vec_sl(vf800, vec_splat_u16(8)); - - if (dstfmt->Amask && info->a) { - ((unsigned char *) &valpha)[0] = alpha = info->a; - valpha = vec_splat(valpha, 0); - } else { - alpha = 0; - valpha = vec_splat_u8(0); - } - - vpermute = calc_swizzle32(NULL, dstfmt); - while (height--) { - vector unsigned char valigner; - vector unsigned char voverflow; - vector unsigned char vsrc; - - int width = info->dst_w; - int extrawidth; - - /* do scalar until we can align... */ -#define ONE_PIXEL_BLEND(condition, widthvar) \ - while (condition) { \ - unsigned sR, sG, sB; \ - unsigned short Pixel = *((unsigned short *)src); \ - sR = (Pixel >> 7) & 0xf8; \ - sG = (Pixel >> 2) & 0xf8; \ - sB = (Pixel << 3) & 0xf8; \ - ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \ - src += 2; \ - dst += 4; \ - widthvar--; \ - } - ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width); - - /* After all that work, here's the vector part! */ - extrawidth = (width % 8); /* trailing unaligned stores */ - width -= extrawidth; - vsrc = vec_ld(0, src); - valigner = VEC_ALIGNER(src); - - while (width) { - vector unsigned short vR, vG, vB; - vector unsigned char vdst1, vdst2; - - voverflow = vec_ld(15, src); - vsrc = vec_perm(vsrc, voverflow, valigner); - - vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800); - vB = vec_sl((vector unsigned short) vsrc, v3); - vG = vec_sl(vB, v3); - - vdst1 = - (vector unsigned char) vec_perm((vector unsigned char) vR, - valpha, vredalpha1); - vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1); - vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1); - vdst1 = vec_perm(vdst1, valpha, vpermute); - vec_st(vdst1, 0, dst); - - vdst2 = - (vector unsigned char) vec_perm((vector unsigned char) vR, - valpha, vredalpha2); - vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2); - vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2); - vdst2 = vec_perm(vdst2, valpha, vpermute); - vec_st(vdst2, 16, dst); - - width -= 8; - dst += 32; - src += 16; - vsrc = voverflow; - } - - assert(width == 0); - - - /* do scalar until we can align... */ - ONE_PIXEL_BLEND((extrawidth), extrawidth); -#undef ONE_PIXEL_BLEND - - src += srcskip; /* move to next row, accounting for pitch. */ - dst += dstskip; - } - -} - -static void BlitNtoNKey(SDL_BlitInfo * info); -static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info); -static void -Blit32to32KeyAltivec(SDL_BlitInfo * info) -{ - int height = info->dst_h; - Uint32 *srcp = (Uint32 *) info->src; - int srcskip = info->src_skip / 4; - Uint32 *dstp = (Uint32 *) info->dst; - int dstskip = info->dst_skip / 4; - SDL_PixelFormat *srcfmt = info->src_fmt; - int srcbpp = srcfmt->BytesPerPixel; - SDL_PixelFormat *dstfmt = info->dst_fmt; - int dstbpp = dstfmt->BytesPerPixel; - int copy_alpha = (srcfmt->Amask && dstfmt->Amask); - unsigned alpha = dstfmt->Amask ? info->a : 0; - Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; - Uint32 ckey = info->colorkey; - vector unsigned int valpha; - vector unsigned char vpermute; - vector unsigned char vzero; - vector unsigned int vckey; - vector unsigned int vrgbmask; - vpermute = calc_swizzle32(srcfmt, dstfmt); - if (info->dst_w < 16) { - if (copy_alpha) { - BlitNtoNKeyCopyAlpha(info); - } else { - BlitNtoNKey(info); - } - return; - } - vzero = vec_splat_u8(0); - if (alpha) { - ((unsigned char *) &valpha)[0] = (unsigned char) alpha; - valpha = - (vector unsigned int) vec_splat((vector unsigned char) valpha, 0); - } else { - valpha = (vector unsigned int) vzero; - } - ckey &= rgbmask; - ((unsigned int *) (char *) &vckey)[0] = ckey; - vckey = vec_splat(vckey, 0); - ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask; - vrgbmask = vec_splat(vrgbmask, 0); - - while (height--) { -#define ONE_PIXEL_BLEND(condition, widthvar) \ - if (copy_alpha) { \ - while (condition) { \ - Uint32 Pixel; \ - unsigned sR, sG, sB, sA; \ - DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \ - sR, sG, sB, sA); \ - if ( (Pixel & rgbmask) != ckey ) { \ - ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \ - sR, sG, sB, sA); \ - } \ - dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \ - srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \ - widthvar--; \ - } \ - } else { \ - while (condition) { \ - Uint32 Pixel; \ - unsigned sR, sG, sB; \ - RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \ - if ( Pixel != ckey ) { \ - RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ - ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \ - sR, sG, sB, alpha); \ - } \ - dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \ - srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \ - widthvar--; \ - } \ - } - int width = info->dst_w; - ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); - assert(width > 0); - if (width > 0) { - int extrawidth = (width % 4); - vector unsigned char valigner = VEC_ALIGNER(srcp); - vector unsigned int vs = vec_ld(0, srcp); - width -= extrawidth; - assert(width >= 4); - while (width) { - vector unsigned char vsel; - vector unsigned int vd; - vector unsigned int voverflow = vec_ld(15, srcp); - /* load the source vec */ - vs = vec_perm(vs, voverflow, valigner); - /* vsel is set for items that match the key */ - vsel = (vector unsigned char) vec_and(vs, vrgbmask); - vsel = (vector unsigned char) vec_cmpeq(vs, vckey); - /* permute the src vec to the dest format */ - vs = vec_perm(vs, valpha, vpermute); - /* load the destination vec */ - vd = vec_ld(0, dstp); - /* select the source and dest into vs */ - vd = (vector unsigned int) vec_sel((vector unsigned char) vs, - (vector unsigned char) vd, - vsel); - - vec_st(vd, 0, dstp); - srcp += 4; - width -= 4; - dstp += 4; - vs = voverflow; - } - ONE_PIXEL_BLEND((extrawidth), extrawidth); -#undef ONE_PIXEL_BLEND - srcp += srcskip; - dstp += dstskip; - } - } -} - -/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */ -/* Use this on a G5 */ -static void -ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info) -{ - int height = info->dst_h; - Uint32 *src = (Uint32 *) info->src; - int srcskip = info->src_skip / 4; - Uint32 *dst = (Uint32 *) info->dst; - int dstskip = info->dst_skip / 4; - SDL_PixelFormat *srcfmt = info->src_fmt; - SDL_PixelFormat *dstfmt = info->dst_fmt; - vector unsigned int vzero = vec_splat_u32(0); - vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt); - if (dstfmt->Amask && !srcfmt->Amask) { - if (info->a) { - vector unsigned char valpha; - ((unsigned char *) &valpha)[0] = info->a; - vzero = (vector unsigned int) vec_splat(valpha, 0); - } - } - - assert(srcfmt->BytesPerPixel == 4); - assert(dstfmt->BytesPerPixel == 4); - - while (height--) { - vector unsigned char valigner; - vector unsigned int vbits; - vector unsigned int voverflow; - Uint32 bits; - Uint8 r, g, b, a; - - int width = info->dst_w; - int extrawidth; - - /* do scalar until we can align... */ - while ((UNALIGNED_PTR(dst)) && (width)) { - bits = *(src++); - RGBA_FROM_8888(bits, srcfmt, r, g, b, a); - *(dst++) = MAKE8888(dstfmt, r, g, b, a); - width--; - } - - /* After all that work, here's the vector part! */ - extrawidth = (width % 4); - width -= extrawidth; - valigner = VEC_ALIGNER(src); - vbits = vec_ld(0, src); - - while (width) { - voverflow = vec_ld(15, src); - src += 4; - width -= 4; - vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */ - vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */ - vec_st(vbits, 0, dst); /* store it back out. */ - dst += 4; - vbits = voverflow; - } - - assert(width == 0); - - /* cover pixels at the end of the row that didn't fit in 16 bytes. */ - while (extrawidth) { - bits = *(src++); /* max 7 pixels, don't bother with prefetch. */ - RGBA_FROM_8888(bits, srcfmt, r, g, b, a); - *(dst++) = MAKE8888(dstfmt, r, g, b, a); - extrawidth--; - } - - src += srcskip; - dst += dstskip; - } - -} - -/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */ -/* Use this on a G4 */ -static void -ConvertAltivec32to32_prefetch(SDL_BlitInfo * info) -{ - const int scalar_dst_lead = sizeof(Uint32) * 4; - const int vector_dst_lead = sizeof(Uint32) * 16; - - int height = info->dst_h; - Uint32 *src = (Uint32 *) info->src; - int srcskip = info->src_skip / 4; - Uint32 *dst = (Uint32 *) info->dst; - int dstskip = info->dst_skip / 4; - SDL_PixelFormat *srcfmt = info->src_fmt; - SDL_PixelFormat *dstfmt = info->dst_fmt; - vector unsigned int vzero = vec_splat_u32(0); - vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt); - if (dstfmt->Amask && !srcfmt->Amask) { - if (info->a) { - vector unsigned char valpha; - ((unsigned char *) &valpha)[0] = info->a; - vzero = (vector unsigned int) vec_splat(valpha, 0); - } - } - - assert(srcfmt->BytesPerPixel == 4); - assert(dstfmt->BytesPerPixel == 4); - - while (height--) { - vector unsigned char valigner; - vector unsigned int vbits; - vector unsigned int voverflow; - Uint32 bits; - Uint8 r, g, b, a; - - int width = info->dst_w; - int extrawidth; - - /* do scalar until we can align... */ - while ((UNALIGNED_PTR(dst)) && (width)) { - vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024), - DST_CHAN_SRC); - vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024), - DST_CHAN_DEST); - bits = *(src++); - RGBA_FROM_8888(bits, srcfmt, r, g, b, a); - *(dst++) = MAKE8888(dstfmt, r, g, b, a); - width--; - } - - /* After all that work, here's the vector part! */ - extrawidth = (width % 4); - width -= extrawidth; - valigner = VEC_ALIGNER(src); - vbits = vec_ld(0, src); - - while (width) { - vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024), - DST_CHAN_SRC); - vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024), - DST_CHAN_DEST); - voverflow = vec_ld(15, src); - src += 4; - width -= 4; - vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */ - vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */ - vec_st(vbits, 0, dst); /* store it back out. */ - dst += 4; - vbits = voverflow; - } - - assert(width == 0); - - /* cover pixels at the end of the row that didn't fit in 16 bytes. */ - while (extrawidth) { - bits = *(src++); /* max 7 pixels, don't bother with prefetch. */ - RGBA_FROM_8888(bits, srcfmt, r, g, b, a); - *(dst++) = MAKE8888(dstfmt, r, g, b, a); - extrawidth--; - } - - src += srcskip; - dst += dstskip; - } - - vec_dss(DST_CHAN_SRC); - vec_dss(DST_CHAN_DEST); -} - -static Uint32 -GetBlitFeatures(void) -{ - static Uint32 features = 0xffffffff; - if (features == 0xffffffff) { - /* Provide an override for testing .. */ - char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES"); - if (override) { - features = 0; - SDL_sscanf(override, "%u", &features); - } else { - features = (0 - /* Feature 1 is has-MMX */ - | ((SDL_HasMMX())? 1 : 0) - /* Feature 2 is has-AltiVec */ - | ((SDL_HasAltiVec())? 2 : 0) - /* Feature 4 is dont-use-prefetch */ - /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */ - | ((GetL3CacheSize() == 0) ? 4 : 0) - ); - } - } - return features; -} - -#if __MWERKS__ -#pragma altivec_model off -#endif -#else /* Feature 1 is has-MMX */ #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0)) -#endif /* This is now endian dependent */ #if SDL_BYTEORDER == SDL_LIL_ENDIAN @@ -2346,15 +1508,6 @@ }; static const struct blit_table normal_blit_2[] = { -#if SDL_ALTIVEC_BLITTERS - /* has-altivec */ - {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, - 0x00000000, - 2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA}, - {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, - 0x00000000, - 2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA}, -#endif {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF, 0, Blit_RGB565_ARGB8888, SET_ALPHA}, @@ -2378,22 +1531,6 @@ }; static const struct blit_table normal_blit_4[] = { -#if SDL_ALTIVEC_BLITTERS - /* has-altivec | dont-use-prefetch */ - {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, - 0x00000000, - 6, ConvertAltivec32to32_noprefetch, - NO_ALPHA | COPY_ALPHA | SET_ALPHA}, - /* has-altivec */ - {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, - 0x00000000, - 2, ConvertAltivec32to32_prefetch, - NO_ALPHA | COPY_ALPHA | SET_ALPHA}, - /* has-altivec */ - {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, - 0x0000001F, - 2, Blit_RGB888_RGB565Altivec, NO_ALPHA}, -#endif {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F, 0, Blit_RGB888_RGB565, NO_ALPHA}, @@ -2491,12 +1628,6 @@ else if (dstfmt->BytesPerPixel == 1) return BlitNto1Key; else { -#if SDL_ALTIVEC_BLITTERS - if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) - && SDL_HasAltiVec()) { - return Blit32to32KeyAltivec; - } else -#endif if (srcfmt->Amask && dstfmt->Amask) { return BlitNtoNKeyCopyAlpha; } else { diff -r f26314c20071 -r 6a65c1fc07af test/testplatform.c --- a/test/testplatform.c Fri Feb 11 14:42:58 2011 -0800 +++ b/test/testplatform.c Fri Feb 11 14:51:04 2011 -0800 @@ -143,13 +143,10 @@ printf("CPU cache line size: %d\n", SDL_GetCPUCacheLineSize()); printf("RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected"); printf("MMX %s\n", SDL_HasMMX()? "detected" : "not detected"); - printf("MMX Ext %s\n", SDL_HasMMXExt()? "detected" : "not detected"); - printf("3DNow %s\n", SDL_Has3DNow()? "detected" : "not detected"); - printf("3DNow Ext %s\n", - SDL_Has3DNowExt()? "detected" : "not detected"); printf("SSE %s\n", SDL_HasSSE()? "detected" : "not detected"); printf("SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected"); - printf("AltiVec %s\n", SDL_HasAltiVec()? "detected" : "not detected"); + printf("SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected"); + printf("SSE4 %s\n", SDL_HasSSE4()? "detected" : "not detected"); } return (0); }