Mercurial > sdl-ios-xcode
diff src/video/SDL_RLEaccel.c @ 3035:ff602fdfdedc
Removed Rafal Bursig's MMX RLE code, at his request.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Tue, 13 Jan 2009 07:20:55 +0000 |
parents | 99210400e8b9 |
children | dc1eb82ffdaa |
line wrap: on
line diff
--- a/src/video/SDL_RLEaccel.c Tue Jan 13 03:53:22 2009 +0000 +++ b/src/video/SDL_RLEaccel.c Tue Jan 13 07:20:55 2009 +0000 @@ -91,15 +91,6 @@ #include "SDL_blit.h" #include "SDL_RLEaccel_c.h" -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES -#define MMX_ASMBLIT -#endif - -#ifdef MMX_ASMBLIT -#include "mmx.h" -#include "SDL_cpuinfo.h" -#endif - #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) #endif @@ -123,262 +114,6 @@ #define OPAQUE_BLIT(to, from, length, bpp, alpha) \ PIXEL_COPY(to, from, length, bpp) -#ifdef MMX_ASMBLIT - -#define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha) \ - do { \ - Uint32 *srcp = (Uint32 *)(from); \ - Uint32 *dstp = (Uint32 *)(to); \ - int i = 0x00FF00FF; \ - movd_m2r(*(&i), mm3); \ - punpckldq_r2r(mm3, mm3); \ - i = 0xFF000000; \ - movd_m2r(*(&i), mm7); \ - punpckldq_r2r(mm7, mm7); \ - i = alpha | alpha << 16; \ - movd_m2r(*(&i), mm4); \ - punpckldq_r2r(mm4, mm4); \ - pcmpeqd_r2r(mm5,mm5); /* set mm5 to "1" */ \ - pxor_r2r(mm7, mm5); /* make clear alpha mask */ \ - i = length; \ - if(i & 1) { \ - movd_m2r((*srcp), mm1); /* src -> mm1 */ \ - punpcklbw_r2r(mm1, mm1); \ - pand_r2r(mm3, mm1); \ - movd_m2r((*dstp), mm2); /* dst -> mm2 */ \ - punpcklbw_r2r(mm2, mm2); \ - pand_r2r(mm3, mm2); \ - psubw_r2r(mm2, mm1); \ - pmullw_r2r(mm4, mm1); \ - psrlw_i2r(8, mm1); \ - paddw_r2r(mm1, mm2); \ - pand_r2r(mm3, mm2); \ - packuswb_r2r(mm2, mm2); \ - pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \ - movd_r2m(mm2, *dstp); \ - ++srcp; \ - ++dstp; \ - i--; \ - } \ - for(; i > 0; --i) { \ - movq_m2r((*srcp), mm0); \ - movq_r2r(mm0, mm1); \ - punpcklbw_r2r(mm0, mm0); \ - movq_m2r((*dstp), mm2); \ - punpckhbw_r2r(mm1, mm1); \ - movq_r2r(mm2, mm6); \ - pand_r2r(mm3, mm0); \ - punpcklbw_r2r(mm2, mm2); \ - pand_r2r(mm3, mm1); \ - punpckhbw_r2r(mm6, mm6); \ - pand_r2r(mm3, mm2); \ - psubw_r2r(mm2, mm0); \ - pmullw_r2r(mm4, mm0); \ - pand_r2r(mm3, mm6); \ - psubw_r2r(mm6, mm1); \ - pmullw_r2r(mm4, mm1); \ - psrlw_i2r(8, mm0); \ - paddw_r2r(mm0, mm2); \ - psrlw_i2r(8, mm1); \ - paddw_r2r(mm1, mm6); \ - pand_r2r(mm3, mm2); \ - pand_r2r(mm3, mm6); \ - packuswb_r2r(mm2, mm2); \ - packuswb_r2r(mm6, mm6); \ - psrlq_i2r(32, mm2); \ - psllq_i2r(32, mm6); \ - por_r2r(mm6, mm2); \ - pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \ - movq_r2m(mm2, *dstp); \ - srcp += 2; \ - dstp += 2; \ - i--; \ - } \ - emms(); \ - } while(0) - -#define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha) \ - do { \ - int i, n = 0; \ - Uint16 *srcp = (Uint16 *)(from); \ - Uint16 *dstp = (Uint16 *)(to); \ - Uint32 ALPHA = 0xF800; \ - movd_m2r(*(&ALPHA), mm1); \ - punpcklwd_r2r(mm1, mm1); \ - punpcklwd_r2r(mm1, mm1); \ - ALPHA = 0x07E0; \ - movd_m2r(*(&ALPHA), mm4); \ - punpcklwd_r2r(mm4, mm4); \ - punpcklwd_r2r(mm4, mm4); \ - ALPHA = 0x001F; \ - movd_m2r(*(&ALPHA), mm7); \ - punpcklwd_r2r(mm7, mm7); \ - punpcklwd_r2r(mm7, mm7); \ - alpha &= ~(1+2+4); \ - i = (Uint32)alpha | (Uint32)alpha << 16; \ - movd_m2r(*(&i), mm0); \ - punpckldq_r2r(mm0, mm0); \ - ALPHA = alpha >> 3; \ - i = ((int)(length) & 3); \ - for(; i > 0; --i) { \ - Uint32 s = *srcp++; \ - Uint32 d = *dstp; \ - s = (s | s << 16) & 0x07e0f81f; \ - d = (d | d << 16) & 0x07e0f81f; \ - d += (s - d) * ALPHA >> 5; \ - d &= 0x07e0f81f; \ - *dstp++ = d | d >> 16; \ - n++; \ - } \ - i = (int)(length) - n; \ - for(; i > 0; --i) { \ - movq_m2r((*dstp), mm3); \ - movq_m2r((*srcp), mm2); \ - movq_r2r(mm2, mm5); \ - pand_r2r(mm1 , mm5); \ - psrlq_i2r(11, mm5); \ - movq_r2r(mm3, mm6); \ - pand_r2r(mm1 , mm6); \ - psrlq_i2r(11, mm6); \ - psubw_r2r(mm6, mm5); \ - pmullw_r2r(mm0, mm5); \ - psrlw_i2r(8, mm5); \ - paddw_r2r(mm5, mm6); \ - psllq_i2r(11, mm6); \ - pand_r2r(mm1, mm6); \ - movq_r2r(mm4, mm5); \ - por_r2r(mm7, mm5); \ - pand_r2r(mm5, mm3); \ - por_r2r(mm6, mm3); \ - movq_r2r(mm2, mm5); \ - pand_r2r(mm4 , mm5); \ - psrlq_i2r(5, mm5); \ - movq_r2r(mm3, mm6); \ - pand_r2r(mm4 , mm6); \ - psrlq_i2r(5, mm6); \ - psubw_r2r(mm6, mm5); \ - pmullw_r2r(mm0, mm5); \ - psrlw_i2r(8, mm5); \ - paddw_r2r(mm5, mm6); \ - psllq_i2r(5, mm6); \ - pand_r2r(mm4, mm6); \ - movq_r2r(mm1, mm5); \ - por_r2r(mm7, mm5); \ - pand_r2r(mm5, mm3); \ - por_r2r(mm6, mm3); \ - movq_r2r(mm2, mm5); \ - pand_r2r(mm7 , mm5); \ - movq_r2r(mm3, mm6); \ - pand_r2r(mm7 , mm6); \ - psubw_r2r(mm6, mm5); \ - pmullw_r2r(mm0, mm5); \ - psrlw_i2r(8, mm5); \ - paddw_r2r(mm5, mm6); \ - pand_r2r(mm7, mm6); \ - movq_r2r(mm1, mm5); \ - por_r2r(mm4, mm5); \ - pand_r2r(mm5, mm3); \ - por_r2r(mm6, mm3); \ - movq_r2m(mm3, *dstp); \ - srcp += 4; \ - dstp += 4; \ - i -= 3; \ - } \ - emms(); \ - } while(0) - -#define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha) \ - do { \ - int i, n = 0; \ - Uint16 *srcp = (Uint16 *)(from); \ - Uint16 *dstp = (Uint16 *)(to); \ - Uint32 ALPHA = 0x7C00; \ - movd_m2r(*(&ALPHA), mm1); \ - punpcklwd_r2r(mm1, mm1); \ - punpcklwd_r2r(mm1, mm1); \ - ALPHA = 0x03E0; \ - movd_m2r(*(&ALPHA), mm4); \ - punpcklwd_r2r(mm4, mm4); \ - punpcklwd_r2r(mm4, mm4); \ - ALPHA = 0x001F; \ - movd_m2r(*(&ALPHA), mm7); \ - punpcklwd_r2r(mm7, mm7); \ - punpcklwd_r2r(mm7, mm7); \ - alpha &= ~(1+2+4); \ - i = (Uint32)alpha | (Uint32)alpha << 16; \ - movd_m2r(*(&i), mm0); \ - punpckldq_r2r(mm0, mm0); \ - i = ((int)(length) & 3); \ - ALPHA = alpha >> 3; \ - for(; i > 0; --i) { \ - Uint32 s = *srcp++; \ - Uint32 d = *dstp; \ - s = (s | s << 16) & 0x03e07c1f; \ - d = (d | d << 16) & 0x03e07c1f; \ - d += (s - d) * ALPHA >> 5; \ - d &= 0x03e07c1f; \ - *dstp++ = d | d >> 16; \ - n++; \ - } \ - i = (int)(length) - n; \ - for(; i > 0; --i) { \ - movq_m2r((*dstp), mm3); \ - movq_m2r((*srcp), mm2); \ - movq_r2r(mm2, mm5); \ - pand_r2r(mm1 , mm5); \ - psrlq_i2r(10, mm5); \ - movq_r2r(mm3, mm6); \ - pand_r2r(mm1 , mm6); \ - psrlq_i2r(10, mm6); \ - psubw_r2r(mm6, mm5); \ - pmullw_r2r(mm0, mm5); \ - psrlw_i2r(8, mm5); \ - paddw_r2r(mm5, mm6); \ - psllq_i2r(10, mm6); \ - pand_r2r(mm1, mm6); \ - movq_r2r(mm4, mm5); \ - por_r2r(mm7, mm5); \ - pand_r2r(mm5, mm3); \ - por_r2r(mm6, mm3); \ - movq_r2r(mm2, mm5); \ - pand_r2r(mm4 , mm5); \ - psrlq_i2r(5, mm5); \ - movq_r2r(mm3, mm6); \ - pand_r2r(mm4 , mm6); \ - psrlq_i2r(5, mm6); \ - psubw_r2r(mm6, mm5); \ - pmullw_r2r(mm0, mm5); \ - psrlw_i2r(8, mm5); \ - paddw_r2r(mm5, mm6); \ - psllq_i2r(5, mm6); \ - pand_r2r(mm4, mm6); \ - movq_r2r(mm1, mm5); \ - por_r2r(mm7, mm5); \ - pand_r2r(mm5, mm3); \ - por_r2r(mm6, mm3); \ - movq_r2r(mm2, mm5); \ - pand_r2r(mm7 , mm5); \ - movq_r2r(mm3, mm6); \ - pand_r2r(mm7 , mm6); \ - psubw_r2r(mm6, mm5); \ - pmullw_r2r(mm0, mm5); \ - psrlw_i2r(8, mm5); \ - paddw_r2r(mm5, mm6); \ - pand_r2r(mm7, mm6); \ - movq_r2r(mm1, mm5); \ - por_r2r(mm4, mm5); \ - pand_r2r(mm5, mm3); \ - por_r2r(mm6, mm3); \ - movq_r2m(mm3, *dstp); \ - srcp += 4; \ - dstp += 4; \ - i -= 3; \ - } \ - emms(); \ - } while(0) - -#endif - /* * For 32bpp pixels on the form 0x00rrggbb: * If we treat the middle component separately, we can process the two @@ -504,48 +239,6 @@ } \ } while(0) -#ifdef MMX_ASMBLIT - -#define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha) \ - do { \ - Uint32 *srcp = (Uint32 *)(from); \ - Uint32 *dstp = (Uint32 *)(to); \ - int i = 0x00fefefe; \ - movd_m2r(*(&i), mm4); \ - punpckldq_r2r(mm4, mm4); \ - i = 0x00010101; \ - movd_m2r(*(&i), mm3); \ - punpckldq_r2r(mm3, mm3); \ - i = (int)(length); \ - if( i & 1 ) { \ - Uint32 s = *srcp++; \ - Uint32 d = *dstp; \ - *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \ - + (s & d & 0x00010101); \ - i--; \ - } \ - for(; i > 0; --i) { \ - movq_m2r((*dstp), mm2); /* dst -> mm2 */ \ - movq_r2r(mm2, mm6); /* dst -> mm6 */ \ - movq_m2r((*srcp), mm1); /* src -> mm1 */ \ - movq_r2r(mm1, mm5); /* src -> mm5 */ \ - pand_r2r(mm4, mm6); /* dst & 0x00fefefe -> mm6 */ \ - pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */ \ - paddd_r2r(mm6, mm5); /* (dst & 0x00fefefe) + (dst & 0x00fefefe) -> mm5 */ \ - psrld_i2r(1, mm5); \ - pand_r2r(mm1, mm2); /* s & d -> mm2 */ \ - pand_r2r(mm3, mm2); /* s & d & 0x00010101 -> mm2 */ \ - paddd_r2r(mm5, mm2); \ - movq_r2m(mm2, (*dstp)); \ - dstp += 2; \ - srcp += 2; \ - i--; \ - } \ - emms(); \ - } while(0) - -#endif - /* * Special case: 50% alpha (alpha=128) * This is treated specially because it can be optimized very well, and @@ -617,94 +310,6 @@ #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) \ ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde) -#ifdef MMX_ASMBLIT - -#define CHOOSE_BLIT(blitter, alpha, fmt) \ - do { \ - if(alpha == 255) { \ - switch(fmt->BytesPerPixel) { \ - case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \ - case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \ - case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \ - case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \ - } \ - } else { \ - switch(fmt->BytesPerPixel) { \ - case 1: \ - /* No 8bpp alpha blitting */ \ - break; \ - \ - case 2: \ - switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \ - case 0xffff: \ - if(fmt->Gmask == 0x07e0 \ - || fmt->Rmask == 0x07e0 \ - || fmt->Bmask == 0x07e0) { \ - if(alpha == 128) \ - blitter(2, Uint8, ALPHA_BLIT16_565_50); \ - else { \ - if(SDL_HasMMX()) \ - blitter(2, Uint8, ALPHA_BLIT16_565MMX); \ - else \ - blitter(2, Uint8, ALPHA_BLIT16_565); \ - } \ - } else \ - goto general16; \ - break; \ - \ - case 0x7fff: \ - if(fmt->Gmask == 0x03e0 \ - || fmt->Rmask == 0x03e0 \ - || fmt->Bmask == 0x03e0) { \ - if(alpha == 128) \ - blitter(2, Uint8, ALPHA_BLIT16_555_50); \ - else { \ - if(SDL_HasMMX()) \ - blitter(2, Uint8, ALPHA_BLIT16_555MMX); \ - else \ - blitter(2, Uint8, ALPHA_BLIT16_555); \ - } \ - break; \ - } \ - /* fallthrough */ \ - \ - default: \ - general16: \ - blitter(2, Uint8, ALPHA_BLIT_ANY); \ - } \ - break; \ - \ - case 3: \ - blitter(3, Uint8, ALPHA_BLIT_ANY); \ - break; \ - \ - case 4: \ - if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \ - && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \ - || fmt->Bmask == 0xff00)) { \ - if(alpha == 128) \ - { \ - if(SDL_HasMMX()) \ - blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);\ - else \ - blitter(4, Uint16, ALPHA_BLIT32_888_50);\ - } \ - else \ - { \ - if(SDL_HasMMX()) \ - blitter(4, Uint16, ALPHA_BLIT32_888MMX);\ - else \ - blitter(4, Uint16, ALPHA_BLIT32_888); \ - } \ - } else \ - blitter(4, Uint16, ALPHA_BLIT_ANY); \ - break; \ - } \ - } \ - } while(0) - -#else - #define CHOOSE_BLIT(blitter, alpha, fmt) \ do { \ if(alpha == 255) { \ @@ -773,8 +378,6 @@ } \ } while(0) -#endif - /* * This takes care of the case when the surface is clipped on the left and/or * right. Top clipping has already been taken care of.