diff src/video/SDL_RLEaccel.c @ 3035:ff602fdfdedc

Removed Rafal Bursig's MMX RLE code, at his request.
author Sam Lantinga <slouken@libsdl.org>
date Tue, 13 Jan 2009 07:20:55 +0000
parents 99210400e8b9
children dc1eb82ffdaa
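The macros removed in this changeset are MMX fast paths for constant-alpha blits; the blends themselves remain available through the portable C macros kept in the file (ALPHA_BLIT32_888, ALPHA_BLIT16_565, ALPHA_BLIT32_888_50, and the non-MMX CHOOSE_BLIT dispatcher). As a reference for the per-pixel math the removed 32bpp path performs, here is a minimal scalar sketch; the function name and the use of <stdint.h> are illustrative only and are not SDL API.

#include <stdint.h>

/* Hypothetical helper, not SDL API: blend `length` 0x00rrggbb pixels from
 * `src` over `dst` with a constant alpha in 0..255, clearing the alpha byte
 * of the result just as ALPHA_BLIT32_888MMX does.  Computes
 * d' = (d*(256-a) + s*a) / 256 per channel, which matches the macro's
 * d + ((s-d)*a >> 8) up to rounding. */
static void blit32_888_const_alpha(uint32_t *dst, const uint32_t *src,
                                   int length, unsigned alpha)
{
    int i, shift;
    for (i = 0; i < length; ++i) {
        uint32_t s = src[i], d = dst[i], out = 0;
        for (shift = 0; shift <= 16; shift += 8) {
            uint32_t sc = (s >> shift) & 0xFF;      /* source channel */
            uint32_t dc = (d >> shift) & 0xFF;      /* destination channel */
            out |= ((dc * (256 - alpha) + sc * alpha) >> 8) << shift;
        }
        dst[i] = out;                               /* alpha byte stays 0 */
    }
}

The MMX macro computes the same blend two pixels per iteration by unpacking bytes to 16-bit words and using pmullw/psrlw, with a leading single-pixel step to handle odd lengths.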
--- a/src/video/SDL_RLEaccel.c	Tue Jan 13 03:53:22 2009 +0000
+++ b/src/video/SDL_RLEaccel.c	Tue Jan 13 07:20:55 2009 +0000
@@ -91,15 +91,6 @@
 #include "SDL_blit.h"
 #include "SDL_RLEaccel_c.h"
 
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES
-#define MMX_ASMBLIT
-#endif
-
-#ifdef MMX_ASMBLIT
-#include "mmx.h"
-#include "SDL_cpuinfo.h"
-#endif
-
 #ifndef MAX
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #endif
@@ -123,262 +114,6 @@
 #define OPAQUE_BLIT(to, from, length, bpp, alpha)	\
     PIXEL_COPY(to, from, length, bpp)
 
-#ifdef MMX_ASMBLIT
-
-#define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha)	\
-    do {							\
-	Uint32 *srcp = (Uint32 *)(from);			\
-	Uint32 *dstp = (Uint32 *)(to);				\
-        int i = 0x00FF00FF;					\
-        movd_m2r(*(&i), mm3);					\
-        punpckldq_r2r(mm3, mm3);				\
-        i = 0xFF000000;						\
-        movd_m2r(*(&i), mm7);					\
-        punpckldq_r2r(mm7, mm7);				\
-        i = alpha | alpha << 16;				\
-        movd_m2r(*(&i), mm4);					\
-        punpckldq_r2r(mm4, mm4);				\
-	pcmpeqd_r2r(mm5,mm5); /* set mm5 to "1" */		\
-	pxor_r2r(mm7, mm5); /* make clear alpha mask */		\
-        i = length;						\
-	if(i & 1) {						\
-          movd_m2r((*srcp), mm1); /* src -> mm1 */		\
-          punpcklbw_r2r(mm1, mm1);				\
-          pand_r2r(mm3, mm1);					\
-	  movd_m2r((*dstp), mm2); /* dst -> mm2 */		\
-          punpcklbw_r2r(mm2, mm2);				\
-          pand_r2r(mm3, mm2);					\
-	  psubw_r2r(mm2, mm1);					\
-	  pmullw_r2r(mm4, mm1);					\
-	  psrlw_i2r(8, mm1);					\
-	  paddw_r2r(mm1, mm2);					\
-	  pand_r2r(mm3, mm2);					\
-	  packuswb_r2r(mm2, mm2);				\
-	  pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */		\
-	  movd_r2m(mm2, *dstp);					\
-	  ++srcp;						\
-	  ++dstp;						\
-	  i--;							\
-	}							\
-	for(; i > 0; --i) {					\
-          movq_m2r((*srcp), mm0);				\
-	  movq_r2r(mm0, mm1);					\
-          punpcklbw_r2r(mm0, mm0);				\
-	  movq_m2r((*dstp), mm2);				\
-	  punpckhbw_r2r(mm1, mm1);				\
-	  movq_r2r(mm2, mm6);					\
-          pand_r2r(mm3, mm0);					\
-          punpcklbw_r2r(mm2, mm2);				\
-	  pand_r2r(mm3, mm1);					\
-	  punpckhbw_r2r(mm6, mm6);				\
-          pand_r2r(mm3, mm2);					\
-	  psubw_r2r(mm2, mm0);					\
-	  pmullw_r2r(mm4, mm0);					\
-	  pand_r2r(mm3, mm6);					\
-	  psubw_r2r(mm6, mm1);					\
-	  pmullw_r2r(mm4, mm1);					\
-	  psrlw_i2r(8, mm0);					\
-	  paddw_r2r(mm0, mm2);					\
-	  psrlw_i2r(8, mm1);					\
-	  paddw_r2r(mm1, mm6);					\
-	  pand_r2r(mm3, mm2);					\
-	  pand_r2r(mm3, mm6);					\
-	  packuswb_r2r(mm2, mm2);				\
-	  packuswb_r2r(mm6, mm6);				\
-	  psrlq_i2r(32, mm2);					\
-	  psllq_i2r(32, mm6);					\
-	  por_r2r(mm6, mm2);					\
-	  pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */		\
-         movq_r2m(mm2, *dstp);					\
-	  srcp += 2;						\
-	  dstp += 2;						\
-	  i--;							\
-	}							\
-	emms();							\
-    } while(0)
-
-#define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha)	\
-    do {						\
-        int i, n = 0;					\
-	Uint16 *srcp = (Uint16 *)(from);		\
-	Uint16 *dstp = (Uint16 *)(to);			\
-        Uint32 ALPHA = 0xF800;				\
-	movd_m2r(*(&ALPHA), mm1);			\
-        punpcklwd_r2r(mm1, mm1);			\
-        punpcklwd_r2r(mm1, mm1);			\
-	ALPHA = 0x07E0;					\
-	movd_m2r(*(&ALPHA), mm4);			\
-        punpcklwd_r2r(mm4, mm4);			\
-        punpcklwd_r2r(mm4, mm4);			\
-	ALPHA = 0x001F;					\
-	movd_m2r(*(&ALPHA), mm7);			\
-        punpcklwd_r2r(mm7, mm7);			\
-        punpcklwd_r2r(mm7, mm7);			\
-	alpha &= ~(1+2+4);				\
-        i = (Uint32)alpha | (Uint32)alpha << 16;	\
-        movd_m2r(*(&i), mm0);				\
-        punpckldq_r2r(mm0, mm0);			\
-        ALPHA = alpha >> 3;				\
-        i = ((int)(length) & 3);			\
-	for(; i > 0; --i) {				\
-	    Uint32 s = *srcp++;				\
-	    Uint32 d = *dstp;				\
-	    s = (s | s << 16) & 0x07e0f81f;		\
-	    d = (d | d << 16) & 0x07e0f81f;		\
-	    d += (s - d) * ALPHA >> 5;			\
-	    d &= 0x07e0f81f;				\
-	    *dstp++ = d | d >> 16;			\
-	    n++;					\
-	}						\
-	i = (int)(length) - n;				\
-	for(; i > 0; --i) {				\
-	  movq_m2r((*dstp), mm3);			\
-	  movq_m2r((*srcp), mm2);			\
-	  movq_r2r(mm2, mm5);				\
-	  pand_r2r(mm1 , mm5);				\
-	  psrlq_i2r(11, mm5);				\
-	  movq_r2r(mm3, mm6);				\
-	  pand_r2r(mm1 , mm6);				\
-	  psrlq_i2r(11, mm6);				\
-	  psubw_r2r(mm6, mm5);				\
-	  pmullw_r2r(mm0, mm5);				\
-	  psrlw_i2r(8, mm5);				\
-	  paddw_r2r(mm5, mm6);				\
-	  psllq_i2r(11, mm6);				\
-	  pand_r2r(mm1, mm6);				\
-	  movq_r2r(mm4, mm5);				\
-	  por_r2r(mm7, mm5);				\
-	  pand_r2r(mm5, mm3);				\
-	  por_r2r(mm6, mm3);				\
-	  movq_r2r(mm2, mm5);				\
-	  pand_r2r(mm4 , mm5);				\
-	  psrlq_i2r(5, mm5);				\
-	  movq_r2r(mm3, mm6);				\
-	  pand_r2r(mm4 , mm6);				\
-	  psrlq_i2r(5, mm6);				\
-	  psubw_r2r(mm6, mm5);				\
-	  pmullw_r2r(mm0, mm5);				\
-	  psrlw_i2r(8, mm5);				\
-	  paddw_r2r(mm5, mm6);				\
-	  psllq_i2r(5, mm6);				\
-	  pand_r2r(mm4, mm6);				\
-	  movq_r2r(mm1, mm5);				\
-	  por_r2r(mm7, mm5);				\
-	  pand_r2r(mm5, mm3);				\
-	  por_r2r(mm6, mm3);				\
-	  movq_r2r(mm2, mm5);				\
-	  pand_r2r(mm7 , mm5);				\
-          movq_r2r(mm3, mm6);				\
-	  pand_r2r(mm7 , mm6);				\
-	  psubw_r2r(mm6, mm5);				\
-	  pmullw_r2r(mm0, mm5);				\
-	  psrlw_i2r(8, mm5);				\
-	  paddw_r2r(mm5, mm6);				\
-	  pand_r2r(mm7, mm6);				\
-	  movq_r2r(mm1, mm5);				\
-	  por_r2r(mm4, mm5);				\
-	  pand_r2r(mm5, mm3);				\
-	  por_r2r(mm6, mm3);				\
-	  movq_r2m(mm3, *dstp);				\
-	  srcp += 4;					\
-	  dstp += 4;					\
-	  i -= 3;					\
-	}						\
-	emms();						\
-    } while(0)
-
-#define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha)	\
-    do {						\
-        int i, n = 0;					\
-	Uint16 *srcp = (Uint16 *)(from);		\
-	Uint16 *dstp = (Uint16 *)(to);			\
-        Uint32 ALPHA = 0x7C00;				\
-	movd_m2r(*(&ALPHA), mm1);			\
-        punpcklwd_r2r(mm1, mm1);			\
-        punpcklwd_r2r(mm1, mm1);			\
-	ALPHA = 0x03E0;					\
-        movd_m2r(*(&ALPHA), mm4);			\
-        punpcklwd_r2r(mm4, mm4);			\
-        punpcklwd_r2r(mm4, mm4);			\
-	ALPHA = 0x001F;					\
-	movd_m2r(*(&ALPHA), mm7);			\
-        punpcklwd_r2r(mm7, mm7);			\
-        punpcklwd_r2r(mm7, mm7);			\
-	alpha &= ~(1+2+4);				\
-        i = (Uint32)alpha | (Uint32)alpha << 16;	\
-        movd_m2r(*(&i), mm0);				\
-        punpckldq_r2r(mm0, mm0);			\
-        i = ((int)(length) & 3);				\
-        ALPHA = alpha >> 3;				\
-	for(; i > 0; --i) {				\
-	    Uint32 s = *srcp++;				\
-	    Uint32 d = *dstp;				\
-	    s = (s | s << 16) & 0x03e07c1f;		\
-	    d = (d | d << 16) & 0x03e07c1f;		\
-	    d += (s - d) * ALPHA >> 5;			\
-	    d &= 0x03e07c1f;				\
-	    *dstp++ = d | d >> 16;			\
-	    n++;					\
-	}						\
-	i = (int)(length) - n;				\
-	for(; i > 0; --i) {				\
-	  movq_m2r((*dstp), mm3);			\
-	  movq_m2r((*srcp), mm2);			\
-	  movq_r2r(mm2, mm5);				\
-	  pand_r2r(mm1 , mm5);				\
-	  psrlq_i2r(10, mm5);				\
-	  movq_r2r(mm3, mm6);				\
-	  pand_r2r(mm1 , mm6);				\
-	  psrlq_i2r(10, mm6);				\
-	  psubw_r2r(mm6, mm5);				\
-	  pmullw_r2r(mm0, mm5);				\
-	  psrlw_i2r(8, mm5);				\
-	  paddw_r2r(mm5, mm6);				\
-	  psllq_i2r(10, mm6);				\
-	  pand_r2r(mm1, mm6);				\
-	  movq_r2r(mm4, mm5);				\
-	  por_r2r(mm7, mm5);				\
-	  pand_r2r(mm5, mm3);				\
-	  por_r2r(mm6, mm3);				\
-	  movq_r2r(mm2, mm5);				\
-	  pand_r2r(mm4 , mm5);				\
-	  psrlq_i2r(5, mm5);				\
-	  movq_r2r(mm3, mm6);				\
-	  pand_r2r(mm4 , mm6);				\
-	  psrlq_i2r(5, mm6);				\
-	  psubw_r2r(mm6, mm5);				\
-	  pmullw_r2r(mm0, mm5);				\
-	  psrlw_i2r(8, mm5);				\
-	  paddw_r2r(mm5, mm6);				\
-	  psllq_i2r(5, mm6);				\
-	  pand_r2r(mm4, mm6);				\
-	  movq_r2r(mm1, mm5);				\
-	  por_r2r(mm7, mm5);				\
-	  pand_r2r(mm5, mm3);				\
-	  por_r2r(mm6, mm3);				\
-	  movq_r2r(mm2, mm5);				\
-	  pand_r2r(mm7 , mm5);				\
-          movq_r2r(mm3, mm6);				\
-	  pand_r2r(mm7 , mm6);				\
-	  psubw_r2r(mm6, mm5);				\
-	  pmullw_r2r(mm0, mm5);				\
-	  psrlw_i2r(8, mm5);				\
-	  paddw_r2r(mm5, mm6);				\
-	  pand_r2r(mm7, mm6);				\
-	  movq_r2r(mm1, mm5);				\
-	  por_r2r(mm4, mm5);				\
-	  pand_r2r(mm5, mm3);				\
-	  por_r2r(mm6, mm3);				\
-	  movq_r2m(mm3, *dstp);				\
-	  srcp += 4;					\
-	  dstp += 4;					\
-	  i -= 3;					\
-	}						\
-	emms();						\
-    } while(0)
-
-#endif
-
 /*
  * For 32bpp pixels on the form 0x00rrggbb:
  * If we treat the middle component separately, we can process the two
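The removed 16bpp macros above (and the scalar prologues they use for leftover pixels) rely on spreading an RGB565 pixel across a 32-bit word so that one multiply blends all three channels with a 5-bit alpha. Below is a standalone sketch of that trick, illustrative only; the function name is not SDL's.

#include <stdint.h>

/* Illustrative only, not SDL API: blend one RGB565 pixel `spix` over `dpix`
 * using the field-spreading trick from the removed 16bpp macros.  `alpha5`
 * is the blend factor reduced to five bits (alpha >> 3), so a single
 * multiply can blend all three channels at once. */
static uint16_t blend565(uint16_t dpix, uint16_t spix, unsigned alpha5)
{
    uint32_t s = spix, d = dpix;

    /* Spread rrrrrggggggbbbbb into 00000gggggg00000 rrrrr000000bbbbb:
     * G moves to the high half, R and B stay in the low half, and every
     * field now has empty bits above it to absorb the blend arithmetic. */
    s = (s | (s << 16)) & 0x07e0f81f;
    d = (d | (d << 16)) & 0x07e0f81f;

    d += ((s - d) * alpha5) >> 5;   /* d + (s - d) * alpha5 / 32, per field */
    d &= 0x07e0f81f;

    return (uint16_t)(d | (d >> 16));
}

The MMX versions do the same arithmetic four pixels at a time on 16-bit fields with pmullw, one color component per pass.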
@@ -504,48 +239,6 @@
 	}								\
     } while(0)
 
-#ifdef MMX_ASMBLIT
-
-#define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha)		\
-    do {								\
-	Uint32 *srcp = (Uint32 *)(from);				\
-	Uint32 *dstp = (Uint32 *)(to);					\
-        int i = 0x00fefefe;						\
-        movd_m2r(*(&i), mm4);						\
-        punpckldq_r2r(mm4, mm4);					\
-        i = 0x00010101;							\
-        movd_m2r(*(&i), mm3);						\
-        punpckldq_r2r(mm3, mm3);					\
-        i = (int)(length);						\
-        if( i & 1 ) {							\
-	  Uint32 s = *srcp++;						\
-	  Uint32 d = *dstp;						\
-	  *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)	\
-		     + (s & d & 0x00010101);				\
-	  i--;								\
-	}								\
-	for(; i > 0; --i) {						\
-	    movq_m2r((*dstp), mm2); /* dst -> mm2 */			\
-	    movq_r2r(mm2, mm6);	/* dst -> mm6 */			\
-	    movq_m2r((*srcp), mm1); /* src -> mm1 */			\
-	    movq_r2r(mm1, mm5);	/* src -> mm5 */			\
-	    pand_r2r(mm4, mm6);	/* dst & 0x00fefefe -> mm6 */		\
-	    pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */		\
-	    paddd_r2r(mm6, mm5); /* (dst & 0x00fefefe) + (dst & 0x00fefefe) -> mm5 */	\
-	    psrld_i2r(1, mm5);						\
-	    pand_r2r(mm1, mm2);	/* s & d -> mm2 */			\
-	    pand_r2r(mm3, mm2);	/* s & d & 0x00010101 -> mm2 */		\
-	    paddd_r2r(mm5, mm2);					\
-	    movq_r2m(mm2, (*dstp));					\
-	    dstp += 2;							\
-	    srcp += 2;							\
-	    i--;							\
-	}								\
-	emms();								\
-    } while(0)
-
-#endif
-
 /*
  * Special case: 50% alpha (alpha=128)
  * This is treated specially because it can be optimized very well, and
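The 50% case removed above needs no multiply at all: the scalar fallback inside ALPHA_BLIT32_888_50MMX (and the portable ALPHA_BLIT32_888_50 path it falls back to) averages the two pixels per channel in a single expression. For reference, the same expression as a standalone function, illustrative only:

#include <stdint.h>

/* Illustrative only: per-channel average of two 0x00rrggbb pixels without
 * unpacking.  Masking with 0x00fefefe clears each channel's low bit so the
 * add-then-shift cannot leak into the neighbouring channel, and
 * (s & d & 0x00010101) adds back the 1 that is lost when both channels
 * have their low bit set. */
static uint32_t avg_888(uint32_t s, uint32_t d)
{
    return (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
           + (s & d & 0x00010101);
}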
@@ -617,94 +310,6 @@
 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)	\
     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
 
-#ifdef MMX_ASMBLIT
-
-#define CHOOSE_BLIT(blitter, alpha, fmt)				\
-    do {								\
-        if(alpha == 255) {						\
-	    switch(fmt->BytesPerPixel) {				\
-	    case 1: blitter(1, Uint8, OPAQUE_BLIT); break;		\
-	    case 2: blitter(2, Uint8, OPAQUE_BLIT); break;		\
-	    case 3: blitter(3, Uint8, OPAQUE_BLIT); break;		\
-	    case 4: blitter(4, Uint16, OPAQUE_BLIT); break;		\
-	    }								\
-	} else {							\
-	    switch(fmt->BytesPerPixel) {				\
-	    case 1:							\
-		/* No 8bpp alpha blitting */				\
-		break;							\
-									\
-	    case 2:							\
-		switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) {		\
-		case 0xffff:						\
-		    if(fmt->Gmask == 0x07e0				\
-		       || fmt->Rmask == 0x07e0				\
-		       || fmt->Bmask == 0x07e0) {			\
-			if(alpha == 128)				\
-			    blitter(2, Uint8, ALPHA_BLIT16_565_50);	\
-			else {						\
-			    if(SDL_HasMMX())				\
-				blitter(2, Uint8, ALPHA_BLIT16_565MMX);	\
-			    else					\
-				blitter(2, Uint8, ALPHA_BLIT16_565);	\
-			}						\
-		    } else						\
-			goto general16;					\
-		    break;						\
-									\
-		case 0x7fff:						\
-		    if(fmt->Gmask == 0x03e0				\
-		       || fmt->Rmask == 0x03e0				\
-		       || fmt->Bmask == 0x03e0) {			\
-			if(alpha == 128)				\
-			    blitter(2, Uint8, ALPHA_BLIT16_555_50);	\
-			else {						\
-			    if(SDL_HasMMX())				\
-				blitter(2, Uint8, ALPHA_BLIT16_555MMX);	\
-			    else					\
-				blitter(2, Uint8, ALPHA_BLIT16_555);	\
-			}						\
-			break;						\
-		    }							\
-		    /* fallthrough */					\
-									\
-		default:						\
-		general16:						\
-		    blitter(2, Uint8, ALPHA_BLIT_ANY);			\
-		}							\
-		break;							\
-									\
-	    case 3:							\
-		blitter(3, Uint8, ALPHA_BLIT_ANY);			\
-		break;							\
-									\
-	    case 4:							\
-		if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff	\
-		   && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00	\
-		       || fmt->Bmask == 0xff00)) {			\
-		    if(alpha == 128)					\
-		    {							\
-			if(SDL_HasMMX())				\
-				blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);\
-			else						\
-				blitter(4, Uint16, ALPHA_BLIT32_888_50);\
-		    }							\
-		    else						\
-		    {							\
-			if(SDL_HasMMX())				\
-				blitter(4, Uint16, ALPHA_BLIT32_888MMX);\
-			else						\
-				blitter(4, Uint16, ALPHA_BLIT32_888);	\
-		    }							\
-		} else							\
-		    blitter(4, Uint16, ALPHA_BLIT_ANY);			\
-		break;							\
-	    }								\
-	}								\
-    } while(0)
-
-#else
-
 #define CHOOSE_BLIT(blitter, alpha, fmt)				\
     do {								\
         if(alpha == 255) {						\
@@ -773,8 +378,6 @@
 	}								\
     } while(0)
 
-#endif
-
 /*
  * This takes care of the case when the surface is clipped on the left and/or
  * right. Top clipping has already been taken care of.