diff src/video/SDL_blit_N.c @ 1162:2651158f59b8

Enable altivec blitters on PowerPC Linux, and some fixes for recent GCCs versions.
author Ryan C. Gordon <icculus@icculus.org>
date Thu, 20 Oct 2005 06:55:26 +0000
parents d0ae4dff7208
children 867f521591e5
line wrap: on
line diff
--- a/src/video/SDL_blit_N.c	Thu Oct 20 06:53:56 2005 +0000
+++ b/src/video/SDL_blit_N.c	Thu Oct 20 06:55:26 2005 +0000
@@ -36,10 +36,11 @@
 /* Functions to blit from N-bit surfaces to other surfaces */
 
 #ifdef USE_ALTIVEC_BLITTERS
+#include <altivec.h>
 #include <assert.h>
+#include <stdlib.h>
 #ifdef MACOSX
 #include <sys/sysctl.h>
-#include <stdlib.h>
 static size_t GetL3CacheSize( void )
 {
     const char key[] = "hw.l3cachesize";
@@ -60,6 +61,18 @@
 }
 #endif /* MACOSX */
 
+#if ((defined MACOSX) && (__GNUC__ < 4))
+    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
+        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
+    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
+        (vector unsigned short) ( a,b,c,d,e,f,g,h )
+#else
+    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
+        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
+    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
+        (vector unsigned short) { a,b,c,d,e,f,g,h }
+#endif
+
 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
@@ -112,7 +125,8 @@
     if (!dstfmt) {
         dstfmt = &default_pixel_format;
     }
-    vector unsigned char plus = (vector unsigned char)( 0x00, 0x00, 0x00, 0x00,
+    vector unsigned char plus = VECUINT8_LITERAL(
+                                      0x00, 0x00, 0x00, 0x00,
                                       0x04, 0x04, 0x04, 0x04,
                                       0x08, 0x08, 0x08, 0x08,
                                       0x0C, 0x0C, 0x0C, 0x0C );
@@ -130,7 +144,7 @@
         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
     }           
 #undef RESHIFT  
-    ((unsigned int *)&srcvec)[0] = (rmask | gmask | bmask | amask);
+    ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
     return(vswiz);
 }
@@ -145,17 +159,17 @@
     SDL_PixelFormat *srcfmt = info->src;
     vector unsigned char valpha = vec_splat_u8(0);
     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
-    vector unsigned char vgmerge = (vector unsigned char)(
+    vector unsigned char vgmerge = VECUINT8_LITERAL(
         0x00, 0x02, 0x00, 0x06,
         0x00, 0x0a, 0x00, 0x0e,
         0x00, 0x12, 0x00, 0x16,
         0x00, 0x1a, 0x00, 0x1e);
     vector unsigned short v1 = vec_splat_u16(1);
     vector unsigned short v3 = vec_splat_u16(3);
-    vector unsigned short v3f = (vector unsigned short)(
+    vector unsigned short v3f = VECUINT16_LITERAL(
         0x003f, 0x003f, 0x003f, 0x003f,
         0x003f, 0x003f, 0x003f, 0x003f);
-    vector unsigned short vfc = (vector unsigned short)(
+    vector unsigned short vfc = VECUINT16_LITERAL(
         0x00fc, 0x00fc, 0x00fc, 0x00fc,
         0x00fc, 0x00fc, 0x00fc, 0x00fc);
     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
@@ -172,9 +186,9 @@
         /* do scalar until we can align... */
 #define ONE_PIXEL_BLEND(condition, widthvar) \
         while (condition) { \
-            Uint32 pixel; \
+            Uint32 Pixel; \
             unsigned sR, sG, sB, sA; \
-            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, pixel, \
+            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
                           sR, sG, sB, sA); \
             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
                                 ((sG << 3) & 0x000007E0) | \
@@ -259,20 +273,20 @@
         0x00 - 0x0e evens are the red
         0x01 - 0x0f odds are zero
     */
-    vector unsigned char vredalpha1 = (vector unsigned char)(
+    vector unsigned char vredalpha1 = VECUINT8_LITERAL(
         0x10, 0x00, 0x01, 0x01,
         0x10, 0x02, 0x01, 0x01,
         0x10, 0x04, 0x01, 0x01,
         0x10, 0x06, 0x01, 0x01
     );
-    vector unsigned char vredalpha2 = (vector unsigned char)(
+    vector unsigned char vredalpha2 = (vector unsigned char) (
         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
     );
     /*
         0x00 - 0x0f is ARxx ARxx ARxx ARxx
         0x11 - 0x0f odds are blue
     */
-    vector unsigned char vblue1 = (vector unsigned char)(
+    vector unsigned char vblue1 = VECUINT8_LITERAL(
         0x00, 0x01, 0x02, 0x11,
         0x04, 0x05, 0x06, 0x13,
         0x08, 0x09, 0x0a, 0x15,
@@ -285,7 +299,7 @@
         0x00 - 0x0f is ARxB ARxB ARxB ARxB
         0x10 - 0x0e evens are green
     */
-    vector unsigned char vgreen1 = (vector unsigned char)(
+    vector unsigned char vgreen1 = VECUINT8_LITERAL(
         0x00, 0x01, 0x10, 0x03,
         0x04, 0x05, 0x12, 0x07,
         0x08, 0x09, 0x14, 0x0b,
@@ -323,10 +337,10 @@
 #define ONE_PIXEL_BLEND(condition, widthvar) \
         while (condition) { \
             unsigned sR, sG, sB; \
-            unsigned short pixel = *((unsigned short *)src); \
-            sR = (pixel >> 8) & 0xf8; \
-            sG = (pixel >> 3) & 0xfc; \
-            sB = (pixel << 3) & 0xf8; \
+            unsigned short Pixel = *((unsigned short *)src); \
+            sR = (Pixel >> 8) & 0xf8; \
+            sG = (Pixel >> 3) & 0xfc; \
+            sB = (Pixel << 3) & 0xf8; \
             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
             src += 2; \
             dst += 4; \
@@ -404,7 +418,7 @@
         0x00 - 0x0e evens are the red
         0x01 - 0x0f odds are zero
     */
-    vector unsigned char vredalpha1 = (vector unsigned char)(
+    vector unsigned char vredalpha1 = VECUINT8_LITERAL(
         0x10, 0x00, 0x01, 0x01,
         0x10, 0x02, 0x01, 0x01,
         0x10, 0x04, 0x01, 0x01,
@@ -417,7 +431,7 @@
         0x00 - 0x0f is ARxx ARxx ARxx ARxx
         0x11 - 0x0f odds are blue
     */
-    vector unsigned char vblue1 = (vector unsigned char)(
+    vector unsigned char vblue1 = VECUINT8_LITERAL(
         0x00, 0x01, 0x02, 0x11,
         0x04, 0x05, 0x06, 0x13,
         0x08, 0x09, 0x0a, 0x15,
@@ -430,7 +444,7 @@
         0x00 - 0x0f is ARxB ARxB ARxB ARxB
         0x10 - 0x0e evens are green
     */
-    vector unsigned char vgreen1 = (vector unsigned char)(
+    vector unsigned char vgreen1 = VECUINT8_LITERAL(
         0x00, 0x01, 0x10, 0x03,
         0x04, 0x05, 0x12, 0x07,
         0x08, 0x09, 0x14, 0x0b,
@@ -468,10 +482,10 @@
 #define ONE_PIXEL_BLEND(condition, widthvar) \
         while (condition) { \
             unsigned sR, sG, sB; \
-            unsigned short pixel = *((unsigned short *)src); \
-            sR = (pixel >> 7) & 0xf8; \
-            sG = (pixel >> 2) & 0xf8; \
-            sB = (pixel << 3) & 0xf8; \
+            unsigned short Pixel = *((unsigned short *)src); \
+            sR = (Pixel >> 7) & 0xf8; \
+            sG = (Pixel >> 2) & 0xf8; \
+            sB = (Pixel << 3) & 0xf8; \
             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
             src += 2; \
             dst += 4; \
@@ -565,39 +579,39 @@
         valpha = (vector unsigned int)vzero;
     }
     ckey &= rgbmask;
-    ((unsigned int *)&vckey)[0] = ckey;
+    ((unsigned int *)(char*)&vckey)[0] = ckey;
     vckey = vec_splat(vckey, 0);
-    ((unsigned int *)&vrgbmask)[0] = rgbmask;
+    ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
     vrgbmask = vec_splat(vrgbmask, 0);
 
     while (height--) {
 #define ONE_PIXEL_BLEND(condition, widthvar) \
         if (copy_alpha) { \
             while (condition) { \
-                Uint32 pixel; \
+                Uint32 Pixel; \
                 unsigned sR, sG, sB, sA; \
-                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, pixel, \
+                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
                           sR, sG, sB, sA); \
-                if ( (pixel & rgbmask) != ckey ) { \
+                if ( (Pixel & rgbmask) != ckey ) { \
                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
                             sR, sG, sB, sA); \
                 } \
-                ((Uint8 *)dstp) += dstbpp; \
-                ((Uint8 *)srcp) += srcbpp; \
+                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
+                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
                 widthvar--; \
             } \
         } else { \
             while (condition) { \
-                Uint32 pixel; \
+                Uint32 Pixel; \
                 unsigned sR, sG, sB; \
-                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, pixel); \
-                if ( pixel != ckey ) { \
-                    RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB); \
+                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
+                if ( Pixel != ckey ) { \
+                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
                               sR, sG, sB, alpha); \
                 } \
-                ((Uint8 *)dstp) += dstbpp; \
-                ((Uint8 *)srcp) += srcbpp; \
+                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
+                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
                 widthvar--; \
             } \
         }
@@ -819,6 +833,7 @@
                 /* Feature 2 is has-AltiVec */
                 | ((SDL_HasAltiVec()) ? 2 : 0)
                 /* Feature 4 is dont-use-prefetch */
+                /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
                 | ((GetL3CacheSize() == 0) ? 4 : 0)
             );
         }
@@ -911,43 +926,43 @@
 			dst += dstskip;
 		}
 	} else {
-		int pixel;
+		int Pixel;
 
 		while ( height-- ) {
 #ifdef USE_DUFFS_LOOP
 			DUFFS_LOOP(
-				RGB888_RGB332(pixel, *src);
-				*dst++ = map[pixel];
+				RGB888_RGB332(Pixel, *src);
+				*dst++ = map[Pixel];
 				++src;
 			, width);
 #else
 			for ( c=width/4; c; --c ) {
 				/* Pack RGB into 8bit pixel */
-				RGB888_RGB332(pixel, *src);
-				*dst++ = map[pixel];
+				RGB888_RGB332(Pixel, *src);
+				*dst++ = map[Pixel];
 				++src;
-				RGB888_RGB332(pixel, *src);
-				*dst++ = map[pixel];
+				RGB888_RGB332(Pixel, *src);
+				*dst++ = map[Pixel];
 				++src;
-				RGB888_RGB332(pixel, *src);
-				*dst++ = map[pixel];
+				RGB888_RGB332(Pixel, *src);
+				*dst++ = map[Pixel];
 				++src;
-				RGB888_RGB332(pixel, *src);
-				*dst++ = map[pixel];
+				RGB888_RGB332(Pixel, *src);
+				*dst++ = map[Pixel];
 				++src;
 			}
 			switch ( width & 3 ) {
 				case 3:
-					RGB888_RGB332(pixel, *src);
-					*dst++ = map[pixel];
+					RGB888_RGB332(Pixel, *src);
+					*dst++ = map[Pixel];
 					++src;
 				case 2:
-					RGB888_RGB332(pixel, *src);
-					*dst++ = map[pixel];
+					RGB888_RGB332(Pixel, *src);
+					*dst++ = map[Pixel];
 					++src;
 				case 1:
-					RGB888_RGB332(pixel, *src);
-					*dst++ = map[pixel];
+					RGB888_RGB332(Pixel, *src);
+					*dst++ = map[Pixel];
 					++src;
 			}
 #endif /* USE_DUFFS_LOOP */
@@ -1820,7 +1835,7 @@
 #ifndef USE_DUFFS_LOOP
 	int c;
 #endif
-	int pixel;
+	int Pixel;
 	int width, height;
 	Uint32 *src;
 	const Uint8 *map;
@@ -1839,8 +1854,8 @@
 #ifdef USE_DUFFS_LOOP
 	while ( height-- ) {
 		DUFFS_LOOP(
-			RGB888_RGB332(pixel, *src);
-			*dst++ = map[pixel];
+			RGB888_RGB332(Pixel, *src);
+			*dst++ = map[Pixel];
 			++src;
 		, width);
 		src += srcskip;
@@ -1850,31 +1865,31 @@
 	while ( height-- ) {
 		for ( c=width/4; c; --c ) {
 			/* Pack RGB into 8bit pixel */
-			RGB888_RGB332(pixel, *src);
-			*dst++ = map[pixel];
+			RGB888_RGB332(Pixel, *src);
+			*dst++ = map[Pixel];
 			++src;
-			RGB888_RGB332(pixel, *src);
-			*dst++ = map[pixel];
+			RGB888_RGB332(Pixel, *src);
+			*dst++ = map[Pixel];
 			++src;
-			RGB888_RGB332(pixel, *src);
-			*dst++ = map[pixel];
+			RGB888_RGB332(Pixel, *src);
+			*dst++ = map[Pixel];
 			++src;
-			RGB888_RGB332(pixel, *src);
-			*dst++ = map[pixel];
+			RGB888_RGB332(Pixel, *src);
+			*dst++ = map[Pixel];
 			++src;
 		}
 		switch ( width & 3 ) {
 			case 3:
-				RGB888_RGB332(pixel, *src);
-				*dst++ = map[pixel];
+				RGB888_RGB332(Pixel, *src);
+				*dst++ = map[Pixel];
 				++src;
 			case 2:
-				RGB888_RGB332(pixel, *src);
-				*dst++ = map[pixel];
+				RGB888_RGB332(Pixel, *src);
+				*dst++ = map[Pixel];
 				++src;
 			case 1:
-				RGB888_RGB332(pixel, *src);
-				*dst++ = map[pixel];
+				RGB888_RGB332(Pixel, *src);
+				*dst++ = map[Pixel];
 				++src;
 		}
 		src += srcskip;
@@ -1893,7 +1908,7 @@
 	Uint8 *dst;
 	int srcskip, dstskip;
 	int srcbpp;
-	Uint32 pixel;
+	Uint32 Pixel;
 	int  sR, sG, sB;
 	SDL_PixelFormat *srcfmt;
 
@@ -1912,7 +1927,7 @@
 		while ( height-- ) {
 #ifdef USE_DUFFS_LOOP
 			DUFFS_LOOP(
-				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
+				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
 								sR, sG, sB);
 				if ( 1 ) {
 				  	/* Pack RGB into 8bit pixel */
@@ -1925,7 +1940,7 @@
 			, width);
 #else
 			for ( c=width; c; --c ) {
-				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
+				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
 								sR, sG, sB);
 				if ( 1 ) {
 				  	/* Pack RGB into 8bit pixel */
@@ -1944,7 +1959,7 @@
 		while ( height-- ) {
 #ifdef USE_DUFFS_LOOP
 			DUFFS_LOOP(
-				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
+				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
 								sR, sG, sB);
 				if ( 1 ) {
 				  	/* Pack RGB into 8bit pixel */
@@ -1957,7 +1972,7 @@
 			, width);
 #else
 			for ( c=width; c; --c ) {
-				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
+				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
 								sR, sG, sB);
 				if ( 1 ) {
 				  	/* Pack RGB into 8bit pixel */
@@ -1991,11 +2006,11 @@
 	while ( height-- ) {
 		DUFFS_LOOP(
 		{
-		        Uint32 pixel;
+		        Uint32 Pixel;
 			unsigned sR;
 			unsigned sG;
 			unsigned sB;
-			DISEMBLE_RGB(src, srcbpp, srcfmt, pixel, sR, sG, sB);
+			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
 			dst += dstbpp;
 			src += srcbpp;
@@ -2023,9 +2038,9 @@
 	/* FIXME: should map alpha to [0..255] correctly! */
 	while ( height-- ) {
 		for ( c=width; c; --c ) {
-		        Uint32 pixel;
+		        Uint32 Pixel;
 			unsigned sR, sG, sB, sA;
-			DISEMBLE_RGBA(src, srcbpp, srcfmt, pixel,
+			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
 				      sR, sG, sB, sA);
 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
 				      sR, sG, sB, sA);
@@ -2050,7 +2065,7 @@
 	Uint32 ckey = srcfmt->colorkey;
 	Uint32 rgbmask = ~srcfmt->Amask;
 	int srcbpp;
-	Uint32 pixel;
+	Uint32 Pixel;
 	Uint8  sR, sG, sB;
 
 	/* Set up some basic variables */
@@ -2061,9 +2076,9 @@
 		while ( height-- ) {
 			DUFFS_LOOP(
 			{
-				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
+				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
 								sR, sG, sB);
-				if ( (pixel & rgbmask) != ckey ) {
+				if ( (Pixel & rgbmask) != ckey ) {
 				  	/* Pack RGB into 8bit pixel */
 				  	*dst = ((sR>>5)<<(3+2))|
 						((sG>>5)<<(2)) |
@@ -2080,9 +2095,9 @@
 		while ( height-- ) {
 			DUFFS_LOOP(
 			{
-				DISEMBLE_RGB(src, srcbpp, srcfmt, pixel,
+				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
 								sR, sG, sB);
-				if ( (pixel & rgbmask) != ckey ) {
+				if ( (Pixel & rgbmask) != ckey ) {
 				  	/* Pack RGB into 8bit pixel */
 				  	*dst = palmap[((sR>>5)<<(3+2))|
 							((sG>>5)<<(2))  |
@@ -2147,13 +2162,13 @@
 	while ( height-- ) {
 		DUFFS_LOOP(
 		{
-		        Uint32 pixel;
+		        Uint32 Pixel;
 			unsigned sR;
 			unsigned sG;
 			unsigned sB;
-			RETRIEVE_RGB_PIXEL(src, srcbpp, pixel);
-			if ( pixel != ckey ) {
-			        RGB_FROM_PIXEL(pixel, srcfmt, sR, sG, sB);
+			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
+			if ( Pixel != ckey ) {
+			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
 					      sR, sG, sB, alpha);
 			}
@@ -2181,7 +2196,7 @@
 
 	Uint8 srcbpp;
 	Uint8 dstbpp;
-	Uint32 pixel;
+	Uint32 Pixel;
 	Uint8  sR, sG, sB, sA;
 
 	/* Set up some basic variables */
@@ -2193,9 +2208,9 @@
 	while ( height-- ) {
 		DUFFS_LOOP(
 		{
-			DISEMBLE_RGBA(src, srcbpp, srcfmt, pixel,
+			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
 				      sR, sG, sB, sA);
-			if ( (pixel & rgbmask) != ckey ) {
+			if ( (Pixel & rgbmask) != ckey ) {
 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
 						sR, sG, sB, sA);
 			}