diff src/video/SDL_blit_A.c @ 1:cf2af46e9e2a

Changes since SDL 1.2.0 release
author Sam Lantinga <slouken@lokigames.com>
date Thu, 26 Apr 2001 16:50:19 +0000
parents 74212992fb08
children e8157fcb3114
line wrap: on
line diff
--- a/src/video/SDL_blit_A.c	Thu Apr 26 16:45:43 2001 +0000
+++ b/src/video/SDL_blit_A.c	Thu Apr 26 16:50:19 2001 +0000
@@ -195,8 +195,8 @@
 	}
 }
 
-/* fast RGB888->(A)RGB888 blending with surface alpha */
-static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
+/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
+static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
 {
 	int width = info->d_width;
 	int height = info->d_height;
@@ -204,32 +204,58 @@
 	int srcskip = info->s_skip >> 2;
 	Uint32 *dstp = (Uint32 *)info->d_pixels;
 	int dstskip = info->d_skip >> 2;
-	SDL_PixelFormat *srcfmt = info->src;
-	unsigned alpha = srcfmt->alpha;
 
 	while(height--) {
 	    DUFFS_LOOP4({
-		Uint32 s;
-		Uint32 d;
-		Uint32 s1;
-		Uint32 d1;
-		s = *srcp;
-		d = *dstp;
-		s1 = s & 0xff00ff;
-		d1 = d & 0xff00ff;
-		d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
-		s &= 0xff00;
-		d &= 0xff00;
-		d = (d + ((s - d) * alpha >> 8)) & 0xff00;
-		*dstp = d1 | d | 0xff000000;
-		++srcp;
-		++dstp;
+		    Uint32 s = *srcp++;
+		    Uint32 d = *dstp;
+		    *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
+			       + (s & d & 0x00010101)) | 0xff000000;
 	    }, width);
 	    srcp += srcskip;
 	    dstp += dstskip;
 	}
 }
 
+/* fast RGB888->(A)RGB888 blending with surface alpha */
+static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
+{
+	unsigned alpha = info->src->alpha;
+	if(alpha == 128) {
+		BlitRGBtoRGBSurfaceAlpha128(info);
+	} else {
+		int width = info->d_width;
+		int height = info->d_height;
+		Uint32 *srcp = (Uint32 *)info->s_pixels;
+		int srcskip = info->s_skip >> 2;
+		Uint32 *dstp = (Uint32 *)info->d_pixels;
+		int dstskip = info->d_skip >> 2;
+
+		while(height--) {
+			DUFFS_LOOP4({
+				Uint32 s;
+				Uint32 d;
+				Uint32 s1;
+				Uint32 d1;
+				s = *srcp;
+				d = *dstp;
+				s1 = s & 0xff00ff;
+				d1 = d & 0xff00ff;
+				d1 = (d1 + ((s1 - d1) * alpha >> 8))
+				     & 0xff00ff;
+				s &= 0xff00;
+				d &= 0xff00;
+				d = (d + ((s - d) * alpha >> 8)) & 0xff00;
+				*dstp = d1 | d | 0xff000000;
+				++srcp;
+				++dstp;
+			}, width);
+			srcp += srcskip;
+			dstp += dstskip;
+		}
+	}
+}
+
 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
 static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info)
 {
@@ -277,8 +303,18 @@
 	}
 }
 
-/* fast RGB565->RGB565 blending with surface alpha */
-static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
+/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
+
+/* blend a single 16 bit pixel at 50% */
+#define BLEND16_50(d, s, mask)						\
+	((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff)))
+
+/* blend two 16 bit pixels at 50% */
+#define BLEND2x16_50(d, s, mask)					     \
+	(((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \
+	 + (s & d & (~(mask | mask << 16))))
+
+static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
 {
 	int width = info->d_width;
 	int height = info->d_height;
@@ -286,56 +322,163 @@
 	int srcskip = info->s_skip >> 1;
 	Uint16 *dstp = (Uint16 *)info->d_pixels;
 	int dstskip = info->d_skip >> 1;
-	unsigned alpha = info->src->alpha >> 3; /* downscale alpha to 5 bits */
 
 	while(height--) {
-	    DUFFS_LOOP4({
-		Uint32 s = *srcp++;
-		Uint32 d = *dstp;
-		/*
-		 * shift out the middle component (green) to the high 16
-		 * bits, and process all three RGB components at the same
-		 * time.
-		 */
-		s = (s | s << 16) & 0x07e0f81f;
-		d = (d | d << 16) & 0x07e0f81f;
-		d += (s - d) * alpha >> 5;
-		d &= 0x07e0f81f;
-		*dstp++ = d | d >> 16;
-	    }, width);
-	    srcp += srcskip;
-	    dstp += dstskip;
+		if(((unsigned long)srcp ^ (unsigned long)dstp) & 2) {
+			/*
+			 * Source and destination differ in 32-bit alignment;
+			 * pipeline the source words. This is mostly a win for
+			 * big blits but no loss for small ones.
+			 */
+			Uint32 prev_sw;
+			int w = width;
+
+			/* handle odd destination */
+			if((unsigned long)dstp & 2) {
+				Uint16 d = *dstp, s = *srcp;
+				*dstp = BLEND16_50(d, s, mask);
+				dstp++;
+				srcp++;
+				w--;
+			}
+			srcp++;	/* srcp is now 32-bit aligned */
+
+			/* bootstrap pipeline with first halfword */
+			prev_sw = ((Uint32 *)srcp)[-1];
+
+			while(w > 1) {
+				Uint32 sw, dw, s;
+				sw = *(Uint32 *)srcp;
+				dw = *(Uint32 *)dstp;
+				if(SDL_BYTEORDER == SDL_BIG_ENDIAN)
+					s = (prev_sw << 16) + (sw >> 16);
+				else
+					s = (prev_sw >> 16) + (sw << 16);
+				prev_sw = sw;
+				*(Uint32 *)dstp = BLEND2x16_50(dw, s, mask);
+				dstp += 2;
+				srcp += 2;
+				w -= 2;
+			}
+
+			/* final pixel if any */
+			if(w) {
+				Uint16 d = *dstp, s;
+				if(SDL_BYTEORDER == SDL_BIG_ENDIAN)
+					s = prev_sw;
+				else
+					s = prev_sw >> 16;
+				*dstp = BLEND16_50(d, s, mask);
+				srcp++;
+				dstp++;
+			}
+			srcp += srcskip - 1;
+			dstp += dstskip;
+		} else {
+			/* source and destination have the same alignment */
+			int w = width;
+
+			/* first odd pixel? */
+			if((unsigned long)srcp & 2) {
+				Uint16 d = *dstp, s = *srcp;
+				*dstp = BLEND16_50(d, s, mask);
+				srcp++;
+				dstp++;
+				w--;
+			}
+			/* srcp and dstp are now 32-bit aligned */
+
+			while(w > 1) {
+				Uint32 sw = *(Uint32 *)srcp;
+				Uint32 dw = *(Uint32 *)dstp;
+				*(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask);
+				srcp += 2;
+				dstp += 2;
+				w -= 2;
+			}
+
+			/* last odd pixel? */
+			if(w) {
+				Uint16 d = *dstp, s = *srcp;
+				*dstp = BLEND16_50(d, s, mask);
+				srcp++;
+				dstp++;
+			}
+			srcp += srcskip;
+			dstp += dstskip;
+		}
+	}
+}
+
+/* fast RGB565->RGB565 blending with surface alpha */
+static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
+{
+	unsigned alpha = info->src->alpha;
+	if(alpha == 128) {
+		Blit16to16SurfaceAlpha128(info, 0xf7de);
+	} else {
+		int width = info->d_width;
+		int height = info->d_height;
+		Uint16 *srcp = (Uint16 *)info->s_pixels;
+		int srcskip = info->s_skip >> 1;
+		Uint16 *dstp = (Uint16 *)info->d_pixels;
+		int dstskip = info->d_skip >> 1;
+		alpha >>= 3;	/* downscale alpha to 5 bits */
+
+		while(height--) {
+			DUFFS_LOOP4({
+				Uint32 s = *srcp++;
+				Uint32 d = *dstp;
+				/*
+				 * shift out the middle component (green) to
+				 * the high 16 bits, and process all three RGB
+				 * components at the same time.
+				 */
+				s = (s | s << 16) & 0x07e0f81f;
+				d = (d | d << 16) & 0x07e0f81f;
+				d += (s - d) * alpha >> 5;
+				d &= 0x07e0f81f;
+				*dstp++ = d | d >> 16;
+			}, width);
+			srcp += srcskip;
+			dstp += dstskip;
+		}
 	}
 }
 
 /* fast RGB555->RGB555 blending with surface alpha */
 static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
 {
-	int width = info->d_width;
-	int height = info->d_height;
-	Uint16 *srcp = (Uint16 *)info->s_pixels;
-	int srcskip = info->s_skip >> 1;
-	Uint16 *dstp = (Uint16 *)info->d_pixels;
-	int dstskip = info->d_skip >> 1;
-	unsigned alpha = info->src->alpha >> 3; /* downscale alpha to 5 bits */
+	unsigned alpha = info->src->alpha; /* full 8-bit surface alpha */
+	if(alpha == 128) {
+		Blit16to16SurfaceAlpha128(info, 0xfbde);
+	} else {
+		int width = info->d_width;
+		int height = info->d_height;
+		Uint16 *srcp = (Uint16 *)info->s_pixels;
+		int srcskip = info->s_skip >> 1;
+		Uint16 *dstp = (Uint16 *)info->d_pixels;
+		int dstskip = info->d_skip >> 1;
+		alpha >>= 3;		/* downscale alpha to 5 bits */
 
-	while(height--) {
-	    DUFFS_LOOP4({
-		Uint32 s = *srcp++;
-		Uint32 d = *dstp;
-		/*
-		 * shift out the middle component (green) to the high 16
-		 * bits, and process all three RGB components at the same
-		 * time.
-		 */
-		s = (s | s << 16) & 0x03e07c1f;
-		d = (d | d << 16) & 0x03e07c1f;
-		d += (s - d) * alpha >> 5;
-		d &= 0x03e07c1f;
-		*dstp++ = d | d >> 16;
-	    }, width);
-	    srcp += srcskip;
-	    dstp += dstskip;
+		while(height--) {
+			DUFFS_LOOP4({
+				Uint32 s = *srcp++;
+				Uint32 d = *dstp;
+				/*
+				 * shift out the middle component (green) to
+				 * the high 16 bits, and process all three RGB
+				 * components at the same time.
+				 */
+				s = (s | s << 16) & 0x03e07c1f;
+				d = (d | d << 16) & 0x03e07c1f;
+				d += (s - d) * alpha >> 5;
+				d &= 0x03e07c1f;
+				*dstp++ = d | d >> 16;
+			}, width);
+			srcp += srcskip;
+			dstp += dstskip;
+		}
 	}
 }