diff src/video/SDL_rendercopy.c @ 1989:5b5f5de5433f

Optimized the copy blitters a little bit
author Sam Lantinga <slouken@libsdl.org>
date Mon, 28 Aug 2006 14:10:46 +0000
parents 8055185ae4ed
children 7387e0514595
line wrap: on
line diff
--- a/src/video/SDL_rendercopy.c	Mon Aug 28 04:39:37 2006 +0000
+++ b/src/video/SDL_rendercopy.c	Mon Aug 28 14:10:46 2006 +0000
@@ -215,11 +215,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -287,11 +296,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -341,6 +359,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -395,6 +416,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -437,11 +461,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -521,11 +554,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -641,11 +683,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -713,11 +764,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -767,6 +827,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -821,6 +884,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -863,11 +929,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -947,11 +1022,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1067,11 +1151,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1139,11 +1232,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1193,6 +1295,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -1247,6 +1352,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -1289,11 +1397,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1373,11 +1490,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1464,11 +1590,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1536,11 +1671,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1590,6 +1734,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -1644,6 +1791,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -1686,11 +1836,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1770,11 +1929,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1890,11 +2058,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1962,11 +2139,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2016,6 +2202,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -2070,6 +2259,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -2112,11 +2304,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2196,11 +2397,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2316,11 +2526,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2388,11 +2607,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2442,6 +2670,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -2496,6 +2727,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -2538,11 +2772,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2622,11 +2865,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2742,11 +2994,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2814,11 +3075,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2868,6 +3138,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -2922,6 +3195,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -2964,11 +3240,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3048,11 +3333,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3168,11 +3462,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3240,11 +3543,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3294,6 +3606,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -3348,6 +3663,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -3390,11 +3708,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3474,11 +3801,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3594,11 +3930,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3666,11 +4011,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3720,6 +4074,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -3774,6 +4131,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -3816,11 +4176,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3900,11 +4269,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4020,11 +4398,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4092,11 +4479,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4146,6 +4542,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -4200,6 +4599,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -4242,11 +4644,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4326,11 +4737,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4446,11 +4866,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4518,11 +4947,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4572,6 +5010,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -4626,6 +5067,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -4668,11 +5112,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4752,11 +5205,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4872,11 +5334,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4944,11 +5415,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4998,6 +5478,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -5052,6 +5535,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -5094,11 +5580,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -5178,11 +5673,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;