changeset 1989:5b5f5de5433f

Optimized the copy blitters a little bit
author Sam Lantinga <slouken@libsdl.org>
date Mon, 28 Aug 2006 14:10:46 +0000
parents 1ee02169bbb0
children 7b573c59cb1f
files src/video/SDL_rendercopy.c src/video/SDL_rendercopy.h src/video/SDL_renderer_sw.c src/video/sdlgenblit.pl
diffstat 4 files changed, 808 insertions(+), 298 deletions(-) [+]
line wrap: on
line diff
--- a/src/video/SDL_rendercopy.c	Mon Aug 28 04:39:37 2006 +0000
+++ b/src/video/SDL_rendercopy.c	Mon Aug 28 14:10:46 2006 +0000
@@ -215,11 +215,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -287,11 +296,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -341,6 +359,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -395,6 +416,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -437,11 +461,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -521,11 +554,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -641,11 +683,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -713,11 +764,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -767,6 +827,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -821,6 +884,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -863,11 +929,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -947,11 +1022,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1067,11 +1151,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1139,11 +1232,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1193,6 +1295,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -1247,6 +1352,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -1289,11 +1397,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1373,11 +1490,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1464,11 +1590,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1536,11 +1671,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1590,6 +1734,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -1644,6 +1791,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -1686,11 +1836,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1770,11 +1929,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1890,11 +2058,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -1962,11 +2139,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2016,6 +2202,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -2070,6 +2259,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -2112,11 +2304,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2196,11 +2397,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2316,11 +2526,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2388,11 +2607,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2442,6 +2670,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -2496,6 +2727,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -2538,11 +2772,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2622,11 +2865,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2742,11 +2994,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2814,11 +3075,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -2868,6 +3138,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -2922,6 +3195,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -2964,11 +3240,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3048,11 +3333,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3168,11 +3462,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3240,11 +3543,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3294,6 +3606,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -3348,6 +3663,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -3390,11 +3708,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3474,11 +3801,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3594,11 +3930,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3666,11 +4011,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3720,6 +4074,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -3774,6 +4131,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -3816,11 +4176,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -3900,11 +4269,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4020,11 +4398,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4092,11 +4479,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4146,6 +4542,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -4200,6 +4599,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -4242,11 +4644,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4326,11 +4737,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4446,11 +4866,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4518,11 +4947,20 @@
             dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4572,6 +5010,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             ++src;
@@ -4626,6 +5067,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
             *dst = pixel;
             posx += incx;
@@ -4668,11 +5112,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4752,11 +5205,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4872,11 +5334,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4944,11 +5415,20 @@
             dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -4998,6 +5478,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             ++src;
@@ -5052,6 +5535,9 @@
                 G = (G * modulateG) / 255;
                 B = (B * modulateB) / 255;
             }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                A = (A * modulateA) / 255;
+            }
             pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
             *dst = pixel;
             posx += incx;
@@ -5094,11 +5580,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
@@ -5178,11 +5673,20 @@
             }
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                srcR = (srcR * srcA) / 255;
-                srcG = (srcG * srcA) / 255;
-                srcB = (srcB * srcA) / 255;
-            }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+                if (srcA < 255) {
+                    srcR = (srcR * srcA) / 255;
+                    srcG = (srcG * srcA) / 255;
+                    srcB = (srcB * srcA) / 255;
+                }
+            }
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (srcA) {
+                    dstR = srcR;
+                    dstG = srcG;
+                    dstB = srcB;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
                 dstR = srcR + ((255 - srcA) * dstR) / 255;
                 dstG = srcG + ((255 - srcA) * dstG) / 255;
--- a/src/video/SDL_rendercopy.h	Mon Aug 28 04:39:37 2006 +0000
+++ b/src/video/SDL_rendercopy.h	Mon Aug 28 14:10:46 2006 +0000
@@ -26,9 +26,10 @@
 
 #define SDL_RENDERCOPY_MODULATE_COLOR   0x0001
 #define SDL_RENDERCOPY_MODULATE_ALPHA   0x0002
-#define SDL_RENDERCOPY_BLEND            0x0010
-#define SDL_RENDERCOPY_ADD              0x0020
-#define SDL_RENDERCOPY_MOD              0x0040
+#define SDL_RENDERCOPY_MASK             0x0010
+#define SDL_RENDERCOPY_BLEND            0x0020
+#define SDL_RENDERCOPY_ADD              0x0040
+#define SDL_RENDERCOPY_MOD              0x0080
 #define SDL_RENDERCOPY_NEAREST          0x0100
 
 typedef struct {
--- a/src/video/SDL_renderer_sw.c	Mon Aug 28 04:39:37 2006 +0000
+++ b/src/video/SDL_renderer_sw.c	Mon Aug 28 14:10:46 2006 +0000
@@ -628,9 +628,9 @@
                 copydata.flags |= SDL_RENDERCOPY_MODULATE_ALPHA;
                 copydata.a = texture->a;
             }
-            if (texture->
-                blendMode & (SDL_TEXTUREBLENDMODE_MASK |
-                             SDL_TEXTUREBLENDMODE_BLEND)) {
+            if (texture->blendMode & SDL_TEXTUREBLENDMODE_MASK) {
+                copydata.flags |= SDL_RENDERCOPY_MASK;
+            } else if (texture->blendMode & SDL_TEXTUREBLENDMODE_BLEND) {
                 copydata.flags |= SDL_RENDERCOPY_BLEND;
             } else if (texture->blendMode & SDL_TEXTUREBLENDMODE_ADD) {
                 copydata.flags |= SDL_RENDERCOPY_ADD;
--- a/src/video/sdlgenblit.pl	Mon Aug 28 04:39:37 2006 +0000
+++ b/src/video/sdlgenblit.pl	Mon Aug 28 14:10:46 2006 +0000
@@ -125,9 +125,10 @@
     print FILE <<__EOF__;
 #define SDL_RENDERCOPY_MODULATE_COLOR   0x0001
 #define SDL_RENDERCOPY_MODULATE_ALPHA   0x0002
-#define SDL_RENDERCOPY_BLEND            0x0010
-#define SDL_RENDERCOPY_ADD              0x0020
-#define SDL_RENDERCOPY_MOD              0x0040
+#define SDL_RENDERCOPY_MASK             0x0010
+#define SDL_RENDERCOPY_BLEND            0x0020
+#define SDL_RENDERCOPY_ADD              0x0040
+#define SDL_RENDERCOPY_MOD              0x0080
 #define SDL_RENDERCOPY_NEAREST          0x0100
 
 typedef struct {
@@ -214,19 +215,35 @@
     my $dst = shift;
     my $modulate = shift;
     my $blend = shift;
+    my $s = "";
+    my $d = "";
+
+    # Nice and easy...
+    if ( $src eq $dst && !$modulate && !$blend ) {
+        print FILE <<__EOF__;
+            *dst = *src;
+__EOF__
+        return;
+    }
+        
+    if ( $blend ) {
+        get_rgba("src", $src);
+        get_rgba("dst", $dst);
+        $s = "src";
+        $d = "dst";
+    } else {
+        get_rgba("", $src);
+    }
+
     if ( $modulate ) {
         print FILE <<__EOF__;
             if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
-                ${src}R = (${src}R * modulateR) / 255;
-                ${src}G = (${src}G * modulateG) / 255;
-                ${src}B = (${src}B * modulateB) / 255;
+                ${s}R = (${s}R * modulateR) / 255;
+                ${s}G = (${s}G * modulateG) / 255;
+                ${s}B = (${s}B * modulateB) / 255;
             }
-__EOF__
-    }
-    if ( $modulate && $blend ) {
-        print FILE <<__EOF__;
             if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
-                ${src}A = (${src}A * modulateA) / 255;
+                ${s}A = (${s}A * modulateA) / 255;
             }
 __EOF__
     }
@@ -234,29 +251,43 @@
         print FILE <<__EOF__;
             if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                 /* This goes away if we ever use premultiplied alpha */
-                ${src}R = (${src}R * ${src}A) / 255;
-                ${src}G = (${src}G * ${src}A) / 255;
-                ${src}B = (${src}B * ${src}A) / 255;
+                if (${s}A < 255) {
+                    ${s}R = (${s}R * ${s}A) / 255;
+                    ${s}G = (${s}G * ${s}A) / 255;
+                    ${s}B = (${s}B * ${s}A) / 255;
+                }
             }
-            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_MASK:
+                if (${s}A) {
+                    ${d}R = ${s}R;
+                    ${d}G = ${s}G;
+                    ${d}B = ${s}B;
+                }
+                break;
             case SDL_RENDERCOPY_BLEND:
-                ${dst}R = ${src}R + ((255 - ${src}A) * ${dst}R) / 255;
-                ${dst}G = ${src}G + ((255 - ${src}A) * ${dst}G) / 255;
-                ${dst}B = ${src}B + ((255 - ${src}A) * ${dst}B) / 255;
+                ${d}R = ${s}R + ((255 - ${s}A) * ${d}R) / 255;
+                ${d}G = ${s}G + ((255 - ${s}A) * ${d}G) / 255;
+                ${d}B = ${s}B + ((255 - ${s}A) * ${d}B) / 255;
                 break;
             case SDL_RENDERCOPY_ADD:
-                ${dst}R = ${src}R + ${dst}R; if (${dst}R > 255) ${dst}R = 255;
-                ${dst}G = ${src}G + ${dst}G; if (${dst}G > 255) ${dst}G = 255;
-                ${dst}B = ${src}B + ${dst}B; if (${dst}B > 255) ${dst}B = 255;
+                ${d}R = ${s}R + ${d}R; if (${d}R > 255) ${d}R = 255;
+                ${d}G = ${s}G + ${d}G; if (${d}G > 255) ${d}G = 255;
+                ${d}B = ${s}B + ${d}B; if (${d}B > 255) ${d}B = 255;
                 break;
             case SDL_RENDERCOPY_MOD:
-                ${dst}R = (${src}R * ${dst}R) / 255;
-                ${dst}G = (${src}G * ${dst}G) / 255;
-                ${dst}B = (${src}B * ${dst}B) / 255;
+                ${d}R = (${s}R * ${d}R) / 255;
+                ${d}G = (${s}G * ${d}G) / 255;
+                ${d}B = (${s}B * ${d}B) / 255;
                 break;
             }
 __EOF__
     }
+    if ( $blend ) {
+        set_rgba("dst", $dst);
+    } else {
+        set_rgba("", $dst);
+    }
 }
 
 sub output_copyfunc
@@ -325,20 +356,7 @@
         print FILE <<__EOF__;
             }
 __EOF__
-        if ( $blend ) {
-            get_rgba("src", $src);
-            get_rgba("dst", $dst);
-            output_copycore("src", "dst", $modulate, $blend);
-            set_rgba("dst", $dst);
-        } elsif ( $modulate || $src ne $dst ) {
-            get_rgba("", $src);
-            output_copycore("", "", $modulate, $blend);
-            set_rgba("", $dst);
-        } else {
-            print FILE <<__EOF__;
-            *dst = *src;
-__EOF__
-        }
+        output_copycore($src, $dst, $modulate, $blend);
         print FILE <<__EOF__;
             posx += incx;
             ++dst;
@@ -356,20 +374,7 @@
         int n = data->dst_w;
         while (n--) {
 __EOF__
-        if ( $blend ) {
-            get_rgba("src", $src);
-            get_rgba("dst", $dst);
-            output_copycore("src", "dst", $modulate, $blend);
-            set_rgba("dst", $dst);
-        } elsif ( $modulate || $src ne $dst ) {
-            get_rgba("", $src);
-            output_copycore("", "", $modulate, $blend);
-            set_rgba("", $dst);
-        } else {
-            print FILE <<__EOF__;
-            *dst = *src;
-__EOF__
-        }
+        output_copycore($src, $dst, $modulate, $blend);
         print FILE <<__EOF__;
             ++src;
             ++dst;