diff src/video/SDL_rendercopy.c @ 1985:8055185ae4ed

Added source color and alpha modulation support. Added perl script to generate optimized render copy functions.
author Sam Lantinga <slouken@libsdl.org>
date Mon, 28 Aug 2006 03:17:39 +0000
parents
children 5b5f5de5433f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_rendercopy.c	Mon Aug 28 03:17:39 2006 +0000
@@ -0,0 +1,5215 @@
+/* DO NOT EDIT!  This file is generated by sdlgenblit.pl */
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+/* *INDENT-OFF* */
+
+#include "SDL_video.h"
+#include "SDL_rendercopy.h"
+
+static struct {
+    Uint32 src_format;
+    Uint32 dst_format;
+    int modMode;
+    int blendMode;
+    int scaleMode;
+    SDL_RenderCopyFunc func;
+} SDL_RenderCopyFuncTable[] = {
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_RGB888_Blend },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGB888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_RGB888_BGR888 },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_BGR888_Blend },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGB888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_BGR888_RGB888 },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_RGB888_Blend },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGR888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_BGR888_Blend },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGR888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_ARGB8888_RGB888 },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_RGB888_Blend },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ARGB8888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_ARGB8888_BGR888 },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_BGR888_Blend },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ARGB8888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_RGBA8888_RGB888 },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_RGB888_Blend },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGBA8888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_RGBA8888_BGR888 },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_BGR888_Blend },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGBA8888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_ABGR8888_RGB888 },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_RGB888_Blend },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ABGR8888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_ABGR8888_BGR888 },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_BGR888_Blend },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ABGR8888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, 0, 0,  SDL_RenderCopy_BGRA8888_RGB888 },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_RGB888_Blend },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Blend_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGRA8888_RGB888_Modulate },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, 0, 0,  SDL_RenderCopy_BGRA8888_BGR888 },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_BGR888_Blend },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Blend_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGRA8888_BGR888_Modulate },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Scale },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend },
+    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend_Scale },
+};
+
+/*
+ * Look up a copy routine in SDL_RenderCopyFuncTable.
+ *
+ * A row qualifies when its source and destination formats equal the
+ * requested ones and each of its mode masks contains every bit set in the
+ * corresponding requested mode.  Rows are tried in table order and the
+ * first qualifying row wins.
+ *
+ * Returns the routine of the first qualifying row, or NULL when no row
+ * qualifies.
+ */
+SDL_RenderCopyFunc SDL_GetRenderCopyFunc(Uint32 src_format, Uint32 dst_format, int modMode, int blendMode, int scaleMode)
+{
+    const int row_count = (int) SDL_arraysize(SDL_RenderCopyFuncTable);
+    int row;
+
+    for (row = 0; row < row_count; ++row) {
+        /* "requested & ~supported" is non-zero iff a requested bit is unsupported */
+        if (SDL_RenderCopyFuncTable[row].src_format == src_format &&
+            SDL_RenderCopyFuncTable[row].dst_format == dst_format &&
+            (modMode & ~SDL_RenderCopyFuncTable[row].modMode) == 0 &&
+            (blendMode & ~SDL_RenderCopyFuncTable[row].blendMode) == 0 &&
+            (scaleMode & ~SDL_RenderCopyFuncTable[row].scaleMode) == 0) {
+            return SDL_RenderCopyFuncTable[row].func;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Copy a src_w x src_h block of 32-bit pixels onto a dst_w x dst_h block,
+ * choosing one source pixel per destination pixel by stepping through the
+ * source in 16.16 fixed point.  Pixels are copied unchanged.
+ *
+ * Side effects on *data: dst_h is counted down (it is -1 after a normal
+ * run) and dst is advanced by dst_pitch per row; src is left untouched.
+ *
+ * An empty or negative destination size is treated as "nothing to do";
+ * this also prevents the divisions below from dividing by zero.
+ *
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, 16.16 fixed point.
+       NOTE(review): the << 16 needs src_w/src_h below 32768 when int is
+       32 bits wide - confirm callers respect that. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of every row: posx starts at 1.0 */
+        Uint32 *src = NULL;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            /* Only recompute the source address when we cross a pixel */
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            *dst = *src;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Combine dst_w x dst_h source pixels with the destination, 1:1 (no
+ * scaling).  Both sides are unpacked as R = bits 16-23, G = bits 8-15,
+ * B = bits 0-7; the source alpha is fixed at 0xFF.
+ *
+ * data->flags selects the operation:
+ *   SDL_RENDERCOPY_BLEND  dst = src + (255 - srcA) * dst / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(src + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * Any other combination leaves the destination colour channels as they
+ * were.  In every case the pixel is written back with only the low 24
+ * bits populated.
+ *
+ * Side effects on *data: dst_h is counted down (it is -1 on return), src
+ * and dst are advanced one pitch per row.
+ *
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int premultiply = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+    const int operation = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 srcA = 0xFF;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 from = *in;
+            const Uint32 onto = *out;
+            Uint32 srcR = (from >> 16) & 0xFF;
+            Uint32 srcG = (from >> 8) & 0xFF;
+            Uint32 srcB = from & 0xFF;
+            Uint32 dstR = (onto >> 16) & 0xFF;
+            Uint32 dstG = (onto >> 8) & 0xFF;
+            Uint32 dstB = onto & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (operation == SDL_RENDERCOPY_BLEND) {
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+            } else if (operation == SDL_RENDERCOPY_ADD) {
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+            } else if (operation == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+
+            *out = (dstR << 16) | (dstG << 8) | dstB;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Combine a src_w x src_h source block with a dst_w x dst_h destination
+ * block.  One source pixel is chosen per destination pixel by stepping
+ * through the source in 16.16 fixed point.  Both sides are unpacked as
+ * R = bits 16-23, G = bits 8-15, B = bits 0-7; the source alpha is fixed
+ * at 0xFF.
+ *
+ * data->flags selects the operation:
+ *   SDL_RENDERCOPY_BLEND  dst = src + (255 - srcA) * dst / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(src + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * Any other combination leaves the destination colour channels as they
+ * were.
+ *
+ * Side effects on *data: dst_h is counted down (it is -1 after a normal
+ * run) and dst is advanced by dst_pitch per row; src is left untouched.
+ *
+ * An empty or negative destination size is treated as "nothing to do";
+ * this also prevents the divisions below from dividing by zero.
+ *
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, 16.16 fixed point.
+       NOTE(review): the << 16 needs src_w/src_h below 32768 when int is
+       32 bits wide - confirm callers respect that. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of every row: posx starts at 1.0 */
+        Uint32 *src = NULL;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            /* Only recompute the source address when we cross a pixel */
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturate each channel at 255 */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy dst_w x dst_h pixels 1:1 (no scaling), optionally tinting them.
+ * Each pixel is unpacked as R = bits 16-23, G = bits 8-15, B = bits 0-7.
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags every channel
+ * is multiplied by the matching data->r / data->g / data->b and divided
+ * by 255.  The pixel is written back with only the low 24 bits populated.
+ *
+ * Side effects on *data: dst_h is counted down (it is -1 on return), src
+ * and dst are advanced one pitch per row.
+ *
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int tint = flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 factorR = data->r;
+    const Uint32 factorG = data->g;
+    const Uint32 factorB = data->b;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            Uint32 R = (texel >> 16) & 0xFF;
+            Uint32 G = (texel >> 8) & 0xFF;
+            Uint32 B = texel & 0xFF;
+
+            if (tint) {
+                R = (R * factorR) / 255;
+                G = (G * factorG) / 255;
+                B = (B * factorB) / 255;
+            }
+            *out = (R << 16) | (G << 8) | B;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Copy a src_w x src_h source block onto a dst_w x dst_h destination
+ * block, optionally tinting the pixels.  One source pixel is chosen per
+ * destination pixel by stepping through the source in 16.16 fixed point.
+ * Each pixel is unpacked as R = bits 16-23, G = bits 8-15, B = bits 0-7.
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags every channel
+ * is multiplied by the matching data->r / data->g / data->b and divided
+ * by 255.
+ *
+ * Side effects on *data: dst_h is counted down (it is -1 after a normal
+ * run) and dst is advanced by dst_pitch per row; src is left untouched.
+ *
+ * An empty or negative destination size is treated as "nothing to do";
+ * this also prevents the divisions below from dividing by zero.
+ *
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, 16.16 fixed point.
+       NOTE(review): the << 16 needs src_w/src_h below 32768 when int is
+       32 bits wide - confirm callers respect that. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of every row: posx starts at 1.0 */
+        Uint32 *src = NULL;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            /* Only recompute the source address when we cross a pixel */
+            if (posx >= 0x10000L) {
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Tint the source and then combine it with the destination, 1:1 (no
+ * scaling).  Both sides are unpacked as R = bits 16-23, G = bits 8-15,
+ * B = bits 0-7; the source alpha starts at 0xFF.
+ *
+ * Modulation, controlled by data->flags:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  src channel = src channel * data->r/g/b / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  srcA = srcA * data->a / 255
+ *
+ * Combination, controlled by data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src + (255 - srcA) * dst / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(src + dst, 255)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * For BLEND and ADD the source channels are first scaled by srcA / 255.
+ * Any other combination leaves the destination colour channels as they
+ * were.  In every case the pixel is written back with only the low 24
+ * bits populated.
+ *
+ * Side effects on *data: dst_h is counted down (it is -1 on return), src
+ * and dst are advanced one pitch per row.
+ *
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int tint_color = flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int tint_alpha = flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const int premultiply = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+    const int operation = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 factorR = data->r;
+    const Uint32 factorG = data->g;
+    const Uint32 factorB = data->b;
+    const Uint32 factorA = data->a;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 from = *in;
+            const Uint32 onto = *out;
+            Uint32 srcR = (from >> 16) & 0xFF;
+            Uint32 srcG = (from >> 8) & 0xFF;
+            Uint32 srcB = from & 0xFF;
+            Uint32 srcA = 0xFF;
+            Uint32 dstR = (onto >> 16) & 0xFF;
+            Uint32 dstG = (onto >> 8) & 0xFF;
+            Uint32 dstB = onto & 0xFF;
+
+            if (tint_color) {
+                srcR = (srcR * factorR) / 255;
+                srcG = (srcG * factorG) / 255;
+                srcB = (srcB * factorB) / 255;
+            }
+            if (tint_alpha) {
+                srcA = (srcA * factorA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (operation == SDL_RENDERCOPY_BLEND) {
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+            } else if (operation == SDL_RENDERCOPY_ADD) {
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+            } else if (operation == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+
+            *out = (dstR << 16) | (dstG << 8) | dstB;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Modulated, blended, scaled copy from an RGB888 source to an RGB888
+ * destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  Source alpha starts as 0xFF.  Per data->flags:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b / 255;
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a / 255;
+ *   - exactly one of SDL_RENDERCOPY_BLEND, _ADD or _MOD picks the mix; any
+ *     other combination leaves the destination colour as it was.
+ * The stored pixel always has its top byte cleared.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_RGB888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+    const Uint32 modA = data->a;
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in, out;
+            Uint32 sr, sg, sb, sa;
+            Uint32 dr, dg, db;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            sr = (in >> 16) & 0xFF;
+            sg = (in >> 8) & 0xFF;
+            sb = in & 0xFF;
+            sa = 0xFF;
+            out = *dstp;
+            dr = (out >> 16) & 0xFF;
+            dg = (out >> 8) & 0xFF;
+            db = out & 0xFF;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sr = (sr * modR) / 255;
+                sg = (sg * modG) / 255;
+                sb = (sb * modB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sa = (sa * modA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (dr << 16) | (dg << 8) | db;
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Plain 1:1 copy from an RGB888 source to a BGR888 destination.
+ *
+ * For each row, data->dst_w 32-bit pixels are read, the bytes at bits 16-23
+ * and bits 0-7 are exchanged (green stays in bits 8-15), and the result is
+ * stored with the top byte cleared.  data->flags is not consulted.
+ *
+ * The call consumes its argument: data->dst_h is counted down, and
+ * data->src / data->dst are advanced by their pitches after each row.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_BGR888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        Uint32 *srcp = (Uint32 *)data->src;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 in = *srcp++;
+            const Uint32 red = (in >> 16) & 0xFF;
+            const Uint32 green = (in >> 8) & 0xFF;
+            const Uint32 blue = in & 0xFF;
+
+            *dstp++ = (blue << 16) | (green << 8) | red;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from an RGB888 source to a BGR888 destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  Each pixel has the bytes at bits 16-23 and
+ * bits 0-7 exchanged and is stored with the top byte cleared.
+ * data->flags is not consulted.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            *dstp++ = ((in & 0xFF) << 16) | (in & 0xFF00) | ((in >> 16) & 0xFF);
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blended 1:1 copy from an RGB888 source to a BGR888 destination.
+ *
+ * Each row combines data->dst_w source pixels with the pixels already in
+ * the destination.  Source alpha is fixed at 0xFF.  Exactly one of
+ * SDL_RENDERCOPY_BLEND, _ADD or _MOD in data->flags selects the mix; any
+ * other combination leaves the destination colour as it was.  The stored
+ * pixel (blue in bits 16-23, red in bits 0-7) has its top byte cleared.
+ *
+ * The call consumes its argument: data->dst_h is counted down, and
+ * data->src / data->dst are advanced by their pitches after each row.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 sa = 0xFF;     /* alpha is not read from the source pixel */
+
+    while (data->dst_h--) {
+        Uint32 *srcp = (Uint32 *)data->src;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 in = *srcp++;
+            const Uint32 out = *dstp;
+            Uint32 sr = (in >> 16) & 0xFF;
+            Uint32 sg = (in >> 8) & 0xFF;
+            Uint32 sb = in & 0xFF;
+            Uint32 db = (out >> 16) & 0xFF;
+            Uint32 dg = (out >> 8) & 0xFF;
+            Uint32 dr = out & 0xFF;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (db << 16) | (dg << 8) | dr;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blended, scaled copy from an RGB888 source to a BGR888 destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  Source alpha is fixed at 0xFF.  Exactly one
+ * of SDL_RENDERCOPY_BLEND, _ADD or _MOD in data->flags selects the mix; any
+ * other combination leaves the destination colour as it was.  The stored
+ * pixel (blue in bits 16-23, red in bits 0-7) has its top byte cleared.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 sa = 0xFF;     /* alpha is not read from the source pixel */
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in, out;
+            Uint32 sr, sg, sb;
+            Uint32 dr, dg, db;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            sr = (in >> 16) & 0xFF;
+            sg = (in >> 8) & 0xFF;
+            sb = in & 0xFF;
+            out = *dstp;
+            db = (out >> 16) & 0xFF;
+            dg = (out >> 8) & 0xFF;
+            dr = out & 0xFF;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (db << 16) | (dg << 8) | dr;
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Colour-modulated 1:1 copy from an RGB888 source to a BGR888 destination.
+ *
+ * For each row, data->dst_w pixels are read; when data->flags contains
+ * SDL_RENDERCOPY_MODULATE_COLOR each channel is scaled by data->r/g/b / 255.
+ * The pixel is then stored with blue in bits 16-23, green in bits 8-15 and
+ * red in bits 0-7.  Alpha modulation (data->a) has no effect in this
+ * function, and the existing destination contents are not read.
+ *
+ * The call consumes its argument: data->dst_h is counted down, and
+ * data->src / data->dst are advanced by their pitches after each row.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = (data->flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+
+    while (data->dst_h--) {
+        Uint32 *srcp = (Uint32 *)data->src;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 in = *srcp++;
+            Uint32 red = (in >> 16) & 0xFF;
+            Uint32 green = (in >> 8) & 0xFF;
+            Uint32 blue = in & 0xFF;
+
+            if (tint) {
+                red = (red * modR) / 255;
+                green = (green * modG) / 255;
+                blue = (blue * modB) / 255;
+            }
+            *dstp++ = (blue << 16) | (green << 8) | red;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Colour-modulated, scaled copy from an RGB888 source to a BGR888
+ * destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  When data->flags contains
+ * SDL_RENDERCOPY_MODULATE_COLOR each channel is scaled by data->r/g/b / 255.
+ * The pixel is stored with blue in bits 16-23 and red in bits 0-7.  Alpha
+ * modulation (data->a) has no effect in this function.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int tint = (data->flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in;
+            Uint32 red, green, blue;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            red = (in >> 16) & 0xFF;
+            green = (in >> 8) & 0xFF;
+            blue = in & 0xFF;
+            if (tint) {
+                red = (red * modR) / 255;
+                green = (green * modG) / 255;
+                blue = (blue * modB) / 255;
+            }
+            *dstp++ = (blue << 16) | (green << 8) | red;
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Modulated, blended 1:1 copy from an RGB888 source to a BGR888
+ * destination.
+ *
+ * Each row combines data->dst_w source pixels with the pixels already in
+ * the destination.  Source alpha starts as 0xFF.  Per data->flags:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b / 255;
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a / 255;
+ *   - exactly one of SDL_RENDERCOPY_BLEND, _ADD or _MOD picks the mix; any
+ *     other combination leaves the destination colour as it was.
+ * The stored pixel (blue in bits 16-23, red in bits 0-7) has its top byte
+ * cleared.
+ *
+ * The call consumes its argument: data->dst_h is counted down, and
+ * data->src / data->dst are advanced by their pitches after each row.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+    const Uint32 modA = data->a;
+
+    while (data->dst_h--) {
+        Uint32 *srcp = (Uint32 *)data->src;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 in = *srcp++;
+            const Uint32 out = *dstp;
+            Uint32 sr = (in >> 16) & 0xFF;
+            Uint32 sg = (in >> 8) & 0xFF;
+            Uint32 sb = in & 0xFF;
+            Uint32 sa = 0xFF;
+            Uint32 db = (out >> 16) & 0xFF;
+            Uint32 dg = (out >> 8) & 0xFF;
+            Uint32 dr = out & 0xFF;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sr = (sr * modR) / 255;
+                sg = (sg * modG) / 255;
+                sb = (sb * modB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sa = (sa * modA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (db << 16) | (dg << 8) | dr;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Modulated, blended, scaled copy from an RGB888 source to a BGR888
+ * destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  Source alpha starts as 0xFF.  Per data->flags:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b / 255;
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a / 255;
+ *   - exactly one of SDL_RENDERCOPY_BLEND, _ADD or _MOD picks the mix; any
+ *     other combination leaves the destination colour as it was.
+ * The stored pixel (blue in bits 16-23, red in bits 0-7) has its top byte
+ * cleared.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_RGB888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+    const Uint32 modA = data->a;
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in, out;
+            Uint32 sr, sg, sb, sa;
+            Uint32 dr, dg, db;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            sr = (in >> 16) & 0xFF;
+            sg = (in >> 8) & 0xFF;
+            sb = in & 0xFF;
+            sa = 0xFF;
+            out = *dstp;
+            db = (out >> 16) & 0xFF;
+            dg = (out >> 8) & 0xFF;
+            dr = out & 0xFF;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sr = (sr * modR) / 255;
+                sg = (sg * modG) / 255;
+                sb = (sb * modB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sa = (sa * modA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (db << 16) | (dg << 8) | dr;
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Plain 1:1 copy from a BGR888 source to an RGB888 destination.
+ *
+ * For each row, data->dst_w 32-bit pixels are read with blue in bits 16-23
+ * and red in bits 0-7, and written back with red in bits 16-23 and blue in
+ * bits 0-7 (green stays in bits 8-15).  The top byte of the stored pixel is
+ * cleared.  data->flags is not consulted.
+ *
+ * The call consumes its argument: data->dst_h is counted down, and
+ * data->src / data->dst are advanced by their pitches after each row.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_RGB888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        Uint32 *srcp = (Uint32 *)data->src;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 in = *srcp++;
+            const Uint32 blue = (in >> 16) & 0xFF;
+            const Uint32 green = (in >> 8) & 0xFF;
+            const Uint32 red = in & 0xFF;
+
+            *dstp++ = (red << 16) | (green << 8) | blue;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled copy from a BGR888 source to an RGB888 destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  Each pixel has the bytes at bits 16-23 and
+ * bits 0-7 exchanged and is stored with the top byte cleared.
+ * data->flags is not consulted.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            *dstp++ = ((in & 0xFF) << 16) | (in & 0xFF00) | ((in >> 16) & 0xFF);
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blended 1:1 copy from a BGR888 source to an RGB888 destination.
+ *
+ * Each row combines data->dst_w source pixels (blue in bits 16-23, red in
+ * bits 0-7) with the pixels already in the destination.  Source alpha is
+ * fixed at 0xFF.  Exactly one of SDL_RENDERCOPY_BLEND, _ADD or _MOD in
+ * data->flags selects the mix; any other combination leaves the destination
+ * colour as it was.  The stored pixel (red in bits 16-23, blue in bits 0-7)
+ * has its top byte cleared.
+ *
+ * The call consumes its argument: data->dst_h is counted down, and
+ * data->src / data->dst are advanced by their pitches after each row.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 sa = 0xFF;     /* alpha is not read from the source pixel */
+
+    while (data->dst_h--) {
+        Uint32 *srcp = (Uint32 *)data->src;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 in = *srcp++;
+            const Uint32 out = *dstp;
+            Uint32 sb = (in >> 16) & 0xFF;
+            Uint32 sg = (in >> 8) & 0xFF;
+            Uint32 sr = in & 0xFF;
+            Uint32 dr = (out >> 16) & 0xFF;
+            Uint32 dg = (out >> 8) & 0xFF;
+            Uint32 db = out & 0xFF;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (dr << 16) | (dg << 8) | db;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blended, scaled copy from a BGR888 source to an RGB888 destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  Source alpha is fixed at 0xFF.  Exactly one
+ * of SDL_RENDERCOPY_BLEND, _ADD or _MOD in data->flags selects the mix; any
+ * other combination leaves the destination colour as it was.  The stored
+ * pixel (red in bits 16-23, blue in bits 0-7) has its top byte cleared.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 sa = 0xFF;     /* alpha is not read from the source pixel */
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in, out;
+            Uint32 sr, sg, sb;
+            Uint32 dr, dg, db;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            sb = (in >> 16) & 0xFF;
+            sg = (in >> 8) & 0xFF;
+            sr = in & 0xFF;
+            out = *dstp;
+            dr = (out >> 16) & 0xFF;
+            dg = (out >> 8) & 0xFF;
+            db = out & 0xFF;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (dr << 16) | (dg << 8) | db;
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Colour-modulated 1:1 copy from a BGR888 source to an RGB888 destination.
+ *
+ * For each row, data->dst_w pixels are read (blue in bits 16-23, red in
+ * bits 0-7); when data->flags contains SDL_RENDERCOPY_MODULATE_COLOR each
+ * channel is scaled by data->r/g/b / 255.  The pixel is then stored with
+ * red in bits 16-23, green in bits 8-15 and blue in bits 0-7.  Alpha
+ * modulation (data->a) has no effect in this function, and the existing
+ * destination contents are not read.
+ *
+ * The call consumes its argument: data->dst_h is counted down, and
+ * data->src / data->dst are advanced by their pitches after each row.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = (data->flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+
+    while (data->dst_h--) {
+        Uint32 *srcp = (Uint32 *)data->src;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 in = *srcp++;
+            Uint32 blue = (in >> 16) & 0xFF;
+            Uint32 green = (in >> 8) & 0xFF;
+            Uint32 red = in & 0xFF;
+
+            if (tint) {
+                red = (red * modR) / 255;
+                green = (green * modG) / 255;
+                blue = (blue * modB) / 255;
+            }
+            *dstp++ = (red << 16) | (green << 8) | blue;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Colour-modulated, scaled copy from a BGR888 source to an RGB888
+ * destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  When data->flags contains
+ * SDL_RENDERCOPY_MODULATE_COLOR each channel is scaled by data->r/g/b / 255.
+ * The pixel is stored with red in bits 16-23 and blue in bits 0-7.  Alpha
+ * modulation (data->a) has no effect in this function.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int tint = (data->flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in;
+            Uint32 red, green, blue;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            blue = (in >> 16) & 0xFF;
+            green = (in >> 8) & 0xFF;
+            red = in & 0xFF;
+            if (tint) {
+                red = (red * modR) / 255;
+                green = (green * modG) / 255;
+                blue = (blue * modB) / 255;
+            }
+            *dstp++ = (red << 16) | (green << 8) | blue;
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Modulated, blended 1:1 copy from a BGR888 source to an RGB888
+ * destination.
+ *
+ * Each row combines data->dst_w source pixels (blue in bits 16-23, red in
+ * bits 0-7) with the pixels already in the destination.  Source alpha
+ * starts as 0xFF.  Per data->flags:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b / 255;
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a / 255;
+ *   - exactly one of SDL_RENDERCOPY_BLEND, _ADD or _MOD picks the mix; any
+ *     other combination leaves the destination colour as it was.
+ * The stored pixel (red in bits 16-23, blue in bits 0-7) has its top byte
+ * cleared.
+ *
+ * The call consumes its argument: data->dst_h is counted down, and
+ * data->src / data->dst are advanced by their pitches after each row.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+    const Uint32 modA = data->a;
+
+    while (data->dst_h--) {
+        Uint32 *srcp = (Uint32 *)data->src;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 in = *srcp++;
+            const Uint32 out = *dstp;
+            Uint32 sb = (in >> 16) & 0xFF;
+            Uint32 sg = (in >> 8) & 0xFF;
+            Uint32 sr = in & 0xFF;
+            Uint32 sa = 0xFF;
+            Uint32 dr = (out >> 16) & 0xFF;
+            Uint32 dg = (out >> 8) & 0xFF;
+            Uint32 db = out & 0xFF;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sr = (sr * modR) / 255;
+                sg = (sg * modG) / 255;
+                sb = (sb * modB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sa = (sa * modA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (dr << 16) | (dg << 8) | db;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Modulated, blended, scaled copy from a BGR888 source to an RGB888
+ * destination.
+ *
+ * The data->src_w x data->src_h source is sampled (one source pixel per
+ * destination pixel, no filtering) onto data->dst_w x data->dst_h using
+ * 16.16 fixed-point stepping.  Source alpha starts as 0xFF.  Per data->flags:
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales source R/G/B by data->r/g/b / 255;
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales source alpha by data->a / 255;
+ *   - exactly one of SDL_RENDERCOPY_BLEND, _ADD or _MOD picks the mix; any
+ *     other combination leaves the destination colour as it was.
+ * The stored pixel (red in bits 16-23, blue in bits 0-7) has its top byte
+ * cleared.
+ *
+ * data->dst_h is counted down and data->dst is advanced one pitch per row;
+ * data->src is left untouched.  Always returns 0.
+ *
+ * NOTE(review): data->dst_h and data->dst_w are used as divisors; callers
+ * presumably never pass zero here - confirm.
+ */
+int SDL_RenderCopy_BGR888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 modR = data->r;
+    const Uint32 modG = data->g;
+    const Uint32 modB = data->b;
+    const Uint32 modA = data->a;
+    /* Source advance per destination row / column, in 16.16 fixed point. */
+    const int stepy = (data->src_h << 16) / data->dst_h;
+    const int stepx = (data->src_w << 16) / data->dst_w;
+    int row = 0;
+    int fracy = 0;
+
+    while (data->dst_h--) {
+        Uint32 *srcp;
+        Uint32 *dstp = (Uint32 *)data->dst;
+        int col = -1;
+        int fracx = 0x10000;    /* forces the pointer to be set on pixel 0 */
+        int remaining;
+
+        if (fracy >= 0x10000) {
+            row += fracy >> 16;
+            fracy &= 0xFFFF;
+        }
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            Uint32 in, out;
+            Uint32 sr, sg, sb, sa;
+            Uint32 dr, dg, db;
+
+            if (fracx >= 0x10000) {
+                /* Move on to the next source column(s) and re-aim. */
+                col += fracx >> 16;
+                fracx &= 0xFFFF;
+                srcp = (Uint32 *)(data->src + (row * data->src_pitch) + (col * 4));
+            }
+            in = *srcp;
+            sb = (in >> 16) & 0xFF;
+            sg = (in >> 8) & 0xFF;
+            sr = in & 0xFF;
+            sa = 0xFF;
+            out = *dstp;
+            dr = (out >> 16) & 0xFF;
+            dg = (out >> 8) & 0xFF;
+            db = out & 0xFF;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                sr = (sr * modR) / 255;
+                sg = (sg * modG) / 255;
+                sb = (sb * modB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                sa = (sa * modA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* Colour is not premultiplied; weight it by alpha first. */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                dr = (dr + sr > 255) ? 255 : dr + sr;
+                dg = (dg + sg > 255) ? 255 : dg + sg;
+                db = (db + sb > 255) ? 255 : db + sb;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+            *dstp++ = (dr << 16) | (dg << 8) | db;
+            fracx += stepx;
+        }
+        fracy += stepy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit between two BGR888 surfaces.
+ *
+ * Source and destination share a layout, so each 32-bit source pixel is
+ * copied to the destination unchanged (all 32 bits).  The source is stepped
+ * in 16.16 fixed point and every destination pixel reads exactly one source
+ * pixel (no filtering).
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            *dst = *src;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blit between two BGR888 surfaces that combines every source
+ * pixel with the destination pixel underneath it.
+ *
+ * Both surfaces are read as B = bits 16-23, G = bits 8-15, R = bits 0-7;
+ * the source alpha is the constant 0xFF.  The result is written back in the
+ * same layout with the top 8 bits zero.
+ *
+ * The blend bits of data->flags pick the combine step: exactly one of
+ * SDL_RENDERCOPY_BLEND, SDL_RENDERCOPY_ADD or SDL_RENDERCOPY_MOD; any other
+ * combination leaves the destination color as it was.
+ *
+ * Side effects: data->dst_h is counted down, and data->src / data->dst are
+ * advanced by their pitches for every row.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int blendop = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+    Uint32 s, d;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+
+    while (data->dst_h--) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            s = *in;
+            sB = (s >> 16) & 0xFF;
+            sG = (s >> 8) & 0xFF;
+            sR = s & 0xFF;
+            sA = 0xFF;
+            d = *out;
+            dB = (d >> 16) & 0xFF;
+            dG = (d >> 8) & 0xFF;
+            dR = d & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            if (blendop == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sA;
+                dR = sR + (keep * dR) / 255;
+                dG = sG + (keep * dG) / 255;
+                dB = sB + (keep * dB) / 255;
+            } else if (blendop == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dR += sR; if (dR > 255) dR = 255;
+                dG += sG; if (dG > 255) dG = 255;
+                dB += sB; if (dB > 255) dB = 255;
+            } else if (blendop == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+
+            *out = (dB << 16) | (dG << 8) | dR;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit between two BGR888 surfaces that combines every sampled
+ * source pixel with the destination pixel underneath it.
+ *
+ * Both surfaces are read as B = bits 16-23, G = bits 8-15, R = bits 0-7;
+ * the source alpha is the constant 0xFF.  The result is written back in the
+ * same layout with the top 8 bits zero.  The source is stepped in 16.16
+ * fixed point and every destination pixel reads exactly one source pixel.
+ *
+ * The blend bits of data->flags pick the combine step: exactly one of
+ * SDL_RENDERCOPY_BLEND, SDL_RENDERCOPY_ADD or SDL_RENDERCOPY_MOD; any other
+ * combination leaves the destination color as it was.
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int blendop = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    Uint32 srcpixel, dstpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+
+            srcpixel = *src;
+            srcB = (srcpixel >> 16) & 0xFF;
+            srcG = (srcpixel >> 8) & 0xFF;
+            srcR = srcpixel & 0xFF;
+            srcA = 0xFF;
+            dstpixel = *dst;
+            dstB = (dstpixel >> 16) & 0xFF;
+            dstG = (dstpixel >> 8) & 0xFF;
+            dstR = dstpixel & 0xFF;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (blendop == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - srcA;
+                dstR = srcR + (keep * dstR) / 255;
+                dstG = srcG + (keep * dstG) / 255;
+                dstB = srcB + (keep * dstB) / 255;
+            } else if (blendop == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dstR += srcR; if (dstR > 255) dstR = 255;
+                dstG += srcG; if (dstG > 255) dstG = 255;
+                dstB += srcB; if (dstB > 255) dstB = 255;
+            } else if (blendop == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+
+            *dst = (dstB << 16) | (dstG << 8) | dstR;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blit between two BGR888 surfaces with optional color modulation.
+ *
+ * Source pixels are read as B = bits 16-23, G = bits 8-15, R = bits 0-7 and
+ * written back in the same layout with the top 8 bits zero.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags, each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  data->a is
+ * not used here: the output has no alpha to carry it.
+ *
+ * Side effects: data->dst_h is counted down, and data->src / data->dst are
+ * advanced by their pitches for every row.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    Uint32 value;
+    Uint32 red, green, blue;
+
+    while (data->dst_h--) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            value = *in;
+            blue = (value >> 16) & 0xFF;
+            green = (value >> 8) & 0xFF;
+            red = value & 0xFF;
+            if (tint) {
+                red = (red * scaleR) / 255;
+                green = (green * scaleG) / 255;
+                blue = (blue * scaleB) / 255;
+            }
+            *out = (blue << 16) | (green << 8) | red;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit between two BGR888 surfaces with optional color modulation.
+ *
+ * Source pixels are read as B = bits 16-23, G = bits 8-15, R = bits 0-7 and
+ * written back in the same layout with the top 8 bits zero.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags, each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  data->a is
+ * not used here: the output has no alpha to carry it.  The source is
+ * stepped in 16.16 fixed point and every destination pixel reads exactly
+ * one source pixel (no filtering).
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+
+            pixel = *src;
+            B = (pixel >> 16) & 0xFF;
+            G = (pixel >> 8) & 0xFF;
+            R = pixel & 0xFF;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst = (B << 16) | (G << 8) | R;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blit between two BGR888 surfaces with optional color/alpha
+ * modulation followed by blending with the destination.
+ *
+ * Both surfaces are read as B = bits 16-23, G = bits 8-15, R = bits 0-7;
+ * the source alpha starts at 0xFF.  The result is written back in the same
+ * layout with the top 8 bits zero.
+ *
+ * data->flags selects the per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  multiply source R/G/B by data->r/g/b, / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  multiply source alpha by data->a, / 255
+ *   SDL_RENDERCOPY_BLEND, _ADD or _MOD (exactly one) picks the combine step;
+ *   any other combination leaves the destination color as it was.
+ *
+ * Side effects: data->dst_h is counted down, and data->src / data->dst are
+ * advanced by their pitches for every row.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int blendop = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+    const int tint = flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int fade = flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    const Uint32 scaleA = data->a;
+    Uint32 s, d;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+
+    while (data->dst_h--) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            s = *in;
+            sB = (s >> 16) & 0xFF;
+            sG = (s >> 8) & 0xFF;
+            sR = s & 0xFF;
+            sA = 0xFF;
+            d = *out;
+            dB = (d >> 16) & 0xFF;
+            dG = (d >> 8) & 0xFF;
+            dR = d & 0xFF;
+
+            if (tint) {
+                sR = (sR * scaleR) / 255;
+                sG = (sG * scaleG) / 255;
+                sB = (sB * scaleB) / 255;
+            }
+            if (fade) {
+                sA = (sA * scaleA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            if (blendop == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sA;
+                dR = sR + (keep * dR) / 255;
+                dG = sG + (keep * dG) / 255;
+                dB = sB + (keep * dB) / 255;
+            } else if (blendop == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dR += sR; if (dR > 255) dR = 255;
+                dG += sG; if (dG > 255) dG = 255;
+                dB += sB; if (dB > 255) dB = 255;
+            } else if (blendop == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+
+            *out = (dB << 16) | (dG << 8) | dR;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit between two BGR888 surfaces with optional color/alpha
+ * modulation followed by blending with the destination.
+ *
+ * Both surfaces are read as B = bits 16-23, G = bits 8-15, R = bits 0-7;
+ * the source alpha starts at 0xFF.  The result is written back in the same
+ * layout with the top 8 bits zero.  The source is stepped in 16.16 fixed
+ * point and every destination pixel reads exactly one source pixel.
+ *
+ * data->flags selects the per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  multiply source R/G/B by data->r/g/b, / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  multiply source alpha by data->a, / 255
+ *   SDL_RENDERCOPY_BLEND, _ADD or _MOD (exactly one) picks the combine step;
+ *   any other combination leaves the destination color as it was.
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_BGR888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int blendop = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel, dstpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+
+            srcpixel = *src;
+            srcB = (srcpixel >> 16) & 0xFF;
+            srcG = (srcpixel >> 8) & 0xFF;
+            srcR = srcpixel & 0xFF;
+            srcA = 0xFF;
+            dstpixel = *dst;
+            dstB = (dstpixel >> 16) & 0xFF;
+            dstG = (dstpixel >> 8) & 0xFF;
+            dstR = dstpixel & 0xFF;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (blendop == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - srcA;
+                dstR = srcR + (keep * dstR) / 255;
+                dstG = srcG + (keep * dstG) / 255;
+                dstB = srcB + (keep * dstB) / 255;
+            } else if (blendop == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dstR += srcR; if (dstR > 255) dstR = 255;
+                dstG += srcG; if (dstG > 255) dstG = 255;
+                dstB += srcB; if (dstB > 255) dstB = 255;
+            } else if (blendop == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+
+            *dst = (dstB << 16) | (dstG << 8) | dstR;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blit from an ARGB8888 source to an RGB888 destination.
+ *
+ * The two layouts agree on R (bits 16-23), G (bits 8-15) and B (bits 0-7),
+ * so the conversion keeps the low 24 bits of each source pixel and writes
+ * the top 8 bits (the source alpha) as zero.
+ *
+ * Side effects: data->dst_h is counted down, and data->src / data->dst are
+ * advanced by their pitches for every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            /* Drop alpha, keep R/G/B where they are */
+            *out = *in & 0x00FFFFFF;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit from an ARGB8888 source to an RGB888 destination.
+ *
+ * Source pixels are read as R = bits 16-23, G = bits 8-15, B = bits 0-7 and
+ * written in the same positions with the top 8 bits (the source alpha)
+ * zero.  The source is stepped in 16.16 fixed point and every destination
+ * pixel reads exactly one source pixel (no filtering).
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+
+            pixel = *src;
+            R = (pixel >> 16) & 0xFF;
+            G = (pixel >> 8) & 0xFF;
+            B = pixel & 0xFF;
+            *dst = (R << 16) | (G << 8) | B;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blit from an ARGB8888 source to an RGB888 destination that
+ * combines every source pixel with the destination pixel underneath it.
+ *
+ * Source pixels are read as A = bits 24-31, R = bits 16-23, G = bits 8-15,
+ * B = bits 0-7.  Destination pixels use the same R/G/B positions and are
+ * written back with the top 8 bits zero.
+ *
+ * The blend bits of data->flags pick the combine step: exactly one of
+ * SDL_RENDERCOPY_BLEND, SDL_RENDERCOPY_ADD or SDL_RENDERCOPY_MOD; any other
+ * combination leaves the destination color as it was.
+ *
+ * Side effects: data->dst_h is counted down, and data->src / data->dst are
+ * advanced by their pitches for every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int blendop = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+    Uint32 s, d;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+
+    while (data->dst_h--) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            s = *in;
+            sA = (s >> 24) & 0xFF;
+            sR = (s >> 16) & 0xFF;
+            sG = (s >> 8) & 0xFF;
+            sB = s & 0xFF;
+            d = *out;
+            dR = (d >> 16) & 0xFF;
+            dG = (d >> 8) & 0xFF;
+            dB = d & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            if (blendop == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sA;
+                dR = sR + (keep * dR) / 255;
+                dG = sG + (keep * dG) / 255;
+                dB = sB + (keep * dB) / 255;
+            } else if (blendop == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dR += sR; if (dR > 255) dR = 255;
+                dG += sG; if (dG > 255) dG = 255;
+                dB += sB; if (dB > 255) dB = 255;
+            } else if (blendop == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+
+            *out = (dR << 16) | (dG << 8) | dB;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit from an ARGB8888 source to an RGB888 destination that
+ * combines every sampled source pixel with the destination pixel
+ * underneath it.
+ *
+ * Source pixels are read as A = bits 24-31, R = bits 16-23, G = bits 8-15,
+ * B = bits 0-7.  Destination pixels use the same R/G/B positions and are
+ * written back with the top 8 bits zero.  The source is stepped in 16.16
+ * fixed point and every destination pixel reads exactly one source pixel.
+ *
+ * The blend bits of data->flags pick the combine step: exactly one of
+ * SDL_RENDERCOPY_BLEND, SDL_RENDERCOPY_ADD or SDL_RENDERCOPY_MOD; any other
+ * combination leaves the destination color as it was.
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int blendop = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    Uint32 srcpixel, dstpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+
+            srcpixel = *src;
+            srcA = (srcpixel >> 24) & 0xFF;
+            srcR = (srcpixel >> 16) & 0xFF;
+            srcG = (srcpixel >> 8) & 0xFF;
+            srcB = srcpixel & 0xFF;
+            dstpixel = *dst;
+            dstR = (dstpixel >> 16) & 0xFF;
+            dstG = (dstpixel >> 8) & 0xFF;
+            dstB = dstpixel & 0xFF;
+
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (blendop == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - srcA;
+                dstR = srcR + (keep * dstR) / 255;
+                dstG = srcG + (keep * dstG) / 255;
+                dstB = srcB + (keep * dstB) / 255;
+            } else if (blendop == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dstR += srcR; if (dstR > 255) dstR = 255;
+                dstG += srcG; if (dstG > 255) dstG = 255;
+                dstB += srcB; if (dstB > 255) dstB = 255;
+            } else if (blendop == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+
+            *dst = (dstR << 16) | (dstG << 8) | dstB;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blit from an ARGB8888 source to an RGB888 destination with
+ * optional color modulation.
+ *
+ * Source pixels are read as R = bits 16-23, G = bits 8-15, B = bits 0-7 and
+ * written in the same positions with the top 8 bits zero; the source alpha
+ * and data->a do not influence the output.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags, each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.
+ *
+ * Side effects: data->dst_h is counted down, and data->src / data->dst are
+ * advanced by their pitches for every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    Uint32 value;
+    Uint32 red, green, blue;
+
+    while (data->dst_h--) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            value = *in;
+            red = (value >> 16) & 0xFF;
+            green = (value >> 8) & 0xFF;
+            blue = value & 0xFF;
+            if (tint) {
+                red = (red * scaleR) / 255;
+                green = (green * scaleG) / 255;
+                blue = (blue * scaleB) / 255;
+            }
+            *out = (red << 16) | (green << 8) | blue;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit from an ARGB8888 source to an RGB888 destination with
+ * optional color modulation.
+ *
+ * Source pixels are read as R = bits 16-23, G = bits 8-15, B = bits 0-7 and
+ * written in the same positions with the top 8 bits zero; the source alpha
+ * and data->a do not influence the output.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags, each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  The source
+ * is stepped in 16.16 fixed point and every destination pixel reads exactly
+ * one source pixel (no filtering).
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+
+            pixel = *src;
+            R = (pixel >> 16) & 0xFF;
+            G = (pixel >> 8) & 0xFF;
+            B = pixel & 0xFF;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            *dst = (R << 16) | (G << 8) | B;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blit from an ARGB8888 source to an RGB888 destination with
+ * optional color/alpha modulation followed by blending.
+ *
+ * Source pixels are read as A = bits 24-31, R = bits 16-23, G = bits 8-15,
+ * B = bits 0-7.  Destination pixels use the same R/G/B positions and are
+ * written back with the top 8 bits zero.
+ *
+ * data->flags selects the per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  multiply source R/G/B by data->r/g/b, / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  multiply source alpha by data->a, / 255
+ *   SDL_RENDERCOPY_BLEND, _ADD or _MOD (exactly one) picks the combine step;
+ *   any other combination leaves the destination color as it was.
+ *
+ * Side effects: data->dst_h is counted down, and data->src / data->dst are
+ * advanced by their pitches for every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int blendop = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+    const int tint = flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int fade = flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    const Uint32 scaleA = data->a;
+    Uint32 s, d;
+    Uint32 sR, sG, sB, sA;
+    Uint32 dR, dG, dB;
+
+    while (data->dst_h--) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            s = *in;
+            sA = (s >> 24) & 0xFF;
+            sR = (s >> 16) & 0xFF;
+            sG = (s >> 8) & 0xFF;
+            sB = s & 0xFF;
+            d = *out;
+            dR = (d >> 16) & 0xFF;
+            dG = (d >> 8) & 0xFF;
+            dB = d & 0xFF;
+
+            if (tint) {
+                sR = (sR * scaleR) / 255;
+                sG = (sG * scaleG) / 255;
+                sB = (sB * scaleB) / 255;
+            }
+            if (fade) {
+                sA = (sA * scaleA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            if (blendop == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sA;
+                dR = sR + (keep * dR) / 255;
+                dG = sG + (keep * dG) / 255;
+                dB = sB + (keep * dB) / 255;
+            } else if (blendop == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dR += sR; if (dR > 255) dR = 255;
+                dG += sG; if (dG > 255) dG = 255;
+                dB += sB; if (dB > 255) dB = 255;
+            } else if (blendop == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+
+            *out = (dR << 16) | (dG << 8) | dB;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit from an ARGB8888 source to an RGB888 destination with
+ * optional color/alpha modulation followed by blending.
+ *
+ * Source pixels are read as A = bits 24-31, R = bits 16-23, G = bits 8-15,
+ * B = bits 0-7.  Destination pixels use the same R/G/B positions and are
+ * written back with the top 8 bits zero.  The source is stepped in 16.16
+ * fixed point and every destination pixel reads exactly one source pixel.
+ *
+ * data->flags selects the per-pixel work:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  multiply source R/G/B by data->r/g/b, / 255
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  multiply source alpha by data->a, / 255
+ *   SDL_RENDERCOPY_BLEND, _ADD or _MOD (exactly one) picks the combine step;
+ *   any other combination leaves the destination color as it was.
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int blendop = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel, dstpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+
+            srcpixel = *src;
+            srcA = (srcpixel >> 24) & 0xFF;
+            srcR = (srcpixel >> 16) & 0xFF;
+            srcG = (srcpixel >> 8) & 0xFF;
+            srcB = srcpixel & 0xFF;
+            dstpixel = *dst;
+            dstR = (dstpixel >> 16) & 0xFF;
+            dstG = (dstpixel >> 8) & 0xFF;
+            dstB = dstpixel & 0xFF;
+
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (blendop == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - srcA;
+                dstR = srcR + (keep * dstR) / 255;
+                dstG = srcG + (keep * dstG) / 255;
+                dstB = srcB + (keep * dstB) / 255;
+            } else if (blendop == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dstR += srcR; if (dstR > 255) dstR = 255;
+                dstG += srcG; if (dstG > 255) dstG = 255;
+                dstB += srcB; if (dstB > 255) dstB = 255;
+            } else if (blendop == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+
+            *dst = (dstR << 16) | (dstG << 8) | dstB;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled blit from an ARGB8888 source to a BGR888 destination.
+ *
+ * Source pixels are read as R = bits 16-23, G = bits 8-15, B = bits 0-7.
+ * The destination is written with B in bits 16-23, G in bits 8-15 and R in
+ * bits 0-7, i.e. R and B trade places while G stays put; the top 8 bits
+ * (the source alpha) are written as zero.
+ *
+ * Side effects: data->dst_h is counted down, and data->src / data->dst are
+ * advanced by their pitches for every row.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888(SDL_RenderCopyData *data)
+{
+    Uint32 value;
+
+    while (data->dst_h--) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left != 0; --left, ++in, ++out) {
+            value = *in;
+            /* blue up to bits 16-23, green untouched, red down to bits 0-7 */
+            *out = ((value & 0xFF) << 16) | (value & 0xFF00) | ((value >> 16) & 0xFF);
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Scaled blit from an ARGB8888 source to a BGR888 destination.
+ *
+ * Source pixels are read as R = bits 16-23, G = bits 8-15, B = bits 0-7.
+ * The destination is written with B in bits 16-23, G in bits 8-15 and R in
+ * bits 0-7; the top 8 bits (the source alpha) are written as zero.  The
+ * source is stepped in 16.16 fixed point and every destination pixel reads
+ * exactly one source pixel (no filtering).
+ *
+ * Side effects: data->dst_h is counted down and data->dst is advanced by
+ * data->dst_pitch for every row drawn.  A target with a non-positive width
+ * or height is left untouched.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw for an empty target; this also keeps the step
+       computation below from dividing by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* 16.16 fixed-point source advance per destination row / column */
+    srcy = 0;
+    posy = 0;
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Consume the whole steps accumulated in posy to reach this row's source line */
+        for (; posy >= 0x10000L; posy -= 0x10000L) {
+            ++srcy;
+        }
+
+        /* Starting at -1 with one whole step pending makes the first pixel read column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n > 0; --n) {
+            for (; posx >= 0x10000L; posx -= 0x10000L) {
+                ++srcx;
+            }
+            src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+
+            pixel = *src;
+            R = (pixel >> 16) & 0xFF;
+            G = (pixel >> 8) & 0xFF;
+            B = pixel & 0xFF;
+            *dst = (B << 16) | (G << 8) | R;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy that combines every source pixel with the pixel already in
+ * the destination.
+ *
+ * Source pixels are read as alpha/red/green/blue from the top byte down;
+ * destination pixels hold blue in bits 16-23, green in 8-15 and red in 0-7.
+ * The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits of data->flags select the
+ * per-channel operation (integer arithmetic):
+ *   BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *   ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *   MOD only:    dst = src * dst / 255
+ * With any other combination of those bits the destination colour is
+ * rewritten unchanged.  The destination's top byte is always stored as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 s = *in++;
+            const Uint32 d = *out;
+            const Uint32 sa = s >> 24;
+            Uint32 sr = (s >> 16) & 0xFF;
+            Uint32 sg = (s >> 8) & 0xFF;
+            Uint32 sb = s & 0xFF;
+            Uint32 db = (d >> 16) & 0xFF;
+            Uint32 dg = (d >> 8) & 0xFF;
+            Uint32 dr = d & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *out++ = (db << 16) | (dg << 8) | dr;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Point-sampled stretch of the src_w x src_h source onto the dst_w x dst_h
+ * destination, combining every sampled pixel with the one already there.
+ *
+ * Source pixels are read as alpha/red/green/blue from the top byte down;
+ * destination pixels hold blue in bits 16-23, green in 8-15 and red in 0-7.
+ * The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits of data->flags select the
+ * per-channel operation (integer arithmetic):
+ *   BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *   ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *   MOD only:    dst = src * dst / 255
+ * With any other combination of those bits the destination colour is
+ * rewritten unchanged.  The destination's top byte is always stored as zero.
+ *
+ * Source coordinates are stepped in 16.16 fixed point.  data->dst is advanced
+ * one pitch per row and data->dst_h is used up as the row counter; data->src
+ * is not modified.  A destination with a non-positive width or height is a
+ * no-op.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+    /* Reassigned before the first read of every row; initialised so that is
+       also evident to the compiler. */
+    const Uint32 *src = (const Uint32 *)data->src;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the step computation below would divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        if (posy >= 0x10000L) {
+            srcy += posy >> 16;
+            posy &= 0xFFFF;
+        }
+
+        /* Starting one full step behind forces a lookup on the first pixel. */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n != 0; --n) {
+            Uint32 s, d;
+            Uint32 sa, sr, sg, sb;
+            Uint32 dr, dg, db;
+
+            if (posx >= 0x10000L) {
+                srcx += posx >> 16;
+                posx &= 0xFFFF;
+                src = (const Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+
+            s = *src;
+            sa = s >> 24;
+            sr = (s >> 16) & 0xFF;
+            sg = (s >> 8) & 0xFF;
+            sb = s & 0xFF;
+
+            d = *dst;
+            db = (d >> 16) & 0xFF;
+            dg = (d >> 8) & 0xFF;
+            dr = d & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *dst++ = (db << 16) | (dg << 8) | dr;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, unblended copy with optional colour modulation.
+ *
+ * Source pixels are read as alpha/red/green/blue from the top byte down and
+ * stored with blue in bits 16-23, green in 8-15 and red in 0-7.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour channel is
+ * scaled by data->r / data->g / data->b over 255.  Source alpha and data->a
+ * play no part here, and the destination's top byte is written as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 s = *in++;
+            Uint32 r = (s >> 16) & 0xFF;
+            Uint32 g = (s >> 8) & 0xFF;
+            Uint32 b = s & 0xFF;
+
+            if (tint) {
+                r = (r * mulR) / 255;
+                g = (g * mulG) / 255;
+                b = (b * mulB) / 255;
+            }
+            *out++ = (b << 16) | (g << 8) | r;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Point-sampled stretch of the src_w x src_h source onto the dst_w x dst_h
+ * destination with optional colour modulation and no blending.
+ *
+ * Source pixels are read as alpha/red/green/blue from the top byte down and
+ * stored with blue in bits 16-23, green in 8-15 and red in 0-7.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour channel is
+ * scaled by data->r / data->g / data->b over 255.  Source alpha and data->a
+ * play no part here, and the destination's top byte is written as zero.
+ *
+ * Source coordinates are stepped in 16.16 fixed point.  data->dst is advanced
+ * one pitch per row and data->dst_h is used up as the row counter; data->src
+ * is not modified.  A destination with a non-positive width or height is a
+ * no-op.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    /* Reassigned before the first read of every row; initialised so that is
+       also evident to the compiler. */
+    const Uint32 *src = (const Uint32 *)data->src;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the step computation below would divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        if (posy >= 0x10000L) {
+            srcy += posy >> 16;
+            posy &= 0xFFFF;
+        }
+
+        /* Starting one full step behind forces a lookup on the first pixel. */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n != 0; --n) {
+            Uint32 s;
+            Uint32 r, g, b;
+
+            if (posx >= 0x10000L) {
+                srcx += posx >> 16;
+                posx &= 0xFFFF;
+                src = (const Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+
+            s = *src;
+            r = (s >> 16) & 0xFF;
+            g = (s >> 8) & 0xFF;
+            b = s & 0xFF;
+            if (tint) {
+                r = (r * mulR) / 255;
+                g = (g * mulG) / 255;
+                b = (b * mulB) / 255;
+            }
+            *dst++ = (b << 16) | (g << 8) | r;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy with optional colour/alpha modulation that combines every
+ * source pixel with the pixel already in the destination.
+ *
+ * Source pixels are read as alpha/red/green/blue from the top byte down;
+ * destination pixels hold blue in bits 16-23, green in 8-15 and red in 0-7.
+ * Per pixel, in this order:
+ *   1. SDL_RENDERCOPY_MODULATE_COLOR scales the source colour by
+ *      data->r / data->g / data->b over 255.
+ *   2. SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a
+ *      over 255.
+ *   3. The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits select the operation:
+ *        BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *        ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *        MOD only:    dst = src * dst / 255
+ *      With any other combination of those bits the destination colour is
+ *      rewritten unchanged.
+ * The destination's top byte is always stored as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int fade = data->flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 s = *in++;
+            const Uint32 d = *out;
+            Uint32 sa = s >> 24;
+            Uint32 sr = (s >> 16) & 0xFF;
+            Uint32 sg = (s >> 8) & 0xFF;
+            Uint32 sb = s & 0xFF;
+            Uint32 db = (d >> 16) & 0xFF;
+            Uint32 dg = (d >> 8) & 0xFF;
+            Uint32 dr = d & 0xFF;
+
+            if (tint) {
+                sr = (sr * mulR) / 255;
+                sg = (sg * mulG) / 255;
+                sb = (sb * mulB) / 255;
+            }
+            if (fade) {
+                sa = (sa * mulA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *out++ = (db << 16) | (dg << 8) | dr;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Point-sampled stretch of the src_w x src_h source onto the dst_w x dst_h
+ * destination with optional colour/alpha modulation, combining every sampled
+ * pixel with the one already in the destination.
+ *
+ * Source pixels are read as alpha/red/green/blue from the top byte down;
+ * destination pixels hold blue in bits 16-23, green in 8-15 and red in 0-7.
+ * Per pixel, in this order:
+ *   1. SDL_RENDERCOPY_MODULATE_COLOR scales the source colour by
+ *      data->r / data->g / data->b over 255.
+ *   2. SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a
+ *      over 255.
+ *   3. The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits select the operation:
+ *        BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *        ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *        MOD only:    dst = src * dst / 255
+ *      With any other combination of those bits the destination colour is
+ *      rewritten unchanged.
+ * The destination's top byte is always stored as zero.
+ *
+ * Source coordinates are stepped in 16.16 fixed point.  data->dst is advanced
+ * one pitch per row and data->dst_h is used up as the row counter; data->src
+ * is not modified.  A destination with a non-positive width or height is a
+ * no-op.  Always returns 0.
+ */
+int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int fade = data->flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+    /* Reassigned before the first read of every row; initialised so that is
+       also evident to the compiler. */
+    const Uint32 *src = (const Uint32 *)data->src;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the step computation below would divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        if (posy >= 0x10000L) {
+            srcy += posy >> 16;
+            posy &= 0xFFFF;
+        }
+
+        /* Starting one full step behind forces a lookup on the first pixel. */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n != 0; --n) {
+            Uint32 s, d;
+            Uint32 sa, sr, sg, sb;
+            Uint32 dr, dg, db;
+
+            if (posx >= 0x10000L) {
+                srcx += posx >> 16;
+                posx &= 0xFFFF;
+                src = (const Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+
+            s = *src;
+            sa = s >> 24;
+            sr = (s >> 16) & 0xFF;
+            sg = (s >> 8) & 0xFF;
+            sb = s & 0xFF;
+
+            d = *dst;
+            db = (d >> 16) & 0xFF;
+            dg = (d >> 8) & 0xFF;
+            dr = d & 0xFF;
+
+            if (tint) {
+                sr = (sr * mulR) / 255;
+                sg = (sg * mulG) / 255;
+                sb = (sb * mulB) / 255;
+            }
+            if (fade) {
+                sa = (sa * mulA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *dst++ = (db << 16) | (dg << 8) | dr;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, unblended copy of dst_w x dst_h 32-bit pixels from data->src to
+ * data->dst.
+ *
+ * Each source pixel is read with red in bits 24-31, green in 16-23, blue in
+ * 8-15 and alpha in 0-7, and is stored with red in bits 16-23, green in 8-15
+ * and blue in 0-7.  The alpha byte is dropped, so the top byte of every
+ * destination pixel is written as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 rgba = *in++;
+
+            /* The colour bytes keep their order, so shifting the alpha byte
+               out moves all three into place at once. */
+            *out++ = rgba >> 8;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Point-sampled stretch of the src_w x src_h source onto the dst_w x dst_h
+ * destination, with no blending and no modulation.
+ *
+ * Source coordinates are stepped in 16.16 fixed point; every destination
+ * pixel takes the single source pixel at the truncated position.  Source
+ * pixels are read as red/green/blue/alpha from the top byte down and stored
+ * with red in bits 16-23, green in 8-15 and blue in 0-7.  Alpha is dropped
+ * and the destination's top byte is written as zero.
+ *
+ * data->dst is advanced one pitch per row and data->dst_h is used up as the
+ * row counter; data->src is not modified.  A destination with a non-positive
+ * width or height is a no-op.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    /* Reassigned before the first read of every row; initialised so that is
+       also evident to the compiler. */
+    const Uint32 *src = (const Uint32 *)data->src;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the step computation below would divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        if (posy >= 0x10000L) {
+            srcy += posy >> 16;
+            posy &= 0xFFFF;
+        }
+
+        /* Starting one full step behind forces a lookup on the first pixel. */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n != 0; --n) {
+            if (posx >= 0x10000L) {
+                srcx += posx >> 16;
+                posx &= 0xFFFF;
+                src = (const Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            /* The colour bytes keep their order, so shifting the alpha byte
+               out moves all three into place at once. */
+            *dst++ = *src >> 8;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy that combines every source pixel with the pixel already in
+ * the destination.
+ *
+ * Source pixels are read as red/green/blue/alpha from the top byte down;
+ * destination pixels hold red in bits 16-23, green in 8-15 and blue in 0-7.
+ * The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits of data->flags select the
+ * per-channel operation (integer arithmetic):
+ *   BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *   ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *   MOD only:    dst = src * dst / 255
+ * With any other combination of those bits the destination colour is
+ * rewritten unchanged.  The destination's top byte is always stored as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 s = *in++;
+            const Uint32 d = *out;
+            const Uint32 sa = s & 0xFF;
+            Uint32 sr = s >> 24;
+            Uint32 sg = (s >> 16) & 0xFF;
+            Uint32 sb = (s >> 8) & 0xFF;
+            Uint32 dr = (d >> 16) & 0xFF;
+            Uint32 dg = (d >> 8) & 0xFF;
+            Uint32 db = d & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *out++ = (dr << 16) | (dg << 8) | db;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Point-sampled stretch of the src_w x src_h source onto the dst_w x dst_h
+ * destination, combining every sampled pixel with the one already there.
+ *
+ * Source pixels are read as red/green/blue/alpha from the top byte down;
+ * destination pixels hold red in bits 16-23, green in 8-15 and blue in 0-7.
+ * The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits of data->flags select the
+ * per-channel operation (integer arithmetic):
+ *   BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *   ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *   MOD only:    dst = src * dst / 255
+ * With any other combination of those bits the destination colour is
+ * rewritten unchanged.  The destination's top byte is always stored as zero.
+ *
+ * Source coordinates are stepped in 16.16 fixed point.  data->dst is advanced
+ * one pitch per row and data->dst_h is used up as the row counter; data->src
+ * is not modified.  A destination with a non-positive width or height is a
+ * no-op.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+    /* Reassigned before the first read of every row; initialised so that is
+       also evident to the compiler. */
+    const Uint32 *src = (const Uint32 *)data->src;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the step computation below would divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        if (posy >= 0x10000L) {
+            srcy += posy >> 16;
+            posy &= 0xFFFF;
+        }
+
+        /* Starting one full step behind forces a lookup on the first pixel. */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n != 0; --n) {
+            Uint32 s, d;
+            Uint32 sa, sr, sg, sb;
+            Uint32 dr, dg, db;
+
+            if (posx >= 0x10000L) {
+                srcx += posx >> 16;
+                posx &= 0xFFFF;
+                src = (const Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+
+            s = *src;
+            sr = s >> 24;
+            sg = (s >> 16) & 0xFF;
+            sb = (s >> 8) & 0xFF;
+            sa = s & 0xFF;
+
+            d = *dst;
+            dr = (d >> 16) & 0xFF;
+            dg = (d >> 8) & 0xFF;
+            db = d & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *dst++ = (dr << 16) | (dg << 8) | db;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, unblended copy with optional colour modulation.
+ *
+ * Source pixels are read as red/green/blue/alpha from the top byte down and
+ * stored with red in bits 16-23, green in 8-15 and blue in 0-7.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour channel is
+ * scaled by data->r / data->g / data->b over 255.  Source alpha and data->a
+ * play no part here, and the destination's top byte is written as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 s = *in++;
+            Uint32 r = s >> 24;
+            Uint32 g = (s >> 16) & 0xFF;
+            Uint32 b = (s >> 8) & 0xFF;
+
+            if (tint) {
+                r = (r * mulR) / 255;
+                g = (g * mulG) / 255;
+                b = (b * mulB) / 255;
+            }
+            *out++ = (r << 16) | (g << 8) | b;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Point-sampled stretch of the src_w x src_h source onto the dst_w x dst_h
+ * destination with optional colour modulation and no blending.
+ *
+ * Source pixels are read as red/green/blue/alpha from the top byte down and
+ * stored with red in bits 16-23, green in 8-15 and blue in 0-7.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each colour channel is
+ * scaled by data->r / data->g / data->b over 255.  Source alpha and data->a
+ * play no part here, and the destination's top byte is written as zero.
+ *
+ * Source coordinates are stepped in 16.16 fixed point.  data->dst is advanced
+ * one pitch per row and data->dst_h is used up as the row counter; data->src
+ * is not modified.  A destination with a non-positive width or height is a
+ * no-op.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    /* Reassigned before the first read of every row; initialised so that is
+       also evident to the compiler. */
+    const Uint32 *src = (const Uint32 *)data->src;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the step computation below would divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        if (posy >= 0x10000L) {
+            srcy += posy >> 16;
+            posy &= 0xFFFF;
+        }
+
+        /* Starting one full step behind forces a lookup on the first pixel. */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n != 0; --n) {
+            Uint32 s;
+            Uint32 r, g, b;
+
+            if (posx >= 0x10000L) {
+                srcx += posx >> 16;
+                posx &= 0xFFFF;
+                src = (const Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+
+            s = *src;
+            r = s >> 24;
+            g = (s >> 16) & 0xFF;
+            b = (s >> 8) & 0xFF;
+            if (tint) {
+                r = (r * mulR) / 255;
+                g = (g * mulG) / 255;
+                b = (b * mulB) / 255;
+            }
+            *dst++ = (r << 16) | (g << 8) | b;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy with optional colour/alpha modulation that combines every
+ * source pixel with the pixel already in the destination.
+ *
+ * Source pixels are read as red/green/blue/alpha from the top byte down;
+ * destination pixels hold red in bits 16-23, green in 8-15 and blue in 0-7.
+ * Per pixel, in this order:
+ *   1. SDL_RENDERCOPY_MODULATE_COLOR scales the source colour by
+ *      data->r / data->g / data->b over 255.
+ *   2. SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a
+ *      over 255.
+ *   3. The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits select the operation:
+ *        BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *        ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *        MOD only:    dst = src * dst / 255
+ *      With any other combination of those bits the destination colour is
+ *      rewritten unchanged.
+ * The destination's top byte is always stored as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int fade = data->flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 s = *in++;
+            const Uint32 d = *out;
+            Uint32 sr = s >> 24;
+            Uint32 sg = (s >> 16) & 0xFF;
+            Uint32 sb = (s >> 8) & 0xFF;
+            Uint32 sa = s & 0xFF;
+            Uint32 dr = (d >> 16) & 0xFF;
+            Uint32 dg = (d >> 8) & 0xFF;
+            Uint32 db = d & 0xFF;
+
+            if (tint) {
+                sr = (sr * mulR) / 255;
+                sg = (sg * mulG) / 255;
+                sb = (sb * mulB) / 255;
+            }
+            if (fade) {
+                sa = (sa * mulA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *out++ = (dr << 16) | (dg << 8) | db;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Point-sampled stretch of the src_w x src_h source onto the dst_w x dst_h
+ * destination with optional colour/alpha modulation, combining every sampled
+ * pixel with the one already in the destination.
+ *
+ * Source pixels are read as red/green/blue/alpha from the top byte down;
+ * destination pixels hold red in bits 16-23, green in 8-15 and blue in 0-7.
+ * Per pixel, in this order:
+ *   1. SDL_RENDERCOPY_MODULATE_COLOR scales the source colour by
+ *      data->r / data->g / data->b over 255.
+ *   2. SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a
+ *      over 255.
+ *   3. The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits select the operation:
+ *        BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *        ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *        MOD only:    dst = src * dst / 255
+ *      With any other combination of those bits the destination colour is
+ *      rewritten unchanged.
+ * The destination's top byte is always stored as zero.
+ *
+ * Source coordinates are stepped in 16.16 fixed point.  data->dst is advanced
+ * one pitch per row and data->dst_h is used up as the row counter; data->src
+ * is not modified.  A destination with a non-positive width or height is a
+ * no-op.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int fade = data->flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+    /* Reassigned before the first read of every row; initialised so that is
+       also evident to the compiler. */
+    const Uint32 *src = (const Uint32 *)data->src;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the step computation below would divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        if (posy >= 0x10000L) {
+            srcy += posy >> 16;
+            posy &= 0xFFFF;
+        }
+
+        /* Starting one full step behind forces a lookup on the first pixel. */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n != 0; --n) {
+            Uint32 s, d;
+            Uint32 sa, sr, sg, sb;
+            Uint32 dr, dg, db;
+
+            if (posx >= 0x10000L) {
+                srcx += posx >> 16;
+                posx &= 0xFFFF;
+                src = (const Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+
+            s = *src;
+            sr = s >> 24;
+            sg = (s >> 16) & 0xFF;
+            sb = (s >> 8) & 0xFF;
+            sa = s & 0xFF;
+
+            d = *dst;
+            dr = (d >> 16) & 0xFF;
+            dg = (d >> 8) & 0xFF;
+            db = d & 0xFF;
+
+            if (tint) {
+                sr = (sr * mulR) / 255;
+                sg = (sg * mulG) / 255;
+                sb = (sb * mulB) / 255;
+            }
+            if (fade) {
+                sa = (sa * mulA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *dst++ = (dr << 16) | (dg << 8) | db;
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled, unblended copy of dst_w x dst_h 32-bit pixels from data->src to
+ * data->dst.
+ *
+ * Each source pixel is read with red in bits 24-31, green in 16-23, blue in
+ * 8-15 and alpha in 0-7, and is stored with blue in bits 16-23, green in 8-15
+ * and red in 0-7.  The alpha byte is dropped, so the top byte of every
+ * destination pixel is written as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888(SDL_RenderCopyData *data)
+{
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 rgba = *in++;
+
+            /* Blue moves up one byte, green down one, red down three. */
+            *out++ = ((rgba & 0x0000FF00) << 8) |
+                     ((rgba >> 8) & 0x0000FF00) |
+                     (rgba >> 24);
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Point-sampled stretch of the src_w x src_h source onto the dst_w x dst_h
+ * destination, with no blending and no modulation.
+ *
+ * Source coordinates are stepped in 16.16 fixed point; every destination
+ * pixel takes the single source pixel at the truncated position.  Source
+ * pixels are read as red/green/blue/alpha from the top byte down and stored
+ * with blue in bits 16-23, green in 8-15 and red in 0-7.  Alpha is dropped
+ * and the destination's top byte is written as zero.
+ *
+ * data->dst is advanced one pitch per row and data->dst_h is used up as the
+ * row counter; data->src is not modified.  A destination with a non-positive
+ * width or height is a no-op.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    /* Reassigned before the first read of every row; initialised so that is
+       also evident to the compiler. */
+    const Uint32 *src = (const Uint32 *)data->src;
+    int srcy = 0, srcx;
+    int posy = 0, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the step computation below would divide by zero. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n;
+
+        /* Move whole source rows out of the vertical accumulator. */
+        if (posy >= 0x10000L) {
+            srcy += posy >> 16;
+            posy &= 0xFFFF;
+        }
+
+        /* Starting one full step behind forces a lookup on the first pixel. */
+        srcx = -1;
+        posx = 0x10000L;
+        for (n = data->dst_w; n != 0; --n) {
+            Uint32 rgba;
+
+            if (posx >= 0x10000L) {
+                srcx += posx >> 16;
+                posx &= 0xFFFF;
+                src = (const Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            rgba = *src;
+            /* Blue moves up one byte, green down one, red down three. */
+            *dst++ = ((rgba & 0x0000FF00) << 8) |
+                     ((rgba >> 8) & 0x0000FF00) |
+                     (rgba >> 24);
+            posx += incx;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Unscaled copy that combines every source pixel with the pixel already in
+ * the destination.
+ *
+ * Source pixels are read as red/green/blue/alpha from the top byte down;
+ * destination pixels hold blue in bits 16-23, green in 8-15 and red in 0-7.
+ * The SDL_RENDERCOPY_BLEND / _ADD / _MOD bits of data->flags select the
+ * per-channel operation (integer arithmetic):
+ *   BLEND only:  dst = src * srcA / 255 + (255 - srcA) * dst / 255
+ *   ADD only:    dst = min(src * srcA / 255 + dst, 255)
+ *   MOD only:    dst = src * dst / 255
+ * With any other combination of those bits the destination colour is
+ * rewritten unchanged.  The destination's top byte is always stored as zero.
+ *
+ * data->src and data->dst are advanced one pitch per row and data->dst_h is
+ * used up as the row counter.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD | SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND | SDL_RENDERCOPY_ADD);
+
+    while (data->dst_h--) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining != 0; --remaining) {
+            const Uint32 s = *in++;
+            const Uint32 d = *out;
+            const Uint32 sa = s & 0xFF;
+            Uint32 sr = s >> 24;
+            Uint32 sg = (s >> 16) & 0xFF;
+            Uint32 sb = (s >> 8) & 0xFF;
+            Uint32 db = (d >> 16) & 0xFF;
+            Uint32 dg = (d >> 8) & 0xFF;
+            Uint32 dr = d & 0xFF;
+
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sr = (sr * sa) / 255;
+                sg = (sg * sa) / 255;
+                sb = (sb * sa) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - sa;
+
+                dr = sr + (keep * dr) / 255;
+                dg = sg + (keep * dg) / 255;
+                db = sb + (keep * db) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dr = (sr + dr > 255) ? 255 : sr + dr;
+                dg = (sg + dg > 255) ? 255 : sg + dg;
+                db = (sb + db > 255) ? 255 : sb + db;
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dr = (sr * dr) / 255;
+                dg = (sg * dg) / 255;
+                db = (sb * db) / 255;
+            }
+
+            *out++ = (db << 16) | (dg << 8) | dr;
+        }
+        data->src += data->src_pitch;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend an RGBA8888 source onto a BGR888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels.
+ *
+ * Source positions are stepped in 16.16 fixed point; every destination
+ * pixel reads exactly one source pixel (no filtering).  data->flags picks
+ * the operation (SDL_RENDERCOPY_BLEND, SDL_RENDERCOPY_ADD or
+ * SDL_RENDERCOPY_MOD); any other combination of those bits leaves the
+ * destination colour channels as they were.
+ *
+ * data->dst_h is used as the row counter and data->dst is advanced by
+ * data->dst_pitch per row; data->src is not modified.  Returns 0.  An empty
+ * or negative destination size is treated as nothing to do.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copies an RGBA8888 source to a BGR888 destination at 1:1 size, optionally
+ * multiplying the colour by data->r / data->g / data->b.
+ *
+ * Colour modulation is applied only when data->flags contains
+ * SDL_RENDERCOPY_MODULATE_COLOR.  The source alpha byte is not used; the
+ * output pixel is built from R, G and B only.
+ *
+ * data->dst_h is consumed as the row counter, and data->src / data->dst are
+ * advanced by their pitches after each row.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int tint = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left--; ++in, ++out) {
+            const Uint32 s = *in;
+            /* Source bytes, most significant first: R, G, B, A. */
+            Uint32 cR = (s >> 24) & 0xFF;
+            Uint32 cG = (s >> 16) & 0xFF;
+            Uint32 cB = (s >> 8) & 0xFF;
+
+            if (tint) {
+                cR = (cR * mulR) / 255;
+                cG = (cG * mulG) / 255;
+                cB = (cB * mulB) / 255;
+            }
+
+            /* Destination keeps B, G, R in its low three bytes. */
+            *out = (cB << 16) | (cG << 8) | cR;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Copy an RGBA8888 source to a BGR888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels,
+ * optionally multiplying the colour by data->r / data->g / data->b.
+ *
+ * Colour modulation is applied only when data->flags contains
+ * SDL_RENDERCOPY_MODULATE_COLOR.  The source alpha byte is not used.
+ * Source positions are stepped in 16.16 fixed point; every destination
+ * pixel reads exactly one source pixel (no filtering).
+ *
+ * data->dst_h is used as the row counter and data->dst is advanced by
+ * data->dst_pitch per row; data->src is not modified.  Returns 0.  An empty
+ * or negative destination size is treated as nothing to do.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            R = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); B = (Uint8)(pixel >> 8);
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blends an RGBA8888 source onto a BGR888 destination at 1:1 size, with
+ * optional colour and alpha modulation of the source.
+ *
+ * data->flags controls each step:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  multiply source R/G/B by data->r/g/b
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  multiply source alpha by data->a
+ *   SDL_RENDERCOPY_BLEND / _ADD / _MOD  the blend operation; if those three
+ *   bits match none of the values exactly, the destination colour channels
+ *   are written back as they were read.
+ *
+ * data->dst_h is consumed as the row counter, and data->src / data->dst are
+ * advanced by their pitches after each row.  Always returns 0.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+    const int tint = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const int fade = (flags & SDL_RENDERCOPY_MODULATE_ALPHA) != 0;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left--; ++in, ++out) {
+            const Uint32 s = *in;
+            const Uint32 d = *out;
+            /* Source bytes, most significant first: R, G, B, A. */
+            Uint32 sR = (s >> 24) & 0xFF;
+            Uint32 sG = (s >> 16) & 0xFF;
+            Uint32 sB = (s >> 8) & 0xFF;
+            Uint32 sA = s & 0xFF;
+            /* Destination keeps B, G, R in its low three bytes. */
+            Uint32 dB = (d >> 16) & 0xFF;
+            Uint32 dG = (d >> 8) & 0xFF;
+            Uint32 dR = d & 0xFF;
+
+            if (tint) {
+                sR = (sR * mulR) / 255;
+                sG = (sG * mulG) / 255;
+                sB = (sB * mulB) / 255;
+            }
+            if (fade) {
+                sA = (sA * mulA) / 255;
+            }
+            if (premultiply) {
+                /* Straight alpha: scale the colour by alpha before combining. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                dR = sR + ((255 - sA) * dR) / 255;
+                dG = sG + ((255 - sA) * dG) / 255;
+                dB = sB + ((255 - sA) * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR += sR;
+                if (dR > 255) {
+                    dR = 255;
+                }
+                dG += sG;
+                if (dG > 255) {
+                    dG = 255;
+                }
+                dB += sB;
+                if (dB > 255) {
+                    dB = 255;
+                }
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+
+            *out = (dB << 16) | (dG << 8) | dR;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Blend an RGBA8888 source onto a BGR888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels, with
+ * optional colour and alpha modulation of the source.
+ *
+ * data->flags controls each step:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  multiply source R/G/B by data->r/g/b
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  multiply source alpha by data->a
+ *   SDL_RENDERCOPY_BLEND / _ADD / _MOD  the blend operation; any other
+ *   combination of those bits leaves the destination colour channels as
+ *   they were.
+ *
+ * Source positions are stepped in 16.16 fixed point; every destination
+ * pixel reads exactly one source pixel (no filtering).  data->dst_h is used
+ * as the row counter and data->dst is advanced by data->dst_pitch per row;
+ * data->src is not modified.  Returns 0.  An empty or negative destination
+ * size is treated as nothing to do.
+ */
+int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Converts an ABGR8888 source to an RGB888 destination at 1:1 size.
+ *
+ * Each pixel has its R and B channels swapped into the destination order;
+ * the source alpha byte is dropped (the output pixel is built from R, G and
+ * B only).
+ *
+ * data->dst_h is consumed as the row counter, and data->src / data->dst are
+ * advanced by their pitches after each row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888(SDL_RenderCopyData *data)
+{
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left--; ++in, ++out) {
+            const Uint32 s = *in;
+            /* Source bytes, most significant first: A, B, G, R. */
+            const Uint32 cB = (s >> 16) & 0xFF;
+            const Uint32 cG = (s >> 8) & 0xFF;
+            const Uint32 cR = s & 0xFF;
+
+            /* Destination keeps R, G, B in its low three bytes. */
+            *out = (cR << 16) | (cG << 8) | cB;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Convert an ABGR8888 source to an RGB888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels.
+ *
+ * The source alpha byte is dropped.  Source positions are stepped in 16.16
+ * fixed point; every destination pixel reads exactly one source pixel (no
+ * filtering).
+ *
+ * data->dst_h is used as the row counter and data->dst is advanced by
+ * data->dst_pitch per row; data->src is not modified.  Returns 0.  An empty
+ * or negative destination size is treated as nothing to do.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blends an ABGR8888 source onto an RGB888 destination at 1:1 size.
+ *
+ * The operation comes from data->flags: SDL_RENDERCOPY_BLEND,
+ * SDL_RENDERCOPY_ADD or SDL_RENDERCOPY_MOD.  If those three bits do not
+ * match exactly one of these values, the destination colour channels are
+ * written back as they were read.
+ *
+ * data->dst_h is consumed as the row counter, and data->src / data->dst are
+ * advanced by their pitches after each row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left--; ++in, ++out) {
+            const Uint32 s = *in;
+            const Uint32 d = *out;
+            /* Source bytes, most significant first: A, B, G, R. */
+            const Uint32 sA = (s >> 24) & 0xFF;
+            Uint32 sB = (s >> 16) & 0xFF;
+            Uint32 sG = (s >> 8) & 0xFF;
+            Uint32 sR = s & 0xFF;
+            /* Destination keeps R, G, B in its low three bytes. */
+            Uint32 dR = (d >> 16) & 0xFF;
+            Uint32 dG = (d >> 8) & 0xFF;
+            Uint32 dB = d & 0xFF;
+
+            if (premultiply) {
+                /* Straight alpha: scale the colour by alpha before combining. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                dR = sR + ((255 - sA) * dR) / 255;
+                dG = sG + ((255 - sA) * dG) / 255;
+                dB = sB + ((255 - sA) * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR += sR;
+                if (dR > 255) {
+                    dR = 255;
+                }
+                dG += sG;
+                if (dG > 255) {
+                    dG = 255;
+                }
+                dB += sB;
+                if (dB > 255) {
+                    dB = 255;
+                }
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+
+            *out = (dR << 16) | (dG << 8) | dB;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Blend an ABGR8888 source onto an RGB888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels.
+ *
+ * Source positions are stepped in 16.16 fixed point; every destination
+ * pixel reads exactly one source pixel (no filtering).  data->flags picks
+ * the operation (SDL_RENDERCOPY_BLEND, SDL_RENDERCOPY_ADD or
+ * SDL_RENDERCOPY_MOD); any other combination of those bits leaves the
+ * destination colour channels as they were.
+ *
+ * data->dst_h is used as the row counter and data->dst is advanced by
+ * data->dst_pitch per row; data->src is not modified.  Returns 0.  An empty
+ * or negative destination size is treated as nothing to do.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copies an ABGR8888 source to an RGB888 destination at 1:1 size,
+ * optionally multiplying the colour by data->r / data->g / data->b.
+ *
+ * Colour modulation is applied only when data->flags contains
+ * SDL_RENDERCOPY_MODULATE_COLOR.  The source alpha byte is not used; the
+ * output pixel is built from R, G and B only.
+ *
+ * data->dst_h is consumed as the row counter, and data->src / data->dst are
+ * advanced by their pitches after each row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int tint = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left--; ++in, ++out) {
+            const Uint32 s = *in;
+            /* Source bytes, most significant first: A, B, G, R. */
+            Uint32 cB = (s >> 16) & 0xFF;
+            Uint32 cG = (s >> 8) & 0xFF;
+            Uint32 cR = s & 0xFF;
+
+            if (tint) {
+                cR = (cR * mulR) / 255;
+                cG = (cG * mulG) / 255;
+                cB = (cB * mulB) / 255;
+            }
+
+            /* Destination keeps R, G, B in its low three bytes. */
+            *out = (cR << 16) | (cG << 8) | cB;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Copy an ABGR8888 source to an RGB888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels,
+ * optionally multiplying the colour by data->r / data->g / data->b.
+ *
+ * Colour modulation is applied only when data->flags contains
+ * SDL_RENDERCOPY_MODULATE_COLOR.  The source alpha byte is not used.
+ * Source positions are stepped in 16.16 fixed point; every destination
+ * pixel reads exactly one source pixel (no filtering).
+ *
+ * data->dst_h is used as the row counter and data->dst is advanced by
+ * data->dst_pitch per row; data->src is not modified.  Returns 0.  An empty
+ * or negative destination size is treated as nothing to do.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blends an ABGR8888 source onto an RGB888 destination at 1:1 size, with
+ * optional colour and alpha modulation of the source.
+ *
+ * data->flags controls each step:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  multiply source R/G/B by data->r/g/b
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  multiply source alpha by data->a
+ *   SDL_RENDERCOPY_BLEND / _ADD / _MOD  the blend operation; if those three
+ *   bits match none of the values exactly, the destination colour channels
+ *   are written back as they were read.
+ *
+ * data->dst_h is consumed as the row counter, and data->src / data->dst are
+ * advanced by their pitches after each row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+    const int tint = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const int fade = (flags & SDL_RENDERCOPY_MODULATE_ALPHA) != 0;
+    const Uint32 mulR = data->r;
+    const Uint32 mulG = data->g;
+    const Uint32 mulB = data->b;
+    const Uint32 mulA = data->a;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left--; ++in, ++out) {
+            const Uint32 s = *in;
+            const Uint32 d = *out;
+            /* Source bytes, most significant first: A, B, G, R. */
+            Uint32 sA = (s >> 24) & 0xFF;
+            Uint32 sB = (s >> 16) & 0xFF;
+            Uint32 sG = (s >> 8) & 0xFF;
+            Uint32 sR = s & 0xFF;
+            /* Destination keeps R, G, B in its low three bytes. */
+            Uint32 dR = (d >> 16) & 0xFF;
+            Uint32 dG = (d >> 8) & 0xFF;
+            Uint32 dB = d & 0xFF;
+
+            if (tint) {
+                sR = (sR * mulR) / 255;
+                sG = (sG * mulG) / 255;
+                sB = (sB * mulB) / 255;
+            }
+            if (fade) {
+                sA = (sA * mulA) / 255;
+            }
+            if (premultiply) {
+                /* Straight alpha: scale the colour by alpha before combining. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                dR = sR + ((255 - sA) * dR) / 255;
+                dG = sG + ((255 - sA) * dG) / 255;
+                dB = sB + ((255 - sA) * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR += sR;
+                if (dR > 255) {
+                    dR = 255;
+                }
+                dG += sG;
+                if (dG > 255) {
+                    dG = 255;
+                }
+                dB += sB;
+                if (dB > 255) {
+                    dB = 255;
+                }
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+
+            *out = (dR << 16) | (dG << 8) | dB;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Blend an ABGR8888 source onto an RGB888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels, with
+ * optional colour and alpha modulation of the source.
+ *
+ * data->flags controls each step:
+ *   SDL_RENDERCOPY_MODULATE_COLOR  multiply source R/G/B by data->r/g/b
+ *   SDL_RENDERCOPY_MODULATE_ALPHA  multiply source alpha by data->a
+ *   SDL_RENDERCOPY_BLEND / _ADD / _MOD  the blend operation; any other
+ *   combination of those bits leaves the destination colour channels as
+ *   they were.
+ *
+ * Source positions are stepped in 16.16 fixed point; every destination
+ * pixel reads exactly one source pixel (no filtering).  data->dst_h is used
+ * as the row counter and data->dst is advanced by data->dst_pitch per row;
+ * data->src is not modified.  Returns 0.  An empty or negative destination
+ * size is treated as nothing to do.
+ */
+int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Converts an ABGR8888 source to a BGR888 destination at 1:1 size.
+ *
+ * The B, G and R bytes keep their positions; the source alpha byte is
+ * dropped (the output pixel is built from R, G and B only).
+ *
+ * data->dst_h is consumed as the row counter, and data->src / data->dst are
+ * advanced by their pitches after each row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888(SDL_RenderCopyData *data)
+{
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left--; ++in, ++out) {
+            const Uint32 s = *in;
+            /* Source bytes, most significant first: A, B, G, R. */
+            const Uint32 cB = (s >> 16) & 0xFF;
+            const Uint32 cG = (s >> 8) & 0xFF;
+            const Uint32 cR = s & 0xFF;
+
+            /* Destination keeps B, G, R in its low three bytes. */
+            *out = (cB << 16) | (cG << 8) | cR;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Convert an ABGR8888 source to a BGR888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels.
+ *
+ * The source alpha byte is dropped.  Source positions are stepped in 16.16
+ * fixed point; every destination pixel reads exactly one source pixel (no
+ * filtering).
+ *
+ * data->dst_h is used as the row counter and data->dst is advanced by
+ * data->dst_pitch per row; data->src is not modified.  Returns 0.  An empty
+ * or negative destination size is treated as nothing to do.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blends an ABGR8888 source onto a BGR888 destination at 1:1 size.
+ *
+ * The operation comes from data->flags: SDL_RENDERCOPY_BLEND,
+ * SDL_RENDERCOPY_ADD or SDL_RENDERCOPY_MOD.  If those three bits do not
+ * match exactly one of these values, the destination colour channels are
+ * written back as they were read.
+ *
+ * data->dst_h is consumed as the row counter, and data->src / data->dst are
+ * advanced by their pitches after each row.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int mode = flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        Uint32 *in = (Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int left;
+
+        for (left = data->dst_w; left--; ++in, ++out) {
+            const Uint32 s = *in;
+            const Uint32 d = *out;
+            /* Source bytes, most significant first: A, B, G, R. */
+            const Uint32 sA = (s >> 24) & 0xFF;
+            Uint32 sB = (s >> 16) & 0xFF;
+            Uint32 sG = (s >> 8) & 0xFF;
+            Uint32 sR = s & 0xFF;
+            /* Destination keeps B, G, R in its low three bytes. */
+            Uint32 dB = (d >> 16) & 0xFF;
+            Uint32 dG = (d >> 8) & 0xFF;
+            Uint32 dR = d & 0xFF;
+
+            if (premultiply) {
+                /* Straight alpha: scale the colour by alpha before combining. */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                dR = sR + ((255 - sA) * dR) / 255;
+                dG = sG + ((255 - sA) * dG) / 255;
+                dB = sB + ((255 - sA) * dB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add. */
+                dR += sR;
+                if (dR > 255) {
+                    dR = 255;
+                }
+                dG += sG;
+                if (dG > 255) {
+                    dG = 255;
+                }
+                dB += sB;
+                if (dB > 255) {
+                    dB = 255;
+                }
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+            }
+
+            *out = (dB << 16) | (dG << 8) | dR;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Blend an ABGR8888 source onto a BGR888 destination while scaling
+ * src_w x src_h source pixels to dst_w x dst_h destination pixels.
+ *
+ * Source positions are stepped in 16.16 fixed point; every destination
+ * pixel reads exactly one source pixel (no filtering).  data->flags picks
+ * the operation (SDL_RENDERCOPY_BLEND, SDL_RENDERCOPY_ADD or
+ * SDL_RENDERCOPY_MOD); any other combination of those bits leaves the
+ * destination colour channels as they were.
+ *
+ * data->dst_h is used as the row counter and data->dst is advanced by
+ * data->dst_pitch per row; data->src is not modified.  Returns 0.  An empty
+ * or negative destination size is treated as nothing to do.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Avoid dividing by zero below when there is no destination area. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source step per destination pixel, in 16.16 fixed point. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        /* Assigned on the first pixel of each row, since posx starts at 1.0. */
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        srcx = -1;
+        posx = 0x10000L;
+        /* Move down by the whole source rows accumulated so far. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Move right by the whole source pixels accumulated. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add. */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy a block of ABGR8888 pixels into a BGR888 destination at 1:1 size,
+ * optionally tinting the colour.  When SDL_RENDERCOPY_MODULATE_COLOR is set
+ * in data->flags each channel is multiplied by data->r / data->g / data->b
+ * and divided by 255.  Source alpha and data->a are not used.
+ *
+ * data->src, data->dst and data->dst_h are consumed while rows are
+ * processed.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            Uint32 blue = (texel >> 16) & 0xFF;
+            Uint32 green = (texel >> 8) & 0xFF;
+            Uint32 red = texel & 0xFF;
+
+            if (tint) {
+                red = (red * scaleR) / 255;
+                green = (green * scaleG) / 255;
+                blue = (blue * scaleB) / 255;
+            }
+            *out = (blue << 16) | (green << 8) | red;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Copy an ABGR8888 source into a BGR888 destination while stretching it
+ * from src_w x src_h to dst_w x dst_h, optionally tinting the colour.
+ * Every destination pixel samples a single source pixel (no filtering);
+ * the source position is stepped in 16.16 fixed point.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  Source
+ * alpha and data->a are not used.
+ *
+ * data->dst and data->dst_h are consumed while rows are written; data->src
+ * is left untouched.  An empty destination (dst_w or dst_h <= 0) is a no-op.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the divisions below from dividing by zero */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one column early with a full step pending, so the first pixel resolves to column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Skip down by however many whole source rows have accumulated */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Crossed into a new source column: recompute the source pointer */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend a block of ABGR8888 pixels onto a BGR888 destination at 1:1 size,
+ * with optional colour and alpha modulation of the source.
+ *
+ * SDL_RENDERCOPY_MODULATE_COLOR scales the source colour by data->r/g/b
+ * (each over 255); SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by
+ * data->a.  The blend itself is chosen by data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src * A / 255 + dst * (255 - A) / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(255, src * A / 255 + dst)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * With any other combination the destination colour is rewritten as-is.
+ *
+ * data->src, data->dst and data->dst_h are consumed while rows are
+ * processed.  Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+    const int tintColor = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int tintAlpha = data->flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    const Uint32 scaleA = data->a;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 under = *out;
+            Uint32 srcA = texel >> 24;
+            Uint32 srcB = (texel >> 16) & 0xFF;
+            Uint32 srcG = (texel >> 8) & 0xFF;
+            Uint32 srcR = texel & 0xFF;
+            Uint32 dstB = (under >> 16) & 0xFF;
+            Uint32 dstG = (under >> 8) & 0xFF;
+            Uint32 dstR = under & 0xFF;
+
+            if (tintColor) {
+                srcR = (srcR * scaleR) / 255;
+                srcG = (srcG * scaleG) / 255;
+                srcB = (srcB * scaleB) / 255;
+            }
+            if (tintAlpha) {
+                srcA = (srcA * scaleA) / 255;
+            }
+            if (premultiply) {
+                /* The source is not stored premultiplied, so weight it by alpha here */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - srcA;
+                dstR = srcR + (keep * dstR) / 255;
+                dstG = srcG + (keep * dstG) / 255;
+                dstB = srcB + (keep * dstB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+            *out = (dstB << 16) | (dstG << 8) | dstR;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Blend an ABGR8888 source onto a BGR888 destination while stretching it
+ * from src_w x src_h to dst_w x dst_h, with optional colour and alpha
+ * modulation of the source.  Every destination pixel samples a single
+ * source pixel (no filtering); the source position is stepped in 16.16
+ * fixed point.
+ *
+ * SDL_RENDERCOPY_MODULATE_COLOR scales the source colour by data->r/g/b
+ * (each over 255); SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by
+ * data->a.  The blend itself is chosen by data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src * A / 255 + dst * (255 - A) / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(255, src * A / 255 + dst)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * With any other combination the destination colour is rewritten as-is.
+ *
+ * data->dst and data->dst_h are consumed while rows are written; data->src
+ * is left untouched.  An empty destination (dst_w or dst_h <= 0) is a no-op.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the divisions below from dividing by zero */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one column early with a full step pending, so the first pixel resolves to column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Skip down by however many whole source rows have accumulated */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Crossed into a new source column: recompute the source pointer */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy a block of BGRA8888 pixels into an RGB888 destination at 1:1 size.
+ * The colour bytes are reordered and the source alpha is discarded.
+ *
+ * data->src, data->dst and data->dst_h are consumed while rows are
+ * processed.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888(SDL_RenderCopyData *data)
+{
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 blue = texel >> 24;
+            const Uint32 green = (texel >> 16) & 0xFF;
+            const Uint32 red = (texel >> 8) & 0xFF;
+
+            *out = (red << 16) | (green << 8) | blue;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Copy a BGRA8888 source into an RGB888 destination while stretching it
+ * from src_w x src_h to dst_w x dst_h.  Every destination pixel samples a
+ * single source pixel (no filtering); the source position is stepped in
+ * 16.16 fixed point.  The source alpha is discarded.
+ *
+ * data->dst and data->dst_h are consumed while rows are written; data->src
+ * is left untouched.  An empty destination (dst_w or dst_h <= 0) is a no-op.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the divisions below from dividing by zero */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one column early with a full step pending, so the first pixel resolves to column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Skip down by however many whole source rows have accumulated */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Crossed into a new source column: recompute the source pointer */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8);
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend a block of BGRA8888 pixels onto an RGB888 destination at 1:1 size.
+ *
+ * data->flags selects the operation:
+ *   SDL_RENDERCOPY_BLEND  dst = src * A / 255 + dst * (255 - A) / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(255, src * A / 255 + dst)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * With any other combination the destination colour is rewritten as-is.
+ *
+ * data->src, data->dst and data->dst_h are consumed while rows are
+ * processed.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 under = *out;
+            Uint32 srcB = texel >> 24;
+            Uint32 srcG = (texel >> 16) & 0xFF;
+            Uint32 srcR = (texel >> 8) & 0xFF;
+            const Uint32 srcA = texel & 0xFF;
+            Uint32 dstR = (under >> 16) & 0xFF;
+            Uint32 dstG = (under >> 8) & 0xFF;
+            Uint32 dstB = under & 0xFF;
+
+            if (premultiply) {
+                /* The source is not stored premultiplied, so weight it by alpha here */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - srcA;
+                dstR = srcR + (keep * dstR) / 255;
+                dstG = srcG + (keep * dstG) / 255;
+                dstB = srcB + (keep * dstB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+            *out = (dstR << 16) | (dstG << 8) | dstB;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Blend a BGRA8888 source onto an RGB888 destination while stretching it
+ * from src_w x src_h to dst_w x dst_h.  Every destination pixel samples a
+ * single source pixel (no filtering); the source position is stepped in
+ * 16.16 fixed point.
+ *
+ * data->flags selects the operation:
+ *   SDL_RENDERCOPY_BLEND  dst = src * A / 255 + dst * (255 - A) / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(255, src * A / 255 + dst)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * With any other combination the destination colour is rewritten as-is.
+ *
+ * data->dst and data->dst_h are consumed while rows are written; data->src
+ * is left untouched.  An empty destination (dst_w or dst_h <= 0) is a no-op.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the divisions below from dividing by zero */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one column early with a full step pending, so the first pixel resolves to column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Skip down by however many whole source rows have accumulated */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Crossed into a new source column: recompute the source pointer */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy a block of BGRA8888 pixels into an RGB888 destination at 1:1 size,
+ * optionally tinting the colour.  When SDL_RENDERCOPY_MODULATE_COLOR is set
+ * in data->flags each channel is multiplied by data->r / data->g / data->b
+ * and divided by 255.  Source alpha and data->a are not used.
+ *
+ * data->src, data->dst and data->dst_h are consumed while rows are
+ * processed.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Modulate(SDL_RenderCopyData *data)
+{
+    const int tint = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            Uint32 blue = texel >> 24;
+            Uint32 green = (texel >> 16) & 0xFF;
+            Uint32 red = (texel >> 8) & 0xFF;
+
+            if (tint) {
+                red = (red * scaleR) / 255;
+                green = (green * scaleG) / 255;
+                blue = (blue * scaleB) / 255;
+            }
+            *out = (red << 16) | (green << 8) | blue;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Copy a BGRA8888 source into an RGB888 destination while stretching it
+ * from src_w x src_h to dst_w x dst_h, optionally tinting the colour.
+ * Every destination pixel samples a single source pixel (no filtering);
+ * the source position is stepped in 16.16 fixed point.
+ *
+ * When SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  Source
+ * alpha and data->a are not used.
+ *
+ * data->dst and data->dst_h are consumed while rows are written; data->src
+ * is left untouched.  An empty destination (dst_w or dst_h <= 0) is a no-op.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the divisions below from dividing by zero */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one column early with a full step pending, so the first pixel resolves to column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Skip down by however many whole source rows have accumulated */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Crossed into a new source column: recompute the source pointer */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8);
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend a block of BGRA8888 pixels onto an RGB888 destination at 1:1 size,
+ * with optional colour and alpha modulation of the source.
+ *
+ * SDL_RENDERCOPY_MODULATE_COLOR scales the source colour by data->r/g/b
+ * (each over 255); SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by
+ * data->a.  The blend itself is chosen by data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src * A / 255 + dst * (255 - A) / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(255, src * A / 255 + dst)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * With any other combination the destination colour is rewritten as-is.
+ *
+ * data->src, data->dst and data->dst_h are consumed while rows are
+ * processed.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+    const int tintColor = data->flags & SDL_RENDERCOPY_MODULATE_COLOR;
+    const int tintAlpha = data->flags & SDL_RENDERCOPY_MODULATE_ALPHA;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    const Uint32 scaleA = data->a;
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 under = *out;
+            Uint32 srcB = texel >> 24;
+            Uint32 srcG = (texel >> 16) & 0xFF;
+            Uint32 srcR = (texel >> 8) & 0xFF;
+            Uint32 srcA = texel & 0xFF;
+            Uint32 dstR = (under >> 16) & 0xFF;
+            Uint32 dstG = (under >> 8) & 0xFF;
+            Uint32 dstB = under & 0xFF;
+
+            if (tintColor) {
+                srcR = (srcR * scaleR) / 255;
+                srcG = (srcG * scaleG) / 255;
+                srcB = (srcB * scaleB) / 255;
+            }
+            if (tintAlpha) {
+                srcA = (srcA * scaleA) / 255;
+            }
+            if (premultiply) {
+                /* The source is not stored premultiplied, so weight it by alpha here */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - srcA;
+                dstR = srcR + (keep * dstR) / 255;
+                dstG = srcG + (keep * dstG) / 255;
+                dstB = srcB + (keep * dstB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+            *out = (dstR << 16) | (dstG << 8) | dstB;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Blend a BGRA8888 source onto an RGB888 destination while stretching it
+ * from src_w x src_h to dst_w x dst_h, with optional colour and alpha
+ * modulation of the source.  Every destination pixel samples a single
+ * source pixel (no filtering); the source position is stepped in 16.16
+ * fixed point.
+ *
+ * SDL_RENDERCOPY_MODULATE_COLOR scales the source colour by data->r/g/b
+ * (each over 255); SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by
+ * data->a.  The blend itself is chosen by data->flags:
+ *   SDL_RENDERCOPY_BLEND  dst = src * A / 255 + dst * (255 - A) / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(255, src * A / 255 + dst)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * With any other combination the destination colour is rewritten as-is.
+ *
+ * data->dst and data->dst_h are consumed while rows are written; data->src
+ * is left untouched.  An empty destination (dst_w or dst_h <= 0) is a no-op.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the divisions below from dividing by zero */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one column early with a full step pending, so the first pixel resolves to column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Skip down by however many whole source rows have accumulated */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Crossed into a new source column: recompute the source pointer */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Copy a block of BGRA8888 pixels into a BGR888 destination at 1:1 size.
+ * The colour bytes are moved down into the low three bytes and the source
+ * alpha is discarded.
+ *
+ * data->src, data->dst and data->dst_h are consumed while rows are
+ * processed.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888(SDL_RenderCopyData *data)
+{
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 blue = texel >> 24;
+            const Uint32 green = (texel >> 16) & 0xFF;
+            const Uint32 red = (texel >> 8) & 0xFF;
+
+            *out = (blue << 16) | (green << 8) | red;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Copy a BGRA8888 source into a BGR888 destination while stretching it
+ * from src_w x src_h to dst_w x dst_h.  Every destination pixel samples a
+ * single source pixel (no filtering); the source position is stepped in
+ * 16.16 fixed point.  The source alpha is discarded.
+ *
+ * data->dst and data->dst_h are consumed while rows are written; data->src
+ * is left untouched.  An empty destination (dst_w or dst_h <= 0) is a no-op.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Scale(SDL_RenderCopyData *data)
+{
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the divisions below from dividing by zero */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one column early with a full step pending, so the first pixel resolves to column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Skip down by however many whole source rows have accumulated */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Crossed into a new source column: recompute the source pointer */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8);
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * Blend a block of BGRA8888 pixels onto a BGR888 destination at 1:1 size.
+ *
+ * data->flags selects the operation:
+ *   SDL_RENDERCOPY_BLEND  dst = src * A / 255 + dst * (255 - A) / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(255, src * A / 255 + dst)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * With any other combination the destination colour is rewritten as-is.
+ *
+ * data->src, data->dst and data->dst_h are consumed while rows are
+ * processed.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Blend(SDL_RenderCopyData *data)
+{
+    const int mode = data->flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD);
+    const int premultiply = data->flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD);
+
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining;
+
+        for (remaining = data->dst_w; remaining--; ++in, ++out) {
+            const Uint32 texel = *in;
+            const Uint32 under = *out;
+            Uint32 srcB = texel >> 24;
+            Uint32 srcG = (texel >> 16) & 0xFF;
+            Uint32 srcR = (texel >> 8) & 0xFF;
+            const Uint32 srcA = texel & 0xFF;
+            Uint32 dstB = (under >> 16) & 0xFF;
+            Uint32 dstG = (under >> 8) & 0xFF;
+            Uint32 dstR = under & 0xFF;
+
+            if (premultiply) {
+                /* The source is not stored premultiplied, so weight it by alpha here */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+
+            if (mode == SDL_RENDERCOPY_BLEND) {
+                const Uint32 keep = 255 - srcA;
+                dstR = srcR + (keep * dstR) / 255;
+                dstG = srcG + (keep * dstG) / 255;
+                dstB = srcB + (keep * dstB) / 255;
+            } else if (mode == SDL_RENDERCOPY_ADD) {
+                /* Saturating add */
+                dstR += srcR;
+                if (dstR > 255) {
+                    dstR = 255;
+                }
+                dstG += srcG;
+                if (dstG > 255) {
+                    dstG = 255;
+                }
+                dstB += srcB;
+                if (dstB > 255) {
+                    dstB = 255;
+                }
+            } else if (mode == SDL_RENDERCOPY_MOD) {
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+            }
+            *out = (dstB << 16) | (dstG << 8) | dstR;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Blend a BGRA8888 source onto a BGR888 destination while stretching it
+ * from src_w x src_h to dst_w x dst_h.  Every destination pixel samples a
+ * single source pixel (no filtering); the source position is stepped in
+ * 16.16 fixed point.
+ *
+ * data->flags selects the operation:
+ *   SDL_RENDERCOPY_BLEND  dst = src * A / 255 + dst * (255 - A) / 255
+ *   SDL_RENDERCOPY_ADD    dst = min(255, src * A / 255 + dst)
+ *   SDL_RENDERCOPY_MOD    dst = src * dst / 255
+ * With any other combination the destination colour is rewritten as-is.
+ *
+ * data->dst and data->dst_h are consumed while rows are written; data->src
+ * is left untouched.  An empty destination (dst_w or dst_h <= 0) is a no-op.
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw; this also keeps the divisions below from dividing by zero */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* Source advance per destination pixel, in 16.16 fixed point */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src = 0;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one column early with a full step pending, so the first pixel resolves to column 0 */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Skip down by however many whole source rows have accumulated */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Crossed into a new source column: recompute the source pointer */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                /* Saturating add */
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * BGRA8888 -> BGR888 copy at 1:1 scale with optional color modulation.
+ *
+ * Walks data->dst_h rows of data->dst_w pixels.  Each 32-bit source pixel is
+ * split into B (bits 24-31), G (bits 16-23) and R (bits 8-15); the byte in
+ * bits 0-7 (alpha) is not used because the destination pixel is assembled
+ * from color channels only.  When SDL_RENDERCOPY_MODULATE_COLOR is set in
+ * data->flags, each channel is multiplied by data->r / data->g / data->b
+ * and divided by 255.  The result is stored as (B << 16) | (G << 8) | R.
+ *
+ * The descriptor is consumed while copying: data->src and data->dst are
+ * advanced by their pitch after every row and data->dst_h is counted down.
+ *
+ * Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Modulate(SDL_RenderCopyData *data)
+{
+    const int modulateColor = (data->flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+
+    /* The row step runs only after a completed row, never on loop exit. */
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining = data->dst_w;
+
+        for (; remaining--; ++in, ++out) {
+            const Uint32 bgra = *in;
+            Uint32 blue = (bgra >> 24) & 0xFF;
+            Uint32 green = (bgra >> 16) & 0xFF;
+            Uint32 red = (bgra >> 8) & 0xFF;
+
+            if (modulateColor) {
+                red = (red * scaleR) / 255;
+                green = (green * scaleG) / 255;
+                blue = (blue * scaleB) / 255;
+            }
+            *out = (blue << 16) | (green << 8) | red;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Scaled BGRA8888 -> BGR888 copy with optional color modulation.
+ *
+ * The data->src_w x data->src_h source is stretched onto the
+ * data->dst_w x data->dst_h destination.  Source coordinates are stepped
+ * in 16.16 fixed point (incx / incy per destination pixel / row) and each
+ * destination pixel takes exactly one source pixel, without filtering.
+ *
+ * Each source pixel is split into B (bits 24-31), G (bits 16-23) and
+ * R (bits 8-15); the byte in bits 0-7 (alpha) is not used.  When
+ * SDL_RENDERCOPY_MODULATE_COLOR is set in data->flags, each channel is
+ * multiplied by data->r / data->g / data->b and divided by 255.  The result
+ * is stored as (B << 16) | (G << 8) | R.
+ *
+ * data->dst is advanced by data->dst_pitch per row and data->dst_h is
+ * counted down; data->src is left untouched and rows are addressed from it
+ * by index.
+ *
+ * Returns 0.  An empty (or negative-sized) destination is a no-op.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    Uint32 pixel;
+    Uint32 R, G, B;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the divisions below would otherwise divide by
+       zero, which is undefined behavior. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* NOTE(review): the << 16 assumes source dimensions small enough not to
+       overflow an int -- confirm against callers. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one step "before" column 0 so the first pixel of the row
+           always takes the branch below and initializes src. */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows accumulated by the previous posy step. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source columns, then re-aim the source pointer. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            pixel = *src;
+            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8);
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                R = (R * modulateR) / 255;
+                G = (G * modulateG) / 255;
+                B = (B * modulateB) / 255;
+            }
+            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
+            *dst = pixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/*
+ * BGRA8888 -> BGR888 blended copy at 1:1 scale with optional color and
+ * alpha modulation.
+ *
+ * For each of the data->dst_w x data->dst_h pixels:
+ *   - the source is split into B (bits 24-31), G (16-23), R (8-15) and
+ *     A (0-7); the destination into B (16-23), G (8-15) and R (0-7);
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales the source color by
+ *     data->r / data->g / data->b over 255;
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a
+ *     over 255;
+ *   - if BLEND or ADD is requested the source color is multiplied by the
+ *     source alpha first;
+ *   - BLEND: dst = src + (255 - A) * dst / 255
+ *     ADD:   dst = src + dst, clamped to 255
+ *     MOD:   dst = src * dst / 255
+ *
+ * If the mode bits match none of the three cases exactly, the destination
+ * color channels are written back unchanged.  The stored pixel is always
+ * (B << 16) | (G << 8) | R.
+ *
+ * data->src and data->dst are advanced by their pitch after every row and
+ * data->dst_h is counted down.  Always returns 0.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const int modulateColor = (flags & SDL_RENDERCOPY_MODULATE_COLOR) != 0;
+    const int modulateAlpha = (flags & SDL_RENDERCOPY_MODULATE_ALPHA) != 0;
+    const int premultiply = (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) != 0;
+    const Uint32 scaleR = data->r;
+    const Uint32 scaleG = data->g;
+    const Uint32 scaleB = data->b;
+    const Uint32 scaleA = data->a;
+
+    /* The row step runs only after a completed row, never on loop exit. */
+    for (; data->dst_h--; data->src += data->src_pitch, data->dst += data->dst_pitch) {
+        const Uint32 *in = (const Uint32 *)data->src;
+        Uint32 *out = (Uint32 *)data->dst;
+        int remaining = data->dst_w;
+
+        for (; remaining--; ++in, ++out) {
+            const Uint32 srcpix = *in;
+            const Uint32 dstpix = *out;
+            Uint32 sB = (srcpix >> 24) & 0xFF;
+            Uint32 sG = (srcpix >> 16) & 0xFF;
+            Uint32 sR = (srcpix >> 8) & 0xFF;
+            Uint32 sA = srcpix & 0xFF;
+            Uint32 dB = (dstpix >> 16) & 0xFF;
+            Uint32 dG = (dstpix >> 8) & 0xFF;
+            Uint32 dR = dstpix & 0xFF;
+
+            if (modulateColor) {
+                sR = (sR * scaleR) / 255;
+                sG = (sG * scaleG) / 255;
+                sB = (sB * scaleB) / 255;
+            }
+            if (modulateAlpha) {
+                sA = (sA * scaleA) / 255;
+            }
+            if (premultiply) {
+                /* This goes away if we ever use premultiplied alpha */
+                sR = (sR * sA) / 255;
+                sG = (sG * sA) / 255;
+                sB = (sB * sA) / 255;
+            }
+
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                {
+                    const Uint32 keep = 255 - sA;
+                    dR = sR + (keep * dR) / 255;
+                    dG = sG + (keep * dG) / 255;
+                    dB = sB + (keep * dB) / 255;
+                }
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dR += sR;
+                if (dR > 255) {
+                    dR = 255;
+                }
+                dG += sG;
+                if (dG > 255) {
+                    dG = 255;
+                }
+                dB += sB;
+                if (dB > 255) {
+                    dB = 255;
+                }
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dR = (sR * dR) / 255;
+                dG = (sG * dG) / 255;
+                dB = (sB * dB) / 255;
+                break;
+            }
+
+            *out = (dB << 16) | (dG << 8) | dR;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Scaled BGRA8888 -> BGR888 blended copy with optional color and alpha
+ * modulation.
+ *
+ * The data->src_w x data->src_h source is stretched onto the
+ * data->dst_w x data->dst_h destination.  Source coordinates are stepped
+ * in 16.16 fixed point (incx / incy per destination pixel / row) and each
+ * destination pixel takes exactly one source pixel, without filtering.
+ *
+ * Per pixel:
+ *   - the source is split into B (bits 24-31), G (16-23), R (8-15) and
+ *     A (0-7); the destination into B (16-23), G (8-15) and R (0-7);
+ *   - SDL_RENDERCOPY_MODULATE_COLOR scales the source color by
+ *     data->r / data->g / data->b over 255;
+ *   - SDL_RENDERCOPY_MODULATE_ALPHA scales the source alpha by data->a
+ *     over 255;
+ *   - if BLEND or ADD is requested the source color is multiplied by the
+ *     source alpha first;
+ *   - BLEND: dst = src + (255 - A) * dst / 255
+ *     ADD:   dst = src + dst, clamped to 255
+ *     MOD:   dst = src * dst / 255
+ *
+ * If the mode bits match none of the three cases exactly, the destination
+ * color channels are written back unchanged.  The stored pixel is always
+ * (B << 16) | (G << 8) | R.
+ *
+ * data->dst is advanced by data->dst_pitch per row and data->dst_h is
+ * counted down; data->src is left untouched and rows are addressed from it
+ * by index.
+ *
+ * Returns 0.  An empty (or negative-sized) destination is a no-op.
+ */
+int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
+{
+    const int flags = data->flags;
+    const Uint32 modulateR = data->r;
+    const Uint32 modulateG = data->g;
+    const Uint32 modulateB = data->b;
+    const Uint32 modulateA = data->a;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB;
+    int srcy, srcx;
+    int posy, posx;
+    int incy, incx;
+
+    /* Nothing to draw, and the divisions below would otherwise divide by
+       zero, which is undefined behavior. */
+    if (data->dst_w <= 0 || data->dst_h <= 0) {
+        return 0;
+    }
+
+    srcy = 0;
+    posy = 0;
+    /* NOTE(review): the << 16 assumes source dimensions small enough not to
+       overflow an int -- confirm against callers. */
+    incy = (data->src_h << 16) / data->dst_h;
+    incx = (data->src_w << 16) / data->dst_w;
+
+    while (data->dst_h--) {
+        Uint32 *src;
+        Uint32 *dst = (Uint32 *)data->dst;
+        int n = data->dst_w;
+        /* Start one step "before" column 0 so the first pixel of the row
+           always takes the branch below and initializes src. */
+        srcx = -1;
+        posx = 0x10000L;
+        /* Carry whole source rows accumulated by the previous posy step. */
+        while (posy >= 0x10000L) {
+            ++srcy;
+            posy -= 0x10000L;
+        }
+        while (n--) {
+            if (posx >= 0x10000L) {
+                /* Carry whole source columns, then re-aim the source pointer. */
+                while (posx >= 0x10000L) {
+                    ++srcx;
+                    posx -= 0x10000L;
+                }
+                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
+            }
+            srcpixel = *src;
+            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
+            dstpixel = *dst;
+            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel;
+            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
+                srcR = (srcR * modulateR) / 255;
+                srcG = (srcG * modulateG) / 255;
+                srcB = (srcB * modulateB) / 255;
+            }
+            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
+                srcA = (srcA * modulateA) / 255;
+            }
+            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
+                /* This goes away if we ever use premultiplied alpha */
+                srcR = (srcR * srcA) / 255;
+                srcG = (srcG * srcA) / 255;
+                srcB = (srcB * srcA) / 255;
+            }
+            switch (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
+            case SDL_RENDERCOPY_BLEND:
+                dstR = srcR + ((255 - srcA) * dstR) / 255;
+                dstG = srcG + ((255 - srcA) * dstG) / 255;
+                dstB = srcB + ((255 - srcA) * dstB) / 255;
+                break;
+            case SDL_RENDERCOPY_ADD:
+                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
+                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
+                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
+                break;
+            case SDL_RENDERCOPY_MOD:
+                dstR = (srcR * dstR) / 255;
+                dstG = (srcG * dstG) / 255;
+                dstB = (srcB * dstB) / 255;
+                break;
+            }
+            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
+            *dst = dstpixel;
+            posx += incx;
+            ++dst;
+        }
+        posy += incy;
+        data->dst += data->dst_pitch;
+    }
+    return 0;
+}
+
+/* *INDENT-ON* */
+
+/* vi: set ts=4 sw=4 expandtab: */