view src/video/SDL_rendercopy.c @ 2005:45af7d69f8eb

MiNT audio driver cleanups for clamping types and channels to supported values. int32 support now available in one instance.
author Ryan C. Gordon <icculus@icculus.org>
date Fri, 01 Sep 2006 06:32:54 +0000
parents 7387e0514595
children
line wrap: on
line source

/* DO NOT EDIT!  This file is generated by sdlgenblit.pl */
/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2006 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    Sam Lantinga
    slouken@libsdl.org
*/
#include "SDL_config.h"

/* *INDENT-OFF* */

#include "SDL_video.h"
#include "SDL_rendercopy.h"

static struct {
    Uint32 src_format;
    Uint32 dst_format;
    int modMode;
    int blendMode;
    int scaleMode;
    SDL_RenderCopyFunc func;
} SDL_RenderCopyFuncTable[] = {
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Scale },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_RGB888_Blend },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Blend_Scale },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGB888_RGB888_Modulate },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Modulate_Scale },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_RGB888_Modulate_Blend },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_RGB888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Scale },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_BGR888_Blend },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Blend_Scale },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGB888_BGR888_Modulate },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Modulate_Scale },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGB888_BGR888_Modulate_Blend },
    { SDL_PIXELFORMAT_RGB888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGB888_BGR888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Scale },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_RGB888_Blend },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Blend_Scale },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGR888_RGB888_Modulate },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Modulate_Scale },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_RGB888_Modulate_Blend },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_RGB888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Scale },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_BGR888_Blend },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Blend_Scale },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGR888_BGR888_Modulate },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Modulate_Scale },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGR888_BGR888_Modulate_Blend },
    { SDL_PIXELFORMAT_BGR888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGR888_BGR888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Scale },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_RGB888_Blend },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Blend_Scale },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ARGB8888_RGB888_Modulate },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Scale },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Scale },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_BGR888_Blend },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Blend_Scale },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ARGB8888_BGR888_Modulate },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Scale },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend },
    { SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Scale },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_RGB888_Blend },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Blend_Scale },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGBA8888_RGB888_Modulate },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Scale },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Scale },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_BGR888_Blend },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Blend_Scale },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_RGBA8888_BGR888_Modulate },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Scale },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend },
    { SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Scale },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_RGB888_Blend },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Blend_Scale },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ABGR8888_RGB888_Modulate },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Scale },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Scale },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_BGR888_Blend },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Blend_Scale },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_ABGR8888_BGR888_Modulate },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Scale },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend },
    { SDL_PIXELFORMAT_ABGR8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Scale },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_RGB888_Blend },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Blend_Scale },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGRA8888_RGB888_Modulate },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Scale },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_RGB888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend_Scale },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Scale },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_BGR888_Blend },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, 0, (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Blend_Scale },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, 0,  SDL_RenderCopy_BGRA8888_BGR888_Modulate },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), 0, SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Scale },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), 0,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend },
    { SDL_PIXELFORMAT_BGRA8888, SDL_PIXELFORMAT_BGR888, (SDL_TEXTUREMODULATE_COLOR | SDL_TEXTUREMODULATE_ALPHA), (SDL_TEXTUREBLENDMODE_MASK | SDL_TEXTUREBLENDMODE_BLEND | SDL_TEXTUREBLENDMODE_ADD | SDL_TEXTUREBLENDMODE_MOD), SDL_TEXTURESCALEMODE_FAST,  SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend_Scale },
};

SDL_RenderCopyFunc SDL_GetRenderCopyFunc(Uint32 src_format, Uint32 dst_format, int modMode, int blendMode, int scaleMode)
{
    int i;

    for (i = 0; i < SDL_arraysize(SDL_RenderCopyFuncTable); ++i) {
        if (src_format != SDL_RenderCopyFuncTable[i].src_format) {
            continue;
        }
        if (dst_format != SDL_RenderCopyFuncTable[i].dst_format) {
            continue;
        }
        if ((modMode & SDL_RenderCopyFuncTable[i].modMode) != modMode) {
            continue;
        }
        if ((blendMode & SDL_RenderCopyFuncTable[i].blendMode) != blendMode) {
            continue;
        }
        if ((scaleMode & SDL_RenderCopyFuncTable[i].scaleMode) != scaleMode) {
            continue;
        }
        return SDL_RenderCopyFuncTable[i].func;
    }
    return NULL;
}

int SDL_RenderCopy_RGB888_RGB888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            *dst = *src;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_RGB888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_RGB888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel; A = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel; A = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_BGR888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel; A = 0xFF;
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_BGR888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_BGR888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel; A = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel; A = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGB888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_RGB888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel; A = 0xFF;
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_RGB888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_RGB888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel; A = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel; A = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_BGR888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            *dst = *src;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_BGR888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_BGR888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel; A = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel; A = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGR888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel; srcA = 0xFF;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_RGB888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            A = (Uint8)(pixel >> 24); R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_RGB888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_RGB888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            A = (Uint8)(pixel >> 24); R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            A = (Uint8)(pixel >> 24); R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_BGR888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            A = (Uint8)(pixel >> 24); R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_BGR888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_BGR888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            A = (Uint8)(pixel >> 24); R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            A = (Uint8)(pixel >> 24); R = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); B = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ARGB8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcR = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcB = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_RGB888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            R = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); B = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_RGB888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_RGB888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            R = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); B = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            R = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); B = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_BGR888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            R = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); B = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_BGR888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_BGR888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            R = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); B = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            R = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); B = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_RGBA8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcR = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcB = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_RGB888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            A = (Uint8)(pixel >> 24); B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_RGB888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_RGB888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            A = (Uint8)(pixel >> 24); B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            A = (Uint8)(pixel >> 24); B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_BGR888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            A = (Uint8)(pixel >> 24); B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_BGR888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_BGR888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            A = (Uint8)(pixel >> 24); B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            A = (Uint8)(pixel >> 24); B = (Uint8)(pixel >> 16); G = (Uint8)(pixel >> 8); R = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_ABGR8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcA = (Uint8)(srcpixel >> 24); srcB = (Uint8)(srcpixel >> 16); srcG = (Uint8)(srcpixel >> 8); srcR = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_RGB888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_RGB888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_RGB888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_RGB888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)R << 16) | ((Uint32)G << 8) | B;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_RGB888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstR = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstB = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstR << 16) | ((Uint32)dstG << 8) | dstB;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_BGR888_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_BGR888_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_BGR888_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_BGR888_Modulate(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            pixel = *src;
            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 pixel;
    Uint32 R, G, B, A;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            pixel = *src;
            B = (Uint8)(pixel >> 24); G = (Uint8)(pixel >> 16); R = (Uint8)(pixel >> 8); A = (Uint8)pixel;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                R = (R * modulateR) / 255;
                G = (G * modulateG) / 255;
                B = (B * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                A = (A * modulateA) / 255;
            }
            pixel = ((Uint32)B << 16) | ((Uint32)G << 8) | R;
            *dst = pixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;

    while (data->dst_h--) {
        Uint32 *src = (Uint32 *)data->src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        while (n--) {
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            ++src;
            ++dst;
        }
        data->src += data->src_pitch;
        data->dst += data->dst_pitch;
    }
    return 0;
}

int SDL_RenderCopy_BGRA8888_BGR888_Modulate_Blend_Scale(SDL_RenderCopyData *data)
{
    const int flags = data->flags;
    const Uint32 modulateR = data->r;
    const Uint32 modulateG = data->g;
    const Uint32 modulateB = data->b;
    const Uint32 modulateA = data->a;
    Uint32 srcpixel;
    Uint32 srcR, srcG, srcB, srcA;
    Uint32 dstpixel;
    Uint32 dstR, dstG, dstB, dstA;
    int srcy, srcx;
    int posy, posx;
    int incy, incx;

    srcy = 0;
    posy = 0;
    incy = (data->src_h << 16) / data->dst_h;
    incx = (data->src_w << 16) / data->dst_w;

    while (data->dst_h--) {
        Uint32 *src;
        Uint32 *dst = (Uint32 *)data->dst;
        int n = data->dst_w;
        srcx = -1;
        posx = 0x10000L;
        while (posy >= 0x10000L) {
            ++srcy;
            posy -= 0x10000L;
        }
        while (n--) {
            if (posx >= 0x10000L) {
                while (posx >= 0x10000L) {
                    ++srcx;
                    posx -= 0x10000L;
                }
                src = (Uint32 *)(data->src + (srcy * data->src_pitch) + (srcx * 4));
            }
            srcpixel = *src;
            srcB = (Uint8)(srcpixel >> 24); srcG = (Uint8)(srcpixel >> 16); srcR = (Uint8)(srcpixel >> 8); srcA = (Uint8)srcpixel;
            dstpixel = *dst;
            dstB = (Uint8)(dstpixel >> 16); dstG = (Uint8)(dstpixel >> 8); dstR = (Uint8)dstpixel; dstA = 0xFF;
            if (flags & SDL_RENDERCOPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_RENDERCOPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD)) {
                /* This goes away if we ever use premultiplied alpha */
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            switch (flags & (SDL_RENDERCOPY_MASK|SDL_RENDERCOPY_BLEND|SDL_RENDERCOPY_ADD|SDL_RENDERCOPY_MOD)) {
            case SDL_RENDERCOPY_MASK:
                if (srcA) {
                    dstR = srcR;
                    dstG = srcG;
                    dstB = srcB;
                }
                break;
            case SDL_RENDERCOPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                break;
            case SDL_RENDERCOPY_ADD:
                dstR = srcR + dstR; if (dstR > 255) dstR = 255;
                dstG = srcG + dstG; if (dstG > 255) dstG = 255;
                dstB = srcB + dstB; if (dstB > 255) dstB = 255;
                break;
            case SDL_RENDERCOPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            }
            dstpixel = ((Uint32)dstB << 16) | ((Uint32)dstG << 8) | dstR;
            *dst = dstpixel;
            posx += incx;
            ++dst;
        }
        posy += incy;
        data->dst += data->dst_pitch;
    }
    return 0;
}

/* *INDENT-ON* */

/* vi: set ts=4 sw=4 expandtab: */