Mercurial > sdl-ios-xcode

/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2009 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    Sam Lantinga
    slouken@libsdl.org
*/
#include "SDL_config.h"

#include "SDL_video.h"
#include "SDL_blit.h"


#ifdef __SSE__
/* *INDENT-OFF* */

#ifdef _MSC_VER
#define SSE_BEGIN \
    __m128 c128; \
    c128.m128_u32[0] = color; \
    c128.m128_u32[1] = color; \
    c128.m128_u32[2] = color; \
    c128.m128_u32[3] = color;
#else
#define SSE_BEGIN \
    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
    cccc[0] = color; \
    cccc[1] = color; \
    cccc[2] = color; \
    cccc[3] = color; \
    __m128 c128 = *(__m128 *)cccc;
#endif

#define SSE_WORK \
    for (i = n / 64; i--;) { \
        _mm_stream_ps((float *)(p+0), c128); \
        _mm_stream_ps((float *)(p+16), c128); \
        _mm_stream_ps((float *)(p+32), c128); \
        _mm_stream_ps((float *)(p+48), c128); \
        p += 64; \
    }

#define SSE_END

#define DEFINE_SSE_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
    SSE_BEGIN; \
 \
    while (h--) { \
        int i, n = w * bpp; \
        Uint8 *p = pixels; \
 \
        if (n > 63) { \
            int adjust = 16 - ((uintptr_t)p & 15); \
            if (adjust < 16) { \
                n -= adjust; \
                adjust /= bpp; \
                while (adjust--) { \
                    *((type *)p) = (type)color; \
                    p += bpp; \
                } \
            } \
            SSE_WORK; \
        } \
        if (n & 63) { \
            int remainder = (n & 63); \
            remainder /= bpp; \
            while (remainder--) { \
                *((type *)p) = (type)color; \
                p += bpp; \
            } \
        } \
        pixels += pitch; \
    } \
 \
    SSE_END; \
}

static void
SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
{
    SSE_BEGIN;

    while (h--) {
        int i, n = w;
        Uint8 *p = pixels;

        if (n > 63) {
            int adjust = 16 - ((uintptr_t)p & 15);
            if (adjust) {
                n -= adjust;
                SDL_memset(p, color, adjust);
                p += adjust;
            }
            SSE_WORK;
        }
        if (n & 63) {
            int remainder = (n & 63);
            SDL_memset(p, color, remainder);
            p += remainder;
        }
        pixels += pitch;
    }

    SSE_END;
}
/*DEFINE_SSE_FILLRECT(1, Uint8)*/
DEFINE_SSE_FILLRECT(2, Uint16)
DEFINE_SSE_FILLRECT(4, Uint32)

/* *INDENT-ON* */
#endif /* __SSE__ */

#ifdef __MMX__
/* *INDENT-OFF* */

#define MMX_BEGIN \
    __m64 c64 = _mm_set_pi32(color, color)

#define MMX_WORK \
    for (i = n / 64; i--;) { \
        _mm_stream_pi((__m64 *)(p+0), c64); \
        _mm_stream_pi((__m64 *)(p+8), c64); \
        _mm_stream_pi((__m64 *)(p+16), c64); \
        _mm_stream_pi((__m64 *)(p+24), c64); \
        _mm_stream_pi((__m64 *)(p+32), c64); \
        _mm_stream_pi((__m64 *)(p+40), c64); \
        _mm_stream_pi((__m64 *)(p+48), c64); \
        _mm_stream_pi((__m64 *)(p+56), c64); \
        p += 64; \
    }

#define MMX_END \
    _mm_empty()

#define DEFINE_MMX_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
    MMX_BEGIN; \
 \
    while (h--) { \
        int i, n = w * bpp; \
        Uint8 *p = pixels; \
 \
        if (n > 63) { \
            int adjust = 8 - ((uintptr_t)p & 7); \
            if (adjust < 8) { \
                n -= adjust; \
                adjust /= bpp; \
                while (adjust--) { \
                    *((type *)p) = (type)color; \
                    p += bpp; \
                } \
            } \
            MMX_WORK; \
        } \
        if (n & 63) { \
            int remainder = (n & 63); \
            remainder /= bpp; \
            while (remainder--) { \
                *((type *)p) = (type)color; \
                p += bpp; \
            } \
        } \
        pixels += pitch; \
    } \
 \
    MMX_END; \
}

static void
SDL_FillRect1MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
{
    MMX_BEGIN;

    while (h--) {
        int i, n = w;
        Uint8 *p = pixels;

        if (n > 63) {
            int adjust = 8 - ((uintptr_t)p & 7);
            if (adjust) {
                n -= adjust;
                SDL_memset(p, color, adjust);
                p += adjust;
            }
            MMX_WORK;
        }
        if (n & 63) {
            int remainder = (n & 63);
            SDL_memset(p, color, remainder);
            p += remainder;
        }
        pixels += pitch;
    }

    MMX_END;
}
/*DEFINE_MMX_FILLRECT(1, Uint8)*/
DEFINE_MMX_FILLRECT(2, Uint16)
DEFINE_MMX_FILLRECT(4, Uint32)

/* *INDENT-ON* */
#endif /* __MMX__ */

static void
SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    while (h--) {
        int n = w;
        Uint8 *p = pixels;

        if (n > 3) {
            switch ((uintptr_t) p & 3) {
            case 1:
                *p++ = (Uint8) color;
                --n;
            case 2:
                *p++ = (Uint8) color;
                --n;
            case 3:
                *p++ = (Uint8) color;
                --n;
            }
            SDL_memset4(p, color, (n >> 2));
        }
        if (n & 3) {
            p += (n & ~3);
            switch (n & 3) {
            case 3:
                *p++ = (Uint8) color;
            case 2:
                *p++ = (Uint8) color;
            case 1:
                *p++ = (Uint8) color;
            }
        }
        pixels += pitch;
    }
}

static void
SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    while (h--) {
        int n = w;
        Uint16 *p = (Uint16 *) pixels;

        if (n > 1) {
            if ((uintptr_t) p & 2) {
                *p++ = (Uint16) color;
                --n;
            }
            SDL_memset4(p, color, (n >> 1));
        }
        if (n & 1) {
            p[n - 1] = (Uint16) color;
        }
        pixels += pitch;
    }
}

static void
SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    Uint8 r = (Uint8) ((color >> 16) & 0xFF);
    Uint8 g = (Uint8) ((color >> 8) & 0xFF);
    Uint8 b = (Uint8) (color & 0xFF);

    while (h--) {
        int n = w;
        Uint8 *p = pixels;

        while (n--) {
            *p++ = r;
            *p++ = g;
            *p++ = b;
        }
        pixels += pitch;
    }
}

static void
SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    while (h--) {
        SDL_memset4(pixels, color, w);
        pixels += pitch;
    }
}

/*
 * This function performs a fast fill of the given rectangle with 'color'
 */
int
SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color)
{
    Uint8 *pixels;

    /* This function doesn't work on surfaces < 8 bpp */
    if (dst->format->BitsPerPixel < 8) {
        SDL_SetError("SDL_FillRect(): Unsupported surface format");
        return (-1);
    }

    /* If 'dstrect' == NULL, then fill the whole surface */
    if (dstrect) {
        /* Perform clipping */
        if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) {
            return (0);
        }
    } else {
        dstrect = &dst->clip_rect;
    }

    /* Perform software fill */
    if (!dst->pixels) {
        SDL_SetError("SDL_FillRect(): You must lock the surface");
        return (-1);
    }

    pixels =
        (Uint8 *) dst->pixels + dstrect->y * dst->pitch +
        dstrect->x * dst->format->BytesPerPixel;

    switch (dst->format->BytesPerPixel) {
    case 1:
        {
            color |= (color << 8);
            color |= (color << 16);
#ifdef __SSE__
            if (SDL_HasSSE()) {
                SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
#ifdef __MMX__
            if (SDL_HasMMX()) {
                SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
            SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h);
            break;
        }

    case 2:
        {
            color |= (color << 16);
#ifdef __SSE__
            if (SDL_HasSSE()) {
                SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
#ifdef __MMX__
            if (SDL_HasMMX()) {
                SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
            SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h);
            break;
        }

    case 3:
        /* 24-bit RGB is a slow path, at least for now. */
        {
            SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h);
            break;
        }

    case 4:
        {
#ifdef __SSE__
            if (SDL_HasSSE()) {
                SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
#ifdef __MMX__
            if (SDL_HasMMX()) {
                SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w,
                                 dstrect->h);
                break;
            }
#endif
            SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h);
            break;
        }
    }

    /* We're done! */
    return (0);
}

/* vi: set ts=4 sw=4 expandtab: */
author	Sam Lantinga <slouken@libsdl.org>
date	Sun, 15 Nov 2009 09:21:46 +0000
parents	7e30c2dc7783
children	0267b8b1595c