Mercurial > sdl-ios-xcode
changeset 2239:31835fd24b2b
Added SSE version of SDL_FillRect() for 32-bit ARGB surfaces
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 13 Aug 2007 06:24:56 +0000 |
parents | 93ea84f77d83 |
children | eebaa05f8bd9 |
files | src/video/SDL_surface.c |
diffstat | 1 file changed, 40 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/src/video/SDL_surface.c Mon Aug 13 03:03:23 2007 +0000 +++ b/src/video/SDL_surface.c Mon Aug 13 06:24:56 2007 +0000 @@ -587,20 +587,22 @@ } else { switch (dst->format->BytesPerPixel) { case 2: - for (y = dstrect->h; y; --y) { - Uint16 *pixels = (Uint16 *) row; + { Uint16 c = (Uint16) color; Uint32 cc = (Uint32) c << 16 | c; - int n = dstrect->w; - if ((uintptr_t) pixels & 3) { - *pixels++ = c; - n--; + for (y = dstrect->h; y; --y) { + Uint16 *pixels = (Uint16 *) row; + int n = dstrect->w; + if ((uintptr_t) pixels & 3) { + *pixels++ = c; + n--; + } + if (n >> 1) + SDL_memset4(pixels, cc, n >> 1); + if (n & 1) + pixels[n - 1] = c; + row += dst->pitch; } - if (n >> 1) - SDL_memset4(pixels, cc, n >> 1); - if (n & 1) - pixels[n - 1] = c; - row += dst->pitch; } break; @@ -619,6 +621,33 @@ break; case 4: +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES + if (SDL_HasSSE() && !((uintptr_t) row & 15) && !(dstrect->w & 3)) { + Uint32 cccc[4] __attribute__ ((aligned(16))) = { + color, color, color, color}; + int i, n = dstrect->w / 4; + __asm__ __volatile__(" movdqa (%0), %%xmm0\n":: + "r"(cccc):"memory"); + for (y = dstrect->h; y; --y) { + Uint8 *pixels = row; + for (i = n / 2; i--;) { + /* *INDENT-OFF* */ + __asm__ __volatile__(" prefetchnta 256(%0)\n" + " movdqa %%xmm0, (%0)\n" + " movdqa %%xmm0, 16(%0)\n"::"r"(pixels):"memory"); + /* *INDENT-ON* */ + pixels += 32; + } + if (n & 1) { + __asm__ __volatile__(" movdqa %%xmm0, (%0)\n":: + "r"(pixels):"memory"); + } + row += dst->pitch; + } + __asm__ __volatile__(" emms\n"::); + break; + } +#endif for (y = dstrect->h; y; --y) { SDL_memset4(row, color, dstrect->w); row += dst->pitch;