Mercurial > sdl-ios-xcode
changeset 2253:6d99edd791bf
Added notes on the next steps for SDL 1.3
Moved fill and copy routines to their own files.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Thu, 16 Aug 2007 21:43:19 +0000 |
parents | b80e3d57941f |
children | 79e00f5561f4 |
files | NOTES src/video/SDL_blit.c src/video/SDL_blit_copy.c src/video/SDL_blit_copy.h src/video/SDL_copy.c src/video/SDL_copy.h src/video/SDL_fill.c src/video/SDL_surface.c |
diffstat | 8 files changed, 562 insertions(+), 521 deletions(-) [+] |
line wrap: on
line diff
--- a/NOTES Thu Aug 16 06:40:34 2007 +0000 +++ b/NOTES Thu Aug 16 21:43:19 2007 +0000 @@ -157,6 +157,16 @@ streaming textures are lockable and may have system memory pixels available. SDL_compat will use a streaming video texture, and will never be HWSURFACE, but may be PREALLOC, if system memory pixels are available. +*** DONE Thu Aug 16 14:18:42 PDT 2007 The software renderer will be abstracted so the surface management can be used by any renderer that provides functions to copy surfaces to the window. + +Blitters... +---- +Copy blit and fill rect are optimized with MMX and SSE now. + +Here are the pieces we still need: +- Merging SDL texture capabilities into the SDL surface system +- Generic fallback blitter architecture +- Custom fast path blitters
--- a/src/video/SDL_blit.c Thu Aug 16 06:40:34 2007 +0000 +++ b/src/video/SDL_blit.c Thu Aug 16 21:43:19 2007 +0000 @@ -24,7 +24,7 @@ #include "SDL_video.h" #include "SDL_sysvideo.h" #include "SDL_blit.h" -#include "SDL_blit_copy.h" +#include "SDL_copy.h" #include "SDL_RLEaccel_c.h" #include "SDL_pixels_c.h"
--- a/src/video/SDL_blit_copy.c Thu Aug 16 06:40:34 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,158 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2006 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#include "SDL_video.h" -#include "SDL_blit.h" -#include "SDL_blit_copy.h" - - -#ifdef __MMX__ -static __inline__ void -SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len) -{ - int i; - - __m64 values[8]; - for (i = len / 64; i--;) { - _mm_prefetch(src, _MM_HINT_NTA); - values[0] = *(__m64 *) (src + 0); - values[1] = *(__m64 *) (src + 8); - values[2] = *(__m64 *) (src + 16); - values[3] = *(__m64 *) (src + 24); - values[4] = *(__m64 *) (src + 32); - values[5] = *(__m64 *) (src + 40); - values[6] = *(__m64 *) (src + 48); - values[7] = *(__m64 *) (src + 56); - _mm_stream_pi((__m64 *) (dst + 0), values[0]); - _mm_stream_pi((__m64 *) (dst + 8), values[1]); - _mm_stream_pi((__m64 *) (dst + 16), values[2]); - _mm_stream_pi((__m64 *) (dst + 24), values[3]); - _mm_stream_pi((__m64 *) (dst + 32), values[4]); - _mm_stream_pi((__m64 *) (dst + 40), values[5]); - _mm_stream_pi((__m64 *) (dst + 48), values[6]); - _mm_stream_pi((__m64 *) (dst + 56), values[7]); - src += 64; - dst += 64; - } - - if (len & 63) - SDL_memcpy(dst, src, len & 63); -} -#endif /* __MMX__ */ - -#ifdef __SSE__ -static __inline__ void -SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len) -{ - int i; - - __m128 values[4]; - for (i = len / 64; i--;) { - _mm_prefetch(src, _MM_HINT_NTA); - values[0] = *(__m128 *) (src + 0); - values[1] = *(__m128 *) (src + 16); - values[2] = *(__m128 *) (src + 32); - values[3] = *(__m128 *) (src + 48); - _mm_stream_ps((float *) (dst + 0), values[0]); - _mm_stream_ps((float *) (dst + 16), values[1]); - _mm_stream_ps((float *) (dst + 32), values[2]); - _mm_stream_ps((float *) (dst + 48), values[3]); - src += 64; - dst += 64; - } - - if (len & 63) - SDL_memcpy(dst, src, len & 63); -} -#endif /* __SSE__ */ - -void -SDL_BlitCopy(SDL_BlitInfo * info) -{ - Uint8 *src, *dst; - int w, h; - int srcskip, dstskip; - - w = info->d_width * info->dst->BytesPerPixel; - h = info->d_height; - src = info->s_pixels; - dst = info->d_pixels; - srcskip = w + info->s_skip; - dstskip = w + info->d_skip; - -#ifdef __SSE__ - if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) { - while (h--) { - SDL_memcpySSE(dst, src, w); - src += srcskip; - dst += dstskip; - } - return; - } -#endif - -#ifdef __MMX__ - if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) { - while (h--) { - SDL_memcpyMMX(dst, src, w); - src += srcskip; - dst += dstskip; - } - _mm_empty(); - return; - } -#endif - - while (h--) { - SDL_memcpy(dst, src, w); - src += srcskip; - dst += dstskip; - } -} - -void -SDL_BlitCopyOverlap(SDL_BlitInfo * info) -{ - Uint8 *src, *dst; - int w, h; - int skip; - - w = info->d_width * info->dst->BytesPerPixel; - h = info->d_height; - src = info->s_pixels; - dst = info->d_pixels; - skip = w + info->s_skip; - if ((dst < src) || (dst >= (src + h * skip))) { - SDL_BlitCopy(info); - } else { - src += ((h - 1) * skip); - dst += ((h - 1) * skip); - while (h--) { - SDL_revcpy(dst, src, w); - src -= skip; - dst -= skip; - } - } -} - -/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_blit_copy.h Thu Aug 16 06:40:34 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2006 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ - -void SDL_BlitCopy(SDL_BlitInfo * info); -void SDL_BlitCopyOverlap(SDL_BlitInfo * info); - -/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/video/SDL_copy.c Thu Aug 16 21:43:19 2007 +0000 @@ -0,0 +1,160 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2006 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +#include "SDL_video.h" +#include "SDL_blit.h" +#include "SDL_copy.h" + + +#ifdef __SSE__ +/* This assumes 16-byte aligned src and dst */ +static __inline__ void +SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len) +{ + int i; + + __m128 values[4]; + for (i = len / 64; i--;) { + _mm_prefetch(src, _MM_HINT_NTA); + values[0] = *(__m128 *) (src + 0); + values[1] = *(__m128 *) (src + 16); + values[2] = *(__m128 *) (src + 32); + values[3] = *(__m128 *) (src + 48); + _mm_stream_ps((float *) (dst + 0), values[0]); + _mm_stream_ps((float *) (dst + 16), values[1]); + _mm_stream_ps((float *) (dst + 32), values[2]); + _mm_stream_ps((float *) (dst + 48), values[3]); + src += 64; + dst += 64; + } + + if (len & 63) + SDL_memcpy(dst, src, len & 63); +} +#endif /* __SSE__ */ + +#ifdef __MMX__ +/* This assumes 8-byte aligned src and dst */ +static __inline__ void +SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len) +{ + int i; + + __m64 values[8]; + for (i = len / 64; i--;) { + _mm_prefetch(src, _MM_HINT_NTA); + values[0] = *(__m64 *) (src + 0); + values[1] = *(__m64 *) (src + 8); + values[2] = *(__m64 *) (src + 16); + values[3] = *(__m64 *) (src + 24); + values[4] = *(__m64 *) (src + 32); + values[5] = *(__m64 *) (src + 40); + values[6] = *(__m64 *) (src + 48); + values[7] = *(__m64 *) (src + 56); + _mm_stream_pi((__m64 *) (dst + 0), values[0]); + _mm_stream_pi((__m64 *) (dst + 8), values[1]); + _mm_stream_pi((__m64 *) (dst + 16), values[2]); + _mm_stream_pi((__m64 *) (dst + 24), values[3]); + _mm_stream_pi((__m64 *) (dst + 32), values[4]); + _mm_stream_pi((__m64 *) (dst + 40), values[5]); + _mm_stream_pi((__m64 *) (dst + 48), values[6]); + _mm_stream_pi((__m64 *) (dst + 56), values[7]); + src += 64; + dst += 64; + } + + if (len & 63) + SDL_memcpy(dst, src, len & 63); +} +#endif /* __MMX__ */ + +void +SDL_BlitCopy(SDL_BlitInfo * info) +{ + Uint8 *src, *dst; + int w, h; + int srcskip, dstskip; + + w = info->d_width * info->dst->BytesPerPixel; + h = info->d_height; + src = info->s_pixels; + dst = info->d_pixels; + srcskip = w + info->s_skip; + dstskip = w + info->d_skip; + +#ifdef __SSE__ + if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) { + while (h--) { + SDL_memcpySSE(dst, src, w); + src += srcskip; + dst += dstskip; + } + return; + } +#endif + +#ifdef __MMX__ + if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) { + while (h--) { + SDL_memcpyMMX(dst, src, w); + src += srcskip; + dst += dstskip; + } + _mm_empty(); + return; + } +#endif + + while (h--) { + SDL_memcpy(dst, src, w); + src += srcskip; + dst += dstskip; + } +} + +void +SDL_BlitCopyOverlap(SDL_BlitInfo * info) +{ + Uint8 *src, *dst; + int w, h; + int skip; + + w = info->d_width * info->dst->BytesPerPixel; + h = info->d_height; + src = info->s_pixels; + dst = info->d_pixels; + skip = w + info->s_skip; + if ((dst < src) || (dst >= (src + h * skip))) { + SDL_BlitCopy(info); + } else { + src += ((h - 1) * skip); + dst += ((h - 1) * skip); + while (h--) { + SDL_revcpy(dst, src, w); + src -= skip; + dst -= skip; + } + } +} + +/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/video/SDL_copy.h Thu Aug 16 21:43:19 2007 +0000 @@ -0,0 +1,26 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2006 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ + +void SDL_BlitCopy(SDL_BlitInfo * info); +void SDL_BlitCopyOverlap(SDL_BlitInfo * info); + +/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/video/SDL_fill.c Thu Aug 16 21:43:19 2007 +0000 @@ -0,0 +1,365 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2006 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +#include "SDL_video.h" +#include "SDL_blit.h" + + +#ifdef __SSE__ +/* *INDENT-OFF* */ + +#ifdef _MSC_VER +#define SSE_BEGIN \ + __m128 c128; \ + c128.m128_u32[0] = color; \ + c128.m128_u32[1] = color; \ + c128.m128_u32[2] = color; \ + c128.m128_u32[3] = color; +#else +#define SSE_BEGIN \ + DECLARE_ALIGNED(Uint32, cccc[4], 16); \ + cccc[0] = color; \ + cccc[1] = color; \ + cccc[2] = color; \ + cccc[3] = color; \ + __m128 c128 = *(__m128 *)cccc; +#endif + +#define SSE_WORK \ + for (i = n / 64; i--;) { \ + _mm_stream_ps((float *)(p+0), c128); \ + _mm_stream_ps((float *)(p+16), c128); \ + _mm_stream_ps((float *)(p+32), c128); \ + _mm_stream_ps((float *)(p+48), c128); \ + p += 64; \ + } + +#define SSE_END + +#define DEFINE_SSE_FILLRECT(bpp, type) \ +static void \ +SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ +{ \ + SSE_BEGIN; \ + \ + while (h--) { \ + int i, n = w * bpp; \ + Uint8 *p = pixels; \ + \ + if (n > 15) { \ + int adjust = 16 - ((uintptr_t)p & 15); \ + if (adjust < 16) { \ + n -= adjust; \ + adjust /= bpp; \ + while(adjust--) { \ + *((type *)p) = (type)color; \ + p += bpp; \ + } \ + } \ + SSE_WORK; \ + } \ + if (n & 63) { \ + int remainder = (n & 63); \ + remainder /= bpp; \ + while(remainder--) { \ + *((type *)p) = (type)color; \ + p += bpp; \ + } \ + } \ + pixels += pitch; \ + } \ + \ + SSE_END; \ +} + +DEFINE_SSE_FILLRECT(1, Uint8) +DEFINE_SSE_FILLRECT(2, Uint16) +DEFINE_SSE_FILLRECT(4, Uint32) + +/* *INDENT-ON* */ +#endif /* __SSE__ */ + +#ifdef __MMX__ +/* *INDENT-OFF* */ + +#define MMX_BEGIN \ + __m64 c64 = _mm_set_pi32(color, color) + +#define MMX_WORK \ + for (i = n / 64; i--;) { \ + _mm_stream_pi((__m64 *)(p+0), c64); \ + _mm_stream_pi((__m64 *)(p+8), c64); \ + _mm_stream_pi((__m64 *)(p+16), c64); \ + _mm_stream_pi((__m64 *)(p+24), c64); \ + _mm_stream_pi((__m64 *)(p+32), c64); \ + _mm_stream_pi((__m64 *)(p+40), c64); \ + _mm_stream_pi((__m64 *)(p+48), c64); \ + _mm_stream_pi((__m64 *)(p+56), c64); \ + p += 64; \ + } + +#define MMX_END \ + _mm_empty() + +#define DEFINE_MMX_FILLRECT(bpp, type) \ +static void \ +SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ +{ \ + MMX_BEGIN; \ + \ + while (h--) { \ + int i, n = w * bpp; \ + Uint8 *p = pixels; \ + \ + if (n > 7) { \ + int adjust = 8 - ((uintptr_t)p & 7); \ + if (adjust < 8) { \ + n -= adjust; \ + adjust /= bpp; \ + while(adjust--) { \ + *((type *)p) = (type)color; \ + p += bpp; \ + } \ + } \ + MMX_WORK; \ + } \ + if (n & 63) { \ + int remainder = (n & 63); \ + remainder /= bpp; \ + while(remainder--) { \ + *((type *)p) = (type)color; \ + p += bpp; \ + } \ + } \ + pixels += pitch; \ + } \ + \ + MMX_END; \ +} + +DEFINE_MMX_FILLRECT(1, Uint8) +DEFINE_MMX_FILLRECT(2, Uint16) +DEFINE_MMX_FILLRECT(4, Uint32) + +/* *INDENT-ON* */ +#endif /* __MMX__ */ + +static void +SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h) +{ + while (h--) { + int n = w; + Uint8 *p = pixels; + + if (n > 3) { + switch ((uintptr_t) p & 3) { + case 1: + *p++ = (Uint8) color; + --n; + case 2: + *p++ = (Uint8) color; + --n; + case 3: + *p++ = (Uint8) color; + --n; + } + SDL_memset4(p, color, (n >> 2)); + } + if (n & 3) { + p += (n & ~3); + switch (n & 3) { + case 3: + *p++ = (Uint8) color; + case 2: + *p++ = (Uint8) color; + case 1: + *p++ = (Uint8) color; + } + } + pixels += pitch; + } +} + +static void +SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h) +{ + while (h--) { + int n = w; + Uint16 *p = (Uint16 *) pixels; + + if (n > 1) { + if ((uintptr_t) p & 2) { + *p++ = (Uint16) color; + --n; + } + SDL_memset4(p, color, (n >> 1)); + } + if (n & 1) { + p[n - 1] = (Uint16) color; + } + pixels += pitch; + } +} + +static void +SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h) +{ + Uint8 r = (Uint8) ((color >> 16) & 0xFF); + Uint8 g = (Uint8) ((color >> 8) & 0xFF); + Uint8 b = (Uint8) (color & 0xFF); + + while (h--) { + int n = w; + Uint8 *p = pixels; + + while (n--) { + *p++ = r; + *p++ = g; + *p++ = b; + } + pixels += pitch; + } +} + +static void +SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h) +{ + while (h--) { + SDL_memset4(pixels, color, w); + pixels += pitch; + } +} + +/* + * This function performs a fast fill of the given rectangle with 'color' + */ +int +SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color) +{ + Uint8 *pixels; + + /* This function doesn't work on surfaces < 8 bpp */ + if (dst->format->BitsPerPixel < 8) { + SDL_SetError("SDL_FillRect(): Unsupported surface format"); + return (-1); + } + + /* If 'dstrect' == NULL, then fill the whole surface */ + if (dstrect) { + /* Perform clipping */ + if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) { + return (0); + } + } else { + dstrect = &dst->clip_rect; + } + + /* Perform software fill */ + if (!dst->pixels) { + SDL_SetError("SDL_FillRect(): You must lock the surface"); + return (-1); + } + + pixels = + (Uint8 *) dst->pixels + dstrect->y * dst->pitch + + dstrect->x * dst->format->BytesPerPixel; + + switch (dst->format->BytesPerPixel) { + case 1: + { + color |= (color << 8); + color |= (color << 16); +#ifdef __SSE__ + if (SDL_HasSSE()) { + SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif +#ifdef __MMX__ + if (SDL_HasMMX()) { + SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif + SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h); + break; + } + + case 2: + { + color |= (color << 16); +#ifdef __SSE__ + if (SDL_HasSSE()) { + SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif +#ifdef __MMX__ + if (SDL_HasMMX()) { + SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif + SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h); + break; + } + + case 3: + /* 24-bit RGB is a slow path, at least for now. */ + { + SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h); + break; + } + + case 4: + { +#ifdef __SSE__ + if (SDL_HasSSE()) { + SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif +#ifdef __MMX__ + if (SDL_HasMMX()) { + SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w, + dstrect->h); + break; + } +#endif + SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h); + break; + } + } + + SDL_UnlockSurface(dst); + + /* We're done! */ + return (0); +} + +/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_surface.c Thu Aug 16 06:40:34 2007 +0000 +++ b/src/video/SDL_surface.c Thu Aug 16 21:43:19 2007 +0000 @@ -509,342 +509,6 @@ return 0; } -#ifdef __SSE__ -/* *INDENT-OFF* */ - -#ifdef _MSC_VER -#define SSE_BEGIN \ - __m128 c128; \ - c128.m128_u32[0] = color; \ - c128.m128_u32[1] = color; \ - c128.m128_u32[2] = color; \ - c128.m128_u32[3] = color; -#else -#define SSE_BEGIN \ - DECLARE_ALIGNED(Uint32, cccc[4], 16); \ - cccc[0] = color; \ - cccc[1] = color; \ - cccc[2] = color; \ - cccc[3] = color; \ - __m128 c128 = *(__m128 *)cccc; -#endif - -#define SSE_WORK \ - for (i = n / 64; i--;) { \ - _mm_stream_ps((float *)(p+0), c128); \ - _mm_stream_ps((float *)(p+16), c128); \ - _mm_stream_ps((float *)(p+32), c128); \ - _mm_stream_ps((float *)(p+48), c128); \ - p += 64; \ - } - -#define SSE_END - -#define DEFINE_SSE_FILLRECT(bpp, type) \ -static void \ -SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ -{ \ - SSE_BEGIN; \ - \ - while (h--) { \ - int i, n = w * bpp; \ - Uint8 *p = pixels; \ - \ - if (n > 15) { \ - int adjust = 16 - ((uintptr_t)p & 15); \ - if (adjust < 16) { \ - n -= adjust; \ - adjust /= bpp; \ - while(adjust--) { \ - *((type *)p) = (type)color; \ - p += bpp; \ - } \ - } \ - SSE_WORK; \ - } \ - if (n & 63) { \ - int remainder = (n & 63); \ - remainder /= bpp; \ - while(remainder--) { \ - *((type *)p) = (type)color; \ - p += bpp; \ - } \ - } \ - pixels += pitch; \ - } \ - \ - SSE_END; \ -} - -DEFINE_SSE_FILLRECT(1, Uint8) -DEFINE_SSE_FILLRECT(2, Uint16) -DEFINE_SSE_FILLRECT(4, Uint32) - -/* *INDENT-ON* */ -#endif /* __SSE__ */ - -#ifdef __MMX__ -/* *INDENT-OFF* */ - -#define MMX_BEGIN \ - __m64 c64 = _mm_set_pi32(color, color) - -#define MMX_WORK \ - for (i = n / 64; i--;) { \ - _mm_stream_pi((__m64 *)(p+0), c64); \ - _mm_stream_pi((__m64 *)(p+8), c64); \ - _mm_stream_pi((__m64 *)(p+16), c64); \ - _mm_stream_pi((__m64 *)(p+24), c64); \ - _mm_stream_pi((__m64 *)(p+32), c64); \ - _mm_stream_pi((__m64 *)(p+40), c64); \ - _mm_stream_pi((__m64 *)(p+48), c64); \ - _mm_stream_pi((__m64 *)(p+56), c64); \ - p += 64; \ - } - -#define MMX_END \ - _mm_empty() - -#define DEFINE_MMX_FILLRECT(bpp, type) \ -static void \ -SDL_FillRect##bpp##MMX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \ -{ \ - MMX_BEGIN; \ - \ - while (h--) { \ - int i, n = w * bpp; \ - Uint8 *p = pixels; \ - \ - if (n > 7) { \ - int adjust = 8 - ((uintptr_t)p & 7); \ - if (adjust < 8) { \ - n -= adjust; \ - adjust /= bpp; \ - while(adjust--) { \ - *((type *)p) = (type)color; \ - p += bpp; \ - } \ - } \ - MMX_WORK; \ - } \ - if (n & 63) { \ - int remainder = (n & 63); \ - remainder /= bpp; \ - while(remainder--) { \ - *((type *)p) = (type)color; \ - p += bpp; \ - } \ - } \ - pixels += pitch; \ - } \ - \ - MMX_END; \ -} - -DEFINE_MMX_FILLRECT(1, Uint8) -DEFINE_MMX_FILLRECT(2, Uint16) -DEFINE_MMX_FILLRECT(4, Uint32) - -/* *INDENT-ON* */ -#endif /* __MMX__ */ - -static void -SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h) -{ - while (h--) { - int n = w; - Uint8 *p = pixels; - - if (n > 3) { - switch ((uintptr_t) p & 3) { - case 1: - *p++ = (Uint8) color; - --n; - case 2: - *p++ = (Uint8) color; - --n; - case 3: - *p++ = (Uint8) color; - --n; - } - SDL_memset4(p, color, (n >> 2)); - } - if (n & 3) { - p += (n & ~3); - switch (n & 3) { - case 3: - *p++ = (Uint8) color; - case 2: - *p++ = (Uint8) color; - case 1: - *p++ = (Uint8) color; - } - } - pixels += pitch; - } -} - -static void -SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h) -{ - while (h--) { - int n = w; - Uint16 *p = (Uint16 *) pixels; - - if (n > 1) { - if ((uintptr_t) p & 2) { - *p++ = (Uint16) color; - --n; - } - SDL_memset4(p, color, (n >> 1)); - } - if (n & 1) { - p[n - 1] = (Uint16) color; - } - pixels += pitch; - } -} - -static void -SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h) -{ - Uint8 r = (Uint8) (color & 0xFF); - Uint8 g = (Uint8) ((color >> 8) & 0xFF); - Uint8 b = (Uint8) ((color >> 16) & 0xFF); - - while (h--) { - int n = w; - Uint8 *p = pixels; - - while (n--) { - *p++ = r; - *p++ = g; - *p++ = b; - } - pixels += pitch; - } -} - -static void -SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h) -{ - while (h--) { - SDL_memset4(pixels, color, w); - pixels += pitch; - } -} - -/* - * This function performs a fast fill of the given rectangle with 'color' - */ -int -SDL_FillRect(SDL_Surface * dst, SDL_Rect * dstrect, Uint32 color) -{ - Uint8 *pixels; - - /* This function doesn't work on surfaces < 8 bpp */ - if (dst->format->BitsPerPixel < 8) { - SDL_SetError("Fill rect on unsupported surface format"); - return (-1); - } - - /* If 'dstrect' == NULL, then fill the whole surface */ - if (dstrect) { - /* Perform clipping */ - if (!SDL_IntersectRect(dstrect, &dst->clip_rect, dstrect)) { - return (0); - } - } else { - dstrect = &dst->clip_rect; - } - - /* Perform software fill */ - if (SDL_LockSurface(dst) != 0) { - return (-1); - } - - pixels = - (Uint8 *) dst->pixels + dstrect->y * dst->pitch + - dstrect->x * dst->format->BytesPerPixel; - - switch (dst->format->BytesPerPixel) { - case 1: - { - color |= (color << 8); - color |= (color << 16); -#ifdef __SSE__ - if (SDL_HasSSE()) { - SDL_FillRect1SSE(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif -#ifdef __MMX__ - if (SDL_HasMMX()) { - SDL_FillRect1MMX(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif - SDL_FillRect1(pixels, dst->pitch, color, dstrect->w, dstrect->h); - break; - } - - case 2: - { - color |= (color << 16); -#ifdef __SSE__ - if (SDL_HasSSE()) { - SDL_FillRect2SSE(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif -#ifdef __MMX__ - if (SDL_HasMMX()) { - SDL_FillRect2MMX(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif - SDL_FillRect2(pixels, dst->pitch, color, dstrect->w, dstrect->h); - break; - } - - case 3: - /* 24-bit RGB is a slow path, at least for now. */ - { - SDL_FillRect3(pixels, dst->pitch, color, dstrect->w, dstrect->h); - break; - } - - case 4: - { -#ifdef __SSE__ - if (SDL_HasSSE()) { - SDL_FillRect4SSE(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif -#ifdef __MMX__ - if (SDL_HasMMX()) { - SDL_FillRect4MMX(pixels, dst->pitch, color, dstrect->w, - dstrect->h); - break; - } -#endif - SDL_FillRect4(pixels, dst->pitch, color, dstrect->w, dstrect->h); - break; - } - } - - SDL_UnlockSurface(dst); - - /* We're done! */ - return (0); -} - /* * Lock a surface to directly access the pixels */