Mercurial > sdl-ios-xcode
diff src/video/SDL_copy.c @ 2253:6d99edd791bf
Added notes on the next steps for SDL 1.3
Moved fill and copy routines to their own files.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Thu, 16 Aug 2007 21:43:19 +0000 |
parents | src/video/SDL_blit_copy.c@b80e3d57941f |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/video/SDL_copy.c Thu Aug 16 21:43:19 2007 +0000 @@ -0,0 +1,160 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2006 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +#include "SDL_video.h" +#include "SDL_blit.h" +#include "SDL_copy.h" + + +#ifdef __SSE__ +/* This assumes 16-byte aligned src and dst */ +static __inline__ void +SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len) +{ + int i; + + __m128 values[4]; + for (i = len / 64; i--;) { + _mm_prefetch(src, _MM_HINT_NTA); + values[0] = *(__m128 *) (src + 0); + values[1] = *(__m128 *) (src + 16); + values[2] = *(__m128 *) (src + 32); + values[3] = *(__m128 *) (src + 48); + _mm_stream_ps((float *) (dst + 0), values[0]); + _mm_stream_ps((float *) (dst + 16), values[1]); + _mm_stream_ps((float *) (dst + 32), values[2]); + _mm_stream_ps((float *) (dst + 48), values[3]); + src += 64; + dst += 64; + } + + if (len & 63) + SDL_memcpy(dst, src, len & 63); +} +#endif /* __SSE__ */ + +#ifdef __MMX__ +/* This assumes 8-byte aligned src and dst */ +static __inline__ void +SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len) +{ + int i; + + __m64 values[8]; + for (i = len / 64; i--;) { + _mm_prefetch(src, _MM_HINT_NTA); + values[0] = *(__m64 *) (src + 0); + values[1] = *(__m64 *) (src + 8); + values[2] = *(__m64 *) (src + 16); + values[3] = *(__m64 *) (src + 24); + values[4] = *(__m64 *) (src + 32); + values[5] = *(__m64 *) (src + 40); + values[6] = *(__m64 *) (src + 48); + values[7] = *(__m64 *) (src + 56); + _mm_stream_pi((__m64 *) (dst + 0), values[0]); + _mm_stream_pi((__m64 *) (dst + 8), values[1]); + _mm_stream_pi((__m64 *) (dst + 16), values[2]); + _mm_stream_pi((__m64 *) (dst + 24), values[3]); + _mm_stream_pi((__m64 *) (dst + 32), values[4]); + _mm_stream_pi((__m64 *) (dst + 40), values[5]); + _mm_stream_pi((__m64 *) (dst + 48), values[6]); + _mm_stream_pi((__m64 *) (dst + 56), values[7]); + src += 64; + dst += 64; + } + + if (len & 63) + SDL_memcpy(dst, src, len & 63); +} +#endif /* __MMX__ */ + +void +SDL_BlitCopy(SDL_BlitInfo * info) +{ + Uint8 *src, *dst; + int w, h; + int srcskip, dstskip; + + w = info->d_width * info->dst->BytesPerPixel; + h = info->d_height; + src = info->s_pixels; + dst = info->d_pixels; + srcskip = w + info->s_skip; + dstskip = w + info->d_skip; + +#ifdef __SSE__ + if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)) { + while (h--) { + SDL_memcpySSE(dst, src, w); + src += srcskip; + dst += dstskip; + } + return; + } +#endif + +#ifdef __MMX__ + if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)) { + while (h--) { + SDL_memcpyMMX(dst, src, w); + src += srcskip; + dst += dstskip; + } + _mm_empty(); + return; + } +#endif + + while (h--) { + SDL_memcpy(dst, src, w); + src += srcskip; + dst += dstskip; + } +} + +void +SDL_BlitCopyOverlap(SDL_BlitInfo * info) +{ + Uint8 *src, *dst; + int w, h; + int skip; + + w = info->d_width * info->dst->BytesPerPixel; + h = info->d_height; + src = info->s_pixels; + dst = info->d_pixels; + skip = w + info->s_skip; + if ((dst < src) || (dst >= (src + h * skip))) { + SDL_BlitCopy(info); + } else { + src += ((h - 1) * skip); + dst += ((h - 1) * skip); + while (h--) { + SDL_revcpy(dst, src, w); + src -= skip; + dst -= skip; + } + } +} + +/* vi: set ts=4 sw=4 expandtab: */