diff src/video/SDL_copy.c @ 2253:6d99edd791bf

Added notes on the next steps for SDL 1.3 Moved fill and copy routines to their own files.
author Sam Lantinga <slouken@libsdl.org>
date Thu, 16 Aug 2007 21:43:19 +0000
parents src/video/SDL_blit_copy.c@b80e3d57941f
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/video/SDL_copy.c	Thu Aug 16 21:43:19 2007 +0000
@@ -0,0 +1,160 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#include "SDL_video.h"
+#include "SDL_blit.h"
+#include "SDL_copy.h"
+
+
+#ifdef __SSE__
+/* Copies len bytes: 64-byte chunks via streaming SSE stores, then the
+   remainder via SDL_memcpy().  This assumes 16-byte aligned src and dst. */
+static __inline__ void
+SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
+{
+    int chunks = len / 64;      /* number of whole 64-byte chunks */
+    __m128 v0, v1, v2, v3;
+
+    while (chunks--) {
+        _mm_prefetch(src, _MM_HINT_NTA);
+        v0 = *(__m128 *) (src + 0);     /* read the whole chunk first... */
+        v1 = *(__m128 *) (src + 16);
+        v2 = *(__m128 *) (src + 32);
+        v3 = *(__m128 *) (src + 48);
+        _mm_stream_ps((float *) (dst + 0), v0);     /* ...then write it */
+        _mm_stream_ps((float *) (dst + 16), v1);
+        _mm_stream_ps((float *) (dst + 32), v2);
+        _mm_stream_ps((float *) (dst + 48), v3);
+        src += 64;
+        dst += 64;
+    }
+    if (len & 63)
+        SDL_memcpy(dst, src, len & 63);
+}
+#endif /* __SSE__ */
+
+#ifdef __MMX__
+/* Copies len bytes from src to dst: 64-byte chunks as eight 64-bit (__m64)
+   copies, then the remainder via SDL_memcpy().
+   This assumes 8-byte aligned src and dst.
+   Only plain loads and stores are used: _mm_prefetch() and _mm_stream_pi()
+   are SSE intrinsics, so they must not run when only MMX was detected.
+   _mm_empty() is not called here; SDL_BlitCopy() does that after its last
+   row. */
+static __inline__ void
+SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
+{
+    int i;
+
+    for (i = len / 64; i--;) {
+        const __m64 *s64 = (const __m64 *) src;
+        __m64 *d64 = (__m64 *) dst;
+
+        /* One 64-byte chunk as eight quadword copies */
+        d64[0] = s64[0];
+        d64[1] = s64[1];
+        d64[2] = s64[2];
+        d64[3] = s64[3];
+        d64[4] = s64[4];
+        d64[5] = s64[5];
+        d64[6] = s64[6];
+        d64[7] = s64[7];
+        src += 64;
+        dst += 64;
+    }
+
+    if (len & 63)
+        SDL_memcpy(dst, src, len & 63);
+}
+#endif /* __MMX__ */
+
+/* Copies the blit rectangle one row at a time.  The SSE and MMX paths need
+   every row aligned, so the row strides are tested as well as the pointers. */
+void
+SDL_BlitCopy(SDL_BlitInfo * info)
+{
+    Uint8 *src = info->s_pixels;
+    Uint8 *dst = info->d_pixels;
+    int w = info->d_width * info->dst->BytesPerPixel;   /* bytes per row */
+    int h = info->d_height;
+    int srcskip = w + info->s_skip;     /* byte distance between row starts */
+    int dstskip = w + info->d_skip;
+
+#ifdef __SSE__
+    if (SDL_HasSSE() && !((uintptr_t) src & 15) && !((uintptr_t) dst & 15)
+        && !(srcskip & 15) && !(dstskip & 15)) {
+        while (h--) {
+            SDL_memcpySSE(dst, src, w);
+            src += srcskip;
+            dst += dstskip;
+        }
+        return;
+    }
+#endif
+
+#ifdef __MMX__
+    if (SDL_HasMMX() && !((uintptr_t) src & 7) && !((uintptr_t) dst & 7)
+        && !(srcskip & 7) && !(dstskip & 7)) {
+        while (h--) {
+            SDL_memcpyMMX(dst, src, w);
+            src += srcskip;
+            dst += dstskip;
+        }
+        _mm_empty();            /* clear the MMX state (emms) */
+        return;
+    }
+#endif
+
+    while (h--) {
+        SDL_memcpy(dst, src, w);
+        src += srcskip;
+        dst += dstskip;
+    }
+}
+
+/* Copy for regions that may overlap: forward via SDL_BlitCopy() unless dst
+   starts within the source rows, in which case the last row is copied first. */
+void
+SDL_BlitCopyOverlap(SDL_BlitInfo * info)
+{
+    const int rowbytes = info->d_width * info->dst->BytesPerPixel;
+    const int pitch = rowbytes + info->s_skip;  /* same stride for src, dst */
+    int rows = info->d_height;
+    Uint8 *from = info->s_pixels;
+    Uint8 *to = info->d_pixels;
+
+    if ((to < from) || (to >= (from + rows * pitch))) {
+        SDL_BlitCopy(info);
+        return;
+    }
+    from += (rows - 1) * pitch;
+    to += (rows - 1) * pitch;
+    while (rows--) {
+        SDL_revcpy(to, from, rowbytes);
+        from -= pitch;
+        to -= pitch;
+    }
+}
+
+/* vi: set ts=4 sw=4 expandtab: */