diff src/video/SDL_blit.c @ 2247:93994f65c74c

Removed Hermes since it's LGPL and not compatible with a commercial license. Prepping for using MMX and SSE intrinsics instead of inline assembly, except for the memcpy equivalents, which only get faster if they can exploit the parallelism of loading into multiple SIMD registers. :)
author Sam Lantinga <slouken@libsdl.org>
date Wed, 15 Aug 2007 08:21:10 +0000
parents c121d94672cb
children 5a58b57b6724
line wrap: on
line diff
--- a/src/video/SDL_blit.c	Wed Aug 15 04:04:17 2007 +0000
+++ b/src/video/SDL_blit.c	Wed Aug 15 08:21:10 2007 +0000
@@ -24,6 +24,7 @@
 #include "SDL_video.h"
 #include "SDL_sysvideo.h"
 #include "SDL_blit.h"
+#include "SDL_blit_copy.h"
 #include "SDL_RLEaccel_c.h"
 #include "SDL_pixels_c.h"
 
@@ -106,111 +107,64 @@
     return (okay ? 0 : -1);
 }
 
-#ifdef MMX_ASMBLIT
-static __inline__ void
-SDL_memcpyMMX(Uint8 * to, const Uint8 * from, int len)
+#ifdef __MACOSX__
+#include <sys/sysctl.h>
+/* Returns SDL_TRUE only when the "hw.l3cachesize" sysctl succeeds and reports a nonzero size. */
+static SDL_bool SDL_UseAltivecPrefetch(void)
 {
-    int i;
+    const char key[] = "hw.l3cachesize";
+    u_int64_t result = 0;
+    size_t typeSize = sizeof(result);
 
-    for (i = 0; i < len / 8; i++) {
-        __asm__ __volatile__("	movq (%0), %%mm0\n"
-                             "	movq %%mm0, (%1)\n"::"r"(from),
-                             "r"(to):"memory");
-        from += 8;
-        to += 8;
+    if (sysctlbyname(key, &result, &typeSize, NULL, 0) == 0 && result > 0) {
+        return SDL_TRUE;
+    } else {
+        return SDL_FALSE;
     }
-    if (len & 7)
-        SDL_memcpy(to, from, len & 7);
 }
+#else
+static SDL_bool SDL_UseAltivecPrefetch(void)
+{
+    /* Just guess G4: no sysctl query is made on this path, so always report SDL_TRUE */
+    return SDL_TRUE;
+}
+#endif /* __MACOSX__ */
 
-static __inline__ void
-SDL_memcpySSE(Uint8 * to, const Uint8 * from, int len)
+static SDL_loblit SDL_ChooseBlitFunc(SDL_BlitEntry *entries, int count)
 {
     int i;
-
-    __asm__ __volatile__("	prefetchnta (%0)\n"
-                         "	prefetchnta 64(%0)\n"
-                         "	prefetchnta 128(%0)\n"
-                         "	prefetchnta 192(%0)\n"::"r"(from));
+    static Uint32 features = 0xffffffff;    /* cached feature mask; 0xffffffff means "not detected yet" */
 
-    for (i = 0; i < len / 8; i++) {
-        __asm__ __volatile__("	prefetchnta 256(%0)\n"
-                             "	movq (%0), %%mm0\n"
-                             "	movntq %%mm0, (%1)\n"::"r"(from),
-                             "r"(to):"memory");
-        from += 8;
-        to += 8;
-    }
-    if (len & 7)
-        SDL_memcpy(to, from, len & 7);
-}
-#endif
-
-static void
-SDL_BlitCopy(SDL_BlitInfo * info)
-{
-    Uint8 *src, *dst;
-    int w, h;
-    int srcskip, dstskip;
+    if (features == 0xffffffff) {
+        features = SDL_BLIT_ANY;
 
-    w = info->d_width * info->dst->BytesPerPixel;
-    h = info->d_height;
-    src = info->s_pixels;
-    dst = info->d_pixels;
-    srcskip = w + info->s_skip;
-    dstskip = w + info->d_skip;
-#ifdef MMX_ASMBLIT
-    if (SDL_HasSSE()) {
-        while (h--) {
-            SDL_memcpySSE(dst, src, w);
-            src += srcskip;
-            dst += dstskip;
-        }
-        __asm__ __volatile__("	emms\n"::);
-    } else if (SDL_HasMMX()) {
-        while (h--) {
-            SDL_memcpyMMX(dst, src, w);
-            src += srcskip;
-            dst += dstskip;
-        }
-        __asm__ __volatile__("	emms\n"::);
-    } else
-#endif
-        while (h--) {
-            SDL_memcpy(dst, src, w);
-            src += srcskip;
-            dst += dstskip;
-        }
-}
-
-static void
-SDL_BlitCopyOverlap(SDL_BlitInfo * info)
-{
-    Uint8 *src, *dst;
-    int w, h;
-    int srcskip, dstskip;
-
-    w = info->d_width * info->dst->BytesPerPixel;
-    h = info->d_height;
-    src = info->s_pixels;
-    dst = info->d_pixels;
-    srcskip = w + info->s_skip;
-    dstskip = w + info->d_skip;
-    if (dst < src) {
-        while (h--) {
-            SDL_memcpy(dst, src, w);
-            src += srcskip;
-            dst += dstskip;
-        }
-    } else {
-        src += ((h - 1) * srcskip);
-        dst += ((h - 1) * dstskip);
-        while (h--) {
-            SDL_revcpy(dst, src, w);
-            src -= srcskip;
-            dst -= dstskip;
+        /* The SDL_BLIT_FEATURES environment variable (unsigned decimal) replaces CPU detection, for testing */
+        const char *override = SDL_getenv("SDL_BLIT_FEATURES");
+        if (override) {
+            SDL_sscanf(override, "%u", &features);
+        } else {
+            if (SDL_HasMMX()) {
+                features |= SDL_BLIT_MMX;
+            }
+            if (SDL_HasSSE()) {
+                features |= SDL_BLIT_SSE;
+            }
+            if (SDL_HasAltivec()) {
+                if (SDL_UseAltivecPrefetch()) {
+                    features |= SDL_BLIT_ALTIVEC_PREFETCH;
+                } else {
+                    features |= SDL_BLIT_ALTIVEC_NOPREFETCH;
+                }
+            }
         }
     }
+    /* Return the highest-indexed entry sharing a feature bit, else entries[0]; assumes count is the number of entries, so start at count - 1 (TODO confirm with callers) */
+    for (i = count - 1; i > 0; --i) {
+        if (features & entries[i].features) {
+            return entries[i].blit;
+        }
+    }
+    return entries[0].blit;
 }
 
 /* Figure out which of many blit routines to set up on a surface */
@@ -237,11 +191,11 @@
 
     /* Check for special "identity" case -- copy blit */
     if (surface->map->identity && blit_index == 0) {
-        surface->map->sw_data->blit = SDL_BlitCopy;
-
         /* Handle overlapping blits on the same surface */
         if (surface == surface->map->dst) {
             surface->map->sw_data->blit = SDL_BlitCopyOverlap;
+        } else {
+            surface->map->sw_data->blit = SDL_BlitCopy;
         }
     } else {
         if (surface->format->BitsPerPixel < 8) {