changeset 3261:72b542f34739

The new, cleaner, version of the atomic operations. The dummy code is what you should start working with to port atomic ops. The linux code appears to be complete and *should* be the base of all Unix and GCC based versions. The macosx and win32 versions are currently just copies of the dummy code. I will begin working on the windows version as soon as this check in is done. I need someone to work on the Mac OS X version. I'm afraid that this check in will break QNX (Sorry!)
author Bob Pendleton <bob@pendleton.com>
date Thu, 17 Sep 2009 20:35:12 +0000
parents 85bf3f297b5c
children e3d33bd599eb
files include/SDL_atomic.h src/atomic/dummy/SDL_atomic.c src/atomic/linux/SDL_atomic.c src/atomic/macosx/SDL_atomic.c src/atomic/win32/SDL_atomic.c test/testatomic.c
diffstat 6 files changed, 748 insertions(+), 2840 deletions(-) [+]
line wrap: on
line diff
--- a/include/SDL_atomic.h	Mon Sep 07 16:04:44 2009 +0000
+++ b/include/SDL_atomic.h	Thu Sep 17 20:35:12 2009 +0000
@@ -18,6 +18,8 @@
 
     Sam Lantinga
     slouken@libsdl.org
+
+    Contributed by Bob Pendleton, bob@pendleton.com
  */
 
 /**
@@ -46,48 +48,50 @@
  * processor specific atomic operations. When possible they are
  * implemented as true processor specific atomic operations. When that
  * is not possible the are implemented using locks that *do* use the
- * available atomic operations. In rare cases they may be implemented
- * using SDL's mutex fuctions.
+ * available atomic operations.
+ *
+ * At the very minimum spin locks must be implemented. Without spin
+ * locks it is not possible (AFAICT) to emulate the rest of the atomic
+ * operations.
  */
 
 /* Function prototypes */
 
+/**
+ * SDL AtomicLock.
+ * 
+ * The spin lock functions and type are required and can not be
+ * emulated because they are used in the emulation code.
+ */
+
+typedef volatile Uint32 SDL_SpinLock;
+
+/**
+ * \fn  void SDL_AtomicLock(SDL_SpinLock *lock);
+ *
+ * \brief Lock a spin lock by setting it to a non-zero value.
+ *
+ * \param lock points to the lock.
+ *
+ */
+extern DECLSPEC void SDLCALL SDL_AtomicLock(SDL_SpinLock *lock);
+
+/**
+ * \fn  void SDL_AtomicUnlock(SDL_SpinLock *lock);
+ *
+ * \brief Unlock a spin lock by setting it to 0. Always returns immediately.
+ *
+ * \param lock points to the lock.
+ *
+ */
+extern DECLSPEC void SDLCALL SDL_AtomicUnlock(SDL_SpinLock *lock);
+
 /* 32 bit atomic operations */
 
 /**
- * \fn int SDL_AtomicExchange32(volatile Uint32 * ptr, Uint32 value)
- *
- * \brief Atomically exchange two 32 bit values.
- *
- * \return the value point to by ptr.
- *
- * \param ptr points to the value to be fetched from *ptr.  
- * \param value is value to be stored at *ptr.
- *
- * The current value stored at *ptr is returned and it is replaced
- * with value. This function can be used to implement SDL_TestThenSet.
- *
- */
-extern DECLSPEC Uint32 SDLCALL SDL_AtomicExchange32(volatile Uint32 * ptr, Uint32 value);
-
-/**
- * \fn int SDL_AtomicCompareThenSet32(volatile Uint32 * ptr, Uint32 oldvalue, Uint32 newvalue)
- *
- * \brief If *ptr == oldvalue then replace the contents of *ptr by new value. 
- *
- * \return true if the newvalue was stored.
- *
- * \param *ptr is the value to be compared and replaced.
- * \param oldvalue is value to be compared to *ptr.
- * \param newvalue is value to be stored at *ptr.
- *
- */
-extern DECLSPEC SDL_bool SDLCALL SDL_AtomicCompareThenSet32(volatile Uint32 * ptr,
-                                                            Uint32 oldvalue, Uint32 newvalue);
-/**
  * \fn  SDL_bool SDL_AtomicTestThenSet32(volatile Uint32 * ptr);
  *
- * \brief Check to see if *ptr == 0 and set it to non-zero.
+ * \brief Check to see if *ptr == 0 and set it to 1.
  *
  * \return SDL_True if the value pointed to by ptr was zero and
  * SDL_False if it was not zero
@@ -211,9 +215,6 @@
 /* 64 bit atomic operations */
 #ifdef SDL_HAS_64BIT_TYPE
 
-extern DECLSPEC Uint64 SDLCALL SDL_AtomicExchange64(volatile Uint64 * ptr, Uint64 value);
-extern DECLSPEC SDL_bool SDLCALL SDL_AtomicCompareThenSet64(volatile Uint64 * ptr,
-                                                            Uint64 oldvalue, Uint64 newvalue);
 extern DECLSPEC SDL_bool SDLCALL SDL_AtomicTestThenSet64(volatile Uint64 * ptr);
 extern DECLSPEC void SDLCALL SDL_AtomicClear64(volatile Uint64 * ptr);
 extern DECLSPEC Uint64 SDLCALL SDL_AtomicFetchThenIncrement64(volatile Uint64 * ptr);
--- a/src/atomic/dummy/SDL_atomic.c	Mon Sep 07 16:04:44 2009 +0000
+++ b/src/atomic/dummy/SDL_atomic.c	Thu Sep 17 20:35:12 2009 +0000
@@ -1,639 +1,162 @@
 /*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2009 Sam Lantinga
+  SDL - Simple DirectMedia Layer
+  Copyright (C) 1997-2009 Sam Lantinga
 
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
 
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
 
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-    Sam Lantinga
-    slouken@libsdl.org
+  Sam Lantinga
+  slouken@libsdl.org
+
+  Contributed by Bob Pendleton, bob@pendleton.com
 */
 
 #include "SDL_stdinc.h"
 #include "SDL_atomic.h"
 
+#include "SDL_error.h"
+
 /*
-  This file provides 8, 16, 32, and 64 bit atomic operations. If the
+  This file provides 32 and 64 bit atomic operations. If the
   operations are provided by the native hardware and operating system
   they are used. If they are not then the operations are emulated
-  using the SDL mutex operations. 
- */
-
-/* 
-  First, detect whether the operations are supported and create
-  #defines that indicate that they do exist. The goal is to have all
-  the system dependent code in the top part of the file so that the
-  bottom can be use unchanged across all platforms.
-
-  Second, #define all the operations in each size class that are
-  supported. Doing this allows supported operations to be used along
-  side of emulated operations.
+  using the SDL spin lock operations. If spin locks cannot be
+  implemented then these functions must fail.
 */
 
 /* 
-   Emmulated version.
+  DUMMY VERSION.
+
+  This version of the code assumes there is no support for atomic
+  operations. Therefore, every function sets the SDL error
+  message. Oddly enough, if you only have one thread then this
+  version actually works.
+*/
 
-   Assume there is no support for atomic operations. All such
-   operations are implemented using SDL mutex operations.
- */
+/*
+  Native spinlock routines. Because this is the dummy implementation
+  these will always call SDL_SetError() and do nothing.
+*/
+
+void 
+SDL_AtomicLock(SDL_SpinLock *lock)
+{
+   SDL_SetError("SDL_atomic.c: is not implemented on this platform");
+}
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#undef EMULATED_ATOMIC_OPERATIONS
-#endif
+void 
+SDL_AtomicUnlock(SDL_SpinLock *lock)
+{
+   SDL_SetError("SDL_atomic.c: is not implemented on this platform");
+}
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_8_BIT_OPS
+/*
+  Note that platform specific versions can be built from this version
+  by changing the #undefs to #defines and adding platform specific
+  code.
+*/
+
+#undef  nativeTestThenSet32
+#undef  nativeClear32
+#undef  nativeFetchThenIncrement32
+#undef  nativeFetchThenDecrement32
+#undef  nativeFetchThenAdd32
+#undef  nativeFetchThenSubtract32
+#undef  nativeIncrementThenFetch32
+#undef  nativeDecrementThenFetch32
+#undef  nativeAddThenFetch32
+#undef  nativeSubtractThenFetch32
 
-#define nativeExchange8(ptr, value)			()
-#define nativeCompareThenSet8(ptr, oldvalue, newvalue) 	()
-#define nativeTestThenSet8(ptr)    	     		()
-#define nativeClear8(ptr)				()
-#define nativeFetchThenIncrement8(ptr)   		()
-#define nativeFetchThenDecrement8(ptr) 			()
-#define nativeFetchThenAdd8(ptr, value) 		()
-#define nativeFetchThenSubtract8(ptr, value) 		()
-#define nativeIncrementThenFetch8(ptr) 			()
-#define nativeDecrementThenFetch8(ptr) 			()
-#define nativeAddThenFetch8(ptr, value) 		()
-#define nativeSubtractThenFetch8(ptr, value) 		()
-#endif
+#undef  nativeTestThenSet64
+#undef  nativeClear64
+#undef  nativeFetchThenIncrement64
+#undef  nativeFetchThenDecrement64
+#undef  nativeFetchThenAdd64
+#undef  nativeFetchThenSubtract64
+#undef  nativeIncrementThenFetch64
+#undef  nativeDecrementThenFetch64
+#undef  nativeAddThenFetch64
+#undef  nativeSubtractThenFetch64
+
+/* 
+  If any of the operations are not provided then we must emulate some
+  of them. That means we need a nice implementation of spin locks
+  that avoids the "one big lock" problem. We use a vector of spin
+  locks and pick which one to use based on the address of the operand
+  of the function.
+
+  To generate the index of the lock we first shift by 3 bits to get
+  rid of the zero bits that result from 32 and 64 bit alignment of
+  data. We then mask off all but 5 bits and use those 5 bits as an
+  index into the table. 
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_16_BIT_OPS
+  Picking the lock this way ensures that accesses to the same data at
+  the same time will go to the same lock. OTOH, accesses to different
+  data have only a 1/32 chance of hitting the same lock. That should
+  pretty much eliminate the chance of several atomic operations on
+  different data waiting on the same "big lock". If that isn't enough
+  then the table of locks can be expanded to a new size so long as
+  the new size is a power of two.
+*/
 
-#define nativeExchange16(ptr, value)			()
-#define nativeCompareThenSet16(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet16(ptr)    	     		()
-#define nativeClear16(ptr)				()
-#define nativeFetchThenIncrement16(ptr)   		()
-#define nativeFetchThenDecrement16(ptr) 		()
-#define nativeFetchThenAdd16(ptr, value) 		()
-#define nativeFetchThenSubtract16(ptr, value) 		()
-#define nativeIncrementThenFetch16(ptr) 		()
-#define nativeDecrementThenFetch16(ptr) 		()
-#define nativeAddThenFetch16(ptr, value) 		()
-#define nativeSubtractThenFetch16(ptr, value) 		()
+static SDL_SpinLock locks[32] = {
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static __inline__ void
+privateWaitLock(volatile void *ptr)
+{
+#if SIZEOF_VOIDP == 4
+   Uint32 index = ((((Uint32)ptr) >> 3) & 0x1f);
+#elif SIZEOF_VOIDP == 8
+   Uint64 index = ((((Uint64)ptr) >> 3) & 0x1f);
 #endif
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_32_BIT_OPS
-
-#define nativeExchange32(ptr, value)			()
-#define nativeCompareThenSet32(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet32(ptr)    	     		()
-#define nativeClear32(ptr)				()
-#define nativeFetchThenIncrement32(ptr)   		()
-#define nativeFetchThenDecrement32(ptr) 		()
-#define nativeFetchThenAdd32(ptr, value) 		()
-#define nativeFetchThenSubtract32(ptr, value) 		()
-#define nativeIncrementThenFetch32(ptr) 		()
-#define nativeDecrementThenFetch32(ptr) 		()
-#define nativeAddThenFetch32(ptr, value) 		()
-#define nativeSubtractThenFetch32(ptr, value) 		()
-#endif
-
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_64_BIT_OPS
-
-#define nativeExchange64(ptr, value)			()
-#define nativeCompareThenSet64(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet64(ptr)    	     		()
-#define nativeClear64(ptr)				()
-#define nativeFetchThenIncrement64(ptr)   		()
-#define nativeFetchThenDecrement64(ptr) 		()
-#define nativeFetchThenAdd64(ptr, value) 		()
-#define nativeFetchThenSubtract64(ptr, value) 		()
-#define nativeIncrementThenFetch64(ptr) 		()
-#define nativeDecrementThenFetch64(ptr) 		()
-#define nativeAddThenFetch64(ptr, value) 		()
-#define nativeSubtractThenFetch64(ptr, value) 		()
-#endif
-
-/* 
-If any of the operations are not provided then we must emulate some of
-them.
- */
-
-#if !defined(HAVE_ALL_8_BIT_OPS) || !defined(HAVE_ALL_16_BIT_OPS) || !defined(HAVE_ALL_32_BIT_OPS) || !defined(HAVE_ALL_64_BIT_OPS)
-
-#include "SDL_mutex.h"
-#include "SDL_error.h"
-
-static SDL_mutex * lock = NULL;
-
-static __inline__ void
-privateWaitLock()
-{
-   if(NULL == lock)
-   {
-      lock = SDL_CreateMutex();
-      if (NULL == lock)
-      {
-	 SDL_SetError("SDL_atomic.c: can't create a mutex");
-	 return;
-      }
-   }
-
-   if (-1 == SDL_LockMutex(lock))
-   {
-      SDL_SetError("SDL_atomic.c: can't lock mutex");
-   }
+   SDL_AtomicLock(&locks[index]);
 }
 
 static __inline__ void
-privateUnlock()
+privateUnlock(volatile void *ptr)
 {
-   if (-1 == SDL_UnlockMutex(lock))
-   {
-      SDL_SetError("SDL_atomic.c: can't unlock mutex");
-   }
-}
-
+#if SIZEOF_VOIDP == 4
+   Uint32 index = ((((Uint32)ptr) >> 3) & 0x1f);
+#elif SIZEOF_VOIDP == 8
+   Uint64 index = ((((Uint64)ptr) >> 3) & 0x1f);
 #endif
 
-/* 8 bit atomic operations */
-
-Uint8
-SDL_AtomicExchange8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeExchange8
-   return nativeExchange8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
+   SDL_AtomicUnlock(&locks[index]);
 }
 
+/* 32 bit atomic operations */
+
 SDL_bool
-SDL_AtomicCompareThenSet8(volatile Uint8 * ptr, Uint8 oldvalue, Uint8 newvalue)
+SDL_AtomicTestThenSet32(volatile Uint32 * ptr)
 {
-#ifdef nativeCompareThenSet8
-   return (SDL_bool)nativeCompareThenSet8(ptr, oldvalue, newvalue);
+#ifdef nativeTestThenSet32
 #else
    SDL_bool result = SDL_FALSE;
 
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet8(volatile Uint8 * ptr)
-{
-#ifdef nativeTestThenSet8
-   return (SDL_bool)nativeTestThenSet8(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
+   privateWaitLock(ptr);
    result = (*ptr == 0);
    if (result)
    {
       *ptr = 1;
    }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-void
-SDL_AtomicClear8(volatile Uint8 * ptr)
-{
-#ifdef nativeClear8
-   nativeClear8(ptr);
-#else
-   privateWaitLock();
-   *ptr = 0;
-   privateUnlock();
-
-   return;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenIncrement8(volatile Uint8 * ptr)
-{
-#ifdef nativeFetchThenIncrement8
-   return nativeFetchThenIncrement8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenDecrement8(volatile Uint8 * ptr)
-{
-#ifdef nativeFetchThenDecrement8
-   return nativeFetchThenDecrement8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenAdd8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeFetchThenAdd8
-   return nativeFetchThenAdd8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenSubtract8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeFetchThenSubtract8
-   return nativeFetchThenSubtract8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicIncrementThenFetch8(volatile Uint8 * ptr)
-{
-#ifdef nativeIncrementThenFetch8
-   return nativeIncrementThenFetch8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicDecrementThenFetch8(volatile Uint8 * ptr)
-{
-#ifdef nativeDecrementThenFetch8
-   return nativeDecrementThenFetch8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicAddThenFetch8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeAddThenFetch8
-   return nativeAddThenFetch8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicSubtractThenFetch8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeSubtractThenFetch8
-   return nativeSubtractThenFetch8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-/* 16 bit atomic operations */
-
-Uint16
-SDL_AtomicExchange16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeExchange16
-   return nativeExchange16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet16(volatile Uint16 * ptr, Uint16 oldvalue, Uint16 newvalue)
-{
-#ifdef nativeCompareThenSet16
-   return (SDL_bool)nativeCompareThenSet16(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet16(volatile Uint16 * ptr)
-{
-#ifdef nativeTestThenSet16
-   return (SDL_bool)nativeTestThenSet16(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == 0);
-   if (result)
-   {
-      *ptr = 1;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-void
-SDL_AtomicClear16(volatile Uint16 * ptr)
-{
-#ifdef nativeClear16
-   nativeClear16(ptr);
-#else
-   privateWaitLock();
-   *ptr = 0;
-   privateUnlock();
-
-   return;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenIncrement16(volatile Uint16 * ptr)
-{
-#ifdef nativeFetchThenIncrement16
-   return nativeFetchThenIncrement16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenDecrement16(volatile Uint16 * ptr)
-{
-#ifdef nativeFetchThenDecrement16
-   return nativeFetchThenDecrement16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenAdd16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeFetchThenAdd16
-   return nativeFetchThenAdd16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenSubtract16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeFetchThenSubtract16
-   return nativeFetchThenSubtract16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicIncrementThenFetch16(volatile Uint16 * ptr)
-{
-#ifdef nativeIncrementThenFetch16
-   return nativeIncrementThenFetch16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicDecrementThenFetch16(volatile Uint16 * ptr)
-{
-#ifdef nativeDecrementThenFetch16
-   return nativeDecrementThenFetch16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicAddThenFetch16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeAddThenFetch16
-   return nativeAddThenFetch16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicSubtractThenFetch16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeSubtractThenFetch16
-   return nativeSubtractThenFetch16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-/* 32 bit atomic operations */
-
-Uint32
-SDL_AtomicExchange32(volatile Uint32 * ptr, Uint32 value)
-{
-#ifdef nativeExchange32
-   return nativeExchange32(ptr, value);
-#else
-   Uint32 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet32(volatile Uint32 * ptr, Uint32 oldvalue, Uint32 newvalue)
-{
-#ifdef nativeCompareThenSet32
-   return (SDL_bool)nativeCompareThenSet32(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet32(volatile Uint32 * ptr)
-{
-#ifdef nativeTestThenSet32
-   return (SDL_bool)nativeTestThenSet32(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == 0);
-   if (result)
-   {
-      *ptr = 1;
-   }
-   privateUnlock();
+   privateUnlock(ptr);
 
    return result;
 #endif
@@ -643,11 +166,10 @@
 SDL_AtomicClear32(volatile Uint32 * ptr)
 {
 #ifdef nativeClear32
-   nativeClear32(ptr);
 #else
-   privateWaitLock();
+   privateWaitLock(ptr);
    *ptr = 0;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return;
 #endif
@@ -657,14 +179,13 @@
 SDL_AtomicFetchThenIncrement32(volatile Uint32 * ptr)
 {
 #ifdef nativeFetchThenIncrement32
-   return nativeFetchThenIncrement32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -674,14 +195,13 @@
 SDL_AtomicFetchThenDecrement32(volatile Uint32 * ptr)
 {
 #ifdef nativeFetchThenDecrement32
-   return nativeFetchThenDecrement32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr) -= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -691,14 +211,13 @@
 SDL_AtomicFetchThenAdd32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeFetchThenAdd32
-   return nativeFetchThenAdd32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -708,14 +227,13 @@
 SDL_AtomicFetchThenSubtract32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeFetchThenSubtract32
-   return nativeFetchThenSubtract32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)-= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -725,14 +243,13 @@
 SDL_AtomicIncrementThenFetch32(volatile Uint32 * ptr)
 {
 #ifdef nativeIncrementThenFetch32
-   return nativeIncrementThenFetch32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -742,14 +259,13 @@
 SDL_AtomicDecrementThenFetch32(volatile Uint32 * ptr)
 {
 #ifdef nativeDecrementThenFetch32
-   return nativeDecrementThenFetch32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -759,14 +275,13 @@
 SDL_AtomicAddThenFetch32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeAddThenFetch32
-   return nativeAddThenFetch32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -776,14 +291,13 @@
 SDL_AtomicSubtractThenFetch32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeSubtractThenFetch32
-   return nativeSubtractThenFetch32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -792,58 +306,20 @@
 /* 64 bit atomic operations */
 #ifdef SDL_HAS_64BIT_TYPE
 
-Uint64
-SDL_AtomicExchange64(volatile Uint64 * ptr, Uint64 value)
-{
-#ifdef nativeExchange64
-   return nativeExchange64(ptr, value);
-#else
-   Uint64 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet64(volatile Uint64 * ptr, Uint64 oldvalue, Uint64 newvalue)
-{
-#ifdef nativeCompareThenSet64
-   return (SDL_bool)nativeCompareThenSet64(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
 SDL_bool
 SDL_AtomicTestThenSet64(volatile Uint64 * ptr)
 {
 #ifdef nativeTestThenSet64
-   return (SDL_bool)nativeTestThenSet64(ptr);
 #else
    SDL_bool result = SDL_FALSE;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    result = (*ptr == 0);
    if (result)
    {
       *ptr = 1;
    }
-   privateUnlock();
+   privateUnlock(ptr);
 
    return result;
 #endif
@@ -853,11 +329,10 @@
 SDL_AtomicClear64(volatile Uint64 * ptr)
 {
 #ifdef nativeClear64
-   nativeClear64(ptr);
 #else
-   privateWaitLock();
+   privateWaitLock(ptr);
    *ptr = 0;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return;
 #endif
@@ -867,14 +342,13 @@
 SDL_AtomicFetchThenIncrement64(volatile Uint64 * ptr)
 {
 #ifdef nativeFetchThenIncrement64
-   return nativeFetchThenIncrement64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -884,14 +358,13 @@
 SDL_AtomicFetchThenDecrement64(volatile Uint64 * ptr)
 {
 #ifdef nativeFetchThenDecrement64
-   return nativeFetchThenDecrement64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr) -= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -901,14 +374,13 @@
 SDL_AtomicFetchThenAdd64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeFetchThenAdd64
-   return nativeFetchThenAdd64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -918,14 +390,13 @@
 SDL_AtomicFetchThenSubtract64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeFetchThenSubtract64
-   return nativeFetchThenSubtract64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)-= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -935,14 +406,13 @@
 SDL_AtomicIncrementThenFetch64(volatile Uint64 * ptr)
 {
 #ifdef nativeIncrementThenFetch64
-   return nativeIncrementThenFetch64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -952,14 +422,13 @@
 SDL_AtomicDecrementThenFetch64(volatile Uint64 * ptr)
 {
 #ifdef nativeDecrementThenFetch64
-   return nativeDecrementThenFetch64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -969,14 +438,13 @@
 SDL_AtomicAddThenFetch64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeAddThenFetch64
-   return nativeAddThenFetch64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -986,14 +454,13 @@
 SDL_AtomicSubtractThenFetch64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeSubtractThenFetch64
-   return nativeSubtractThenFetch64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
--- a/src/atomic/linux/SDL_atomic.c	Mon Sep 07 16:04:44 2009 +0000
+++ b/src/atomic/linux/SDL_atomic.c	Thu Sep 17 20:35:12 2009 +0000
@@ -1,611 +1,167 @@
 /*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2009 Sam Lantinga
+  SDL - Simple DirectMedia Layer
+  Copyright (C) 1997-2009 Sam Lantinga
 
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
 
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
 
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-    Sam Lantinga
-    slouken@libsdl.org
+  Sam Lantinga
+  slouken@libsdl.org
+
+  Contributed by Bob Pendleton, bob@pendleton.com
 */
 
 #include "SDL_stdinc.h"
 #include "SDL_atomic.h"
 
+#include "SDL_error.h"
+
 /*
-  This file provides 8, 16, 32, and 64 bit atomic operations. If the
+  This file provides 32 and 64 bit atomic operations. If the
   operations are provided by the native hardware and operating system
   they are used. If they are not then the operations are emulated
-  using the SDL mutex operations. 
- */
-
-/* 
-  First, detect whether the operations are supported and create
-  #defines that indicate that they do exist. The goal is to have all
-  the system dependent code in the top part of the file so that the
-  bottom can be use unchanged across all platforms.
-
-  Second, #define all the operations in each size class that are
-  supported. Doing this allows supported operations to be used along
-  side of emulated operations.
+  using the SDL spin lock operations. If spin locks cannot be
+  implemented then these functions must fail.
 */
 
 /* 
-   Linux version.
+  LINUX/GCC VERSION.
+
+  This version of the code assumes support of the atomic builtins as
+  documented at gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html. This
+  code should work on any modern x86 or other processor supported by
+  GCC. 
+
+  Some processors will only support some of these operations so
+  #ifdefs will have to be added as incompatibilities are discovered
+*/
 
-   Test for gnu C builtin support for atomic operations. The only way
-   I know of is to check to see if the
-   __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* macros are defined.
- */
+/*
+  Native spinlock routines.
+*/
 
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
-#define HAVE_ALL_8_BIT_OPS
+void 
+SDL_AtomicLock(SDL_SpinLock *lock) /* acquire the spin lock, busy-waiting until it is free */
+{
+   while (0 != __sync_lock_test_and_set(lock, 1)) /* swap in 1; retry while the old value was non-zero (lock held) */
+   {
+   }
+}
 
-#define nativeExchange8(ptr, value)			(__sync_lock_test_and_set(ptr, value))
-#define nativeCompareThenSet8(ptr, oldvalue, newvalue) 	(oldvalue == __sync_val_compare_and_swap(ptr, oldvalue, newvalue))
-#define nativeTestThenSet8(ptr)    	     		(0 == __sync_lock_test_and_set(ptr, 1))
-#define nativeClear8(ptr)				(__sync_lock_release(ptr))
-#define nativeFetchThenIncrement8(ptr)   		(__sync_fetch_and_add(ptr, 1))
-#define nativeFetchThenDecrement8(ptr) 			(__sync_fetch_and_sub(ptr, 1))
-#define nativeFetchThenAdd8(ptr, value) 		(__sync_fetch_and_add(ptr, value))
-#define nativeFetchThenSubtract8(ptr, value) 		(__sync_fetch_and_sub(ptr, value))
-#define nativeIncrementThenFetch8(ptr) 			(__sync_add_and_fetch(ptr, 1))
-#define nativeDecrementThenFetch8(ptr) 			(__sync_sub_and_fetch(ptr, 1))
-#define nativeAddThenFetch8(ptr, value) 		(__sync_add_and_fetch(ptr, value))
-#define nativeSubtractThenFetch8(ptr, value) 		(__sync_sub_and_fetch(ptr, value))
-#endif
+void 
+SDL_AtomicUnlock(SDL_SpinLock *lock) /* release a spin lock taken with SDL_AtomicLock */
+{
+   __sync_lock_release(lock); /* stores 0 with release semantics; __sync_lock_test_and_set is only an acquire barrier */
+}
 
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
-#define HAVE_ALL_16_BIT_OPS
+/*
+  Note that platform specific versions can be built from this version
+  by changing the #undefs to #defines and adding platform specific
+  code.
+*/
+
+#define nativeTestThenSet32
+#define nativeClear32
+#define nativeFetchThenIncrement32
+#define nativeFetchThenDecrement32
+#define nativeFetchThenAdd32
+#define nativeFetchThenSubtract32
+#define nativeIncrementThenFetch32
+#define nativeDecrementThenFetch32
+#define nativeAddThenFetch32
+#define nativeSubtractThenFetch32
 
-#define nativeExchange16(ptr, value)			(__sync_lock_test_and_set(ptr, value))
-#define nativeCompareThenSet16(ptr, oldvalue, newvalue) (oldvalue == __sync_val_compare_and_swap(ptr, oldvalue, newvalue))
-#define nativeTestThenSet16(ptr)    	     		(0 == __sync_lock_test_and_set(ptr, 1))
-#define nativeClear16(ptr)				(__sync_lock_release(ptr))
-#define nativeFetchThenIncrement16(ptr)   		(__sync_fetch_and_add(ptr, 1))
-#define nativeFetchThenDecrement16(ptr) 		(__sync_fetch_and_sub(ptr, 1))
-#define nativeFetchThenAdd16(ptr, value) 		(__sync_fetch_and_add(ptr, value))
-#define nativeFetchThenSubtract16(ptr, value) 		(__sync_fetch_and_sub(ptr, value))
-#define nativeIncrementThenFetch16(ptr) 		(__sync_add_and_fetch(ptr, 1))
-#define nativeDecrementThenFetch16(ptr) 		(__sync_sub_and_fetch(ptr, 1))
-#define nativeAddThenFetch16(ptr, value) 		(__sync_add_and_fetch(ptr, value))
-#define nativeSubtractThenFetch16(ptr, value) 		(__sync_sub_and_fetch(ptr, value))
-#endif
+#define nativeTestThenSet64
+#define nativeClear64
+#define nativeFetchThenIncrement64
+#define nativeFetchThenDecrement64
+#define nativeFetchThenAdd64
+#define nativeFetchThenSubtract64
+#define nativeIncrementThenFetch64
+#define nativeDecrementThenFetch64
+#define nativeAddThenFetch64
+#define nativeSubtractThenFetch64
+
+/* 
+  If any of the operations are not provided then we must emulate some
+  of them. That means we need a nice implementation of spin locks
+  that avoids the "one big lock" problem. We use a vector of spin
+  locks and pick which one to use based on the address of the operand
+  of the function.
+
+  To generate the index of the lock we first shift by 3 bits to get
+  rid of the zero bits that result from 32 and 64 bit alignment of
+  data. We then mask off all but 5 bits and use those 5 bits as an
+  index into the table. 
 
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
-#define HAVE_ALL_32_BIT_OPS
+  Picking the lock this way ensures that accesses to the same data at
+  the same time will go to the same lock. OTOH, accesses to different
+  data have only a 1/32 chance of hitting the same lock. That should
+  pretty much eliminate the chances of several atomic operations on
+  different data from waiting on the same "big lock". If it isn't
+  then the table of locks can be expanded to a new size so long as
+  the new size is a power of two.
+*/
 
-#define nativeExchange32(ptr, value)			(__sync_lock_test_and_set(ptr, value))
-#define nativeCompareThenSet32(ptr, oldvalue, newvalue) (oldvalue == __sync_val_compare_and_swap(ptr, oldvalue, newvalue))
-#define nativeTestThenSet32(ptr)    	     		(0 == __sync_lock_test_and_set(ptr, 1))
-#define nativeClear32(ptr)				(__sync_lock_release(ptr))
-#define nativeFetchThenIncrement32(ptr)   		(__sync_fetch_and_add(ptr, 1))
-#define nativeFetchThenDecrement32(ptr) 		(__sync_fetch_and_sub(ptr, 1))
-#define nativeFetchThenAdd32(ptr, value) 		(__sync_fetch_and_add(ptr, value))
-#define nativeFetchThenSubtract32(ptr, value) 		(__sync_fetch_and_sub(ptr, value))
-#define nativeIncrementThenFetch32(ptr) 		(__sync_add_and_fetch(ptr, 1))
-#define nativeDecrementThenFetch32(ptr) 		(__sync_sub_and_fetch(ptr, 1))
-#define nativeAddThenFetch32(ptr, value) 		(__sync_add_and_fetch(ptr, value))
-#define nativeSubtractThenFetch32(ptr, value) 		(__sync_sub_and_fetch(ptr, value))
+static SDL_SpinLock locks[32] = {
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static __inline__ void
+privateWaitLock(volatile void *ptr)
+{
+#if SIZEOF_VOIDP == 4
+   Uint32 index = ((((Uint32)ptr) >> 3) & 0x1f);
+#elif SIZEOF_VOIDP == 8
+   Uint64 index = ((((Uint64)ptr) >> 3) & 0x1f);
 #endif
 
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
-#define HAVE_ALL_64_BIT_OPS
+   SDL_AtomicLock(&locks[index]);
+}
 
-#define nativeExchange64(ptr, value)			(__sync_lock_test_and_set(ptr, value))
-#define nativeCompareThenSet64(ptr, oldvalue, newvalue) (oldvalue == __sync_val_compare_and_swap(ptr, oldvalue, newvalue))
-#define nativeTestThenSet64(ptr)    	     		(0 == __sync_lock_test_and_set(ptr, 1))
-#define nativeClear64(ptr)				(__sync_lock_release(ptr))
-#define nativeFetchThenIncrement64(ptr)   		(__sync_fetch_and_add(ptr, 1))
-#define nativeFetchThenDecrement64(ptr) 		(__sync_fetch_and_sub(ptr, 1))
-#define nativeFetchThenAdd64(ptr, value) 		(__sync_fetch_and_add(ptr, value))
-#define nativeFetchThenSubtract64(ptr, value) 		(__sync_fetch_and_sub(ptr, value))
-#define nativeIncrementThenFetch64(ptr) 		(__sync_add_and_fetch(ptr, 1))
-#define nativeDecrementThenFetch64(ptr) 		(__sync_sub_and_fetch(ptr, 1))
-#define nativeAddThenFetch64(ptr, value) 		(__sync_add_and_fetch(ptr, value))
-#define nativeSubtractThenFetch64(ptr, value) 		(__sync_sub_and_fetch(ptr, value))
+static __inline__ void
+privateUnlock(volatile void *ptr)
+{
+#if SIZEOF_VOIDP == 4
+   Uint32 index = ((((Uint32)ptr) >> 3) & 0x1f);
+#elif SIZEOF_VOIDP == 8
+   Uint64 index = ((((Uint64)ptr) >> 3) & 0x1f);
 #endif
 
-/* 
-If any of the operations are not provided then we must emulate some of
-them.
- */
-
-#if !defined(HAVE_ALL_8_BIT_OPS) || !defined(HAVE_ALL_16_BIT_OPS) || !defined(HAVE_ALL_32_BIT_OPS) || !defined(HAVE_ALL_64_BIT_OPS)
-
-static Uint32 lock = 0;
-
-#define privateWaitLock()	       \
-   while (nativeTestThenSet32(&lock))  \
-   {				       \
-   };
-
-#define privateUnlock() (nativeClear32(&lock))
-#endif
-
-/* 8 bit atomic operations */
-
-Uint8
-SDL_AtomicExchange8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeExchange8
-   return nativeExchange8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
+   SDL_AtomicUnlock(&locks[index]);
 }
 
+/* 32 bit atomic operations */
+
 SDL_bool
-SDL_AtomicCompareThenSet8(volatile Uint8 * ptr, Uint8 oldvalue, Uint8 newvalue)
+SDL_AtomicTestThenSet32(volatile Uint32 * ptr)
 {
-#ifdef nativeCompareThenSet8
-   return (SDL_bool)nativeCompareThenSet8(ptr, oldvalue, newvalue);
+#ifdef nativeTestThenSet32
+   return 0 == __sync_lock_test_and_set(ptr, 1);
 #else
    SDL_bool result = SDL_FALSE;
 
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet8(volatile Uint8 * ptr)
-{
-#ifdef nativeTestThenSet8
-   return (SDL_bool)nativeTestThenSet8(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
+   privateWaitLock(ptr);
    result = (*ptr == 0);
    if (result)
    {
       *ptr = 1;
    }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-void
-SDL_AtomicClear8(volatile Uint8 * ptr)
-{
-#ifdef nativeClear8
-   nativeClear8(ptr);
-#else
-   privateWaitLock();
-   *ptr = 0;
-   privateUnlock();
-
-   return;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenIncrement8(volatile Uint8 * ptr)
-{
-#ifdef nativeFetchThenIncrement8
-   return nativeFetchThenIncrement8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenDecrement8(volatile Uint8 * ptr)
-{
-#ifdef nativeFetchThenDecrement8
-   return nativeFetchThenDecrement8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenAdd8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeFetchThenAdd8
-   return nativeFetchThenAdd8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenSubtract8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeFetchThenSubtract8
-   return nativeFetchThenSubtract8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicIncrementThenFetch8(volatile Uint8 * ptr)
-{
-#ifdef nativeIncrementThenFetch8
-   return nativeIncrementThenFetch8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicDecrementThenFetch8(volatile Uint8 * ptr)
-{
-#ifdef nativeDecrementThenFetch8
-   return nativeDecrementThenFetch8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicAddThenFetch8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeAddThenFetch8
-   return nativeAddThenFetch8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicSubtractThenFetch8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeSubtractThenFetch8
-   return nativeSubtractThenFetch8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-/* 16 bit atomic operations */
-
-Uint16
-SDL_AtomicExchange16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeExchange16
-   return nativeExchange16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet16(volatile Uint16 * ptr, Uint16 oldvalue, Uint16 newvalue)
-{
-#ifdef nativeCompareThenSet16
-   return (SDL_bool)nativeCompareThenSet16(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet16(volatile Uint16 * ptr)
-{
-#ifdef nativeTestThenSet16
-   return (SDL_bool)nativeTestThenSet16(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == 0);
-   if (result)
-   {
-      *ptr = 1;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-void
-SDL_AtomicClear16(volatile Uint16 * ptr)
-{
-#ifdef nativeClear16
-   nativeClear16(ptr);
-#else
-   privateWaitLock();
-   *ptr = 0;
-   privateUnlock();
-
-   return;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenIncrement16(volatile Uint16 * ptr)
-{
-#ifdef nativeFetchThenIncrement16
-   return nativeFetchThenIncrement16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenDecrement16(volatile Uint16 * ptr)
-{
-#ifdef nativeFetchThenDecrement16
-   return nativeFetchThenDecrement16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenAdd16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeFetchThenAdd16
-   return nativeFetchThenAdd16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenSubtract16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeFetchThenSubtract16
-   return nativeFetchThenSubtract16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicIncrementThenFetch16(volatile Uint16 * ptr)
-{
-#ifdef nativeIncrementThenFetch16
-   return nativeIncrementThenFetch16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicDecrementThenFetch16(volatile Uint16 * ptr)
-{
-#ifdef nativeDecrementThenFetch16
-   return nativeDecrementThenFetch16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicAddThenFetch16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeAddThenFetch16
-   return nativeAddThenFetch16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicSubtractThenFetch16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeSubtractThenFetch16
-   return nativeSubtractThenFetch16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-/* 32 bit atomic operations */
-
-Uint32
-SDL_AtomicExchange32(volatile Uint32 * ptr, Uint32 value)
-{
-#ifdef nativeExchange32
-   return nativeExchange32(ptr, value);
-#else
-   Uint32 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet32(volatile Uint32 * ptr, Uint32 oldvalue, Uint32 newvalue)
-{
-#ifdef nativeCompareThenSet32
-   return (SDL_bool)nativeCompareThenSet32(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet32(volatile Uint32 * ptr)
-{
-#ifdef nativeTestThenSet32
-   return (SDL_bool)nativeTestThenSet32(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == 0);
-   if (result)
-   {
-      *ptr = 1;
-   }
-   privateUnlock();
+   privateUnlock(ptr);
 
    return result;
 #endif
@@ -615,11 +171,12 @@
 SDL_AtomicClear32(volatile Uint32 * ptr)
 {
 #ifdef nativeClear32
-   nativeClear32(ptr);
+   __sync_lock_test_and_set(ptr, 0);
+   return;
 #else
-   privateWaitLock();
+   privateWaitLock(ptr);
    *ptr = 0;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return;
 #endif
@@ -629,14 +186,14 @@
 SDL_AtomicFetchThenIncrement32(volatile Uint32 * ptr)
 {
 #ifdef nativeFetchThenIncrement32
-   return nativeFetchThenIncrement32(ptr);
+   return __sync_fetch_and_add(ptr, 1);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -646,14 +203,14 @@
 SDL_AtomicFetchThenDecrement32(volatile Uint32 * ptr)
 {
 #ifdef nativeFetchThenDecrement32
-   return nativeFetchThenDecrement32(ptr);
+   return __sync_fetch_and_sub(ptr, 1);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr) -= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -663,14 +220,14 @@
 SDL_AtomicFetchThenAdd32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeFetchThenAdd32
-   return nativeFetchThenAdd32(ptr, value);
+   return __sync_fetch_and_add(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -680,14 +237,14 @@
 SDL_AtomicFetchThenSubtract32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeFetchThenSubtract32
-   return nativeFetchThenSubtract32(ptr, value);
+   return __sync_fetch_and_sub(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)-= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -697,14 +254,14 @@
 SDL_AtomicIncrementThenFetch32(volatile Uint32 * ptr)
 {
 #ifdef nativeIncrementThenFetch32
-   return nativeIncrementThenFetch32(ptr);
+   return __sync_add_and_fetch(ptr, 1);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -714,14 +271,14 @@
 SDL_AtomicDecrementThenFetch32(volatile Uint32 * ptr)
 {
 #ifdef nativeDecrementThenFetch32
-   return nativeDecrementThenFetch32(ptr);
+   return __sync_sub_and_fetch(ptr, 1);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -731,14 +288,14 @@
 SDL_AtomicAddThenFetch32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeAddThenFetch32
-   return nativeAddThenFetch32(ptr, value);
+   return __sync_add_and_fetch(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -748,14 +305,14 @@
 SDL_AtomicSubtractThenFetch32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeSubtractThenFetch32
-   return nativeSubtractThenFetch32(ptr, value);
+   return __sync_sub_and_fetch(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -764,58 +321,21 @@
 /* 64 bit atomic operations */
 #ifdef SDL_HAS_64BIT_TYPE
 
-Uint64
-SDL_AtomicExchange64(volatile Uint64 * ptr, Uint64 value)
-{
-#ifdef nativeExchange64
-   return nativeExchange64(ptr, value);
-#else
-   Uint64 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet64(volatile Uint64 * ptr, Uint64 oldvalue, Uint64 newvalue)
-{
-#ifdef nativeCompareThenSet64
-   return (SDL_bool)nativeCompareThenSet64(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
 SDL_bool
 SDL_AtomicTestThenSet64(volatile Uint64 * ptr)
 {
 #ifdef nativeTestThenSet64
-   return (SDL_bool)nativeTestThenSet64(ptr);
+   return 0 == __sync_lock_test_and_set(ptr, 1);
 #else
    SDL_bool result = SDL_FALSE;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    result = (*ptr == 0);
    if (result)
    {
       *ptr = 1;
    }
-   privateUnlock();
+   privateUnlock(ptr);
 
    return result;
 #endif
@@ -825,11 +345,12 @@
 SDL_AtomicClear64(volatile Uint64 * ptr)
 {
 #ifdef nativeClear64
-   nativeClear64(ptr);
+   __sync_lock_test_and_set(ptr, 0);
+   return;
 #else
-   privateWaitLock();
+   privateWaitLock(ptr);
    *ptr = 0;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return;
 #endif
@@ -839,14 +360,14 @@
 SDL_AtomicFetchThenIncrement64(volatile Uint64 * ptr)
 {
 #ifdef nativeFetchThenIncrement64
-   return nativeFetchThenIncrement64(ptr);
+   return __sync_fetch_and_add(ptr, 1);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -856,14 +377,14 @@
 SDL_AtomicFetchThenDecrement64(volatile Uint64 * ptr)
 {
 #ifdef nativeFetchThenDecrement64
-   return nativeFetchThenDecrement64(ptr);
+   return __sync_fetch_and_sub(ptr, 1);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr) -= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -873,14 +394,14 @@
 SDL_AtomicFetchThenAdd64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeFetchThenAdd64
-   return nativeFetchThenAdd64(ptr, value);
+   return __sync_fetch_and_add(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -890,14 +411,14 @@
 SDL_AtomicFetchThenSubtract64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeFetchThenSubtract64
-   return nativeFetchThenSubtract64(ptr, value);
+   return __sync_fetch_and_sub(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)-= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -907,14 +428,14 @@
 SDL_AtomicIncrementThenFetch64(volatile Uint64 * ptr)
 {
 #ifdef nativeIncrementThenFetch64
-   return nativeIncrementThenFetch64(ptr);
+   return __sync_add_and_fetch(ptr, 1);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -924,14 +445,14 @@
 SDL_AtomicDecrementThenFetch64(volatile Uint64 * ptr)
 {
 #ifdef nativeDecrementThenFetch64
-   return nativeDecrementThenFetch64(ptr);
+   return __sync_sub_and_fetch(ptr, 1);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -941,14 +462,14 @@
 SDL_AtomicAddThenFetch64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeAddThenFetch64
-   return nativeAddThenFetch64(ptr, value);
+   return __sync_add_and_fetch(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -958,17 +479,17 @@
 SDL_AtomicSubtractThenFetch64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeSubtractThenFetch64
-   return nativeSubtractThenFetch64(ptr, value);
+   return __sync_sub_and_fetch(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
-#endif
 
+#endif /* SDL_HAS_64BIT_TYPE */
--- a/src/atomic/macosx/SDL_atomic.c	Mon Sep 07 16:04:44 2009 +0000
+++ b/src/atomic/macosx/SDL_atomic.c	Thu Sep 17 20:35:12 2009 +0000
@@ -1,639 +1,162 @@
 /*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2009 Sam Lantinga
+  SDL - Simple DirectMedia Layer
+  Copyright (C) 1997-2009 Sam Lantinga
 
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
 
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
 
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-    Sam Lantinga
-    slouken@libsdl.org
+  Sam Lantinga
+  slouken@libsdl.org
+
+  Contributed by Bob Pendleton, bob@pendleton.com
 */
 
 #include "SDL_stdinc.h"
 #include "SDL_atomic.h"
 
+#include "SDL_error.h"
+
 /*
-  This file provides 8, 16, 32, and 64 bit atomic operations. If the
+  This file provides 32 and 64 bit atomic operations. If the
   operations are provided by the native hardware and operating system
   they are used. If they are not then the operations are emulated
-  using the SDL mutex operations. 
- */
-
-/* 
-  First, detect whether the operations are supported and create
-  #defines that indicate that they do exist. The goal is to have all
-  the system dependent code in the top part of the file so that the
-  bottom can be use unchanged across all platforms.
-
-  Second, #define all the operations in each size class that are
-  supported. Doing this allows supported operations to be used along
-  side of emulated operations.
+  using the SDL spin lock operations. If spin locks cannot be
+  implemented then these functions must fail.
 */
 
 /* 
-   Emmulated version.
+  DUMMY VERSION.
+
+  This version of the code assumes there is no support for atomic
+  operations. Therefore, every function sets the SDL error
+  message. Oddly enough, if you only have one thread then this
+  version actually works.
+*/
 
-   Assume there is no support for atomic operations. All such
-   operations are implemented using SDL mutex operations.
- */
+/*
+  Native spinlock routines. Because this is the dummy implementation
+  these will always call SDL_SetError() and do nothing.
+*/
+
+void 
+SDL_AtomicLock(SDL_SpinLock *lock)
+{
+   SDL_SetError("SDL_atomic.c: is not implemented on this platform");
+}
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#undef EMULATED_ATOMIC_OPERATIONS
-#endif
+void 
+SDL_AtomicUnlock(SDL_SpinLock *lock)
+{
+   SDL_SetError("SDL_atomic.c: is not implemented on this platform");
+}
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_8_BIT_OPS
+/*
+  Note that platform specific versions can be built from this version
+  by changing the #undefs to #defines and adding platform specific
+  code.
+*/
+
+#undef  nativeTestThenSet32
+#undef  nativeClear32
+#undef  nativeFetchThenIncrement32
+#undef  nativeFetchThenDecrement32
+#undef  nativeFetchThenAdd32
+#undef  nativeFetchThenSubtract32
+#undef  nativeIncrementThenFetch32
+#undef  nativeDecrementThenFetch32
+#undef  nativeAddThenFetch32
+#undef  nativeSubtractThenFetch32
 
-#define nativeExchange8(ptr, value)			()
-#define nativeCompareThenSet8(ptr, oldvalue, newvalue) 	()
-#define nativeTestThenSet8(ptr)    	     		()
-#define nativeClear8(ptr)				()
-#define nativeFetchThenIncrement8(ptr)   		()
-#define nativeFetchThenDecrement8(ptr) 			()
-#define nativeFetchThenAdd8(ptr, value) 		()
-#define nativeFetchThenSubtract8(ptr, value) 		()
-#define nativeIncrementThenFetch8(ptr) 			()
-#define nativeDecrementThenFetch8(ptr) 			()
-#define nativeAddThenFetch8(ptr, value) 		()
-#define nativeSubtractThenFetch8(ptr, value) 		()
-#endif
+#undef  nativeTestThenSet64
+#undef  nativeClear64
+#undef  nativeFetchThenIncrement64
+#undef  nativeFetchThenDecrement64
+#undef  nativeFetchThenAdd64
+#undef  nativeFetchThenSubtract64
+#undef  nativeIncrementThenFetch64
+#undef  nativeDecrementThenFetch64
+#undef  nativeAddThenFetch64
+#undef  nativeSubtractThenFetch64
+
+/* 
+  If any of the operations are not provided then we must emulate some
+  of them. That means we need a nice implementation of spin locks
+  that avoids the "one big lock" problem. We use a vector of spin
+  locks and pick which one to use based on the address of the operand
+  of the function.
+
+  To generate the index of the lock we first shift by 3 bits to get
+  rid of the zero bits that result from 32 and 64 bit alignment of
+  data. We then mask off all but 5 bits and use those 5 bits as an
+  index into the table. 
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_16_BIT_OPS
+  Picking the lock this way ensures that accesses to the same data at
+  the same time will go to the same lock. OTOH, accesses to different
+  data have only a 1/32 chance of hitting the same lock. That should
+  pretty much eliminate the chances of several atomic operations on
+  different data from waiting on the same "big lock". If it doesn't,
+  the table of locks can be expanded to a new size so long as the
+  new size is a power of two and the index mask is widened to match.
+*/
 
-#define nativeExchange16(ptr, value)			()
-#define nativeCompareThenSet16(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet16(ptr)    	     		()
-#define nativeClear16(ptr)				()
-#define nativeFetchThenIncrement16(ptr)   		()
-#define nativeFetchThenDecrement16(ptr) 		()
-#define nativeFetchThenAdd16(ptr, value) 		()
-#define nativeFetchThenSubtract16(ptr, value) 		()
-#define nativeIncrementThenFetch16(ptr) 		()
-#define nativeDecrementThenFetch16(ptr) 		()
-#define nativeAddThenFetch16(ptr, value) 		()
-#define nativeSubtractThenFetch16(ptr, value) 		()
+static SDL_SpinLock locks[32] = {
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static __inline__ void
+privateWaitLock(volatile void *ptr)
+{
+#if SIZEOF_VOIDP == 4
+   Uint32 index = ((((Uint32)ptr) >> 3) & 0x1f);
+#elif SIZEOF_VOIDP == 8
+   Uint64 index = ((((Uint64)ptr) >> 3) & 0x1f);
 #endif
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_32_BIT_OPS
-
-#define nativeExchange32(ptr, value)			()
-#define nativeCompareThenSet32(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet32(ptr)    	     		()
-#define nativeClear32(ptr)				()
-#define nativeFetchThenIncrement32(ptr)   		()
-#define nativeFetchThenDecrement32(ptr) 		()
-#define nativeFetchThenAdd32(ptr, value) 		()
-#define nativeFetchThenSubtract32(ptr, value) 		()
-#define nativeIncrementThenFetch32(ptr) 		()
-#define nativeDecrementThenFetch32(ptr) 		()
-#define nativeAddThenFetch32(ptr, value) 		()
-#define nativeSubtractThenFetch32(ptr, value) 		()
-#endif
-
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_64_BIT_OPS
-
-#define nativeExchange64(ptr, value)			()
-#define nativeCompareThenSet64(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet64(ptr)    	     		()
-#define nativeClear64(ptr)				()
-#define nativeFetchThenIncrement64(ptr)   		()
-#define nativeFetchThenDecrement64(ptr) 		()
-#define nativeFetchThenAdd64(ptr, value) 		()
-#define nativeFetchThenSubtract64(ptr, value) 		()
-#define nativeIncrementThenFetch64(ptr) 		()
-#define nativeDecrementThenFetch64(ptr) 		()
-#define nativeAddThenFetch64(ptr, value) 		()
-#define nativeSubtractThenFetch64(ptr, value) 		()
-#endif
-
-/* 
-If any of the operations are not provided then we must emulate some of
-them.
- */
-
-#if !defined(HAVE_ALL_8_BIT_OPS) || !defined(HAVE_ALL_16_BIT_OPS) || !defined(HAVE_ALL_32_BIT_OPS) || !defined(HAVE_ALL_64_BIT_OPS)
-
-#include "SDL_mutex.h"
-#include "SDL_error.h"
-
-static SDL_mutex * lock = NULL;
-
-static __inline__ void
-privateWaitLock()
-{
-   if(NULL == lock)
-   {
-      lock = SDL_CreateMutex();
-      if (NULL == lock)
-      {
-	 SDL_SetError("SDL_atomic.c: can't create a mutex");
-	 return;
-      }
-   }
-
-   if (-1 == SDL_LockMutex(lock))
-   {
-      SDL_SetError("SDL_atomic.c: can't lock mutex");
-   }
+   SDL_AtomicLock(&locks[index]);
 }
 
 static __inline__ void
-privateUnlock()
+privateUnlock(volatile void *ptr)
 {
-   if (-1 == SDL_UnlockMutex(lock))
-   {
-      SDL_SetError("SDL_atomic.c: can't unlock mutex");
-   }
-}
-
+#if SIZEOF_VOIDP == 4
+   Uint32 index = ((((Uint32)ptr) >> 3) & 0x1f);
+#elif SIZEOF_VOIDP == 8
+   Uint64 index = ((((Uint64)ptr) >> 3) & 0x1f);
 #endif
 
-/* 8 bit atomic operations */
-
-Uint8
-SDL_AtomicExchange8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeExchange8
-   return nativeExchange8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
+   SDL_AtomicUnlock(&locks[index]);
 }
 
+/* 32 bit atomic operations */
+
 SDL_bool
-SDL_AtomicCompareThenSet8(volatile Uint8 * ptr, Uint8 oldvalue, Uint8 newvalue)
+SDL_AtomicTestThenSet32(volatile Uint32 * ptr)
 {
-#ifdef nativeCompareThenSet8
-   return (SDL_bool)nativeCompareThenSet8(ptr, oldvalue, newvalue);
+#ifdef nativeTestThenSet32
 #else
    SDL_bool result = SDL_FALSE;
 
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet8(volatile Uint8 * ptr)
-{
-#ifdef nativeTestThenSet8
-   return (SDL_bool)nativeTestThenSet8(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
+   privateWaitLock(ptr);
    result = (*ptr == 0);
    if (result)
    {
       *ptr = 1;
    }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-void
-SDL_AtomicClear8(volatile Uint8 * ptr)
-{
-#ifdef nativeClear8
-   nativeClear8(ptr);
-#else
-   privateWaitLock();
-   *ptr = 0;
-   privateUnlock();
-
-   return;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenIncrement8(volatile Uint8 * ptr)
-{
-#ifdef nativeFetchThenIncrement8
-   return nativeFetchThenIncrement8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenDecrement8(volatile Uint8 * ptr)
-{
-#ifdef nativeFetchThenDecrement8
-   return nativeFetchThenDecrement8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenAdd8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeFetchThenAdd8
-   return nativeFetchThenAdd8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenSubtract8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeFetchThenSubtract8
-   return nativeFetchThenSubtract8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicIncrementThenFetch8(volatile Uint8 * ptr)
-{
-#ifdef nativeIncrementThenFetch8
-   return nativeIncrementThenFetch8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicDecrementThenFetch8(volatile Uint8 * ptr)
-{
-#ifdef nativeDecrementThenFetch8
-   return nativeDecrementThenFetch8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicAddThenFetch8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeAddThenFetch8
-   return nativeAddThenFetch8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicSubtractThenFetch8(volatile Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeSubtractThenFetch8
-   return nativeSubtractThenFetch8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-/* 16 bit atomic operations */
-
-Uint16
-SDL_AtomicExchange16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeExchange16
-   return nativeExchange16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet16(volatile Uint16 * ptr, Uint16 oldvalue, Uint16 newvalue)
-{
-#ifdef nativeCompareThenSet16
-   return (SDL_bool)nativeCompareThenSet16(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet16(volatile Uint16 * ptr)
-{
-#ifdef nativeTestThenSet16
-   return (SDL_bool)nativeTestThenSet16(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == 0);
-   if (result)
-   {
-      *ptr = 1;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-void
-SDL_AtomicClear16(volatile Uint16 * ptr)
-{
-#ifdef nativeClear16
-   nativeClear16(ptr);
-#else
-   privateWaitLock();
-   *ptr = 0;
-   privateUnlock();
-
-   return;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenIncrement16(volatile Uint16 * ptr)
-{
-#ifdef nativeFetchThenIncrement16
-   return nativeFetchThenIncrement16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenDecrement16(volatile Uint16 * ptr)
-{
-#ifdef nativeFetchThenDecrement16
-   return nativeFetchThenDecrement16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenAdd16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeFetchThenAdd16
-   return nativeFetchThenAdd16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenSubtract16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeFetchThenSubtract16
-   return nativeFetchThenSubtract16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicIncrementThenFetch16(volatile Uint16 * ptr)
-{
-#ifdef nativeIncrementThenFetch16
-   return nativeIncrementThenFetch16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicDecrementThenFetch16(volatile Uint16 * ptr)
-{
-#ifdef nativeDecrementThenFetch16
-   return nativeDecrementThenFetch16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicAddThenFetch16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeAddThenFetch16
-   return nativeAddThenFetch16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicSubtractThenFetch16(volatile Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeSubtractThenFetch16
-   return nativeSubtractThenFetch16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-/* 32 bit atomic operations */
-
-Uint32
-SDL_AtomicExchange32(volatile Uint32 * ptr, Uint32 value)
-{
-#ifdef nativeExchange32
-   return nativeExchange32(ptr, value);
-#else
-   Uint32 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet32(volatile Uint32 * ptr, Uint32 oldvalue, Uint32 newvalue)
-{
-#ifdef nativeCompareThenSet32
-   return (SDL_bool)nativeCompareThenSet32(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet32(volatile Uint32 * ptr)
-{
-#ifdef nativeTestThenSet32
-   return (SDL_bool)nativeTestThenSet32(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == 0);
-   if (result)
-   {
-      *ptr = 1;
-   }
-   privateUnlock();
+   privateUnlock(ptr);
 
    return result;
 #endif
@@ -643,11 +166,10 @@
 SDL_AtomicClear32(volatile Uint32 * ptr)
 {
 #ifdef nativeClear32
-   nativeClear32(ptr);
 #else
-   privateWaitLock();
+   privateWaitLock(ptr);
    *ptr = 0;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return;
 #endif
@@ -657,14 +179,13 @@
 SDL_AtomicFetchThenIncrement32(volatile Uint32 * ptr)
 {
 #ifdef nativeFetchThenIncrement32
-   return nativeFetchThenIncrement32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -674,14 +195,13 @@
 SDL_AtomicFetchThenDecrement32(volatile Uint32 * ptr)
 {
 #ifdef nativeFetchThenDecrement32
-   return nativeFetchThenDecrement32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr) -= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -691,14 +211,13 @@
 SDL_AtomicFetchThenAdd32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeFetchThenAdd32
-   return nativeFetchThenAdd32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -708,14 +227,13 @@
 SDL_AtomicFetchThenSubtract32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeFetchThenSubtract32
-   return nativeFetchThenSubtract32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)-= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -725,14 +243,13 @@
 SDL_AtomicIncrementThenFetch32(volatile Uint32 * ptr)
 {
 #ifdef nativeIncrementThenFetch32
-   return nativeIncrementThenFetch32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -742,14 +259,13 @@
 SDL_AtomicDecrementThenFetch32(volatile Uint32 * ptr)
 {
 #ifdef nativeDecrementThenFetch32
-   return nativeDecrementThenFetch32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -759,14 +275,13 @@
 SDL_AtomicAddThenFetch32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeAddThenFetch32
-   return nativeAddThenFetch32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -776,14 +291,13 @@
 SDL_AtomicSubtractThenFetch32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeSubtractThenFetch32
-   return nativeSubtractThenFetch32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -792,58 +306,20 @@
 /* 64 bit atomic operations */
 #ifdef SDL_HAS_64BIT_TYPE
 
-Uint64
-SDL_AtomicExchange64(volatile Uint64 * ptr, Uint64 value)
-{
-#ifdef nativeExchange64
-   return nativeExchange64(ptr, value);
-#else
-   Uint64 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet64(volatile Uint64 * ptr, Uint64 oldvalue, Uint64 newvalue)
-{
-#ifdef nativeCompareThenSet64
-   return (SDL_bool)nativeCompareThenSet64(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
 SDL_bool
 SDL_AtomicTestThenSet64(volatile Uint64 * ptr)
 {
 #ifdef nativeTestThenSet64
-   return (SDL_bool)nativeTestThenSet64(ptr);
 #else
    SDL_bool result = SDL_FALSE;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    result = (*ptr == 0);
    if (result)
    {
       *ptr = 1;
    }
-   privateUnlock();
+   privateUnlock(ptr);
 
    return result;
 #endif
@@ -853,11 +329,10 @@
 SDL_AtomicClear64(volatile Uint64 * ptr)
 {
 #ifdef nativeClear64
-   nativeClear64(ptr);
 #else
-   privateWaitLock();
+   privateWaitLock(ptr);
    *ptr = 0;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return;
 #endif
@@ -867,14 +342,13 @@
 SDL_AtomicFetchThenIncrement64(volatile Uint64 * ptr)
 {
 #ifdef nativeFetchThenIncrement64
-   return nativeFetchThenIncrement64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -884,14 +358,13 @@
 SDL_AtomicFetchThenDecrement64(volatile Uint64 * ptr)
 {
 #ifdef nativeFetchThenDecrement64
-   return nativeFetchThenDecrement64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr) -= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -901,14 +374,13 @@
 SDL_AtomicFetchThenAdd64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeFetchThenAdd64
-   return nativeFetchThenAdd64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -918,14 +390,13 @@
 SDL_AtomicFetchThenSubtract64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeFetchThenSubtract64
-   return nativeFetchThenSubtract64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)-= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -935,14 +406,13 @@
 SDL_AtomicIncrementThenFetch64(volatile Uint64 * ptr)
 {
 #ifdef nativeIncrementThenFetch64
-   return nativeIncrementThenFetch64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -952,14 +422,13 @@
 SDL_AtomicDecrementThenFetch64(volatile Uint64 * ptr)
 {
 #ifdef nativeDecrementThenFetch64
-   return nativeDecrementThenFetch64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -969,14 +438,13 @@
 SDL_AtomicAddThenFetch64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeAddThenFetch64
-   return nativeAddThenFetch64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -986,14 +454,13 @@
 SDL_AtomicSubtractThenFetch64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeSubtractThenFetch64
-   return nativeSubtractThenFetch64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
--- a/src/atomic/win32/SDL_atomic.c	Mon Sep 07 16:04:44 2009 +0000
+++ b/src/atomic/win32/SDL_atomic.c	Thu Sep 17 20:35:12 2009 +0000
@@ -1,789 +1,303 @@
 /*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2009 Sam Lantinga
+  SDL - Simple DirectMedia Layer
+  Copyright (C) 1997-2009 Sam Lantinga
 
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
 
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
 
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
-    Sam Lantinga
-    slouken@libsdl.org
+  Sam Lantinga
+  slouken@libsdl.org
+
+  Contributed by Bob Pendleton, bob@pendleton.com
 */
 
 #include "SDL_stdinc.h"
 #include "SDL_atomic.h"
 
+#include "SDL_error.h"
+
 /*
-  This file provides 8, 16, 32, and 64 bit atomic operations. If the
+  This file provides 32 and 64 bit atomic operations. If the
   operations are provided by the native hardware and operating system
   they are used. If they are not then the operations are emulated
-  using the SDL mutex operations. 
- */
-
-/* 
-  First, detect whether the operations are supported and create
-  #defines that indicate that they do exist. The goal is to have all
-  the system dependent code in the top part of the file so that the
-  bottom can be use unchanged across all platforms.
-
-  Second, #define all the operations in each size class that are
-  supported. Doing this allows supported operations to be used along
-  side of emulated operations.
+  using the SDL spin lock operations. If spin locks cannot be
+  implemented then these functions must fail.
 */
 
 /* 
-   Emmulated version.
+  DUMMY VERSION.
+
+  This version of the code assumes there is no support for atomic
+  operations. Therefore, every function sets the SDL error
+  message. Oddly enough, if you only have one thread then this
+  version actually works.
+*/
 
-   Assume there is no support for atomic operations. All such
-   operations are implemented using SDL mutex operations.
- */
+/*
+  Native spinlock routines. Because this is the dummy implementation
+  these will always call SDL_SetError() and do nothing.
+*/
+
+void 
+SDL_AtomicLock(SDL_SpinLock *lock)
+{
+   SDL_SetError("SDL_atomic.c: is not implemented on this platform");
+}
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#undef EMULATED_ATOMIC_OPERATIONS
-#endif
+void 
+SDL_AtomicUnlock(SDL_SpinLock *lock)
+{
+   SDL_SetError("SDL_atomic.c: is not implemented on this platform");
+}
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_8_BIT_OPS
+/*
+  Note that platform specific versions can be built from this version
+  by changing the #undefs to #defines and adding platform specific
+  code.
+*/
+
+#undef  nativeTestThenSet32
+#undef  nativeClear32
+#undef  nativeFetchThenIncrement32
+#undef  nativeFetchThenDecrement32
+#undef  nativeFetchThenAdd32
+#undef  nativeFetchThenSubtract32
+#undef  nativeIncrementThenFetch32
+#undef  nativeDecrementThenFetch32
+#undef  nativeAddThenFetch32
+#undef  nativeSubtractThenFetch32
 
-#define nativeExchange8(ptr, value)			()
-#define nativeCompareThenSet8(ptr, oldvalue, newvalue) 	()
-#define nativeTestThenSet8(ptr)    	     		()
-#define nativeClear8(ptr)				()
-#define nativeFetchThenIncrement8(ptr)   		()
-#define nativeFetchThenDecrement8(ptr) 			()
-#define nativeFetchThenAdd8(ptr, value) 		()
-#define nativeFetchThenSubtract8(ptr, value) 		()
-#define nativeIncrementThenFetch8(ptr) 			()
-#define nativeDecrementThenFetch8(ptr) 			()
-#define nativeAddThenFetch8(ptr, value) 		()
-#define nativeSubtractThenFetch8(ptr, value) 		()
-#endif
+#undef  nativeTestThenSet64
+#undef  nativeClear64
+#undef  nativeFetchThenIncrement64
+#undef  nativeFetchThenDecrement64
+#undef  nativeFetchThenAdd64
+#undef  nativeFetchThenSubtract64
+#undef  nativeIncrementThenFetch64
+#undef  nativeDecrementThenFetch64
+#undef  nativeAddThenFetch64
+#undef  nativeSubtractThenFetch64
+
+/* 
+  If any of the operations are not provided then we must emulate some
+  of them. That means we need a nice implementation of spin locks
+  that avoids the "one big lock" problem. We use a vector of spin
+  locks and pick which one to use based on the address of the operand
+  of the function.
+
+  To generate the index of the lock we first shift by 3 bits to get
+  rid of the zero bits that result from 32 and 64 bit alignment of
+  data. We then mask off all but 5 bits and use those 5 bits as an
+  index into the table. 
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_16_BIT_OPS
+  Picking the lock this way ensures that accesses to the same data at
+  the same time will go to the same lock. OTOH, accesses to different
+  data have only a 1/32 chance of hitting the same lock. That should
+  pretty much eliminate the chance of several atomic operations on
+  different data waiting on the same "big lock". If it doesn't,
+  then the table of locks can be expanded to a new size so long as
+  the new size is a power of two.
+*/
 
-#define nativeExchange16(ptr, value)			()
-#define nativeCompareThenSet16(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet16(ptr)    	     		()
-#define nativeClear16(ptr)				()
-#define nativeFetchThenIncrement16(ptr)   		()
-#define nativeFetchThenDecrement16(ptr) 		()
-#define nativeFetchThenAdd16(ptr, value) 		()
-#define nativeFetchThenSubtract16(ptr, value) 		()
-#define nativeIncrementThenFetch16(ptr) 		()
-#define nativeDecrementThenFetch16(ptr) 		()
-#define nativeAddThenFetch16(ptr, value) 		()
-#define nativeSubtractThenFetch16(ptr, value) 		()
+static SDL_SpinLock locks[32] = {
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static __inline__ void
+privateWaitLock(volatile void *ptr)
+{
+#if SIZEOF_VOIDP == 4
+   Uint32 index = ((((Uint32)ptr) >> 3) & 0x1f);
+#elif SIZEOF_VOIDP == 8
+   Uint64 index = ((((Uint64)ptr) >> 3) & 0x1f);
 #endif
 
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_32_BIT_OPS
-
-#define nativeExchange32(ptr, value)			()
-#define nativeCompareThenSet32(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet32(ptr)    	     		()
-#define nativeClear32(ptr)				()
-#define nativeFetchThenIncrement32(ptr)   		()
-#define nativeFetchThenDecrement32(ptr) 		()
-#define nativeFetchThenAdd32(ptr, value) 		()
-#define nativeFetchThenSubtract32(ptr, value) 		()
-#define nativeIncrementThenFetch32(ptr) 		()
-#define nativeDecrementThenFetch32(ptr) 		()
-#define nativeAddThenFetch32(ptr, value) 		()
-#define nativeSubtractThenFetch32(ptr, value) 		()
-#endif
-
-#ifdef EMULATED_ATOMIC_OPERATIONS
-#define HAVE_ALL_64_BIT_OPS
-
-#define nativeExchange64(ptr, value)			()
-#define nativeCompareThenSet64(ptr, oldvalue, newvalue) ()
-#define nativeTestThenSet64(ptr)    	     		()
-#define nativeClear64(ptr)				()
-#define nativeFetchThenIncrement64(ptr)   		()
-#define nativeFetchThenDecrement64(ptr) 		()
-#define nativeFetchThenAdd64(ptr, value) 		()
-#define nativeFetchThenSubtract64(ptr, value) 		()
-#define nativeIncrementThenFetch64(ptr) 		()
-#define nativeDecrementThenFetch64(ptr) 		()
-#define nativeAddThenFetch64(ptr, value) 		()
-#define nativeSubtractThenFetch64(ptr, value) 		()
-#endif
-
-/* 
-If any of the operations are not provided then we must emulate some of
-them.
- */
-
-#if !defined(HAVE_ALL_8_BIT_OPS) || !defined(HAVE_ALL_16_BIT_OPS) || !defined(HAVE_ALL_32_BIT_OPS) || !defined(HAVE_ALL_64_BIT_OPS)
-
-#include "SDL_mutex.h"
-#include "SDL_error.h"
-
-static SDL_mutex * lock = NULL;
-
-static __inline__ void
-privateWaitLock()
-{
-   if(NULL == lock)
-   {
-      lock = SDL_CreateMutex();
-      if (NULL == lock)
-      {
-	 SDL_SetError("SDL_atomic.c: can't create a mutex");
-	 return;
-      }
-   }
-
-   if (-1 == SDL_LockMutex(lock))
-   {
-      SDL_SetError("SDL_atomic.c: can't lock mutex");
-   }
+   SDL_AtomicLock(&locks[index]);
 }
 
 static __inline__ void
-privateUnlock()
+privateUnlock(volatile void *ptr)
 {
-   if (-1 == SDL_UnlockMutex(lock))
-   {
-      SDL_SetError("SDL_atomic.c: can't unlock mutex");
-   }
-}
-
+#if SIZEOF_VOIDP == 4
+   Uint32 index = ((((Uint32)ptr) >> 3) & 0x1f);
+#elif SIZEOF_VOIDP == 8
+   Uint64 index = ((((Uint64)ptr) >> 3) & 0x1f);
 #endif
 
-/* 8 bit atomic operations */
-
-Uint8
-SDL_AtomicExchange8(Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeExchange8
-   return nativeExchange8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
+   SDL_AtomicUnlock(&locks[index]);
 }
 
+/* 32 bit atomic operations */
+
 SDL_bool
-SDL_AtomicCompareThenSet8(Uint8 * ptr, Uint8 oldvalue, Uint8 newvalue)
+SDL_AtomicTestThenSet32(volatile Uint32 * ptr)
 {
-#ifdef nativeCompareThenSet8
-   return (SDL_bool)nativeCompareThenSet8(ptr, oldvalue, newvalue);
+#ifdef nativeTestThenSet32
 #else
    SDL_bool result = SDL_FALSE;
 
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet8(Uint8 * ptr)
-{
-#ifdef nativeTestThenSet8
-   return (SDL_bool)nativeTestThenSet8(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
+   privateWaitLock(ptr);
    result = (*ptr == 0);
    if (result)
    {
       *ptr = 1;
    }
-   privateUnlock();
+   privateUnlock(ptr);
 
    return result;
 #endif
 }
 
 void
-SDL_AtomicClear8(Uint8 * ptr)
-{
-#ifdef nativeClear8
-   nativeClear8(ptr);
-#else
-   privateWaitLock();
-   *ptr = 0;
-   privateUnlock();
-
-   return;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenIncrement8(Uint8 * ptr)
-{
-#ifdef nativeFetchThenIncrement8
-   return nativeFetchThenIncrement8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenDecrement8(Uint8 * ptr)
-{
-#ifdef nativeFetchThenDecrement8
-   return nativeFetchThenDecrement8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenAdd8(Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeFetchThenAdd8
-   return nativeFetchThenAdd8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicFetchThenSubtract8(Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeFetchThenSubtract8
-   return nativeFetchThenSubtract8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicIncrementThenFetch8(Uint8 * ptr)
-{
-#ifdef nativeIncrementThenFetch8
-   return nativeIncrementThenFetch8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicDecrementThenFetch8(Uint8 * ptr)
-{
-#ifdef nativeDecrementThenFetch8
-   return nativeDecrementThenFetch8(ptr);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicAddThenFetch8(Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeAddThenFetch8
-   return nativeAddThenFetch8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint8
-SDL_AtomicSubtractThenFetch8(Uint8 * ptr, Uint8 value)
-{
-#ifdef nativeSubtractThenFetch8
-   return nativeSubtractThenFetch8(ptr, value);
-#else
-   Uint8 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-/* 16 bit atomic operations */
-
-Uint16
-SDL_AtomicExchange16(Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeExchange16
-   return nativeExchange16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet16(Uint16 * ptr, Uint16 oldvalue, Uint16 newvalue)
-{
-#ifdef nativeCompareThenSet16
-   return (SDL_bool)nativeCompareThenSet16(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet16(Uint16 * ptr)
-{
-#ifdef nativeTestThenSet16
-   return (SDL_bool)nativeTestThenSet16(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == 0);
-   if (result)
-   {
-      *ptr = 1;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-void
-SDL_AtomicClear16(Uint16 * ptr)
-{
-#ifdef nativeClear16
-   nativeClear16(ptr);
-#else
-   privateWaitLock();
-   *ptr = 0;
-   privateUnlock();
-
-   return;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenIncrement16(Uint16 * ptr)
-{
-#ifdef nativeFetchThenIncrement16
-   return nativeFetchThenIncrement16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenDecrement16(Uint16 * ptr)
-{
-#ifdef nativeFetchThenDecrement16
-   return nativeFetchThenDecrement16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenAdd16(Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeFetchThenAdd16
-   return nativeFetchThenAdd16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicFetchThenSubtract16(Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeFetchThenSubtract16
-   return nativeFetchThenSubtract16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicIncrementThenFetch16(Uint16 * ptr)
-{
-#ifdef nativeIncrementThenFetch16
-   return nativeIncrementThenFetch16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicDecrementThenFetch16(Uint16 * ptr)
-{
-#ifdef nativeDecrementThenFetch16
-   return nativeDecrementThenFetch16(ptr);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicAddThenFetch16(Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeAddThenFetch16
-   return nativeAddThenFetch16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)+= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint16
-SDL_AtomicSubtractThenFetch16(Uint16 * ptr, Uint16 value)
-{
-#ifdef nativeSubtractThenFetch16
-   return nativeSubtractThenFetch16(ptr, value);
-#else
-   Uint16 tmp = 0;
-
-   privateWaitLock();
-   (*ptr)-= value;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-/* 32 bit atomic operations */
-
-Uint32
-SDL_AtomicExchange32(Uint32 * ptr, Uint32 value)
-{
-#ifdef nativeExchange32
-   return nativeExchange32(ptr, value);
-#else
-   Uint32 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet32(Uint32 * ptr, Uint32 oldvalue, Uint32 newvalue)
-{
-#ifdef nativeCompareThenSet32
-   return (SDL_bool)nativeCompareThenSet32(ptr, oldvalue, newvalue);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet32(Uint32 * ptr)
-{
-#ifdef nativeTestThenSet32
-   return (SDL_bool)nativeTestThenSet32(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
-   result = (*ptr == 0);
-   if (result)
-   {
-      *ptr = 1;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-void
-SDL_AtomicClear32(Uint32 * ptr)
+SDL_AtomicClear32(volatile Uint32 * ptr)
 {
 #ifdef nativeClear32
-   nativeClear32(ptr);
 #else
-   privateWaitLock();
+   privateWaitLock(ptr);
    *ptr = 0;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return;
 #endif
 }
 
 Uint32
-SDL_AtomicFetchThenIncrement32(Uint32 * ptr)
+SDL_AtomicFetchThenIncrement32(volatile Uint32 * ptr)
 {
 #ifdef nativeFetchThenIncrement32
-   return nativeFetchThenIncrement32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint32
-SDL_AtomicFetchThenDecrement32(Uint32 * ptr)
+SDL_AtomicFetchThenDecrement32(volatile Uint32 * ptr)
 {
 #ifdef nativeFetchThenDecrement32
-   return nativeFetchThenDecrement32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr) -= 1;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint32
-SDL_AtomicFetchThenAdd32(Uint32 * ptr, Uint32 value)
+SDL_AtomicFetchThenAdd32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeFetchThenAdd32
-   return nativeFetchThenAdd32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint32
-SDL_AtomicFetchThenSubtract32(Uint32 * ptr, Uint32 value)
+SDL_AtomicFetchThenSubtract32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeFetchThenSubtract32
-   return nativeFetchThenSubtract32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)-= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint32
-SDL_AtomicIncrementThenFetch32(Uint32 * ptr)
+SDL_AtomicIncrementThenFetch32(volatile Uint32 * ptr)
 {
 #ifdef nativeIncrementThenFetch32
-   return nativeIncrementThenFetch32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint32
-SDL_AtomicDecrementThenFetch32(Uint32 * ptr)
+SDL_AtomicDecrementThenFetch32(volatile Uint32 * ptr)
 {
 #ifdef nativeDecrementThenFetch32
-   return nativeDecrementThenFetch32(ptr);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint32
-SDL_AtomicAddThenFetch32(Uint32 * ptr, Uint32 value)
+SDL_AtomicAddThenFetch32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeAddThenFetch32
-   return nativeAddThenFetch32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint32
-SDL_AtomicSubtractThenFetch32(Uint32 * ptr, Uint32 value)
+SDL_AtomicSubtractThenFetch32(volatile Uint32 * ptr, Uint32 value)
 {
 #ifdef nativeSubtractThenFetch32
-   return nativeSubtractThenFetch32(ptr, value);
 #else
    Uint32 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
@@ -792,208 +306,161 @@
 /* 64 bit atomic operations */
 #ifdef SDL_HAS_64BIT_TYPE
 
-Uint64
-SDL_AtomicExchange64(Uint64 * ptr, Uint64 value)
+SDL_bool
+SDL_AtomicTestThenSet64(volatile Uint64 * ptr)
 {
-#ifdef nativeExchange64
-   return nativeExchange64(ptr, value);
-#else
-   Uint64 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   *ptr = value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-SDL_bool
-SDL_AtomicCompareThenSet64(Uint64 * ptr, Uint64 oldvalue, Uint64 newvalue)
-{
-#ifdef nativeCompareThenSet64
-   return (SDL_bool)nativeCompareThenSet64(ptr, oldvalue, newvalue);
+#ifdef nativeTestThenSet64
 #else
    SDL_bool result = SDL_FALSE;
 
-   privateWaitLock();
-   result = (*ptr == oldvalue);
-   if (result)
-   {
-      *ptr = newvalue;
-   }
-   privateUnlock();
-
-   return result;
-#endif
-}
-
-SDL_bool
-SDL_AtomicTestThenSet64(Uint64 * ptr)
-{
-#ifdef nativeTestThenSet64
-   return (SDL_bool)nativeTestThenSet64(ptr);
-#else
-   SDL_bool result = SDL_FALSE;
-
-   privateWaitLock();
+   privateWaitLock(ptr);
    result = (*ptr == 0);
    if (result)
    {
       *ptr = 1;
    }
-   privateUnlock();
+   privateUnlock(ptr);
 
    return result;
 #endif
 }
 
 void
-SDL_AtomicClear64(Uint64 * ptr)
+SDL_AtomicClear64(volatile Uint64 * ptr)
 {
 #ifdef nativeClear64
-   nativeClear64(ptr);
 #else
-   privateWaitLock();
+   privateWaitLock(ptr);
    *ptr = 0;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return;
 #endif
 }
 
 Uint64
-SDL_AtomicFetchThenIncrement64(Uint64 * ptr)
+SDL_AtomicFetchThenIncrement64(volatile Uint64 * ptr)
 {
 #ifdef nativeFetchThenIncrement64
-   return nativeFetchThenIncrement64(ptr);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
    (*ptr)+= 1;
-   privateUnlock();
+   privateUnlock(ptr);
+
+   return tmp;
+#endif
+}
+
+Uint64
+SDL_AtomicFetchThenDecrement64(volatile Uint64 * ptr)
+{
+#ifdef nativeFetchThenDecrement64
+#else
+   Uint64 tmp = 0;
+
+   privateWaitLock(ptr);
+   tmp = *ptr;
+   (*ptr) -= 1;
+   privateUnlock(ptr);
+
+   return tmp;
+#endif
+}
+
+Uint64
+SDL_AtomicFetchThenAdd64(volatile Uint64 * ptr, Uint64 value)
+{
+#ifdef nativeFetchThenAdd64
+#else
+   Uint64 tmp = 0;
+
+   privateWaitLock(ptr);
+   tmp = *ptr;
+   (*ptr)+= value;
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint64
-SDL_AtomicFetchThenDecrement64(Uint64 * ptr)
+SDL_AtomicFetchThenSubtract64(volatile Uint64 * ptr, Uint64 value)
 {
-#ifdef nativeFetchThenDecrement64
-   return nativeFetchThenDecrement64(ptr);
+#ifdef nativeFetchThenSubtract64
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    tmp = *ptr;
-   (*ptr) -= 1;
-   privateUnlock();
+   (*ptr)-= value;
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint64
-SDL_AtomicFetchThenAdd64(Uint64 * ptr, Uint64 value)
+SDL_AtomicIncrementThenFetch64(volatile Uint64 * ptr)
 {
-#ifdef nativeFetchThenAdd64
-   return nativeFetchThenAdd64(ptr, value);
+#ifdef nativeIncrementThenFetch64
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
+   (*ptr)+= 1;
    tmp = *ptr;
-   (*ptr)+= value;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint64
-SDL_AtomicFetchThenSubtract64(Uint64 * ptr, Uint64 value)
-{
-#ifdef nativeFetchThenSubtract64
-   return nativeFetchThenSubtract64(ptr, value);
-#else
-   Uint64 tmp = 0;
-
-   privateWaitLock();
-   tmp = *ptr;
-   (*ptr)-= value;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint64
-SDL_AtomicIncrementThenFetch64(Uint64 * ptr)
+SDL_AtomicDecrementThenFetch64(volatile Uint64 * ptr)
 {
-#ifdef nativeIncrementThenFetch64
-   return nativeIncrementThenFetch64(ptr);
+#ifdef nativeDecrementThenFetch64
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
-   (*ptr)+= 1;
-   tmp = *ptr;
-   privateUnlock();
-
-   return tmp;
-#endif
-}
-
-Uint64
-SDL_AtomicDecrementThenFetch64(Uint64 * ptr)
-{
-#ifdef nativeDecrementThenFetch64
-   return nativeDecrementThenFetch64(ptr);
-#else
-   Uint64 tmp = 0;
-
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= 1;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint64
-SDL_AtomicAddThenFetch64(Uint64 * ptr, Uint64 value)
+SDL_AtomicAddThenFetch64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeAddThenFetch64
-   return nativeAddThenFetch64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)+= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
 }
 
 Uint64
-SDL_AtomicSubtractThenFetch64(Uint64 * ptr, Uint64 value)
+SDL_AtomicSubtractThenFetch64(volatile Uint64 * ptr, Uint64 value)
 {
 #ifdef nativeSubtractThenFetch64
-   return nativeSubtractThenFetch64(ptr, value);
 #else
    Uint64 tmp = 0;
 
-   privateWaitLock();
+   privateWaitLock(ptr);
    (*ptr)-= value;
    tmp = *ptr;
-   privateUnlock();
+   privateUnlock(ptr);
 
    return tmp;
 #endif
--- a/test/testatomic.c	Mon Sep 07 16:04:44 2009 +0000
+++ b/test/testatomic.c	Thu Sep 17 20:35:12 2009 +0000
@@ -29,21 +29,18 @@
    volatile Uint64 val64 = 0;
    Uint64 ret64 = 0;
 
+   SDL_SpinLock lock = 0;
+
    SDL_bool tfret = SDL_FALSE;
 
-   printf("32 bit -----------------------------------------\n\n");
-
-   ret32 = SDL_AtomicExchange32(&val32, 10);
-   printf("Exchange32           ret=%d val=%d\n", ret32, val32);
-   ret32 = SDL_AtomicExchange32(&val32, 0);
-   printf("Exchange32           ret=%d val=%d\n", ret32, val32);
+   printf("\nspin lock---------------------------------------\n\n");
 
-   val32 = 10;
-   tfret = SDL_AtomicCompareThenSet32(&val32, 10, 20);
-   printf("CompareThenSet32     tfret=%s val=%d\n", tf(tfret), val32);
-   val32 = 10;
-   tfret = SDL_AtomicCompareThenSet32(&val32, 0, 20);
-   printf("CompareThenSet32     tfret=%s val=%d\n", tf(tfret), val32);
+   SDL_AtomicLock(&lock);
+   printf("AtomicLock                   lock=%d\n", lock);
+   SDL_AtomicUnlock(&lock);
+   printf("AtomicUnlock                 lock=%d\n", lock);
+
+   printf("\n32 bit -----------------------------------------\n\n");
 
    val32 = 0;
    tfret = SDL_AtomicTestThenSet32(&val32);
@@ -79,19 +76,7 @@
    printf("SubtractThenFetch32  ret=%d val=%d\n", ret32, val32);
 
 #ifdef SDL_HAS_64BIT_TYPE
-   printf("64 bit -----------------------------------------\n\n");
-
-   ret64 = SDL_AtomicExchange64(&val64, 10);
-   printf("Exchange64           ret=%lld val=%lld\n", ret64, val64);
-   ret64 = SDL_AtomicExchange64(&val64, 0);
-   printf("Exchange64           ret=%lld val=%lld\n", ret64, val64);
-
-   val64 = 10;
-   tfret = SDL_AtomicCompareThenSet64(&val64, 10, 20);
-   printf("CompareThenSet64     tfret=%s val=%lld\n", tf(tfret), val64);
-   val64 = 10;
-   tfret = SDL_AtomicCompareThenSet64(&val64, 0, 20);
-   printf("CompareThenSet64     tfret=%s val=%lld\n", tf(tfret), val64);
+   printf("\n64 bit -----------------------------------------\n\n");
 
    val64 = 0;
    tfret = SDL_AtomicTestThenSet64(&val64);