changeset 5159:307ccc9c135e

Made it possible to create a texture of any format, even if not supported by the renderer. This allows me to reduce the set of formats supported by the renderers to the most optimal set, for a nice speed boost.
author Sam Lantinga <slouken@libsdl.org>
date Thu, 03 Feb 2011 00:19:40 -0800
parents f3ebd1950442
children 657543cc92f9
files VisualC/SDL/SDL_VS2008.vcproj VisualC/SDL/SDL_VS2010.vcxproj Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj Xcode/SDL/SDL.xcodeproj/project.pbxproj include/SDL_pixels.h include/SDL_rect.h include/SDL_render.h src/SDL_compat.c src/render/SDL_render.c src/render/SDL_sysrender.h src/render/SDL_yuv_mmx.c src/render/SDL_yuv_sw.c src/render/SDL_yuv_sw_c.h src/render/direct3d/SDL_d3drender.c src/render/mmx.h src/render/opengl/SDL_renderer_gl.c src/render/opengles/SDL_renderer_gles.c src/render/software/SDL_renderer_sw.c src/video/SDL_leaks.h src/video/SDL_rect.c src/video/SDL_yuv_mmx.c src/video/SDL_yuv_sw.c src/video/SDL_yuv_sw_c.h src/video/mmx.h
diffstat 24 files changed, 3051 insertions(+), 3781 deletions(-) [+]
line wrap: on
line diff
--- a/VisualC/SDL/SDL_VS2008.vcproj	Wed Feb 02 22:55:12 2011 -0800
+++ b/VisualC/SDL/SDL_VS2008.vcproj	Thu Feb 03 00:19:40 2011 -0800
@@ -607,7 +607,7 @@
 			>
 		</File>
 		<File
-			RelativePath="..\..\src\video\mmx.h"
+			RelativePath="..\..\src\render\mmx.h"
 			>
 		</File>
 		<File
@@ -1251,15 +1251,15 @@
 			>
 		</File>
 		<File
-			RelativePath="..\..\src\video\SDL_yuv_mmx.c"
+			RelativePath="..\..\src\render\SDL_yuv_mmx.c"
 			>
 		</File>
 		<File
-			RelativePath="..\..\src\video\SDL_yuv_sw.c"
+			RelativePath="..\..\src\render\SDL_yuv_sw.c"
 			>
 		</File>
 		<File
-			RelativePath="..\..\src\video\SDL_yuv_sw_c.h"
+			RelativePath="..\..\src\render\SDL_yuv_sw_c.h"
 			>
 		</File>
 		<File
--- a/VisualC/SDL/SDL_VS2010.vcxproj	Wed Feb 02 22:55:12 2011 -0800
+++ b/VisualC/SDL/SDL_VS2010.vcxproj	Thu Feb 03 00:19:40 2011 -0800
@@ -282,8 +282,9 @@
     <ClInclude Include="..\..\src\events\SDL_touch_c.h" />
     <ClInclude Include="..\..\src\libm\math.h" />
     <ClInclude Include="..\..\src\libm\math_private.h" />
+    <ClInclude Include="..\..\src\render\mmx.h" />
     <ClInclude Include="..\..\src\render\SDL_sysrender.h" />
-    <ClInclude Include="..\..\src\video\mmx.h" />
+    <ClInclude Include="..\..\src\render\SDL_yuv_sw_c.h" />
     <ClInclude Include="..\..\src\video\SDL_alphamult.h" />
     <ClInclude Include="..\..\src\audio\SDL_audio_c.h" />
     <ClInclude Include="..\..\src\audio\SDL_audiodev_c.h" />
@@ -339,7 +340,6 @@
     <ClInclude Include="..\..\src\video\windows\SDL_windowsvideo.h" />
     <ClInclude Include="..\..\src\video\windows\SDL_windowswindow.h" />
     <ClInclude Include="..\..\src\events\SDL_windowevents_c.h" />
-    <ClInclude Include="..\..\src\video\SDL_yuv_sw_c.h" />
     <ClInclude Include="..\..\src\video\windows\wmmsg.h" />
   </ItemGroup>
   <ItemGroup>
@@ -365,6 +365,8 @@
     <ClCompile Include="..\..\src\render\direct3d\SDL_d3drender.c" />
     <ClCompile Include="..\..\src\render\opengl\SDL_renderer_gl.c" />
     <ClCompile Include="..\..\src\render\SDL_render.c" />
+    <ClCompile Include="..\..\src\render\SDL_yuv_mmx.c" />
+    <ClCompile Include="..\..\src\render\SDL_yuv_sw.c" />
     <ClCompile Include="..\..\src\render\software\SDL_renderer_sw.c" />
     <ClCompile Include="..\..\src\SDL.c" />
     <ClCompile Include="..\..\src\video\SDL_alphamult.c" />
@@ -452,8 +454,6 @@
     <ClCompile Include="..\..\src\video\windows\SDL_windowsvideo.c" />
     <ClCompile Include="..\..\src\video\windows\SDL_windowswindow.c" />
     <ClCompile Include="..\..\src\events\SDL_windowevents.c" />
-    <ClCompile Include="..\..\src\video\SDL_yuv_mmx.c" />
-    <ClCompile Include="..\..\src\video\SDL_yuv_sw.c" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
--- a/Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj	Wed Feb 02 22:55:12 2011 -0800
+++ b/Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj	Thu Feb 03 00:19:40 2011 -0800
@@ -73,6 +73,10 @@
 		043DD77010FD8A0000DED673 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 043DD76C10FD8A0000DED673 /* SDL_alphamult.h */; };
 		043DD77110FD8A0000DED673 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 043DD76D10FD8A0000DED673 /* SDL_blendfillrect.c */; };
 		043DD77210FD8A0000DED673 /* SDL_drawrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 043DD76E10FD8A0000DED673 /* SDL_drawrect.c */; };
+		04409BA612FA989600FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409BA212FA989600FB9AA8 /* mmx.h */; };
+		04409BA712FA989600FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */; };
+		04409BA812FA989600FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */; };
+		04409BA912FA989600FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */; };
 		04461DEE0EA76BA3006C462D /* SDL_haptic.h in Headers */ = {isa = PBXBuildFile; fileRef = 04461DED0EA76BA3006C462D /* SDL_haptic.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		044E5FB511E6069F0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5FB311E6069F0076F181 /* SDL_clipboard.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		044E5FB611E6069F0076F181 /* SDL_input.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5FB411E6069F0076F181 /* SDL_input.h */; settings = {ATTRIBUTES = (Public, ); }; };
@@ -223,9 +227,6 @@
 		FDA684660DF2374E00F98A1A /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA683190DF2374E00F98A1A /* SDL_surface.c */; };
 		FDA684670DF2374E00F98A1A /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */; };
 		FDA684680DF2374E00F98A1A /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831B0DF2374E00F98A1A /* SDL_video.c */; };
-		FDA684690DF2374E00F98A1A /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */; };
-		FDA6846A0DF2374E00F98A1A /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */; };
-		FDA6846B0DF2374E00F98A1A /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */; };
 		FDA685FB0DF244C800F98A1A /* SDL_nullevents.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA685F50DF244C800F98A1A /* SDL_nullevents.c */; };
 		FDA685FC0DF244C800F98A1A /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA685F60DF244C800F98A1A /* SDL_nullevents_c.h */; };
 		FDA685FF0DF244C800F98A1A /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA685F90DF244C800F98A1A /* SDL_nullvideo.c */; };
@@ -328,6 +329,10 @@
 		043DD76C10FD8A0000DED673 /* SDL_alphamult.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_alphamult.h; sourceTree = "<group>"; };
 		043DD76D10FD8A0000DED673 /* SDL_blendfillrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_blendfillrect.c; sourceTree = "<group>"; };
 		043DD76E10FD8A0000DED673 /* SDL_drawrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_drawrect.c; sourceTree = "<group>"; };
+		04409BA212FA989600FB9AA8 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
+		04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
+		04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
+		04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
 		04461DED0EA76BA3006C462D /* SDL_haptic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_haptic.h; path = ../../include/SDL_haptic.h; sourceTree = SOURCE_ROOT; };
 		044E5FB311E6069F0076F181 /* SDL_clipboard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_clipboard.h; path = ../../include/SDL_clipboard.h; sourceTree = SOURCE_ROOT; };
 		044E5FB411E6069F0076F181 /* SDL_input.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_input.h; path = ../../include/SDL_input.h; sourceTree = SOURCE_ROOT; };
@@ -505,9 +510,6 @@
 		FDA683190DF2374E00F98A1A /* SDL_surface.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_surface.c; sourceTree = "<group>"; };
 		FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysvideo.h; sourceTree = "<group>"; };
 		FDA6831B0DF2374E00F98A1A /* SDL_video.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_video.c; sourceTree = "<group>"; };
-		FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
-		FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
-		FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
 		FDA685F50DF244C800F98A1A /* SDL_nullevents.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullevents.c; sourceTree = "<group>"; };
 		FDA685F60DF244C800F98A1A /* SDL_nullevents_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullevents_c.h; sourceTree = "<group>"; };
 		FDA685F90DF244C800F98A1A /* SDL_nullvideo.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullvideo.c; sourceTree = "<group>"; };
@@ -659,9 +661,13 @@
 			isa = PBXGroup;
 			children = (
 				041B2CE812FA0F680087D585 /* opengles */,
+				041B2CEC12FA0F680087D585 /* software */,
+				04409BA212FA989600FB9AA8 /* mmx.h */,
 				041B2CEA12FA0F680087D585 /* SDL_render.c */,
 				041B2CEB12FA0F680087D585 /* SDL_sysrender.h */,
-				041B2CEC12FA0F680087D585 /* software */,
+				04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */,
+				04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */,
+				04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */,
 			);
 			name = render;
 			path = ../../src/render;
@@ -1113,9 +1119,6 @@
 				FDA683190DF2374E00F98A1A /* SDL_surface.c */,
 				FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */,
 				FDA6831B0DF2374E00F98A1A /* SDL_video.c */,
-				FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */,
-				FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */,
-				FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */,
 			);
 			name = video;
 			path = ../../src/video;
@@ -1179,7 +1182,6 @@
 				FDA6845D0DF2374E00F98A1A /* SDL_pixels_c.h in Headers */,
 				FDA684630DF2374E00F98A1A /* SDL_RLEaccel_c.h in Headers */,
 				FDA684670DF2374E00F98A1A /* SDL_sysvideo.h in Headers */,
-				FDA6846B0DF2374E00F98A1A /* SDL_yuv_sw_c.h in Headers */,
 				FDA685FC0DF244C800F98A1A /* SDL_nullevents_c.h in Headers */,
 				FDA686000DF244C800F98A1A /* SDL_nullvideo.h in Headers */,
 				FD5F9D300E0E08B3008E885B /* SDL_joystick_c.h in Headers */,
@@ -1220,6 +1222,8 @@
 				04FFAB9812E23BDC00BA343D /* SDL_shape.h in Headers */,
 				041B2CD912FA0E9E0087D585 /* SDL_render.h in Headers */,
 				041B2CF212FA0F680087D585 /* SDL_sysrender.h in Headers */,
+				04409BA612FA989600FB9AA8 /* mmx.h in Headers */,
+				04409BA812FA989600FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -1427,8 +1431,6 @@
 				FDA684640DF2374E00F98A1A /* SDL_stretch.c in Sources */,
 				FDA684660DF2374E00F98A1A /* SDL_surface.c in Sources */,
 				FDA684680DF2374E00F98A1A /* SDL_video.c in Sources */,
-				FDA684690DF2374E00F98A1A /* SDL_yuv_mmx.c in Sources */,
-				FDA6846A0DF2374E00F98A1A /* SDL_yuv_sw.c in Sources */,
 				FDA685FB0DF244C800F98A1A /* SDL_nullevents.c in Sources */,
 				FDA685FF0DF244C800F98A1A /* SDL_nullvideo.c in Sources */,
 				FD5F9D2F0E0E08B3008E885B /* SDL_joystick.c in Sources */,
@@ -1469,6 +1471,8 @@
 				041B2CF012FA0F680087D585 /* SDL_renderer_gles.c in Sources */,
 				041B2CF112FA0F680087D585 /* SDL_render.c in Sources */,
 				041B2CF312FA0F680087D585 /* SDL_renderer_sw.c in Sources */,
+				04409BA712FA989600FB9AA8 /* SDL_yuv_mmx.c in Sources */,
+				04409BA912FA989600FB9AA8 /* SDL_yuv_sw.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
--- a/Xcode/SDL/SDL.xcodeproj/project.pbxproj	Wed Feb 02 22:55:12 2011 -0800
+++ b/Xcode/SDL/SDL.xcodeproj/project.pbxproj	Thu Feb 03 00:19:40 2011 -0800
@@ -131,6 +131,14 @@
 		041B2CAB12FA0D680087D585 /* SDL_render.c in Sources */ = {isa = PBXBuildFile; fileRef = 041B2C9E12FA0D680087D585 /* SDL_render.c */; };
 		041B2CAC12FA0D680087D585 /* SDL_sysrender.h in Headers */ = {isa = PBXBuildFile; fileRef = 041B2C9F12FA0D680087D585 /* SDL_sysrender.h */; };
 		041B2CAD12FA0D680087D585 /* SDL_renderer_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 041B2CA112FA0D680087D585 /* SDL_renderer_sw.c */; };
+		04409B9112FA97ED00FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8D12FA97ED00FB9AA8 /* mmx.h */; };
+		04409B9212FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */; };
+		04409B9312FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */; };
+		04409B9412FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */; };
+		04409B9512FA97ED00FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8D12FA97ED00FB9AA8 /* mmx.h */; };
+		04409B9612FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */; };
+		04409B9712FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */; };
+		04409B9812FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */; };
 		044E5F8511E6051C0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5F8411E6051C0076F181 /* SDL_clipboard.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		044E5F8611E6051C0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5F8411E6051C0076F181 /* SDL_clipboard.h */; };
 		0469A10B12EE4BF100B846D6 /* SDL_blendmode.h in Headers */ = {isa = PBXBuildFile; fileRef = 0469A10912EE4BF100B846D6 /* SDL_blendmode.h */; settings = {ATTRIBUTES = (Public, ); }; };
@@ -244,7 +252,6 @@
 		04BD011812E6671800899322 /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */; };
 		04BD011B12E6671800899322 /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */; };
 		04BD011C12E6671800899322 /* SDL_nullvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEED12E6671800899322 /* SDL_nullvideo.h */; };
-		04BD013212E6671800899322 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF0412E6671800899322 /* mmx.h */; };
 		04BD016F12E6671800899322 /* SDL_alphamult.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4812E6671800899322 /* SDL_alphamult.c */; };
 		04BD017012E6671800899322 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF4912E6671800899322 /* SDL_alphamult.h */; };
 		04BD017112E6671800899322 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */; };
@@ -285,9 +292,6 @@
 		04BD019B12E6671800899322 /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7412E6671800899322 /* SDL_surface.c */; };
 		04BD019C12E6671800899322 /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7512E6671800899322 /* SDL_sysvideo.h */; };
 		04BD019D12E6671800899322 /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7612E6671800899322 /* SDL_video.c */; };
-		04BD019E12E6671800899322 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */; };
-		04BD019F12E6671800899322 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */; };
-		04BD01A012E6671800899322 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */; };
 		04BD01DB12E6671800899322 /* imKStoUCS.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFB812E6671800899322 /* imKStoUCS.c */; };
 		04BD01DC12E6671800899322 /* imKStoUCS.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFFB912E6671800899322 /* imKStoUCS.h */; };
 		04BD01DD12E6671800899322 /* SDL_x11clipboard.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */; };
@@ -457,7 +461,6 @@
 		04BD033212E6671800899322 /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */; };
 		04BD033512E6671800899322 /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */; };
 		04BD033612E6671800899322 /* SDL_nullvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEED12E6671800899322 /* SDL_nullvideo.h */; };
-		04BD034C12E6671800899322 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF0412E6671800899322 /* mmx.h */; };
 		04BD038912E6671800899322 /* SDL_alphamult.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4812E6671800899322 /* SDL_alphamult.c */; };
 		04BD038A12E6671800899322 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF4912E6671800899322 /* SDL_alphamult.h */; };
 		04BD038B12E6671800899322 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */; };
@@ -498,9 +501,6 @@
 		04BD03B512E6671800899322 /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7412E6671800899322 /* SDL_surface.c */; };
 		04BD03B612E6671800899322 /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7512E6671800899322 /* SDL_sysvideo.h */; };
 		04BD03B712E6671800899322 /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7612E6671800899322 /* SDL_video.c */; };
-		04BD03B812E6671800899322 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */; };
-		04BD03B912E6671800899322 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */; };
-		04BD03BA12E6671800899322 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */; };
 		04BD03F312E6671800899322 /* imKStoUCS.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFB812E6671800899322 /* imKStoUCS.c */; };
 		04BD03F412E6671800899322 /* imKStoUCS.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFFB912E6671800899322 /* imKStoUCS.h */; };
 		04BD03F512E6671800899322 /* SDL_x11clipboard.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */; };
@@ -701,6 +701,10 @@
 		041B2C9E12FA0D680087D585 /* SDL_render.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_render.c; sourceTree = "<group>"; };
 		041B2C9F12FA0D680087D585 /* SDL_sysrender.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysrender.h; sourceTree = "<group>"; };
 		041B2CA112FA0D680087D585 /* SDL_renderer_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_renderer_sw.c; sourceTree = "<group>"; };
+		04409B8D12FA97ED00FB9AA8 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
+		04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
+		04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
+		04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
 		044E5F8411E6051C0076F181 /* SDL_clipboard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_clipboard.h; path = ../../include/SDL_clipboard.h; sourceTree = SOURCE_ROOT; };
 		0469A10912EE4BF100B846D6 /* SDL_blendmode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_blendmode.h; path = ../../include/SDL_blendmode.h; sourceTree = SOURCE_ROOT; };
 		04BDFD7412E6671700899322 /* SDL_atomic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_atomic.c; sourceTree = "<group>"; };
@@ -814,7 +818,6 @@
 		04BDFEE912E6671800899322 /* SDL_nullevents_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullevents_c.h; sourceTree = "<group>"; };
 		04BDFEEC12E6671800899322 /* SDL_nullvideo.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullvideo.c; sourceTree = "<group>"; };
 		04BDFEED12E6671800899322 /* SDL_nullvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullvideo.h; sourceTree = "<group>"; };
-		04BDFF0412E6671800899322 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; };
 		04BDFF4812E6671800899322 /* SDL_alphamult.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_alphamult.c; sourceTree = "<group>"; };
 		04BDFF4912E6671800899322 /* SDL_alphamult.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_alphamult.h; sourceTree = "<group>"; };
 		04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_blendfillrect.c; sourceTree = "<group>"; };
@@ -855,9 +858,6 @@
 		04BDFF7412E6671800899322 /* SDL_surface.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_surface.c; sourceTree = "<group>"; };
 		04BDFF7512E6671800899322 /* SDL_sysvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysvideo.h; sourceTree = "<group>"; };
 		04BDFF7612E6671800899322 /* SDL_video.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_video.c; sourceTree = "<group>"; };
-		04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; };
-		04BDFF7812E6671800899322 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; };
-		04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; };
 		04BDFFB812E6671800899322 /* imKStoUCS.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = imKStoUCS.c; sourceTree = "<group>"; };
 		04BDFFB912E6671800899322 /* imKStoUCS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = imKStoUCS.h; sourceTree = "<group>"; };
 		04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_x11clipboard.c; sourceTree = "<group>"; };
@@ -1188,8 +1188,12 @@
 			children = (
 				041B2C9A12FA0D680087D585 /* opengl */,
 				041B2CA012FA0D680087D585 /* software */,
+				04409B8D12FA97ED00FB9AA8 /* mmx.h */,
 				041B2C9E12FA0D680087D585 /* SDL_render.c */,
 				041B2C9F12FA0D680087D585 /* SDL_sysrender.h */,
+				04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */,
+				04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */,
+				04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */,
 			);
 			name = render;
 			path = ../../src/render;
@@ -1483,7 +1487,6 @@
 				04BDFEE712E6671800899322 /* dummy */,
 				04BDFFB712E6671800899322 /* x11 */,
 				04BDFFD712E6671800899322 /* Xext */,
-				04BDFF0412E6671800899322 /* mmx.h */,
 				04BDFF4812E6671800899322 /* SDL_alphamult.c */,
 				04BDFF4912E6671800899322 /* SDL_alphamult.h */,
 				04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */,
@@ -1524,9 +1527,6 @@
 				04BDFF7412E6671800899322 /* SDL_surface.c */,
 				04BDFF7512E6671800899322 /* SDL_sysvideo.h */,
 				04BDFF7612E6671800899322 /* SDL_video.c */,
-				04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */,
-				04BDFF7812E6671800899322 /* SDL_yuv_sw.c */,
-				04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */,
 			);
 			name = video;
 			path = ../../src/video;
@@ -1893,7 +1893,6 @@
 				04BD010312E6671800899322 /* SDL_cocoawindow.h in Headers */,
 				04BD011812E6671800899322 /* SDL_nullevents_c.h in Headers */,
 				04BD011C12E6671800899322 /* SDL_nullvideo.h in Headers */,
-				04BD013212E6671800899322 /* mmx.h in Headers */,
 				04BD017012E6671800899322 /* SDL_alphamult.h in Headers */,
 				04BD017612E6671800899322 /* SDL_blit.h in Headers */,
 				04BD017B12E6671800899322 /* SDL_blit_auto.h in Headers */,
@@ -1907,7 +1906,6 @@
 				04BD019712E6671800899322 /* SDL_RLEaccel_c.h in Headers */,
 				04BD019912E6671800899322 /* SDL_shape_internals.h in Headers */,
 				04BD019C12E6671800899322 /* SDL_sysvideo.h in Headers */,
-				04BD01A012E6671800899322 /* SDL_yuv_sw_c.h in Headers */,
 				04BD01DC12E6671800899322 /* imKStoUCS.h in Headers */,
 				04BD01DE12E6671800899322 /* SDL_x11clipboard.h in Headers */,
 				04BD01E012E6671800899322 /* SDL_x11dyn.h in Headers */,
@@ -1942,6 +1940,8 @@
 				0469A10B12EE4BF100B846D6 /* SDL_blendmode.h in Headers */,
 				041B2C9512FA0D2A0087D585 /* SDL_render.h in Headers */,
 				041B2CA612FA0D680087D585 /* SDL_sysrender.h in Headers */,
+				04409B9112FA97ED00FB9AA8 /* mmx.h in Headers */,
+				04409B9312FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2016,7 +2016,6 @@
 				04BD031D12E6671800899322 /* SDL_cocoawindow.h in Headers */,
 				04BD033212E6671800899322 /* SDL_nullevents_c.h in Headers */,
 				04BD033612E6671800899322 /* SDL_nullvideo.h in Headers */,
-				04BD034C12E6671800899322 /* mmx.h in Headers */,
 				04BD038A12E6671800899322 /* SDL_alphamult.h in Headers */,
 				04BD039012E6671800899322 /* SDL_blit.h in Headers */,
 				04BD039512E6671800899322 /* SDL_blit_auto.h in Headers */,
@@ -2030,7 +2029,6 @@
 				04BD03B112E6671800899322 /* SDL_RLEaccel_c.h in Headers */,
 				04BD03B312E6671800899322 /* SDL_shape_internals.h in Headers */,
 				04BD03B612E6671800899322 /* SDL_sysvideo.h in Headers */,
-				04BD03BA12E6671800899322 /* SDL_yuv_sw_c.h in Headers */,
 				04BD03F412E6671800899322 /* imKStoUCS.h in Headers */,
 				04BD03F612E6671800899322 /* SDL_x11clipboard.h in Headers */,
 				04BD03F812E6671800899322 /* SDL_x11dyn.h in Headers */,
@@ -2065,6 +2063,8 @@
 				0469A10D12EE4BF100B846D6 /* SDL_blendmode.h in Headers */,
 				041B2C9612FA0D2A0087D585 /* SDL_render.h in Headers */,
 				041B2CAC12FA0D680087D585 /* SDL_sysrender.h in Headers */,
+				04409B9512FA97ED00FB9AA8 /* mmx.h in Headers */,
+				04409B9712FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2412,8 +2412,6 @@
 				04BD019A12E6671800899322 /* SDL_stretch.c in Sources */,
 				04BD019B12E6671800899322 /* SDL_surface.c in Sources */,
 				04BD019D12E6671800899322 /* SDL_video.c in Sources */,
-				04BD019E12E6671800899322 /* SDL_yuv_mmx.c in Sources */,
-				04BD019F12E6671800899322 /* SDL_yuv_sw.c in Sources */,
 				04BD01DB12E6671800899322 /* imKStoUCS.c in Sources */,
 				04BD01DD12E6671800899322 /* SDL_x11clipboard.c in Sources */,
 				04BD01DF12E6671800899322 /* SDL_x11dyn.c in Sources */,
@@ -2443,6 +2441,8 @@
 				041B2CA312FA0D680087D585 /* SDL_renderer_gl.c in Sources */,
 				041B2CA512FA0D680087D585 /* SDL_render.c in Sources */,
 				041B2CA712FA0D680087D585 /* SDL_renderer_sw.c in Sources */,
+				04409B9212FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */,
+				04409B9412FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -2539,8 +2539,6 @@
 				04BD03B412E6671800899322 /* SDL_stretch.c in Sources */,
 				04BD03B512E6671800899322 /* SDL_surface.c in Sources */,
 				04BD03B712E6671800899322 /* SDL_video.c in Sources */,
-				04BD03B812E6671800899322 /* SDL_yuv_mmx.c in Sources */,
-				04BD03B912E6671800899322 /* SDL_yuv_sw.c in Sources */,
 				04BD03F312E6671800899322 /* imKStoUCS.c in Sources */,
 				04BD03F512E6671800899322 /* SDL_x11clipboard.c in Sources */,
 				04BD03F712E6671800899322 /* SDL_x11dyn.c in Sources */,
@@ -2570,6 +2568,8 @@
 				041B2CA912FA0D680087D585 /* SDL_renderer_gl.c in Sources */,
 				041B2CAB12FA0D680087D585 /* SDL_render.c in Sources */,
 				041B2CAD12FA0D680087D585 /* SDL_renderer_sw.c in Sources */,
+				04409B9612FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */,
+				04409B9812FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
--- a/include/SDL_pixels.h	Wed Feb 02 22:55:12 2011 -0800
+++ b/include/SDL_pixels.h	Thu Feb 03 00:19:40 2011 -0800
@@ -122,18 +122,26 @@
 #define SDL_PIXELORDER(X)	(((X) >> 20) & 0x0F)
 #define SDL_PIXELLAYOUT(X)	(((X) >> 16) & 0x0F)
 #define SDL_BITSPERPIXEL(X)	(((X) >> 8) & 0xFF)
-#define SDL_BYTESPERPIXEL(X)	(((X) >> 0) & 0xFF)
+#define SDL_BYTESPERPIXEL(X) \
+    (SDL_ISPIXELFORMAT_FOURCC(X) ? \
+        ((((X) == SDL_PIXELFORMAT_YV12) || \
+          ((X) == SDL_PIXELFORMAT_IYUV) || \
+          ((X) == SDL_PIXELFORMAT_YUY2) || \
+          ((X) == SDL_PIXELFORMAT_UYVY) || \
+          ((X) == SDL_PIXELFORMAT_YVYU)) ? 2 : 1) : (((X) >> 0) & 0xFF))
 
 #define SDL_ISPIXELFORMAT_INDEXED(format)   \
-    ((SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX1) || \
-     (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX4) || \
-     (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX8))
+    (!SDL_ISPIXELFORMAT_FOURCC(format) && \
+     ((SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX1) || \
+      (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX4) || \
+      (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX8)))
 
 #define SDL_ISPIXELFORMAT_ALPHA(format)   \
-    ((SDL_PIXELORDER(format) == SDL_PACKEDORDER_ARGB) || \
-     (SDL_PIXELORDER(format) == SDL_PACKEDORDER_RGBA) || \
-     (SDL_PIXELORDER(format) == SDL_PACKEDORDER_ABGR) || \
-     (SDL_PIXELORDER(format) == SDL_PACKEDORDER_BGRA))
+    (!SDL_ISPIXELFORMAT_FOURCC(format) && \
+     ((SDL_PIXELORDER(format) == SDL_PACKEDORDER_ARGB) || \
+      (SDL_PIXELORDER(format) == SDL_PACKEDORDER_RGBA) || \
+      (SDL_PIXELORDER(format) == SDL_PACKEDORDER_ABGR) || \
+      (SDL_PIXELORDER(format) == SDL_PACKEDORDER_BGRA)))
 
 #define SDL_ISPIXELFORMAT_FOURCC(format)    \
     ((format) && !((format) & 0x80000000))
--- a/include/SDL_rect.h	Wed Feb 02 22:55:12 2011 -0800
+++ b/include/SDL_rect.h	Thu Feb 03 00:19:40 2011 -0800
@@ -70,25 +70,6 @@
 } SDL_Rect;
 
 /**
- *  \brief A structure used to track dirty rectangles
- *  
- *  \sa SDL_AddDirtyRect
- *  \sa SDL_ClearDirtyRects
- *  \sa SDL_FreeDirtyRects
- */
-typedef struct SDL_DirtyRect
-{
-    SDL_Rect rect;
-    struct SDL_DirtyRect *next;
-} SDL_DirtyRect;
-
-typedef struct SDL_DirtyRectList
-{
-    SDL_DirtyRect *list;
-    SDL_DirtyRect *free;
-} SDL_DirtyRectList;
-
-/**
  *  \brief Returns true if the rectangle has no area.
  */
 #define SDL_RectEmpty(X)    (((X)->w <= 0) || ((X)->h <= 0))
@@ -143,22 +124,6 @@
                                                           int *Y1, int *X2,
                                                           int *Y2);
 
-/**
- *  \brief Add a rectangle to a dirty rectangle list
- */
-extern DECLSPEC void SDLCALL SDL_AddDirtyRect(SDL_DirtyRectList * list, const SDL_Rect * rect);
-
-/**
- *  \brief Remove all rectangles associated with a dirty rectangle list
- */
-extern DECLSPEC void SDLCALL SDL_ClearDirtyRects(SDL_DirtyRectList * list);
-
-/**
- *  \brief Free memory associated with a dirty rectangle list
- */
-extern DECLSPEC void SDLCALL SDL_FreeDirtyRects(SDL_DirtyRectList * list);
-
-
 /* Ends C function definitions when using C++ */
 #ifdef __cplusplus
 /* *INDENT-OFF* */
--- a/include/SDL_render.h	Wed Feb 02 22:55:12 2011 -0800
+++ b/include/SDL_render.h	Thu Feb 03 00:19:40 2011 -0800
@@ -61,7 +61,7 @@
     const char *name;           /**< The name of the renderer */
     Uint32 flags;               /**< Supported ::SDL_RendererFlags */
     Uint32 num_texture_formats; /**< The number of available texture formats */
-    Uint32 texture_formats[50]; /**< The available texture formats */
+    Uint32 texture_formats[16]; /**< The available texture formats */
     int max_texture_width;      /**< The maximimum texture width */
     int max_texture_height;     /**< The maximimum texture height */
 } SDL_RendererInfo;
@@ -204,22 +204,6 @@
                                              int *w, int *h);
 
 /**
- *  \brief Query the pixels of a texture, if the texture does not need to be 
- *         locked for pixel access.
- *  
- *  \param texture A texture to be queried, which was created with 
- *                   ::SDL_TEXTUREACCESS_STREAMING.
- *  \param pixels    A pointer filled with a pointer to the pixels for the 
- *                   texture.
- *  \param pitch     A pointer filled in with the pitch of the pixel data.
- *  
- *  \return 0 on success, or -1 if the texture is not valid, or must be locked 
- *          for pixel access.
- */
-extern DECLSPEC int SDLCALL SDL_QueryTexturePixels(SDL_Texture * texture,
-                                                   void **pixels, int *pitch);
-
-/**
  *  \brief Set an additional color value used in render copy operations.
  *  
  *  \param texture The texture to update.
@@ -299,7 +283,7 @@
 /**
  *  \brief Get the blend mode used for texture copy operations.
  *  
- *  \param texture The texture to query.
+ *  \param texture   The texture to query.
  *  \param blendMode A pointer filled in with the current blend mode.
  *  
  *  \return 0 on success, or -1 if the texture is not valid.
@@ -312,7 +296,7 @@
 /**
  *  \brief Update the given texture rectangle with new pixel data.
  *  
- *  \param texture The texture to update
+ *  \param texture   The texture to update
  *  \param rect      A pointer to the rectangle of pixels to update, or NULL to 
  *                   update the entire texture.
  *  \param pixels    The raw pixel data.
@@ -329,49 +313,28 @@
 /**
  *  \brief Lock a portion of the texture for pixel access.
  *  
- *  \param texture The texture to lock for access, which was created with 
+ *  \param texture   The texture to lock for access, which was created with 
  *                   ::SDL_TEXTUREACCESS_STREAMING.
  *  \param rect      A pointer to the rectangle to lock for access. If the rect 
  *                   is NULL, the entire texture will be locked.
- *  \param markDirty If this is nonzero, the locked area will be marked dirty 
- *                   when the texture is unlocked.
  *  \param pixels    This is filled in with a pointer to the locked pixels, 
  *                   appropriately offset by the locked area.
  *  \param pitch     This is filled in with the pitch of the locked pixels.
  *  
- *  \return 0 on success, or -1 if the texture is not valid or was created with 
- *          ::SDL_TEXTUREACCESS_STATIC.
+ *  \return 0 on success, or -1 if the texture is not valid or was not created with ::SDL_TEXTUREACCESS_STREAMING.
  *  
- *  \sa SDL_DirtyTexture()
  *  \sa SDL_UnlockTexture()
  */
 extern DECLSPEC int SDLCALL SDL_LockTexture(SDL_Texture * texture,
                                             const SDL_Rect * rect,
-                                            int markDirty, void **pixels,
-                                            int *pitch);
-
-/**
- *  \brief Unlock a texture, uploading the changes to renderer memory, if needed.
- *  
- *  \sa SDL_LockTexture()
- *  \sa SDL_DirtyTexture()
- */
-extern DECLSPEC void SDLCALL SDL_UnlockTexture(SDL_Texture * texture);
+                                            void **pixels, int *pitch);
 
 /**
- *  \brief Mark the specified rectangles of the texture as dirty.
- *  
- *  \param texture The texture to mark dirty, which was created with 
- *                   ::SDL_TEXTUREACCESS_STREAMING.
- *  \param numrects  The number of rectangles pointed to by rects.
- *  \param rects     The pointer to an array of dirty rectangles.
+ *  \brief Unlock a texture, uploading the changes to video memory, if needed.
  *  
  *  \sa SDL_LockTexture()
- *  \sa SDL_UnlockTexture()
  */
-extern DECLSPEC void SDLCALL SDL_DirtyTexture(SDL_Texture * texture,
-                                              int numrects,
-                                              const SDL_Rect * rects);
+extern DECLSPEC void SDLCALL SDL_UnlockTexture(SDL_Texture * texture);
 
 /**
  *  \brief Set the color used for drawing operations (Fill and Line).
--- a/src/SDL_compat.c	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/SDL_compat.c	Thu Feb 03 00:19:40 2011 -0800
@@ -28,7 +28,6 @@
 
 #include "video/SDL_sysvideo.h"
 #include "video/SDL_pixels_c.h"
-#include "video/SDL_yuv_sw_c.h"
 
 static SDL_Window *SDL_VideoWindow = NULL;
 static SDL_Renderer *SDL_VideoRenderer = NULL;
@@ -344,13 +343,10 @@
 static SDL_Surface *
 CreateVideoSurface(SDL_Texture * texture)
 {
-    SDL_Surface *surface;
     Uint32 format;
     int w, h;
     int bpp;
     Uint32 Rmask, Gmask, Bmask, Amask;
-    void *pixels;
-    int pitch;
 
     if (SDL_QueryTexture(texture, &format, NULL, &w, &h) < 0) {
         return NULL;
@@ -362,15 +358,7 @@
         return NULL;
     }
 
-    if (SDL_QueryTexturePixels(texture, &pixels, &pitch) == 0) {
-        surface =
-            SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask, Gmask,
-                                     Bmask, Amask);
-    } else {
-        surface =
-            SDL_CreateRGBSurface(0, w, h, bpp, Rmask, Gmask, Bmask, Amask);
-    }
-    return surface;
+    return SDL_CreateRGBSurface(0, w, h, bpp, Rmask, Gmask, Bmask, Amask);
 }
 
 static void
@@ -412,8 +400,6 @@
     int w, h;
     Uint32 format;
     int access;
-    void *pixels;
-    int pitch;
 
     /* We can't resize something we don't have... */
     if (!SDL_VideoWindow) {
@@ -454,15 +440,10 @@
 
     SDL_VideoSurface->w = width;
     SDL_VideoSurface->h = height;
-    if (SDL_QueryTexturePixels(SDL_VideoTexture, &pixels, &pitch) == 0) {
-        SDL_VideoSurface->pixels = pixels;
-        SDL_VideoSurface->pitch = pitch;
-    } else {
-        SDL_CalculatePitch(SDL_VideoSurface);
-        SDL_VideoSurface->pixels =
-            SDL_realloc(SDL_VideoSurface->pixels,
-                        SDL_VideoSurface->h * SDL_VideoSurface->pitch);
-    }
+    SDL_CalculatePitch(SDL_VideoSurface);
+    SDL_VideoSurface->pixels =
+        SDL_realloc(SDL_VideoSurface->pixels,
+                    SDL_VideoSurface->h * SDL_VideoSurface->pitch);
     SDL_SetClipRect(SDL_VideoSurface, NULL);
     SDL_InvalidateMap(SDL_VideoSurface->map);
 
@@ -830,20 +811,15 @@
         screen = SDL_VideoSurface;
     }
     if (screen == SDL_VideoSurface) {
-        if (screen->flags & SDL_PREALLOC) {
-            /* The surface memory is maintained by the renderer */
-            SDL_DirtyTexture(SDL_VideoTexture, numrects, rects);
-        } else {
-            /* The surface memory needs to be copied to texture */
-            int pitch = screen->pitch;
-            int psize = screen->format->BytesPerPixel;
-            for (i = 0; i < numrects; ++i) {
-                const SDL_Rect *rect = &rects[i];
-                void *pixels =
-                    (Uint8 *) screen->pixels + rect->y * pitch +
-                    rect->x * psize;
-                SDL_UpdateTexture(SDL_VideoTexture, rect, pixels, pitch);
-            }
+        /* The surface memory needs to be copied to texture */
+        int pitch = screen->pitch;
+        int psize = screen->format->BytesPerPixel;
+        for (i = 0; i < numrects; ++i) {
+            const SDL_Rect *rect = &rects[i];
+            void *pixels =
+                (Uint8 *) screen->pixels + rect->y * pitch +
+                rect->x * psize;
+            SDL_UpdateTexture(SDL_VideoTexture, rect, pixels, pitch);
         }
         rect.x = 0;
         rect.y = 0;
@@ -1459,8 +1435,6 @@
     Uint16 pitches[3];
     Uint8 *planes[3];
 
-    SDL_SW_YUVTexture *sw;
-
     SDL_Texture *texture;
     Uint32 texture_format;
 };
@@ -1545,24 +1519,6 @@
     overlay->hwdata->texture =
         SDL_CreateTexture(SDL_VideoRenderer, texture_format,
                           SDL_TEXTUREACCESS_STREAMING, w, h);
-    if (overlay->hwdata->texture) {
-        overlay->hwdata->sw = NULL;
-    } else {
-        SDL_DisplayMode current_mode;
-
-        overlay->hwdata->sw = SDL_SW_CreateYUVTexture(texture_format, w, h);
-        if (!overlay->hwdata->sw) {
-            SDL_FreeYUVOverlay(overlay);
-            return NULL;
-        }
-
-        /* Create a supported RGB format texture for display */
-        SDL_GetCurrentDisplayMode(&current_mode);
-        texture_format = current_mode.format;
-        overlay->hwdata->texture =
-            SDL_CreateTexture(SDL_VideoRenderer, texture_format,
-                              SDL_TEXTUREACCESS_STREAMING, w, h);
-    }
     if (!overlay->hwdata->texture) {
         SDL_FreeYUVOverlay(overlay);
         return NULL;
@@ -1582,17 +1538,8 @@
         SDL_SetError("Passed a NULL overlay");
         return -1;
     }
-    if (overlay->hwdata->sw) {
-        if (SDL_SW_QueryYUVTexturePixels(overlay->hwdata->sw, &pixels, &pitch)
-            < 0) {
-            return -1;
-        }
-    } else {
-        if (SDL_LockTexture
-            (overlay->hwdata->texture, NULL, 1, &pixels, &pitch)
-            < 0) {
-            return -1;
-        }
+    if (SDL_LockTexture(overlay->hwdata->texture, NULL, &pixels, &pitch) < 0) {
+        return -1;
     }
     overlay->pixels[0] = (Uint8 *) pixels;
     overlay->pitches[0] = pitch;
@@ -1620,25 +1567,7 @@
     if (!overlay) {
         return;
     }
-    if (overlay->hwdata->sw) {
-        void *pixels;
-        int pitch;
-        if (SDL_LockTexture
-            (overlay->hwdata->texture, NULL, 1, &pixels, &pitch) == 0) {
-            SDL_Rect srcrect;
-
-            srcrect.x = 0;
-            srcrect.y = 0;
-            srcrect.w = overlay->w;
-            srcrect.h = overlay->h;
-            SDL_SW_CopyYUVToRGB(overlay->hwdata->sw, &srcrect,
-                                overlay->hwdata->texture_format,
-                                overlay->w, overlay->h, pixels, pitch);
-            SDL_UnlockTexture(overlay->hwdata->texture);
-        }
-    } else {
-        SDL_UnlockTexture(overlay->hwdata->texture);
-    }
+    SDL_UnlockTexture(overlay->hwdata->texture);
 }
 
 int
--- a/src/render/SDL_render.c	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/SDL_render.c	Thu Feb 03 00:19:40 2011 -0800
@@ -152,6 +152,44 @@
     return 0;
 }
 
+/* Return SDL_TRUE if 'format' appears in the renderer's advertised list of
+   texture formats, SDL_FALSE otherwise. */
+static SDL_bool
+IsSupportedFormat(SDL_Renderer * renderer, Uint32 format)
+{
+    Uint32 i;
+
+    for (i = 0; i < renderer->info.num_texture_formats; ++i) {
+        if (renderer->info.texture_formats[i] == format) {
+            return SDL_TRUE;
+        }
+    }
+    return SDL_FALSE;
+}
+
+/* Pick the renderer format used to back a texture whose requested 'format'
+   is not supported directly.  Falls back to the first advertised format. */
+static Uint32
+GetClosestSupportedFormat(SDL_Renderer * renderer, Uint32 format)
+{
+    Uint32 i;
+    SDL_bool hasAlpha = SDL_ISPIXELFORMAT_ALPHA(format);
+
+    /* We just want to match the first format that has the same channels.
+       FOURCC candidates are skipped: SDL_ISPIXELFORMAT_ALPHA() is false for
+       every FOURCC format, so they would otherwise match any request without
+       alpha, and the backing texture is written through
+       SDL_SW_CopyYUVToRGB() / SDL_ConvertPixels(). */
+    for (i = 0; i < renderer->info.num_texture_formats; ++i) {
+        Uint32 candidate = renderer->info.texture_formats[i];
+
+        if (!SDL_ISPIXELFORMAT_FOURCC(candidate) &&
+            SDL_ISPIXELFORMAT_ALPHA(candidate) == hasAlpha) {
+            return candidate;
+        }
+    }
+    return renderer->info.texture_formats[0];
+}
+
 SDL_Texture *
 SDL_CreateTexture(SDL_Renderer * renderer, Uint32 format, int access, int w, int h)
 {
@@ -159,14 +187,18 @@
 
     CHECK_RENDERER_MAGIC(renderer, NULL);
 
+    if (SDL_ISPIXELFORMAT_INDEXED(format)) {
+        SDL_SetError("Palettized textures are not supported");
+        return NULL;
+    }
     if (w <= 0 || h <= 0) {
         SDL_SetError("Texture dimensions can't be 0");
-        return 0;
+        return NULL;
     }
     texture = (SDL_Texture *) SDL_calloc(1, sizeof(*texture));
     if (!texture) {
         SDL_OutOfMemory();
-        return 0;
+        return NULL;
     }
     texture->magic = &texture_magic;
     texture->format = format;
@@ -184,9 +216,38 @@
     }
     renderer->textures = texture;
 
-    if (renderer->CreateTexture(renderer, texture) < 0) {
-        SDL_DestroyTexture(texture);
-        return 0;
+    if (IsSupportedFormat(renderer, format)) {
+        if (renderer->CreateTexture(renderer, texture) < 0) {
+            SDL_DestroyTexture(texture);
+            return NULL;
+        }
+    } else {
+        /* Back this texture with one in a renderer-supported format */
+        texture->native = SDL_CreateTexture(renderer,
+                                GetClosestSupportedFormat(renderer, format),
+                                access, w, h);
+        if (!texture->native) {
+            SDL_DestroyTexture(texture);
+            return NULL;
+        }
+
+        if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
+            texture->yuv = SDL_SW_CreateYUVTexture(format, w, h);
+            if (!texture->yuv) {
+                SDL_DestroyTexture(texture);
+                return NULL;
+            }
+        } else if (access == SDL_TEXTUREACCESS_STREAMING) {
+            /* Buffer in the requested format for SDL_LockTexture().
+               The pitch is 4 byte aligned */
+            texture->pitch = (((w * SDL_BYTESPERPIXEL(format)) + 3) & ~3);
+            texture->pixels = SDL_malloc(texture->pitch * h);
+            if (!texture->pixels) {
+                SDL_OutOfMemory();
+                SDL_DestroyTexture(texture);
+                return NULL;
+            }
+        }
     }
     return texture;
 }
@@ -501,21 +559,6 @@
 }
 
 int
-SDL_QueryTexturePixels(SDL_Texture * texture, void **pixels, int *pitch)
-{
-    SDL_Renderer *renderer;
-
-    CHECK_TEXTURE_MAGIC(texture, -1);
-
-    renderer = texture->renderer;
-    if (!renderer->QueryTexturePixels) {
-        SDL_Unsupported();
-        return -1;
-    }
-    return renderer->QueryTexturePixels(renderer, texture, pixels, pitch);
-}
-
-int
 SDL_SetTextureColorMod(SDL_Texture * texture, Uint8 r, Uint8 g, Uint8 b)
 {
     SDL_Renderer *renderer;
@@ -531,7 +574,9 @@
     texture->r = r;
     texture->g = g;
     texture->b = b;
-    if (renderer->SetTextureColorMod) {
+    if (texture->native) {
+        return SDL_SetTextureColorMod(texture->native, r, g, b);
+    } else if (renderer->SetTextureColorMod) {
         return renderer->SetTextureColorMod(renderer, texture);
     } else {
         return 0;
@@ -573,7 +618,9 @@
         texture->modMode &= ~SDL_TEXTUREMODULATE_ALPHA;
     }
     texture->a = alpha;
-    if (renderer->SetTextureAlphaMod) {
+    if (texture->native) {
+        return SDL_SetTextureAlphaMod(texture->native, alpha);
+    } else if (renderer->SetTextureAlphaMod) {
         return renderer->SetTextureAlphaMod(renderer, texture);
     } else {
         return 0;
@@ -600,7 +647,9 @@
 
     renderer = texture->renderer;
     texture->blendMode = blendMode;
-    if (renderer->SetTextureBlendMode) {
+    if (texture->native) {      /* wrapper: forward to the backing texture */
+        return SDL_SetTextureBlendMode(texture->native, blendMode);
+    } else if (renderer->SetTextureBlendMode) {
         return renderer->SetTextureBlendMode(renderer, texture);
     } else {
         return 0;
@@ -618,6 +667,110 @@
     return 0;
 }
 
+/* SDL_UpdateTexture() path for FOURCC textures the renderer cannot handle:
+   store the new data in the software YUV texture, then convert the whole
+   texture into the native texture.
+   Returns 0 on success or a negative value on failure. */
+static int
+SDL_UpdateTextureYUV(SDL_Texture * texture, const SDL_Rect * rect,
+                     const void *pixels, int pitch)
+{
+    SDL_Texture *native = texture->native;
+    SDL_Rect full_rect;
+
+    if (SDL_SW_UpdateYUVTexture(texture->yuv, rect, pixels, pitch) < 0) {
+        return -1;
+    }
+
+    /* From here on the caller's rect is ignored; the full area is converted */
+    full_rect.x = 0;
+    full_rect.y = 0;
+    full_rect.w = texture->w;
+    full_rect.h = texture->h;
+    rect = &full_rect;
+
+    if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
+        /* We can lock the texture and copy to it */
+        void *native_pixels;
+        int native_pitch;
+
+        if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
+            return -1;
+        }
+        SDL_SW_CopyYUVToRGB(texture->yuv, rect, native->format,
+                            rect->w, rect->h, native_pixels, native_pitch);
+        SDL_UnlockTexture(native);
+    } else {
+        /* Use a temporary buffer for updating */
+        void *temp_pixels;
+        int temp_pitch;
+        int status;
+
+        temp_pitch = (((rect->w * SDL_BYTESPERPIXEL(native->format)) + 3) & ~3);
+        temp_pixels = SDL_malloc(rect->h * temp_pitch);
+        if (!temp_pixels) {
+            SDL_OutOfMemory();
+            return -1;
+        }
+        SDL_SW_CopyYUVToRGB(texture->yuv, rect, native->format,
+                            rect->w, rect->h, temp_pixels, temp_pitch);
+        status = SDL_UpdateTexture(native, rect, temp_pixels, temp_pitch);
+        SDL_free(temp_pixels);
+        if (status < 0) {
+            /* Don't report success when the native upload failed */
+            return status;
+        }
+    }
+    return 0;
+}
+
+/* SDL_UpdateTexture() path for non-FOURCC textures backed by a native
+   texture of a different format: convert the caller's pixels from
+   texture->format to native->format and upload them.
+   Returns 0 on success or a negative value on failure. */
+static int
+SDL_UpdateTextureNative(SDL_Texture * texture, const SDL_Rect * rect,
+                        const void *pixels, int pitch)
+{
+    SDL_Texture *native = texture->native;
+
+    if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
+        /* We can lock the texture and copy to it */
+        void *native_pixels;
+        int native_pitch;
+
+        if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
+            return -1;
+        }
+        SDL_ConvertPixels(rect->w, rect->h,
+                          texture->format, pixels, pitch,
+                          native->format, native_pixels, native_pitch);
+        SDL_UnlockTexture(native);
+    } else {
+        /* Use a temporary buffer for updating */
+        void *temp_pixels;
+        int temp_pitch;
+        int status;
+
+        temp_pitch = (((rect->w * SDL_BYTESPERPIXEL(native->format)) + 3) & ~3);
+        temp_pixels = SDL_malloc(rect->h * temp_pitch);
+        if (!temp_pixels) {
+            SDL_OutOfMemory();
+            return -1;
+        }
+        SDL_ConvertPixels(rect->w, rect->h,
+                          texture->format, pixels, pitch,
+                          native->format, temp_pixels, temp_pitch);
+        status = SDL_UpdateTexture(native, rect, temp_pixels, temp_pitch);
+        SDL_free(temp_pixels);
+        if (status < 0) {
+            /* Don't report success when the native upload failed */
+            return status;
+        }
+    }
+    return 0;
+}
+
 int
 SDL_UpdateTexture(SDL_Texture * texture, const SDL_Rect * rect,
                   const void *pixels, int pitch)
@@ -627,11 +761,6 @@
 
     CHECK_TEXTURE_MAGIC(texture, -1);
 
-    renderer = texture->renderer;
-    if (!renderer->UpdateTexture) {
-        SDL_Unsupported();
-        return -1;
-    }
     if (!rect) {
         full_rect.x = 0;
         full_rect.y = 0;
@@ -639,11 +768,38 @@
         full_rect.h = texture->h;
         rect = &full_rect;
     }
-    return renderer->UpdateTexture(renderer, texture, rect, pixels, pitch);
+
+    if (texture->yuv) {
+        return SDL_UpdateTextureYUV(texture, rect, pixels, pitch);
+    } else if (texture->native) {
+        return SDL_UpdateTextureNative(texture, rect, pixels, pitch);
+    } else {
+        renderer = texture->renderer;
+        return renderer->UpdateTexture(renderer, texture, rect, pixels, pitch);
+    }
+}
+
+static int
+SDL_LockTextureYUV(SDL_Texture * texture, const SDL_Rect * rect,
+                   void **pixels, int *pitch)
+{
+    return SDL_SW_LockYUVTexture(texture->yuv, rect, pixels, pitch);    /* delegates to the software YUV texture; texture->native is not touched here */
+}
+
+static int
+SDL_LockTextureNative(SDL_Texture * texture, const SDL_Rect * rect,
+                      void **pixels, int *pitch)
+{
+    texture->locked_rect = *rect;    /* read back by SDL_UnlockTextureNative() */
+    *pixels = (void *) ((Uint8 *) texture->pixels +    /* top-left pixel of rect inside texture->pixels */
+                        rect->y * texture->pitch +
+                        rect->x * SDL_BYTESPERPIXEL(texture->format));
+    *pitch = texture->pitch;
+    return 0;    /* always 0; the native texture is not locked at this point */
 }
 
 int
-SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect, int markDirty,
+SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect,
                 void **pixels, int *pitch)
 {
     SDL_Renderer *renderer;
@@ -655,11 +811,7 @@
         SDL_SetError("SDL_LockTexture(): texture must be streaming");
         return -1;
     }
-    renderer = texture->renderer;
-    if (!renderer->LockTexture) {
-        SDL_Unsupported();
-        return -1;
-    }
+
     if (!rect) {
         full_rect.x = 0;
         full_rect.y = 0;
@@ -667,8 +819,57 @@
         full_rect.h = texture->h;
         rect = &full_rect;
     }
-    return renderer->LockTexture(renderer, texture, rect, markDirty, pixels,
-                                 pitch);
+
+    if (texture->yuv) {
+        return SDL_LockTextureYUV(texture, rect, pixels, pitch);
+    } else if (texture->native) {
+        return SDL_LockTextureNative(texture, rect, pixels, pitch);
+    } else {
+        renderer = texture->renderer;
+        return renderer->LockTexture(renderer, texture, rect, pixels, pitch);
+    }
+}
+
+static void
+SDL_UnlockTextureYUV(SDL_Texture * texture)
+{
+    SDL_Texture *native = texture->native;
+    void *native_pixels;
+    int native_pitch;
+    SDL_Rect rect;    /* always the whole texture, not the area that was locked */
+
+    rect.x = 0;
+    rect.y = 0;
+    rect.w = texture->w;
+    rect.h = texture->h;
+
+    if (SDL_LockTexture(native, &rect, &native_pixels, &native_pitch) < 0) {
+        return;    /* native texture could not be locked; nothing is converted */
+    }
+    SDL_SW_CopyYUVToRGB(texture->yuv, &rect, native->format,    /* software YUV data -> native->format pixels */
+                        rect.w, rect.h, native_pixels, native_pitch);
+    SDL_UnlockTexture(native);
+}
+
+static void
+SDL_UnlockTextureNative(SDL_Texture * texture)
+{
+    SDL_Texture *native = texture->native;
+    void *native_pixels;
+    int native_pitch;
+    const SDL_Rect *rect = &texture->locked_rect;    /* area recorded by SDL_LockTextureNative() */
+    const void* pixels = (void *) ((Uint8 *) texture->pixels +
+                        rect->y * texture->pitch +
+                        rect->x * SDL_BYTESPERPIXEL(texture->format));
+    int pitch = texture->pitch;
+
+    if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) {
+        return;    /* native texture could not be locked; nothing is uploaded */
+    }
+    SDL_ConvertPixels(rect->w, rect->h,    /* texture->pixels -> native->format */
+                      texture->format, pixels, pitch,
+                      native->format, native_pixels, native_pitch);
+    SDL_UnlockTexture(native);
 }
 
 void
@@ -681,29 +882,14 @@
     if (texture->access != SDL_TEXTUREACCESS_STREAMING) {
         return;
     }
-    renderer = texture->renderer;
-    if (!renderer->UnlockTexture) {
-        return;
+    if (texture->yuv) {
+        SDL_UnlockTextureYUV(texture);
+    } else if (texture->native) {
+        SDL_UnlockTextureNative(texture);
+    } else {
+        renderer = texture->renderer;
+        renderer->UnlockTexture(renderer, texture);
     }
-    renderer->UnlockTexture(renderer, texture);
-}
-
-void
-SDL_DirtyTexture(SDL_Texture * texture, int numrects,
-                 const SDL_Rect * rects)
-{
-    SDL_Renderer *renderer;
-
-    CHECK_TEXTURE_MAGIC(texture, );
-
-    if (texture->access != SDL_TEXTUREACCESS_STREAMING) {
-        return;
-    }
-    renderer = texture->renderer;
-    if (!renderer->DirtyTexture) {
-        return;
-    }
-    renderer->DirtyTexture(renderer, texture, numrects, rects);
 }
 
 int
@@ -979,6 +1165,10 @@
         }
     }
 
+    if (texture->native) {
+        texture = texture->native;
+    }
+
     return renderer->RenderCopy(renderer, texture, &real_srcrect,
                                 &real_dstrect);
 }
@@ -1087,6 +1277,16 @@
         renderer->textures = texture->next;
     }
 
+    if (texture->native) {
+        SDL_DestroyTexture(texture->native);
+    }
+    if (texture->yuv) {
+        SDL_SW_DestroyYUVTexture(texture->yuv);
+    }
+    if (texture->pixels) {
+        SDL_free(texture->pixels);
+    }
+
     renderer->DestroyTexture(renderer, texture);
     SDL_free(texture);
 }
--- a/src/render/SDL_sysrender.h	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/SDL_sysrender.h	Thu Feb 03 00:19:40 2011 -0800
@@ -26,6 +26,7 @@
 
 #include "SDL_render.h"
 #include "SDL_events.h"
+#include "SDL_yuv_sw_c.h"
 
 /* The SDL 2D rendering system */
 
@@ -45,6 +46,13 @@
 
     SDL_Renderer *renderer;
 
+    /* Support for formats not supported directly by the renderer */
+    SDL_Texture *native;
+    SDL_SW_YUVTexture *yuv;
+    void *pixels;
+    int pitch;
+    SDL_Rect locked_rect;
+
     void *driverdata;           /**< Driver specific texture representation */
 
     SDL_Texture *prev;
@@ -58,8 +66,6 @@
 
     void (*WindowEvent) (SDL_Renderer * renderer, const SDL_WindowEvent *event);
     int (*CreateTexture) (SDL_Renderer * renderer, SDL_Texture * texture);
-    int (*QueryTexturePixels) (SDL_Renderer * renderer, SDL_Texture * texture,
-                               void **pixels, int *pitch);
     int (*SetTextureColorMod) (SDL_Renderer * renderer,
                                SDL_Texture * texture);
     int (*SetTextureAlphaMod) (SDL_Renderer * renderer,
@@ -70,11 +76,8 @@
                           const SDL_Rect * rect, const void *pixels,
                           int pitch);
     int (*LockTexture) (SDL_Renderer * renderer, SDL_Texture * texture,
-                        const SDL_Rect * rect, int markDirty, void **pixels,
-                        int *pitch);
+                        const SDL_Rect * rect, void **pixels, int *pitch);
     void (*UnlockTexture) (SDL_Renderer * renderer, SDL_Texture * texture);
-    void (*DirtyTexture) (SDL_Renderer * renderer, SDL_Texture * texture,
-                          int numrects, const SDL_Rect * rects);
     int (*RenderClear) (SDL_Renderer * renderer);
     int (*RenderDrawPoints) (SDL_Renderer * renderer, const SDL_Point * points,
                              int count);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/render/SDL_yuv_mmx.c	Thu Feb 03 00:19:40 2011 -0800
@@ -0,0 +1,432 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2010 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+
+#include "SDL_stdinc.h"
+
+#include "mmx.h"
+
+/* *INDENT-OFF* */
+
+static mmx_t MMX_0080w    = { .ud = {0x00800080, 0x00800080} };  /* 128 in every 16-bit lane; psubw'd from the widened Cr/Cb */
+static mmx_t MMX_00FFw    = { .ud = {0x00ff00ff, 0x00ff00ff} };  /* pand mask: keeps the low byte of every 16-bit lane */
+static mmx_t MMX_FF00w    = { .ud = {0xff00ff00, 0xff00ff00} };  /* pand mask: keeps the high byte of every 16-bit lane */
+/* Luma multiplier of the 565 routine; its products are shifted right by 6, i.e. divided by 64. */
+static mmx_t MMX_Ycoeff   = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };  /* 74 */
+/* Chroma multipliers of ColorRGBDitherYV12MMX1X (signed words, result >> 6). */
+static mmx_t MMX_UbluRGB  = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };  /* 114: Cb -> blue  */
+static mmx_t MMX_VredRGB  = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };  /*  89: Cr -> red   */
+static mmx_t MMX_UgrnRGB  = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };  /* -22: Cb -> green */
+static mmx_t MMX_VgrnRGB  = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };  /* -46: Cr -> green */
+/* Chroma multipliers of Color565DitherYV12MMX1X (signed words, result >> 6). */
+static mmx_t MMX_Ublu5x5  = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };  /* 129: Cb -> blue  */
+static mmx_t MMX_Vred5x5  = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };  /* 102: Cr -> red   */
+static mmx_t MMX_Ugrn565  = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };  /* -24: Cb -> green */
+static mmx_t MMX_Vgrn565  = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };  /* -51: Cr -> green */
+/* Field masks applied by the 565 routine when packing 16-bit pixels. */
+static mmx_t MMX_red565   = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };  /* bits 11-15; also applied to blue before it is shifted right by 11 */
+static mmx_t MMX_grn565   = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };  /* bits 5-10 */
+
+/**
+   This MMX assembler is my first assembler/MMX program ever.
+   Thus it may be buggy.
+   Send patches to:
+   mvogt@rhrk.uni-kl.de
+
+   After it worked fine I have "obfuscated" the code a bit to have
+   more parallelism in the MMX units. This means I moved
+   initialisation around and delayed other instructions.
+   Performance measurements did not show that this brought any advantage
+   but in theory it _should_ be faster this way.
+
+   The overall performance gain over the C based dither was 30%-40%.
+   The MMX routine calculates 256 bits = 8 RGB values in each cycle
+   (4 for row1 & 4 for row2)
+
+   The red/green/blue.. coefficients are taken from the mpeg_play
+   player. They look nice, but I don't know if you can have
+   better values, to avoid integer rounding errors.
+   
+
+   IMPORTANT:
+   ==========
+
+   It is a requirement that cr/cb/lum are 8-byte aligned and
+   that out is 16-byte aligned, or you will/may get segfaults
+
+*/
+
+void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
+                              unsigned char *lum, unsigned char *cr,
+                              unsigned char *cb, unsigned char *out,
+                              int rows, int cols, int mod )
+{
+    // YV12 -> 32-bit pixels (each dword is 0x00RRGGBB): one pass of the inner
+    // loop emits 4 pixels on each of two output rows.  colortab and rgb_2_pix
+    // are not referenced by this routine.
+    unsigned char* y = lum +cols*rows;    // Pointer to the end
+    int x = 0;
+    Uint32 *row1 = (Uint32 *)out;         // 32 bit target
+    Uint32 *row2 = (Uint32 *)out+cols+mod;  // start of second row
+    mod = (mod+cols+mod)*4;               // increment for row1 in byte
+
+    __asm__ __volatile__ (
+        // tap dance: cr is kept in a stack slot and %%ebx (the PIC register) is only borrowed briefly.
+        // NOTE(review): %%esp is 4 lower inside the loop; confirm no "m" operand is addressed %%esp-relative.
+        "pushl $0\n"  // save a slot on the stack.
+        "pushl %%ebx\n"  // save %%ebx.
+        "movl %0, %%ebx\n"  // put the thing in ebx.
+        "movl %%ebx,4(%%esp)\n"  // put the thing in the stack slot.
+        "popl %%ebx\n"  // get back %%ebx (the PIC register).
+
+        ".align 8\n"
+        "1:\n"
+
+        // create Cr (result in mm1)
+        "pushl %%ebx\n"
+        "movl 4(%%esp),%%ebx\n"   // ebx = cr, read back from the stack slot
+        "movd (%%ebx),%%mm1\n"   //         0  0  0  0  v3 v2 v1 v0
+        "popl %%ebx\n"
+        "pxor %%mm7,%%mm7\n"      //         00 00 00 00 00 00 00 00
+        "movd (%2), %%mm2\n"           //    0  0  0  0 l3 l2 l1 l0
+        "punpcklbw %%mm7,%%mm1\n" //         0  v3 0  v2 00 v1 00 v0
+        "punpckldq %%mm1,%%mm1\n" //         00 v1 00 v0 00 v1 00 v0
+        "psubw %9,%%mm1\n"        // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
+
+        // create Cr_g (result in mm0)
+        "movq %%mm1,%%mm0\n"           // r1 r1 r0 r0 r1 r1 r0 r0
+        "pmullw %10,%%mm0\n"           // Cr*-46dec=-0.7136*64 (Cr_g)
+        "pmullw %11,%%mm1\n"           // red*89dec=1.4013*64
+        "psraw  $6, %%mm0\n"           // Cr_g=Cr_g/64
+        "psraw  $6, %%mm1\n"           // red=red/64
+
+        // create L1 L2 (result in mm2,mm4)
+        // L2=lum+cols
+        "movq (%2,%4),%%mm3\n"         //   L7 L6 L5 L4 L3 L2 L1 L0 (8 bytes read, low 4 used)
+        "punpckldq %%mm3,%%mm2\n"      //   L3 L2 L1 L0 l3 l2 l1 l0
+        "movq %%mm2,%%mm4\n"           //   L3 L2 L1 L0 l3 l2 l1 l0
+        "pand %12,%%mm2\n"             //   L3 0  L1  0 l3  0 l1  0
+        "pand %13,%%mm4\n"             //   0  L2  0 L0  0 l2  0 l0
+        "psrlw $8,%%mm2\n"             //   0  L3  0 L1  0 l3  0 l1
+
+        // create R (result in mm6)
+        "movq %%mm2,%%mm5\n"           //   0 L3  0 L1  0 l3  0 l1
+        "movq %%mm4,%%mm6\n"           //   0 L2  0 L0  0 l2  0 l0
+        "paddsw  %%mm1, %%mm5\n"       // lum1+red:x R3 x R1 x r3 x r1
+        "paddsw  %%mm1, %%mm6\n"       // lum1+red:x R2 x R0 x r2 x r0
+        "packuswb %%mm5,%%mm5\n"       //  R3 R1 r3 r1 R3 R1 r3 r1
+        "packuswb %%mm6,%%mm6\n"       //  R2 R0 r2 r0 R2 R0 r2 r0
+        "pxor %%mm7,%%mm7\n"      //         00 00 00 00 00 00 00 00
+        "punpcklbw %%mm5,%%mm6\n"      //  R3 R2 R1 R0 r3 r2 r1 r0
+
+        // create Cb (result in mm1)
+        "movd (%1), %%mm1\n"      //         0  0  0  0  u3 u2 u1 u0
+        "punpcklbw %%mm7,%%mm1\n" //         0  u3 0  u2 00 u1 00 u0
+        "punpckldq %%mm1,%%mm1\n" //         00 u1 00 u0 00 u1 00 u0
+        "psubw %9,%%mm1\n"        // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
+
+        // create Cb_g (result in mm5)
+        "movq %%mm1,%%mm5\n"            // u1 u1 u0 u0 u1 u1 u0 u0
+        "pmullw %14,%%mm5\n"            // Cb*-22dec=-0.344*64 (Cb_g)
+        "pmullw %15,%%mm1\n"            // blue*114dec=1.78125*64
+        "psraw  $6, %%mm5\n"            // Cb_g=Cb_g/64
+        "psraw  $6, %%mm1\n"            // blue=blue/64
+
+        // create G (result in mm7)
+        "movq %%mm2,%%mm3\n"      //   0  L3  0 L1  0 l3  0 l1
+        "movq %%mm4,%%mm7\n"      //   0  L2  0 L0  0 l2  0 l0
+        "paddsw  %%mm5, %%mm3\n"  // lum1+Cb_g:x G3t x G1t x g3t x g1t
+        "paddsw  %%mm5, %%mm7\n"  // lum1+Cb_g:x G2t x G0t x g2t x g0t
+        "paddsw  %%mm0, %%mm3\n"  // lum1+Cr_g:x G3  x G1  x g3  x g1
+        "paddsw  %%mm0, %%mm7\n"  // lum1+Cr_g:x G2  x G0  x g2  x g0
+        "packuswb %%mm3,%%mm3\n"  // G3 G1 g3 g1 G3 G1 g3 g1
+        "packuswb %%mm7,%%mm7\n"  // G2 G0 g2 g0 G2 G0 g2 g0
+        "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0
+
+        // create B (result in mm5)
+        "movq %%mm2,%%mm3\n"         //   0  L3  0 L1  0 l3  0 l1
+        "movq %%mm4,%%mm5\n"         //   0  L2  0 L0  0 l2  0 l0
+        "paddsw  %%mm1, %%mm3\n"     // lum1+blue:x B3 x B1 x b3 x b1
+        "paddsw  %%mm1, %%mm5\n"     // lum1+blue:x B2 x B0 x b2 x b0
+        "packuswb %%mm3,%%mm3\n"     // B3 B1 b3 b1 B3 B1 b3 b1
+        "packuswb %%mm5,%%mm5\n"     // B2 B0 b2 b0 B2 B0 b2 b0
+        "punpcklbw %%mm3,%%mm5\n"    // B3 B2 B1 B0 b3 b2 b1 b0
+
+        // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
+
+        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
+        "pxor %%mm4,%%mm4\n"           //  0  0  0  0  0  0  0  0
+        "movq %%mm6,%%mm1\n"           // R3 R2 R1 R0 r3 r2 r1 r0
+        "movq %%mm5,%%mm3\n"           // B3 B2 B1 B0 b3 b2 b1 b0
+
+        // process lower lum
+        "punpcklbw %%mm4,%%mm1\n"      //  0 r3  0 r2  0 r1  0 r0
+        "punpcklbw %%mm4,%%mm3\n"      //  0 b3  0 b2  0 b1  0 b0
+        "movq %%mm1,%%mm2\n"           //  0 r3  0 r2  0 r1  0 r0
+        "movq %%mm3,%%mm0\n"           //  0 b3  0 b2  0 b1  0 b0
+        "punpcklwd %%mm1,%%mm3\n"      //  0 r1  0 b1  0 r0  0 b0
+        "punpckhwd %%mm2,%%mm0\n"      //  0 r3  0 b3  0 r2  0 b2
+
+        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
+        "movq %%mm7,%%mm1\n"           // G3 G2 G1 G0 g3 g2 g1 g0
+        "punpcklbw %%mm1,%%mm2\n"      // g3  0 g2  0 g1  0 g0  0
+        "punpcklwd %%mm4,%%mm2\n"      //  0  0 g1  0  0  0 g0  0
+        "por %%mm3, %%mm2\n"          //  0 r1 g1 b1  0 r0 g0 b0
+        "movq %%mm2,(%3)\n"          // wrote out ! row1
+
+        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
+        "punpcklbw %%mm1,%%mm4\n"      // g3  0 g2  0 g1  0 g0  0
+        "punpckhwd %%mm2,%%mm4\n"      //  0  0 g3  0  0  0 g2  0
+        "por %%mm0, %%mm4\n"          //  0 r3 g3 b3  0 r2 g2 b2
+        "movq %%mm4,8(%3)\n"         // wrote out ! row1
+
+        // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
+        // this can be done "destructive"
+        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
+        "punpckhbw %%mm2,%%mm6\n"      //  0 R3  0 R2  0 R1  0 R0
+        "punpckhbw %%mm1,%%mm5\n"      // G3 B3 G2 B2 G1 B1 G0 B0
+        "movq %%mm5,%%mm1\n"           // G3 B3 G2 B2 G1 B1 G0 B0
+        "punpcklwd %%mm6,%%mm1\n"      //  0 R1 G1 B1  0 R0 G0 B0
+        "movq %%mm1,(%5)\n"          // wrote out ! row2
+        "punpckhwd %%mm6,%%mm5\n"      //  0 R3 G3 B3  0 R2 G2 B2
+        "movq %%mm5,8(%5)\n"         // wrote out ! row2
+
+        "addl $4,%2\n"            // lum+4
+        "leal 16(%3),%3\n"        // row1+16
+        "leal 16(%5),%5\n"        // row2+16
+        "addl $2,(%%esp)\n"        // cr+2 (cr lives in the stack slot)
+        "addl $2,%1\n"           // cb+2
+
+        "addl $4,%6\n"            // x+4
+        "cmpl %4,%6\n"            // x < cols ?  (signed ints)
+
+        "jl 1b\n"
+        "addl %4,%2\n" // lum += cols
+        "addl %8,%3\n" // row1+= mod
+        "addl %8,%5\n" // row2+= mod
+        "movl $0,%6\n" // x=0
+        "cmpl %7,%2\n" // lum below y (end of the luma plane) ?
+        "jb 1b\n"      // unsigned branch: lum and y are addresses
+
+        "addl $4,%%esp\n"  // get rid of the stack slot we reserved.
+        "emms\n"  // reset MMX registers.
+        :   // no outputs. NOTE(review): %1,%2,%3,%5,%6 are modified although declared as inputs.
+        : "m" (cr), "r"(cb),"r"(lum),
+          "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
+          "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
+          "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),"m"(MMX_UbluRGB)
+        : "memory", "cc"  // the asm stores through row1/row2, updates x and changes the flags
+    );
+}
+
+void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
+                             unsigned char *lum, unsigned char *cr,
+                             unsigned char *cb, unsigned char *out,
+                             int rows, int cols, int mod )
+{
+    /* YV12 -> 16-bit RGB565 pixels: one pass of the inner loop emits 8 pixels
+       on each of two output rows.  colortab and rgb_2_pix are not referenced
+       by this routine. */
+    unsigned char* y = lum +cols*rows;    /* Pointer to the end */
+    int x = 0;
+    Uint16 *row1 = (Uint16 *)out;         /* 16 bit target */
+    Uint16 *row2 = (Uint16 *)out+cols+mod;  /* start of second row  */
+    mod = (mod+cols+mod)*2;               /* increment for row1 in byte */
+
+    __asm__ __volatile__(
+        // tap dance: cr is kept in a stack slot and %%ebx (the PIC register) is only borrowed briefly.
+        // NOTE(review): %%esp is 4 lower inside the loop; confirm no "m" operand is addressed %%esp-relative.
+        "pushl $0\n"  // save a slot on the stack.
+        "pushl %%ebx\n"  // save %%ebx.
+        "movl %0, %%ebx\n"  // put the thing in ebx.
+        "movl %%ebx, 4(%%esp)\n"  // put the thing in the stack slot.
+        "popl %%ebx\n"  // get back %%ebx (the PIC register).
+
+        ".align 8\n"
+        "1:\n"
+
+        "movd           (%1),                   %%mm0\n" // 4 Cb         0  0  0  0 u3 u2 u1 u0
+        "pxor           %%mm7,                  %%mm7\n"
+        "pushl %%ebx\n"
+        "movl 4(%%esp), %%ebx\n"  // ebx = cr, read back from the stack slot
+        "movd (%%ebx), %%mm1\n"   // 4 Cr                0  0  0  0 v3 v2 v1 v0
+        "popl %%ebx\n"
+
+        "punpcklbw      %%mm7,                  %%mm0\n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
+        "punpcklbw      %%mm7,                  %%mm1\n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
+        "psubw          %9,                     %%mm0\n" // Cb - 128
+        "psubw          %9,                     %%mm1\n" // Cr - 128
+        "movq           %%mm0,                  %%mm2\n" // Cb                   0 u3  0 u2  0 u1  0 u0
+        "movq           %%mm1,                  %%mm3\n" // Cr
+        "pmullw         %10,                    %%mm2\n" // Cb2green (Cb * -24)
+        "movq           (%2),                   %%mm6\n" // L1      L7 L6 L5 L4 L3 L2 L1 L0
+        "pmullw         %11,                    %%mm0\n" // Cb2blue  (Cb * 129)
+        "pand           %12,                    %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
+        "pmullw         %13,                    %%mm3\n" // Cr2green (Cr * -51)
+        "movq           (%2),                   %%mm7\n" // L2
+        "pmullw         %14,                    %%mm1\n" // Cr2red   (Cr * 102)
+        "psrlw          $8,                     %%mm7\n"        // L2           00 L7 00 L5 00 L3 00 L1
+        "pmullw         %15,                    %%mm6\n" // lum1 (even pixels * 74)
+        "paddw          %%mm3,                  %%mm2\n" // Cb2green + Cr2green == green
+        "pmullw         %15,                    %%mm7\n" // lum2 (odd pixels * 74)
+
+        "movq           %%mm6,                  %%mm4\n" // lum1
+        "paddw          %%mm0,                  %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
+        "movq           %%mm4,                  %%mm5\n" // lum1
+        "paddw          %%mm1,                  %%mm4\n" // lum1 +red  00 R6 00 R4 00 R2 00 R0
+        "paddw          %%mm2,                  %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0
+        "psraw          $6,                     %%mm4\n" // R1 0 .. 64
+        "movq           %%mm7,                  %%mm3\n" // lum2                       00 L7 00 L5 00 L3 00 L1
+        "psraw          $6,                     %%mm5\n" // G1  - .. +
+        "paddw          %%mm0,                  %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
+        "psraw          $6,                     %%mm6\n" // B1         0 .. 64
+        "packuswb       %%mm4,                  %%mm4\n" // R1 R1
+        "packuswb       %%mm5,                  %%mm5\n" // G1 G1
+        "packuswb       %%mm6,                  %%mm6\n" // B1 B1
+        "punpcklbw      %%mm4,                  %%mm4\n" // each byte duplicated into a 16-bit lane
+        "punpcklbw      %%mm5,                  %%mm5\n"
+
+        "pand           %16,                    %%mm4\n" // RED         1 (bits 11-15)
+        "psllw          $3,                     %%mm5\n" // GREEN       1
+        "punpcklbw      %%mm6,                  %%mm6\n"
+        "pand           %17,                    %%mm5\n" // keep green bits 5-10
+        "pand           %16,                    %%mm6\n"
+        "por            %%mm5,                  %%mm4\n" //
+        "psrlw          $11,                    %%mm6\n" // BLUE        1
+        "movq           %%mm3,                  %%mm5\n" // lum2
+        "paddw          %%mm1,                  %%mm3\n" // lum2 +red      00 R7 00 R5 00 R3 00 R1
+        "paddw          %%mm2,                  %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
+        "psraw          $6,                     %%mm3\n" // R2
+        "por            %%mm6,                  %%mm4\n" // MM4
+        "psraw          $6,                     %%mm5\n" // G2
+        "movq           (%2, %4),               %%mm6\n" // L3 load second luma row (lum + cols)
+        "psraw          $6,                     %%mm7\n"
+        "packuswb       %%mm3,                  %%mm3\n"
+        "packuswb       %%mm5,                  %%mm5\n"
+        "packuswb       %%mm7,                  %%mm7\n"
+        "pand           %12,                    %%mm6\n" // L3
+        "punpcklbw      %%mm3,                  %%mm3\n"
+        "punpcklbw      %%mm5,                  %%mm5\n"
+        "pmullw         %15,                    %%mm6\n" // lum3
+        "punpcklbw      %%mm7,                  %%mm7\n"
+        "psllw          $3,                     %%mm5\n" // GREEN 2
+        "pand           %16,                    %%mm7\n"
+        "pand           %16,                    %%mm3\n"
+        "psrlw          $11,                    %%mm7\n" // BLUE  2
+        "pand           %17,                    %%mm5\n"
+        "por            %%mm7,                  %%mm3\n"
+        "movq           (%2,%4),                %%mm7\n" // L4 load second luma row (lum + cols)
+        "por            %%mm5,                  %%mm3\n" //
+        "psrlw          $8,                     %%mm7\n" // L4
+        "movq           %%mm4,                  %%mm5\n"
+        "punpcklwd      %%mm3,                  %%mm4\n"
+        "pmullw         %15,                    %%mm7\n" // lum4
+        "punpckhwd      %%mm3,                  %%mm5\n"
+
+        "movq           %%mm4,                  (%3)\n"  // write row1
+        "movq           %%mm5,                  8(%3)\n" // write row1
+
+        "movq           %%mm6,                  %%mm4\n" // Lum3
+        "paddw          %%mm0,                  %%mm6\n" // Lum3 +blue
+
+        "movq           %%mm4,                  %%mm5\n" // Lum3
+        "paddw          %%mm1,                  %%mm4\n" // Lum3 +red
+        "paddw          %%mm2,                  %%mm5\n" // Lum3 +green
+        "psraw          $6,                     %%mm4\n"
+        "movq           %%mm7,                  %%mm3\n" // Lum4
+        "psraw          $6,                     %%mm5\n"
+        "paddw          %%mm0,                  %%mm7\n" // Lum4 +blue
+        "psraw          $6,                     %%mm6\n" // Lum3 +blue
+        "movq           %%mm3,                  %%mm0\n" // Lum4
+        "packuswb       %%mm4,                  %%mm4\n"
+        "paddw          %%mm1,                  %%mm3\n" // Lum4 +red
+        "packuswb       %%mm5,                  %%mm5\n"
+        "paddw          %%mm2,                  %%mm0\n" // Lum4 +green
+        "packuswb       %%mm6,                  %%mm6\n"
+        "punpcklbw      %%mm4,                  %%mm4\n"
+        "punpcklbw      %%mm5,                  %%mm5\n"
+        "punpcklbw      %%mm6,                  %%mm6\n"
+        "psllw          $3,                     %%mm5\n" // GREEN 3
+        "pand           %16,                    %%mm4\n"
+        "psraw          $6,                     %%mm3\n" // psr 6
+        "psraw          $6,                     %%mm0\n"
+        "pand           %16,                    %%mm6\n" // BLUE
+        "pand           %17,                    %%mm5\n"
+        "psrlw          $11,                    %%mm6\n" // BLUE  3
+        "por            %%mm5,                  %%mm4\n"
+        "psraw          $6,                     %%mm7\n"
+        "por            %%mm6,                  %%mm4\n"
+        "packuswb       %%mm3,                  %%mm3\n"
+        "packuswb       %%mm0,                  %%mm0\n"
+        "packuswb       %%mm7,                  %%mm7\n"
+        "punpcklbw      %%mm3,                  %%mm3\n"
+        "punpcklbw      %%mm0,                  %%mm0\n"
+        "punpcklbw      %%mm7,                  %%mm7\n"
+        "pand           %16,                    %%mm3\n"
+        "pand           %16,                    %%mm7\n" // BLUE
+        "psllw          $3,                     %%mm0\n" // GREEN 4
+        "psrlw          $11,                    %%mm7\n"
+        "pand           %17,                    %%mm0\n"
+        "por            %%mm7,                  %%mm3\n"
+        "por            %%mm0,                  %%mm3\n"
+
+        "movq           %%mm4,                  %%mm5\n"
+
+        "punpcklwd      %%mm3,                  %%mm4\n"
+        "punpckhwd      %%mm3,                  %%mm5\n"
+
+        "movq           %%mm4,                  (%5)\n"  // write row2
+        "movq           %%mm5,                  8(%5)\n" // write row2
+
+        "addl           $8,                     %6\n" // x+8
+        "addl           $8,                     %2\n" // lum+8
+        "addl           $4,                     (%%esp)\n" // cr+4 (cr lives in the stack slot)
+        "addl           $4,                     %1\n" // cb+4
+        "cmpl           %4,                     %6\n" // x < cols ?  (signed ints)
+        "leal           16(%3),                 %3\n" // row1+16; leal leaves the flags from cmpl intact
+        "leal           16(%5),%5\n" // row2+16
+
+        "jl             1b\n"
+        "addl           %4,     %2\n" // lum += cols
+        "addl           %8,     %3\n" // row1+= mod
+        "addl           %8,     %5\n" // row2+= mod
+        "movl           $0,     %6\n" // x=0
+        "cmpl           %7,     %2\n" // lum below y (end of the luma plane) ?
+        "jb             1b\n" // unsigned branch: lum and y are addresses
+        "addl $4, %%esp\n"  // get rid of the stack slot we reserved.
+        "emms\n"
+        :   // no outputs. NOTE(review): %1,%2,%3,%5,%6 are modified although declared as inputs.
+        : "m" (cr), "r"(cb),"r"(lum),"r"(row1),"r"(cols),"r"(row2),
+          "m"(x),"m"(y),"m"(mod),"m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
+          "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),
+          "m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565)
+        : "memory", "cc"  // the asm stores through row1/row2, updates x and changes the flags
+    );
+}
+
+/* *INDENT-ON* */
+
+#endif /* GCC3 i386 inline assembly */
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/render/SDL_yuv_sw.c	Thu Feb 03 00:19:40 2011 -0800
@@ -0,0 +1,1322 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2010 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+/* This is the software implementation of the YUV texture support */
+
+/* This code was derived from code carrying the following copyright notices:
+
+ * Copyright (c) 1995 The Regents of the University of California.
+ * All rights reserved.
+ * 
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement is
+ * hereby granted, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ * 
+ * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
+ * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+ * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+ * Copyright (c) 1995 Erik Corry
+ * All rights reserved.
+ * 
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement is
+ * hereby granted, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ * 
+ * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+ * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
+ * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
+ * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+
+ * Portions of this software Copyright (c) 1995 Brown University.
+ * All rights reserved.
+ * 
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement
+ * is hereby granted, provided that the above copyright notice and the
+ * following two paragraphs appear in all copies of this software.
+ * 
+ * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
+ * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
+ * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
+ * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "SDL_video.h"
+#include "SDL_cpuinfo.h"
+#include "SDL_yuv_sw_c.h"
+
+
+/* The colorspace conversion functions */
+
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod);
+extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod);
+#endif
+
+static void
+Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    /*
+     * Writes 16-bit pixels, two output rows per pass of the outer loop.
+     * Each (*cr, *cb) pair is shared by a 2x2 block of luma samples: two
+     * pixels on the upper row and the two directly below them.
+     */
+    const int pairs_per_row = cols / 2;
+    const int row_skip = mod + (cols + mod);
+    unsigned short *upper = (unsigned short *) out;
+    unsigned short *lower = upper + cols + mod;
+    unsigned char *lum_lower = lum + cols;
+    int rows_left;
+
+    for (rows_left = rows / 2; rows_left != 0; --rows_left) {
+        int pairs_left;
+
+        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
+            /* rgb_2_pix offsets contributed by this chroma pair. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            int Y;
+
+            ++cr;
+            ++cb;
+
+            /* Upper row, left and right pixel. */
+            Y = lum[0];
+            upper[0] = (unsigned short) (rgb_2_pix[Y + r_off] |
+                                         rgb_2_pix[Y + g_off] |
+                                         rgb_2_pix[Y + b_off]);
+            Y = lum[1];
+            upper[1] = (unsigned short) (rgb_2_pix[Y + r_off] |
+                                         rgb_2_pix[Y + g_off] |
+                                         rgb_2_pix[Y + b_off]);
+
+            /* Lower row, same two columns. */
+            Y = lum_lower[0];
+            lower[0] = (unsigned short) (rgb_2_pix[Y + r_off] |
+                                         rgb_2_pix[Y + g_off] |
+                                         rgb_2_pix[Y + b_off]);
+            Y = lum_lower[1];
+            lower[1] = (unsigned short) (rgb_2_pix[Y + r_off] |
+                                         rgb_2_pix[Y + g_off] |
+                                         rgb_2_pix[Y + b_off]);
+
+            lum += 2;
+            lum_lower += 2;
+            upper += 2;
+            lower += 2;
+        }
+
+        /*
+         * The inner loop moved every pointer by 2 * (cols / 2) elements.
+         * Move the luma pointers by cols more and the output pointers by
+         * cols + 2 * mod so that each one lands two lines further down.
+         */
+        lum += cols;
+        lum_lower += cols;
+        upper += row_skip;
+        lower += row_skip;
+    }
+}
+
+static void
+Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    /*
+     * Writes 3 bytes per pixel: bits 0-7 of the looked-up value first,
+     * then bits 8-15, then bits 16-23.  Each (*cr, *cb) pair is shared by
+     * a 2x2 block of luma samples: two pixels on the upper row and the two
+     * directly below them.
+     */
+    const int pairs_per_row = cols / 2;
+    const int row_skip = (mod + (cols + mod)) * 3;
+    unsigned char *upper = out;
+    unsigned char *lower = upper + cols * 3 + mod * 3;
+    unsigned char *lum_lower = lum + cols;
+    int rows_left;
+
+    for (rows_left = rows / 2; rows_left != 0; --rows_left) {
+        int pairs_left;
+
+        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
+            /* rgb_2_pix offsets contributed by this chroma pair. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            unsigned int pixel;
+            int Y;
+
+            ++cr;
+            ++cb;
+
+            /* Upper row, left and right pixel. */
+            Y = lum[0];
+            pixel = (rgb_2_pix[Y + r_off] |
+                     rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+            upper[0] = (pixel) & 0xFF;
+            upper[1] = (pixel >> 8) & 0xFF;
+            upper[2] = (pixel >> 16) & 0xFF;
+            Y = lum[1];
+            pixel = (rgb_2_pix[Y + r_off] |
+                     rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+            upper[3] = (pixel) & 0xFF;
+            upper[4] = (pixel >> 8) & 0xFF;
+            upper[5] = (pixel >> 16) & 0xFF;
+
+            /* Lower row, same two columns. */
+            Y = lum_lower[0];
+            pixel = (rgb_2_pix[Y + r_off] |
+                     rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+            lower[0] = (pixel) & 0xFF;
+            lower[1] = (pixel >> 8) & 0xFF;
+            lower[2] = (pixel >> 16) & 0xFF;
+            Y = lum_lower[1];
+            pixel = (rgb_2_pix[Y + r_off] |
+                     rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+            lower[3] = (pixel) & 0xFF;
+            lower[4] = (pixel >> 8) & 0xFF;
+            lower[5] = (pixel >> 16) & 0xFF;
+
+            lum += 2;
+            lum_lower += 2;
+            upper += 6;
+            lower += 6;
+        }
+
+        /*
+         * The inner loop moved the luma pointers by 2 * (cols / 2) samples
+         * and the output pointers by three bytes per pixel.  Move them on
+         * by one more line so that each one lands two lines further down.
+         */
+        lum += cols;
+        lum_lower += cols;
+        upper += row_skip;
+        lower += row_skip;
+    }
+}
+
+static void
+Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    /*
+     * Writes one unsigned int per pixel, two output rows per pass of the
+     * outer loop.  Each (*cr, *cb) pair is shared by a 2x2 block of luma
+     * samples: two pixels on the upper row and the two directly below.
+     */
+    const int pairs_per_row = cols / 2;
+    const int row_skip = mod + (cols + mod);
+    unsigned int *upper = (unsigned int *) out;
+    unsigned int *lower = upper + cols + mod;
+    unsigned char *lum_lower = lum + cols;
+    int rows_left;
+
+    for (rows_left = rows / 2; rows_left != 0; --rows_left) {
+        int pairs_left;
+
+        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
+            /* rgb_2_pix offsets contributed by this chroma pair. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            int Y;
+
+            ++cr;
+            ++cb;
+
+            /* Upper row, left and right pixel. */
+            Y = lum[0];
+            upper[0] = (rgb_2_pix[Y + r_off] |
+                        rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+            Y = lum[1];
+            upper[1] = (rgb_2_pix[Y + r_off] |
+                        rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+
+            /* Lower row, same two columns. */
+            Y = lum_lower[0];
+            lower[0] = (rgb_2_pix[Y + r_off] |
+                        rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+            Y = lum_lower[1];
+            lower[1] = (rgb_2_pix[Y + r_off] |
+                        rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+
+            lum += 2;
+            lum_lower += 2;
+            upper += 2;
+            lower += 2;
+        }
+
+        /*
+         * The inner loop moved every pointer by 2 * (cols / 2) elements.
+         * Move the luma pointers by cols more and the output pointers by
+         * cols + 2 * mod so that each one lands two lines further down.
+         */
+        lum += cols;
+        lum_lower += cols;
+        upper += row_skip;
+        lower += row_skip;
+    }
+}
+
+/*
+ * Convert planar 4:2:0 YUV data to 16-bit pixels at 2x scale.
+ *
+ * This relies on the 16-bit lookup tables having their low 16 bits
+ * replicated in the high 16 bits: storing one 32-bit value writes the
+ * same pixel twice, which gives the horizontal doubling almost for free.
+ * Vertical doubling is done by storing each value into two output rows.
+ */
+static void
+Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    /* Output row length in 32-bit units (two 16-bit pixels per unit). */
+    const int next_row = cols + (mod / 2);
+    unsigned int *top = (unsigned int *) out;
+    unsigned int *bottom = top + 2 * next_row;
+    unsigned char *lum_below = lum + cols;
+    /* From the end of a converted row to the row four output rows down. */
+    const int skip = (next_row * 3) + (mod / 2);
+    const int pairs = cols / 2;
+    unsigned int pixel;
+    int pass, pair, n;
+
+    for (pass = rows / 2; pass != 0; --pass) {
+        for (pair = pairs; pair != 0; --pair) {
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+
+            ++cr;
+            ++cb;
+
+            /* Two source pixels from the upper source row. */
+            for (n = 0; n < 2; ++n) {
+                const int Y = *lum++;
+
+                pixel = (rgb_2_pix[Y + r_off] |
+                         rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+                top[0] = pixel;
+                top[next_row] = pixel;
+                ++top;
+            }
+
+            /* Two source pixels from the lower source row. */
+            for (n = 0; n < 2; ++n) {
+                const int Y = *lum_below++;
+
+                pixel = (rgb_2_pix[Y + r_off] |
+                         rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+                bottom[0] = pixel;
+                bottom[next_row] = pixel;
+                ++bottom;
+            }
+        }
+
+        /*
+         * Every pointer now sits at the start of the row its partner just
+         * handled; step over it to reach the next unprocessed pair.
+         */
+        lum += cols;
+        lum_below += cols;
+        top += skip;
+        bottom += skip;
+    }
+}
+
+/*
+ * Convert planar 4:2:0 YUV data to 24-bit pixels at 2x scale.
+ *
+ * Every source pixel becomes a 2x2 block of 3-byte output pixels; the
+ * looked-up value is stored least significant byte first.
+ */
+static void
+Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    /* Output row length in bytes, padding included. */
+    const int next_row = (cols * 2 + mod) * 3;
+    unsigned char *top = out;
+    unsigned char *bottom = top + 2 * next_row;
+    unsigned char *lum_below = lum + cols;
+    /* From the end of a converted row to the row four output rows down. */
+    const int skip = next_row * 3 + mod * 3;
+    const int pairs = cols / 2;
+    unsigned int value;
+    int pass, pair, n, b;
+
+    for (pass = rows / 2; pass != 0; --pass) {
+        for (pair = pairs; pair != 0; --pair) {
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+
+            ++cr;
+            ++cb;
+
+            /* Two source pixels from the upper source row. */
+            for (n = 0; n < 2; ++n) {
+                const int Y = *lum++;
+
+                value = (rgb_2_pix[Y + r_off] |
+                         rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+                for (b = 0; b < 3; ++b) {
+                    const unsigned char byte = (value >> (8 * b)) & 0xFF;
+
+                    top[b] = byte;
+                    top[3 + b] = byte;
+                    top[next_row + b] = byte;
+                    top[next_row + 3 + b] = byte;
+                }
+                top += 2 * 3;
+            }
+
+            /* Two source pixels from the lower source row. */
+            for (n = 0; n < 2; ++n) {
+                const int Y = *lum_below++;
+
+                value = (rgb_2_pix[Y + r_off] |
+                         rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+                for (b = 0; b < 3; ++b) {
+                    const unsigned char byte = (value >> (8 * b)) & 0xFF;
+
+                    bottom[b] = byte;
+                    bottom[3 + b] = byte;
+                    bottom[next_row + b] = byte;
+                    bottom[next_row + 3 + b] = byte;
+                }
+                bottom += 2 * 3;
+            }
+        }
+
+        /*
+         * Every pointer now sits at the start of the row its partner just
+         * handled; step over it to reach the next unprocessed pair.
+         */
+        lum += cols;
+        lum_below += cols;
+        top += skip;
+        bottom += skip;
+    }
+}
+
+/*
+ * Convert planar 4:2:0 YUV data to 32-bit pixels at 2x scale.
+ *
+ * Every source pixel is written as a 2x2 block of identical output
+ * pixels.  "mod" is the padding, in pixels, after each output row.
+ */
+static void
+Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    /* Output row length in pixels, padding included. */
+    const int next_row = cols * 2 + mod;
+    unsigned int *top = (unsigned int *) out;
+    unsigned int *bottom = top + 2 * next_row;
+    unsigned char *lum_below = lum + cols;
+    /* From the end of a converted row to the row four output rows down. */
+    const int skip = (next_row * 3) + mod;
+    const int pairs = cols / 2;
+    unsigned int pixel;
+    int pass, pair, n;
+
+    for (pass = rows / 2; pass != 0; --pass) {
+        for (pair = pairs; pair != 0; --pair) {
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+
+            ++cr;
+            ++cb;
+
+            /* Two source pixels from the upper source row. */
+            for (n = 0; n < 2; ++n) {
+                const int Y = *lum++;
+
+                pixel = (rgb_2_pix[Y + r_off] |
+                         rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+                top[0] = pixel;
+                top[1] = pixel;
+                top[next_row] = pixel;
+                top[next_row + 1] = pixel;
+                top += 2;
+            }
+
+            /* Two source pixels from the lower source row. */
+            for (n = 0; n < 2; ++n) {
+                const int Y = *lum_below++;
+
+                pixel = (rgb_2_pix[Y + r_off] |
+                         rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+                bottom[0] = pixel;
+                bottom[1] = pixel;
+                bottom[next_row] = pixel;
+                bottom[next_row + 1] = pixel;
+                bottom += 2;
+            }
+        }
+
+        /*
+         * Every pointer now sits at the start of the row its partner just
+         * handled; step over it to reach the next unprocessed pair.
+         */
+        lum += cols;
+        lum_below += cols;
+        top += skip;
+        bottom += skip;
+    }
+}
+
+/*
+ * Convert packed 4:2:2 YUV data to 16-bit pixels at 1:1 scale.
+ *
+ * Luma samples are 2 bytes apart and each chroma pair serves two pixels,
+ * so the chroma pointers step 4 bytes per pixel pair.  "mod" is the
+ * number of padding pixels after each output row.
+ */
+static void
+Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned short *dst = (unsigned short *) out;
+    const int pairs = cols / 2;
+    int line, pair;
+
+    for (line = rows; line != 0; --line) {
+        for (pair = pairs; pair != 0; --pair) {
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            int Y;
+
+            cr += 4;
+            cb += 4;
+
+            /* The cast drops the replicated upper half of the entry. */
+            Y = lum[0];
+            dst[0] = (unsigned short) (rgb_2_pix[Y + r_off] |
+                                       rgb_2_pix[Y + g_off] |
+                                       rgb_2_pix[Y + b_off]);
+            Y = lum[2];
+            dst[1] = (unsigned short) (rgb_2_pix[Y + r_off] |
+                                       rgb_2_pix[Y + g_off] |
+                                       rgb_2_pix[Y + b_off]);
+            lum += 4;
+            dst += 2;
+        }
+
+        dst += mod;
+    }
+}
+
+/*
+ * Convert packed 4:2:2 YUV data to 24-bit pixels at 1:1 scale.
+ *
+ * Each looked-up value is stored as three bytes, least significant
+ * first.  "mod" is the number of padding pixels after each output row.
+ */
+static void
+Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned char *dst = (unsigned char *) out;
+    /* Row padding converted from pixels to bytes. */
+    const int pad_bytes = mod * 3;
+    const int pairs = cols / 2;
+    unsigned int value;
+    int line, pair, n;
+
+    for (line = rows; line != 0; --line) {
+        for (pair = pairs; pair != 0; --pair) {
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+
+            cr += 4;
+            cb += 4;
+
+            /* Two luma samples share the chroma pair read above. */
+            for (n = 0; n < 2; ++n) {
+                const int Y = *lum;
+
+                lum += 2;
+                value = (rgb_2_pix[Y + r_off] |
+                         rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+                dst[0] = (value) & 0xFF;
+                dst[1] = (value >> 8) & 0xFF;
+                dst[2] = (value >> 16) & 0xFF;
+                dst += 3;
+            }
+        }
+        dst += pad_bytes;
+    }
+}
+
+/*
+ * Convert packed 4:2:2 YUV data to 32-bit pixels at 1:1 scale.
+ *
+ * Luma samples are 2 bytes apart and each chroma pair serves two pixels.
+ * "mod" is the number of padding pixels after each output row.
+ */
+static void
+Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned int *dst = (unsigned int *) out;
+    const int pairs = cols / 2;
+    int line, pair;
+
+    for (line = rows; line != 0; --line) {
+        for (pair = pairs; pair != 0; --pair) {
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            int Y;
+
+            cr += 4;
+            cb += 4;
+
+            Y = lum[0];
+            dst[0] = (rgb_2_pix[Y + r_off] |
+                      rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+            Y = lum[2];
+            dst[1] = (rgb_2_pix[Y + r_off] |
+                      rgb_2_pix[Y + g_off] | rgb_2_pix[Y + b_off]);
+            lum += 4;
+            dst += 2;
+        }
+        dst += mod;
+    }
+}
+
+/*
+ * Convert packed 4:2:2 YUV data to 16-bit pixels at 2x scale.
+ *
+ * This relies on the 16-bit lookup tables having their low 16 bits
+ * replicated in the high 16 bits: one 32-bit store writes the same pixel
+ * twice, which gives the horizontal doubling almost for free.  Each value
+ * is also stored one output row down for the vertical doubling.
+ * "mod" is the padding, in 16-bit pixels, after each output row.
+ */
+static void
+Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned int *row = (unsigned int *) out;
+    /* Output row length in 32-bit units (two 16-bit pixels per unit). */
+    const int next_row = cols + (mod / 2);
+    /*
+     * After a source row, "row" is cols units into the first of the two
+     * output rows it produced.  The next source row starts 2 * next_row
+     * units past that row's start, so the padding has to be skipped as
+     * well as the duplicated row; advancing by next_row alone drifts
+     * whenever the destination rows are padded (mod != 0).
+     */
+    const int row_skip = next_row + (mod / 2);
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+
+    y = rows;
+    while (y--) {
+        x = cols_2;
+        while (x--) {
+            register int L;
+
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            cr += 4;
+            cb += 4;
+
+            L = *lum;
+            lum += 2;
+            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
+                                      rgb_2_pix[L + crb_g] |
+                                      rgb_2_pix[L + cb_b]);
+            row++;
+
+            L = *lum;
+            lum += 2;
+            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
+                                      rgb_2_pix[L + crb_g] |
+                                      rgb_2_pix[L + cb_b]);
+            row++;
+
+        }
+        row += row_skip;
+    }
+}
+
+/*
+ * Convert packed 4:2:2 YUV data to 24-bit pixels at 2x scale.
+ *
+ * Every source pixel becomes a 2x2 block of 3-byte output pixels, stored
+ * least significant byte first.  "mod" is the padding, in pixels, after
+ * each output row.
+ */
+static void
+Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned int value;
+    unsigned char *row = out;
+    /* Output row length in bytes, padding included. */
+    const int next_row = (cols * 2 + mod) * 3;
+    /*
+     * After a source row, "row" is cols * 2 * 3 bytes into the first of
+     * the two output rows it produced.  The next source row starts
+     * 2 * next_row bytes past that row's start, so the padding has to be
+     * skipped as well as the duplicated row; advancing by next_row alone
+     * drifts whenever the destination rows are padded (mod != 0).
+     */
+    const int row_skip = next_row + mod * 3;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    y = rows;
+    while (y--) {
+        x = cols_2;
+        while (x--) {
+            register int L;
+
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            cr += 4;
+            cb += 4;
+
+            L = *lum;
+            lum += 2;
+            value = (rgb_2_pix[L + cr_r] |
+                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
+                row[next_row + 3 + 0] = (value) & 0xFF;
+            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
+                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
+            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
+                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
+            row += 2 * 3;
+
+            L = *lum;
+            lum += 2;
+            value = (rgb_2_pix[L + cr_r] |
+                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
+                row[next_row + 3 + 0] = (value) & 0xFF;
+            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
+                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
+            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
+                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
+            row += 2 * 3;
+
+        }
+        row += row_skip;
+    }
+}
+
+/*
+ * Convert packed 4:2:2 YUV data to 32-bit pixels at 2x scale.
+ *
+ * Every source pixel is written as a 2x2 block of identical output
+ * pixels.  "mod" is the padding, in pixels, after each output row.
+ */
+static void
+Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod)
+{
+    unsigned int *row = (unsigned int *) out;
+    /* Output row length in pixels, padding included. */
+    const int next_row = cols * 2 + mod;
+    /*
+     * After a source row, "row" is cols * 2 pixels into the first of the
+     * two output rows it produced.  The next source row starts
+     * 2 * next_row pixels past that row's start, so the padding has to be
+     * skipped as well as the duplicated row; advancing by next_row alone
+     * drifts whenever the destination rows are padded (mod != 0).
+     */
+    const int row_skip = next_row + mod;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    y = rows;
+    while (y--) {
+        x = cols_2;
+        while (x--) {
+            register int L;
+
+            /* Chroma-dependent offsets into the R, G and B lookup areas. */
+            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
+            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
+                + colortab[*cb + 2 * 256];
+            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
+            cr += 4;
+            cb += 4;
+
+            L = *lum;
+            lum += 2;
+            row[0] = row[1] = row[next_row] = row[next_row + 1] =
+                (rgb_2_pix[L + cr_r] |
+                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+            row += 2;
+
+            L = *lum;
+            lum += 2;
+            row[0] = row[1] = row[next_row] = row[next_row + 1] =
+                (rgb_2_pix[L + cr_r] |
+                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
+            row += 2;
+
+
+        }
+
+        row += row_skip;
+    }
+}
+
+/*
+ * Count the 1 bits in a Uint32, examining one bit per iteration.
+ * Low performance, do not call often.
+ */
+static int
+number_of_bits_set(Uint32 a)
+{
+    int count = 0;
+
+    for (; a != 0; a >>= 1) {
+        if (a & 1) {
+            ++count;
+        }
+    }
+    return count;
+}
+
+/*
+ * Count the 0 bits below the least significant 1 bit of a Uint32.
+ * A value of zero reports the full width of the type.
+ * Low performance, do not call often.
+ */
+static int
+free_bits_at_bottom(Uint32 a)
+{
+    int zeros = 0;
+
+    /* assume char is 8 bits */
+    if (a == 0) {
+        return sizeof(Uint32) * 8;
+    }
+
+    /* a is non-zero here, so a set bit is always reached. */
+    while ((a & 1) == 0) {
+        a >>= 1;
+        ++zeros;
+    }
+    return zeros;
+}
+
+/*
+ * Prepare swdata for conversion to the given RGB target format.
+ *
+ * Rebuilds the three 768-entry colour-to-pixel tables in rgb_2_pix for
+ * the target's channel masks, selects the 1x and 2x conversion routines
+ * matching the source YUV format and the target's bytes per pixel, and
+ * drops the cached surfaces that were created for the previous target.
+ *
+ * Returns 0 on success, or -1 with the SDL error set when the target
+ * format has no mask representation or fewer than 15 bits per pixel.
+ */
+static int
+SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
+{
+    Uint32 *r_2_pix_alloc;
+    Uint32 *g_2_pix_alloc;
+    Uint32 *b_2_pix_alloc;
+    int i;
+    int bpp;
+    Uint32 Rmask, Gmask, Bmask, Amask;
+    int Rloss, Gloss, Bloss;
+    int Rshift, Gshift, Bshift;
+
+    if (!SDL_PixelFormatEnumToMasks
+        (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
+        SDL_SetError("Unsupported YUV destination format");
+        return -1;
+    }
+
+    swdata->target_format = target_format;
+    r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
+    g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
+    b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
+
+    /*
+     * The per-channel precision loss and bit position depend only on the
+     * masks, so work them out once instead of on every table entry.
+     */
+    Rloss = 8 - number_of_bits_set(Rmask);
+    Gloss = 8 - number_of_bits_set(Gmask);
+    Bloss = 8 - number_of_bits_set(Bmask);
+    Rshift = free_bits_at_bottom(Rmask);
+    Gshift = free_bits_at_bottom(Gmask);
+    Bshift = free_bits_at_bottom(Bmask);
+
+    /* 
+     * Set up entries 0-255 in rgb-to-pixel value tables.
+     */
+    for (i = 0; i < 256; ++i) {
+        r_2_pix_alloc[i + 256] = i >> Rloss;
+        r_2_pix_alloc[i + 256] <<= Rshift;
+        r_2_pix_alloc[i + 256] |= Amask;
+        g_2_pix_alloc[i + 256] = i >> Gloss;
+        g_2_pix_alloc[i + 256] <<= Gshift;
+        g_2_pix_alloc[i + 256] |= Amask;
+        b_2_pix_alloc[i + 256] = i >> Bloss;
+        b_2_pix_alloc[i + 256] <<= Bshift;
+        b_2_pix_alloc[i + 256] |= Amask;
+    }
+
+    /*
+     * If we have 16-bit output depth, then we double the value
+     * in the top word. This means that we can write out both
+     * pixels in the pixel doubling mode with one op. It is 
+     * harmless in the normal case as storing a 32-bit value
+     * through a short pointer will lose the top bits anyway.
+     */
+    if (SDL_BYTESPERPIXEL(target_format) == 2) {
+        for (i = 0; i < 256; ++i) {
+            r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
+            g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
+            b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
+        }
+    }
+
+    /*
+     * Spread out the values we have to the rest of the array so that
+     * we do not need to check for overflow.
+     */
+    for (i = 0; i < 256; ++i) {
+        r_2_pix_alloc[i] = r_2_pix_alloc[256];
+        r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
+        g_2_pix_alloc[i] = g_2_pix_alloc[256];
+        g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
+        b_2_pix_alloc[i] = b_2_pix_alloc[256];
+        b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
+    }
+
+    /* You have chosen wisely... */
+    switch (swdata->format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+        if (SDL_BYTESPERPIXEL(target_format) == 2) {
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+            /* inline assembly functions */
+            if (SDL_HasMMX() && (Rmask == 0xF800) &&
+                (Gmask == 0x07E0) && (Bmask == 0x001F)
+                && (swdata->w & 15) == 0) {
+/*printf("Using MMX 16-bit 565 dither\n");*/
+                swdata->Display1X = Color565DitherYV12MMX1X;
+            } else {
+/*printf("Using C 16-bit dither\n");*/
+                swdata->Display1X = Color16DitherYV12Mod1X;
+            }
+#else
+            swdata->Display1X = Color16DitherYV12Mod1X;
+#endif
+            swdata->Display2X = Color16DitherYV12Mod2X;
+        }
+        if (SDL_BYTESPERPIXEL(target_format) == 3) {
+            swdata->Display1X = Color24DitherYV12Mod1X;
+            swdata->Display2X = Color24DitherYV12Mod2X;
+        }
+        if (SDL_BYTESPERPIXEL(target_format) == 4) {
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+            /* inline assembly functions */
+            if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
+                (Gmask == 0x0000FF00) &&
+                (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
+/*printf("Using MMX 32-bit dither\n");*/
+                swdata->Display1X = ColorRGBDitherYV12MMX1X;
+            } else {
+/*printf("Using C 32-bit dither\n");*/
+                swdata->Display1X = Color32DitherYV12Mod1X;
+            }
+#else
+            swdata->Display1X = Color32DitherYV12Mod1X;
+#endif
+            swdata->Display2X = Color32DitherYV12Mod2X;
+        }
+        break;
+    case SDL_PIXELFORMAT_YUY2:
+    case SDL_PIXELFORMAT_UYVY:
+    case SDL_PIXELFORMAT_YVYU:
+        if (SDL_BYTESPERPIXEL(target_format) == 2) {
+            swdata->Display1X = Color16DitherYUY2Mod1X;
+            swdata->Display2X = Color16DitherYUY2Mod2X;
+        }
+        if (SDL_BYTESPERPIXEL(target_format) == 3) {
+            swdata->Display1X = Color24DitherYUY2Mod1X;
+            swdata->Display2X = Color24DitherYUY2Mod2X;
+        }
+        if (SDL_BYTESPERPIXEL(target_format) == 4) {
+            swdata->Display1X = Color32DitherYUY2Mod1X;
+            swdata->Display2X = Color32DitherYUY2Mod2X;
+        }
+        break;
+    default:
+        /* We should never get here (caught above) */
+        break;
+    }
+
+    /*
+     * Both cached surfaces were created with the previous target's pixel
+     * format.  Release them so they get rebuilt for the new format; a
+     * stale stretch surface would otherwise be filled with pixels of a
+     * different size than it was allocated for.
+     */
+    if (swdata->display) {
+        SDL_FreeSurface(swdata->display);
+        swdata->display = NULL;
+    }
+    if (swdata->stretch) {
+        SDL_FreeSurface(swdata->stretch);
+        swdata->stretch = NULL;
+    }
+    return 0;
+}
+
+/*
+ * Allocate a software YUV texture of the given format and size.
+ *
+ * Allocates the pixel storage (w * h * 2 bytes), the chroma lookup table
+ * and the colour-to-pixel table, fills in the chroma table, and records
+ * the plane pointers and pitches for the format.  The target format is
+ * left as SDL_PIXELFORMAT_UNKNOWN.
+ *
+ * Returns the new texture, or NULL with the SDL error set when the format
+ * is not YV12, IYUV, YUY2, UYVY or YVYU, or when an allocation fails.
+ */
+SDL_SW_YUVTexture *
+SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
+{
+    SDL_SW_YUVTexture *swdata;
+    int *Cr_r_tab;
+    int *Cr_g_tab;
+    int *Cb_g_tab;
+    int *Cb_b_tab;
+    int i;
+    int CR, CB;
+
+    /*
+     * Reject unknown formats before allocating anything, so the error
+     * path has nothing to release.
+     */
+    switch (format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+    case SDL_PIXELFORMAT_YUY2:
+    case SDL_PIXELFORMAT_UYVY:
+    case SDL_PIXELFORMAT_YVYU:
+        break;
+    default:
+        SDL_SetError("Unsupported YUV format");
+        return NULL;
+    }
+
+    swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
+    if (!swdata) {
+        SDL_OutOfMemory();
+        return NULL;
+    }
+
+    swdata->format = format;
+    swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
+    swdata->w = w;
+    swdata->h = h;
+    swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
+    swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
+    swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
+    if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
+        SDL_OutOfMemory();
+        /* Releases whichever of the three buffers did get allocated. */
+        SDL_SW_DestroyYUVTexture(swdata);
+        return NULL;
+    }
+
+    /* Generate the tables for the display surface */
+    Cr_r_tab = &swdata->colortab[0 * 256];
+    Cr_g_tab = &swdata->colortab[1 * 256];
+    Cb_g_tab = &swdata->colortab[2 * 256];
+    Cb_b_tab = &swdata->colortab[3 * 256];
+    for (i = 0; i < 256; i++) {
+        /* Gamma correction (luminescence table) and chroma correction
+           would be done here.  See the Berkeley mpeg_play sources.
+         */
+        CB = CR = (i - 128);
+        Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
+        Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
+        Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
+        Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
+    }
+
+    /* Find the pitch and offset values for the overlay */
+    switch (format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+        /* Full-size first plane followed by two half-pitch planes. */
+        swdata->pitches[0] = w;
+        swdata->pitches[1] = swdata->pitches[0] / 2;
+        swdata->pitches[2] = swdata->pitches[0] / 2;
+        swdata->planes[0] = swdata->pixels;
+        swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
+        swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
+        break;
+    case SDL_PIXELFORMAT_YUY2:
+    case SDL_PIXELFORMAT_UYVY:
+    case SDL_PIXELFORMAT_YVYU:
+        /* Single interleaved plane, two bytes per pixel. */
+        swdata->pitches[0] = w * 2;
+        swdata->planes[0] = swdata->pixels;
+        break;
+    default:
+        /* We should never get here (caught above) */
+        break;
+    }
+
+    /* We're all done.. */
+    return (swdata);
+}
+
+/*
+ * Report the address and pitch of the texture's first plane.
+ * Always returns 0.
+ */
+int
+SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
+                             int *pitch)
+{
+    Uint8 *first_plane = swdata->planes[0];
+    const int first_pitch = swdata->pitches[0];
+
+    *pixels = first_plane;
+    *pitch = first_pitch;
+    return 0;
+}
+
+/*
+ * Copy new pixel data into the texture.
+ *
+ * Planar formats (YV12, IYUV) only accept a full-texture update: "rect"
+ * must be NULL or cover the whole texture, and "pitch" is not used.
+ * Packed formats (YUY2, UYVY, YVYU) copy the "rect" area row by row from
+ * "pixels", whose rows are "pitch" bytes apart; a NULL "rect" means the
+ * whole texture.
+ *
+ * Returns 0 on success, or -1 with the SDL error set for a partial
+ * update of a planar texture.
+ */
+int
+SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
+                        const void *pixels, int pitch)
+{
+    switch (swdata->format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+        if (rect
+            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
+                || rect->h != swdata->h)) {
+            SDL_SetError
+                ("YV12 and IYUV textures only support full surface updates");
+            return -1;
+        }
+        /*
+         * A planar 4:2:0 frame is one full-size plane plus two
+         * quarter-size planes, i.e. w * h * 3 / 2 bytes.  Copying the
+         * whole w * h * 2 byte allocation would read past the end of a
+         * caller buffer that holds exactly one frame.
+         */
+        SDL_memcpy(swdata->pixels, pixels,
+                   (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
+        break;
+    case SDL_PIXELFORMAT_YUY2:
+    case SDL_PIXELFORMAT_UYVY:
+    case SDL_PIXELFORMAT_YVYU:
+        {
+            const Uint8 *src = (const Uint8 *) pixels;
+            Uint8 *dst;
+            int row;
+            size_t length;
+            /* A NULL rect selects the whole texture. */
+            int x = 0;
+            int y = 0;
+            int w = swdata->w;
+            int h = swdata->h;
+
+            if (rect) {
+                x = rect->x;
+                y = rect->y;
+                w = rect->w;
+                h = rect->h;
+            }
+
+            /* Packed formats use two bytes per pixel. */
+            dst = swdata->planes[0] + y * swdata->pitches[0] + x * 2;
+            length = w * 2;
+            for (row = 0; row < h; ++row) {
+                SDL_memcpy(dst, src, length);
+                src += pitch;
+                dst += swdata->pitches[0];
+            }
+        }
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Give direct access to the texture's pixel storage.
+ *
+ * On success *pixels points at the top-left pixel of "rect" within the
+ * first plane (or at the start of the plane when "rect" is NULL) and
+ * *pitch holds the first plane's pitch.  Planar formats (YV12, IYUV) can
+ * only be locked as a whole.
+ *
+ * Returns 0 on success, or -1 with the SDL error set for a partial lock
+ * of a planar texture.
+ */
+int
+SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
+                      void **pixels, int *pitch)
+{
+    switch (swdata->format) {
+    case SDL_PIXELFORMAT_YV12:
+    case SDL_PIXELFORMAT_IYUV:
+        if (rect
+            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
+                || rect->h != swdata->h)) {
+            SDL_SetError
+                ("YV12 and IYUV textures only support full surface locks");
+            return -1;
+        }
+        break;
+    }
+
+    /*
+     * The check above accepts a NULL rect as "whole texture", so the
+     * offset computation must not dereference it.
+     */
+    if (rect) {
+        *pixels =
+            swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
+    } else {
+        *pixels = swdata->planes[0];
+    }
+    *pitch = swdata->pitches[0];
+    return 0;
+}
+
+/*
+ * Counterpart of SDL_SW_LockYUVTexture().  Locking only hands out a
+ * pointer into the texture's own storage, so there is nothing to do here.
+ */
+void
+SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
+{
+}
+
+/*
+ * Convert the texture to RGB and write it into a caller-supplied buffer.
+ *
+ * swdata         source YUV texture
+ * srcrect        part of the texture to copy; dereferenced without a
+ *                check, so it must not be NULL
+ * target_format  pixel format of the destination buffer
+ * w, h           size of the destination in pixels
+ * pixels, pitch  destination buffer and its row length in bytes
+ *
+ * Three paths exist: a direct 1:1 conversion, a direct 2x conversion when
+ * the destination is exactly twice the full texture in both directions,
+ * and otherwise a conversion into a scratch surface that is then
+ * stretch-copied into the destination.
+ *
+ * Returns 0 on success, or -1 if the target format cannot be set up, a
+ * surface cannot be created, or the texture format is unknown.
+ */
+int
+SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
+                    Uint32 target_format, int w, int h, void *pixels,
+                    int pitch)
+{
+    int stretch;
+    int scale_2x;
+    Uint8 *lum, *Cr, *Cb;
+    int mod;
+
+    /* Make sure we're set up to display in the desired format */
+    if (target_format != swdata->target_format) {
+        if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
+            return -1;
+        }
+    }
+
+    /* Decide between direct 1x, direct 2x, and convert-then-stretch. */
+    stretch = 0;
+    scale_2x = 0;
+    if (srcrect->x || srcrect->y || srcrect->w < swdata->w
+        || srcrect->h < swdata->h) {
+        /* The source rectangle has been clipped.
+           Using a scratch surface is easier than adding clipped
+           source support to all the blitters, plus that would
+           slow them down in the general unclipped case.
+         */
+        stretch = 1;
+    } else if ((srcrect->w != w) || (srcrect->h != h)) {
+        if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
+            scale_2x = 1;
+        } else {
+            stretch = 1;
+        }
+    }
+    if (stretch) {
+        int bpp;
+        Uint32 Rmask, Gmask, Bmask, Amask;
+
+        /* "display" wraps the caller's buffer; it owns no pixels itself. */
+        if (swdata->display) {
+            /* Re-point the cached wrapper at this call's destination. */
+            swdata->display->w = w;
+            swdata->display->h = h;
+            swdata->display->pixels = pixels;
+            swdata->display->pitch = pitch;
+        } else {
+            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
+            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
+                                       &Bmask, &Amask);
+            swdata->display =
+                SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
+                                         Gmask, Bmask, Amask);
+            if (!swdata->display) {
+                return (-1);
+            }
+        }
+        /* "stretch" is a texture-sized scratch surface, created once. */
+        /* NOTE(review): it is reused whenever it already exists, whatever
+           target_format it was created with -- confirm that is safe when
+           the target format changes between calls. */
+        if (!swdata->stretch) {
+            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
+            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
+                                       &Bmask, &Amask);
+            swdata->stretch =
+                SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
+                                     Gmask, Bmask, Amask);
+            if (!swdata->stretch) {
+                return (-1);
+            }
+        }
+        /* From here on the conversion writes into the scratch surface. */
+        pixels = swdata->stretch->pixels;
+        pitch = swdata->stretch->pitch;
+    }
+    /* Locate the first luma, Cr and Cb samples for the texture format. */
+    switch (swdata->format) {
+    case SDL_PIXELFORMAT_YV12:
+        /* Planar: Cr comes from the second plane, Cb from the third. */
+        lum = swdata->planes[0];
+        Cr = swdata->planes[1];
+        Cb = swdata->planes[2];
+        break;
+    case SDL_PIXELFORMAT_IYUV:
+        /* Planar: Cb comes from the second plane, Cr from the third. */
+        lum = swdata->planes[0];
+        Cr = swdata->planes[2];
+        Cb = swdata->planes[1];
+        break;
+    case SDL_PIXELFORMAT_YUY2:
+        /* Packed: luma at byte 0, Cb at byte 1, Cr at byte 3. */
+        lum = swdata->planes[0];
+        Cr = lum + 3;
+        Cb = lum + 1;
+        break;
+    case SDL_PIXELFORMAT_UYVY:
+        /* Packed: Cb at byte 0, luma at byte 1, Cr at byte 2. */
+        lum = swdata->planes[0] + 1;
+        Cr = lum + 1;
+        Cb = lum - 1;
+        break;
+    case SDL_PIXELFORMAT_YVYU:
+        /* Packed: luma at byte 0, Cr at byte 1, Cb at byte 3. */
+        lum = swdata->planes[0];
+        Cr = lum + 1;
+        Cb = lum + 3;
+        break;
+    default:
+        SDL_SetError("Unsupported YUV format in copy");
+        return (-1);
+    }
+    /* Row length in pixels; below it becomes the per-row padding. */
+    mod = (pitch / SDL_BYTESPERPIXEL(target_format));
+
+    if (scale_2x) {
+        mod -= (swdata->w * 2);
+        swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
+                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
+    } else {
+        mod -= swdata->w;
+        swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
+                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
+    }
+    if (stretch) {
+        /* Copy the requested area of the scratch surface to the caller. */
+        /* NOTE(review): the result of SDL_SoftStretch() is discarded, so
+           a failed stretch still reports success -- confirm intended. */
+        SDL_Rect rect = *srcrect;
+        SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
+    }
+    return 0;
+}
+
+/*
+ * Release a software YUV texture along with every buffer and surface it
+ * owns.  A NULL texture is ignored.
+ */
+void
+SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
+{
+    if (!swdata) {
+        return;
+    }
+
+    /* Cached surfaces. */
+    if (swdata->display) {
+        SDL_FreeSurface(swdata->display);
+    }
+    if (swdata->stretch) {
+        SDL_FreeSurface(swdata->stretch);
+    }
+
+    /* Lookup tables and pixel storage. */
+    if (swdata->rgb_2_pix) {
+        SDL_free(swdata->rgb_2_pix);
+    }
+    if (swdata->colortab) {
+        SDL_free(swdata->colortab);
+    }
+    if (swdata->pixels) {
+        SDL_free(swdata->pixels);
+    }
+
+    SDL_free(swdata);
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/render/SDL_yuv_sw_c.h	Thu Feb 03 00:19:40 2011 -0800
@@ -0,0 +1,69 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2010 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+#include "SDL_video.h"
+
+/* This is the software implementation of the YUV texture support */
+
+struct SDL_SW_YUVTexture
+{
+    Uint32 format;
+    Uint32 target_format;
+    int w, h;                   /* handed to Display1X/2X as cols, rows */
+    Uint8 *pixels;              /* owned; freed by SDL_SW_DestroyYUVTexture() */
+    int *colortab;              /* owned; first argument to Display1X/2X */
+    Uint32 *rgb_2_pix;          /* owned; second argument to Display1X/2X */
+    void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod);    /* called when scale_2x is not set */
+    void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
+                       unsigned char *lum, unsigned char *cr,
+                       unsigned char *cb, unsigned char *out,
+                       int rows, int cols, int mod);    /* called when scale_2x is set */
+
+    /* These are just so we don't have to allocate them separately */
+    Uint16 pitches[3];
+    Uint8 *planes[3];           /* planar: [0]=luma, [1]/[2]=chroma; packed formats read only [0] */
+
+    /* This is a temporary surface in case we have to stretch copy */
+    SDL_Surface *stretch;       /* source of the SDL_SoftStretch() step; owned */
+    SDL_Surface *display;       /* destination of the SDL_SoftStretch() step; owned */
+};
+
+typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture;
+/* Software YUV texture API; SDL_SW_DestroyYUVTexture() accepts and ignores NULL */
+SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h);
+int SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
+                                 int *pitch);
+int SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
+                            const void *pixels, int pitch);
+int SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
+                          void **pixels, int *pitch);
+void SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata);
+int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
+                        Uint32 target_format, int w, int h, void *pixels,
+                        int pitch);
+void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata);
+
+/* vi: set ts=4 sw=4 expandtab: */
--- a/src/render/direct3d/SDL_d3drender.c	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/direct3d/SDL_d3drender.c	Thu Feb 03 00:19:40 2011 -0800
@@ -28,7 +28,6 @@
 #include "SDL_loadso.h"
 #include "SDL_syswm.h"
 #include "../SDL_sysrender.h"
-#include "../../video/SDL_yuv_sw_c.h"
 
 #if SDL_VIDEO_RENDER_D3D
 #define D3D_DEBUG_INFO
@@ -89,7 +88,8 @@
 
 /* Direct3D renderer implementation */
 
-#if 1                           /* This takes more memory but you won't lose your texture data */
+#if 1
+/* This takes more memory but you won't lose your texture data */
 #define D3DPOOL_SDL    D3DPOOL_MANAGED
 #define SDL_MEMORY_POOL_MANAGED
 #else
@@ -99,18 +99,12 @@
 
 static SDL_Renderer *D3D_CreateRenderer(SDL_Window * window, Uint32 flags);
 static int D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int D3D_QueryTexturePixels(SDL_Renderer * renderer,
-                                  SDL_Texture * texture, void **pixels,
-                                  int *pitch);
 static int D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                              const SDL_Rect * rect, const void *pixels,
                              int pitch);
 static int D3D_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                           const SDL_Rect * rect, int markDirty,
-                           void **pixels, int *pitch);
+                           const SDL_Rect * rect, void **pixels, int *pitch);
 static void D3D_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static void D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                             int numrects, const SDL_Rect * rects);
 static int D3D_RenderDrawPoints(SDL_Renderer * renderer,
                                 const SDL_Point * points, int count);
 static int D3D_RenderDrawLines(SDL_Renderer * renderer,
@@ -134,8 +128,8 @@
     {
      "d3d",
      (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
-     0,
-     {0},
+     1,
+     {SDL_PIXELFORMAT_ARGB8888},
      0,
      0}
 };
@@ -152,7 +146,6 @@
 
 typedef struct
 {
-    SDL_SW_YUVTexture *yuv;
     Uint32 format;
     IDirect3DTexture9 *texture;
 } D3D_TextureData;
@@ -248,113 +241,30 @@
 PixelFormatToD3DFMT(Uint32 format)
 {
     switch (format) {
-    case SDL_PIXELFORMAT_INDEX8:
-        return D3DFMT_P8;
-    case SDL_PIXELFORMAT_RGB332:
-        return D3DFMT_R3G3B2;
-    case SDL_PIXELFORMAT_RGB444:
-        return D3DFMT_X4R4G4B4;
-    case SDL_PIXELFORMAT_RGB555:
-        return D3DFMT_X1R5G5B5;
-    case SDL_PIXELFORMAT_ARGB4444:
-        return D3DFMT_A4R4G4B4;
-    case SDL_PIXELFORMAT_ARGB1555:
-        return D3DFMT_A1R5G5B5;
     case SDL_PIXELFORMAT_RGB565:
         return D3DFMT_R5G6B5;
     case SDL_PIXELFORMAT_RGB888:
         return D3DFMT_X8R8G8B8;
     case SDL_PIXELFORMAT_ARGB8888:
         return D3DFMT_A8R8G8B8;
-    case SDL_PIXELFORMAT_ARGB2101010:
-        return D3DFMT_A2R10G10B10;
-    case SDL_PIXELFORMAT_YV12:
-        return MAKEFOURCC('Y','V','1','2');
-    case SDL_PIXELFORMAT_IYUV:
-        return MAKEFOURCC('I','4','2','0');
-    case SDL_PIXELFORMAT_UYVY:
-        return D3DFMT_UYVY;
-    case SDL_PIXELFORMAT_YUY2:
-        return D3DFMT_YUY2;
     default:
         return D3DFMT_UNKNOWN;
     }
 }
 
-static SDL_bool
-D3D_IsTextureFormatAvailable(IDirect3D9 * d3d, UINT adapter,
-                             D3DFORMAT display_format,
-                             D3DFORMAT texture_format)
+static Uint32
+D3DFMTToPixelFormat(D3DFORMAT format)
 {
-    HRESULT result;
-
-    result = IDirect3D9_CheckDeviceFormat(d3d, adapter,
-                                          D3DDEVTYPE_HAL,
-                                          display_format,
-                                          0,
-                                          D3DRTYPE_TEXTURE,
-                                          texture_format);
-    return FAILED(result) ? SDL_FALSE : SDL_TRUE;
-}
-
-static void
-UpdateYUVTextureData(SDL_Texture * texture)
-{
-    D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
-    SDL_Rect rect;
-    RECT d3drect;
-    D3DLOCKED_RECT locked;
-    HRESULT result;
-
-    d3drect.left = 0;
-    d3drect.right = texture->w;
-    d3drect.top = 0;
-    d3drect.bottom = texture->h;
-
-    result =
-        IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
-    if (FAILED(result)) {
-        return;
+    switch (format) {
+    case D3DFMT_R5G6B5:
+        return SDL_PIXELFORMAT_RGB565;
+    case D3DFMT_X8R8G8B8:
+        return SDL_PIXELFORMAT_RGB888;
+    case D3DFMT_A8R8G8B8:
+        return SDL_PIXELFORMAT_ARGB8888;
+    default:
+        return SDL_PIXELFORMAT_UNKNOWN;
     }
-
-    rect.x = 0;
-    rect.y = 0;
-    rect.w = texture->w;
-    rect.h = texture->h;
-    SDL_SW_CopyYUVToRGB(data->yuv, &rect, data->format, texture->w,
-                        texture->h, locked.pBits, locked.Pitch);
-
-    IDirect3DTexture9_UnlockRect(data->texture, 0);
-}
-
-static void
-D3D_AddTextureFormats(D3D_RenderData *data, SDL_RendererInfo *info)
-{
-    int i;
-    int formats[] = {
-        SDL_PIXELFORMAT_RGB332,
-        SDL_PIXELFORMAT_RGB444,
-        SDL_PIXELFORMAT_RGB555,
-        SDL_PIXELFORMAT_ARGB4444,
-        SDL_PIXELFORMAT_ARGB1555,
-        SDL_PIXELFORMAT_RGB565,
-        SDL_PIXELFORMAT_RGB888,
-        SDL_PIXELFORMAT_ARGB8888,
-        SDL_PIXELFORMAT_ARGB2101010,
-    };
-
-    info->num_texture_formats = 0;
-    for (i = 0; i < SDL_arraysize(formats); ++i) {
-        if (D3D_IsTextureFormatAvailable
-            (data->d3d, data->adapter, data->pparams.BackBufferFormat, PixelFormatToD3DFMT(formats[i]))) {
-            info->texture_formats[info->num_texture_formats++] = formats[i];
-        }
-    }
-    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YV12;
-    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_IYUV;
-    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YUY2;
-    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_UYVY;
-    info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YVYU;
 }
 
 SDL_Renderer *
@@ -367,6 +277,9 @@
     D3DPRESENT_PARAMETERS pparams;
     IDirect3DSwapChain9 *chain;
     D3DCAPS9 caps;
+    Uint32 window_flags;
+    int w, h;
+    SDL_DisplayMode fullscreen_mode;
 
     renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
     if (!renderer) {
@@ -404,11 +317,9 @@
     }
 
     renderer->CreateTexture = D3D_CreateTexture;
-    renderer->QueryTexturePixels = D3D_QueryTexturePixels;
     renderer->UpdateTexture = D3D_UpdateTexture;
     renderer->LockTexture = D3D_LockTexture;
     renderer->UnlockTexture = D3D_UnlockTexture;
-    renderer->DirtyTexture = D3D_DirtyTexture;
     renderer->RenderDrawPoints = D3D_RenderDrawPoints;
     renderer->RenderDrawLines = D3D_RenderDrawLines;
     renderer->RenderFillRects = D3D_RenderFillRects;
@@ -427,23 +338,27 @@
     SDL_VERSION(&windowinfo.version);
     SDL_GetWindowWMInfo(window, &windowinfo);
 
+    window_flags = SDL_GetWindowFlags(window);
+    SDL_GetWindowSize(window, &w, &h);
+    SDL_GetWindowDisplayMode(window, &fullscreen_mode);
+
     SDL_zero(pparams);
     pparams.hDeviceWindow = windowinfo.info.win.window;
-    pparams.BackBufferWidth = window->w;
-    pparams.BackBufferHeight = window->h;
-    if (window->flags & SDL_WINDOW_FULLSCREEN) {
+    pparams.BackBufferWidth = w;
+    pparams.BackBufferHeight = h;
+    if (window_flags & SDL_WINDOW_FULLSCREEN) {
         pparams.BackBufferFormat =
-            PixelFormatToD3DFMT(window->fullscreen_mode.format);
+            PixelFormatToD3DFMT(fullscreen_mode.format);
     } else {
         pparams.BackBufferFormat = D3DFMT_UNKNOWN;
     }
     pparams.BackBufferCount = 1;
     pparams.SwapEffect = D3DSWAPEFFECT_DISCARD;
 
-    if (window->flags & SDL_WINDOW_FULLSCREEN) {
+    if (window_flags & SDL_WINDOW_FULLSCREEN) {
         pparams.Windowed = FALSE;
         pparams.FullScreen_RefreshRateInHz =
-            window->fullscreen_mode.refresh_rate;
+            fullscreen_mode.refresh_rate;
     } else {
         pparams.Windowed = TRUE;
         pparams.FullScreen_RefreshRateInHz = 0;
@@ -494,8 +409,6 @@
     }
     data->pparams = pparams;
 
-    D3D_AddTextureFormats(data, &renderer->info);
-
     IDirect3DDevice9_GetDeviceCaps(data->device, &caps);
     renderer->info.max_texture_width = caps.MaxTextureWidth;
     renderer->info.max_texture_height = caps.MaxTextureHeight;
@@ -594,22 +507,7 @@
 
     texture->driverdata = data;
 
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format) &&
-        (texture->format != SDL_PIXELFORMAT_YUY2 ||
-         !D3D_IsTextureFormatAvailable(renderdata->d3d, renderdata->adapter,
-                                       display_format, PixelFormatToD3DFMT(texture->format)))
-        && (texture->format != SDL_PIXELFORMAT_YVYU
-            || !D3D_IsTextureFormatAvailable(renderdata->d3d, renderdata->adapter,
-                                             display_format, PixelFormatToD3DFMT(texture->format)))) {
-        data->yuv =
-            SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
-        if (!data->yuv) {
-            return -1;
-        }
-        data->format = SDL_GetWindowPixelFormat(window);
-    } else {
-        data->format = texture->format;
-    }
+    data->format = texture->format;
 
     result =
         IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
@@ -625,153 +523,118 @@
 }
 
 static int
-D3D_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
-                       void **pixels, int *pitch)
-{
-    D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
-
-    if (data->yuv) {
-        return SDL_SW_QueryYUVTexturePixels(data->yuv, pixels, pitch);
-    } else {
-        /* D3D textures don't have their pixels hanging out */
-        return -1;
-    }
-}
-
-static int
 D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                   const SDL_Rect * rect, const void *pixels, int pitch)
 {
     D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
     D3D_RenderData *renderdata = (D3D_RenderData *) renderer->driverdata;
 
-    if (data->yuv) {
-        if (SDL_SW_UpdateYUVTexture(data->yuv, rect, pixels, pitch) < 0) {
-            return -1;
-        }
-        UpdateYUVTextureData(texture);
-        return 0;
-    } else {
 #ifdef SDL_MEMORY_POOL_DEFAULT
-        IDirect3DTexture9 *temp;
-        RECT d3drect;
-        D3DLOCKED_RECT locked;
-        const Uint8 *src;
-        Uint8 *dst;
-        int row, length;
-        HRESULT result;
+    IDirect3DTexture9 *temp;
+    RECT d3drect;
+    D3DLOCKED_RECT locked;
+    const Uint8 *src;
+    Uint8 *dst;
+    int row, length;
+    HRESULT result;
 
-        result =
-            IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
-                                           texture->h, 1, 0,
-                                           PixelFormatToD3DFMT(texture->
-                                                               format),
-                                           D3DPOOL_SYSTEMMEM, &temp, NULL);
-        if (FAILED(result)) {
-            D3D_SetError("CreateTexture()", result);
-            return -1;
-        }
+    result =
+        IDirect3DDevice9_CreateTexture(renderdata->device, texture->w,
+                                       texture->h, 1, 0,
+                                       PixelFormatToD3DFMT(texture-> format),
+                                       D3DPOOL_SYSTEMMEM, &temp, NULL);
+    if (FAILED(result)) {
+        D3D_SetError("CreateTexture()", result);
+        return -1;
+    }
 
-        d3drect.left = rect->x;
-        d3drect.right = rect->x + rect->w;
-        d3drect.top = rect->y;
-        d3drect.bottom = rect->y + rect->h;
+    d3drect.left = rect->x;
+    d3drect.right = rect->x + rect->w;
+    d3drect.top = rect->y;
+    d3drect.bottom = rect->y + rect->h;
 
-        result = IDirect3DTexture9_LockRect(temp, 0, &locked, &d3drect, 0);
-        if (FAILED(result)) {
-            IDirect3DTexture9_Release(temp);
-            D3D_SetError("LockRect()", result);
-            return -1;
-        }
+    result = IDirect3DTexture9_LockRect(temp, 0, &locked, &d3drect, 0);
+    if (FAILED(result)) {
+        IDirect3DTexture9_Release(temp);
+        D3D_SetError("LockRect()", result);
+        return -1;
+    }
 
-        src = pixels;
-        dst = locked.pBits;
-        length = rect->w * SDL_BYTESPERPIXEL(texture->format);
-        for (row = 0; row < rect->h; ++row) {
-            SDL_memcpy(dst, src, length);
-            src += pitch;
-            dst += locked.Pitch;
-        }
-        IDirect3DTexture9_UnlockRect(temp, 0);
+    src = pixels;
+    dst = locked.pBits;
+    length = rect->w * SDL_BYTESPERPIXEL(texture->format);
+    for (row = 0; row < rect->h; ++row) {
+        SDL_memcpy(dst, src, length);
+        src += pitch;
+        dst += locked.Pitch;
+    }
+    IDirect3DTexture9_UnlockRect(temp, 0);
 
-        result =
-            IDirect3DDevice9_UpdateTexture(renderdata->device,
-                                           (IDirect3DBaseTexture9 *) temp,
-                                           (IDirect3DBaseTexture9 *)
-                                           data->texture);
-        IDirect3DTexture9_Release(temp);
-        if (FAILED(result)) {
-            D3D_SetError("UpdateTexture()", result);
-            return -1;
-        }
+    result =
+        IDirect3DDevice9_UpdateTexture(renderdata->device,
+                                       (IDirect3DBaseTexture9 *) temp,
+                                       (IDirect3DBaseTexture9 *)
+                                       data->texture);
+    IDirect3DTexture9_Release(temp);
+    if (FAILED(result)) {
+        D3D_SetError("UpdateTexture()", result);
+        return -1;
+    }
 #else
-        RECT d3drect;
-        D3DLOCKED_RECT locked;
-        const Uint8 *src;
-        Uint8 *dst;
-        int row, length;
-        HRESULT result;
+    RECT d3drect;
+    D3DLOCKED_RECT locked;
+    const Uint8 *src;
+    Uint8 *dst;
+    int row, length;
+    HRESULT result;
 
-        d3drect.left = rect->x;
-        d3drect.right = rect->x + rect->w;
-        d3drect.top = rect->y;
-        d3drect.bottom = rect->y + rect->h;
+    d3drect.left = rect->x;
+    d3drect.right = rect->x + rect->w;
+    d3drect.top = rect->y;
+    d3drect.bottom = rect->y + rect->h;
 
-        result =
-            IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect,
-                                       0);
-        if (FAILED(result)) {
-            D3D_SetError("LockRect()", result);
-            return -1;
-        }
+    result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
+    if (FAILED(result)) {
+        D3D_SetError("LockRect()", result);
+        return -1;
+    }
 
-        src = pixels;
-        dst = locked.pBits;
-        length = rect->w * SDL_BYTESPERPIXEL(texture->format);
-        for (row = 0; row < rect->h; ++row) {
-            SDL_memcpy(dst, src, length);
-            src += pitch;
-            dst += locked.Pitch;
-        }
-        IDirect3DTexture9_UnlockRect(data->texture, 0);
+    src = pixels;
+    dst = locked.pBits;
+    length = rect->w * SDL_BYTESPERPIXEL(texture->format);
+    for (row = 0; row < rect->h; ++row) {
+        SDL_memcpy(dst, src, length);
+        src += pitch;
+        dst += locked.Pitch;
+    }
+    IDirect3DTexture9_UnlockRect(data->texture, 0);
 #endif // SDL_MEMORY_POOL_DEFAULT
 
-        return 0;
-    }
+    return 0;
 }
 
 static int
 D3D_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                const SDL_Rect * rect, int markDirty, void **pixels,
-                int *pitch)
+                const SDL_Rect * rect, void **pixels, int *pitch)
 {
     D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
+    RECT d3drect;
+    D3DLOCKED_RECT locked;
+    HRESULT result;
 
-    if (data->yuv) {
-        return SDL_SW_LockYUVTexture(data->yuv, rect, markDirty, pixels,
-                                     pitch);
-    } else {
-        RECT d3drect;
-        D3DLOCKED_RECT locked;
-        HRESULT result;
+    d3drect.left = rect->x;
+    d3drect.right = rect->x + rect->w;
+    d3drect.top = rect->y;
+    d3drect.bottom = rect->y + rect->h;
 
-        d3drect.left = rect->x;
-        d3drect.right = rect->x + rect->w;
-        d3drect.top = rect->y;
-        d3drect.bottom = rect->y + rect->h;
-
-        result =
-            IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect,
-                                       markDirty ? 0 :
-                                       D3DLOCK_NO_DIRTY_UPDATE);
-        if (FAILED(result)) {
-            D3D_SetError("LockRect()", result);
-            return -1;
-        }
-        *pixels = locked.pBits;
-        *pitch = locked.Pitch;
-        return 0;
+    result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
+    if (FAILED(result)) {
+        D3D_SetError("LockRect()", result);
+        return -1;
     }
+    *pixels = locked.pBits;
+    *pitch = locked.Pitch;
+    return 0;
 }
 
 static void
@@ -779,32 +642,7 @@
 {
     D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
 
-    if (data->yuv) {
-        SDL_SW_UnlockYUVTexture(data->yuv);
-        UpdateYUVTextureData(texture);
-    } else {
-        IDirect3DTexture9_UnlockRect(data->texture, 0);
-    }
-}
-
-static void
-D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, int numrects,
-                 const SDL_Rect * rects)
-{
-    D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
-    RECT d3drect;
-    int i;
-
-    for (i = 0; i < numrects; ++i) {
-        const SDL_Rect *rect = &rects[i];
-
-        d3drect.left = rect->x;
-        d3drect.right = rect->x + rect->w;
-        d3drect.top = rect->y;
-        d3drect.bottom = rect->y + rect->h;
-
-        IDirect3DTexture9_AddDirtyRect(data->texture, &d3drect);
-    }
+    IDirect3DTexture9_UnlockRect(data->texture, 0);
 }
 
 static void
@@ -1123,8 +961,6 @@
                      Uint32 format, void * pixels, int pitch)
 {
     D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
-    SDL_Window *window = renderer->window;
-    SDL_VideoDisplay *display = window->display;
     D3DSURFACE_DESC desc;
     LPDIRECT3DSURFACE9 backBuffer;
     LPDIRECT3DSURFACE9 surface;
@@ -1174,7 +1010,7 @@
     }
 
     SDL_ConvertPixels(rect->w, rect->h,
-                      display->current_mode.format, locked.pBits, locked.Pitch,
+                      D3DFMTToPixelFormat(desc.Format), locked.pBits, locked.Pitch,
                       format, pixels, pitch);
 
     IDirect3DSurface9_UnlockRect(surface);
@@ -1227,9 +1063,6 @@
     if (!data) {
         return;
     }
-    if (data->yuv) {
-        SDL_SW_DestroyYUVTexture(data->yuv);
-    }
     if (data->texture) {
         IDirect3DTexture9_Release(data->texture);
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/render/mmx.h	Thu Feb 03 00:19:40 2011 -0800
@@ -0,0 +1,642 @@
+/*	mmx.h
+
+	MultiMedia eXtensions GCC interface library for IA32.
+
+	To use this library, simply include this header file
+	and compile with GCC.  You MUST have inlining enabled
+	in order for mmx_ok() to work; this can be done by
+	simply using -O on the GCC command line.
+
+	Compiling with -DMMX_TRACE will cause detailed trace
+	output to be sent to stdout for each mmx operation.
+	This adds lots of code, and obviously slows execution to
+	a crawl, but can be very useful for debugging.
+
+	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
+	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
+	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+	AND FITNESS FOR ANY PARTICULAR PURPOSE.
+
+	1997-99 by H. Dietz and R. Fisher
+
+ Notes:
+	It appears that the latest gas has the pand problem fixed, therefore
+	  I'll undefine BROKEN_PAND by default.
+*/
+
+#ifndef _MMX_H
+#define _MMX_H
+
+
+/*	Warning:  at this writing, the version of GAS packaged
+	with most Linux distributions does not handle the
+	parallel AND operation mnemonic correctly.  If the
+	symbol BROKEN_PAND is defined, a slower alternative
+	coding will be used.  If execution of mmxtest results
+	in an illegal instruction fault, define this symbol.
+*/
+#undef	BROKEN_PAND
+
+
+/*	The type of a value that fits in an MMX register
+	(note that long long constant values MUST be suffixed
+	 by LL and unsigned long long values by ULL, lest
+	 they be truncated by the compiler)
+*/
+typedef union
+{
+    long long q;                /* Quadword (64-bit) value */
+    unsigned long long uq;      /* Unsigned Quadword */
+    int d[2];                   /* 2 Doubleword (32-bit) values */
+    unsigned int ud[2];         /* 2 Unsigned Doubleword */
+    short w[4];                 /* 4 Word (16-bit) values */
+    unsigned short uw[4];       /* 4 Unsigned Word */
+    char b[8];                  /* 8 Byte (8-bit) values */
+    unsigned char ub[8];        /* 8 Unsigned Byte */
+    float s[2];                 /* 2 Single-precision (32-bit) values */
+} __attribute__ ((aligned(8))) mmx_t;   /* On an 8-byte (64-bit) boundary */
+
+
+#if 0
+/*	Function to test if multimedia instructions are supported...
+	(disabled: this function sits inside the enclosing "#if 0" block) */
+inline extern int
+mm_support(void)
+{
+    /* Returns 1 if MMX instructions are supported,
+       3 if Cyrix MMX and Extended MMX instructions are supported
+       5 if AMD MMX and 3DNow! instructions are supported
+       0 if hardware does not support any of these
+     */
+    register int rval = 0;
+    /* NOTE(review): the asm below names "eax" both as the "=a" output and as a clobber, and its "movl $N, %0:" lines end in a ':' -- confirm it still assembles before re-enabling */
+    __asm__ __volatile__(
+                            /* See if CPUID instruction is supported ... */
+                            /* ... Get copies of EFLAGS into eax and ecx */
+                            "pushf\n\t"
+                            "popl %%eax\n\t" "movl %%eax, %%ecx\n\t"
+                            /* ... Toggle the ID bit in one copy and store */
+                            /*     to the EFLAGS reg */
+                            "xorl $0x200000, %%eax\n\t"
+                            "push %%eax\n\t" "popf\n\t"
+                            /* ... Get the (hopefully modified) EFLAGS */
+                            "pushf\n\t" "popl %%eax\n\t"
+                            /* ... Compare and test result */
+                            "xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */
+                            /* Get standard CPUID information, and
+                               go to a specific vendor section */
+                            "movl $0, %%eax\n\t" "cpuid\n\t"
+                            /* Check for Intel */
+                            "cmpl $0x756e6547, %%ebx\n\t"
+                            "jne TryAMD\n\t"
+                            "cmpl $0x49656e69, %%edx\n\t"
+                            "jne TryAMD\n\t"
+                            "cmpl $0x6c65746e, %%ecx\n"
+                            "jne TryAMD\n\t" "jmp Intel\n\t"
+                            /* Check for AMD */
+                            "\nTryAMD:\n\t"
+                            "cmpl $0x68747541, %%ebx\n\t"
+                            "jne TryCyrix\n\t"
+                            "cmpl $0x69746e65, %%edx\n\t"
+                            "jne TryCyrix\n\t"
+                            "cmpl $0x444d4163, %%ecx\n"
+                            "jne TryCyrix\n\t" "jmp AMD\n\t"
+                            /* Check for Cyrix */
+                            "\nTryCyrix:\n\t"
+                            "cmpl $0x69727943, %%ebx\n\t"
+                            "jne NotSupported2\n\t"
+                            "cmpl $0x736e4978, %%edx\n\t"
+                            "jne NotSupported3\n\t"
+                            "cmpl $0x64616574, %%ecx\n\t"
+                            "jne NotSupported4\n\t"
+                            /* Drop through to Cyrix... */
+                            /* Cyrix Section */
+                            /* See if extended CPUID level 80000001 is supported */
+                            /* The value of CPUID/80000001 for the 6x86MX is undefined
+                               according to the Cyrix CPU Detection Guide (Preliminary
+                               Rev. 1.01 table 1), so we'll check the value of eax for
+                               CPUID/0 to see if standard CPUID level 2 is supported.
+                               According to the table, the only CPU which supports level
+                               2 is also the only one which supports extended CPUID levels.
+                             */
+                            "cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t"    /* Use standard CPUID instead */
+                            /* Extended CPUID supported (in theory), so get extended
+                               features */
+                            "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t"    /* Test for MMX */
+                            "jz NotSupported5\n\t"      /* MMX not supported */
+                            "testl $0x01000000, %%eax\n\t"      /* Test for Ext'd MMX */
+                            "jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t"        /* MMX Supported */
+                            "jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t"  /* EMMX and MMX Supported */
+                            "jmp Return\n\t"
+                            /* AMD Section */
+                            "AMD:\n\t"
+                            /* See if extended CPUID is supported */
+                            "movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t"    /* Use standard CPUID instead */
+                            /* Extended CPUID supported, so get extended features */
+                            "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t"    /* Test for MMX */
+                            "jz NotSupported6\n\t"      /* MMX not supported */
+                            "testl $0x80000000, %%edx\n\t"      /* Test for 3DNow! */
+                            "jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t"   /* MMX Supported */
+                            "jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t"     /* 3DNow! and MMX Supported */
+                            "jmp Return\n\t"
+                            /* Intel Section */
+                            "Intel:\n\t"
+                            /* Check for MMX */
+                            "MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t"      /* Test for MMX */
+                            "jz NotSupported7\n\t"      /* MMX Not supported */
+                            "movl $1, %0:\n\n\t"        /* MMX Supported */
+                            "jmp Return\n\t"
+                            /* Nothing supported */
+                            "\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval):     /* no input */
+                            :"eax", "ebx", "ecx", "edx");
+
+    /* Return */
+    return (rval);
+}
+
+/*	Function to test if mmx instructions are supported...
+	(tests bit 0 of mm_support(); also inside the disabled "#if 0" block) */
+inline extern int
+mmx_ok(void)
+{
+    /* Returns 1 if MMX instructions are supported, 0 otherwise */
+    return (mm_support() & 0x1);
+}
+#endif
+
+/*	Helper functions for the instruction macros that follow...
+	(note that memory-to-register, m2r, instructions are nearly
+	 as efficient as register-to-register, r2r, instructions;
+	 however, memory-to-memory instructions are really simulated
+	 as a convenience, and are only 1/3 as efficient)
+*/
+#ifdef	MMX_TRACE
+
+/*	Include the stuff for printing a trace to stdout...
+*/
+/*	Traced immediate-to-register op: prints imm and reg, runs op, prints the new reg */
+#define	mmx_i2r(op, imm, reg) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace.uq = (imm); \
+		printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		printf(#reg "=0x%08x%08x) => ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		__asm__ __volatile__ (#op " %0, %%" #reg \
+				      : /* nothing */ \
+				      : "X" (imm)); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		printf(#reg "=0x%08x%08x\n", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+	}
+/*	Traced memory-to-register op: prints mem and reg, runs op, prints the new reg */
+#define	mmx_m2r(op, mem, reg) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (mem); \
+		printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		printf(#reg "=0x%08x%08x) => ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		__asm__ __volatile__ (#op " %0, %%" #reg \
+				      : /* nothing */ \
+				      : "X" (mem)); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		printf(#reg "=0x%08x%08x\n", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+	}
+
+#define	mmx_r2m(op, reg, mem) \
+	{ \
+		mmx_t mmx_trace; \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		mmx_trace = (mem); \
+		printf(#mem "=0x%08x%08x) => ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		__asm__ __volatile__ (#op " %%" #reg ", %0" \
+				      : "=X" (mem) \
+				      : /* nothing */ ); \
+		mmx_trace = (mem); \
+		printf(#mem "=0x%08x%08x\n", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+	}
+
+#define	mmx_r2r(op, regs, regd) \
+	{ \
+		mmx_t mmx_trace; \
+		__asm__ __volatile__ ("movq %%" #regs ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		__asm__ __volatile__ ("movq %%" #regd ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		printf(#regd "=0x%08x%08x) => ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
+		__asm__ __volatile__ ("movq %%" #regd ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		printf(#regd "=0x%08x%08x\n", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+	}
+
+#define	mmx_m2m(op, mems, memd) /* memd = memd <op> mems, staged through mm0 (overwritten) */ \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (mems); \
+		printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		mmx_trace = (memd); \
+		printf(#memd "=0x%08x%08x) => ", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+				      #op " %1, %%mm0\n\t" \
+				      "movq %%mm0, %0" \
+				      : "+m" (memd) /* read by the first movq, then written back */ \
+				      : "m" (mems)); \
+		mmx_trace = (memd); \
+		printf(#memd "=0x%08x%08x\n", \
+			mmx_trace.d[1], mmx_trace.d[0]); \
+	}
+
+#else
+
+/*	These macros are a lot simpler without the tracing...
+*/
+
+#define	mmx_i2r(op, imm, reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (imm) )
+
+#define	mmx_m2r(op, mem, reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "m" (mem))
+
+#define	mmx_r2m(op, reg, mem) \
+	__asm__ __volatile__ (#op " %%" #reg ", %0" \
+			      : "=m" (mem) \
+			      : /* nothing */ )
+
+#define	mmx_r2r(op, regs, regd) \
+	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+#define	mmx_m2m(op, mems, memd) /* memd = memd <op> mems, staged through mm0 (overwritten) */ \
+	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+			      #op " %1, %%mm0\n\t" \
+			      "movq %%mm0, %0" \
+			      : "+m" (memd) /* read by the first movq, then written back */ \
+			      : "m" (mems))
+
+#endif
+
+
+/*	1x64 MOVe Quadword
+	(this is both a load and a store...
+	 in fact, it is the only way to store)
+*/
+#define	movq_m2r(var, reg)	mmx_m2r(movq, var, reg)
+#define	movq_r2m(reg, var)	mmx_r2m(movq, reg, var)
+#define	movq_r2r(regs, regd)	mmx_r2r(movq, regs, regd)
+#define	movq(vars, vard) \
+	__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
+			      "movq %%mm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/*	1x32 MOVe Doubleword
+	(like movq, this is both load and store...
+	 but is most useful for moving things between
+	 mmx registers and ordinary registers)
+*/
+#define	movd_m2r(var, reg)	mmx_m2r(movd, var, reg)
+#define	movd_r2m(reg, var)	mmx_r2m(movd, reg, var)
+#define	movd_r2r(regs, regd)	mmx_r2r(movd, regs, regd)
+#define	movd(vars, vard) \
+	__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
+			      "movd %%mm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/*	2x32, 4x16, and 8x8 Parallel ADDs
+*/
+#define	paddd_m2r(var, reg)	mmx_m2r(paddd, var, reg)
+#define	paddd_r2r(regs, regd)	mmx_r2r(paddd, regs, regd)
+#define	paddd(vars, vard)	mmx_m2m(paddd, vars, vard)
+
+#define	paddw_m2r(var, reg)	mmx_m2r(paddw, var, reg)
+#define	paddw_r2r(regs, regd)	mmx_r2r(paddw, regs, regd)
+#define	paddw(vars, vard)	mmx_m2m(paddw, vars, vard)
+
+#define	paddb_m2r(var, reg)	mmx_m2r(paddb, var, reg)
+#define	paddb_r2r(regs, regd)	mmx_r2r(paddb, regs, regd)
+#define	paddb(vars, vard)	mmx_m2m(paddb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel ADDs using Saturation arithmetic
+*/
+#define	paddsw_m2r(var, reg)	mmx_m2r(paddsw, var, reg)
+#define	paddsw_r2r(regs, regd)	mmx_r2r(paddsw, regs, regd)
+#define	paddsw(vars, vard)	mmx_m2m(paddsw, vars, vard)
+
+#define	paddsb_m2r(var, reg)	mmx_m2r(paddsb, var, reg)
+#define	paddsb_r2r(regs, regd)	mmx_r2r(paddsb, regs, regd)
+#define	paddsb(vars, vard)	mmx_m2m(paddsb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
+*/
+#define	paddusw_m2r(var, reg)	mmx_m2r(paddusw, var, reg)
+#define	paddusw_r2r(regs, regd)	mmx_r2r(paddusw, regs, regd)
+#define	paddusw(vars, vard)	mmx_m2m(paddusw, vars, vard)
+
+#define	paddusb_m2r(var, reg)	mmx_m2r(paddusb, var, reg)
+#define	paddusb_r2r(regs, regd)	mmx_r2r(paddusb, regs, regd)
+#define	paddusb(vars, vard)	mmx_m2m(paddusb, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel SUBs
+*/
+#define	psubd_m2r(var, reg)	mmx_m2r(psubd, var, reg)
+#define	psubd_r2r(regs, regd)	mmx_r2r(psubd, regs, regd)
+#define	psubd(vars, vard)	mmx_m2m(psubd, vars, vard)
+
+#define	psubw_m2r(var, reg)	mmx_m2r(psubw, var, reg)
+#define	psubw_r2r(regs, regd)	mmx_r2r(psubw, regs, regd)
+#define	psubw(vars, vard)	mmx_m2m(psubw, vars, vard)
+
+#define	psubb_m2r(var, reg)	mmx_m2r(psubb, var, reg)
+#define	psubb_r2r(regs, regd)	mmx_r2r(psubb, regs, regd)
+#define	psubb(vars, vard)	mmx_m2m(psubb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel SUBs using Saturation arithmetic
+*/
+#define	psubsw_m2r(var, reg)	mmx_m2r(psubsw, var, reg)
+#define	psubsw_r2r(regs, regd)	mmx_r2r(psubsw, regs, regd)
+#define	psubsw(vars, vard)	mmx_m2m(psubsw, vars, vard)
+
+#define	psubsb_m2r(var, reg)	mmx_m2r(psubsb, var, reg)
+#define	psubsb_r2r(regs, regd)	mmx_r2r(psubsb, regs, regd)
+#define	psubsb(vars, vard)	mmx_m2m(psubsb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
+*/
+#define	psubusw_m2r(var, reg)	mmx_m2r(psubusw, var, reg)
+#define	psubusw_r2r(regs, regd)	mmx_r2r(psubusw, regs, regd)
+#define	psubusw(vars, vard)	mmx_m2m(psubusw, vars, vard)
+
+#define	psubusb_m2r(var, reg)	mmx_m2r(psubusb, var, reg)
+#define	psubusb_r2r(regs, regd)	mmx_r2r(psubusb, regs, regd)
+#define	psubusb(vars, vard)	mmx_m2m(psubusb, vars, vard)
+
+
+/*	4x16 Parallel MULs giving Low 4x16 portions of results
+*/
+#define	pmullw_m2r(var, reg)	mmx_m2r(pmullw, var, reg)
+#define	pmullw_r2r(regs, regd)	mmx_r2r(pmullw, regs, regd)
+#define	pmullw(vars, vard)	mmx_m2m(pmullw, vars, vard)
+
+
+/*	4x16 Parallel MULs giving High 4x16 portions of results
+*/
+#define	pmulhw_m2r(var, reg)	mmx_m2r(pmulhw, var, reg)
+#define	pmulhw_r2r(regs, regd)	mmx_r2r(pmulhw, regs, regd)
+#define	pmulhw(vars, vard)	mmx_m2m(pmulhw, vars, vard)
+
+
+/*	4x16->2x32 Parallel Mul-ADD
+	(muls like pmullw, then adds adjacent 16-bit fields
+	 in the multiply result to make the final 2x32 result)
+*/
+#define	pmaddwd_m2r(var, reg)	mmx_m2r(pmaddwd, var, reg)
+#define	pmaddwd_r2r(regs, regd)	mmx_r2r(pmaddwd, regs, regd)
+#define	pmaddwd(vars, vard)	mmx_m2m(pmaddwd, vars, vard)
+
+
+/*	1x64 bitwise AND
+*/
+#ifdef	BROKEN_PAND
+#define	pand_m2r(var, reg) \
+	{ \
+		mmx_m2r(pandn, (mmx_t) -1LL, reg); \
+		mmx_m2r(pandn, var, reg); \
+	}
+#define	pand_r2r(regs, regd) /* regd &= regs, built from two pandn ops */ \
+	{ \
+		mmx_m2r(pandn, (mmx_t) -1LL, regd); /* regd = ~regd & all-ones = ~regd */ \
+		mmx_r2r(pandn, regs, regd); /* regd = ~(~regd) & regs */ \
+	}
+#define	pand(vars, vard) \
+	{ \
+		movq_m2r(vard, mm0); \
+		mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
+		mmx_m2r(pandn, vars, mm0); \
+		movq_r2m(mm0, vard); \
+	}
+#else
+#define	pand_m2r(var, reg)	mmx_m2r(pand, var, reg)
+#define	pand_r2r(regs, regd)	mmx_r2r(pand, regs, regd)
+#define	pand(vars, vard)	mmx_m2m(pand, vars, vard)
+#endif
+
+
+/*	1x64 bitwise AND with Not the destination
+*/
+#define	pandn_m2r(var, reg)	mmx_m2r(pandn, var, reg)
+#define	pandn_r2r(regs, regd)	mmx_r2r(pandn, regs, regd)
+#define	pandn(vars, vard)	mmx_m2m(pandn, vars, vard)
+
+
+/*	1x64 bitwise OR
+*/
+#define	por_m2r(var, reg)	mmx_m2r(por, var, reg)
+#define	por_r2r(regs, regd)	mmx_r2r(por, regs, regd)
+#define	por(vars, vard)	mmx_m2m(por, vars, vard)
+
+
+/*	1x64 bitwise eXclusive OR
+*/
+#define	pxor_m2r(var, reg)	mmx_m2r(pxor, var, reg)
+#define	pxor_r2r(regs, regd)	mmx_r2r(pxor, regs, regd)
+#define	pxor(vars, vard)	mmx_m2m(pxor, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
+	(resulting fields are either 0 or -1)
+*/
+#define	pcmpeqd_m2r(var, reg)	mmx_m2r(pcmpeqd, var, reg)
+#define	pcmpeqd_r2r(regs, regd)	mmx_r2r(pcmpeqd, regs, regd)
+#define	pcmpeqd(vars, vard)	mmx_m2m(pcmpeqd, vars, vard)
+
+#define	pcmpeqw_m2r(var, reg)	mmx_m2r(pcmpeqw, var, reg)
+#define	pcmpeqw_r2r(regs, regd)	mmx_r2r(pcmpeqw, regs, regd)
+#define	pcmpeqw(vars, vard)	mmx_m2m(pcmpeqw, vars, vard)
+
+#define	pcmpeqb_m2r(var, reg)	mmx_m2r(pcmpeqb, var, reg)
+#define	pcmpeqb_r2r(regs, regd)	mmx_r2r(pcmpeqb, regs, regd)
+#define	pcmpeqb(vars, vard)	mmx_m2m(pcmpeqb, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
+	(resulting fields are either 0 or -1)
+*/
+#define	pcmpgtd_m2r(var, reg)	mmx_m2r(pcmpgtd, var, reg)
+#define	pcmpgtd_r2r(regs, regd)	mmx_r2r(pcmpgtd, regs, regd)
+#define	pcmpgtd(vars, vard)	mmx_m2m(pcmpgtd, vars, vard)
+
+#define	pcmpgtw_m2r(var, reg)	mmx_m2r(pcmpgtw, var, reg)
+#define	pcmpgtw_r2r(regs, regd)	mmx_r2r(pcmpgtw, regs, regd)
+#define	pcmpgtw(vars, vard)	mmx_m2m(pcmpgtw, vars, vard)
+
+#define	pcmpgtb_m2r(var, reg)	mmx_m2r(pcmpgtb, var, reg)
+#define	pcmpgtb_r2r(regs, regd)	mmx_r2r(pcmpgtb, regs, regd)
+#define	pcmpgtb(vars, vard)	mmx_m2m(pcmpgtb, vars, vard)
+
+
+/*	1x64, 2x32, and 4x16 Parallel Shift Left Logical
+*/
+#define	psllq_i2r(imm, reg)	mmx_i2r(psllq, imm, reg)
+#define	psllq_m2r(var, reg)	mmx_m2r(psllq, var, reg)
+#define	psllq_r2r(regs, regd)	mmx_r2r(psllq, regs, regd)
+#define	psllq(vars, vard)	mmx_m2m(psllq, vars, vard)
+
+#define	pslld_i2r(imm, reg)	mmx_i2r(pslld, imm, reg)
+#define	pslld_m2r(var, reg)	mmx_m2r(pslld, var, reg)
+#define	pslld_r2r(regs, regd)	mmx_r2r(pslld, regs, regd)
+#define	pslld(vars, vard)	mmx_m2m(pslld, vars, vard)
+
+#define	psllw_i2r(imm, reg)	mmx_i2r(psllw, imm, reg)
+#define	psllw_m2r(var, reg)	mmx_m2r(psllw, var, reg)
+#define	psllw_r2r(regs, regd)	mmx_r2r(psllw, regs, regd)
+#define	psllw(vars, vard)	mmx_m2m(psllw, vars, vard)
+
+
+/*	1x64, 2x32, and 4x16 Parallel Shift Right Logical
+*/
+#define	psrlq_i2r(imm, reg)	mmx_i2r(psrlq, imm, reg)
+#define	psrlq_m2r(var, reg)	mmx_m2r(psrlq, var, reg)
+#define	psrlq_r2r(regs, regd)	mmx_r2r(psrlq, regs, regd)
+#define	psrlq(vars, vard)	mmx_m2m(psrlq, vars, vard)
+
+#define	psrld_i2r(imm, reg)	mmx_i2r(psrld, imm, reg)
+#define	psrld_m2r(var, reg)	mmx_m2r(psrld, var, reg)
+#define	psrld_r2r(regs, regd)	mmx_r2r(psrld, regs, regd)
+#define	psrld(vars, vard)	mmx_m2m(psrld, vars, vard)
+
+#define	psrlw_i2r(imm, reg)	mmx_i2r(psrlw, imm, reg)
+#define	psrlw_m2r(var, reg)	mmx_m2r(psrlw, var, reg)
+#define	psrlw_r2r(regs, regd)	mmx_r2r(psrlw, regs, regd)
+#define	psrlw(vars, vard)	mmx_m2m(psrlw, vars, vard)
+
+
+/*	2x32 and 4x16 Parallel Shift Right Arithmetic
+*/
+#define	psrad_i2r(imm, reg)	mmx_i2r(psrad, imm, reg)
+#define	psrad_m2r(var, reg)	mmx_m2r(psrad, var, reg)
+#define	psrad_r2r(regs, regd)	mmx_r2r(psrad, regs, regd)
+#define	psrad(vars, vard)	mmx_m2m(psrad, vars, vard)
+
+#define	psraw_i2r(imm, reg)	mmx_i2r(psraw, imm, reg)
+#define	psraw_m2r(var, reg)	mmx_m2r(psraw, var, reg)
+#define	psraw_r2r(regs, regd)	mmx_r2r(psraw, regs, regd)
+#define	psraw(vars, vard)	mmx_m2m(psraw, vars, vard)
+
+
+/*	2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
+	(packs source and dest fields into dest in that order)
+*/
+#define	packssdw_m2r(var, reg)	mmx_m2r(packssdw, var, reg)
+#define	packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
+#define	packssdw(vars, vard)	mmx_m2m(packssdw, vars, vard)
+
+#define	packsswb_m2r(var, reg)	mmx_m2r(packsswb, var, reg)
+#define	packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
+#define	packsswb(vars, vard)	mmx_m2m(packsswb, vars, vard)
+
+
+/*	4x16->8x8 PACK and Unsigned Saturate
+	(packs source and dest fields into dest in that order)
+*/
+#define	packuswb_m2r(var, reg)	mmx_m2r(packuswb, var, reg)
+#define	packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
+#define	packuswb(vars, vard)	mmx_m2m(packuswb, vars, vard)
+
+
+/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
+	(interleaves low half of dest with low half of source
+	 as padding in each result field)
+*/
+#define	punpckldq_m2r(var, reg)	mmx_m2r(punpckldq, var, reg)
+#define	punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
+#define	punpckldq(vars, vard)	mmx_m2m(punpckldq, vars, vard)
+
+#define	punpcklwd_m2r(var, reg)	mmx_m2r(punpcklwd, var, reg)
+#define	punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
+#define	punpcklwd(vars, vard)	mmx_m2m(punpcklwd, vars, vard)
+
+#define	punpcklbw_m2r(var, reg)	mmx_m2r(punpcklbw, var, reg)
+#define	punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
+#define	punpcklbw(vars, vard)	mmx_m2m(punpcklbw, vars, vard)
+
+
+/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
+	(interleaves high half of dest with high half of source
+	 as padding in each result field)
+*/
+#define	punpckhdq_m2r(var, reg)	mmx_m2r(punpckhdq, var, reg)
+#define	punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
+#define	punpckhdq(vars, vard)	mmx_m2m(punpckhdq, vars, vard)
+
+#define	punpckhwd_m2r(var, reg)	mmx_m2r(punpckhwd, var, reg)
+#define	punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
+#define	punpckhwd(vars, vard)	mmx_m2m(punpckhwd, vars, vard)
+
+#define	punpckhbw_m2r(var, reg)	mmx_m2r(punpckhbw, var, reg)
+#define	punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
+#define	punpckhbw(vars, vard)	mmx_m2m(punpckhbw, vars, vard)
+
+
+/*	Empty MMx State
+	(used to clean-up when going from mmx to float use
+	 of the registers that are shared by both; note that
+	 there is no float-to-mmx operation needed, because
+	 only the float tag word info is corruptible)
+*/
+#ifdef	MMX_TRACE
+
+#define	emms() \
+	{ \
+		printf("emms()\n"); \
+		__asm__ __volatile__ ("emms"); \
+	}
+
+#else
+
+#define	emms()			__asm__ __volatile__ ("emms")
+
+#endif
+
+#endif
+/* vi: set ts=4 sw=4 expandtab: */
--- a/src/render/opengl/SDL_renderer_gl.c	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/opengl/SDL_renderer_gl.c	Thu Feb 03 00:19:40 2011 -0800
@@ -37,27 +37,6 @@
    http://developer.apple.com/documentation/GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_texturedata/chapter_10_section_2.html
 */
 
-/* !!! FIXME: this should go in a higher level than the GL renderer. */
-static __inline__ int
-bytes_per_pixel(const Uint32 format)
-{
-    if (!SDL_ISPIXELFORMAT_FOURCC(format)) {
-        return SDL_BYTESPERPIXEL(format);
-    }
-
-    /* FOURCC format */
-    switch (format) {
-    case SDL_PIXELFORMAT_YV12:
-    case SDL_PIXELFORMAT_IYUV:
-    case SDL_PIXELFORMAT_YUY2:
-    case SDL_PIXELFORMAT_UYVY:
-    case SDL_PIXELFORMAT_YVYU:
-        return 2;
-    default:
-        return 1;               /* shouldn't ever hit this. */
-    }
-}
-
 /* Used to re-create the window with OpenGL capability */
 extern int SDL_RecreateWindow(SDL_Window * window, Uint32 flags);
 
@@ -67,18 +46,12 @@
 static void GL_WindowEvent(SDL_Renderer * renderer,
                            const SDL_WindowEvent *event);
 static int GL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int GL_QueryTexturePixels(SDL_Renderer * renderer,
-                                 SDL_Texture * texture, void **pixels,
-                                 int *pitch);
 static int GL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                             const SDL_Rect * rect, const void *pixels,
                             int pitch);
 static int GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                          const SDL_Rect * rect, int markDirty, void **pixels,
-                          int *pitch);
+                          const SDL_Rect * rect, void **pixels, int *pitch);
 static void GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static void GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                            int numrects, const SDL_Rect * rects);
 static int GL_RenderClear(SDL_Renderer * renderer);
 static int GL_RenderDrawPoints(SDL_Renderer * renderer,
                                const SDL_Point * points, int count);
@@ -102,21 +75,8 @@
     {
      "opengl",
      (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
-     13,
-     {
-      SDL_PIXELFORMAT_RGB332,
-      SDL_PIXELFORMAT_RGB444,
-      SDL_PIXELFORMAT_RGB555,
-      SDL_PIXELFORMAT_ARGB4444,
-      SDL_PIXELFORMAT_ARGB1555,
-      SDL_PIXELFORMAT_RGB565,
-      SDL_PIXELFORMAT_RGB24,
-      SDL_PIXELFORMAT_BGR24,
-      SDL_PIXELFORMAT_RGB888,
-      SDL_PIXELFORMAT_BGR888,
-      SDL_PIXELFORMAT_ARGB8888,
-      SDL_PIXELFORMAT_ABGR8888,
-      SDL_PIXELFORMAT_ARGB2101010},
+     1,
+     {SDL_PIXELFORMAT_ARGB8888},
      0,
      0}
 };
@@ -126,10 +86,6 @@
     SDL_GLContext context;
     SDL_bool updateSize;
     SDL_bool GL_ARB_texture_rectangle_supported;
-    SDL_bool GL_EXT_paletted_texture_supported;
-    SDL_bool GL_APPLE_ycbcr_422_supported;
-    SDL_bool GL_MESA_ycbcr_texture_supported;
-    SDL_bool GL_ARB_fragment_program_supported;
     int blendMode;
 
     /* OpenGL functions */
@@ -139,33 +95,18 @@
 
     void (*glTextureRangeAPPLE) (GLenum target, GLsizei length,
                                  const GLvoid * pointer);
-
-    PFNGLGETPROGRAMIVARBPROC glGetProgramivARB;
-    PFNGLGETPROGRAMSTRINGARBPROC glGetProgramStringARB;
-    PFNGLPROGRAMLOCALPARAMETER4FVARBPROC glProgramLocalParameter4fvARB;
-    PFNGLDELETEPROGRAMSARBPROC glDeleteProgramsARB;
-    PFNGLGENPROGRAMSARBPROC glGenProgramsARB;
-    PFNGLBINDPROGRAMARBPROC glBindProgramARB;
-    PFNGLPROGRAMSTRINGARBPROC glProgramStringARB;
-
-    /* (optional) fragment programs */
-    GLuint fragment_program_UYVY;
 } GL_RenderData;
 
 typedef struct
 {
     GLuint texture;
-    GLuint shader;
     GLenum type;
     GLfloat texw;
     GLfloat texh;
     GLenum format;
     GLenum formattype;
-    Uint8 *palette;
     void *pixels;
     int pitch;
-    SDL_DirtyRectList dirty;
-    int HACK_RYAN_FIXME;
 } GL_TextureData;
 
 
@@ -257,11 +198,9 @@
 
     renderer->WindowEvent = GL_WindowEvent;
     renderer->CreateTexture = GL_CreateTexture;
-    renderer->QueryTexturePixels = GL_QueryTexturePixels;
     renderer->UpdateTexture = GL_UpdateTexture;
     renderer->LockTexture = GL_LockTexture;
     renderer->UnlockTexture = GL_UnlockTexture;
-    renderer->DirtyTexture = GL_DirtyTexture;
     renderer->RenderClear = GL_RenderClear;
     renderer->RenderDrawPoints = GL_RenderDrawPoints;
     renderer->RenderDrawLines = GL_RenderDrawLines;
@@ -317,40 +256,12 @@
         || SDL_GL_ExtensionSupported("GL_EXT_texture_rectangle")) {
         data->GL_ARB_texture_rectangle_supported = SDL_TRUE;
     }
-    if (SDL_GL_ExtensionSupported("GL_APPLE_ycbcr_422")) {
-        data->GL_APPLE_ycbcr_422_supported = SDL_TRUE;
-    }
-    if (SDL_GL_ExtensionSupported("GL_MESA_ycbcr_texture")) {
-        data->GL_MESA_ycbcr_texture_supported = SDL_TRUE;
-    }
     if (SDL_GL_ExtensionSupported("GL_APPLE_texture_range")) {
         data->glTextureRangeAPPLE =
             (void (*)(GLenum, GLsizei, const GLvoid *))
             SDL_GL_GetProcAddress("glTextureRangeAPPLE");
     }
 
-    /* we might use fragment programs for YUV data, etc. */
-    if (SDL_GL_ExtensionSupported("GL_ARB_fragment_program")) {
-        /* !!! FIXME: this doesn't check for errors. */
-        /* !!! FIXME: this should really reuse the glfuncs.h stuff. */
-        data->glGetProgramivARB = (PFNGLGETPROGRAMIVARBPROC)
-            SDL_GL_GetProcAddress("glGetProgramivARB");
-        data->glGetProgramStringARB = (PFNGLGETPROGRAMSTRINGARBPROC)
-            SDL_GL_GetProcAddress("glGetProgramStringARB");
-        data->glProgramLocalParameter4fvARB =
-            (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC)
-            SDL_GL_GetProcAddress("glProgramLocalParameter4fvARB");
-        data->glDeleteProgramsARB = (PFNGLDELETEPROGRAMSARBPROC)
-            SDL_GL_GetProcAddress("glDeleteProgramsARB");
-        data->glGenProgramsARB = (PFNGLGENPROGRAMSARBPROC)
-            SDL_GL_GetProcAddress("glGenProgramsARB");
-        data->glBindProgramARB = (PFNGLBINDPROGRAMARBPROC)
-            SDL_GL_GetProcAddress("glBindProgramARB");
-        data->glProgramStringARB = (PFNGLPROGRAMSTRINGARBPROC)
-            SDL_GL_GetProcAddress("glProgramStringARB");
-        data->GL_ARB_fragment_program_supported = SDL_TRUE;
-    }
-
     /* Set up parameters for rendering */
     data->blendMode = -1;
     data->glDisable(GL_DEPTH_TEST);
@@ -419,240 +330,16 @@
     return value;
 }
 
-
-//#define DEBUG_PROGRAM_COMPILE 1
-
-static void
-set_shader_error(GL_RenderData * data, const char *prefix)
-{
-    GLint pos = 0;
-    const GLubyte *errstr;
-    data->glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
-    errstr = data->glGetString(GL_PROGRAM_ERROR_STRING_ARB);
-    SDL_SetError("%s: shader compile error at position %d: %s",
-           prefix, (int) pos, (const char *) errstr);
-}
-
-static GLuint
-compile_shader(GL_RenderData * data, GLenum shader_type, const char *_code)
-{
-    const int have_texture_rects = data->GL_ARB_texture_rectangle_supported;
-    const char *replacement = have_texture_rects ? "RECT" : "2D";
-    const size_t replacementlen = SDL_strlen(replacement);
-    const char *token = "%TEXTURETARGET%";
-    const size_t tokenlen = SDL_strlen(token);
-    char *code = NULL;
-    char *ptr = NULL;
-    GLuint program = 0;
-
-    /*
-     * The TEX instruction needs a different target depending on what we use.
-     *  To handle this, we use "%TEXTURETARGET%" and replace the string before
-     *  compiling the shader.
-     */
-    code = SDL_strdup(_code);
-    if (code == NULL)
-        return 0;
-
-    for (ptr = SDL_strstr(code, token); ptr; ptr = SDL_strstr(ptr + 1, token)) {
-        SDL_memcpy(ptr, replacement, replacementlen);
-        SDL_memmove(ptr + replacementlen, ptr + tokenlen,
-                    SDL_strlen(ptr + tokenlen) + 1);
-    }
-
-#if DEBUG_PROGRAM_COMPILE
-    printf("compiling shader:\n%s\n\n", code);
-#endif
-
-    data->glGetError();         /* flush any existing error state. */
-    data->glGenProgramsARB(1, &program);
-    data->glBindProgramARB(shader_type, program);
-    data->glProgramStringARB(shader_type, GL_PROGRAM_FORMAT_ASCII_ARB,
-                             (GLsizei)SDL_strlen(code), code);
-
-    SDL_free(code);
-
-    if (data->glGetError() == GL_INVALID_OPERATION) {
-#if DEBUG_PROGRAM_COMPILE
-        GLint pos = 0;
-        const GLubyte *errstr;
-        data->glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
-        errstr = data->glGetString(GL_PROGRAM_ERROR_STRING_ARB);
-        printf("program compile error at position %d: %s\n\n",
-               (int) pos, (const char *) errstr);
-#endif
-        data->glBindProgramARB(shader_type, 0);
-        data->glDeleteProgramsARB(1, &program);
-        return 0;
-    }
-
-    return program;
-}
-
-
-/*
- * Fragment program that renders from UYVY textures.
- * The UYVY to RGB equasion is:
- *   R = 1.164(Y-16) + 1.596(Cr-128)
- *   G = 1.164(Y-16) - 0.813(Cr-128) - 0.391(Cb-128)
- *   B = 1.164(Y-16) + 2.018(Cb-128)
- * Byte layout is Cb, Y1, Cr, Y2, stored in the R, G, B, A channels.
- * 4 bytes == 2 pixels: Y1/Cb/Cr, Y2/Cb/Cr
- *
- * !!! FIXME: this ignores blendmodes, etc.
- * !!! FIXME: this could be more efficient...use a dot product for green, etc.
- */
-static const char *fragment_program_UYVY_source_code = "!!ARBfp1.0\n"
-    /* outputs... */
-    "OUTPUT outcolor = result.color;\n"
-    /* scratch registers... */
-    "TEMP uyvy;\n" "TEMP luminance;\n" "TEMP work;\n"
-    /* Halve the coordinates to grab the correct 32 bits for the fragment. */
-    "MUL work, fragment.texcoord, { 0.5, 1.0, 1.0, 1.0 };\n"
-    /* Sample the YUV texture. Cb, Y1, Cr, Y2, are stored in x, y, z, w. */
-    "TEX uyvy, work, texture[0], %TEXTURETARGET%;\n"
-    /* Do subtractions (128/255, 16/255, 128/255, 16/255) */
-    "SUB uyvy, uyvy, { 0.501960784313726, 0.06274509803922, 0.501960784313726, 0.06274509803922 };\n"
-    /* Choose the luminance component by texcoord. */
-    /* !!! FIXME: laziness wins out for now... just average Y1 and Y2. */
-    "ADD luminance, uyvy.yyyy, uyvy.wwww;\n"
-    "MUL luminance, luminance, { 0.5, 0.5, 0.5, 0.5 };\n"
-    /* Multiply luminance by its magic value. */
-    "MUL luminance, luminance, { 1.164, 1.164, 1.164, 1.164 };\n"
-    /* uyvy.xyzw becomes Cr/Cr/Cb/Cb, with multiplications. */
-    "MUL uyvy, uyvy.zzxx, { 1.596, -0.813, 2.018, -0.391 };\n"
-    /* Add luminance to Cr and Cb, store to RGB channels. */
-    "ADD work.rgb, luminance, uyvy;\n"
-    /* Do final addition for Green channel.  (!!! FIXME: this should be a DPH?) */
-    "ADD work.g, work.g, uyvy.w;\n"
-    /* Make sure alpha channel is fully opaque.  (!!! FIXME: blend modes!) */
-    "MOV work.a, { 1.0 };\n"
-    /* Store out the final fragment color... */
-    "MOV outcolor, work;\n"
-    /* ...and we're done! */
-    "END\n";
-
 static __inline__ SDL_bool
 convert_format(GL_RenderData *renderdata, Uint32 pixel_format,
                GLint* internalFormat, GLenum* format, GLenum* type)
 {
     switch (pixel_format) {
-    case SDL_PIXELFORMAT_RGB332:
-        *internalFormat = GL_R3_G3_B2;
-        *format = GL_RGB;
-        *type = GL_UNSIGNED_BYTE_3_3_2;
-        break;
-    case SDL_PIXELFORMAT_RGB444:
-        *internalFormat = GL_RGB4;
-        *format = GL_RGB;
-        *type = GL_UNSIGNED_SHORT_4_4_4_4;
-        break;
-    case SDL_PIXELFORMAT_RGB555:
-        *internalFormat = GL_RGB5;
-        *format = GL_RGB;
-        *type = GL_UNSIGNED_SHORT_5_5_5_1;
-        break;
-    case SDL_PIXELFORMAT_ARGB4444:
-        *internalFormat = GL_RGBA4;
-        *format = GL_BGRA;
-        *type = GL_UNSIGNED_SHORT_4_4_4_4_REV;
-        break;
-    case SDL_PIXELFORMAT_ARGB1555:
-        *internalFormat = GL_RGB5_A1;
-        *format = GL_BGRA;
-        *type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
-        break;
-    case SDL_PIXELFORMAT_RGB565:
-        *internalFormat = GL_RGB8;
-        *format = GL_RGB;
-        *type = GL_UNSIGNED_SHORT_5_6_5;
-        break;
-    case SDL_PIXELFORMAT_RGB24:
-        *internalFormat = GL_RGB8;
-        *format = GL_RGB;
-        *type = GL_UNSIGNED_BYTE;
-        break;
     case SDL_PIXELFORMAT_RGB888:
-        *internalFormat = GL_RGB8;
-        *format = GL_BGRA;
-        *type = GL_UNSIGNED_BYTE;
-        break;
-    case SDL_PIXELFORMAT_BGR24:
-        *internalFormat = GL_RGB8;
-        *format = GL_BGR;
-        *type = GL_UNSIGNED_BYTE;
-        break;
-    case SDL_PIXELFORMAT_BGR888:
-        *internalFormat = GL_RGB8;
-        *format = GL_RGBA;
-        *type = GL_UNSIGNED_BYTE;
-        break;
     case SDL_PIXELFORMAT_ARGB8888:
-#ifdef __MACOSX__
-        *internalFormat = GL_RGBA;
-        *format = GL_BGRA;
-        *type = GL_UNSIGNED_INT_8_8_8_8_REV;
-#else
         *internalFormat = GL_RGBA8;
         *format = GL_BGRA;
-        *type = GL_UNSIGNED_BYTE;
-#endif
-        break;
-    case SDL_PIXELFORMAT_ABGR8888:
-        *internalFormat = GL_RGBA8;
-        *format = GL_RGBA;
-        *type = GL_UNSIGNED_BYTE;
-        break;
-    case SDL_PIXELFORMAT_ARGB2101010:
-        *internalFormat = GL_RGB10_A2;
-        *format = GL_BGRA;
-        *type = GL_UNSIGNED_INT_2_10_10_10_REV;
-        break;
-    case SDL_PIXELFORMAT_UYVY:
-        if (renderdata->GL_APPLE_ycbcr_422_supported) {
-            *internalFormat = GL_RGB;
-            *format = GL_YCBCR_422_APPLE;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
-            *type = GL_UNSIGNED_SHORT_8_8_APPLE;
-#else
-            *type = GL_UNSIGNED_SHORT_8_8_REV_APPLE;
-#endif
-        } else if (renderdata->GL_MESA_ycbcr_texture_supported) {
-            *internalFormat = GL_YCBCR_MESA;
-            *format = GL_YCBCR_MESA;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
-            *type = GL_UNSIGNED_SHORT_8_8_MESA;
-#else
-            *type = GL_UNSIGNED_SHORT_8_8_REV_MESA;
-#endif
-        } else if (renderdata->GL_ARB_fragment_program_supported) {
-            *internalFormat = GL_RGBA;
-            *format = GL_RGBA;
-            *type = GL_UNSIGNED_BYTE;
-        } else {
-            return SDL_FALSE;
-        }
-        break;
-    case SDL_PIXELFORMAT_YUY2:
-        if (renderdata->GL_APPLE_ycbcr_422_supported) {
-            *internalFormat = GL_RGB;
-            *format = GL_YCBCR_422_APPLE;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
-            *type = GL_UNSIGNED_SHORT_8_8_REV_APPLE;
-#else
-            *type = GL_UNSIGNED_SHORT_8_8_APPLE;
-#endif
-        } else if (renderdata->GL_MESA_ycbcr_texture_supported) {
-            *internalFormat = GL_YCBCR_MESA;
-            *format = GL_YCBCR_MESA;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
-            *type = GL_UNSIGNED_SHORT_8_8_REV_MESA;
-#else
-            *type = GL_UNSIGNED_SHORT_8_8_MESA;
-#endif
-        } else {
-            return SDL_FALSE;
-        }
+        *type = GL_UNSIGNED_INT_8_8_8_8_REV;
         break;
     default:
         return SDL_FALSE;
@@ -668,7 +355,6 @@
     GLint internalFormat;
     GLenum format, type;
     int texture_w, texture_h;
-    GLuint shader = 0;
     GLenum result;
 
     GL_ActivateRenderer(renderer);
@@ -679,21 +365,6 @@
                      SDL_GetPixelFormatName(texture->format));
         return -1;
     }
-    if (texture->format == SDL_PIXELFORMAT_UYVY &&
-        !renderdata->GL_APPLE_ycbcr_422_supported &&
-        !renderdata->GL_MESA_ycbcr_texture_supported &&
-        renderdata->GL_ARB_fragment_program_supported) {
-        if (renderdata->fragment_program_UYVY == 0) {
-            renderdata->fragment_program_UYVY =
-                compile_shader(renderdata, GL_FRAGMENT_PROGRAM_ARB,
-                               fragment_program_UYVY_source_code);
-            if (renderdata->fragment_program_UYVY == 0) {
-                set_shader_error(renderdata, "UYVY");
-                return -1;
-            }
-        }
-        shader = renderdata->fragment_program_UYVY;
-    }
 
     data = (GL_TextureData *) SDL_calloc(1, sizeof(*data));
     if (!data) {
@@ -701,10 +372,8 @@
         return -1;
     }
 
-    data->shader = shader;
-
     if (texture->access == SDL_TEXTUREACCESS_STREAMING) {
-        data->pitch = texture->w * bytes_per_pixel(texture->format);
+        data->pitch = texture->w * SDL_BYTESPERPIXEL(texture->format);
         data->pixels = SDL_malloc(texture->h * data->pitch);
         if (!data->pixels) {
             SDL_OutOfMemory();
@@ -731,17 +400,6 @@
         data->texh = (GLfloat) texture->h / texture_h;
     }
 
-    /* YUV formats use RGBA but are really two bytes per pixel */
-    if (internalFormat == GL_RGBA && bytes_per_pixel(texture->format) < 4) {
-        texture_w /= 2;
-        if (data->type == GL_TEXTURE_2D) {
-            data->texw *= 2.0f;
-        }
-        data->HACK_RYAN_FIXME = 2;
-    } else {
-        data->HACK_RYAN_FIXME = 1;
-    }
-
     data->format = format;
     data->formattype = type;
     renderdata->glEnable(data->type);
@@ -771,22 +429,13 @@
         renderdata->glTexParameteri(data->type, GL_TEXTURE_STORAGE_HINT_APPLE,
                                     GL_STORAGE_CACHED_APPLE);
     }
-/* This causes a crash in testoverlay for some reason.  Apple bug? */
-#if 0
     if (texture->access == SDL_TEXTUREACCESS_STREAMING
         && texture->format == SDL_PIXELFORMAT_ARGB8888) {
-        /*
-           if (renderdata->glTextureRangeAPPLE) {
-           renderdata->glTextureRangeAPPLE(data->type,
-           texture->h * data->pitch,
-           data->pixels);
-           }
-         */
         renderdata->glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
         renderdata->glTexImage2D(data->type, 0, internalFormat, texture_w,
                                  texture_h, 0, format, type, data->pixels);
-    } else
-#endif
+    }
+    else
 #endif
     {
         renderdata->glTexImage2D(data->type, 0, internalFormat, texture_w,
@@ -801,26 +450,13 @@
     return 0;
 }
 
-static int
-GL_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
-                      void **pixels, int *pitch)
-{
-    GL_TextureData *data = (GL_TextureData *) texture->driverdata;
-
-    *pixels = data->pixels;
-    *pitch = data->pitch;
-    return 0;
-}
-
 static void
 SetupTextureUpdate(GL_RenderData * renderdata, SDL_Texture * texture,
                    int pitch)
 {
     renderdata->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
     renderdata->glPixelStorei(GL_UNPACK_ROW_LENGTH,
-                              (pitch / bytes_per_pixel(texture->format)) /
-                              ((GL_TextureData *) texture->driverdata)->
-                              HACK_RYAN_FIXME);
+                              (pitch / SDL_BYTESPERPIXEL(texture->format)));
 }
 
 static int
@@ -851,18 +487,13 @@
 
 static int
 GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-               const SDL_Rect * rect, int markDirty, void **pixels,
-               int *pitch)
+               const SDL_Rect * rect, void **pixels, int *pitch)
 {
     GL_TextureData *data = (GL_TextureData *) texture->driverdata;
 
-    if (markDirty) {
-        SDL_AddDirtyRect(&data->dirty, rect);
-    }
-
     *pixels =
         (void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
-                  rect->x * bytes_per_pixel(texture->format));
+                  rect->x * SDL_BYTESPERPIXEL(texture->format));
     *pitch = data->pitch;
     return 0;
 }
@@ -870,18 +501,17 @@
 static void
 GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
 {
-}
+    GL_RenderData *renderdata = (GL_RenderData *) renderer->driverdata;
+    GL_TextureData *data = (GL_TextureData *) texture->driverdata;
+
+    GL_ActivateRenderer(renderer);
 
-static void
-GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, int numrects,
-                const SDL_Rect * rects)
-{
-    GL_TextureData *data = (GL_TextureData *) texture->driverdata;
-    int i;
-
-    for (i = 0; i < numrects; ++i) {
-        SDL_AddDirtyRect(&data->dirty, &rects[i]);
-    }
+    SetupTextureUpdate(renderdata, texture, data->pitch);
+    renderdata->glEnable(data->type);
+    renderdata->glBindTexture(data->type, data->texture);
+    renderdata->glTexSubImage2D(data->type, 0, 0, 0, texture->w, texture->h,
+                                data->format, data->formattype, data->pixels);
+    renderdata->glDisable(data->type);
 }
 
 static void
@@ -1056,28 +686,6 @@
 
     GL_ActivateRenderer(renderer);
 
-    if (texturedata->dirty.list) {
-        SDL_DirtyRect *dirty;
-        void *pixels;
-        int bpp = bytes_per_pixel(texture->format);
-        int pitch = texturedata->pitch;
-
-        SetupTextureUpdate(data, texture, pitch);
-        data->glEnable(texturedata->type);
-        data->glBindTexture(texturedata->type, texturedata->texture);
-        for (dirty = texturedata->dirty.list; dirty; dirty = dirty->next) {
-            SDL_Rect *rect = &dirty->rect;
-            pixels =
-                (void *) ((Uint8 *) texturedata->pixels + rect->y * pitch +
-                          rect->x * bpp);
-            data->glTexSubImage2D(texturedata->type, 0, rect->x, rect->y,
-                                  rect->w / texturedata->HACK_RYAN_FIXME,
-                                  rect->h, texturedata->format,
-                                  texturedata->formattype, pixels);
-        }
-        SDL_ClearDirtyRects(&texturedata->dirty);
-    }
-
     minx = dstrect->x;
     miny = dstrect->y;
     maxx = dstrect->x + dstrect->w;
@@ -1106,12 +714,6 @@
 
     GL_SetBlendMode(data, texture->blendMode);
 
-    /* Set up the shader for the copy, if any */
-    if (texturedata->shader) {
-        data->glEnable(GL_FRAGMENT_PROGRAM_ARB);
-        data->glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, texturedata->shader);
-    }
-
     data->glBegin(GL_TRIANGLE_STRIP);
     data->glTexCoord2f(minu, minv);
     data->glVertex2f((GLfloat) minx, (GLfloat) miny);
@@ -1123,10 +725,6 @@
     data->glVertex2f((GLfloat) maxx, (GLfloat) maxy);
     data->glEnd();
 
-    if (texturedata->shader) {
-        data->glDisable(GL_FRAGMENT_PROGRAM_ARB);
-    }
-
     data->glDisable(texturedata->type);
 
     return 0;
@@ -1155,13 +753,13 @@
 
     data->glPixelStorei(GL_PACK_ALIGNMENT, 1);
     data->glPixelStorei(GL_PACK_ROW_LENGTH,
-                        (pitch / bytes_per_pixel(pixel_format)));
+                        (pitch / SDL_BYTESPERPIXEL(pixel_format)));
 
     data->glReadPixels(rect->x, (h-rect->y)-rect->h, rect->w, rect->h,
                        format, type, pixels);
 
     /* Flip the rows to be top-down */
-    length = rect->w * bytes_per_pixel(pixel_format);
+    length = rect->w * SDL_BYTESPERPIXEL(pixel_format);
     src = (Uint8*)pixels + (rect->h-1)*pitch;
     dst = (Uint8*)pixels;
     tmp = SDL_stack_alloc(Uint8, length);
@@ -1201,7 +799,7 @@
 
     data->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
     data->glPixelStorei(GL_UNPACK_ROW_LENGTH,
-                        (pitch / bytes_per_pixel(pixel_format)));
+                        (pitch / SDL_BYTESPERPIXEL(pixel_format)));
 
     /* Flip the rows to be bottom-up */
     length = rect->h * rect->w * pitch;
@@ -1244,13 +842,9 @@
     if (data->texture) {
         renderdata->glDeleteTextures(1, &data->texture);
     }
-    if (data->palette) {
-        SDL_free(data->palette);
-    }
     if (data->pixels) {
         SDL_free(data->pixels);
     }
-    SDL_FreeDirtyRects(&data->dirty);
     SDL_free(data);
     texture->driverdata = NULL;
 }
@@ -1262,16 +856,6 @@
 
     if (data) {
         if (data->context) {
-            if (data->GL_ARB_fragment_program_supported) {
-                data->glDisable(GL_FRAGMENT_PROGRAM_ARB);
-                data->glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
-                if (data->fragment_program_UYVY &&
-                    data->fragment_program_UYVY != ~0) {
-                    data->glDeleteProgramsARB(1,
-                                              &data->fragment_program_UYVY);
-                }
-            }
-
             /* SDL_GL_MakeCurrent(0, NULL); *//* doesn't do anything */
             SDL_GL_DeleteContext(data->context);
         }
--- a/src/render/opengles/SDL_renderer_gles.c	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/opengles/SDL_renderer_gles.c	Thu Feb 03 00:19:40 2011 -0800
@@ -49,19 +49,13 @@
 static void GLES_WindowEvent(SDL_Renderer * renderer,
                              const SDL_WindowEvent *event);
 static int GLES_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int GLES_QueryTexturePixels(SDL_Renderer * renderer,
-                                   SDL_Texture * texture, void **pixels,
-                                   int *pitch);
 static int GLES_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                               const SDL_Rect * rect, const void *pixels,
                               int pitch);
 static int GLES_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                            const SDL_Rect * rect, int markDirty,
-                            void **pixels, int *pitch);
+                            const SDL_Rect * rect, void **pixels, int *pitch);
 static void GLES_UnlockTexture(SDL_Renderer * renderer,
                                SDL_Texture * texture);
-static void GLES_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                              int numrects, const SDL_Rect * rects);
 static int GLES_RenderDrawPoints(SDL_Renderer * renderer,
                                  const SDL_Point * points, int count);
 static int GLES_RenderDrawLines(SDL_Renderer * renderer,
@@ -82,15 +76,8 @@
     {
      "opengl_es",
      (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED),
-     6,
-     {
-      /* OpenGL ES 1.x supported formats list */
-      SDL_PIXELFORMAT_RGBA4444,
-      SDL_PIXELFORMAT_RGBA5551,
-      SDL_PIXELFORMAT_RGB565,
-      SDL_PIXELFORMAT_RGB24,
-      SDL_PIXELFORMAT_BGR888,
-      SDL_PIXELFORMAT_ABGR8888},
+     1,
+     {SDL_PIXELFORMAT_ABGR8888},
      0,
      0}
 };
@@ -125,7 +112,6 @@
     GLenum formattype;
     void *pixels;
     int pitch;
-    SDL_DirtyRectList dirty;
 } GLES_TextureData;
 
 static void
@@ -205,11 +191,9 @@
 
     renderer->WindowEvent = GLES_WindowEvent;
     renderer->CreateTexture = GLES_CreateTexture;
-    renderer->QueryTexturePixels = GLES_QueryTexturePixels;
     renderer->UpdateTexture = GLES_UpdateTexture;
     renderer->LockTexture = GLES_LockTexture;
     renderer->UnlockTexture = GLES_UnlockTexture;
-    renderer->DirtyTexture = GLES_DirtyTexture;
     renderer->RenderDrawPoints = GLES_RenderDrawPoints;
     renderer->RenderDrawLines = GLES_RenderDrawLines;
     renderer->RenderFillRects = GLES_RenderFillRects;
@@ -343,32 +327,11 @@
     GLES_ActivateRenderer(renderer);
 
     switch (texture->format) {
-    case SDL_PIXELFORMAT_RGB24:
-        internalFormat = GL_RGB;
-        format = GL_RGB;
-        type = GL_UNSIGNED_BYTE;
-        break;
-    case SDL_PIXELFORMAT_BGR888:
     case SDL_PIXELFORMAT_ABGR8888:
         internalFormat = GL_RGBA;
         format = GL_RGBA;
         type = GL_UNSIGNED_BYTE;
         break;
-    case SDL_PIXELFORMAT_RGB565:
-        internalFormat = GL_RGB;
-        format = GL_RGB;
-        type = GL_UNSIGNED_SHORT_5_6_5;
-        break;
-    case SDL_PIXELFORMAT_RGBA5551:
-        internalFormat = GL_RGBA;
-        format = GL_RGBA;
-        type = GL_UNSIGNED_SHORT_5_5_5_1;
-        break;
-    case SDL_PIXELFORMAT_RGBA4444:
-        internalFormat = GL_RGBA;
-        format = GL_RGBA;
-        type = GL_UNSIGNED_SHORT_4_4_4_4;
-        break;
     default:
         SDL_SetError("Texture format %s not supported by OpenGL ES",
                      SDL_GetPixelFormatName(texture->format));
@@ -428,23 +391,10 @@
     return 0;
 }
 
-static int
-GLES_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
-                        void **pixels, int *pitch)
-{
-    GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
-
-    *pixels = data->pixels;
-    *pitch = data->pitch;
-    return 0;
-}
-
 static void
 SetupTextureUpdate(GLES_RenderData * renderdata, SDL_Texture * texture,
                    int pitch)
 {
-    GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
-    renderdata->glBindTexture(data->type, data->texture);
     renderdata->glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
 }
 
@@ -463,8 +413,9 @@
     GLES_ActivateRenderer(renderer);
 
     renderdata->glGetError();
+    SetupTextureUpdate(renderdata, texture, pitch);
     renderdata->glEnable(data->type);
-    SetupTextureUpdate(renderdata, texture, pitch);
+    renderdata->glBindTexture(data->type, data->texture);
 
     if( rect->w * bpp == pitch ) {
          temp_buffer = (void *)pixels; /* No need to reformat */
@@ -498,15 +449,10 @@
 
 static int
 GLES_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                 const SDL_Rect * rect, int markDirty, void **pixels,
-                 int *pitch)
+                 const SDL_Rect * rect, void **pixels, int *pitch)
 {
     GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
 
-    if (markDirty) {
-        SDL_AddDirtyRect(&data->dirty, rect);
-    }
-
     *pixels =
         (void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
                   rect->x * SDL_BYTESPERPIXEL(texture->format));
@@ -517,18 +463,18 @@
 static void
 GLES_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
 {
-}
+    GLES_RenderData *renderdata = (GLES_RenderData *) renderer->driverdata;
+    GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
+
+    GLES_ActivateRenderer(renderer);
 
-static void
-GLES_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                  int numrects, const SDL_Rect * rects)
-{
-    GLES_TextureData *data = (GLES_TextureData *) texture->driverdata;
-    int i;
-
-    for (i = 0; i < numrects; ++i) {
-        SDL_AddDirtyRect(&data->dirty, &rects[i]);
-    }
+    SetupTextureUpdate(renderdata, texture, data->pitch);
+    renderdata->glEnable(data->type);
+    renderdata->glBindTexture(data->type, data->texture);
+    renderdata->glTexSubImage2D(data->type, 0, 0, 0, texture->w,
+                                texture->h, data->format, data->formattype,
+                                data->pixels);
+    renderdata->glDisable(data->type);
 }
 
 static void
@@ -676,49 +622,6 @@
 
     data->glEnable(GL_TEXTURE_2D);
 
-    if (texturedata->dirty.list) {
-        SDL_DirtyRect *dirty;
-        void *pixels;
-        int bpp = SDL_BYTESPERPIXEL(texture->format);
-        int pitch = texturedata->pitch;
-
-        SetupTextureUpdate(data, texture, pitch);
-
-        data->glBindTexture(texturedata->type, texturedata->texture);
-        for (dirty = texturedata->dirty.list; dirty; dirty = dirty->next) {
-            SDL_Rect *rect = &dirty->rect;
-            pixels =
-                (void *) ((Uint8 *) texturedata->pixels + rect->y * pitch +
-                          rect->x * bpp);
-            /*      There is no GL_UNPACK_ROW_LENGTH in OpenGLES 
-               we must do this reformatting ourselves(!)
-
-               maybe it'd be a good idea to keep a temp buffer around
-               for this purpose rather than allocating it each time
-             */
-            if( rect->x == 0 && rect->w * bpp == pitch ) {
-                temp_buffer = pixels; /* Updating whole texture, no need to reformat */
-            } else {
-                temp_buffer = SDL_malloc(rect->w * rect->h * bpp);
-                temp_ptr = temp_buffer;
-                for (i = 0; i < rect->h; i++) {
-                    SDL_memcpy(temp_ptr, pixels, rect->w * bpp);
-                    temp_ptr += rect->w * bpp;
-                    pixels += pitch;
-                }
-            }
-
-            data->glTexSubImage2D(texturedata->type, 0, rect->x, rect->y,
-                                  rect->w, rect->h, texturedata->format,
-                                  texturedata->formattype, temp_buffer);
-
-            if( temp_buffer != pixels ) {
-                SDL_free(temp_buffer);
-            }
-        }
-        SDL_ClearDirtyRects(&texturedata->dirty);
-    }
-
     data->glBindTexture(texturedata->type, texturedata->texture);
 
     if (texture->modMode) {
@@ -818,7 +721,6 @@
     if (data->pixels) {
         SDL_free(data->pixels);
     }
-    SDL_FreeDirtyRects(&data->dirty);
     SDL_free(data);
     texture->driverdata = NULL;
 }
--- a/src/render/software/SDL_renderer_sw.c	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/render/software/SDL_renderer_sw.c	Thu Feb 03 00:19:40 2011 -0800
@@ -23,7 +23,6 @@
 
 #include "../SDL_sysrender.h"
 #include "../../video/SDL_pixels_c.h"
-#include "../../video/SDL_yuv_sw_c.h"
 
 
 /* SDL surface based renderer implementation */
@@ -32,9 +31,6 @@
 static void SW_WindowEvent(SDL_Renderer * renderer,
                            const SDL_WindowEvent *event);
 static int SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int SW_QueryTexturePixels(SDL_Renderer * renderer,
-                                 SDL_Texture * texture, void **pixels,
-                                 int *pitch);
 static int SW_SetTextureColorMod(SDL_Renderer * renderer,
                                  SDL_Texture * texture);
 static int SW_SetTextureAlphaMod(SDL_Renderer * renderer,
@@ -45,8 +41,7 @@
                             const SDL_Rect * rect, const void *pixels,
                             int pitch);
 static int SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                          const SDL_Rect * rect, int markDirty, void **pixels,
-                          int *pitch);
+                          const SDL_Rect * rect, void **pixels, int *pitch);
 static void SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
 static int SW_RenderDrawPoints(SDL_Renderer * renderer,
                                const SDL_Point * points, int count);
@@ -70,7 +65,7 @@
     {
      "software",
      (SDL_RENDERER_PRESENTVSYNC),
-     13,
+     8,
      {
       SDL_PIXELFORMAT_RGB555,
       SDL_PIXELFORMAT_RGB565,
@@ -79,12 +74,8 @@
       SDL_PIXELFORMAT_ARGB8888,
       SDL_PIXELFORMAT_RGBA8888,
       SDL_PIXELFORMAT_ABGR8888,
-      SDL_PIXELFORMAT_BGRA8888,
-      SDL_PIXELFORMAT_YV12,
-      SDL_PIXELFORMAT_IYUV,
-      SDL_PIXELFORMAT_YUY2,
-      SDL_PIXELFORMAT_UYVY,
-      SDL_PIXELFORMAT_YVYU},
+      SDL_PIXELFORMAT_BGRA8888
+     },
      0,
      0}
 };
@@ -96,7 +87,6 @@
     SDL_Texture *texture;
     SDL_Surface surface;
     SDL_Renderer *renderer;
-    SDL_DirtyRectList dirty;
 } SW_RenderData;
 
 static SDL_Texture *
@@ -136,6 +126,7 @@
     SDL_Renderer *renderer;
     SW_RenderData *data;
     int i;
+    int w, h;
     Uint32 format;
     int bpp;
     Uint32 Rmask, Gmask, Bmask, Amask;
@@ -163,7 +154,6 @@
     }
     renderer->WindowEvent = SW_WindowEvent;
     renderer->CreateTexture = SW_CreateTexture;
-    renderer->QueryTexturePixels = SW_QueryTexturePixels;
     renderer->SetTextureColorMod = SW_SetTextureColorMod;
     renderer->SetTextureAlphaMod = SW_SetTextureAlphaMod;
     renderer->SetTextureBlendMode = SW_SetTextureBlendMode;
@@ -217,8 +207,8 @@
     }
 
     /* Create the textures we'll use for display */
-    data->texture =
-        CreateTexture(data->renderer, data->format, window->w, window->h);
+    SDL_GetWindowSize(window, &w, &h);
+    data->texture = CreateTexture(data->renderer, data->format, w, h);
     if (!data->texture) {
         SW_DestroyRenderer(renderer);
         return NULL;
@@ -243,11 +233,12 @@
 
     if (data->updateSize) {
         /* Recreate the textures for the new window size */
+        int w, h;
         if (data->texture) {
             DestroyTexture(data->renderer, data->texture);
         }
-        data->texture = CreateTexture(data->renderer, data->format,
-                                      window->w, window->h);
+        SDL_GetWindowSize(window, &w, &h);
+        data->texture = CreateTexture(data->renderer, data->format, w, h);
         if (data->texture) {
             data->updateSize = SDL_FALSE;
         }
@@ -268,30 +259,25 @@
 static int
 SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
 {
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        texture->driverdata =
-            SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
-    } else {
-        int bpp;
-        Uint32 Rmask, Gmask, Bmask, Amask;
+    int bpp;
+    Uint32 Rmask, Gmask, Bmask, Amask;
+
+    if (!SDL_PixelFormatEnumToMasks
+        (texture->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
+        SDL_SetError("Unknown texture format");
+        return -1;
+    }
 
-        if (!SDL_PixelFormatEnumToMasks
-            (texture->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
-            SDL_SetError("Unknown texture format");
-            return -1;
-        }
+    texture->driverdata =
+        SDL_CreateRGBSurface(0, texture->w, texture->h, bpp, Rmask, Gmask,
+                             Bmask, Amask);
+    SDL_SetSurfaceColorMod(texture->driverdata, texture->r, texture->g,
+                           texture->b);
+    SDL_SetSurfaceAlphaMod(texture->driverdata, texture->a);
+    SDL_SetSurfaceBlendMode(texture->driverdata, texture->blendMode);
 
-        texture->driverdata =
-            SDL_CreateRGBSurface(0, texture->w, texture->h, bpp, Rmask, Gmask,
-                                 Bmask, Amask);
-        SDL_SetSurfaceColorMod(texture->driverdata, texture->r, texture->g,
-                               texture->b);
-        SDL_SetSurfaceAlphaMod(texture->driverdata, texture->a);
-        SDL_SetSurfaceBlendMode(texture->driverdata, texture->blendMode);
-
-        if (texture->access == SDL_TEXTUREACCESS_STATIC) {
-            SDL_SetSurfaceRLE(texture->driverdata, 1);
-        }
+    if (texture->access == SDL_TEXTUREACCESS_STATIC) {
+        SDL_SetSurfaceRLE(texture->driverdata, 1);
     }
 
     if (!texture->driverdata) {
@@ -301,23 +287,6 @@
 }
 
 static int
-SW_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
-                      void **pixels, int *pitch)
-{
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        return SDL_SW_QueryYUVTexturePixels((SDL_SW_YUVTexture *)
-                                            texture->driverdata, pixels,
-                                            pitch);
-    } else {
-        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
-
-        *pixels = surface->pixels;
-        *pitch = surface->pitch;
-        return 0;
-    }
-}
-
-static int
 SW_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture)
 {
     SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
@@ -343,56 +312,40 @@
 SW_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                  const SDL_Rect * rect, const void *pixels, int pitch)
 {
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        return SDL_SW_UpdateYUVTexture((SDL_SW_YUVTexture *)
-                                       texture->driverdata, rect, pixels,
-                                       pitch);
-    } else {
-        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
-        Uint8 *src, *dst;
-        int row;
-        size_t length;
+    SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
+    Uint8 *src, *dst;
+    int row;
+    size_t length;
 
-        src = (Uint8 *) pixels;
-        dst =
-            (Uint8 *) surface->pixels + rect->y * surface->pitch +
-            rect->x * surface->format->BytesPerPixel;
-        length = rect->w * surface->format->BytesPerPixel;
-        for (row = 0; row < rect->h; ++row) {
-            SDL_memcpy(dst, src, length);
-            src += pitch;
-            dst += surface->pitch;
-        }
-        return 0;
+    src = (Uint8 *) pixels;
+    dst = (Uint8 *) surface->pixels +
+                        rect->y * surface->pitch +
+                        rect->x * surface->format->BytesPerPixel;
+    length = rect->w * surface->format->BytesPerPixel;
+    for (row = 0; row < rect->h; ++row) {
+        SDL_memcpy(dst, src, length);
+        src += pitch;
+        dst += surface->pitch;
     }
+    return 0;
 }
 
 static int
 SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-               const SDL_Rect * rect, int markDirty, void **pixels,
-               int *pitch)
+               const SDL_Rect * rect, void **pixels, int *pitch)
 {
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        return SDL_SW_LockYUVTexture((SDL_SW_YUVTexture *)
-                                     texture->driverdata, rect, markDirty,
-                                     pixels, pitch);
-    } else {
-        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
+    SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
 
-        *pixels =
-            (void *) ((Uint8 *) surface->pixels + rect->y * surface->pitch +
-                      rect->x * surface->format->BytesPerPixel);
-        *pitch = surface->pitch;
-        return 0;
-    }
+    *pixels =
+        (void *) ((Uint8 *) surface->pixels + rect->y * surface->pitch +
+                  rect->x * surface->format->BytesPerPixel);
+    *pitch = surface->pitch;
+    return 0;
 }
 
 static void
 SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
 {
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        SDL_SW_UnlockYUVTexture((SDL_SW_YUVTexture *) texture->driverdata);
-    }
 }
 
 static int
@@ -420,7 +373,7 @@
         return 0;
     }
 
-    if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
+    if (data->renderer->LockTexture(data->renderer, texture, &rect,
                                     &data->surface.pixels,
                                     &data->surface.pitch) < 0) {
         return -1;
@@ -484,7 +437,7 @@
         return 0;
     }
 
-    if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
+    if (data->renderer->LockTexture(data->renderer, texture, &rect,
                                     &data->surface.pixels,
                                     &data->surface.pitch) < 0) {
         return -1;
@@ -558,7 +511,7 @@
             continue;
         }
 
-        if (data->renderer->LockTexture(data->renderer, texture, &rect, 1,
+        if (data->renderer->LockTexture(data->renderer, texture, &rect,
                                         &data->surface.pixels,
                                         &data->surface.pitch) < 0) {
             return -1;
@@ -586,38 +539,31 @@
               const SDL_Rect * srcrect, const SDL_Rect * dstrect)
 {
     SW_RenderData *data = (SW_RenderData *) renderer->driverdata;
+    SDL_Surface *surface;
+    SDL_Rect real_srcrect;
+    SDL_Rect real_dstrect;
     int status;
 
     if (!SW_ActivateRenderer(renderer)) {
         return -1;
     }
 
-    if (data->renderer->LockTexture(data->renderer, data->texture,
-                                    dstrect, 1, &data->surface.pixels,
+    if (data->renderer->LockTexture(data->renderer, data->texture, dstrect,
+                                    &data->surface.pixels,
                                     &data->surface.pitch) < 0) {
         return -1;
     }
 
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        status =
-            SDL_SW_CopyYUVToRGB((SDL_SW_YUVTexture *) texture->driverdata,
-                                srcrect, data->format, dstrect->w, dstrect->h,
-                                data->surface.pixels, data->surface.pitch);
-    } else {
-        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
-        SDL_Rect real_srcrect = *srcrect;
-        SDL_Rect real_dstrect;
+    surface = (SDL_Surface *) texture->driverdata;
+    real_srcrect = *srcrect;
 
-        data->surface.w = dstrect->w;
-        data->surface.h = dstrect->h;
-        data->surface.clip_rect.w = dstrect->w;
-        data->surface.clip_rect.h = dstrect->h;
-        real_dstrect = data->surface.clip_rect;
+    data->surface.w = dstrect->w;
+    data->surface.h = dstrect->h;
+    data->surface.clip_rect.w = dstrect->w;
+    data->surface.clip_rect.h = dstrect->h;
+    real_dstrect = data->surface.clip_rect;
 
-        status =
-            SDL_LowerBlit(surface, &real_srcrect, &data->surface,
-                          &real_dstrect);
-    }
+    status = SDL_LowerBlit(surface, &real_srcrect, &data->surface, &real_dstrect);
     data->renderer->UnlockTexture(data->renderer, data->texture);
     return status;
 }
@@ -632,8 +578,8 @@
         return -1;
     }
 
-    if (data->renderer->LockTexture(data->renderer, data->texture,
-                                    rect, 0, &data->surface.pixels,
+    if (data->renderer->LockTexture(data->renderer, data->texture, rect,
+                                    &data->surface.pixels,
                                     &data->surface.pitch) < 0) {
         return -1;
     }
@@ -656,8 +602,8 @@
         return -1;
     }
 
-    if (data->renderer->LockTexture(data->renderer, data->texture,
-                                    rect, 1, &data->surface.pixels,
+    if (data->renderer->LockTexture(data->renderer, data->texture, rect,
+                                    &data->surface.pixels,
                                     &data->surface.pitch) < 0) {
         return -1;
     }
@@ -692,13 +638,9 @@
 static void
 SW_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
 {
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        SDL_SW_DestroyYUVTexture((SDL_SW_YUVTexture *) texture->driverdata);
-    } else {
-        SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
+    SDL_Surface *surface = (SDL_Surface *) texture->driverdata;
 
-        SDL_FreeSurface(surface);
-    }
+    SDL_FreeSurface(surface);
 }
 
 static void
@@ -717,7 +659,6 @@
         if (data->renderer) {
             data->renderer->DestroyRenderer(data->renderer);
         }
-        SDL_FreeDirtyRects(&data->dirty);
         SDL_free(data);
     }
     SDL_free(renderer);
--- a/src/video/SDL_leaks.h	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/video/SDL_leaks.h	Thu Feb 03 00:19:40 2011 -0800
@@ -29,4 +29,5 @@
 #ifdef CHECK_LEAKS
 extern int surfaces_allocated;
 #endif
+
 /* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_rect.c	Wed Feb 02 22:55:12 2011 -0800
+++ b/src/video/SDL_rect.c	Thu Feb 03 00:19:40 2011 -0800
@@ -339,66 +339,4 @@
     return SDL_TRUE;
 }
 
-void
-SDL_AddDirtyRect(SDL_DirtyRectList * list, const SDL_Rect * rect)
-{
-    SDL_DirtyRect *dirty;
-
-    /* FIXME: At what point is this optimization too expensive? */
-    for (dirty = list->list; dirty; dirty = dirty->next) {
-        if (SDL_HasIntersection(&dirty->rect, rect)) {
-            SDL_UnionRect(&dirty->rect, rect, &dirty->rect);
-            return;
-        }
-    }
-
-    if (list->free) {
-        dirty = list->free;
-        list->free = dirty->next;
-    } else {
-        dirty = (SDL_DirtyRect *) SDL_malloc(sizeof(*dirty));
-        if (!dirty) {
-            return;
-        }
-    }
-    dirty->rect = *rect;
-    dirty->next = list->list;
-    list->list = dirty;
-}
-
-void
-SDL_ClearDirtyRects(SDL_DirtyRectList * list)
-{
-    SDL_DirtyRect *prev, *curr;
-
-    /* Skip to the end of the free list */
-    prev = NULL;
-    for (curr = list->free; curr; curr = curr->next) {
-        prev = curr;
-    }
-
-    /* Add the list entries to the end */
-    if (prev) {
-        prev->next = list->list;
-    } else {
-        list->free = list->list;
-    }
-    list->list = NULL;
-}
-
-void
-SDL_FreeDirtyRects(SDL_DirtyRectList * list)
-{
-    while (list->list) {
-        SDL_DirtyRect *elem = list->list;
-        list->list = elem->next;
-        SDL_free(elem);
-    }
-    while (list->free) {
-        SDL_DirtyRect *elem = list->free;
-        list->free = elem->next;
-        SDL_free(elem);
-    }
-}
-
 /* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_mmx.c	Wed Feb 02 22:55:12 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,432 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
-
-#include "SDL_stdinc.h"
-
-#include "mmx.h"
-
-/* *INDENT-OFF* */
-
-static mmx_t MMX_0080w    = { .ud = {0x00800080, 0x00800080} };
-static mmx_t MMX_00FFw    = { .ud = {0x00ff00ff, 0x00ff00ff} };
-static mmx_t MMX_FF00w    = { .ud = {0xff00ff00, 0xff00ff00} };
-
-static mmx_t MMX_Ycoeff   = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
-
-static mmx_t MMX_UbluRGB  = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
-static mmx_t MMX_VredRGB  = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
-static mmx_t MMX_UgrnRGB  = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
-static mmx_t MMX_VgrnRGB  = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
-
-static mmx_t MMX_Ublu5x5  = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
-static mmx_t MMX_Vred5x5  = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
-static mmx_t MMX_Ugrn565  = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
-static mmx_t MMX_Vgrn565  = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
-
-static mmx_t MMX_red565   = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
-static mmx_t MMX_grn565   = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
-
-/**
-   This MMX assembler is my first assembler/MMX program ever.
-   Thus it maybe buggy.
-   Send patches to:
-   mvogt@rhrk.uni-kl.de
-
-   After it worked fine I have "obfuscated" the code a bit to have
-   more parallism in the MMX units. This means I moved
-   initilisation around and delayed other instruction.
-   Performance measurement did not show that this brought any advantage
-   but in theory it _should_ be faster this way.
-
-   The overall performanve gain to the C based dither was 30%-40%.
-   The MMX routine calculates 256bit=8RGB values in each cycle
-   (4 for row1 & 4 for row2)
-
-   The red/green/blue.. coefficents are taken from the mpeg_play 
-   player. They look nice, but I dont know if you can have
-   better values, to avoid integer rounding errors.
-   
-
-   IMPORTANT:
-   ==========
-
-   It is a requirement that the cr/cb/lum are 8 byte aligned and
-   the out are 16byte aligned or you will/may get segfaults
-
-*/
-
-void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
-                              unsigned char *lum, unsigned char *cr,
-                              unsigned char *cb, unsigned char *out,
-                              int rows, int cols, int mod )
-{
-    Uint32 *row1;
-    Uint32 *row2;
-
-    unsigned char* y = lum +cols*rows;    // Pointer to the end
-    int x = 0;
-    row1 = (Uint32 *)out;                 // 32 bit target
-    row2 = (Uint32 *)out+cols+mod;        // start of second row
-    mod = (mod+cols+mod)*4;               // increment for row1 in byte
-
-    __asm__ __volatile__ (
-        // tap dance to workaround the inability to use %%ebx at will...
-        //  move one thing to the stack...
-        "pushl $0\n"  // save a slot on the stack.
-        "pushl %%ebx\n"  // save %%ebx.
-        "movl %0, %%ebx\n"  // put the thing in ebx.
-        "movl %%ebx,4(%%esp)\n"  // put the thing in the stack slot.
-        "popl %%ebx\n"  // get back %%ebx (the PIC register).
-
-        ".align 8\n"
-        "1:\n"
-
-        // create Cr (result in mm1)
-        "pushl %%ebx\n"
-        "movl 4(%%esp),%%ebx\n"
-        "movd (%%ebx),%%mm1\n"   //         0  0  0  0  v3 v2 v1 v0
-        "popl %%ebx\n"
-        "pxor %%mm7,%%mm7\n"      //         00 00 00 00 00 00 00 00
-        "movd (%2), %%mm2\n"           //    0  0  0  0 l3 l2 l1 l0
-        "punpcklbw %%mm7,%%mm1\n" //         0  v3 0  v2 00 v1 00 v0
-        "punpckldq %%mm1,%%mm1\n" //         00 v1 00 v0 00 v1 00 v0
-        "psubw %9,%%mm1\n"        // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
-
-        // create Cr_g (result in mm0)
-        "movq %%mm1,%%mm0\n"           // r1 r1 r0 r0 r1 r1 r0 r0
-        "pmullw %10,%%mm0\n"           // red*-46dec=0.7136*64
-        "pmullw %11,%%mm1\n"           // red*89dec=1.4013*64
-        "psraw  $6, %%mm0\n"           // red=red/64
-        "psraw  $6, %%mm1\n"           // red=red/64
-
-        // create L1 L2 (result in mm2,mm4)
-        // L2=lum+cols
-        "movq (%2,%4),%%mm3\n"         //    0  0  0  0 L3 L2 L1 L0
-        "punpckldq %%mm3,%%mm2\n"      //   L3 L2 L1 L0 l3 l2 l1 l0
-        "movq %%mm2,%%mm4\n"           //   L3 L2 L1 L0 l3 l2 l1 l0
-        "pand %12,%%mm2\n"             //   L3 0  L1  0 l3  0 l1  0
-        "pand %13,%%mm4\n"             //   0  L2  0 L0  0 l2  0 l0
-        "psrlw $8,%%mm2\n"             //   0  L3  0 L1  0 l3  0 l1
-
-        // create R (result in mm6)
-        "movq %%mm2,%%mm5\n"           //   0 L3  0 L1  0 l3  0 l1
-        "movq %%mm4,%%mm6\n"           //   0 L2  0 L0  0 l2  0 l0
-        "paddsw  %%mm1, %%mm5\n"       // lum1+red:x R3 x R1 x r3 x r1
-        "paddsw  %%mm1, %%mm6\n"       // lum1+red:x R2 x R0 x r2 x r0
-        "packuswb %%mm5,%%mm5\n"       //  R3 R1 r3 r1 R3 R1 r3 r1
-        "packuswb %%mm6,%%mm6\n"       //  R2 R0 r2 r0 R2 R0 r2 r0
-        "pxor %%mm7,%%mm7\n"      //         00 00 00 00 00 00 00 00
-        "punpcklbw %%mm5,%%mm6\n"      //  R3 R2 R1 R0 r3 r2 r1 r0
-
-        // create Cb (result in mm1)
-        "movd (%1), %%mm1\n"      //         0  0  0  0  u3 u2 u1 u0
-        "punpcklbw %%mm7,%%mm1\n" //         0  u3 0  u2 00 u1 00 u0
-        "punpckldq %%mm1,%%mm1\n" //         00 u1 00 u0 00 u1 00 u0
-        "psubw %9,%%mm1\n"        // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
-
-        // create Cb_g (result in mm5)
-        "movq %%mm1,%%mm5\n"            // u1 u1 u0 u0 u1 u1 u0 u0
-        "pmullw %14,%%mm5\n"            // blue*-109dec=1.7129*64
-        "pmullw %15,%%mm1\n"            // blue*114dec=1.78125*64
-        "psraw  $6, %%mm5\n"            // blue=red/64
-        "psraw  $6, %%mm1\n"            // blue=blue/64
-
-        // create G (result in mm7)
-        "movq %%mm2,%%mm3\n"      //   0  L3  0 L1  0 l3  0 l1
-        "movq %%mm4,%%mm7\n"      //   0  L2  0 L0  0 l2  0 l1
-        "paddsw  %%mm5, %%mm3\n"  // lum1+Cb_g:x G3t x G1t x g3t x g1t
-        "paddsw  %%mm5, %%mm7\n"  // lum1+Cb_g:x G2t x G0t x g2t x g0t
-        "paddsw  %%mm0, %%mm3\n"  // lum1+Cr_g:x G3  x G1  x g3  x g1
-        "paddsw  %%mm0, %%mm7\n"  // lum1+blue:x G2  x G0  x g2  x g0
-        "packuswb %%mm3,%%mm3\n"  // G3 G1 g3 g1 G3 G1 g3 g1
-        "packuswb %%mm7,%%mm7\n"  // G2 G0 g2 g0 G2 G0 g2 g0
-        "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0
-
-        // create B (result in mm5)
-        "movq %%mm2,%%mm3\n"         //   0  L3  0 L1  0 l3  0 l1
-        "movq %%mm4,%%mm5\n"         //   0  L2  0 L0  0 l2  0 l1
-        "paddsw  %%mm1, %%mm3\n"     // lum1+blue:x B3 x B1 x b3 x b1
-        "paddsw  %%mm1, %%mm5\n"     // lum1+blue:x B2 x B0 x b2 x b0
-        "packuswb %%mm3,%%mm3\n"     // B3 B1 b3 b1 B3 B1 b3 b1
-        "packuswb %%mm5,%%mm5\n"     // B2 B0 b2 b0 B2 B0 b2 b0
-        "punpcklbw %%mm3,%%mm5\n"    // B3 B2 B1 B0 b3 b2 b1 b0
-
-        // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
-
-        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
-        "pxor %%mm4,%%mm4\n"           //  0  0  0  0  0  0  0  0
-        "movq %%mm6,%%mm1\n"           // R3 R2 R1 R0 r3 r2 r1 r0
-        "movq %%mm5,%%mm3\n"           // B3 B2 B1 B0 b3 b2 b1 b0
-
-        // process lower lum
-        "punpcklbw %%mm4,%%mm1\n"      //  0 r3  0 r2  0 r1  0 r0
-        "punpcklbw %%mm4,%%mm3\n"      //  0 b3  0 b2  0 b1  0 b0
-        "movq %%mm1,%%mm2\n"           //  0 r3  0 r2  0 r1  0 r0
-        "movq %%mm3,%%mm0\n"           //  0 b3  0 b2  0 b1  0 b0
-        "punpcklwd %%mm1,%%mm3\n"      //  0 r1  0 b1  0 r0  0 b0
-        "punpckhwd %%mm2,%%mm0\n"      //  0 r3  0 b3  0 r2  0 b2
-
-        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
-        "movq %%mm7,%%mm1\n"           // G3 G2 G1 G0 g3 g2 g1 g0
-        "punpcklbw %%mm1,%%mm2\n"      // g3  0 g2  0 g1  0 g0  0
-        "punpcklwd %%mm4,%%mm2\n"      //  0  0 g1  0  0  0 g0  0
-        "por %%mm3, %%mm2\n"          //  0 r1 g1 b1  0 r0 g0 b0
-        "movq %%mm2,(%3)\n"          // wrote out ! row1
-
-        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
-        "punpcklbw %%mm1,%%mm4\n"      // g3  0 g2  0 g1  0 g0  0
-        "punpckhwd %%mm2,%%mm4\n"      //  0  0 g3  0  0  0 g2  0
-        "por %%mm0, %%mm4\n"          //  0 r3 g3 b3  0 r2 g2 b2
-        "movq %%mm4,8(%3)\n"         // wrote out ! row1
-
-        // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
-        // this can be done "destructive"
-        "pxor %%mm2,%%mm2\n"           //  0  0  0  0  0  0  0  0
-        "punpckhbw %%mm2,%%mm6\n"      //  0 R3  0 R2  0 R1  0 R0
-        "punpckhbw %%mm1,%%mm5\n"      // G3 B3 G2 B2 G1 B1 G0 B0
-        "movq %%mm5,%%mm1\n"           // G3 B3 G2 B2 G1 B1 G0 B0
-        "punpcklwd %%mm6,%%mm1\n"      //  0 R1 G1 B1  0 R0 G0 B0
-        "movq %%mm1,(%5)\n"          // wrote out ! row2
-        "punpckhwd %%mm6,%%mm5\n"      //  0 R3 G3 B3  0 R2 G2 B2
-        "movq %%mm5,8(%5)\n"         // wrote out ! row2
-
-        "addl $4,%2\n"            // lum+4
-        "leal 16(%3),%3\n"        // row1+16
-        "leal 16(%5),%5\n"        // row2+16
-        "addl $2,(%%esp)\n"        // cr+2
-        "addl $2,%1\n"           // cb+2
-
-        "addl $4,%6\n"            // x+4
-        "cmpl %4,%6\n"
-
-        "jl 1b\n"
-        "addl %4,%2\n" // lum += cols
-        "addl %8,%3\n" // row1+= mod
-        "addl %8,%5\n" // row2+= mod
-        "movl $0,%6\n" // x=0
-        "cmpl %7,%2\n"
-        "jl 1b\n"
-
-        "addl $4,%%esp\n"  // get rid of the stack slot we reserved.
-        "emms\n"  // reset MMX registers.
-        :
-        : "m" (cr), "r"(cb),"r"(lum),
-          "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
-          "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
-          "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),
-          "m"(MMX_UbluRGB)
-    );
-}
-
-void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
-                             unsigned char *lum, unsigned char *cr,
-                             unsigned char *cb, unsigned char *out,
-                             int rows, int cols, int mod )
-{
-    Uint16 *row1;
-    Uint16 *row2;
-
-    unsigned char* y = lum +cols*rows;    /* Pointer to the end */
-    int x = 0;
-    row1 = (Uint16 *)out;                 /* 16 bit target */
-    row2 = (Uint16 *)out+cols+mod;        /* start of second row  */
-    mod = (mod+cols+mod)*2;               /* increment for row1 in byte */
-
-    __asm__ __volatile__(
-        // tap dance to workaround the inability to use %%ebx at will...
-        //  move one thing to the stack...
-        "pushl $0\n"  // save a slot on the stack.
-        "pushl %%ebx\n"  // save %%ebx.
-        "movl %0, %%ebx\n"  // put the thing in ebx.
-        "movl %%ebx, 4(%%esp)\n"  // put the thing in the stack slot.
-        "popl %%ebx\n"  // get back %%ebx (the PIC register).
-
-        ".align 8\n"
-        "1:\n"
-
-        "movd           (%1),                   %%mm0\n" // 4 Cb         0  0  0  0 u3 u2 u1 u0
-        "pxor           %%mm7,                  %%mm7\n"
-        "pushl %%ebx\n"
-        "movl 4(%%esp), %%ebx\n"
-        "movd (%%ebx), %%mm1\n"   // 4 Cr                0  0  0  0 v3 v2 v1 v0
-        "popl %%ebx\n"
-
-        "punpcklbw      %%mm7,                  %%mm0\n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
-        "punpcklbw      %%mm7,                  %%mm1\n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
-        "psubw          %9,                     %%mm0\n"
-        "psubw          %9,                     %%mm1\n"
-        "movq           %%mm0,                  %%mm2\n" // Cb                   0 u3  0 u2  0 u1  0 u0
-        "movq           %%mm1,                  %%mm3\n" // Cr
-        "pmullw         %10,                    %%mm2\n" // Cb2green 0 R3  0 R2  0 R1  0 R0
-        "movq           (%2),                   %%mm6\n" // L1      l7 L6 L5 L4 L3 L2 L1 L0
-        "pmullw         %11,                    %%mm0\n" // Cb2blue
-        "pand           %12,                    %%mm6\n" // L1      00 L6 00 L4 00 L2 00 L0
-        "pmullw         %13,                    %%mm3\n" // Cr2green
-        "movq           (%2),                   %%mm7\n" // L2
-        "pmullw         %14,                    %%mm1\n" // Cr2red
-        "psrlw          $8,                     %%mm7\n"        // L2           00 L7 00 L5 00 L3 00 L1
-        "pmullw         %15,                    %%mm6\n" // lum1
-        "paddw          %%mm3,                  %%mm2\n" // Cb2green + Cr2green == green
-        "pmullw         %15,                    %%mm7\n" // lum2
-
-        "movq           %%mm6,                  %%mm4\n" // lum1
-        "paddw          %%mm0,                  %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
-        "movq           %%mm4,                  %%mm5\n" // lum1
-        "paddw          %%mm1,                  %%mm4\n" // lum1 +red  00 R6 00 R4 00 R2 00 R0
-        "paddw          %%mm2,                  %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0
-        "psraw          $6,                     %%mm4\n" // R1 0 .. 64
-        "movq           %%mm7,                  %%mm3\n" // lum2                       00 L7 00 L5 00 L3 00 L1
-        "psraw          $6,                     %%mm5\n" // G1  - .. +
-        "paddw          %%mm0,                  %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
-        "psraw          $6,                     %%mm6\n" // B1         0 .. 64
-        "packuswb       %%mm4,                  %%mm4\n" // R1 R1
-        "packuswb       %%mm5,                  %%mm5\n" // G1 G1
-        "packuswb       %%mm6,                  %%mm6\n" // B1 B1
-        "punpcklbw      %%mm4,                  %%mm4\n"
-        "punpcklbw      %%mm5,                  %%mm5\n"
-
-        "pand           %16,                    %%mm4\n"
-        "psllw          $3,                     %%mm5\n" // GREEN       1
-        "punpcklbw      %%mm6,                  %%mm6\n"
-        "pand           %17,                    %%mm5\n"
-        "pand           %16,                    %%mm6\n"
-        "por            %%mm5,                  %%mm4\n" //
-        "psrlw          $11,                    %%mm6\n" // BLUE        1
-        "movq           %%mm3,                  %%mm5\n" // lum2
-        "paddw          %%mm1,                  %%mm3\n" // lum2 +red      00 R7 00 R5 00 R3 00 R1
-        "paddw          %%mm2,                  %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
-        "psraw          $6,                     %%mm3\n" // R2
-        "por            %%mm6,                  %%mm4\n" // MM4
-        "psraw          $6,                     %%mm5\n" // G2
-        "movq           (%2, %4),               %%mm6\n" // L3 load lum2
-        "psraw          $6,                     %%mm7\n"
-        "packuswb       %%mm3,                  %%mm3\n"
-        "packuswb       %%mm5,                  %%mm5\n"
-        "packuswb       %%mm7,                  %%mm7\n"
-        "pand           %12,                    %%mm6\n" // L3
-        "punpcklbw      %%mm3,                  %%mm3\n"
-        "punpcklbw      %%mm5,                  %%mm5\n"
-        "pmullw         %15,                    %%mm6\n" // lum3
-        "punpcklbw      %%mm7,                  %%mm7\n"
-        "psllw          $3,                     %%mm5\n" // GREEN 2
-        "pand           %16,                    %%mm7\n"
-        "pand           %16,                    %%mm3\n"
-        "psrlw          $11,                    %%mm7\n" // BLUE  2
-        "pand           %17,                    %%mm5\n"
-        "por            %%mm7,                  %%mm3\n"
-        "movq           (%2,%4),                %%mm7\n" // L4 load lum2
-        "por            %%mm5,                  %%mm3\n" //
-        "psrlw          $8,                     %%mm7\n" // L4
-        "movq           %%mm4,                  %%mm5\n"
-        "punpcklwd      %%mm3,                  %%mm4\n"
-        "pmullw         %15,                    %%mm7\n" // lum4
-        "punpckhwd      %%mm3,                  %%mm5\n"
-
-        "movq           %%mm4,                  (%3)\n"  // write row1
-        "movq           %%mm5,                  8(%3)\n" // write row1
-
-        "movq           %%mm6,                  %%mm4\n" // Lum3
-        "paddw          %%mm0,                  %%mm6\n" // Lum3 +blue
-
-        "movq           %%mm4,                  %%mm5\n" // Lum3
-        "paddw          %%mm1,                  %%mm4\n" // Lum3 +red
-        "paddw          %%mm2,                  %%mm5\n" // Lum3 +green
-        "psraw          $6,                     %%mm4\n"
-        "movq           %%mm7,                  %%mm3\n" // Lum4
-        "psraw          $6,                     %%mm5\n"
-        "paddw          %%mm0,                  %%mm7\n" // Lum4 +blue
-        "psraw          $6,                     %%mm6\n" // Lum3 +blue
-        "movq           %%mm3,                  %%mm0\n" // Lum4
-        "packuswb       %%mm4,                  %%mm4\n"
-        "paddw          %%mm1,                  %%mm3\n" // Lum4 +red
-        "packuswb       %%mm5,                  %%mm5\n"
-        "paddw          %%mm2,                  %%mm0\n" // Lum4 +green
-        "packuswb       %%mm6,                  %%mm6\n"
-        "punpcklbw      %%mm4,                  %%mm4\n"
-        "punpcklbw      %%mm5,                  %%mm5\n"
-        "punpcklbw      %%mm6,                  %%mm6\n"
-        "psllw          $3,                     %%mm5\n" // GREEN 3
-        "pand           %16,                    %%mm4\n"
-        "psraw          $6,                     %%mm3\n" // psr 6
-        "psraw          $6,                     %%mm0\n"
-        "pand           %16,                    %%mm6\n" // BLUE
-        "pand           %17,                    %%mm5\n"
-        "psrlw          $11,                    %%mm6\n" // BLUE  3
-        "por            %%mm5,                  %%mm4\n"
-        "psraw          $6,                     %%mm7\n"
-        "por            %%mm6,                  %%mm4\n"
-        "packuswb       %%mm3,                  %%mm3\n"
-        "packuswb       %%mm0,                  %%mm0\n"
-        "packuswb       %%mm7,                  %%mm7\n"
-        "punpcklbw      %%mm3,                  %%mm3\n"
-        "punpcklbw      %%mm0,                  %%mm0\n"
-        "punpcklbw      %%mm7,                  %%mm7\n"
-        "pand           %16,                    %%mm3\n"
-        "pand           %16,                    %%mm7\n" // BLUE
-        "psllw          $3,                     %%mm0\n" // GREEN 4
-        "psrlw          $11,                    %%mm7\n"
-        "pand           %17,                    %%mm0\n"
-        "por            %%mm7,                  %%mm3\n"
-        "por            %%mm0,                  %%mm3\n"
-
-        "movq           %%mm4,                  %%mm5\n"
-
-        "punpcklwd      %%mm3,                  %%mm4\n"
-        "punpckhwd      %%mm3,                  %%mm5\n"
-
-        "movq           %%mm4,                  (%5)\n"
-        "movq           %%mm5,                  8(%5)\n"
-
-        "addl           $8,                     %6\n"
-        "addl           $8,                     %2\n"
-        "addl           $4,                     (%%esp)\n"
-        "addl           $4,                     %1\n"
-        "cmpl           %4,                     %6\n"
-        "leal           16(%3),                 %3\n"
-        "leal           16(%5),%5\n" // row2+16
-
-        "jl             1b\n"
-        "addl           %4,     %2\n" // lum += cols
-        "addl           %8,     %3\n" // row1+= mod
-        "addl           %8,     %5\n" // row2+= mod
-        "movl           $0,     %6\n" // x=0
-        "cmpl           %7,     %2\n"
-        "jl             1b\n"
-        "addl $4, %%esp\n"  // get rid of the stack slot we reserved.
-        "emms\n"
-        :
-        : "m" (cr), "r"(cb),"r"(lum),
-          "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
-          "m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
-          "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),
-          "m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565)
-    );
-}
-
-/* *INDENT-ON* */
-
-#endif /* GCC3 i386 inline assembly */
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_sw.c	Wed Feb 02 22:55:12 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1322 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-/* This is the software implementation of the YUV texture support */
-
-/* This code was derived from code carrying the following copyright notices:
-
- * Copyright (c) 1995 The Regents of the University of California.
- * All rights reserved.
- * 
- * Permission to use, copy, modify, and distribute this software and its
- * documentation for any purpose, without fee, and without written agreement is
- * hereby granted, provided that the above copyright notice and the following
- * two paragraphs appear in all copies of this software.
- * 
- * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
- * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
- * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
- * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
- * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
-
- * Copyright (c) 1995 Erik Corry
- * All rights reserved.
- * 
- * Permission to use, copy, modify, and distribute this software and its
- * documentation for any purpose, without fee, and without written agreement is
- * hereby granted, provided that the above copyright notice and the following
- * two paragraphs appear in all copies of this software.
- * 
- * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
- * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
- * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
- * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
- * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
-
- * Portions of this software Copyright (c) 1995 Brown University.
- * All rights reserved.
- * 
- * Permission to use, copy, modify, and distribute this software and its
- * documentation for any purpose, without fee, and without written agreement
- * is hereby granted, provided that the above copyright notice and the
- * following two paragraphs appear in all copies of this software.
- * 
- * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
- * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
- * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- * 
- * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
- * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
- * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
- */
-
-#include "SDL_video.h"
-#include "SDL_cpuinfo.h"
-#include "SDL_yuv_sw_c.h"
-
-
-/* The colorspace conversion functions */
-
-#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
-extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
-                                    unsigned char *lum, unsigned char *cr,
-                                    unsigned char *cb, unsigned char *out,
-                                    int rows, int cols, int mod);
-extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
-                                    unsigned char *lum, unsigned char *cr,
-                                    unsigned char *cb, unsigned char *out,
-                                    int rows, int cols, int mod);
-#endif
-
-static void
-Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned short *row1;
-    unsigned short *row2;
-    unsigned char *lum2;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    row1 = (unsigned short *) out;
-    row2 = row1 + cols + mod;
-    lum2 = lum + cols;
-
-    mod += cols + mod;
-
-    y = rows / 2;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            ++cr;
-            ++cb;
-
-            L = *lum++;
-            *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
-                                        rgb_2_pix[L + crb_g] |
-                                        rgb_2_pix[L + cb_b]);
-
-            L = *lum++;
-            *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
-                                        rgb_2_pix[L + crb_g] |
-                                        rgb_2_pix[L + cb_b]);
-
-
-            /* Now, do second row.  */
-
-            L = *lum2++;
-            *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
-                                        rgb_2_pix[L + crb_g] |
-                                        rgb_2_pix[L + cb_b]);
-
-            L = *lum2++;
-            *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
-                                        rgb_2_pix[L + crb_g] |
-                                        rgb_2_pix[L + cb_b]);
-        }
-
-        /*
-         * These values are at the start of the next line, (due
-         * to the ++'s above),but they need to be at the start
-         * of the line after that.
-         */
-        lum += cols;
-        lum2 += cols;
-        row1 += mod;
-        row2 += mod;
-    }
-}
-
-static void
-Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int value;
-    unsigned char *row1;
-    unsigned char *row2;
-    unsigned char *lum2;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    row1 = out;
-    row2 = row1 + cols * 3 + mod * 3;
-    lum2 = lum + cols;
-
-    mod += cols + mod;
-    mod *= 3;
-
-    y = rows / 2;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            ++cr;
-            ++cb;
-
-            L = *lum++;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            *row1++ = (value) & 0xFF;
-            *row1++ = (value >> 8) & 0xFF;
-            *row1++ = (value >> 16) & 0xFF;
-
-            L = *lum++;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            *row1++ = (value) & 0xFF;
-            *row1++ = (value >> 8) & 0xFF;
-            *row1++ = (value >> 16) & 0xFF;
-
-
-            /* Now, do second row.  */
-
-            L = *lum2++;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            *row2++ = (value) & 0xFF;
-            *row2++ = (value >> 8) & 0xFF;
-            *row2++ = (value >> 16) & 0xFF;
-
-            L = *lum2++;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            *row2++ = (value) & 0xFF;
-            *row2++ = (value >> 8) & 0xFF;
-            *row2++ = (value >> 16) & 0xFF;
-        }
-
-        /*
-         * These values are at the start of the next line, (due
-         * to the ++'s above),but they need to be at the start
-         * of the line after that.
-         */
-        lum += cols;
-        lum2 += cols;
-        row1 += mod;
-        row2 += mod;
-    }
-}
-
-static void
-Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int *row1;
-    unsigned int *row2;
-    unsigned char *lum2;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    row1 = (unsigned int *) out;
-    row2 = row1 + cols + mod;
-    lum2 = lum + cols;
-
-    mod += cols + mod;
-
-    y = rows / 2;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            ++cr;
-            ++cb;
-
-            L = *lum++;
-            *row1++ = (rgb_2_pix[L + cr_r] |
-                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
-            L = *lum++;
-            *row1++ = (rgb_2_pix[L + cr_r] |
-                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
-
-            /* Now, do second row.  */
-
-            L = *lum2++;
-            *row2++ = (rgb_2_pix[L + cr_r] |
-                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
-            L = *lum2++;
-            *row2++ = (rgb_2_pix[L + cr_r] |
-                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-        }
-
-        /*
-         * These values are at the start of the next line, (due
-         * to the ++'s above),but they need to be at the start
-         * of the line after that.
-         */
-        lum += cols;
-        lum2 += cols;
-        row1 += mod;
-        row2 += mod;
-    }
-}
-
-/*
- * In this function I make use of a nasty trick. The tables have the lower
- * 16 bits replicated in the upper 16. This means I can write ints and get
- * the horisontal doubling for free (almost).
- */
-static void
-Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int *row1 = (unsigned int *) out;
-    const int next_row = cols + (mod / 2);
-    unsigned int *row2 = row1 + 2 * next_row;
-    unsigned char *lum2;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    lum2 = lum + cols;
-
-    mod = (next_row * 3) + (mod / 2);
-
-    y = rows / 2;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            ++cr;
-            ++cb;
-
-            L = *lum++;
-            row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
-                                        rgb_2_pix[L + crb_g] |
-                                        rgb_2_pix[L + cb_b]);
-            row1++;
-
-            L = *lum++;
-            row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
-                                        rgb_2_pix[L + crb_g] |
-                                        rgb_2_pix[L + cb_b]);
-            row1++;
-
-
-            /* Now, do second row. */
-
-            L = *lum2++;
-            row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
-                                        rgb_2_pix[L + crb_g] |
-                                        rgb_2_pix[L + cb_b]);
-            row2++;
-
-            L = *lum2++;
-            row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
-                                        rgb_2_pix[L + crb_g] |
-                                        rgb_2_pix[L + cb_b]);
-            row2++;
-        }
-
-        /*
-         * These values are at the start of the next line, (due
-         * to the ++'s above),but they need to be at the start
-         * of the line after that.
-         */
-        lum += cols;
-        lum2 += cols;
-        row1 += mod;
-        row2 += mod;
-    }
-}
-
-static void
-Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int value;
-    unsigned char *row1 = out;
-    const int next_row = (cols * 2 + mod) * 3;
-    unsigned char *row2 = row1 + 2 * next_row;
-    unsigned char *lum2;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    lum2 = lum + cols;
-
-    mod = next_row * 3 + mod * 3;
-
-    y = rows / 2;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            ++cr;
-            ++cb;
-
-            L = *lum++;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
-                row1[next_row + 3 + 0] = (value) & 0xFF;
-            row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
-                row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
-            row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
-                row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
-            row1 += 2 * 3;
-
-            L = *lum++;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
-                row1[next_row + 3 + 0] = (value) & 0xFF;
-            row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
-                row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
-            row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
-                row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
-            row1 += 2 * 3;
-
-
-            /* Now, do second row. */
-
-            L = *lum2++;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
-                row2[next_row + 3 + 0] = (value) & 0xFF;
-            row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
-                row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
-            row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
-                row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
-            row2 += 2 * 3;
-
-            L = *lum2++;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
-                row2[next_row + 3 + 0] = (value) & 0xFF;
-            row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
-                row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
-            row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
-                row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
-            row2 += 2 * 3;
-        }
-
-        /*
-         * These values are at the start of the next line, (due
-         * to the ++'s above),but they need to be at the start
-         * of the line after that.
-         */
-        lum += cols;
-        lum2 += cols;
-        row1 += mod;
-        row2 += mod;
-    }
-}
-
-static void
-Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int *row1 = (unsigned int *) out;
-    const int next_row = cols * 2 + mod;
-    unsigned int *row2 = row1 + 2 * next_row;
-    unsigned char *lum2;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    lum2 = lum + cols;
-
-    mod = (next_row * 3) + mod;
-
-    y = rows / 2;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            ++cr;
-            ++cb;
-
-            L = *lum++;
-            row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
-                (rgb_2_pix[L + cr_r] |
-                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row1 += 2;
-
-            L = *lum++;
-            row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
-                (rgb_2_pix[L + cr_r] |
-                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row1 += 2;
-
-
-            /* Now, do second row. */
-
-            L = *lum2++;
-            row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
-                (rgb_2_pix[L + cr_r] |
-                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row2 += 2;
-
-            L = *lum2++;
-            row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
-                (rgb_2_pix[L + cr_r] |
-                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row2 += 2;
-        }
-
-        /*
-         * These values are at the start of the next line, (due
-         * to the ++'s above),but they need to be at the start
-         * of the line after that.
-         */
-        lum += cols;
-        lum2 += cols;
-        row1 += mod;
-        row2 += mod;
-    }
-}
-
-static void
-Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned short *row;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    row = (unsigned short *) out;
-
-    y = rows;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            cr += 4;
-            cb += 4;
-
-            L = *lum;
-            lum += 2;
-            *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
-                                       rgb_2_pix[L + crb_g] |
-                                       rgb_2_pix[L + cb_b]);
-
-            L = *lum;
-            lum += 2;
-            *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
-                                       rgb_2_pix[L + crb_g] |
-                                       rgb_2_pix[L + cb_b]);
-
-        }
-
-        row += mod;
-    }
-}
-
-static void
-Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int value;
-    unsigned char *row;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    row = (unsigned char *) out;
-    mod *= 3;
-    y = rows;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            cr += 4;
-            cb += 4;
-
-            L = *lum;
-            lum += 2;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            *row++ = (value) & 0xFF;
-            *row++ = (value >> 8) & 0xFF;
-            *row++ = (value >> 16) & 0xFF;
-
-            L = *lum;
-            lum += 2;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            *row++ = (value) & 0xFF;
-            *row++ = (value >> 8) & 0xFF;
-            *row++ = (value >> 16) & 0xFF;
-
-        }
-        row += mod;
-    }
-}
-
-static void
-Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int *row;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    row = (unsigned int *) out;
-    y = rows;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            cr += 4;
-            cb += 4;
-
-            L = *lum;
-            lum += 2;
-            *row++ = (rgb_2_pix[L + cr_r] |
-                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
-            L = *lum;
-            lum += 2;
-            *row++ = (rgb_2_pix[L + cr_r] |
-                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-
-
-        }
-        row += mod;
-    }
-}
-
-/*
- * In this function I make use of a nasty trick. The tables have the lower
- * 16 bits replicated in the upper 16. This means I can write ints and get
- * the horisontal doubling for free (almost).
- */
-static void
-Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int *row = (unsigned int *) out;
-    const int next_row = cols + (mod / 2);
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-
-    y = rows;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            cr += 4;
-            cb += 4;
-
-            L = *lum;
-            lum += 2;
-            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
-                                      rgb_2_pix[L + crb_g] |
-                                      rgb_2_pix[L + cb_b]);
-            row++;
-
-            L = *lum;
-            lum += 2;
-            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
-                                      rgb_2_pix[L + crb_g] |
-                                      rgb_2_pix[L + cb_b]);
-            row++;
-
-        }
-        row += next_row;
-    }
-}
-
-static void
-Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int value;
-    unsigned char *row = out;
-    const int next_row = (cols * 2 + mod) * 3;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-    y = rows;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            cr += 4;
-            cb += 4;
-
-            L = *lum;
-            lum += 2;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
-                row[next_row + 3 + 0] = (value) & 0xFF;
-            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
-                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
-            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
-                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
-            row += 2 * 3;
-
-            L = *lum;
-            lum += 2;
-            value = (rgb_2_pix[L + cr_r] |
-                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
-                row[next_row + 3 + 0] = (value) & 0xFF;
-            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
-                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
-            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
-                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
-            row += 2 * 3;
-
-        }
-        row += next_row;
-    }
-}
-
-static void
-Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod)
-{
-    unsigned int *row = (unsigned int *) out;
-    const int next_row = cols * 2 + mod;
-    int x, y;
-    int cr_r;
-    int crb_g;
-    int cb_b;
-    int cols_2 = cols / 2;
-    mod += mod;
-    y = rows;
-    while (y--) {
-        x = cols_2;
-        while (x--) {
-            register int L;
-
-            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
-            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
-                + colortab[*cb + 2 * 256];
-            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
-            cr += 4;
-            cb += 4;
-
-            L = *lum;
-            lum += 2;
-            row[0] = row[1] = row[next_row] = row[next_row + 1] =
-                (rgb_2_pix[L + cr_r] |
-                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row += 2;
-
-            L = *lum;
-            lum += 2;
-            row[0] = row[1] = row[next_row] = row[next_row + 1] =
-                (rgb_2_pix[L + cr_r] |
-                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
-            row += 2;
-
-
-        }
-
-        row += next_row;
-    }
-}
-
-/*
- * How many 1 bits are there in the Uint32.
- * Low performance, do not call often.
- */
-static int
-number_of_bits_set(Uint32 a)
-{
-    if (!a)
-        return 0;
-    if (a & 1)
-        return 1 + number_of_bits_set(a >> 1);
-    return (number_of_bits_set(a >> 1));
-}
-
-/*
- * How many 0 bits are there at least significant end of Uint32.
- * Low performance, do not call often.
- */
-static int
-free_bits_at_bottom(Uint32 a)
-{
-    /* assume char is 8 bits */
-    if (!a)
-        return sizeof(Uint32) * 8;
-    if (((Sint32) a) & 1l)
-        return 0;
-    return 1 + free_bits_at_bottom(a >> 1);
-}
-
-static int
-SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
-{
-    Uint32 *r_2_pix_alloc;
-    Uint32 *g_2_pix_alloc;
-    Uint32 *b_2_pix_alloc;
-    int i;
-    int bpp;
-    Uint32 Rmask, Gmask, Bmask, Amask;
-
-    if (!SDL_PixelFormatEnumToMasks
-        (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
-        SDL_SetError("Unsupported YUV destination format");
-        return -1;
-    }
-
-    swdata->target_format = target_format;
-    r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
-    g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
-    b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
-
-    /* 
-     * Set up entries 0-255 in rgb-to-pixel value tables.
-     */
-    for (i = 0; i < 256; ++i) {
-        r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
-        r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
-        r_2_pix_alloc[i + 256] |= Amask;
-        g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
-        g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
-        g_2_pix_alloc[i + 256] |= Amask;
-        b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
-        b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
-        b_2_pix_alloc[i + 256] |= Amask;
-    }
-
-    /*
-     * If we have 16-bit output depth, then we double the value
-     * in the top word. This means that we can write out both
-     * pixels in the pixel doubling mode with one op. It is 
-     * harmless in the normal case as storing a 32-bit value
-     * through a short pointer will lose the top bits anyway.
-     */
-    if (SDL_BYTESPERPIXEL(target_format) == 2) {
-        for (i = 0; i < 256; ++i) {
-            r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
-            g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
-            b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
-        }
-    }
-
-    /*
-     * Spread out the values we have to the rest of the array so that
-     * we do not need to check for overflow.
-     */
-    for (i = 0; i < 256; ++i) {
-        r_2_pix_alloc[i] = r_2_pix_alloc[256];
-        r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
-        g_2_pix_alloc[i] = g_2_pix_alloc[256];
-        g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
-        b_2_pix_alloc[i] = b_2_pix_alloc[256];
-        b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
-    }
-
-    /* You have chosen wisely... */
-    switch (swdata->format) {
-    case SDL_PIXELFORMAT_YV12:
-    case SDL_PIXELFORMAT_IYUV:
-        if (SDL_BYTESPERPIXEL(target_format) == 2) {
-#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
-            /* inline assembly functions */
-            if (SDL_HasMMX() && (Rmask == 0xF800) &&
-                (Gmask == 0x07E0) && (Bmask == 0x001F)
-                && (swdata->w & 15) == 0) {
-/*printf("Using MMX 16-bit 565 dither\n");*/
-                swdata->Display1X = Color565DitherYV12MMX1X;
-            } else {
-/*printf("Using C 16-bit dither\n");*/
-                swdata->Display1X = Color16DitherYV12Mod1X;
-            }
-#else
-            swdata->Display1X = Color16DitherYV12Mod1X;
-#endif
-            swdata->Display2X = Color16DitherYV12Mod2X;
-        }
-        if (SDL_BYTESPERPIXEL(target_format) == 3) {
-            swdata->Display1X = Color24DitherYV12Mod1X;
-            swdata->Display2X = Color24DitherYV12Mod2X;
-        }
-        if (SDL_BYTESPERPIXEL(target_format) == 4) {
-#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
-            /* inline assembly functions */
-            if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
-                (Gmask == 0x0000FF00) &&
-                (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
-/*printf("Using MMX 32-bit dither\n");*/
-                swdata->Display1X = ColorRGBDitherYV12MMX1X;
-            } else {
-/*printf("Using C 32-bit dither\n");*/
-                swdata->Display1X = Color32DitherYV12Mod1X;
-            }
-#else
-            swdata->Display1X = Color32DitherYV12Mod1X;
-#endif
-            swdata->Display2X = Color32DitherYV12Mod2X;
-        }
-        break;
-    case SDL_PIXELFORMAT_YUY2:
-    case SDL_PIXELFORMAT_UYVY:
-    case SDL_PIXELFORMAT_YVYU:
-        if (SDL_BYTESPERPIXEL(target_format) == 2) {
-            swdata->Display1X = Color16DitherYUY2Mod1X;
-            swdata->Display2X = Color16DitherYUY2Mod2X;
-        }
-        if (SDL_BYTESPERPIXEL(target_format) == 3) {
-            swdata->Display1X = Color24DitherYUY2Mod1X;
-            swdata->Display2X = Color24DitherYUY2Mod2X;
-        }
-        if (SDL_BYTESPERPIXEL(target_format) == 4) {
-            swdata->Display1X = Color32DitherYUY2Mod1X;
-            swdata->Display2X = Color32DitherYUY2Mod2X;
-        }
-        break;
-    default:
-        /* We should never get here (caught above) */
-        break;
-    }
-
-    if (swdata->display) {
-        SDL_FreeSurface(swdata->display);
-        swdata->display = NULL;
-    }
-    return 0;
-}
-
-SDL_SW_YUVTexture *
-SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
-{
-    SDL_SW_YUVTexture *swdata;
-    int *Cr_r_tab;
-    int *Cr_g_tab;
-    int *Cb_g_tab;
-    int *Cb_b_tab;
-    int i;
-    int CR, CB;
-
-    swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
-    if (!swdata) {
-        SDL_OutOfMemory();
-        return NULL;
-    }
-
-    switch (format) {
-    case SDL_PIXELFORMAT_YV12:
-    case SDL_PIXELFORMAT_IYUV:
-    case SDL_PIXELFORMAT_YUY2:
-    case SDL_PIXELFORMAT_UYVY:
-    case SDL_PIXELFORMAT_YVYU:
-        break;
-    default:
-        SDL_SetError("Unsupported YUV format");
-        return NULL;
-    }
-
-    swdata->format = format;
-    swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
-    swdata->w = w;
-    swdata->h = h;
-    swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
-    swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
-    swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
-    if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
-        SDL_OutOfMemory();
-        SDL_SW_DestroyYUVTexture(swdata);
-        return NULL;
-    }
-
-    /* Generate the tables for the display surface */
-    Cr_r_tab = &swdata->colortab[0 * 256];
-    Cr_g_tab = &swdata->colortab[1 * 256];
-    Cb_g_tab = &swdata->colortab[2 * 256];
-    Cb_b_tab = &swdata->colortab[3 * 256];
-    for (i = 0; i < 256; i++) {
-        /* Gamma correction (luminescence table) and chroma correction
-           would be done here.  See the Berkeley mpeg_play sources.
-         */
-        CB = CR = (i - 128);
-        Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
-        Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
-        Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
-        Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
-    }
-
-    /* Find the pitch and offset values for the overlay */
-    switch (format) {
-    case SDL_PIXELFORMAT_YV12:
-    case SDL_PIXELFORMAT_IYUV:
-        swdata->pitches[0] = w;
-        swdata->pitches[1] = swdata->pitches[0] / 2;
-        swdata->pitches[2] = swdata->pitches[0] / 2;
-        swdata->planes[0] = swdata->pixels;
-        swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
-        swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
-        break;
-    case SDL_PIXELFORMAT_YUY2:
-    case SDL_PIXELFORMAT_UYVY:
-    case SDL_PIXELFORMAT_YVYU:
-        swdata->pitches[0] = w * 2;
-        swdata->planes[0] = swdata->pixels;
-        break;
-    default:
-        /* We should never get here (caught above) */
-        break;
-    }
-
-    /* We're all done.. */
-    return (swdata);
-}
-
-int
-SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
-                             int *pitch)
-{
-    *pixels = swdata->planes[0];
-    *pitch = swdata->pitches[0];
-    return 0;
-}
-
-int
-SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
-                        const void *pixels, int pitch)
-{
-    switch (swdata->format) {
-    case SDL_PIXELFORMAT_YV12:
-    case SDL_PIXELFORMAT_IYUV:
-        if (rect
-            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
-                || rect->h != swdata->h)) {
-            SDL_SetError
-                ("YV12 and IYUV textures only support full surface updates");
-            return -1;
-        }
-        SDL_memcpy(swdata->pixels, pixels, swdata->h * swdata->w * 2);
-        break;
-    case SDL_PIXELFORMAT_YUY2:
-    case SDL_PIXELFORMAT_UYVY:
-    case SDL_PIXELFORMAT_YVYU:
-        {
-            Uint8 *src, *dst;
-            int row;
-            size_t length;
-
-            src = (Uint8 *) pixels;
-            dst =
-                swdata->planes[0] + rect->y * swdata->pitches[0] +
-                rect->x * 2;
-            length = rect->w * 2;
-            for (row = 0; row < rect->h; ++row) {
-                SDL_memcpy(dst, src, length);
-                src += pitch;
-                dst += swdata->pitches[0];
-            }
-        }
-        break;
-    }
-    return 0;
-}
-
-int
-SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
-                      int markDirty, void **pixels, int *pitch)
-{
-    switch (swdata->format) {
-    case SDL_PIXELFORMAT_YV12:
-    case SDL_PIXELFORMAT_IYUV:
-        if (rect
-            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
-                || rect->h != swdata->h)) {
-            SDL_SetError
-                ("YV12 and IYUV textures only support full surface locks");
-            return -1;
-        }
-        break;
-    }
-
-    *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
-    *pitch = swdata->pitches[0];
-    return 0;
-}
-
-void
-SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
-{
-}
-
-int
-SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
-                    Uint32 target_format, int w, int h, void *pixels,
-                    int pitch)
-{
-    int stretch;
-    int scale_2x;
-    Uint8 *lum, *Cr, *Cb;
-    int mod;
-
-    /* Make sure we're set up to display in the desired format */
-    if (target_format != swdata->target_format) {
-        if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
-            return -1;
-        }
-    }
-
-    stretch = 0;
-    scale_2x = 0;
-    if (srcrect->x || srcrect->y || srcrect->w < swdata->w
-        || srcrect->h < swdata->h) {
-        /* The source rectangle has been clipped.
-           Using a scratch surface is easier than adding clipped
-           source support to all the blitters, plus that would
-           slow them down in the general unclipped case.
-         */
-        stretch = 1;
-    } else if ((srcrect->w != w) || (srcrect->h != h)) {
-        if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
-            scale_2x = 1;
-        } else {
-            stretch = 1;
-        }
-    }
-    if (stretch) {
-        int bpp;
-        Uint32 Rmask, Gmask, Bmask, Amask;
-
-        if (swdata->display) {
-            swdata->display->w = w;
-            swdata->display->h = h;
-            swdata->display->pixels = pixels;
-            swdata->display->pitch = pitch;
-        } else {
-            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
-            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
-                                       &Bmask, &Amask);
-            swdata->display =
-                SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
-                                         Gmask, Bmask, Amask);
-            if (!swdata->display) {
-                return (-1);
-            }
-        }
-        if (!swdata->stretch) {
-            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
-            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
-                                       &Bmask, &Amask);
-            swdata->stretch =
-                SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
-                                     Gmask, Bmask, Amask);
-            if (!swdata->stretch) {
-                return (-1);
-            }
-        }
-        pixels = swdata->stretch->pixels;
-        pitch = swdata->stretch->pitch;
-    }
-    switch (swdata->format) {
-    case SDL_PIXELFORMAT_YV12:
-        lum = swdata->planes[0];
-        Cr = swdata->planes[1];
-        Cb = swdata->planes[2];
-        break;
-    case SDL_PIXELFORMAT_IYUV:
-        lum = swdata->planes[0];
-        Cr = swdata->planes[2];
-        Cb = swdata->planes[1];
-        break;
-    case SDL_PIXELFORMAT_YUY2:
-        lum = swdata->planes[0];
-        Cr = lum + 3;
-        Cb = lum + 1;
-        break;
-    case SDL_PIXELFORMAT_UYVY:
-        lum = swdata->planes[0] + 1;
-        Cr = lum + 1;
-        Cb = lum - 1;
-        break;
-    case SDL_PIXELFORMAT_YVYU:
-        lum = swdata->planes[0];
-        Cr = lum + 1;
-        Cb = lum + 3;
-        break;
-    default:
-        SDL_SetError("Unsupported YUV format in copy");
-        return (-1);
-    }
-    mod = (pitch / SDL_BYTESPERPIXEL(target_format));
-
-    if (scale_2x) {
-        mod -= (swdata->w * 2);
-        swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
-                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
-    } else {
-        mod -= swdata->w;
-        swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
-                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
-    }
-    if (stretch) {
-        SDL_Rect rect = *srcrect;
-        SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
-    }
-    return 0;
-}
-
-void
-SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
-{
-    if (swdata) {
-        if (swdata->pixels) {
-            SDL_free(swdata->pixels);
-        }
-        if (swdata->colortab) {
-            SDL_free(swdata->colortab);
-        }
-        if (swdata->rgb_2_pix) {
-            SDL_free(swdata->rgb_2_pix);
-        }
-        if (swdata->stretch) {
-            SDL_FreeSurface(swdata->stretch);
-        }
-        if (swdata->display) {
-            SDL_FreeSurface(swdata->display);
-        }
-        SDL_free(swdata);
-    }
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_sw_c.h	Wed Feb 02 22:55:12 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "SDL_video.h"
-#include "SDL_sysvideo.h"
-
-/* This is the software implementation of the YUV texture support */
-
-struct SDL_SW_YUVTexture
-{
-    Uint32 format;
-    Uint32 target_format;
-    int w, h;
-    Uint8 *pixels;
-    int *colortab;
-    Uint32 *rgb_2_pix;
-    void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod);
-    void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
-                       unsigned char *lum, unsigned char *cr,
-                       unsigned char *cb, unsigned char *out,
-                       int rows, int cols, int mod);
-
-    /* These are just so we don't have to allocate them separately */
-    Uint16 pitches[3];
-    Uint8 *planes[3];
-
-    /* This is a temporary surface in case we have to stretch copy */
-    SDL_Surface *stretch;
-    SDL_Surface *display;
-};
-
-typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture;
-
-SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h);
-int SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
-                                 int *pitch);
-int SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
-                            const void *pixels, int pitch);
-int SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
-                          int markDirty, void **pixels, int *pitch);
-void SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata);
-int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
-                        Uint32 target_format, int w, int h, void *pixels,
-                        int pitch);
-void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata);
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/mmx.h	Wed Feb 02 22:55:12 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,642 +0,0 @@
-/*	mmx.h
-
-	MultiMedia eXtensions GCC interface library for IA32.
-
-	To use this library, simply include this header file
-	and compile with GCC.  You MUST have inlining enabled
-	in order for mmx_ok() to work; this can be done by
-	simply using -O on the GCC command line.
-
-	Compiling with -DMMX_TRACE will cause detailed trace
-	output to be sent to stderr for each mmx operation.
-	This adds lots of code, and obviously slows execution to
-	a crawl, but can be very useful for debugging.
-
-	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
-	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
-	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
-	AND FITNESS FOR ANY PARTICULAR PURPOSE.
-
-	1997-99 by H. Dietz and R. Fisher
-
- Notes:
-	It appears that the latest gas has the pand problem fixed, therefore
-	  I'll undefine BROKEN_PAND by default.
-*/
-
-#ifndef _MMX_H
-#define _MMX_H
-
-
-/*	Warning:  at this writing, the version of GAS packaged
-	with most Linux distributions does not handle the
-	parallel AND operation mnemonic correctly.  If the
-	symbol BROKEN_PAND is defined, a slower alternative
-	coding will be used.  If execution of mmxtest results
-	in an illegal instruction fault, define this symbol.
-*/
-#undef	BROKEN_PAND
-
-
-/*	The type of an value that fits in an MMX register
-	(note that long long constant values MUST be suffixed
-	 by LL and unsigned long long values by ULL, lest
-	 they be truncated by the compiler)
-*/
-typedef union
-{
-    long long q;                /* Quadword (64-bit) value */
-    unsigned long long uq;      /* Unsigned Quadword */
-    int d[2];                   /* 2 Doubleword (32-bit) values */
-    unsigned int ud[2];         /* 2 Unsigned Doubleword */
-    short w[4];                 /* 4 Word (16-bit) values */
-    unsigned short uw[4];       /* 4 Unsigned Word */
-    char b[8];                  /* 8 Byte (8-bit) values */
-    unsigned char ub[8];        /* 8 Unsigned Byte */
-    float s[2];                 /* Single-precision (32-bit) value */
-} __attribute__ ((aligned(8))) mmx_t;   /* On an 8-byte (64-bit) boundary */
-
-
-#if 0
-/*	Function to test if multimedia instructions are supported...
-*/
-inline extern int
-mm_support(void)
-{
-    /* Returns 1 if MMX instructions are supported,
-       3 if Cyrix MMX and Extended MMX instructions are supported
-       5 if AMD MMX and 3DNow! instructions are supported
-       0 if hardware does not support any of these
-     */
-    register int rval = 0;
-
-    __asm__ __volatile__(
-                            /* See if CPUID instruction is supported ... */
-                            /* ... Get copies of EFLAGS into eax and ecx */
-                            "pushf\n\t"
-                            "popl %%eax\n\t" "movl %%eax, %%ecx\n\t"
-                            /* ... Toggle the ID bit in one copy and store */
-                            /*     to the EFLAGS reg */
-                            "xorl $0x200000, %%eax\n\t"
-                            "push %%eax\n\t" "popf\n\t"
-                            /* ... Get the (hopefully modified) EFLAGS */
-                            "pushf\n\t" "popl %%eax\n\t"
-                            /* ... Compare and test result */
-                            "xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */
-                            /* Get standard CPUID information, and
-                               go to a specific vendor section */
-                            "movl $0, %%eax\n\t" "cpuid\n\t"
-                            /* Check for Intel */
-                            "cmpl $0x756e6547, %%ebx\n\t"
-                            "jne TryAMD\n\t"
-                            "cmpl $0x49656e69, %%edx\n\t"
-                            "jne TryAMD\n\t"
-                            "cmpl $0x6c65746e, %%ecx\n"
-                            "jne TryAMD\n\t" "jmp Intel\n\t"
-                            /* Check for AMD */
-                            "\nTryAMD:\n\t"
-                            "cmpl $0x68747541, %%ebx\n\t"
-                            "jne TryCyrix\n\t"
-                            "cmpl $0x69746e65, %%edx\n\t"
-                            "jne TryCyrix\n\t"
-                            "cmpl $0x444d4163, %%ecx\n"
-                            "jne TryCyrix\n\t" "jmp AMD\n\t"
-                            /* Check for Cyrix */
-                            "\nTryCyrix:\n\t"
-                            "cmpl $0x69727943, %%ebx\n\t"
-                            "jne NotSupported2\n\t"
-                            "cmpl $0x736e4978, %%edx\n\t"
-                            "jne NotSupported3\n\t"
-                            "cmpl $0x64616574, %%ecx\n\t"
-                            "jne NotSupported4\n\t"
-                            /* Drop through to Cyrix... */
-                            /* Cyrix Section */
-                            /* See if extended CPUID level 80000001 is supported */
-                            /* The value of CPUID/80000001 for the 6x86MX is undefined
-                               according to the Cyrix CPU Detection Guide (Preliminary
-                               Rev. 1.01 table 1), so we'll check the value of eax for
-                               CPUID/0 to see if standard CPUID level 2 is supported.
-                               According to the table, the only CPU which supports level
-                               2 is also the only one which supports extended CPUID levels.
-                             */
-                            "cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t"    /* Use standard CPUID instead */
-                            /* Extended CPUID supported (in theory), so get extended
-                               features */
-                            "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t"    /* Test for MMX */
-                            "jz NotSupported5\n\t"      /* MMX not supported */
-                            "testl $0x01000000, %%eax\n\t"      /* Test for Ext'd MMX */
-                            "jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t"        /* MMX Supported */
-                            "jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t"  /* EMMX and MMX Supported */
-                            "jmp Return\n\t"
-                            /* AMD Section */
-                            "AMD:\n\t"
-                            /* See if extended CPUID is supported */
-                            "movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t"    /* Use standard CPUID instead */
-                            /* Extended CPUID supported, so get extended features */
-                            "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t"    /* Test for MMX */
-                            "jz NotSupported6\n\t"      /* MMX not supported */
-                            "testl $0x80000000, %%edx\n\t"      /* Test for 3DNow! */
-                            "jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t"   /* MMX Supported */
-                            "jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t"     /* 3DNow! and MMX Supported */
-                            "jmp Return\n\t"
-                            /* Intel Section */
-                            "Intel:\n\t"
-                            /* Check for MMX */
-                            "MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t"      /* Test for MMX */
-                            "jz NotSupported7\n\t"      /* MMX Not supported */
-                            "movl $1, %0:\n\n\t"        /* MMX Supported */
-                            "jmp Return\n\t"
-                            /* Nothing supported */
-                            "\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval):     /* no input */
-                            :"eax", "ebx", "ecx", "edx");
-
-    /* Return */
-    return (rval);
-}
-
-/*	Function to test if mmx instructions are supported...
-*/
-inline extern int
-mmx_ok(void)
-{
-    /* Returns 1 if MMX instructions are supported, 0 otherwise */
-    return (mm_support() & 0x1);
-}
-#endif
-
-/*	Helper functions for the instruction macros that follow...
-	(note that memory-to-register, m2r, instructions are nearly
-	 as efficient as register-to-register, r2r, instructions;
-	 however, memory-to-memory instructions are really simulated
-	 as a convenience, and are only 1/3 as efficient)
-*/
-#ifdef	MMX_TRACE
-
-/*	Include the stuff for printing a trace to stderr...
-*/
-
-#define	mmx_i2r(op, imm, reg) \
-	{ \
-		mmx_t mmx_trace; \
-		mmx_trace.uq = (imm); \
-		printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		__asm__ __volatile__ ("movq %%" #reg ", %0" \
-				      : "=X" (mmx_trace) \
-				      : /* nothing */ ); \
-		printf(#reg "=0x%08x%08x) => ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		__asm__ __volatile__ (#op " %0, %%" #reg \
-				      : /* nothing */ \
-				      : "X" (imm)); \
-		__asm__ __volatile__ ("movq %%" #reg ", %0" \
-				      : "=X" (mmx_trace) \
-				      : /* nothing */ ); \
-		printf(#reg "=0x%08x%08x\n", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-	}
-
-#define	mmx_m2r(op, mem, reg) \
-	{ \
-		mmx_t mmx_trace; \
-		mmx_trace = (mem); \
-		printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		__asm__ __volatile__ ("movq %%" #reg ", %0" \
-				      : "=X" (mmx_trace) \
-				      : /* nothing */ ); \
-		printf(#reg "=0x%08x%08x) => ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		__asm__ __volatile__ (#op " %0, %%" #reg \
-				      : /* nothing */ \
-				      : "X" (mem)); \
-		__asm__ __volatile__ ("movq %%" #reg ", %0" \
-				      : "=X" (mmx_trace) \
-				      : /* nothing */ ); \
-		printf(#reg "=0x%08x%08x\n", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-	}
-
-#define	mmx_r2m(op, reg, mem) \
-	{ \
-		mmx_t mmx_trace; \
-		__asm__ __volatile__ ("movq %%" #reg ", %0" \
-				      : "=X" (mmx_trace) \
-				      : /* nothing */ ); \
-		printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		mmx_trace = (mem); \
-		printf(#mem "=0x%08x%08x) => ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		__asm__ __volatile__ (#op " %%" #reg ", %0" \
-				      : "=X" (mem) \
-				      : /* nothing */ ); \
-		mmx_trace = (mem); \
-		printf(#mem "=0x%08x%08x\n", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-	}
-
-#define	mmx_r2r(op, regs, regd) \
-	{ \
-		mmx_t mmx_trace; \
-		__asm__ __volatile__ ("movq %%" #regs ", %0" \
-				      : "=X" (mmx_trace) \
-				      : /* nothing */ ); \
-		printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		__asm__ __volatile__ ("movq %%" #regd ", %0" \
-				      : "=X" (mmx_trace) \
-				      : /* nothing */ ); \
-		printf(#regd "=0x%08x%08x) => ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
-		__asm__ __volatile__ ("movq %%" #regd ", %0" \
-				      : "=X" (mmx_trace) \
-				      : /* nothing */ ); \
-		printf(#regd "=0x%08x%08x\n", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-	}
-
-#define	mmx_m2m(op, mems, memd) \
-	{ \
-		mmx_t mmx_trace; \
-		mmx_trace = (mems); \
-		printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		mmx_trace = (memd); \
-		printf(#memd "=0x%08x%08x) => ", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
-				      #op " %1, %%mm0\n\t" \
-				      "movq %%mm0, %0" \
-				      : "=X" (memd) \
-				      : "X" (mems)); \
-		mmx_trace = (memd); \
-		printf(#memd "=0x%08x%08x\n", \
-			mmx_trace.d[1], mmx_trace.d[0]); \
-	}
-
-#else
-
-/*	These macros are a lot simpler without the tracing...
-*/
-
-#define	mmx_i2r(op, imm, reg) \
-	__asm__ __volatile__ (#op " %0, %%" #reg \
-			      : /* nothing */ \
-			      : "X" (imm) )
-
-#define	mmx_m2r(op, mem, reg) \
-	__asm__ __volatile__ (#op " %0, %%" #reg \
-			      : /* nothing */ \
-			      : "m" (mem))
-
-#define	mmx_r2m(op, reg, mem) \
-	__asm__ __volatile__ (#op " %%" #reg ", %0" \
-			      : "=m" (mem) \
-			      : /* nothing */ )
-
-#define	mmx_r2r(op, regs, regd) \
-	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
-
-#define	mmx_m2m(op, mems, memd) \
-	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
-			      #op " %1, %%mm0\n\t" \
-			      "movq %%mm0, %0" \
-			      : "=X" (memd) \
-			      : "X" (mems))
-
-#endif
-
-
-/*	1x64 MOVe Quadword
-	(this is both a load and a store...
-	 in fact, it is the only way to store)
-*/
-#define	movq_m2r(var, reg)	mmx_m2r(movq, var, reg)
-#define	movq_r2m(reg, var)	mmx_r2m(movq, reg, var)
-#define	movq_r2r(regs, regd)	mmx_r2r(movq, regs, regd)
-#define	movq(vars, vard) \
-	__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
-			      "movq %%mm0, %0" \
-			      : "=X" (vard) \
-			      : "X" (vars))
-
-
-/*	1x32 MOVe Doubleword
-	(like movq, this is both load and store...
-	 but is most useful for moving things between
-	 mmx registers and ordinary registers)
-*/
-#define	movd_m2r(var, reg)	mmx_m2r(movd, var, reg)
-#define	movd_r2m(reg, var)	mmx_r2m(movd, reg, var)
-#define	movd_r2r(regs, regd)	mmx_r2r(movd, regs, regd)
-#define	movd(vars, vard) \
-	__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
-			      "movd %%mm0, %0" \
-			      : "=X" (vard) \
-			      : "X" (vars))
-
-
-/*	2x32, 4x16, and 8x8 Parallel ADDs
-*/
-#define	paddd_m2r(var, reg)	mmx_m2r(paddd, var, reg)
-#define	paddd_r2r(regs, regd)	mmx_r2r(paddd, regs, regd)
-#define	paddd(vars, vard)	mmx_m2m(paddd, vars, vard)
-
-#define	paddw_m2r(var, reg)	mmx_m2r(paddw, var, reg)
-#define	paddw_r2r(regs, regd)	mmx_r2r(paddw, regs, regd)
-#define	paddw(vars, vard)	mmx_m2m(paddw, vars, vard)
-
-#define	paddb_m2r(var, reg)	mmx_m2r(paddb, var, reg)
-#define	paddb_r2r(regs, regd)	mmx_r2r(paddb, regs, regd)
-#define	paddb(vars, vard)	mmx_m2m(paddb, vars, vard)
-
-
-/*	4x16 and 8x8 Parallel ADDs using Saturation arithmetic
-*/
-#define	paddsw_m2r(var, reg)	mmx_m2r(paddsw, var, reg)
-#define	paddsw_r2r(regs, regd)	mmx_r2r(paddsw, regs, regd)
-#define	paddsw(vars, vard)	mmx_m2m(paddsw, vars, vard)
-
-#define	paddsb_m2r(var, reg)	mmx_m2r(paddsb, var, reg)
-#define	paddsb_r2r(regs, regd)	mmx_r2r(paddsb, regs, regd)
-#define	paddsb(vars, vard)	mmx_m2m(paddsb, vars, vard)
-
-
-/*	4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
-*/
-#define	paddusw_m2r(var, reg)	mmx_m2r(paddusw, var, reg)
-#define	paddusw_r2r(regs, regd)	mmx_r2r(paddusw, regs, regd)
-#define	paddusw(vars, vard)	mmx_m2m(paddusw, vars, vard)
-
-#define	paddusb_m2r(var, reg)	mmx_m2r(paddusb, var, reg)
-#define	paddusb_r2r(regs, regd)	mmx_r2r(paddusb, regs, regd)
-#define	paddusb(vars, vard)	mmx_m2m(paddusb, vars, vard)
-
-
-/*	2x32, 4x16, and 8x8 Parallel SUBs
-*/
-#define	psubd_m2r(var, reg)	mmx_m2r(psubd, var, reg)
-#define	psubd_r2r(regs, regd)	mmx_r2r(psubd, regs, regd)
-#define	psubd(vars, vard)	mmx_m2m(psubd, vars, vard)
-
-#define	psubw_m2r(var, reg)	mmx_m2r(psubw, var, reg)
-#define	psubw_r2r(regs, regd)	mmx_r2r(psubw, regs, regd)
-#define	psubw(vars, vard)	mmx_m2m(psubw, vars, vard)
-
-#define	psubb_m2r(var, reg)	mmx_m2r(psubb, var, reg)
-#define	psubb_r2r(regs, regd)	mmx_r2r(psubb, regs, regd)
-#define	psubb(vars, vard)	mmx_m2m(psubb, vars, vard)
-
-
-/*	4x16 and 8x8 Parallel SUBs using Saturation arithmetic
-*/
-#define	psubsw_m2r(var, reg)	mmx_m2r(psubsw, var, reg)
-#define	psubsw_r2r(regs, regd)	mmx_r2r(psubsw, regs, regd)
-#define	psubsw(vars, vard)	mmx_m2m(psubsw, vars, vard)
-
-#define	psubsb_m2r(var, reg)	mmx_m2r(psubsb, var, reg)
-#define	psubsb_r2r(regs, regd)	mmx_r2r(psubsb, regs, regd)
-#define	psubsb(vars, vard)	mmx_m2m(psubsb, vars, vard)
-
-
-/*	4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
-*/
-#define	psubusw_m2r(var, reg)	mmx_m2r(psubusw, var, reg)
-#define	psubusw_r2r(regs, regd)	mmx_r2r(psubusw, regs, regd)
-#define	psubusw(vars, vard)	mmx_m2m(psubusw, vars, vard)
-
-#define	psubusb_m2r(var, reg)	mmx_m2r(psubusb, var, reg)
-#define	psubusb_r2r(regs, regd)	mmx_r2r(psubusb, regs, regd)
-#define	psubusb(vars, vard)	mmx_m2m(psubusb, vars, vard)
-
-
-/*	4x16 Parallel MULs giving Low 4x16 portions of results
-*/
-#define	pmullw_m2r(var, reg)	mmx_m2r(pmullw, var, reg)
-#define	pmullw_r2r(regs, regd)	mmx_r2r(pmullw, regs, regd)
-#define	pmullw(vars, vard)	mmx_m2m(pmullw, vars, vard)
-
-
-/*	4x16 Parallel MULs giving High 4x16 portions of results
-*/
-#define	pmulhw_m2r(var, reg)	mmx_m2r(pmulhw, var, reg)
-#define	pmulhw_r2r(regs, regd)	mmx_r2r(pmulhw, regs, regd)
-#define	pmulhw(vars, vard)	mmx_m2m(pmulhw, vars, vard)
-
-
-/*	4x16->2x32 Parallel Mul-ADD
-	(muls like pmullw, then adds adjacent 16-bit fields
-	 in the multiply result to make the final 2x32 result)
-*/
-#define	pmaddwd_m2r(var, reg)	mmx_m2r(pmaddwd, var, reg)
-#define	pmaddwd_r2r(regs, regd)	mmx_r2r(pmaddwd, regs, regd)
-#define	pmaddwd(vars, vard)	mmx_m2m(pmaddwd, vars, vard)
-
-
-/*	1x64 bitwise AND
-*/
-#ifdef	BROKEN_PAND
-#define	pand_m2r(var, reg) \
-	{ \
-		mmx_m2r(pandn, (mmx_t) -1LL, reg); \
-		mmx_m2r(pandn, var, reg); \
-	}
-#define	pand_r2r(regs, regd) \
-	{ \
-		mmx_m2r(pandn, (mmx_t) -1LL, regd); \
-		mmx_r2r(pandn, regs, regd) \
-	}
-#define	pand(vars, vard) \
-	{ \
-		movq_m2r(vard, mm0); \
-		mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
-		mmx_m2r(pandn, vars, mm0); \
-		movq_r2m(mm0, vard); \
-	}
-#else
-#define	pand_m2r(var, reg)	mmx_m2r(pand, var, reg)
-#define	pand_r2r(regs, regd)	mmx_r2r(pand, regs, regd)
-#define	pand(vars, vard)	mmx_m2m(pand, vars, vard)
-#endif
-
-
-/*	1x64 bitwise AND with Not the destination
-*/
-#define	pandn_m2r(var, reg)	mmx_m2r(pandn, var, reg)
-#define	pandn_r2r(regs, regd)	mmx_r2r(pandn, regs, regd)
-#define	pandn(vars, vard)	mmx_m2m(pandn, vars, vard)
-
-
-/*	1x64 bitwise OR
-*/
-#define	por_m2r(var, reg)	mmx_m2r(por, var, reg)
-#define	por_r2r(regs, regd)	mmx_r2r(por, regs, regd)
-#define	por(vars, vard)	mmx_m2m(por, vars, vard)
-
-
-/*	1x64 bitwise eXclusive OR
-*/
-#define	pxor_m2r(var, reg)	mmx_m2r(pxor, var, reg)
-#define	pxor_r2r(regs, regd)	mmx_r2r(pxor, regs, regd)
-#define	pxor(vars, vard)	mmx_m2m(pxor, vars, vard)
-
-
-/*	2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
-	(resulting fields are either 0 or -1)
-*/
-#define	pcmpeqd_m2r(var, reg)	mmx_m2r(pcmpeqd, var, reg)
-#define	pcmpeqd_r2r(regs, regd)	mmx_r2r(pcmpeqd, regs, regd)
-#define	pcmpeqd(vars, vard)	mmx_m2m(pcmpeqd, vars, vard)
-
-#define	pcmpeqw_m2r(var, reg)	mmx_m2r(pcmpeqw, var, reg)
-#define	pcmpeqw_r2r(regs, regd)	mmx_r2r(pcmpeqw, regs, regd)
-#define	pcmpeqw(vars, vard)	mmx_m2m(pcmpeqw, vars, vard)
-
-#define	pcmpeqb_m2r(var, reg)	mmx_m2r(pcmpeqb, var, reg)
-#define	pcmpeqb_r2r(regs, regd)	mmx_r2r(pcmpeqb, regs, regd)
-#define	pcmpeqb(vars, vard)	mmx_m2m(pcmpeqb, vars, vard)
-
-
-/*	2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
-	(resulting fields are either 0 or -1)
-*/
-#define	pcmpgtd_m2r(var, reg)	mmx_m2r(pcmpgtd, var, reg)
-#define	pcmpgtd_r2r(regs, regd)	mmx_r2r(pcmpgtd, regs, regd)
-#define	pcmpgtd(vars, vard)	mmx_m2m(pcmpgtd, vars, vard)
-
-#define	pcmpgtw_m2r(var, reg)	mmx_m2r(pcmpgtw, var, reg)
-#define	pcmpgtw_r2r(regs, regd)	mmx_r2r(pcmpgtw, regs, regd)
-#define	pcmpgtw(vars, vard)	mmx_m2m(pcmpgtw, vars, vard)
-
-#define	pcmpgtb_m2r(var, reg)	mmx_m2r(pcmpgtb, var, reg)
-#define	pcmpgtb_r2r(regs, regd)	mmx_r2r(pcmpgtb, regs, regd)
-#define	pcmpgtb(vars, vard)	mmx_m2m(pcmpgtb, vars, vard)
-
-
-/*	1x64, 2x32, and 4x16 Parallel Shift Left Logical
-*/
-#define	psllq_i2r(imm, reg)	mmx_i2r(psllq, imm, reg)
-#define	psllq_m2r(var, reg)	mmx_m2r(psllq, var, reg)
-#define	psllq_r2r(regs, regd)	mmx_r2r(psllq, regs, regd)
-#define	psllq(vars, vard)	mmx_m2m(psllq, vars, vard)
-
-#define	pslld_i2r(imm, reg)	mmx_i2r(pslld, imm, reg)
-#define	pslld_m2r(var, reg)	mmx_m2r(pslld, var, reg)
-#define	pslld_r2r(regs, regd)	mmx_r2r(pslld, regs, regd)
-#define	pslld(vars, vard)	mmx_m2m(pslld, vars, vard)
-
-#define	psllw_i2r(imm, reg)	mmx_i2r(psllw, imm, reg)
-#define	psllw_m2r(var, reg)	mmx_m2r(psllw, var, reg)
-#define	psllw_r2r(regs, regd)	mmx_r2r(psllw, regs, regd)
-#define	psllw(vars, vard)	mmx_m2m(psllw, vars, vard)
-
-
-/*	1x64, 2x32, and 4x16 Parallel Shift Right Logical
-*/
-#define	psrlq_i2r(imm, reg)	mmx_i2r(psrlq, imm, reg)
-#define	psrlq_m2r(var, reg)	mmx_m2r(psrlq, var, reg)
-#define	psrlq_r2r(regs, regd)	mmx_r2r(psrlq, regs, regd)
-#define	psrlq(vars, vard)	mmx_m2m(psrlq, vars, vard)
-
-#define	psrld_i2r(imm, reg)	mmx_i2r(psrld, imm, reg)
-#define	psrld_m2r(var, reg)	mmx_m2r(psrld, var, reg)
-#define	psrld_r2r(regs, regd)	mmx_r2r(psrld, regs, regd)
-#define	psrld(vars, vard)	mmx_m2m(psrld, vars, vard)
-
-#define	psrlw_i2r(imm, reg)	mmx_i2r(psrlw, imm, reg)
-#define	psrlw_m2r(var, reg)	mmx_m2r(psrlw, var, reg)
-#define	psrlw_r2r(regs, regd)	mmx_r2r(psrlw, regs, regd)
-#define	psrlw(vars, vard)	mmx_m2m(psrlw, vars, vard)
-
-
-/*	2x32 and 4x16 Parallel Shift Right Arithmetic
-*/
-#define	psrad_i2r(imm, reg)	mmx_i2r(psrad, imm, reg)
-#define	psrad_m2r(var, reg)	mmx_m2r(psrad, var, reg)
-#define	psrad_r2r(regs, regd)	mmx_r2r(psrad, regs, regd)
-#define	psrad(vars, vard)	mmx_m2m(psrad, vars, vard)
-
-#define	psraw_i2r(imm, reg)	mmx_i2r(psraw, imm, reg)
-#define	psraw_m2r(var, reg)	mmx_m2r(psraw, var, reg)
-#define	psraw_r2r(regs, regd)	mmx_r2r(psraw, regs, regd)
-#define	psraw(vars, vard)	mmx_m2m(psraw, vars, vard)
-
-
-/*	2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
-	(packs source and dest fields into dest in that order)
-*/
-#define	packssdw_m2r(var, reg)	mmx_m2r(packssdw, var, reg)
-#define	packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
-#define	packssdw(vars, vard)	mmx_m2m(packssdw, vars, vard)
-
-#define	packsswb_m2r(var, reg)	mmx_m2r(packsswb, var, reg)
-#define	packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
-#define	packsswb(vars, vard)	mmx_m2m(packsswb, vars, vard)
-
-
-/*	4x16->8x8 PACK and Unsigned Saturate
-	(packs source and dest fields into dest in that order)
-*/
-#define	packuswb_m2r(var, reg)	mmx_m2r(packuswb, var, reg)
-#define	packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
-#define	packuswb(vars, vard)	mmx_m2m(packuswb, vars, vard)
-
-
-/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
-	(interleaves low half of dest with low half of source
-	 as padding in each result field)
-*/
-#define	punpckldq_m2r(var, reg)	mmx_m2r(punpckldq, var, reg)
-#define	punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
-#define	punpckldq(vars, vard)	mmx_m2m(punpckldq, vars, vard)
-
-#define	punpcklwd_m2r(var, reg)	mmx_m2r(punpcklwd, var, reg)
-#define	punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
-#define	punpcklwd(vars, vard)	mmx_m2m(punpcklwd, vars, vard)
-
-#define	punpcklbw_m2r(var, reg)	mmx_m2r(punpcklbw, var, reg)
-#define	punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
-#define	punpcklbw(vars, vard)	mmx_m2m(punpcklbw, vars, vard)
-
-
-/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
-	(interleaves high half of dest with high half of source
-	 as padding in each result field)
-*/
-#define	punpckhdq_m2r(var, reg)	mmx_m2r(punpckhdq, var, reg)
-#define	punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
-#define	punpckhdq(vars, vard)	mmx_m2m(punpckhdq, vars, vard)
-
-#define	punpckhwd_m2r(var, reg)	mmx_m2r(punpckhwd, var, reg)
-#define	punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
-#define	punpckhwd(vars, vard)	mmx_m2m(punpckhwd, vars, vard)
-
-#define	punpckhbw_m2r(var, reg)	mmx_m2r(punpckhbw, var, reg)
-#define	punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
-#define	punpckhbw(vars, vard)	mmx_m2m(punpckhbw, vars, vard)
-
-
-/*	Empty MMx State
-	(used to clean-up when going from mmx to float use
-	 of the registers that are shared by both; note that
-	 there is no float-to-mmx operation needed, because
-	 only the float tag word info is corruptible)
-*/
-#ifdef	MMX_TRACE
-
-#define	emms() \
-	{ \
-		printf("emms()\n"); \
-		__asm__ __volatile__ ("emms"); \
-	}
-
-#else
-
-#define	emms()			__asm__ __volatile__ ("emms")
-
-#endif
-
-#endif
-/* vi: set ts=4 sw=4 expandtab: */