Mercurial > sdl-ios-xcode
changeset 5159:307ccc9c135e
Made it possible to create a texture of any format, even if not supported by the renderer.
This allows me to reduce the set of formats supported by the renderers to the most optimal set, for a nice speed boost.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Thu, 03 Feb 2011 00:19:40 -0800 |
parents | f3ebd1950442 |
children | 657543cc92f9 |
files | VisualC/SDL/SDL_VS2008.vcproj VisualC/SDL/SDL_VS2010.vcxproj Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj Xcode/SDL/SDL.xcodeproj/project.pbxproj include/SDL_pixels.h include/SDL_rect.h include/SDL_render.h src/SDL_compat.c src/render/SDL_render.c src/render/SDL_sysrender.h src/render/SDL_yuv_mmx.c src/render/SDL_yuv_sw.c src/render/SDL_yuv_sw_c.h src/render/direct3d/SDL_d3drender.c src/render/mmx.h src/render/opengl/SDL_renderer_gl.c src/render/opengles/SDL_renderer_gles.c src/render/software/SDL_renderer_sw.c src/video/SDL_leaks.h src/video/SDL_rect.c src/video/SDL_yuv_mmx.c src/video/SDL_yuv_sw.c src/video/SDL_yuv_sw_c.h src/video/mmx.h |
diffstat | 24 files changed, 3051 insertions(+), 3781 deletions(-) [+] |
line wrap: on
line diff
--- a/VisualC/SDL/SDL_VS2008.vcproj Wed Feb 02 22:55:12 2011 -0800 +++ b/VisualC/SDL/SDL_VS2008.vcproj Thu Feb 03 00:19:40 2011 -0800 @@ -607,7 +607,7 @@ > </File> <File - RelativePath="..\..\src\video\mmx.h" + RelativePath="..\..\src\render\mmx.h" > </File> <File @@ -1251,15 +1251,15 @@ > </File> <File - RelativePath="..\..\src\video\SDL_yuv_mmx.c" + RelativePath="..\..\src\render\SDL_yuv_mmx.c" > </File> <File - RelativePath="..\..\src\video\SDL_yuv_sw.c" + RelativePath="..\..\src\render\SDL_yuv_sw.c" > </File> <File - RelativePath="..\..\src\video\SDL_yuv_sw_c.h" + RelativePath="..\..\src\render\SDL_yuv_sw_c.h" > </File> <File
--- a/VisualC/SDL/SDL_VS2010.vcxproj Wed Feb 02 22:55:12 2011 -0800 +++ b/VisualC/SDL/SDL_VS2010.vcxproj Thu Feb 03 00:19:40 2011 -0800 @@ -282,8 +282,9 @@ <ClInclude Include="..\..\src\events\SDL_touch_c.h" /> <ClInclude Include="..\..\src\libm\math.h" /> <ClInclude Include="..\..\src\libm\math_private.h" /> + <ClInclude Include="..\..\src\render\mmx.h" /> <ClInclude Include="..\..\src\render\SDL_sysrender.h" /> - <ClInclude Include="..\..\src\video\mmx.h" /> + <ClInclude Include="..\..\src\render\SDL_yuv_sw_c.h" /> <ClInclude Include="..\..\src\video\SDL_alphamult.h" /> <ClInclude Include="..\..\src\audio\SDL_audio_c.h" /> <ClInclude Include="..\..\src\audio\SDL_audiodev_c.h" /> @@ -339,7 +340,6 @@ <ClInclude Include="..\..\src\video\windows\SDL_windowsvideo.h" /> <ClInclude Include="..\..\src\video\windows\SDL_windowswindow.h" /> <ClInclude Include="..\..\src\events\SDL_windowevents_c.h" /> - <ClInclude Include="..\..\src\video\SDL_yuv_sw_c.h" /> <ClInclude Include="..\..\src\video\windows\wmmsg.h" /> </ItemGroup> <ItemGroup> @@ -365,6 +365,8 @@ <ClCompile Include="..\..\src\render\direct3d\SDL_d3drender.c" /> <ClCompile Include="..\..\src\render\opengl\SDL_renderer_gl.c" /> <ClCompile Include="..\..\src\render\SDL_render.c" /> + <ClCompile Include="..\..\src\render\SDL_yuv_mmx.c" /> + <ClCompile Include="..\..\src\render\SDL_yuv_sw.c" /> <ClCompile Include="..\..\src\render\software\SDL_renderer_sw.c" /> <ClCompile Include="..\..\src\SDL.c" /> <ClCompile Include="..\..\src\video\SDL_alphamult.c" /> @@ -452,8 +454,6 @@ <ClCompile Include="..\..\src\video\windows\SDL_windowsvideo.c" /> <ClCompile Include="..\..\src\video\windows\SDL_windowswindow.c" /> <ClCompile Include="..\..\src\events\SDL_windowevents.c" /> - <ClCompile Include="..\..\src\video\SDL_yuv_mmx.c" /> - <ClCompile Include="..\..\src\video\SDL_yuv_sw.c" /> </ItemGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <ImportGroup Label="ExtensionTargets">
--- a/Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj Wed Feb 02 22:55:12 2011 -0800 +++ b/Xcode-iPhoneOS/SDL/SDLiPhoneOS.xcodeproj/project.pbxproj Thu Feb 03 00:19:40 2011 -0800 @@ -73,6 +73,10 @@ 043DD77010FD8A0000DED673 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 043DD76C10FD8A0000DED673 /* SDL_alphamult.h */; }; 043DD77110FD8A0000DED673 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 043DD76D10FD8A0000DED673 /* SDL_blendfillrect.c */; }; 043DD77210FD8A0000DED673 /* SDL_drawrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 043DD76E10FD8A0000DED673 /* SDL_drawrect.c */; }; + 04409BA612FA989600FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409BA212FA989600FB9AA8 /* mmx.h */; }; + 04409BA712FA989600FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */; }; + 04409BA812FA989600FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */; }; + 04409BA912FA989600FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */; }; 04461DEE0EA76BA3006C462D /* SDL_haptic.h in Headers */ = {isa = PBXBuildFile; fileRef = 04461DED0EA76BA3006C462D /* SDL_haptic.h */; settings = {ATTRIBUTES = (Public, ); }; }; 044E5FB511E6069F0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5FB311E6069F0076F181 /* SDL_clipboard.h */; settings = {ATTRIBUTES = (Public, ); }; }; 044E5FB611E6069F0076F181 /* SDL_input.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5FB411E6069F0076F181 /* SDL_input.h */; settings = {ATTRIBUTES = (Public, ); }; }; @@ -223,9 +227,6 @@ FDA684660DF2374E00F98A1A /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA683190DF2374E00F98A1A /* SDL_surface.c */; }; FDA684670DF2374E00F98A1A /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA6831A0DF2374E00F98A1A /* 
SDL_sysvideo.h */; }; FDA684680DF2374E00F98A1A /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831B0DF2374E00F98A1A /* SDL_video.c */; }; - FDA684690DF2374E00F98A1A /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */; }; - FDA6846A0DF2374E00F98A1A /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */; }; - FDA6846B0DF2374E00F98A1A /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */; }; FDA685FB0DF244C800F98A1A /* SDL_nullevents.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA685F50DF244C800F98A1A /* SDL_nullevents.c */; }; FDA685FC0DF244C800F98A1A /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = FDA685F60DF244C800F98A1A /* SDL_nullevents_c.h */; }; FDA685FF0DF244C800F98A1A /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = FDA685F90DF244C800F98A1A /* SDL_nullvideo.c */; }; @@ -328,6 +329,10 @@ 043DD76C10FD8A0000DED673 /* SDL_alphamult.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_alphamult.h; sourceTree = "<group>"; }; 043DD76D10FD8A0000DED673 /* SDL_blendfillrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_blendfillrect.c; sourceTree = "<group>"; }; 043DD76E10FD8A0000DED673 /* SDL_drawrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_drawrect.c; sourceTree = "<group>"; }; + 04409BA212FA989600FB9AA8 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; }; + 04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; }; + 04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding 
= 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; }; + 04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; }; 04461DED0EA76BA3006C462D /* SDL_haptic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_haptic.h; path = ../../include/SDL_haptic.h; sourceTree = SOURCE_ROOT; }; 044E5FB311E6069F0076F181 /* SDL_clipboard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_clipboard.h; path = ../../include/SDL_clipboard.h; sourceTree = SOURCE_ROOT; }; 044E5FB411E6069F0076F181 /* SDL_input.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_input.h; path = ../../include/SDL_input.h; sourceTree = SOURCE_ROOT; }; @@ -505,9 +510,6 @@ FDA683190DF2374E00F98A1A /* SDL_surface.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_surface.c; sourceTree = "<group>"; }; FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysvideo.h; sourceTree = "<group>"; }; FDA6831B0DF2374E00F98A1A /* SDL_video.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_video.c; sourceTree = "<group>"; }; - FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; }; - FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; }; - FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; }; 
FDA685F50DF244C800F98A1A /* SDL_nullevents.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullevents.c; sourceTree = "<group>"; }; FDA685F60DF244C800F98A1A /* SDL_nullevents_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullevents_c.h; sourceTree = "<group>"; }; FDA685F90DF244C800F98A1A /* SDL_nullvideo.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullvideo.c; sourceTree = "<group>"; }; @@ -659,9 +661,13 @@ isa = PBXGroup; children = ( 041B2CE812FA0F680087D585 /* opengles */, + 041B2CEC12FA0F680087D585 /* software */, + 04409BA212FA989600FB9AA8 /* mmx.h */, 041B2CEA12FA0F680087D585 /* SDL_render.c */, 041B2CEB12FA0F680087D585 /* SDL_sysrender.h */, - 041B2CEC12FA0F680087D585 /* software */, + 04409BA312FA989600FB9AA8 /* SDL_yuv_mmx.c */, + 04409BA412FA989600FB9AA8 /* SDL_yuv_sw_c.h */, + 04409BA512FA989600FB9AA8 /* SDL_yuv_sw.c */, ); name = render; path = ../../src/render; @@ -1113,9 +1119,6 @@ FDA683190DF2374E00F98A1A /* SDL_surface.c */, FDA6831A0DF2374E00F98A1A /* SDL_sysvideo.h */, FDA6831B0DF2374E00F98A1A /* SDL_video.c */, - FDA6831C0DF2374E00F98A1A /* SDL_yuv_mmx.c */, - FDA6831D0DF2374E00F98A1A /* SDL_yuv_sw.c */, - FDA6831E0DF2374E00F98A1A /* SDL_yuv_sw_c.h */, ); name = video; path = ../../src/video; @@ -1179,7 +1182,6 @@ FDA6845D0DF2374E00F98A1A /* SDL_pixels_c.h in Headers */, FDA684630DF2374E00F98A1A /* SDL_RLEaccel_c.h in Headers */, FDA684670DF2374E00F98A1A /* SDL_sysvideo.h in Headers */, - FDA6846B0DF2374E00F98A1A /* SDL_yuv_sw_c.h in Headers */, FDA685FC0DF244C800F98A1A /* SDL_nullevents_c.h in Headers */, FDA686000DF244C800F98A1A /* SDL_nullvideo.h in Headers */, FD5F9D300E0E08B3008E885B /* SDL_joystick_c.h in Headers */, @@ -1220,6 +1222,8 @@ 04FFAB9812E23BDC00BA343D /* SDL_shape.h in Headers */, 041B2CD912FA0E9E0087D585 /* SDL_render.h in Headers */, 041B2CF212FA0F680087D585 /* 
SDL_sysrender.h in Headers */, + 04409BA612FA989600FB9AA8 /* mmx.h in Headers */, + 04409BA812FA989600FB9AA8 /* SDL_yuv_sw_c.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -1427,8 +1431,6 @@ FDA684640DF2374E00F98A1A /* SDL_stretch.c in Sources */, FDA684660DF2374E00F98A1A /* SDL_surface.c in Sources */, FDA684680DF2374E00F98A1A /* SDL_video.c in Sources */, - FDA684690DF2374E00F98A1A /* SDL_yuv_mmx.c in Sources */, - FDA6846A0DF2374E00F98A1A /* SDL_yuv_sw.c in Sources */, FDA685FB0DF244C800F98A1A /* SDL_nullevents.c in Sources */, FDA685FF0DF244C800F98A1A /* SDL_nullvideo.c in Sources */, FD5F9D2F0E0E08B3008E885B /* SDL_joystick.c in Sources */, @@ -1469,6 +1471,8 @@ 041B2CF012FA0F680087D585 /* SDL_renderer_gles.c in Sources */, 041B2CF112FA0F680087D585 /* SDL_render.c in Sources */, 041B2CF312FA0F680087D585 /* SDL_renderer_sw.c in Sources */, + 04409BA712FA989600FB9AA8 /* SDL_yuv_mmx.c in Sources */, + 04409BA912FA989600FB9AA8 /* SDL_yuv_sw.c in Sources */, ); runOnlyForDeploymentPostprocessing = 0; };
--- a/Xcode/SDL/SDL.xcodeproj/project.pbxproj Wed Feb 02 22:55:12 2011 -0800 +++ b/Xcode/SDL/SDL.xcodeproj/project.pbxproj Thu Feb 03 00:19:40 2011 -0800 @@ -131,6 +131,14 @@ 041B2CAB12FA0D680087D585 /* SDL_render.c in Sources */ = {isa = PBXBuildFile; fileRef = 041B2C9E12FA0D680087D585 /* SDL_render.c */; }; 041B2CAC12FA0D680087D585 /* SDL_sysrender.h in Headers */ = {isa = PBXBuildFile; fileRef = 041B2C9F12FA0D680087D585 /* SDL_sysrender.h */; }; 041B2CAD12FA0D680087D585 /* SDL_renderer_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 041B2CA112FA0D680087D585 /* SDL_renderer_sw.c */; }; + 04409B9112FA97ED00FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8D12FA97ED00FB9AA8 /* mmx.h */; }; + 04409B9212FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */; }; + 04409B9312FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */; }; + 04409B9412FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */; }; + 04409B9512FA97ED00FB9AA8 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8D12FA97ED00FB9AA8 /* mmx.h */; }; + 04409B9612FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */; }; + 04409B9712FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */; }; + 04409B9812FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */; }; 044E5F8511E6051C0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5F8411E6051C0076F181 /* SDL_clipboard.h */; settings = {ATTRIBUTES = (Public, ); }; }; 044E5F8611E6051C0076F181 /* SDL_clipboard.h in Headers */ = {isa = PBXBuildFile; fileRef = 044E5F8411E6051C0076F181 /* 
SDL_clipboard.h */; }; 0469A10B12EE4BF100B846D6 /* SDL_blendmode.h in Headers */ = {isa = PBXBuildFile; fileRef = 0469A10912EE4BF100B846D6 /* SDL_blendmode.h */; settings = {ATTRIBUTES = (Public, ); }; }; @@ -244,7 +252,6 @@ 04BD011812E6671800899322 /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */; }; 04BD011B12E6671800899322 /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */; }; 04BD011C12E6671800899322 /* SDL_nullvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEED12E6671800899322 /* SDL_nullvideo.h */; }; - 04BD013212E6671800899322 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF0412E6671800899322 /* mmx.h */; }; 04BD016F12E6671800899322 /* SDL_alphamult.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4812E6671800899322 /* SDL_alphamult.c */; }; 04BD017012E6671800899322 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF4912E6671800899322 /* SDL_alphamult.h */; }; 04BD017112E6671800899322 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */; }; @@ -285,9 +292,6 @@ 04BD019B12E6671800899322 /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7412E6671800899322 /* SDL_surface.c */; }; 04BD019C12E6671800899322 /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7512E6671800899322 /* SDL_sysvideo.h */; }; 04BD019D12E6671800899322 /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7612E6671800899322 /* SDL_video.c */; }; - 04BD019E12E6671800899322 /* SDL_yuv_mmx.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */; }; - 04BD019F12E6671800899322 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */; }; - 04BD01A012E6671800899322 /* SDL_yuv_sw_c.h in Headers */ = {isa = 
PBXBuildFile; fileRef = 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */; }; 04BD01DB12E6671800899322 /* imKStoUCS.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFB812E6671800899322 /* imKStoUCS.c */; }; 04BD01DC12E6671800899322 /* imKStoUCS.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFFB912E6671800899322 /* imKStoUCS.h */; }; 04BD01DD12E6671800899322 /* SDL_x11clipboard.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */; }; @@ -457,7 +461,6 @@ 04BD033212E6671800899322 /* SDL_nullevents_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */; }; 04BD033512E6671800899322 /* SDL_nullvideo.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */; }; 04BD033612E6671800899322 /* SDL_nullvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFEED12E6671800899322 /* SDL_nullvideo.h */; }; - 04BD034C12E6671800899322 /* mmx.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF0412E6671800899322 /* mmx.h */; }; 04BD038912E6671800899322 /* SDL_alphamult.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4812E6671800899322 /* SDL_alphamult.c */; }; 04BD038A12E6671800899322 /* SDL_alphamult.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF4912E6671800899322 /* SDL_alphamult.h */; }; 04BD038B12E6671800899322 /* SDL_blendfillrect.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */; }; @@ -498,9 +501,6 @@ 04BD03B512E6671800899322 /* SDL_surface.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7412E6671800899322 /* SDL_surface.c */; }; 04BD03B612E6671800899322 /* SDL_sysvideo.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7512E6671800899322 /* SDL_sysvideo.h */; }; 04BD03B712E6671800899322 /* SDL_video.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7612E6671800899322 /* SDL_video.c */; }; - 04BD03B812E6671800899322 /* SDL_yuv_mmx.c in Sources */ = {isa = 
PBXBuildFile; fileRef = 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */; }; - 04BD03B912E6671800899322 /* SDL_yuv_sw.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */; }; - 04BD03BA12E6671800899322 /* SDL_yuv_sw_c.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */; }; 04BD03F312E6671800899322 /* imKStoUCS.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFB812E6671800899322 /* imKStoUCS.c */; }; 04BD03F412E6671800899322 /* imKStoUCS.h in Headers */ = {isa = PBXBuildFile; fileRef = 04BDFFB912E6671800899322 /* imKStoUCS.h */; }; 04BD03F512E6671800899322 /* SDL_x11clipboard.c in Sources */ = {isa = PBXBuildFile; fileRef = 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */; }; @@ -701,6 +701,10 @@ 041B2C9E12FA0D680087D585 /* SDL_render.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_render.c; sourceTree = "<group>"; }; 041B2C9F12FA0D680087D585 /* SDL_sysrender.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysrender.h; sourceTree = "<group>"; }; 041B2CA112FA0D680087D585 /* SDL_renderer_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_renderer_sw.c; sourceTree = "<group>"; }; + 04409B8D12FA97ED00FB9AA8 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; }; + 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; }; + 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; }; + 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; 
sourceTree = "<group>"; }; 044E5F8411E6051C0076F181 /* SDL_clipboard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_clipboard.h; path = ../../include/SDL_clipboard.h; sourceTree = SOURCE_ROOT; }; 0469A10912EE4BF100B846D6 /* SDL_blendmode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_blendmode.h; path = ../../include/SDL_blendmode.h; sourceTree = SOURCE_ROOT; }; 04BDFD7412E6671700899322 /* SDL_atomic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_atomic.c; sourceTree = "<group>"; }; @@ -814,7 +818,6 @@ 04BDFEE912E6671800899322 /* SDL_nullevents_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullevents_c.h; sourceTree = "<group>"; }; 04BDFEEC12E6671800899322 /* SDL_nullvideo.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_nullvideo.c; sourceTree = "<group>"; }; 04BDFEED12E6671800899322 /* SDL_nullvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_nullvideo.h; sourceTree = "<group>"; }; - 04BDFF0412E6671800899322 /* mmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mmx.h; sourceTree = "<group>"; }; 04BDFF4812E6671800899322 /* SDL_alphamult.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_alphamult.c; sourceTree = "<group>"; }; 04BDFF4912E6671800899322 /* SDL_alphamult.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_alphamult.h; sourceTree = "<group>"; }; 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_blendfillrect.c; sourceTree = "<group>"; }; @@ -855,9 +858,6 @@ 04BDFF7412E6671800899322 /* SDL_surface.c */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_surface.c; sourceTree = "<group>"; }; 04BDFF7512E6671800899322 /* SDL_sysvideo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_sysvideo.h; sourceTree = "<group>"; }; 04BDFF7612E6671800899322 /* SDL_video.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_video.c; sourceTree = "<group>"; }; - 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_mmx.c; sourceTree = "<group>"; }; - 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_yuv_sw.c; sourceTree = "<group>"; }; - 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SDL_yuv_sw_c.h; sourceTree = "<group>"; }; 04BDFFB812E6671800899322 /* imKStoUCS.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = imKStoUCS.c; sourceTree = "<group>"; }; 04BDFFB912E6671800899322 /* imKStoUCS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = imKStoUCS.h; sourceTree = "<group>"; }; 04BDFFBA12E6671800899322 /* SDL_x11clipboard.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = SDL_x11clipboard.c; sourceTree = "<group>"; }; @@ -1188,8 +1188,12 @@ children = ( 041B2C9A12FA0D680087D585 /* opengl */, 041B2CA012FA0D680087D585 /* software */, + 04409B8D12FA97ED00FB9AA8 /* mmx.h */, 041B2C9E12FA0D680087D585 /* SDL_render.c */, 041B2C9F12FA0D680087D585 /* SDL_sysrender.h */, + 04409B8E12FA97ED00FB9AA8 /* SDL_yuv_mmx.c */, + 04409B8F12FA97ED00FB9AA8 /* SDL_yuv_sw_c.h */, + 04409B9012FA97ED00FB9AA8 /* SDL_yuv_sw.c */, ); name = render; path = ../../src/render; @@ -1483,7 +1487,6 @@ 04BDFEE712E6671800899322 /* 
dummy */, 04BDFFB712E6671800899322 /* x11 */, 04BDFFD712E6671800899322 /* Xext */, - 04BDFF0412E6671800899322 /* mmx.h */, 04BDFF4812E6671800899322 /* SDL_alphamult.c */, 04BDFF4912E6671800899322 /* SDL_alphamult.h */, 04BDFF4A12E6671800899322 /* SDL_blendfillrect.c */, @@ -1524,9 +1527,6 @@ 04BDFF7412E6671800899322 /* SDL_surface.c */, 04BDFF7512E6671800899322 /* SDL_sysvideo.h */, 04BDFF7612E6671800899322 /* SDL_video.c */, - 04BDFF7712E6671800899322 /* SDL_yuv_mmx.c */, - 04BDFF7812E6671800899322 /* SDL_yuv_sw.c */, - 04BDFF7912E6671800899322 /* SDL_yuv_sw_c.h */, ); name = video; path = ../../src/video; @@ -1893,7 +1893,6 @@ 04BD010312E6671800899322 /* SDL_cocoawindow.h in Headers */, 04BD011812E6671800899322 /* SDL_nullevents_c.h in Headers */, 04BD011C12E6671800899322 /* SDL_nullvideo.h in Headers */, - 04BD013212E6671800899322 /* mmx.h in Headers */, 04BD017012E6671800899322 /* SDL_alphamult.h in Headers */, 04BD017612E6671800899322 /* SDL_blit.h in Headers */, 04BD017B12E6671800899322 /* SDL_blit_auto.h in Headers */, @@ -1907,7 +1906,6 @@ 04BD019712E6671800899322 /* SDL_RLEaccel_c.h in Headers */, 04BD019912E6671800899322 /* SDL_shape_internals.h in Headers */, 04BD019C12E6671800899322 /* SDL_sysvideo.h in Headers */, - 04BD01A012E6671800899322 /* SDL_yuv_sw_c.h in Headers */, 04BD01DC12E6671800899322 /* imKStoUCS.h in Headers */, 04BD01DE12E6671800899322 /* SDL_x11clipboard.h in Headers */, 04BD01E012E6671800899322 /* SDL_x11dyn.h in Headers */, @@ -1942,6 +1940,8 @@ 0469A10B12EE4BF100B846D6 /* SDL_blendmode.h in Headers */, 041B2C9512FA0D2A0087D585 /* SDL_render.h in Headers */, 041B2CA612FA0D680087D585 /* SDL_sysrender.h in Headers */, + 04409B9112FA97ED00FB9AA8 /* mmx.h in Headers */, + 04409B9312FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -2016,7 +2016,6 @@ 04BD031D12E6671800899322 /* SDL_cocoawindow.h in Headers */, 04BD033212E6671800899322 /* SDL_nullevents_c.h in Headers */, 
04BD033612E6671800899322 /* SDL_nullvideo.h in Headers */, - 04BD034C12E6671800899322 /* mmx.h in Headers */, 04BD038A12E6671800899322 /* SDL_alphamult.h in Headers */, 04BD039012E6671800899322 /* SDL_blit.h in Headers */, 04BD039512E6671800899322 /* SDL_blit_auto.h in Headers */, @@ -2030,7 +2029,6 @@ 04BD03B112E6671800899322 /* SDL_RLEaccel_c.h in Headers */, 04BD03B312E6671800899322 /* SDL_shape_internals.h in Headers */, 04BD03B612E6671800899322 /* SDL_sysvideo.h in Headers */, - 04BD03BA12E6671800899322 /* SDL_yuv_sw_c.h in Headers */, 04BD03F412E6671800899322 /* imKStoUCS.h in Headers */, 04BD03F612E6671800899322 /* SDL_x11clipboard.h in Headers */, 04BD03F812E6671800899322 /* SDL_x11dyn.h in Headers */, @@ -2065,6 +2063,8 @@ 0469A10D12EE4BF100B846D6 /* SDL_blendmode.h in Headers */, 041B2C9612FA0D2A0087D585 /* SDL_render.h in Headers */, 041B2CAC12FA0D680087D585 /* SDL_sysrender.h in Headers */, + 04409B9512FA97ED00FB9AA8 /* mmx.h in Headers */, + 04409B9712FA97ED00FB9AA8 /* SDL_yuv_sw_c.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -2412,8 +2412,6 @@ 04BD019A12E6671800899322 /* SDL_stretch.c in Sources */, 04BD019B12E6671800899322 /* SDL_surface.c in Sources */, 04BD019D12E6671800899322 /* SDL_video.c in Sources */, - 04BD019E12E6671800899322 /* SDL_yuv_mmx.c in Sources */, - 04BD019F12E6671800899322 /* SDL_yuv_sw.c in Sources */, 04BD01DB12E6671800899322 /* imKStoUCS.c in Sources */, 04BD01DD12E6671800899322 /* SDL_x11clipboard.c in Sources */, 04BD01DF12E6671800899322 /* SDL_x11dyn.c in Sources */, @@ -2443,6 +2441,8 @@ 041B2CA312FA0D680087D585 /* SDL_renderer_gl.c in Sources */, 041B2CA512FA0D680087D585 /* SDL_render.c in Sources */, 041B2CA712FA0D680087D585 /* SDL_renderer_sw.c in Sources */, + 04409B9212FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */, + 04409B9412FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -2539,8 +2539,6 @@ 04BD03B412E6671800899322 /* SDL_stretch.c in Sources */, 
04BD03B512E6671800899322 /* SDL_surface.c in Sources */, 04BD03B712E6671800899322 /* SDL_video.c in Sources */, - 04BD03B812E6671800899322 /* SDL_yuv_mmx.c in Sources */, - 04BD03B912E6671800899322 /* SDL_yuv_sw.c in Sources */, 04BD03F312E6671800899322 /* imKStoUCS.c in Sources */, 04BD03F512E6671800899322 /* SDL_x11clipboard.c in Sources */, 04BD03F712E6671800899322 /* SDL_x11dyn.c in Sources */, @@ -2570,6 +2568,8 @@ 041B2CA912FA0D680087D585 /* SDL_renderer_gl.c in Sources */, 041B2CAB12FA0D680087D585 /* SDL_render.c in Sources */, 041B2CAD12FA0D680087D585 /* SDL_renderer_sw.c in Sources */, + 04409B9612FA97ED00FB9AA8 /* SDL_yuv_mmx.c in Sources */, + 04409B9812FA97ED00FB9AA8 /* SDL_yuv_sw.c in Sources */, ); runOnlyForDeploymentPostprocessing = 0; };
--- a/include/SDL_pixels.h Wed Feb 02 22:55:12 2011 -0800 +++ b/include/SDL_pixels.h Thu Feb 03 00:19:40 2011 -0800 @@ -122,18 +122,26 @@ #define SDL_PIXELORDER(X) (((X) >> 20) & 0x0F) #define SDL_PIXELLAYOUT(X) (((X) >> 16) & 0x0F) #define SDL_BITSPERPIXEL(X) (((X) >> 8) & 0xFF) -#define SDL_BYTESPERPIXEL(X) (((X) >> 0) & 0xFF) +#define SDL_BYTESPERPIXEL(X) \ + (SDL_ISPIXELFORMAT_FOURCC(X) ? \ + ((((X) == SDL_PIXELFORMAT_YV12) || \ + ((X) == SDL_PIXELFORMAT_IYUV) || \ + ((X) == SDL_PIXELFORMAT_YUY2) || \ + ((X) == SDL_PIXELFORMAT_UYVY) || \ + ((X) == SDL_PIXELFORMAT_YVYU)) ? 2 : 1) : (((X) >> 0) & 0xFF)) #define SDL_ISPIXELFORMAT_INDEXED(format) \ - ((SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX1) || \ - (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX4) || \ - (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX8)) + (!SDL_ISPIXELFORMAT_FOURCC(format) && \ + ((SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX1) || \ + (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX4) || \ + (SDL_PIXELTYPE(format) == SDL_PIXELTYPE_INDEX8))) #define SDL_ISPIXELFORMAT_ALPHA(format) \ - ((SDL_PIXELORDER(format) == SDL_PACKEDORDER_ARGB) || \ - (SDL_PIXELORDER(format) == SDL_PACKEDORDER_RGBA) || \ - (SDL_PIXELORDER(format) == SDL_PACKEDORDER_ABGR) || \ - (SDL_PIXELORDER(format) == SDL_PACKEDORDER_BGRA)) + (!SDL_ISPIXELFORMAT_FOURCC(format) && \ + ((SDL_PIXELORDER(format) == SDL_PACKEDORDER_ARGB) || \ + (SDL_PIXELORDER(format) == SDL_PACKEDORDER_RGBA) || \ + (SDL_PIXELORDER(format) == SDL_PACKEDORDER_ABGR) || \ + (SDL_PIXELORDER(format) == SDL_PACKEDORDER_BGRA))) #define SDL_ISPIXELFORMAT_FOURCC(format) \ ((format) && !((format) & 0x80000000))
--- a/include/SDL_rect.h Wed Feb 02 22:55:12 2011 -0800 +++ b/include/SDL_rect.h Thu Feb 03 00:19:40 2011 -0800 @@ -70,25 +70,6 @@ } SDL_Rect; /** - * \brief A structure used to track dirty rectangles - * - * \sa SDL_AddDirtyRect - * \sa SDL_ClearDirtyRects - * \sa SDL_FreeDirtyRects - */ -typedef struct SDL_DirtyRect -{ - SDL_Rect rect; - struct SDL_DirtyRect *next; -} SDL_DirtyRect; - -typedef struct SDL_DirtyRectList -{ - SDL_DirtyRect *list; - SDL_DirtyRect *free; -} SDL_DirtyRectList; - -/** * \brief Returns true if the rectangle has no area. */ #define SDL_RectEmpty(X) (((X)->w <= 0) || ((X)->h <= 0)) @@ -143,22 +124,6 @@ int *Y1, int *X2, int *Y2); -/** - * \brief Add a rectangle to a dirty rectangle list - */ -extern DECLSPEC void SDLCALL SDL_AddDirtyRect(SDL_DirtyRectList * list, const SDL_Rect * rect); - -/** - * \brief Remove all rectangles associated with a dirty rectangle list - */ -extern DECLSPEC void SDLCALL SDL_ClearDirtyRects(SDL_DirtyRectList * list); - -/** - * \brief Free memory associated with a dirty rectangle list - */ -extern DECLSPEC void SDLCALL SDL_FreeDirtyRects(SDL_DirtyRectList * list); - - /* Ends C function definitions when using C++ */ #ifdef __cplusplus /* *INDENT-OFF* */
--- a/include/SDL_render.h Wed Feb 02 22:55:12 2011 -0800 +++ b/include/SDL_render.h Thu Feb 03 00:19:40 2011 -0800 @@ -61,7 +61,7 @@ const char *name; /**< The name of the renderer */ Uint32 flags; /**< Supported ::SDL_RendererFlags */ Uint32 num_texture_formats; /**< The number of available texture formats */ - Uint32 texture_formats[50]; /**< The available texture formats */ + Uint32 texture_formats[16]; /**< The available texture formats */ int max_texture_width; /**< The maximimum texture width */ int max_texture_height; /**< The maximimum texture height */ } SDL_RendererInfo; @@ -204,22 +204,6 @@ int *w, int *h); /** - * \brief Query the pixels of a texture, if the texture does not need to be - * locked for pixel access. - * - * \param texture A texture to be queried, which was created with - * ::SDL_TEXTUREACCESS_STREAMING. - * \param pixels A pointer filled with a pointer to the pixels for the - * texture. - * \param pitch A pointer filled in with the pitch of the pixel data. - * - * \return 0 on success, or -1 if the texture is not valid, or must be locked - * for pixel access. - */ -extern DECLSPEC int SDLCALL SDL_QueryTexturePixels(SDL_Texture * texture, - void **pixels, int *pitch); - -/** * \brief Set an additional color value used in render copy operations. * * \param texture The texture to update. @@ -299,7 +283,7 @@ /** * \brief Get the blend mode used for texture copy operations. * - * \param texture The texture to query. + * \param texture The texture to query. * \param blendMode A pointer filled in with the current blend mode. * * \return 0 on success, or -1 if the texture is not valid. @@ -312,7 +296,7 @@ /** * \brief Update the given texture rectangle with new pixel data. * - * \param texture The texture to update + * \param texture The texture to update * \param rect A pointer to the rectangle of pixels to update, or NULL to * update the entire texture. * \param pixels The raw pixel data. 
@@ -329,49 +313,28 @@ /** * \brief Lock a portion of the texture for pixel access. * - * \param texture The texture to lock for access, which was created with + * \param texture The texture to lock for access, which was created with * ::SDL_TEXTUREACCESS_STREAMING. * \param rect A pointer to the rectangle to lock for access. If the rect * is NULL, the entire texture will be locked. - * \param markDirty If this is nonzero, the locked area will be marked dirty - * when the texture is unlocked. * \param pixels This is filled in with a pointer to the locked pixels, * appropriately offset by the locked area. * \param pitch This is filled in with the pitch of the locked pixels. * - * \return 0 on success, or -1 if the texture is not valid or was created with - * ::SDL_TEXTUREACCESS_STATIC. + * \return 0 on success, or -1 if the texture is not valid or was not created with ::SDL_TEXTUREACCESS_STREAMING. * - * \sa SDL_DirtyTexture() * \sa SDL_UnlockTexture() */ extern DECLSPEC int SDLCALL SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect, - int markDirty, void **pixels, - int *pitch); - -/** - * \brief Unlock a texture, uploading the changes to renderer memory, if needed. - * - * \sa SDL_LockTexture() - * \sa SDL_DirtyTexture() - */ -extern DECLSPEC void SDLCALL SDL_UnlockTexture(SDL_Texture * texture); + void **pixels, int *pitch); /** - * \brief Mark the specified rectangles of the texture as dirty. - * - * \param texture The texture to mark dirty, which was created with - * ::SDL_TEXTUREACCESS_STREAMING. - * \param numrects The number of rectangles pointed to by rects. - * \param rects The pointer to an array of dirty rectangles. + * \brief Unlock a texture, uploading the changes to video memory, if needed. 
* * \sa SDL_LockTexture() - * \sa SDL_UnlockTexture() */ -extern DECLSPEC void SDLCALL SDL_DirtyTexture(SDL_Texture * texture, - int numrects, - const SDL_Rect * rects); +extern DECLSPEC void SDLCALL SDL_UnlockTexture(SDL_Texture * texture); /** * \brief Set the color used for drawing operations (Fill and Line).
--- a/src/SDL_compat.c Wed Feb 02 22:55:12 2011 -0800 +++ b/src/SDL_compat.c Thu Feb 03 00:19:40 2011 -0800 @@ -28,7 +28,6 @@ #include "video/SDL_sysvideo.h" #include "video/SDL_pixels_c.h" -#include "video/SDL_yuv_sw_c.h" static SDL_Window *SDL_VideoWindow = NULL; static SDL_Renderer *SDL_VideoRenderer = NULL; @@ -344,13 +343,10 @@ static SDL_Surface * CreateVideoSurface(SDL_Texture * texture) { - SDL_Surface *surface; Uint32 format; int w, h; int bpp; Uint32 Rmask, Gmask, Bmask, Amask; - void *pixels; - int pitch; if (SDL_QueryTexture(texture, &format, NULL, &w, &h) < 0) { return NULL; @@ -362,15 +358,7 @@ return NULL; } - if (SDL_QueryTexturePixels(texture, &pixels, &pitch) == 0) { - surface = - SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask, Gmask, - Bmask, Amask); - } else { - surface = - SDL_CreateRGBSurface(0, w, h, bpp, Rmask, Gmask, Bmask, Amask); - } - return surface; + return SDL_CreateRGBSurface(0, w, h, bpp, Rmask, Gmask, Bmask, Amask); } static void @@ -412,8 +400,6 @@ int w, h; Uint32 format; int access; - void *pixels; - int pitch; /* We can't resize something we don't have... 
*/ if (!SDL_VideoWindow) { @@ -454,15 +440,10 @@ SDL_VideoSurface->w = width; SDL_VideoSurface->h = height; - if (SDL_QueryTexturePixels(SDL_VideoTexture, &pixels, &pitch) == 0) { - SDL_VideoSurface->pixels = pixels; - SDL_VideoSurface->pitch = pitch; - } else { - SDL_CalculatePitch(SDL_VideoSurface); - SDL_VideoSurface->pixels = - SDL_realloc(SDL_VideoSurface->pixels, - SDL_VideoSurface->h * SDL_VideoSurface->pitch); - } + SDL_CalculatePitch(SDL_VideoSurface); + SDL_VideoSurface->pixels = + SDL_realloc(SDL_VideoSurface->pixels, + SDL_VideoSurface->h * SDL_VideoSurface->pitch); SDL_SetClipRect(SDL_VideoSurface, NULL); SDL_InvalidateMap(SDL_VideoSurface->map); @@ -830,20 +811,15 @@ screen = SDL_VideoSurface; } if (screen == SDL_VideoSurface) { - if (screen->flags & SDL_PREALLOC) { - /* The surface memory is maintained by the renderer */ - SDL_DirtyTexture(SDL_VideoTexture, numrects, rects); - } else { - /* The surface memory needs to be copied to texture */ - int pitch = screen->pitch; - int psize = screen->format->BytesPerPixel; - for (i = 0; i < numrects; ++i) { - const SDL_Rect *rect = &rects[i]; - void *pixels = - (Uint8 *) screen->pixels + rect->y * pitch + - rect->x * psize; - SDL_UpdateTexture(SDL_VideoTexture, rect, pixels, pitch); - } + /* The surface memory needs to be copied to texture */ + int pitch = screen->pitch; + int psize = screen->format->BytesPerPixel; + for (i = 0; i < numrects; ++i) { + const SDL_Rect *rect = &rects[i]; + void *pixels = + (Uint8 *) screen->pixels + rect->y * pitch + + rect->x * psize; + SDL_UpdateTexture(SDL_VideoTexture, rect, pixels, pitch); } rect.x = 0; rect.y = 0; @@ -1459,8 +1435,6 @@ Uint16 pitches[3]; Uint8 *planes[3]; - SDL_SW_YUVTexture *sw; - SDL_Texture *texture; Uint32 texture_format; }; @@ -1545,24 +1519,6 @@ overlay->hwdata->texture = SDL_CreateTexture(SDL_VideoRenderer, texture_format, SDL_TEXTUREACCESS_STREAMING, w, h); - if (overlay->hwdata->texture) { - overlay->hwdata->sw = NULL; - } else { - SDL_DisplayMode 
current_mode; - - overlay->hwdata->sw = SDL_SW_CreateYUVTexture(texture_format, w, h); - if (!overlay->hwdata->sw) { - SDL_FreeYUVOverlay(overlay); - return NULL; - } - - /* Create a supported RGB format texture for display */ - SDL_GetCurrentDisplayMode(&current_mode); - texture_format = current_mode.format; - overlay->hwdata->texture = - SDL_CreateTexture(SDL_VideoRenderer, texture_format, - SDL_TEXTUREACCESS_STREAMING, w, h); - } if (!overlay->hwdata->texture) { SDL_FreeYUVOverlay(overlay); return NULL; @@ -1582,17 +1538,8 @@ SDL_SetError("Passed a NULL overlay"); return -1; } - if (overlay->hwdata->sw) { - if (SDL_SW_QueryYUVTexturePixels(overlay->hwdata->sw, &pixels, &pitch) - < 0) { - return -1; - } - } else { - if (SDL_LockTexture - (overlay->hwdata->texture, NULL, 1, &pixels, &pitch) - < 0) { - return -1; - } + if (SDL_LockTexture(overlay->hwdata->texture, NULL, &pixels, &pitch) < 0) { + return -1; } overlay->pixels[0] = (Uint8 *) pixels; overlay->pitches[0] = pitch; @@ -1620,25 +1567,7 @@ if (!overlay) { return; } - if (overlay->hwdata->sw) { - void *pixels; - int pitch; - if (SDL_LockTexture - (overlay->hwdata->texture, NULL, 1, &pixels, &pitch) == 0) { - SDL_Rect srcrect; - - srcrect.x = 0; - srcrect.y = 0; - srcrect.w = overlay->w; - srcrect.h = overlay->h; - SDL_SW_CopyYUVToRGB(overlay->hwdata->sw, &srcrect, - overlay->hwdata->texture_format, - overlay->w, overlay->h, pixels, pitch); - SDL_UnlockTexture(overlay->hwdata->texture); - } - } else { - SDL_UnlockTexture(overlay->hwdata->texture); - } + SDL_UnlockTexture(overlay->hwdata->texture); } int
--- a/src/render/SDL_render.c Wed Feb 02 22:55:12 2011 -0800 +++ b/src/render/SDL_render.c Thu Feb 03 00:19:40 2011 -0800 @@ -152,6 +152,34 @@ return 0; } +static SDL_bool +IsSupportedFormat(SDL_Renderer * renderer, Uint32 format) +{ + Uint32 i; + + for (i = 0; i < renderer->info.num_texture_formats; ++i) { + if (renderer->info.texture_formats[i] == format) { + return SDL_TRUE; + } + } + return SDL_FALSE; +} + +static Uint32 +GetClosestSupportedFormat(SDL_Renderer * renderer, Uint32 format) +{ + Uint32 i; + SDL_bool hasAlpha = SDL_ISPIXELFORMAT_ALPHA(format); + + /* We just want to match the first format that has the same channels */ + for (i = 0; i < renderer->info.num_texture_formats; ++i) { + if (SDL_ISPIXELFORMAT_ALPHA(renderer->info.texture_formats[i]) == hasAlpha) { + return renderer->info.texture_formats[i]; + } + } + return renderer->info.texture_formats[0]; +} + SDL_Texture * SDL_CreateTexture(SDL_Renderer * renderer, Uint32 format, int access, int w, int h) { @@ -159,14 +187,18 @@ CHECK_RENDERER_MAGIC(renderer, NULL); + if (SDL_ISPIXELFORMAT_INDEXED(format)) { + SDL_SetError("Palettized textures are not supported"); + return NULL; + } if (w <= 0 || h <= 0) { SDL_SetError("Texture dimensions can't be 0"); - return 0; + return NULL; } texture = (SDL_Texture *) SDL_calloc(1, sizeof(*texture)); if (!texture) { SDL_OutOfMemory(); - return 0; + return NULL; } texture->magic = &texture_magic; texture->format = format; @@ -184,9 +216,35 @@ } renderer->textures = texture; - if (renderer->CreateTexture(renderer, texture) < 0) { - SDL_DestroyTexture(texture); - return 0; + if (IsSupportedFormat(renderer, format)) { + if (renderer->CreateTexture(renderer, texture) < 0) { + SDL_DestroyTexture(texture); + return 0; + } + } else { + texture->native = SDL_CreateTexture(renderer, + GetClosestSupportedFormat(renderer, format), + access, w, h); + if (!texture->native) { + SDL_DestroyTexture(texture); + return NULL; + } + + if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { + 
texture->yuv = SDL_SW_CreateYUVTexture(format, w, h); + if (!texture->yuv) { + SDL_DestroyTexture(texture); + return NULL; + } + } else if (access == SDL_TEXTUREACCESS_STREAMING) { + /* The pitch is 4 byte aligned */ + texture->pitch = (((w * SDL_BYTESPERPIXEL(format)) + 3) & ~3); + texture->pixels = SDL_malloc(texture->pitch * h); + if (!texture->pixels) { + SDL_DestroyTexture(texture); + return NULL; + } + } } return texture; } @@ -501,21 +559,6 @@ } int -SDL_QueryTexturePixels(SDL_Texture * texture, void **pixels, int *pitch) -{ - SDL_Renderer *renderer; - - CHECK_TEXTURE_MAGIC(texture, -1); - - renderer = texture->renderer; - if (!renderer->QueryTexturePixels) { - SDL_Unsupported(); - return -1; - } - return renderer->QueryTexturePixels(renderer, texture, pixels, pitch); -} - -int SDL_SetTextureColorMod(SDL_Texture * texture, Uint8 r, Uint8 g, Uint8 b) { SDL_Renderer *renderer; @@ -531,7 +574,9 @@ texture->r = r; texture->g = g; texture->b = b; - if (renderer->SetTextureColorMod) { + if (texture->native) { + return SDL_SetTextureColorMod(texture->native, r, g, b); + } else if (renderer->SetTextureColorMod) { return renderer->SetTextureColorMod(renderer, texture); } else { return 0; @@ -573,7 +618,9 @@ texture->modMode &= ~SDL_TEXTUREMODULATE_ALPHA; } texture->a = alpha; - if (renderer->SetTextureAlphaMod) { + if (texture->native) { + return SDL_SetTextureAlphaMod(texture->native, alpha); + } else if (renderer->SetTextureAlphaMod) { return renderer->SetTextureAlphaMod(renderer, texture); } else { return 0; @@ -600,7 +647,9 @@ renderer = texture->renderer; texture->blendMode = blendMode; - if (renderer->SetTextureBlendMode) { + if (texture->native) { + return SDL_SetTextureBlendMode(texture, blendMode); + } else if (renderer->SetTextureBlendMode) { return renderer->SetTextureBlendMode(renderer, texture); } else { return 0; @@ -618,6 +667,91 @@ return 0; } +static int +SDL_UpdateTextureYUV(SDL_Texture * texture, const SDL_Rect * rect, + const void *pixels, int 
pitch) +{ + SDL_Texture *native = texture->native; + SDL_Rect full_rect; + + if (SDL_SW_UpdateYUVTexture(texture->yuv, rect, pixels, pitch) < 0) { + return -1; + } + + full_rect.x = 0; + full_rect.y = 0; + full_rect.w = texture->w; + full_rect.h = texture->h; + rect = &full_rect; + + if (texture->access == SDL_TEXTUREACCESS_STREAMING) { + /* We can lock the texture and copy to it */ + void *native_pixels; + int native_pitch; + + if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) { + return -1; + } + SDL_SW_CopyYUVToRGB(texture->yuv, rect, native->format, + rect->w, rect->h, native_pixels, native_pitch); + SDL_UnlockTexture(native); + } else { + /* Use a temporary buffer for updating */ + void *temp_pixels; + int temp_pitch; + + temp_pitch = (((rect->w * SDL_BYTESPERPIXEL(native->format)) + 3) & ~3); + temp_pixels = SDL_malloc(rect->h * temp_pitch); + if (!temp_pixels) { + SDL_OutOfMemory(); + return -1; + } + SDL_SW_CopyYUVToRGB(texture->yuv, rect, native->format, + rect->w, rect->h, temp_pixels, temp_pitch); + SDL_UpdateTexture(native, rect, temp_pixels, temp_pitch); + SDL_free(temp_pixels); + } + return 0; +} + +static int +SDL_UpdateTextureNative(SDL_Texture * texture, const SDL_Rect * rect, + const void *pixels, int pitch) +{ + SDL_Texture *native = texture->native; + + if (texture->access == SDL_TEXTUREACCESS_STREAMING) { + /* We can lock the texture and copy to it */ + void *native_pixels; + int native_pitch; + + if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) { + return -1; + } + SDL_ConvertPixels(rect->w, rect->h, + texture->format, pixels, pitch, + native->format, native_pixels, native_pitch); + SDL_UnlockTexture(native); + } else { + /* Use a temporary buffer for updating */ + void *temp_pixels; + int temp_pitch; + + temp_pitch = (((rect->w * SDL_BYTESPERPIXEL(native->format)) + 3) & ~3); + temp_pixels = SDL_malloc(rect->h * temp_pitch); + if (!temp_pixels) { + SDL_OutOfMemory(); + return -1; + } + 
SDL_ConvertPixels(rect->w, rect->h, + texture->format, pixels, pitch, + native->format, temp_pixels, temp_pitch); + SDL_UpdateTexture(native, rect, temp_pixels, temp_pitch); + SDL_free(temp_pixels); + } + return 0; +} + int SDL_UpdateTexture(SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch) @@ -627,11 +761,6 @@ CHECK_TEXTURE_MAGIC(texture, -1); - renderer = texture->renderer; - if (!renderer->UpdateTexture) { - SDL_Unsupported(); - return -1; - } if (!rect) { full_rect.x = 0; full_rect.y = 0; @@ -639,11 +768,38 @@ full_rect.h = texture->h; rect = &full_rect; } - return renderer->UpdateTexture(renderer, texture, rect, pixels, pitch); + + if (texture->yuv) { + return SDL_UpdateTextureYUV(texture, rect, pixels, pitch); + } else if (texture->native) { + return SDL_UpdateTextureNative(texture, rect, pixels, pitch); + } else { + renderer = texture->renderer; + return renderer->UpdateTexture(renderer, texture, rect, pixels, pitch); + } +} + +static int +SDL_LockTextureYUV(SDL_Texture * texture, const SDL_Rect * rect, + void **pixels, int *pitch) +{ + return SDL_SW_LockYUVTexture(texture->yuv, rect, pixels, pitch); +} + +static int +SDL_LockTextureNative(SDL_Texture * texture, const SDL_Rect * rect, + void **pixels, int *pitch) +{ + texture->locked_rect = *rect; + *pixels = (void *) ((Uint8 *) texture->pixels + + rect->y * texture->pitch + + rect->x * SDL_BYTESPERPIXEL(texture->format)); + *pitch = texture->pitch; + return 0; } int -SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect, int markDirty, +SDL_LockTexture(SDL_Texture * texture, const SDL_Rect * rect, void **pixels, int *pitch) { SDL_Renderer *renderer; @@ -655,11 +811,7 @@ SDL_SetError("SDL_LockTexture(): texture must be streaming"); return -1; } - renderer = texture->renderer; - if (!renderer->LockTexture) { - SDL_Unsupported(); - return -1; - } + if (!rect) { full_rect.x = 0; full_rect.y = 0; @@ -667,8 +819,57 @@ full_rect.h = texture->h; rect = &full_rect; } - return 
renderer->LockTexture(renderer, texture, rect, markDirty, pixels, - pitch); + + if (texture->yuv) { + return SDL_LockTextureYUV(texture, rect, pixels, pitch); + } else if (texture->native) { + return SDL_LockTextureNative(texture, rect, pixels, pitch); + } else { + renderer = texture->renderer; + return renderer->LockTexture(renderer, texture, rect, pixels, pitch); + } +} + +static void +SDL_UnlockTextureYUV(SDL_Texture * texture) +{ + SDL_Texture *native = texture->native; + void *native_pixels; + int native_pitch; + SDL_Rect rect; + + rect.x = 0; + rect.y = 0; + rect.w = texture->w; + rect.h = texture->h; + + if (SDL_LockTexture(native, &rect, &native_pixels, &native_pitch) < 0) { + return; + } + SDL_SW_CopyYUVToRGB(texture->yuv, &rect, native->format, + rect.w, rect.h, native_pixels, native_pitch); + SDL_UnlockTexture(native); +} + +void +SDL_UnlockTextureNative(SDL_Texture * texture) +{ + SDL_Texture *native = texture->native; + void *native_pixels; + int native_pitch; + const SDL_Rect *rect = &texture->locked_rect; + const void* pixels = (void *) ((Uint8 *) texture->pixels + + rect->y * texture->pitch + + rect->x * SDL_BYTESPERPIXEL(texture->format)); + int pitch = texture->pitch; + + if (SDL_LockTexture(native, rect, &native_pixels, &native_pitch) < 0) { + return; + } + SDL_ConvertPixels(rect->w, rect->h, + texture->format, pixels, pitch, + native->format, native_pixels, native_pitch); + SDL_UnlockTexture(native); } void @@ -681,29 +882,14 @@ if (texture->access != SDL_TEXTUREACCESS_STREAMING) { return; } - renderer = texture->renderer; - if (!renderer->UnlockTexture) { - return; + if (texture->yuv) { + SDL_UnlockTextureYUV(texture); + } else if (texture->native) { + SDL_UnlockTextureNative(texture); + } else { + renderer = texture->renderer; + renderer->UnlockTexture(renderer, texture); } - renderer->UnlockTexture(renderer, texture); -} - -void -SDL_DirtyTexture(SDL_Texture * texture, int numrects, - const SDL_Rect * rects) -{ - SDL_Renderer *renderer; - - 
CHECK_TEXTURE_MAGIC(texture, ); - - if (texture->access != SDL_TEXTUREACCESS_STREAMING) { - return; - } - renderer = texture->renderer; - if (!renderer->DirtyTexture) { - return; - } - renderer->DirtyTexture(renderer, texture, numrects, rects); } int @@ -979,6 +1165,10 @@ } } + if (texture->native) { + texture = texture->native; + } + return renderer->RenderCopy(renderer, texture, &real_srcrect, &real_dstrect); } @@ -1087,6 +1277,16 @@ renderer->textures = texture->next; } + if (texture->native) { + SDL_DestroyTexture(texture->native); + } + if (texture->yuv) { + SDL_SW_DestroyYUVTexture(texture->yuv); + } + if (texture->pixels) { + SDL_free(texture->pixels); + } + renderer->DestroyTexture(renderer, texture); SDL_free(texture); }
--- a/src/render/SDL_sysrender.h Wed Feb 02 22:55:12 2011 -0800 +++ b/src/render/SDL_sysrender.h Thu Feb 03 00:19:40 2011 -0800 @@ -26,6 +26,7 @@ #include "SDL_render.h" #include "SDL_events.h" +#include "SDL_yuv_sw_c.h" /* The SDL 2D rendering system */ @@ -45,6 +46,13 @@ SDL_Renderer *renderer; + /* Support for formats not supported directly by the renderer */ + SDL_Texture *native; + SDL_SW_YUVTexture *yuv; + void *pixels; + int pitch; + SDL_Rect locked_rect; + void *driverdata; /**< Driver specific texture representation */ SDL_Texture *prev; @@ -58,8 +66,6 @@ void (*WindowEvent) (SDL_Renderer * renderer, const SDL_WindowEvent *event); int (*CreateTexture) (SDL_Renderer * renderer, SDL_Texture * texture); - int (*QueryTexturePixels) (SDL_Renderer * renderer, SDL_Texture * texture, - void **pixels, int *pitch); int (*SetTextureColorMod) (SDL_Renderer * renderer, SDL_Texture * texture); int (*SetTextureAlphaMod) (SDL_Renderer * renderer, @@ -70,11 +76,8 @@ const SDL_Rect * rect, const void *pixels, int pitch); int (*LockTexture) (SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, void **pixels, - int *pitch); + const SDL_Rect * rect, void **pixels, int *pitch); void (*UnlockTexture) (SDL_Renderer * renderer, SDL_Texture * texture); - void (*DirtyTexture) (SDL_Renderer * renderer, SDL_Texture * texture, - int numrects, const SDL_Rect * rects); int (*RenderClear) (SDL_Renderer * renderer); int (*RenderDrawPoints) (SDL_Renderer * renderer, const SDL_Point * points, int count);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/render/SDL_yuv_mmx.c Thu Feb 03 00:19:40 2011 -0800 @@ -0,0 +1,432 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2010 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES + +#include "SDL_stdinc.h" + +#include "mmx.h" + +/* *INDENT-OFF* */ + +static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} }; +static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} }; +static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} }; + +static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} }; + +static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} }; +static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} }; +static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} }; +static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} }; + +static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} }; +static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} }; +static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} }; +static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} }; + +static 
mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} }; +static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} }; + +/** + This MMX assembler is my first assembler/MMX program ever. + Thus it maybe buggy. + Send patches to: + mvogt@rhrk.uni-kl.de + + After it worked fine I have "obfuscated" the code a bit to have + more parallism in the MMX units. This means I moved + initilisation around and delayed other instruction. + Performance measurement did not show that this brought any advantage + but in theory it _should_ be faster this way. + + The overall performanve gain to the C based dither was 30%-40%. + The MMX routine calculates 256bit=8RGB values in each cycle + (4 for row1 & 4 for row2) + + The red/green/blue.. coefficents are taken from the mpeg_play + player. They look nice, but I dont know if you can have + better values, to avoid integer rounding errors. + + + IMPORTANT: + ========== + + It is a requirement that the cr/cb/lum are 8 byte aligned and + the out are 16byte aligned or you will/may get segfaults + +*/ + +void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + Uint32 *row1; + Uint32 *row2; + + unsigned char* y = lum +cols*rows; // Pointer to the end + int x = 0; + row1 = (Uint32 *)out; // 32 bit target + row2 = (Uint32 *)out+cols+mod; // start of second row + mod = (mod+cols+mod)*4; // increment for row1 in byte + + __asm__ __volatile__ ( + // tap dance to workaround the inability to use %%ebx at will... + // move one thing to the stack... + "pushl $0\n" // save a slot on the stack. + "pushl %%ebx\n" // save %%ebx. + "movl %0, %%ebx\n" // put the thing in ebx. + "movl %%ebx,4(%%esp)\n" // put the thing in the stack slot. + "popl %%ebx\n" // get back %%ebx (the PIC register). 
+ + ".align 8\n" + "1:\n" + + // create Cr (result in mm1) + "pushl %%ebx\n" + "movl 4(%%esp),%%ebx\n" + "movd (%%ebx),%%mm1\n" // 0 0 0 0 v3 v2 v1 v0 + "popl %%ebx\n" + "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00 + "movd (%2), %%mm2\n" // 0 0 0 0 l3 l2 l1 l0 + "punpcklbw %%mm7,%%mm1\n" // 0 v3 0 v2 00 v1 00 v0 + "punpckldq %%mm1,%%mm1\n" // 00 v1 00 v0 00 v1 00 v0 + "psubw %9,%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 + + // create Cr_g (result in mm0) + "movq %%mm1,%%mm0\n" // r1 r1 r0 r0 r1 r1 r0 r0 + "pmullw %10,%%mm0\n" // red*-46dec=0.7136*64 + "pmullw %11,%%mm1\n" // red*89dec=1.4013*64 + "psraw $6, %%mm0\n" // red=red/64 + "psraw $6, %%mm1\n" // red=red/64 + + // create L1 L2 (result in mm2,mm4) + // L2=lum+cols + "movq (%2,%4),%%mm3\n" // 0 0 0 0 L3 L2 L1 L0 + "punpckldq %%mm3,%%mm2\n" // L3 L2 L1 L0 l3 l2 l1 l0 + "movq %%mm2,%%mm4\n" // L3 L2 L1 L0 l3 l2 l1 l0 + "pand %12,%%mm2\n" // L3 0 L1 0 l3 0 l1 0 + "pand %13,%%mm4\n" // 0 L2 0 L0 0 l2 0 l0 + "psrlw $8,%%mm2\n" // 0 L3 0 L1 0 l3 0 l1 + + // create R (result in mm6) + "movq %%mm2,%%mm5\n" // 0 L3 0 L1 0 l3 0 l1 + "movq %%mm4,%%mm6\n" // 0 L2 0 L0 0 l2 0 l0 + "paddsw %%mm1, %%mm5\n" // lum1+red:x R3 x R1 x r3 x r1 + "paddsw %%mm1, %%mm6\n" // lum1+red:x R2 x R0 x r2 x r0 + "packuswb %%mm5,%%mm5\n" // R3 R1 r3 r1 R3 R1 r3 r1 + "packuswb %%mm6,%%mm6\n" // R2 R0 r2 r0 R2 R0 r2 r0 + "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00 + "punpcklbw %%mm5,%%mm6\n" // R3 R2 R1 R0 r3 r2 r1 r0 + + // create Cb (result in mm1) + "movd (%1), %%mm1\n" // 0 0 0 0 u3 u2 u1 u0 + "punpcklbw %%mm7,%%mm1\n" // 0 u3 0 u2 00 u1 00 u0 + "punpckldq %%mm1,%%mm1\n" // 00 u1 00 u0 00 u1 00 u0 + "psubw %9,%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 + + // create Cb_g (result in mm5) + "movq %%mm1,%%mm5\n" // u1 u1 u0 u0 u1 u1 u0 u0 + "pmullw %14,%%mm5\n" // blue*-109dec=1.7129*64 + "pmullw %15,%%mm1\n" // blue*114dec=1.78125*64 + "psraw $6, %%mm5\n" // blue=red/64 + "psraw $6, %%mm1\n" // blue=blue/64 + + // create G (result 
in mm7) + "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1 + "movq %%mm4,%%mm7\n" // 0 L2 0 L0 0 l2 0 l1 + "paddsw %%mm5, %%mm3\n" // lum1+Cb_g:x G3t x G1t x g3t x g1t + "paddsw %%mm5, %%mm7\n" // lum1+Cb_g:x G2t x G0t x g2t x g0t + "paddsw %%mm0, %%mm3\n" // lum1+Cr_g:x G3 x G1 x g3 x g1 + "paddsw %%mm0, %%mm7\n" // lum1+blue:x G2 x G0 x g2 x g0 + "packuswb %%mm3,%%mm3\n" // G3 G1 g3 g1 G3 G1 g3 g1 + "packuswb %%mm7,%%mm7\n" // G2 G0 g2 g0 G2 G0 g2 g0 + "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0 + + // create B (result in mm5) + "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1 + "movq %%mm4,%%mm5\n" // 0 L2 0 L0 0 l2 0 l1 + "paddsw %%mm1, %%mm3\n" // lum1+blue:x B3 x B1 x b3 x b1 + "paddsw %%mm1, %%mm5\n" // lum1+blue:x B2 x B0 x b2 x b0 + "packuswb %%mm3,%%mm3\n" // B3 B1 b3 b1 B3 B1 b3 b1 + "packuswb %%mm5,%%mm5\n" // B2 B0 b2 b0 B2 B0 b2 b0 + "punpcklbw %%mm3,%%mm5\n" // B3 B2 B1 B0 b3 b2 b1 b0 + + // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb) + + "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 + "pxor %%mm4,%%mm4\n" // 0 0 0 0 0 0 0 0 + "movq %%mm6,%%mm1\n" // R3 R2 R1 R0 r3 r2 r1 r0 + "movq %%mm5,%%mm3\n" // B3 B2 B1 B0 b3 b2 b1 b0 + + // process lower lum + "punpcklbw %%mm4,%%mm1\n" // 0 r3 0 r2 0 r1 0 r0 + "punpcklbw %%mm4,%%mm3\n" // 0 b3 0 b2 0 b1 0 b0 + "movq %%mm1,%%mm2\n" // 0 r3 0 r2 0 r1 0 r0 + "movq %%mm3,%%mm0\n" // 0 b3 0 b2 0 b1 0 b0 + "punpcklwd %%mm1,%%mm3\n" // 0 r1 0 b1 0 r0 0 b0 + "punpckhwd %%mm2,%%mm0\n" // 0 r3 0 b3 0 r2 0 b2 + + "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 + "movq %%mm7,%%mm1\n" // G3 G2 G1 G0 g3 g2 g1 g0 + "punpcklbw %%mm1,%%mm2\n" // g3 0 g2 0 g1 0 g0 0 + "punpcklwd %%mm4,%%mm2\n" // 0 0 g1 0 0 0 g0 0 + "por %%mm3, %%mm2\n" // 0 r1 g1 b1 0 r0 g0 b0 + "movq %%mm2,(%3)\n" // wrote out ! row1 + + "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 + "punpcklbw %%mm1,%%mm4\n" // g3 0 g2 0 g1 0 g0 0 + "punpckhwd %%mm2,%%mm4\n" // 0 0 g3 0 0 0 g2 0 + "por %%mm0, %%mm4\n" // 0 r3 g3 b3 0 r2 g2 b2 + "movq %%mm4,8(%3)\n" // wrote out ! 
row1 + + // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb) + // this can be done "destructive" + "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 + "punpckhbw %%mm2,%%mm6\n" // 0 R3 0 R2 0 R1 0 R0 + "punpckhbw %%mm1,%%mm5\n" // G3 B3 G2 B2 G1 B1 G0 B0 + "movq %%mm5,%%mm1\n" // G3 B3 G2 B2 G1 B1 G0 B0 + "punpcklwd %%mm6,%%mm1\n" // 0 R1 G1 B1 0 R0 G0 B0 + "movq %%mm1,(%5)\n" // wrote out ! row2 + "punpckhwd %%mm6,%%mm5\n" // 0 R3 G3 B3 0 R2 G2 B2 + "movq %%mm5,8(%5)\n" // wrote out ! row2 + + "addl $4,%2\n" // lum+4 + "leal 16(%3),%3\n" // row1+16 + "leal 16(%5),%5\n" // row2+16 + "addl $2,(%%esp)\n" // cr+2 + "addl $2,%1\n" // cb+2 + + "addl $4,%6\n" // x+4 + "cmpl %4,%6\n" + + "jl 1b\n" + "addl %4,%2\n" // lum += cols + "addl %8,%3\n" // row1+= mod + "addl %8,%5\n" // row2+= mod + "movl $0,%6\n" // x=0 + "cmpl %7,%2\n" + "jl 1b\n" + + "addl $4,%%esp\n" // get rid of the stack slot we reserved. + "emms\n" // reset MMX registers. + : + : "m" (cr), "r"(cb),"r"(lum), + "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod), + "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB), + "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB), + "m"(MMX_UbluRGB) + ); +} + +void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + Uint16 *row1; + Uint16 *row2; + + unsigned char* y = lum +cols*rows; /* Pointer to the end */ + int x = 0; + row1 = (Uint16 *)out; /* 16 bit target */ + row2 = (Uint16 *)out+cols+mod; /* start of second row */ + mod = (mod+cols+mod)*2; /* increment for row1 in byte */ + + __asm__ __volatile__( + // tap dance to workaround the inability to use %%ebx at will... + // move one thing to the stack... + "pushl $0\n" // save a slot on the stack. + "pushl %%ebx\n" // save %%ebx. + "movl %0, %%ebx\n" // put the thing in ebx. + "movl %%ebx, 4(%%esp)\n" // put the thing in the stack slot. + "popl %%ebx\n" // get back %%ebx (the PIC register). 
+ + ".align 8\n" + "1:\n" + + "movd (%1), %%mm0\n" // 4 Cb 0 0 0 0 u3 u2 u1 u0 + "pxor %%mm7, %%mm7\n" + "pushl %%ebx\n" + "movl 4(%%esp), %%ebx\n" + "movd (%%ebx), %%mm1\n" // 4 Cr 0 0 0 0 v3 v2 v1 v0 + "popl %%ebx\n" + + "punpcklbw %%mm7, %%mm0\n" // 4 W cb 0 u3 0 u2 0 u1 0 u0 + "punpcklbw %%mm7, %%mm1\n" // 4 W cr 0 v3 0 v2 0 v1 0 v0 + "psubw %9, %%mm0\n" + "psubw %9, %%mm1\n" + "movq %%mm0, %%mm2\n" // Cb 0 u3 0 u2 0 u1 0 u0 + "movq %%mm1, %%mm3\n" // Cr + "pmullw %10, %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0 + "movq (%2), %%mm6\n" // L1 l7 L6 L5 L4 L3 L2 L1 L0 + "pmullw %11, %%mm0\n" // Cb2blue + "pand %12, %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0 + "pmullw %13, %%mm3\n" // Cr2green + "movq (%2), %%mm7\n" // L2 + "pmullw %14, %%mm1\n" // Cr2red + "psrlw $8, %%mm7\n" // L2 00 L7 00 L5 00 L3 00 L1 + "pmullw %15, %%mm6\n" // lum1 + "paddw %%mm3, %%mm2\n" // Cb2green + Cr2green == green + "pmullw %15, %%mm7\n" // lum2 + + "movq %%mm6, %%mm4\n" // lum1 + "paddw %%mm0, %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0 + "movq %%mm4, %%mm5\n" // lum1 + "paddw %%mm1, %%mm4\n" // lum1 +red 00 R6 00 R4 00 R2 00 R0 + "paddw %%mm2, %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0 + "psraw $6, %%mm4\n" // R1 0 .. 64 + "movq %%mm7, %%mm3\n" // lum2 00 L7 00 L5 00 L3 00 L1 + "psraw $6, %%mm5\n" // G1 - .. + + "paddw %%mm0, %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1 + "psraw $6, %%mm6\n" // B1 0 .. 
64 + "packuswb %%mm4, %%mm4\n" // R1 R1 + "packuswb %%mm5, %%mm5\n" // G1 G1 + "packuswb %%mm6, %%mm6\n" // B1 B1 + "punpcklbw %%mm4, %%mm4\n" + "punpcklbw %%mm5, %%mm5\n" + + "pand %16, %%mm4\n" + "psllw $3, %%mm5\n" // GREEN 1 + "punpcklbw %%mm6, %%mm6\n" + "pand %17, %%mm5\n" + "pand %16, %%mm6\n" + "por %%mm5, %%mm4\n" // + "psrlw $11, %%mm6\n" // BLUE 1 + "movq %%mm3, %%mm5\n" // lum2 + "paddw %%mm1, %%mm3\n" // lum2 +red 00 R7 00 R5 00 R3 00 R1 + "paddw %%mm2, %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1 + "psraw $6, %%mm3\n" // R2 + "por %%mm6, %%mm4\n" // MM4 + "psraw $6, %%mm5\n" // G2 + "movq (%2, %4), %%mm6\n" // L3 load lum2 + "psraw $6, %%mm7\n" + "packuswb %%mm3, %%mm3\n" + "packuswb %%mm5, %%mm5\n" + "packuswb %%mm7, %%mm7\n" + "pand %12, %%mm6\n" // L3 + "punpcklbw %%mm3, %%mm3\n" + "punpcklbw %%mm5, %%mm5\n" + "pmullw %15, %%mm6\n" // lum3 + "punpcklbw %%mm7, %%mm7\n" + "psllw $3, %%mm5\n" // GREEN 2 + "pand %16, %%mm7\n" + "pand %16, %%mm3\n" + "psrlw $11, %%mm7\n" // BLUE 2 + "pand %17, %%mm5\n" + "por %%mm7, %%mm3\n" + "movq (%2,%4), %%mm7\n" // L4 load lum2 + "por %%mm5, %%mm3\n" // + "psrlw $8, %%mm7\n" // L4 + "movq %%mm4, %%mm5\n" + "punpcklwd %%mm3, %%mm4\n" + "pmullw %15, %%mm7\n" // lum4 + "punpckhwd %%mm3, %%mm5\n" + + "movq %%mm4, (%3)\n" // write row1 + "movq %%mm5, 8(%3)\n" // write row1 + + "movq %%mm6, %%mm4\n" // Lum3 + "paddw %%mm0, %%mm6\n" // Lum3 +blue + + "movq %%mm4, %%mm5\n" // Lum3 + "paddw %%mm1, %%mm4\n" // Lum3 +red + "paddw %%mm2, %%mm5\n" // Lum3 +green + "psraw $6, %%mm4\n" + "movq %%mm7, %%mm3\n" // Lum4 + "psraw $6, %%mm5\n" + "paddw %%mm0, %%mm7\n" // Lum4 +blue + "psraw $6, %%mm6\n" // Lum3 +blue + "movq %%mm3, %%mm0\n" // Lum4 + "packuswb %%mm4, %%mm4\n" + "paddw %%mm1, %%mm3\n" // Lum4 +red + "packuswb %%mm5, %%mm5\n" + "paddw %%mm2, %%mm0\n" // Lum4 +green + "packuswb %%mm6, %%mm6\n" + "punpcklbw %%mm4, %%mm4\n" + "punpcklbw %%mm5, %%mm5\n" + "punpcklbw %%mm6, %%mm6\n" + "psllw $3, %%mm5\n" // GREEN 3 + 
"pand %16, %%mm4\n" + "psraw $6, %%mm3\n" // psr 6 + "psraw $6, %%mm0\n" + "pand %16, %%mm6\n" // BLUE + "pand %17, %%mm5\n" + "psrlw $11, %%mm6\n" // BLUE 3 + "por %%mm5, %%mm4\n" + "psraw $6, %%mm7\n" + "por %%mm6, %%mm4\n" + "packuswb %%mm3, %%mm3\n" + "packuswb %%mm0, %%mm0\n" + "packuswb %%mm7, %%mm7\n" + "punpcklbw %%mm3, %%mm3\n" + "punpcklbw %%mm0, %%mm0\n" + "punpcklbw %%mm7, %%mm7\n" + "pand %16, %%mm3\n" + "pand %16, %%mm7\n" // BLUE + "psllw $3, %%mm0\n" // GREEN 4 + "psrlw $11, %%mm7\n" + "pand %17, %%mm0\n" + "por %%mm7, %%mm3\n" + "por %%mm0, %%mm3\n" + + "movq %%mm4, %%mm5\n" + + "punpcklwd %%mm3, %%mm4\n" + "punpckhwd %%mm3, %%mm5\n" + + "movq %%mm4, (%5)\n" + "movq %%mm5, 8(%5)\n" + + "addl $8, %6\n" + "addl $8, %2\n" + "addl $4, (%%esp)\n" + "addl $4, %1\n" + "cmpl %4, %6\n" + "leal 16(%3), %3\n" + "leal 16(%5),%5\n" // row2+16 + + "jl 1b\n" + "addl %4, %2\n" // lum += cols + "addl %8, %3\n" // row1+= mod + "addl %8, %5\n" // row2+= mod + "movl $0, %6\n" // x=0 + "cmpl %7, %2\n" + "jl 1b\n" + "addl $4, %%esp\n" // get rid of the stack slot we reserved. + "emms\n" + : + : "m" (cr), "r"(cb),"r"(lum), + "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod), + "m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5), + "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5), + "m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565) + ); +} + +/* *INDENT-ON* */ + +#endif /* GCC3 i386 inline assembly */ + +/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/render/SDL_yuv_sw.c Thu Feb 03 00:19:40 2011 -0800 @@ -0,0 +1,1322 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2010 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +/* This is the software implementation of the YUV texture support */ + +/* This code was derived from code carrying the following copyright notices: + + * Copyright (c) 1995 The Regents of the University of California. + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement is + * hereby granted, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF + * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + + * Copyright (c) 1995 Erik Corry + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement is + * hereby granted, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, + * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" + * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, + * UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + + * Portions of this software Copyright (c) 1995 Brown University. + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement + * is hereby granted, provided that the above copyright notice and the + * following two paragraphs appear in all copies of this software. 
+ * + * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN + * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" + * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "SDL_video.h" +#include "SDL_cpuinfo.h" +#include "SDL_yuv_sw_c.h" + + +/* The colorspace conversion functions */ + +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES +extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod); +extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod); +#endif + +static void +Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned short *row1; + unsigned short *row2; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = (unsigned short *) out; + row2 = row1 + cols + mod; + lum2 = lum + cols; + + mod += cols + mod; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + *row1++ = (unsigned 
short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + L = *lum++; + *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + + /* Now, do second row. */ + + L = *lum2++; + *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + L = *lum2++; + *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int value; + unsigned char *row1; + unsigned char *row2; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = out; + row2 = row1 + cols * 3 + mod * 3; + lum2 = lum + cols; + + mod += cols + mod; + mod *= 3; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row1++ = (value) & 0xFF; + *row1++ = (value >> 8) & 0xFF; + *row1++ = (value >> 16) & 0xFF; + + L = *lum++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row1++ = (value) & 0xFF; + *row1++ = (value >> 8) & 0xFF; + *row1++ = (value >> 16) & 0xFF; + + + /* Now, do second row. 
*/ + + L = *lum2++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row2++ = (value) & 0xFF; + *row2++ = (value >> 8) & 0xFF; + *row2++ = (value >> 16) & 0xFF; + + L = *lum2++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row2++ = (value) & 0xFF; + *row2++ = (value >> 8) & 0xFF; + *row2++ = (value >> 16) & 0xFF; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row1; + unsigned int *row2; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = (unsigned int *) out; + row2 = row1 + cols + mod; + lum2 = lum + cols; + + mod += cols + mod; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + *row1++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + L = *lum++; + *row1++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + + /* Now, do second row. */ + + L = *lum2++; + *row2++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + L = *lum2++; + *row2++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. 
+ */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +/* + * In this function I make use of a nasty trick. The tables have the lower + * 16 bits replicated in the upper 16. This means I can write ints and get + * the horisontal doubling for free (almost). + */ +static void +Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row1 = (unsigned int *) out; + const int next_row = cols + (mod / 2); + unsigned int *row2 = row1 + 2 * next_row; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = (next_row * 3) + (mod / 2); + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row1++; + + L = *lum++; + row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row1++; + + + /* Now, do second row. */ + + L = *lum2++; + row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row2++; + + L = *lum2++; + row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row2++; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. 
+ */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int value; + unsigned char *row1 = out; + const int next_row = (cols * 2 + mod) * 3; + unsigned char *row2 = row1 + 2 * next_row; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = next_row * 3 + mod * 3; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] = + row1[next_row + 3 + 0] = (value) & 0xFF; + row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] = + row1[next_row + 3 + 1] = (value >> 8) & 0xFF; + row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] = + row1[next_row + 3 + 2] = (value >> 16) & 0xFF; + row1 += 2 * 3; + + L = *lum++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] = + row1[next_row + 3 + 0] = (value) & 0xFF; + row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] = + row1[next_row + 3 + 1] = (value >> 8) & 0xFF; + row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] = + row1[next_row + 3 + 2] = (value >> 16) & 0xFF; + row1 += 2 * 3; + + + /* Now, do second row. 
*/ + + L = *lum2++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] = + row2[next_row + 3 + 0] = (value) & 0xFF; + row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] = + row2[next_row + 3 + 1] = (value >> 8) & 0xFF; + row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] = + row2[next_row + 3 + 2] = (value >> 16) & 0xFF; + row2 += 2 * 3; + + L = *lum2++; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] = + row2[next_row + 3 + 0] = (value) & 0xFF; + row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] = + row2[next_row + 3 + 1] = (value >> 8) & 0xFF; + row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] = + row2[next_row + 3 + 2] = (value >> 16) & 0xFF; + row2 += 2 * 3; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row1 = (unsigned int *) out; + const int next_row = cols * 2 + mod; + unsigned int *row2 = row1 + 2 * next_row; + unsigned char *lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = (next_row * 3) + mod; + + y = rows / 2; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + ++cr; + ++cb; + + L = *lum++; + row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row1 += 2; + + L = *lum++; + row1[0] = row1[1] = 
row1[next_row] = row1[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row1 += 2; + + + /* Now, do second row. */ + + L = *lum2++; + row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row2 += 2; + + L = *lum2++; + row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row2 += 2; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void +Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned short *row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned short *) out; + + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + *row++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + L = *lum; + lum += 2; + *row++ = (unsigned short) (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + + } + + row += mod; + } +} + +static void +Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int value; + unsigned char *row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned char *) out; + mod *= 3; + y = rows; + while (y--) { + x = cols_2; + while (x--) { + 
register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row++ = (value) & 0xFF; + *row++ = (value >> 8) & 0xFF; + *row++ = (value >> 16) & 0xFF; + + L = *lum; + lum += 2; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + *row++ = (value) & 0xFF; + *row++ = (value >> 8) & 0xFF; + *row++ = (value >> 16) & 0xFF; + + } + row += mod; + } +} + +static void +Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned int *) out; + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + *row++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + L = *lum; + lum += 2; + *row++ = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + + + } + row += mod; + } +} + +/* + * In this function I make use of a nasty trick. The tables have the lower + * 16 bits replicated in the upper 16. This means I can write ints and get + * the horisontal doubling for free (almost). 
+ */ +static void +Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row = (unsigned int *) out; + const int next_row = cols + (mod / 2); + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + row[0] = row[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row++; + + L = *lum; + lum += 2; + row[0] = row[next_row] = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | + rgb_2_pix[L + cb_b]); + row++; + + } + row += next_row; + } +} + +static void +Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int value; + unsigned char *row = out; + const int next_row = (cols * 2 + mod) * 3; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row[0 + 0] = row[3 + 0] = row[next_row + 0] = + row[next_row + 3 + 0] = (value) & 0xFF; + row[0 + 1] = row[3 + 1] = row[next_row + 1] = + row[next_row + 3 + 1] = (value >> 8) & 0xFF; + row[0 + 2] = row[3 + 2] = row[next_row + 2] = + row[next_row + 3 + 2] = (value >> 16) & 0xFF; + row += 2 * 3; + + L = *lum; + lum += 2; 
+ value = (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row[0 + 0] = row[3 + 0] = row[next_row + 0] = + row[next_row + 3 + 0] = (value) & 0xFF; + row[0 + 1] = row[3 + 1] = row[next_row + 1] = + row[next_row + 3 + 1] = (value >> 8) & 0xFF; + row[0 + 2] = row[3 + 2] = row[next_row + 2] = + row[next_row + 3 + 2] = (value >> 16) & 0xFF; + row += 2 * 3; + + } + row += next_row; + } +} + +static void +Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod) +{ + unsigned int *row = (unsigned int *) out; + const int next_row = cols * 2 + mod; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + mod += mod; + y = rows; + while (y--) { + x = cols_2; + while (x--) { + register int L; + + cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; + crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] + + colortab[*cb + 2 * 256]; + cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; + cr += 4; + cb += 4; + + L = *lum; + lum += 2; + row[0] = row[1] = row[next_row] = row[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row += 2; + + L = *lum; + lum += 2; + row[0] = row[1] = row[next_row] = row[next_row + 1] = + (rgb_2_pix[L + cr_r] | + rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); + row += 2; + + + } + + row += next_row; + } +} + +/* + * How many 1 bits are there in the Uint32. + * Low performance, do not call often. + */ +static int +number_of_bits_set(Uint32 a) +{ + if (!a) + return 0; + if (a & 1) + return 1 + number_of_bits_set(a >> 1); + return (number_of_bits_set(a >> 1)); +} + +/* + * How many 0 bits are there at least significant end of Uint32. + * Low performance, do not call often. 
+ */ +static int +free_bits_at_bottom(Uint32 a) +{ + /* assume char is 8 bits */ + if (!a) + return sizeof(Uint32) * 8; + if (((Sint32) a) & 1l) + return 0; + return 1 + free_bits_at_bottom(a >> 1); +} + +static int +SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format) +{ + Uint32 *r_2_pix_alloc; + Uint32 *g_2_pix_alloc; + Uint32 *b_2_pix_alloc; + int i; + int bpp; + Uint32 Rmask, Gmask, Bmask, Amask; + + if (!SDL_PixelFormatEnumToMasks + (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) { + SDL_SetError("Unsupported YUV destination format"); + return -1; + } + + swdata->target_format = target_format; + r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768]; + g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768]; + b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768]; + + /* + * Set up entries 0-255 in rgb-to-pixel value tables. + */ + for (i = 0; i < 256; ++i) { + r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask)); + r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask); + r_2_pix_alloc[i + 256] |= Amask; + g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask)); + g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask); + g_2_pix_alloc[i + 256] |= Amask; + b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask)); + b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask); + b_2_pix_alloc[i + 256] |= Amask; + } + + /* + * If we have 16-bit output depth, then we double the value + * in the top word. This means that we can write out both + * pixels in the pixel doubling mode with one op. It is + * harmless in the normal case as storing a 32-bit value + * through a short pointer will lose the top bits anyway. 
+ */ + if (SDL_BYTESPERPIXEL(target_format) == 2) { + for (i = 0; i < 256; ++i) { + r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16; + g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16; + b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16; + } + } + + /* + * Spread out the values we have to the rest of the array so that + * we do not need to check for overflow. + */ + for (i = 0; i < 256; ++i) { + r_2_pix_alloc[i] = r_2_pix_alloc[256]; + r_2_pix_alloc[i + 512] = r_2_pix_alloc[511]; + g_2_pix_alloc[i] = g_2_pix_alloc[256]; + g_2_pix_alloc[i + 512] = g_2_pix_alloc[511]; + b_2_pix_alloc[i] = b_2_pix_alloc[256]; + b_2_pix_alloc[i + 512] = b_2_pix_alloc[511]; + } + + /* You have chosen wisely... */ + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + if (SDL_BYTESPERPIXEL(target_format) == 2) { +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES + /* inline assembly functions */ + if (SDL_HasMMX() && (Rmask == 0xF800) && + (Gmask == 0x07E0) && (Bmask == 0x001F) + && (swdata->w & 15) == 0) { +/*printf("Using MMX 16-bit 565 dither\n");*/ + swdata->Display1X = Color565DitherYV12MMX1X; + } else { +/*printf("Using C 16-bit dither\n");*/ + swdata->Display1X = Color16DitherYV12Mod1X; + } +#else + swdata->Display1X = Color16DitherYV12Mod1X; +#endif + swdata->Display2X = Color16DitherYV12Mod2X; + } + if (SDL_BYTESPERPIXEL(target_format) == 3) { + swdata->Display1X = Color24DitherYV12Mod1X; + swdata->Display2X = Color24DitherYV12Mod2X; + } + if (SDL_BYTESPERPIXEL(target_format) == 4) { +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES + /* inline assembly functions */ + if (SDL_HasMMX() && (Rmask == 0x00FF0000) && + (Gmask == 0x0000FF00) && + (Bmask == 0x000000FF) && (swdata->w & 15) == 0) { +/*printf("Using MMX 32-bit dither\n");*/ + swdata->Display1X = ColorRGBDitherYV12MMX1X; + } else { +/*printf("Using C 32-bit dither\n");*/ + swdata->Display1X = 
Color32DitherYV12Mod1X; + } +#else + swdata->Display1X = Color32DitherYV12Mod1X; +#endif + swdata->Display2X = Color32DitherYV12Mod2X; + } + break; + case SDL_PIXELFORMAT_YUY2: + case SDL_PIXELFORMAT_UYVY: + case SDL_PIXELFORMAT_YVYU: + if (SDL_BYTESPERPIXEL(target_format) == 2) { + swdata->Display1X = Color16DitherYUY2Mod1X; + swdata->Display2X = Color16DitherYUY2Mod2X; + } + if (SDL_BYTESPERPIXEL(target_format) == 3) { + swdata->Display1X = Color24DitherYUY2Mod1X; + swdata->Display2X = Color24DitherYUY2Mod2X; + } + if (SDL_BYTESPERPIXEL(target_format) == 4) { + swdata->Display1X = Color32DitherYUY2Mod1X; + swdata->Display2X = Color32DitherYUY2Mod2X; + } + break; + default: + /* We should never get here (caught above) */ + break; + } + + if (swdata->display) { + SDL_FreeSurface(swdata->display); + swdata->display = NULL; + } + return 0; +} + +SDL_SW_YUVTexture * +SDL_SW_CreateYUVTexture(Uint32 format, int w, int h) +{ + SDL_SW_YUVTexture *swdata; + int *Cr_r_tab; + int *Cr_g_tab; + int *Cb_g_tab; + int *Cb_b_tab; + int i; + int CR, CB; + + swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata)); + if (!swdata) { + SDL_OutOfMemory(); + return NULL; + } + + switch (format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + case SDL_PIXELFORMAT_YUY2: + case SDL_PIXELFORMAT_UYVY: + case SDL_PIXELFORMAT_YVYU: + break; + default: + SDL_SetError("Unsupported YUV format"); + return NULL; + } + + swdata->format = format; + swdata->target_format = SDL_PIXELFORMAT_UNKNOWN; + swdata->w = w; + swdata->h = h; + swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2); + swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int)); + swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32)); + if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) { + SDL_OutOfMemory(); + SDL_SW_DestroyYUVTexture(swdata); + return NULL; + } + + /* Generate the tables for the display surface */ + Cr_r_tab = &swdata->colortab[0 * 256]; + Cr_g_tab = &swdata->colortab[1 * 
256]; + Cb_g_tab = &swdata->colortab[2 * 256]; + Cb_b_tab = &swdata->colortab[3 * 256]; + for (i = 0; i < 256; i++) { + /* Gamma correction (luminescence table) and chroma correction + would be done here. See the Berkeley mpeg_play sources. + */ + CB = CR = (i - 128); + Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR); + Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR); + Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB); + Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB); + } + + /* Find the pitch and offset values for the overlay */ + switch (format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + swdata->pitches[0] = w; + swdata->pitches[1] = swdata->pitches[0] / 2; + swdata->pitches[2] = swdata->pitches[0] / 2; + swdata->planes[0] = swdata->pixels; + swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h; + swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2; + break; + case SDL_PIXELFORMAT_YUY2: + case SDL_PIXELFORMAT_UYVY: + case SDL_PIXELFORMAT_YVYU: + swdata->pitches[0] = w * 2; + swdata->planes[0] = swdata->pixels; + break; + default: + /* We should never get here (caught above) */ + break; + } + + /* We're all done.. 
*/ + return (swdata); +} + +int +SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels, + int *pitch) +{ + *pixels = swdata->planes[0]; + *pitch = swdata->pitches[0]; + return 0; +} + +int +SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, + const void *pixels, int pitch) +{ + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + if (rect + && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w + || rect->h != swdata->h)) { + SDL_SetError + ("YV12 and IYUV textures only support full surface updates"); + return -1; + } + SDL_memcpy(swdata->pixels, pixels, swdata->h * swdata->w * 2); + break; + case SDL_PIXELFORMAT_YUY2: + case SDL_PIXELFORMAT_UYVY: + case SDL_PIXELFORMAT_YVYU: + { + Uint8 *src, *dst; + int row; + size_t length; + + src = (Uint8 *) pixels; + dst = + swdata->planes[0] + rect->y * swdata->pitches[0] + + rect->x * 2; + length = rect->w * 2; + for (row = 0; row < rect->h; ++row) { + SDL_memcpy(dst, src, length); + src += pitch; + dst += swdata->pitches[0]; + } + } + break; + } + return 0; +} + +int +SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, + void **pixels, int *pitch) +{ + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + case SDL_PIXELFORMAT_IYUV: + if (rect + && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w + || rect->h != swdata->h)) { + SDL_SetError + ("YV12 and IYUV textures only support full surface locks"); + return -1; + } + break; + } + + *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2; + *pitch = swdata->pitches[0]; + return 0; +} + +void +SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata) +{ +} + +int +SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect, + Uint32 target_format, int w, int h, void *pixels, + int pitch) +{ + int stretch; + int scale_2x; + Uint8 *lum, *Cr, *Cb; + int mod; + + /* Make sure we're set up to display in the desired format */ + if (target_format != 
swdata->target_format) { + if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) { + return -1; + } + } + + stretch = 0; + scale_2x = 0; + if (srcrect->x || srcrect->y || srcrect->w < swdata->w + || srcrect->h < swdata->h) { + /* The source rectangle has been clipped. + Using a scratch surface is easier than adding clipped + source support to all the blitters, plus that would + slow them down in the general unclipped case. + */ + stretch = 1; + } else if ((srcrect->w != w) || (srcrect->h != h)) { + if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) { + scale_2x = 1; + } else { + stretch = 1; + } + } + if (stretch) { + int bpp; + Uint32 Rmask, Gmask, Bmask, Amask; + + if (swdata->display) { + swdata->display->w = w; + swdata->display->h = h; + swdata->display->pixels = pixels; + swdata->display->pitch = pitch; + } else { + /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */ + SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask, + &Bmask, &Amask); + swdata->display = + SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask, + Gmask, Bmask, Amask); + if (!swdata->display) { + return (-1); + } + } + if (!swdata->stretch) { + /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */ + SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask, + &Bmask, &Amask); + swdata->stretch = + SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask, + Gmask, Bmask, Amask); + if (!swdata->stretch) { + return (-1); + } + } + pixels = swdata->stretch->pixels; + pitch = swdata->stretch->pitch; + } + switch (swdata->format) { + case SDL_PIXELFORMAT_YV12: + lum = swdata->planes[0]; + Cr = swdata->planes[1]; + Cb = swdata->planes[2]; + break; + case SDL_PIXELFORMAT_IYUV: + lum = swdata->planes[0]; + Cr = swdata->planes[2]; + Cb = swdata->planes[1]; + break; + case SDL_PIXELFORMAT_YUY2: + lum = swdata->planes[0]; + Cr = lum + 3; + Cb = lum + 1; + break; + case SDL_PIXELFORMAT_UYVY: + lum = swdata->planes[0] + 1; + Cr = lum + 1; + Cb = lum - 1; 
+ break; + case SDL_PIXELFORMAT_YVYU: + lum = swdata->planes[0]; + Cr = lum + 1; + Cb = lum + 3; + break; + default: + SDL_SetError("Unsupported YUV format in copy"); + return (-1); + } + mod = (pitch / SDL_BYTESPERPIXEL(target_format)); + + if (scale_2x) { + mod -= (swdata->w * 2); + swdata->Display2X(swdata->colortab, swdata->rgb_2_pix, + lum, Cr, Cb, pixels, swdata->h, swdata->w, mod); + } else { + mod -= swdata->w; + swdata->Display1X(swdata->colortab, swdata->rgb_2_pix, + lum, Cr, Cb, pixels, swdata->h, swdata->w, mod); + } + if (stretch) { + SDL_Rect rect = *srcrect; + SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL); + } + return 0; +} + +void +SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata) +{ + if (swdata) { + if (swdata->pixels) { + SDL_free(swdata->pixels); + } + if (swdata->colortab) { + SDL_free(swdata->colortab); + } + if (swdata->rgb_2_pix) { + SDL_free(swdata->rgb_2_pix); + } + if (swdata->stretch) { + SDL_FreeSurface(swdata->stretch); + } + if (swdata->display) { + SDL_FreeSurface(swdata->display); + } + SDL_free(swdata); + } +} + +/* vi: set ts=4 sw=4 expandtab: */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/render/SDL_yuv_sw_c.h Thu Feb 03 00:19:40 2011 -0800 @@ -0,0 +1,69 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2010 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +#include "SDL_video.h" + +/* This is the software implementation of the YUV texture support */ + +struct SDL_SW_YUVTexture +{ + Uint32 format; + Uint32 target_format; + int w, h; + Uint8 *pixels; + int *colortab; + Uint32 *rgb_2_pix; + void (*Display1X) (int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod); + void (*Display2X) (int *colortab, Uint32 * rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod); + + /* These are just so we don't have to allocate them separately */ + Uint16 pitches[3]; + Uint8 *planes[3]; + + /* This is a temporary surface in case we have to stretch copy */ + SDL_Surface *stretch; + SDL_Surface *display; +}; + +typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture; + +SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h); +int SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void 
**pixels, + int *pitch); +int SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, + const void *pixels, int pitch); +int SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, + void **pixels, int *pitch); +void SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata); +int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect, + Uint32 target_format, int w, int h, void *pixels, + int pitch); +void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata); + +/* vi: set ts=4 sw=4 expandtab: */
--- a/src/render/direct3d/SDL_d3drender.c Wed Feb 02 22:55:12 2011 -0800 +++ b/src/render/direct3d/SDL_d3drender.c Thu Feb 03 00:19:40 2011 -0800 @@ -28,7 +28,6 @@ #include "SDL_loadso.h" #include "SDL_syswm.h" #include "../SDL_sysrender.h" -#include "../../video/SDL_yuv_sw_c.h" #if SDL_VIDEO_RENDER_D3D #define D3D_DEBUG_INFO @@ -89,7 +88,8 @@ /* Direct3D renderer implementation */ -#if 1 /* This takes more memory but you won't lose your texture data */ +#if 1 +/* This takes more memory but you won't lose your texture data */ #define D3DPOOL_SDL D3DPOOL_MANAGED #define SDL_MEMORY_POOL_MANAGED #else @@ -99,18 +99,12 @@ static SDL_Renderer *D3D_CreateRenderer(SDL_Window * window, Uint32 flags); static int D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static int D3D_QueryTexturePixels(SDL_Renderer * renderer, - SDL_Texture * texture, void **pixels, - int *pitch); static int D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch); static int D3D_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, - void **pixels, int *pitch); + const SDL_Rect * rect, void **pixels, int *pitch); static void D3D_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static void D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, - int numrects, const SDL_Rect * rects); static int D3D_RenderDrawPoints(SDL_Renderer * renderer, const SDL_Point * points, int count); static int D3D_RenderDrawLines(SDL_Renderer * renderer, @@ -134,8 +128,8 @@ { "d3d", (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED), - 0, - {0}, + 1, + {SDL_PIXELFORMAT_ARGB8888}, 0, 0} }; @@ -152,7 +146,6 @@ typedef struct { - SDL_SW_YUVTexture *yuv; Uint32 format; IDirect3DTexture9 *texture; } D3D_TextureData; @@ -248,113 +241,30 @@ PixelFormatToD3DFMT(Uint32 format) { switch (format) { - case SDL_PIXELFORMAT_INDEX8: - return D3DFMT_P8; - case SDL_PIXELFORMAT_RGB332: 
- return D3DFMT_R3G3B2; - case SDL_PIXELFORMAT_RGB444: - return D3DFMT_X4R4G4B4; - case SDL_PIXELFORMAT_RGB555: - return D3DFMT_X1R5G5B5; - case SDL_PIXELFORMAT_ARGB4444: - return D3DFMT_A4R4G4B4; - case SDL_PIXELFORMAT_ARGB1555: - return D3DFMT_A1R5G5B5; case SDL_PIXELFORMAT_RGB565: return D3DFMT_R5G6B5; case SDL_PIXELFORMAT_RGB888: return D3DFMT_X8R8G8B8; case SDL_PIXELFORMAT_ARGB8888: return D3DFMT_A8R8G8B8; - case SDL_PIXELFORMAT_ARGB2101010: - return D3DFMT_A2R10G10B10; - case SDL_PIXELFORMAT_YV12: - return MAKEFOURCC('Y','V','1','2'); - case SDL_PIXELFORMAT_IYUV: - return MAKEFOURCC('I','4','2','0'); - case SDL_PIXELFORMAT_UYVY: - return D3DFMT_UYVY; - case SDL_PIXELFORMAT_YUY2: - return D3DFMT_YUY2; default: return D3DFMT_UNKNOWN; } } -static SDL_bool -D3D_IsTextureFormatAvailable(IDirect3D9 * d3d, UINT adapter, - D3DFORMAT display_format, - D3DFORMAT texture_format) +static Uint32 +D3DFMTToPixelFormat(D3DFORMAT format) { - HRESULT result; - - result = IDirect3D9_CheckDeviceFormat(d3d, adapter, - D3DDEVTYPE_HAL, - display_format, - 0, - D3DRTYPE_TEXTURE, - texture_format); - return FAILED(result) ? 
SDL_FALSE : SDL_TRUE; -} - -static void -UpdateYUVTextureData(SDL_Texture * texture) -{ - D3D_TextureData *data = (D3D_TextureData *) texture->driverdata; - SDL_Rect rect; - RECT d3drect; - D3DLOCKED_RECT locked; - HRESULT result; - - d3drect.left = 0; - d3drect.right = texture->w; - d3drect.top = 0; - d3drect.bottom = texture->h; - - result = - IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0); - if (FAILED(result)) { - return; + switch (format) { + case D3DFMT_R5G6B5: + return SDL_PIXELFORMAT_RGB565; + case D3DFMT_X8R8G8B8: + return SDL_PIXELFORMAT_RGB888; + case D3DFMT_A8R8G8B8: + return SDL_PIXELFORMAT_ARGB8888; + default: + return SDL_PIXELFORMAT_UNKNOWN; } - - rect.x = 0; - rect.y = 0; - rect.w = texture->w; - rect.h = texture->h; - SDL_SW_CopyYUVToRGB(data->yuv, &rect, data->format, texture->w, - texture->h, locked.pBits, locked.Pitch); - - IDirect3DTexture9_UnlockRect(data->texture, 0); -} - -static void -D3D_AddTextureFormats(D3D_RenderData *data, SDL_RendererInfo *info) -{ - int i; - int formats[] = { - SDL_PIXELFORMAT_RGB332, - SDL_PIXELFORMAT_RGB444, - SDL_PIXELFORMAT_RGB555, - SDL_PIXELFORMAT_ARGB4444, - SDL_PIXELFORMAT_ARGB1555, - SDL_PIXELFORMAT_RGB565, - SDL_PIXELFORMAT_RGB888, - SDL_PIXELFORMAT_ARGB8888, - SDL_PIXELFORMAT_ARGB2101010, - }; - - info->num_texture_formats = 0; - for (i = 0; i < SDL_arraysize(formats); ++i) { - if (D3D_IsTextureFormatAvailable - (data->d3d, data->adapter, data->pparams.BackBufferFormat, PixelFormatToD3DFMT(formats[i]))) { - info->texture_formats[info->num_texture_formats++] = formats[i]; - } - } - info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YV12; - info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_IYUV; - info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YUY2; - info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_UYVY; - info->texture_formats[info->num_texture_formats++] = SDL_PIXELFORMAT_YVYU; } SDL_Renderer * @@ -367,6 
+277,9 @@ D3DPRESENT_PARAMETERS pparams; IDirect3DSwapChain9 *chain; D3DCAPS9 caps; + Uint32 window_flags; + int w, h; + SDL_DisplayMode fullscreen_mode; renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer)); if (!renderer) { @@ -404,11 +317,9 @@ } renderer->CreateTexture = D3D_CreateTexture; - renderer->QueryTexturePixels = D3D_QueryTexturePixels; renderer->UpdateTexture = D3D_UpdateTexture; renderer->LockTexture = D3D_LockTexture; renderer->UnlockTexture = D3D_UnlockTexture; - renderer->DirtyTexture = D3D_DirtyTexture; renderer->RenderDrawPoints = D3D_RenderDrawPoints; renderer->RenderDrawLines = D3D_RenderDrawLines; renderer->RenderFillRects = D3D_RenderFillRects; @@ -427,23 +338,27 @@ SDL_VERSION(&windowinfo.version); SDL_GetWindowWMInfo(window, &windowinfo); + window_flags = SDL_GetWindowFlags(window); + SDL_GetWindowSize(window, &w, &h); + SDL_GetWindowDisplayMode(window, &fullscreen_mode); + SDL_zero(pparams); pparams.hDeviceWindow = windowinfo.info.win.window; - pparams.BackBufferWidth = window->w; - pparams.BackBufferHeight = window->h; - if (window->flags & SDL_WINDOW_FULLSCREEN) { + pparams.BackBufferWidth = w; + pparams.BackBufferHeight = h; + if (window_flags & SDL_WINDOW_FULLSCREEN) { pparams.BackBufferFormat = - PixelFormatToD3DFMT(window->fullscreen_mode.format); + PixelFormatToD3DFMT(fullscreen_mode.format); } else { pparams.BackBufferFormat = D3DFMT_UNKNOWN; } pparams.BackBufferCount = 1; pparams.SwapEffect = D3DSWAPEFFECT_DISCARD; - if (window->flags & SDL_WINDOW_FULLSCREEN) { + if (window_flags & SDL_WINDOW_FULLSCREEN) { pparams.Windowed = FALSE; pparams.FullScreen_RefreshRateInHz = - window->fullscreen_mode.refresh_rate; + fullscreen_mode.refresh_rate; } else { pparams.Windowed = TRUE; pparams.FullScreen_RefreshRateInHz = 0; @@ -494,8 +409,6 @@ } data->pparams = pparams; - D3D_AddTextureFormats(data, &renderer->info); - IDirect3DDevice9_GetDeviceCaps(data->device, &caps); renderer->info.max_texture_width = caps.MaxTextureWidth; 
renderer->info.max_texture_height = caps.MaxTextureHeight; @@ -594,22 +507,7 @@ texture->driverdata = data; - if (SDL_ISPIXELFORMAT_FOURCC(texture->format) && - (texture->format != SDL_PIXELFORMAT_YUY2 || - !D3D_IsTextureFormatAvailable(renderdata->d3d, renderdata->adapter, - display_format, PixelFormatToD3DFMT(texture->format))) - && (texture->format != SDL_PIXELFORMAT_YVYU - || !D3D_IsTextureFormatAvailable(renderdata->d3d, renderdata->adapter, - display_format, PixelFormatToD3DFMT(texture->format)))) { - data->yuv = - SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h); - if (!data->yuv) { - return -1; - } - data->format = SDL_GetWindowPixelFormat(window); - } else { - data->format = texture->format; - } + data->format = texture->format; result = IDirect3DDevice9_CreateTexture(renderdata->device, texture->w, @@ -625,153 +523,118 @@ } static int -D3D_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, - void **pixels, int *pitch) -{ - D3D_TextureData *data = (D3D_TextureData *) texture->driverdata; - - if (data->yuv) { - return SDL_SW_QueryYUVTexturePixels(data->yuv, pixels, pitch); - } else { - /* D3D textures don't have their pixels hanging out */ - return -1; - } -} - -static int D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch) { D3D_TextureData *data = (D3D_TextureData *) texture->driverdata; D3D_RenderData *renderdata = (D3D_RenderData *) renderer->driverdata; - if (data->yuv) { - if (SDL_SW_UpdateYUVTexture(data->yuv, rect, pixels, pitch) < 0) { - return -1; - } - UpdateYUVTextureData(texture); - return 0; - } else { #ifdef SDL_MEMORY_POOL_DEFAULT - IDirect3DTexture9 *temp; - RECT d3drect; - D3DLOCKED_RECT locked; - const Uint8 *src; - Uint8 *dst; - int row, length; - HRESULT result; + IDirect3DTexture9 *temp; + RECT d3drect; + D3DLOCKED_RECT locked; + const Uint8 *src; + Uint8 *dst; + int row, length; + HRESULT result; - result = - 
IDirect3DDevice9_CreateTexture(renderdata->device, texture->w, - texture->h, 1, 0, - PixelFormatToD3DFMT(texture-> - format), - D3DPOOL_SYSTEMMEM, &temp, NULL); - if (FAILED(result)) { - D3D_SetError("CreateTexture()", result); - return -1; - } + result = + IDirect3DDevice9_CreateTexture(renderdata->device, texture->w, + texture->h, 1, 0, + PixelFormatToD3DFMT(texture-> format), + D3DPOOL_SYSTEMMEM, &temp, NULL); + if (FAILED(result)) { + D3D_SetError("CreateTexture()", result); + return -1; + } - d3drect.left = rect->x; - d3drect.right = rect->x + rect->w; - d3drect.top = rect->y; - d3drect.bottom = rect->y + rect->h; + d3drect.left = rect->x; + d3drect.right = rect->x + rect->w; + d3drect.top = rect->y; + d3drect.bottom = rect->y + rect->h; - result = IDirect3DTexture9_LockRect(temp, 0, &locked, &d3drect, 0); - if (FAILED(result)) { - IDirect3DTexture9_Release(temp); - D3D_SetError("LockRect()", result); - return -1; - } + result = IDirect3DTexture9_LockRect(temp, 0, &locked, &d3drect, 0); + if (FAILED(result)) { + IDirect3DTexture9_Release(temp); + D3D_SetError("LockRect()", result); + return -1; + } - src = pixels; - dst = locked.pBits; - length = rect->w * SDL_BYTESPERPIXEL(texture->format); - for (row = 0; row < rect->h; ++row) { - SDL_memcpy(dst, src, length); - src += pitch; - dst += locked.Pitch; - } - IDirect3DTexture9_UnlockRect(temp, 0); + src = pixels; + dst = locked.pBits; + length = rect->w * SDL_BYTESPERPIXEL(texture->format); + for (row = 0; row < rect->h; ++row) { + SDL_memcpy(dst, src, length); + src += pitch; + dst += locked.Pitch; + } + IDirect3DTexture9_UnlockRect(temp, 0); - result = - IDirect3DDevice9_UpdateTexture(renderdata->device, - (IDirect3DBaseTexture9 *) temp, - (IDirect3DBaseTexture9 *) - data->texture); - IDirect3DTexture9_Release(temp); - if (FAILED(result)) { - D3D_SetError("UpdateTexture()", result); - return -1; - } + result = + IDirect3DDevice9_UpdateTexture(renderdata->device, + (IDirect3DBaseTexture9 *) temp, + 
(IDirect3DBaseTexture9 *) + data->texture); + IDirect3DTexture9_Release(temp); + if (FAILED(result)) { + D3D_SetError("UpdateTexture()", result); + return -1; + } #else - RECT d3drect; - D3DLOCKED_RECT locked; - const Uint8 *src; - Uint8 *dst; - int row, length; - HRESULT result; + RECT d3drect; + D3DLOCKED_RECT locked; + const Uint8 *src; + Uint8 *dst; + int row, length; + HRESULT result; - d3drect.left = rect->x; - d3drect.right = rect->x + rect->w; - d3drect.top = rect->y; - d3drect.bottom = rect->y + rect->h; + d3drect.left = rect->x; + d3drect.right = rect->x + rect->w; + d3drect.top = rect->y; + d3drect.bottom = rect->y + rect->h; - result = - IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, - 0); - if (FAILED(result)) { - D3D_SetError("LockRect()", result); - return -1; - } + result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0); + if (FAILED(result)) { + D3D_SetError("LockRect()", result); + return -1; + } - src = pixels; - dst = locked.pBits; - length = rect->w * SDL_BYTESPERPIXEL(texture->format); - for (row = 0; row < rect->h; ++row) { - SDL_memcpy(dst, src, length); - src += pitch; - dst += locked.Pitch; - } - IDirect3DTexture9_UnlockRect(data->texture, 0); + src = pixels; + dst = locked.pBits; + length = rect->w * SDL_BYTESPERPIXEL(texture->format); + for (row = 0; row < rect->h; ++row) { + SDL_memcpy(dst, src, length); + src += pitch; + dst += locked.Pitch; + } + IDirect3DTexture9_UnlockRect(data->texture, 0); #endif // SDL_MEMORY_POOL_DEFAULT - return 0; - } + return 0; } static int D3D_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, void **pixels, - int *pitch) + const SDL_Rect * rect, void **pixels, int *pitch) { D3D_TextureData *data = (D3D_TextureData *) texture->driverdata; + RECT d3drect; + D3DLOCKED_RECT locked; + HRESULT result; - if (data->yuv) { - return SDL_SW_LockYUVTexture(data->yuv, rect, markDirty, pixels, - pitch); - } else { - RECT d3drect; - 
D3DLOCKED_RECT locked; - HRESULT result; + d3drect.left = rect->x; + d3drect.right = rect->x + rect->w; + d3drect.top = rect->y; + d3drect.bottom = rect->y + rect->h; - d3drect.left = rect->x; - d3drect.right = rect->x + rect->w; - d3drect.top = rect->y; - d3drect.bottom = rect->y + rect->h; - - result = - IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, - markDirty ? 0 : - D3DLOCK_NO_DIRTY_UPDATE); - if (FAILED(result)) { - D3D_SetError("LockRect()", result); - return -1; - } - *pixels = locked.pBits; - *pitch = locked.Pitch; - return 0; + result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0); + if (FAILED(result)) { + D3D_SetError("LockRect()", result); + return -1; } + *pixels = locked.pBits; + *pitch = locked.Pitch; + return 0; } static void @@ -779,32 +642,7 @@ { D3D_TextureData *data = (D3D_TextureData *) texture->driverdata; - if (data->yuv) { - SDL_SW_UnlockYUVTexture(data->yuv); - UpdateYUVTextureData(texture); - } else { - IDirect3DTexture9_UnlockRect(data->texture, 0); - } -} - -static void -D3D_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, int numrects, - const SDL_Rect * rects) -{ - D3D_TextureData *data = (D3D_TextureData *) texture->driverdata; - RECT d3drect; - int i; - - for (i = 0; i < numrects; ++i) { - const SDL_Rect *rect = &rects[i]; - - d3drect.left = rect->x; - d3drect.right = rect->x + rect->w; - d3drect.top = rect->y; - d3drect.bottom = rect->y + rect->h; - - IDirect3DTexture9_AddDirtyRect(data->texture, &d3drect); - } + IDirect3DTexture9_UnlockRect(data->texture, 0); } static void @@ -1123,8 +961,6 @@ Uint32 format, void * pixels, int pitch) { D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata; - SDL_Window *window = renderer->window; - SDL_VideoDisplay *display = window->display; D3DSURFACE_DESC desc; LPDIRECT3DSURFACE9 backBuffer; LPDIRECT3DSURFACE9 surface; @@ -1174,7 +1010,7 @@ } SDL_ConvertPixels(rect->w, rect->h, - display->current_mode.format, locked.pBits, 
locked.Pitch, + D3DFMTToPixelFormat(desc.Format), locked.pBits, locked.Pitch, format, pixels, pitch); IDirect3DSurface9_UnlockRect(surface); @@ -1227,9 +1063,6 @@ if (!data) { return; } - if (data->yuv) { - SDL_SW_DestroyYUVTexture(data->yuv); - } if (data->texture) { IDirect3DTexture9_Release(data->texture); }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/render/mmx.h Thu Feb 03 00:19:40 2011 -0800 @@ -0,0 +1,642 @@ +/* mmx.h + + MultiMedia eXtensions GCC interface library for IA32. + + To use this library, simply include this header file + and compile with GCC. You MUST have inlining enabled + in order for mmx_ok() to work; this can be done by + simply using -O on the GCC command line. + + Compiling with -DMMX_TRACE will cause detailed trace + output to be sent to stderr for each mmx operation. + This adds lots of code, and obviously slows execution to + a crawl, but can be very useful for debugging. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT + LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR ANY PARTICULAR PURPOSE. + + 1997-99 by H. Dietz and R. Fisher + + Notes: + It appears that the latest gas has the pand problem fixed, therefore + I'll undefine BROKEN_PAND by default. +*/ + +#ifndef _MMX_H +#define _MMX_H + + +/* Warning: at this writing, the version of GAS packaged + with most Linux distributions does not handle the + parallel AND operation mnemonic correctly. If the + symbol BROKEN_PAND is defined, a slower alternative + coding will be used. If execution of mmxtest results + in an illegal instruction fault, define this symbol. 
+*/ +#undef BROKEN_PAND + + +/* The type of an value that fits in an MMX register + (note that long long constant values MUST be suffixed + by LL and unsigned long long values by ULL, lest + they be truncated by the compiler) +*/ +typedef union +{ + long long q; /* Quadword (64-bit) value */ + unsigned long long uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} __attribute__ ((aligned(8))) mmx_t; /* On an 8-byte (64-bit) boundary */ + + +#if 0 +/* Function to test if multimedia instructions are supported... +*/ +inline extern int +mm_support(void) +{ + /* Returns 1 if MMX instructions are supported, + 3 if Cyrix MMX and Extended MMX instructions are supported + 5 if AMD MMX and 3DNow! instructions are supported + 0 if hardware does not support any of these + */ + register int rval = 0; + + __asm__ __volatile__( + /* See if CPUID instruction is supported ... */ + /* ... Get copies of EFLAGS into eax and ecx */ + "pushf\n\t" + "popl %%eax\n\t" "movl %%eax, %%ecx\n\t" + /* ... Toggle the ID bit in one copy and store */ + /* to the EFLAGS reg */ + "xorl $0x200000, %%eax\n\t" + "push %%eax\n\t" "popf\n\t" + /* ... Get the (hopefully modified) EFLAGS */ + "pushf\n\t" "popl %%eax\n\t" + /* ... 
Compare and test result */ + "xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */ + /* Get standard CPUID information, and + go to a specific vendor section */ + "movl $0, %%eax\n\t" "cpuid\n\t" + /* Check for Intel */ + "cmpl $0x756e6547, %%ebx\n\t" + "jne TryAMD\n\t" + "cmpl $0x49656e69, %%edx\n\t" + "jne TryAMD\n\t" + "cmpl $0x6c65746e, %%ecx\n" + "jne TryAMD\n\t" "jmp Intel\n\t" + /* Check for AMD */ + "\nTryAMD:\n\t" + "cmpl $0x68747541, %%ebx\n\t" + "jne TryCyrix\n\t" + "cmpl $0x69746e65, %%edx\n\t" + "jne TryCyrix\n\t" + "cmpl $0x444d4163, %%ecx\n" + "jne TryCyrix\n\t" "jmp AMD\n\t" + /* Check for Cyrix */ + "\nTryCyrix:\n\t" + "cmpl $0x69727943, %%ebx\n\t" + "jne NotSupported2\n\t" + "cmpl $0x736e4978, %%edx\n\t" + "jne NotSupported3\n\t" + "cmpl $0x64616574, %%ecx\n\t" + "jne NotSupported4\n\t" + /* Drop through to Cyrix... */ + /* Cyrix Section */ + /* See if extended CPUID level 80000001 is supported */ + /* The value of CPUID/80000001 for the 6x86MX is undefined + according to the Cyrix CPU Detection Guide (Preliminary + Rev. 1.01 table 1), so we'll check the value of eax for + CPUID/0 to see if standard CPUID level 2 is supported. + According to the table, the only CPU which supports level + 2 is also the only one which supports extended CPUID levels. 
+ */ + "cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t" /* Use standard CPUID instead */ + /* Extended CPUID supported (in theory), so get extended + features */ + "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t" /* Test for MMX */ + "jz NotSupported5\n\t" /* MMX not supported */ + "testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */ + "jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */ + "jmp Return\n\t" + /* AMD Section */ + "AMD:\n\t" + /* See if extended CPUID is supported */ + "movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t" /* Use standard CPUID instead */ + /* Extended CPUID supported, so get extended features */ + "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */ + "jz NotSupported6\n\t" /* MMX not supported */ + "testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */ + "jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */ + "jmp Return\n\t" + /* Intel Section */ + "Intel:\n\t" + /* Check for MMX */ + "MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */ + "jz NotSupported7\n\t" /* MMX Not supported */ + "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\t" + /* Nothing supported */ + "\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval): /* no input */ + :"eax", "ebx", "ecx", "edx"); + + /* Return */ + return (rval); +} + +/* Function to test if mmx instructions are supported... 
+*/ +inline extern int +mmx_ok(void) +{ + /* Returns 1 if MMX instructions are supported, 0 otherwise */ + return (mm_support() & 0x1); +} +#endif + +/* Helper functions for the instruction macros that follow... + (note that memory-to-register, m2r, instructions are nearly + as efficient as register-to-register, r2r, instructions; + however, memory-to-memory instructions are really simulated + as a convenience, and are only 1/3 as efficient) +*/ +#ifdef MMX_TRACE + +/* Include the stuff for printing a trace to stderr... +*/ + +#define mmx_i2r(op, imm, reg) \ + { \ + mmx_t mmx_trace; \ + mmx_trace.uq = (imm); \ + printf(#op "_i2r(" #imm "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + printf(#reg "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + printf(#reg "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#define mmx_m2r(op, mem, reg) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (mem); \ + printf(#op "_m2r(" #mem "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + printf(#reg "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + printf(#reg "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#define mmx_r2m(op, reg, mem) \ + { \ + mmx_t mmx_trace; \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + printf(#op "_r2m(" #reg "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + mmx_trace = (mem); \ + printf(#mem "=0x%08x%08x) => ", \ 
+ mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ); \ + mmx_trace = (mem); \ + printf(#mem "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#define mmx_r2r(op, regs, regd) \ + { \ + mmx_t mmx_trace; \ + __asm__ __volatile__ ("movq %%" #regs ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + printf(#op "_r2r(" #regs "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + printf(#regd "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + printf(#regd "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#define mmx_m2m(op, mems, memd) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (mems); \ + printf(#op "_m2m(" #mems "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + mmx_trace = (memd); \ + printf(#memd "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (memd) \ + : "X" (mems)); \ + mmx_trace = (memd); \ + printf(#memd "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#else + +/* These macros are a lot simpler without the tracing... 
+*/ + +#define mmx_i2r(op, imm, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm) ) + +#define mmx_m2r(op, mem, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem)) + +#define mmx_r2m(op, reg, mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=m" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op, regs, regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + +#define mmx_m2m(op, mems, memd) \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (memd) \ + : "X" (mems)) + +#endif + + +/* 1x64 MOVe Quadword + (this is both a load and a store... + in fact, it is the only way to store) +*/ +#define movq_m2r(var, reg) mmx_m2r(movq, var, reg) +#define movq_r2m(reg, var) mmx_r2m(movq, reg, var) +#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd) +#define movq(vars, vard) \ + __asm__ __volatile__ ("movq %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (vard) \ + : "X" (vars)) + + +/* 1x32 MOVe Doubleword + (like movq, this is both load and store... 
+ but is most useful for moving things between + mmx registers and ordinary registers) +*/ +#define movd_m2r(var, reg) mmx_m2r(movd, var, reg) +#define movd_r2m(reg, var) mmx_r2m(movd, reg, var) +#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd) +#define movd(vars, vard) \ + __asm__ __volatile__ ("movd %1, %%mm0\n\t" \ + "movd %%mm0, %0" \ + : "=X" (vard) \ + : "X" (vars)) + + +/* 2x32, 4x16, and 8x8 Parallel ADDs +*/ +#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg) +#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd) +#define paddd(vars, vard) mmx_m2m(paddd, vars, vard) + +#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg) +#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd) +#define paddw(vars, vard) mmx_m2m(paddw, vars, vard) + +#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg) +#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd) +#define paddb(vars, vard) mmx_m2m(paddb, vars, vard) + + +/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic +*/ +#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg) +#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd) +#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard) + +#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg) +#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd) +#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard) + + +/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic +*/ +#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg) +#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd) +#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard) + +#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg) +#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd) +#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel SUBs +*/ +#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg) +#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd) +#define psubd(vars, vard) mmx_m2m(psubd, vars, vard) + +#define 
psubw_m2r(var, reg) mmx_m2r(psubw, var, reg) +#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd) +#define psubw(vars, vard) mmx_m2m(psubw, vars, vard) + +#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg) +#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd) +#define psubb(vars, vard) mmx_m2m(psubb, vars, vard) + + +/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic +*/ +#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg) +#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd) +#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard) + +#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg) +#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd) +#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard) + + +/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic +*/ +#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg) +#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd) +#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard) + +#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg) +#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd) +#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard) + + +/* 4x16 Parallel MULs giving Low 4x16 portions of results +*/ +#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg) +#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd) +#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard) + + +/* 4x16 Parallel MULs giving High 4x16 portions of results +*/ +#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg) +#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd) +#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard) + + +/* 4x16->2x32 Parallel Mul-ADD + (muls like pmullw, then adds adjacent 16-bit fields + in the multiply result to make the final 2x32 result) +*/ +#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg) +#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd) +#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard) + + +/* 1x64 bitwise 
AND +*/ +#ifdef BROKEN_PAND +#define pand_m2r(var, reg) \ + { \ + mmx_m2r(pandn, (mmx_t) -1LL, reg); \ + mmx_m2r(pandn, var, reg); \ + } +#define pand_r2r(regs, regd) \ + { \ + mmx_m2r(pandn, (mmx_t) -1LL, regd); \ + mmx_r2r(pandn, regs, regd) \ + } +#define pand(vars, vard) \ + { \ + movq_m2r(vard, mm0); \ + mmx_m2r(pandn, (mmx_t) -1LL, mm0); \ + mmx_m2r(pandn, vars, mm0); \ + movq_r2m(mm0, vard); \ + } +#else +#define pand_m2r(var, reg) mmx_m2r(pand, var, reg) +#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd) +#define pand(vars, vard) mmx_m2m(pand, vars, vard) +#endif + + +/* 1x64 bitwise AND with Not the destination +*/ +#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg) +#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd) +#define pandn(vars, vard) mmx_m2m(pandn, vars, vard) + + +/* 1x64 bitwise OR +*/ +#define por_m2r(var, reg) mmx_m2r(por, var, reg) +#define por_r2r(regs, regd) mmx_r2r(por, regs, regd) +#define por(vars, vard) mmx_m2m(por, vars, vard) + + +/* 1x64 bitwise eXclusive OR +*/ +#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg) +#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd) +#define pxor(vars, vard) mmx_m2m(pxor, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality + (resulting fields are either 0 or -1) +*/ +#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd) +#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard) + +#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd) +#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard) + +#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd) +#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than + (resulting fields are either 0 or -1) +*/ +#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg) +#define 
pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd) +#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard) + +#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg) +#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd) +#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard) + +#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd) +#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard) + + +/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical +*/ +#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg) +#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg) +#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd) +#define psllq(vars, vard) mmx_m2m(psllq, vars, vard) + +#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg) +#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg) +#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd) +#define pslld(vars, vard) mmx_m2m(pslld, vars, vard) + +#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg) +#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg) +#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd) +#define psllw(vars, vard) mmx_m2m(psllw, vars, vard) + + +/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical +*/ +#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg) +#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg) +#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd) +#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard) + +#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg) +#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg) +#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd) +#define psrld(vars, vard) mmx_m2m(psrld, vars, vard) + +#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg) +#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg) +#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd) +#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard) + + +/* 2x32 and 4x16 Parallel Shift Right Arithmetic +*/ +#define 
psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg) +#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg) +#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd) +#define psrad(vars, vard) mmx_m2m(psrad, vars, vard) + +#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg) +#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg) +#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd) +#define psraw(vars, vard) mmx_m2m(psraw, vars, vard) + + +/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate + (packs source and dest fields into dest in that order) +*/ +#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg) +#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd) +#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard) + +#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg) +#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd) +#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard) + + +/* 4x16->8x8 PACK and Unsigned Saturate + (packs source and dest fields into dest in that order) +*/ +#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg) +#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd) +#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard) + + +/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low + (interleaves low half of dest with low half of source + as padding in each result field) +*/ +#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg) +#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd) +#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard) + +#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg) +#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd) +#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard) + +#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg) +#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd) +#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard) + + +/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High + 
(interleaves high half of dest with high half of source + as padding in each result field) +*/ +#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg) +#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd) +#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard) + +#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg) +#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd) +#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard) + +#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg) +#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd) +#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard) + + +/* Empty MMx State + (used to clean-up when going from mmx to float use + of the registers that are shared by both; note that + there is no float-to-mmx operation needed, because + only the float tag word info is corruptible) +*/ +#ifdef MMX_TRACE + +#define emms() \ + { \ + printf("emms()\n"); \ + __asm__ __volatile__ ("emms"); \ + } + +#else + +#define emms() __asm__ __volatile__ ("emms") + +#endif + +#endif +/* vi: set ts=4 sw=4 expandtab: */
--- a/src/render/opengl/SDL_renderer_gl.c Wed Feb 02 22:55:12 2011 -0800 +++ b/src/render/opengl/SDL_renderer_gl.c Thu Feb 03 00:19:40 2011 -0800 @@ -37,27 +37,6 @@ http://developer.apple.com/documentation/GraphicsImaging/Conceptual/OpenGL-MacProgGuide/opengl_texturedata/chapter_10_section_2.html */ -/* !!! FIXME: this should go in a higher level than the GL renderer. */ -static __inline__ int -bytes_per_pixel(const Uint32 format) -{ - if (!SDL_ISPIXELFORMAT_FOURCC(format)) { - return SDL_BYTESPERPIXEL(format); - } - - /* FOURCC format */ - switch (format) { - case SDL_PIXELFORMAT_YV12: - case SDL_PIXELFORMAT_IYUV: - case SDL_PIXELFORMAT_YUY2: - case SDL_PIXELFORMAT_UYVY: - case SDL_PIXELFORMAT_YVYU: - return 2; - default: - return 1; /* shouldn't ever hit this. */ - } -} - /* Used to re-create the window with OpenGL capability */ extern int SDL_RecreateWindow(SDL_Window * window, Uint32 flags); @@ -67,18 +46,12 @@ static void GL_WindowEvent(SDL_Renderer * renderer, const SDL_WindowEvent *event); static int GL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static int GL_QueryTexturePixels(SDL_Renderer * renderer, - SDL_Texture * texture, void **pixels, - int *pitch); static int GL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch); static int GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, void **pixels, - int *pitch); + const SDL_Rect * rect, void **pixels, int *pitch); static void GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static void GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, - int numrects, const SDL_Rect * rects); static int GL_RenderClear(SDL_Renderer * renderer); static int GL_RenderDrawPoints(SDL_Renderer * renderer, const SDL_Point * points, int count); @@ -102,21 +75,8 @@ { "opengl", (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED), - 13, - { - SDL_PIXELFORMAT_RGB332, - 
SDL_PIXELFORMAT_RGB444, - SDL_PIXELFORMAT_RGB555, - SDL_PIXELFORMAT_ARGB4444, - SDL_PIXELFORMAT_ARGB1555, - SDL_PIXELFORMAT_RGB565, - SDL_PIXELFORMAT_RGB24, - SDL_PIXELFORMAT_BGR24, - SDL_PIXELFORMAT_RGB888, - SDL_PIXELFORMAT_BGR888, - SDL_PIXELFORMAT_ARGB8888, - SDL_PIXELFORMAT_ABGR8888, - SDL_PIXELFORMAT_ARGB2101010}, + 1, + {SDL_PIXELFORMAT_ARGB8888}, 0, 0} }; @@ -126,10 +86,6 @@ SDL_GLContext context; SDL_bool updateSize; SDL_bool GL_ARB_texture_rectangle_supported; - SDL_bool GL_EXT_paletted_texture_supported; - SDL_bool GL_APPLE_ycbcr_422_supported; - SDL_bool GL_MESA_ycbcr_texture_supported; - SDL_bool GL_ARB_fragment_program_supported; int blendMode; /* OpenGL functions */ @@ -139,33 +95,18 @@ void (*glTextureRangeAPPLE) (GLenum target, GLsizei length, const GLvoid * pointer); - - PFNGLGETPROGRAMIVARBPROC glGetProgramivARB; - PFNGLGETPROGRAMSTRINGARBPROC glGetProgramStringARB; - PFNGLPROGRAMLOCALPARAMETER4FVARBPROC glProgramLocalParameter4fvARB; - PFNGLDELETEPROGRAMSARBPROC glDeleteProgramsARB; - PFNGLGENPROGRAMSARBPROC glGenProgramsARB; - PFNGLBINDPROGRAMARBPROC glBindProgramARB; - PFNGLPROGRAMSTRINGARBPROC glProgramStringARB; - - /* (optional) fragment programs */ - GLuint fragment_program_UYVY; } GL_RenderData; typedef struct { GLuint texture; - GLuint shader; GLenum type; GLfloat texw; GLfloat texh; GLenum format; GLenum formattype; - Uint8 *palette; void *pixels; int pitch; - SDL_DirtyRectList dirty; - int HACK_RYAN_FIXME; } GL_TextureData; @@ -257,11 +198,9 @@ renderer->WindowEvent = GL_WindowEvent; renderer->CreateTexture = GL_CreateTexture; - renderer->QueryTexturePixels = GL_QueryTexturePixels; renderer->UpdateTexture = GL_UpdateTexture; renderer->LockTexture = GL_LockTexture; renderer->UnlockTexture = GL_UnlockTexture; - renderer->DirtyTexture = GL_DirtyTexture; renderer->RenderClear = GL_RenderClear; renderer->RenderDrawPoints = GL_RenderDrawPoints; renderer->RenderDrawLines = GL_RenderDrawLines; @@ -317,40 +256,12 @@ || 
SDL_GL_ExtensionSupported("GL_EXT_texture_rectangle")) { data->GL_ARB_texture_rectangle_supported = SDL_TRUE; } - if (SDL_GL_ExtensionSupported("GL_APPLE_ycbcr_422")) { - data->GL_APPLE_ycbcr_422_supported = SDL_TRUE; - } - if (SDL_GL_ExtensionSupported("GL_MESA_ycbcr_texture")) { - data->GL_MESA_ycbcr_texture_supported = SDL_TRUE; - } if (SDL_GL_ExtensionSupported("GL_APPLE_texture_range")) { data->glTextureRangeAPPLE = (void (*)(GLenum, GLsizei, const GLvoid *)) SDL_GL_GetProcAddress("glTextureRangeAPPLE"); } - /* we might use fragment programs for YUV data, etc. */ - if (SDL_GL_ExtensionSupported("GL_ARB_fragment_program")) { - /* !!! FIXME: this doesn't check for errors. */ - /* !!! FIXME: this should really reuse the glfuncs.h stuff. */ - data->glGetProgramivARB = (PFNGLGETPROGRAMIVARBPROC) - SDL_GL_GetProcAddress("glGetProgramivARB"); - data->glGetProgramStringARB = (PFNGLGETPROGRAMSTRINGARBPROC) - SDL_GL_GetProcAddress("glGetProgramStringARB"); - data->glProgramLocalParameter4fvARB = - (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC) - SDL_GL_GetProcAddress("glProgramLocalParameter4fvARB"); - data->glDeleteProgramsARB = (PFNGLDELETEPROGRAMSARBPROC) - SDL_GL_GetProcAddress("glDeleteProgramsARB"); - data->glGenProgramsARB = (PFNGLGENPROGRAMSARBPROC) - SDL_GL_GetProcAddress("glGenProgramsARB"); - data->glBindProgramARB = (PFNGLBINDPROGRAMARBPROC) - SDL_GL_GetProcAddress("glBindProgramARB"); - data->glProgramStringARB = (PFNGLPROGRAMSTRINGARBPROC) - SDL_GL_GetProcAddress("glProgramStringARB"); - data->GL_ARB_fragment_program_supported = SDL_TRUE; - } - /* Set up parameters for rendering */ data->blendMode = -1; data->glDisable(GL_DEPTH_TEST); @@ -419,240 +330,16 @@ return value; } - -//#define DEBUG_PROGRAM_COMPILE 1 - -static void -set_shader_error(GL_RenderData * data, const char *prefix) -{ - GLint pos = 0; - const GLubyte *errstr; - data->glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); - errstr = data->glGetString(GL_PROGRAM_ERROR_STRING_ARB); - SDL_SetError("%s: 
shader compile error at position %d: %s", - prefix, (int) pos, (const char *) errstr); -} - -static GLuint -compile_shader(GL_RenderData * data, GLenum shader_type, const char *_code) -{ - const int have_texture_rects = data->GL_ARB_texture_rectangle_supported; - const char *replacement = have_texture_rects ? "RECT" : "2D"; - const size_t replacementlen = SDL_strlen(replacement); - const char *token = "%TEXTURETARGET%"; - const size_t tokenlen = SDL_strlen(token); - char *code = NULL; - char *ptr = NULL; - GLuint program = 0; - - /* - * The TEX instruction needs a different target depending on what we use. - * To handle this, we use "%TEXTURETARGET%" and replace the string before - * compiling the shader. - */ - code = SDL_strdup(_code); - if (code == NULL) - return 0; - - for (ptr = SDL_strstr(code, token); ptr; ptr = SDL_strstr(ptr + 1, token)) { - SDL_memcpy(ptr, replacement, replacementlen); - SDL_memmove(ptr + replacementlen, ptr + tokenlen, - SDL_strlen(ptr + tokenlen) + 1); - } - -#if DEBUG_PROGRAM_COMPILE - printf("compiling shader:\n%s\n\n", code); -#endif - - data->glGetError(); /* flush any existing error state. */ - data->glGenProgramsARB(1, &program); - data->glBindProgramARB(shader_type, program); - data->glProgramStringARB(shader_type, GL_PROGRAM_FORMAT_ASCII_ARB, - (GLsizei)SDL_strlen(code), code); - - SDL_free(code); - - if (data->glGetError() == GL_INVALID_OPERATION) { -#if DEBUG_PROGRAM_COMPILE - GLint pos = 0; - const GLubyte *errstr; - data->glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); - errstr = data->glGetString(GL_PROGRAM_ERROR_STRING_ARB); - printf("program compile error at position %d: %s\n\n", - (int) pos, (const char *) errstr); -#endif - data->glBindProgramARB(shader_type, 0); - data->glDeleteProgramsARB(1, &program); - return 0; - } - - return program; -} - - -/* - * Fragment program that renders from UYVY textures. 
- * The UYVY to RGB equasion is: - * R = 1.164(Y-16) + 1.596(Cr-128) - * G = 1.164(Y-16) - 0.813(Cr-128) - 0.391(Cb-128) - * B = 1.164(Y-16) + 2.018(Cb-128) - * Byte layout is Cb, Y1, Cr, Y2, stored in the R, G, B, A channels. - * 4 bytes == 2 pixels: Y1/Cb/Cr, Y2/Cb/Cr - * - * !!! FIXME: this ignores blendmodes, etc. - * !!! FIXME: this could be more efficient...use a dot product for green, etc. - */ -static const char *fragment_program_UYVY_source_code = "!!ARBfp1.0\n" - /* outputs... */ - "OUTPUT outcolor = result.color;\n" - /* scratch registers... */ - "TEMP uyvy;\n" "TEMP luminance;\n" "TEMP work;\n" - /* Halve the coordinates to grab the correct 32 bits for the fragment. */ - "MUL work, fragment.texcoord, { 0.5, 1.0, 1.0, 1.0 };\n" - /* Sample the YUV texture. Cb, Y1, Cr, Y2, are stored in x, y, z, w. */ - "TEX uyvy, work, texture[0], %TEXTURETARGET%;\n" - /* Do subtractions (128/255, 16/255, 128/255, 16/255) */ - "SUB uyvy, uyvy, { 0.501960784313726, 0.06274509803922, 0.501960784313726, 0.06274509803922 };\n" - /* Choose the luminance component by texcoord. */ - /* !!! FIXME: laziness wins out for now... just average Y1 and Y2. */ - "ADD luminance, uyvy.yyyy, uyvy.wwww;\n" - "MUL luminance, luminance, { 0.5, 0.5, 0.5, 0.5 };\n" - /* Multiply luminance by its magic value. */ - "MUL luminance, luminance, { 1.164, 1.164, 1.164, 1.164 };\n" - /* uyvy.xyzw becomes Cr/Cr/Cb/Cb, with multiplications. */ - "MUL uyvy, uyvy.zzxx, { 1.596, -0.813, 2.018, -0.391 };\n" - /* Add luminance to Cr and Cb, store to RGB channels. */ - "ADD work.rgb, luminance, uyvy;\n" - /* Do final addition for Green channel. (!!! FIXME: this should be a DPH?) */ - "ADD work.g, work.g, uyvy.w;\n" - /* Make sure alpha channel is fully opaque. (!!! FIXME: blend modes!) */ - "MOV work.a, { 1.0 };\n" - /* Store out the final fragment color... */ - "MOV outcolor, work;\n" - /* ...and we're done! 
*/ - "END\n"; - static __inline__ SDL_bool convert_format(GL_RenderData *renderdata, Uint32 pixel_format, GLint* internalFormat, GLenum* format, GLenum* type) { switch (pixel_format) { - case SDL_PIXELFORMAT_RGB332: - *internalFormat = GL_R3_G3_B2; - *format = GL_RGB; - *type = GL_UNSIGNED_BYTE_3_3_2; - break; - case SDL_PIXELFORMAT_RGB444: - *internalFormat = GL_RGB4; - *format = GL_RGB; - *type = GL_UNSIGNED_SHORT_4_4_4_4; - break; - case SDL_PIXELFORMAT_RGB555: - *internalFormat = GL_RGB5; - *format = GL_RGB; - *type = GL_UNSIGNED_SHORT_5_5_5_1; - break; - case SDL_PIXELFORMAT_ARGB4444: - *internalFormat = GL_RGBA4; - *format = GL_BGRA; - *type = GL_UNSIGNED_SHORT_4_4_4_4_REV; - break; - case SDL_PIXELFORMAT_ARGB1555: - *internalFormat = GL_RGB5_A1; - *format = GL_BGRA; - *type = GL_UNSIGNED_SHORT_1_5_5_5_REV; - break; - case SDL_PIXELFORMAT_RGB565: - *internalFormat = GL_RGB8; - *format = GL_RGB; - *type = GL_UNSIGNED_SHORT_5_6_5; - break; - case SDL_PIXELFORMAT_RGB24: - *internalFormat = GL_RGB8; - *format = GL_RGB; - *type = GL_UNSIGNED_BYTE; - break; case SDL_PIXELFORMAT_RGB888: - *internalFormat = GL_RGB8; - *format = GL_BGRA; - *type = GL_UNSIGNED_BYTE; - break; - case SDL_PIXELFORMAT_BGR24: - *internalFormat = GL_RGB8; - *format = GL_BGR; - *type = GL_UNSIGNED_BYTE; - break; - case SDL_PIXELFORMAT_BGR888: - *internalFormat = GL_RGB8; - *format = GL_RGBA; - *type = GL_UNSIGNED_BYTE; - break; case SDL_PIXELFORMAT_ARGB8888: -#ifdef __MACOSX__ - *internalFormat = GL_RGBA; - *format = GL_BGRA; - *type = GL_UNSIGNED_INT_8_8_8_8_REV; -#else *internalFormat = GL_RGBA8; *format = GL_BGRA; - *type = GL_UNSIGNED_BYTE; -#endif - break; - case SDL_PIXELFORMAT_ABGR8888: - *internalFormat = GL_RGBA8; - *format = GL_RGBA; - *type = GL_UNSIGNED_BYTE; - break; - case SDL_PIXELFORMAT_ARGB2101010: - *internalFormat = GL_RGB10_A2; - *format = GL_BGRA; - *type = GL_UNSIGNED_INT_2_10_10_10_REV; - break; - case SDL_PIXELFORMAT_UYVY: - if 
(renderdata->GL_APPLE_ycbcr_422_supported) { - *internalFormat = GL_RGB; - *format = GL_YCBCR_422_APPLE; -#if SDL_BYTEORDER == SDL_LIL_ENDIAN - *type = GL_UNSIGNED_SHORT_8_8_APPLE; -#else - *type = GL_UNSIGNED_SHORT_8_8_REV_APPLE; -#endif - } else if (renderdata->GL_MESA_ycbcr_texture_supported) { - *internalFormat = GL_YCBCR_MESA; - *format = GL_YCBCR_MESA; -#if SDL_BYTEORDER == SDL_LIL_ENDIAN - *type = GL_UNSIGNED_SHORT_8_8_MESA; -#else - *type = GL_UNSIGNED_SHORT_8_8_REV_MESA; -#endif - } else if (renderdata->GL_ARB_fragment_program_supported) { - *internalFormat = GL_RGBA; - *format = GL_RGBA; - *type = GL_UNSIGNED_BYTE; - } else { - return SDL_FALSE; - } - break; - case SDL_PIXELFORMAT_YUY2: - if (renderdata->GL_APPLE_ycbcr_422_supported) { - *internalFormat = GL_RGB; - *format = GL_YCBCR_422_APPLE; -#if SDL_BYTEORDER == SDL_LIL_ENDIAN - *type = GL_UNSIGNED_SHORT_8_8_REV_APPLE; -#else - *type = GL_UNSIGNED_SHORT_8_8_APPLE; -#endif - } else if (renderdata->GL_MESA_ycbcr_texture_supported) { - *internalFormat = GL_YCBCR_MESA; - *format = GL_YCBCR_MESA; -#if SDL_BYTEORDER == SDL_LIL_ENDIAN - *type = GL_UNSIGNED_SHORT_8_8_REV_MESA; -#else - *type = GL_UNSIGNED_SHORT_8_8_MESA; -#endif - } else { - return SDL_FALSE; - } + *type = GL_UNSIGNED_INT_8_8_8_8_REV; break; default: return SDL_FALSE; @@ -668,7 +355,6 @@ GLint internalFormat; GLenum format, type; int texture_w, texture_h; - GLuint shader = 0; GLenum result; GL_ActivateRenderer(renderer); @@ -679,21 +365,6 @@ SDL_GetPixelFormatName(texture->format)); return -1; } - if (texture->format == SDL_PIXELFORMAT_UYVY && - !renderdata->GL_APPLE_ycbcr_422_supported && - !renderdata->GL_MESA_ycbcr_texture_supported && - renderdata->GL_ARB_fragment_program_supported) { - if (renderdata->fragment_program_UYVY == 0) { - renderdata->fragment_program_UYVY = - compile_shader(renderdata, GL_FRAGMENT_PROGRAM_ARB, - fragment_program_UYVY_source_code); - if (renderdata->fragment_program_UYVY == 0) { - set_shader_error(renderdata, 
"UYVY"); - return -1; - } - } - shader = renderdata->fragment_program_UYVY; - } data = (GL_TextureData *) SDL_calloc(1, sizeof(*data)); if (!data) { @@ -701,10 +372,8 @@ return -1; } - data->shader = shader; - if (texture->access == SDL_TEXTUREACCESS_STREAMING) { - data->pitch = texture->w * bytes_per_pixel(texture->format); + data->pitch = texture->w * SDL_BYTESPERPIXEL(texture->format); data->pixels = SDL_malloc(texture->h * data->pitch); if (!data->pixels) { SDL_OutOfMemory(); @@ -731,17 +400,6 @@ data->texh = (GLfloat) texture->h / texture_h; } - /* YUV formats use RGBA but are really two bytes per pixel */ - if (internalFormat == GL_RGBA && bytes_per_pixel(texture->format) < 4) { - texture_w /= 2; - if (data->type == GL_TEXTURE_2D) { - data->texw *= 2.0f; - } - data->HACK_RYAN_FIXME = 2; - } else { - data->HACK_RYAN_FIXME = 1; - } - data->format = format; data->formattype = type; renderdata->glEnable(data->type); @@ -771,22 +429,13 @@ renderdata->glTexParameteri(data->type, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); } -/* This causes a crash in testoverlay for some reason. Apple bug? 
*/ -#if 0 if (texture->access == SDL_TEXTUREACCESS_STREAMING && texture->format == SDL_PIXELFORMAT_ARGB8888) { - /* - if (renderdata->glTextureRangeAPPLE) { - renderdata->glTextureRangeAPPLE(data->type, - texture->h * data->pitch, - data->pixels); - } - */ renderdata->glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); renderdata->glTexImage2D(data->type, 0, internalFormat, texture_w, texture_h, 0, format, type, data->pixels); - } else -#endif + } + else #endif { renderdata->glTexImage2D(data->type, 0, internalFormat, texture_w, @@ -801,26 +450,13 @@ return 0; } -static int -GL_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, - void **pixels, int *pitch) -{ - GL_TextureData *data = (GL_TextureData *) texture->driverdata; - - *pixels = data->pixels; - *pitch = data->pitch; - return 0; -} - static void SetupTextureUpdate(GL_RenderData * renderdata, SDL_Texture * texture, int pitch) { renderdata->glPixelStorei(GL_UNPACK_ALIGNMENT, 1); renderdata->glPixelStorei(GL_UNPACK_ROW_LENGTH, - (pitch / bytes_per_pixel(texture->format)) / - ((GL_TextureData *) texture->driverdata)-> - HACK_RYAN_FIXME); + (pitch / SDL_BYTESPERPIXEL(texture->format))); } static int @@ -851,18 +487,13 @@ static int GL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, void **pixels, - int *pitch) + const SDL_Rect * rect, void **pixels, int *pitch) { GL_TextureData *data = (GL_TextureData *) texture->driverdata; - if (markDirty) { - SDL_AddDirtyRect(&data->dirty, rect); - } - *pixels = (void *) ((Uint8 *) data->pixels + rect->y * data->pitch + - rect->x * bytes_per_pixel(texture->format)); + rect->x * SDL_BYTESPERPIXEL(texture->format)); *pitch = data->pitch; return 0; } @@ -870,18 +501,17 @@ static void GL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture) { -} + GL_RenderData *renderdata = (GL_RenderData *) renderer->driverdata; + GL_TextureData *data = (GL_TextureData *) texture->driverdata; + + 
GL_ActivateRenderer(renderer); -static void -GL_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, int numrects, - const SDL_Rect * rects) -{ - GL_TextureData *data = (GL_TextureData *) texture->driverdata; - int i; - - for (i = 0; i < numrects; ++i) { - SDL_AddDirtyRect(&data->dirty, &rects[i]); - } + SetupTextureUpdate(renderdata, texture, data->pitch); + renderdata->glEnable(data->type); + renderdata->glBindTexture(data->type, data->texture); + renderdata->glTexSubImage2D(data->type, 0, 0, 0, texture->w, texture->h, + data->format, data->formattype, data->pixels); + renderdata->glDisable(data->type); } static void @@ -1056,28 +686,6 @@ GL_ActivateRenderer(renderer); - if (texturedata->dirty.list) { - SDL_DirtyRect *dirty; - void *pixels; - int bpp = bytes_per_pixel(texture->format); - int pitch = texturedata->pitch; - - SetupTextureUpdate(data, texture, pitch); - data->glEnable(texturedata->type); - data->glBindTexture(texturedata->type, texturedata->texture); - for (dirty = texturedata->dirty.list; dirty; dirty = dirty->next) { - SDL_Rect *rect = &dirty->rect; - pixels = - (void *) ((Uint8 *) texturedata->pixels + rect->y * pitch + - rect->x * bpp); - data->glTexSubImage2D(texturedata->type, 0, rect->x, rect->y, - rect->w / texturedata->HACK_RYAN_FIXME, - rect->h, texturedata->format, - texturedata->formattype, pixels); - } - SDL_ClearDirtyRects(&texturedata->dirty); - } - minx = dstrect->x; miny = dstrect->y; maxx = dstrect->x + dstrect->w; @@ -1106,12 +714,6 @@ GL_SetBlendMode(data, texture->blendMode); - /* Set up the shader for the copy, if any */ - if (texturedata->shader) { - data->glEnable(GL_FRAGMENT_PROGRAM_ARB); - data->glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, texturedata->shader); - } - data->glBegin(GL_TRIANGLE_STRIP); data->glTexCoord2f(minu, minv); data->glVertex2f((GLfloat) minx, (GLfloat) miny); @@ -1123,10 +725,6 @@ data->glVertex2f((GLfloat) maxx, (GLfloat) maxy); data->glEnd(); - if (texturedata->shader) { - 
data->glDisable(GL_FRAGMENT_PROGRAM_ARB); - } - data->glDisable(texturedata->type); return 0; @@ -1155,13 +753,13 @@ data->glPixelStorei(GL_PACK_ALIGNMENT, 1); data->glPixelStorei(GL_PACK_ROW_LENGTH, - (pitch / bytes_per_pixel(pixel_format))); + (pitch / SDL_BYTESPERPIXEL(pixel_format))); data->glReadPixels(rect->x, (h-rect->y)-rect->h, rect->w, rect->h, format, type, pixels); /* Flip the rows to be top-down */ - length = rect->w * bytes_per_pixel(pixel_format); + length = rect->w * SDL_BYTESPERPIXEL(pixel_format); src = (Uint8*)pixels + (rect->h-1)*pitch; dst = (Uint8*)pixels; tmp = SDL_stack_alloc(Uint8, length); @@ -1201,7 +799,7 @@ data->glPixelStorei(GL_UNPACK_ALIGNMENT, 1); data->glPixelStorei(GL_UNPACK_ROW_LENGTH, - (pitch / bytes_per_pixel(pixel_format))); + (pitch / SDL_BYTESPERPIXEL(pixel_format))); /* Flip the rows to be bottom-up */ length = rect->h * rect->w * pitch; @@ -1244,13 +842,9 @@ if (data->texture) { renderdata->glDeleteTextures(1, &data->texture); } - if (data->palette) { - SDL_free(data->palette); - } if (data->pixels) { SDL_free(data->pixels); } - SDL_FreeDirtyRects(&data->dirty); SDL_free(data); texture->driverdata = NULL; } @@ -1262,16 +856,6 @@ if (data) { if (data->context) { - if (data->GL_ARB_fragment_program_supported) { - data->glDisable(GL_FRAGMENT_PROGRAM_ARB); - data->glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0); - if (data->fragment_program_UYVY && - data->fragment_program_UYVY != ~0) { - data->glDeleteProgramsARB(1, - &data->fragment_program_UYVY); - } - } - /* SDL_GL_MakeCurrent(0, NULL); *//* doesn't do anything */ SDL_GL_DeleteContext(data->context); }
--- a/src/render/opengles/SDL_renderer_gles.c Wed Feb 02 22:55:12 2011 -0800 +++ b/src/render/opengles/SDL_renderer_gles.c Thu Feb 03 00:19:40 2011 -0800 @@ -49,19 +49,13 @@ static void GLES_WindowEvent(SDL_Renderer * renderer, const SDL_WindowEvent *event); static int GLES_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static int GLES_QueryTexturePixels(SDL_Renderer * renderer, - SDL_Texture * texture, void **pixels, - int *pitch); static int GLES_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch); static int GLES_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, - void **pixels, int *pitch); + const SDL_Rect * rect, void **pixels, int *pitch); static void GLES_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static void GLES_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, - int numrects, const SDL_Rect * rects); static int GLES_RenderDrawPoints(SDL_Renderer * renderer, const SDL_Point * points, int count); static int GLES_RenderDrawLines(SDL_Renderer * renderer, @@ -82,15 +76,8 @@ { "opengl_es", (SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED), - 6, - { - /* OpenGL ES 1.x supported formats list */ - SDL_PIXELFORMAT_RGBA4444, - SDL_PIXELFORMAT_RGBA5551, - SDL_PIXELFORMAT_RGB565, - SDL_PIXELFORMAT_RGB24, - SDL_PIXELFORMAT_BGR888, - SDL_PIXELFORMAT_ABGR8888}, + 1, + {SDL_PIXELFORMAT_ABGR8888}, 0, 0} }; @@ -125,7 +112,6 @@ GLenum formattype; void *pixels; int pitch; - SDL_DirtyRectList dirty; } GLES_TextureData; static void @@ -205,11 +191,9 @@ renderer->WindowEvent = GLES_WindowEvent; renderer->CreateTexture = GLES_CreateTexture; - renderer->QueryTexturePixels = GLES_QueryTexturePixels; renderer->UpdateTexture = GLES_UpdateTexture; renderer->LockTexture = GLES_LockTexture; renderer->UnlockTexture = GLES_UnlockTexture; - renderer->DirtyTexture = GLES_DirtyTexture; renderer->RenderDrawPoints = 
GLES_RenderDrawPoints; renderer->RenderDrawLines = GLES_RenderDrawLines; renderer->RenderFillRects = GLES_RenderFillRects; @@ -343,32 +327,11 @@ GLES_ActivateRenderer(renderer); switch (texture->format) { - case SDL_PIXELFORMAT_RGB24: - internalFormat = GL_RGB; - format = GL_RGB; - type = GL_UNSIGNED_BYTE; - break; - case SDL_PIXELFORMAT_BGR888: case SDL_PIXELFORMAT_ABGR8888: internalFormat = GL_RGBA; format = GL_RGBA; type = GL_UNSIGNED_BYTE; break; - case SDL_PIXELFORMAT_RGB565: - internalFormat = GL_RGB; - format = GL_RGB; - type = GL_UNSIGNED_SHORT_5_6_5; - break; - case SDL_PIXELFORMAT_RGBA5551: - internalFormat = GL_RGBA; - format = GL_RGBA; - type = GL_UNSIGNED_SHORT_5_5_5_1; - break; - case SDL_PIXELFORMAT_RGBA4444: - internalFormat = GL_RGBA; - format = GL_RGBA; - type = GL_UNSIGNED_SHORT_4_4_4_4; - break; default: SDL_SetError("Texture format %s not supported by OpenGL ES", SDL_GetPixelFormatName(texture->format)); @@ -428,23 +391,10 @@ return 0; } -static int -GLES_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, - void **pixels, int *pitch) -{ - GLES_TextureData *data = (GLES_TextureData *) texture->driverdata; - - *pixels = data->pixels; - *pitch = data->pitch; - return 0; -} - static void SetupTextureUpdate(GLES_RenderData * renderdata, SDL_Texture * texture, int pitch) { - GLES_TextureData *data = (GLES_TextureData *) texture->driverdata; - renderdata->glBindTexture(data->type, data->texture); renderdata->glPixelStorei(GL_UNPACK_ALIGNMENT, 1); } @@ -463,8 +413,9 @@ GLES_ActivateRenderer(renderer); renderdata->glGetError(); + SetupTextureUpdate(renderdata, texture, pitch); renderdata->glEnable(data->type); - SetupTextureUpdate(renderdata, texture, pitch); + renderdata->glBindTexture(data->type, data->texture); if( rect->w * bpp == pitch ) { temp_buffer = (void *)pixels; /* No need to reformat */ @@ -498,15 +449,10 @@ static int GLES_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, 
void **pixels, - int *pitch) + const SDL_Rect * rect, void **pixels, int *pitch) { GLES_TextureData *data = (GLES_TextureData *) texture->driverdata; - if (markDirty) { - SDL_AddDirtyRect(&data->dirty, rect); - } - *pixels = (void *) ((Uint8 *) data->pixels + rect->y * data->pitch + rect->x * SDL_BYTESPERPIXEL(texture->format)); @@ -517,18 +463,18 @@ static void GLES_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture) { -} + GLES_RenderData *renderdata = (GLES_RenderData *) renderer->driverdata; + GLES_TextureData *data = (GLES_TextureData *) texture->driverdata; + + GLES_ActivateRenderer(renderer); -static void -GLES_DirtyTexture(SDL_Renderer * renderer, SDL_Texture * texture, - int numrects, const SDL_Rect * rects) -{ - GLES_TextureData *data = (GLES_TextureData *) texture->driverdata; - int i; - - for (i = 0; i < numrects; ++i) { - SDL_AddDirtyRect(&data->dirty, &rects[i]); - } + SetupTextureUpdate(renderdata, texture, data->pitch); + renderdata->glEnable(data->type); + renderdata->glBindTexture(data->type, data->texture); + renderdata->glTexSubImage2D(data->type, 0, 0, 0, texture->w, + texture->h, data->format, data->formattype, + data->pixels); + renderdata->glDisable(data->type); } static void @@ -676,49 +622,6 @@ data->glEnable(GL_TEXTURE_2D); - if (texturedata->dirty.list) { - SDL_DirtyRect *dirty; - void *pixels; - int bpp = SDL_BYTESPERPIXEL(texture->format); - int pitch = texturedata->pitch; - - SetupTextureUpdate(data, texture, pitch); - - data->glBindTexture(texturedata->type, texturedata->texture); - for (dirty = texturedata->dirty.list; dirty; dirty = dirty->next) { - SDL_Rect *rect = &dirty->rect; - pixels = - (void *) ((Uint8 *) texturedata->pixels + rect->y * pitch + - rect->x * bpp); - /* There is no GL_UNPACK_ROW_LENGTH in OpenGLES - we must do this reformatting ourselves(!) 
- - maybe it'd be a good idea to keep a temp buffer around - for this purpose rather than allocating it each time - */ - if( rect->x == 0 && rect->w * bpp == pitch ) { - temp_buffer = pixels; /* Updating whole texture, no need to reformat */ - } else { - temp_buffer = SDL_malloc(rect->w * rect->h * bpp); - temp_ptr = temp_buffer; - for (i = 0; i < rect->h; i++) { - SDL_memcpy(temp_ptr, pixels, rect->w * bpp); - temp_ptr += rect->w * bpp; - pixels += pitch; - } - } - - data->glTexSubImage2D(texturedata->type, 0, rect->x, rect->y, - rect->w, rect->h, texturedata->format, - texturedata->formattype, temp_buffer); - - if( temp_buffer != pixels ) { - SDL_free(temp_buffer); - } - } - SDL_ClearDirtyRects(&texturedata->dirty); - } - data->glBindTexture(texturedata->type, texturedata->texture); if (texture->modMode) { @@ -818,7 +721,6 @@ if (data->pixels) { SDL_free(data->pixels); } - SDL_FreeDirtyRects(&data->dirty); SDL_free(data); texture->driverdata = NULL; }
--- a/src/render/software/SDL_renderer_sw.c Wed Feb 02 22:55:12 2011 -0800 +++ b/src/render/software/SDL_renderer_sw.c Thu Feb 03 00:19:40 2011 -0800 @@ -23,7 +23,6 @@ #include "../SDL_sysrender.h" #include "../../video/SDL_pixels_c.h" -#include "../../video/SDL_yuv_sw_c.h" /* SDL surface based renderer implementation */ @@ -32,9 +31,6 @@ static void SW_WindowEvent(SDL_Renderer * renderer, const SDL_WindowEvent *event); static int SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture); -static int SW_QueryTexturePixels(SDL_Renderer * renderer, - SDL_Texture * texture, void **pixels, - int *pitch); static int SW_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture); static int SW_SetTextureAlphaMod(SDL_Renderer * renderer, @@ -45,8 +41,7 @@ const SDL_Rect * rect, const void *pixels, int pitch); static int SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, void **pixels, - int *pitch); + const SDL_Rect * rect, void **pixels, int *pitch); static void SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture); static int SW_RenderDrawPoints(SDL_Renderer * renderer, const SDL_Point * points, int count); @@ -70,7 +65,7 @@ { "software", (SDL_RENDERER_PRESENTVSYNC), - 13, + 8, { SDL_PIXELFORMAT_RGB555, SDL_PIXELFORMAT_RGB565, @@ -79,12 +74,8 @@ SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_RGBA8888, SDL_PIXELFORMAT_ABGR8888, - SDL_PIXELFORMAT_BGRA8888, - SDL_PIXELFORMAT_YV12, - SDL_PIXELFORMAT_IYUV, - SDL_PIXELFORMAT_YUY2, - SDL_PIXELFORMAT_UYVY, - SDL_PIXELFORMAT_YVYU}, + SDL_PIXELFORMAT_BGRA8888 + }, 0, 0} }; @@ -96,7 +87,6 @@ SDL_Texture *texture; SDL_Surface surface; SDL_Renderer *renderer; - SDL_DirtyRectList dirty; } SW_RenderData; static SDL_Texture * @@ -136,6 +126,7 @@ SDL_Renderer *renderer; SW_RenderData *data; int i; + int w, h; Uint32 format; int bpp; Uint32 Rmask, Gmask, Bmask, Amask; @@ -163,7 +154,6 @@ } renderer->WindowEvent = SW_WindowEvent; renderer->CreateTexture = 
SW_CreateTexture; - renderer->QueryTexturePixels = SW_QueryTexturePixels; renderer->SetTextureColorMod = SW_SetTextureColorMod; renderer->SetTextureAlphaMod = SW_SetTextureAlphaMod; renderer->SetTextureBlendMode = SW_SetTextureBlendMode; @@ -217,8 +207,8 @@ } /* Create the textures we'll use for display */ - data->texture = - CreateTexture(data->renderer, data->format, window->w, window->h); + SDL_GetWindowSize(window, &w, &h); + data->texture = CreateTexture(data->renderer, data->format, w, h); if (!data->texture) { SW_DestroyRenderer(renderer); return NULL; @@ -243,11 +233,12 @@ if (data->updateSize) { /* Recreate the textures for the new window size */ + int w, h; if (data->texture) { DestroyTexture(data->renderer, data->texture); } - data->texture = CreateTexture(data->renderer, data->format, - window->w, window->h); + SDL_GetWindowSize(window, &w, &h); + data->texture = CreateTexture(data->renderer, data->format, w, h); if (data->texture) { data->updateSize = SDL_FALSE; } @@ -268,30 +259,25 @@ static int SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture) { - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - texture->driverdata = - SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h); - } else { - int bpp; - Uint32 Rmask, Gmask, Bmask, Amask; + int bpp; + Uint32 Rmask, Gmask, Bmask, Amask; + + if (!SDL_PixelFormatEnumToMasks + (texture->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) { + SDL_SetError("Unknown texture format"); + return -1; + } - if (!SDL_PixelFormatEnumToMasks - (texture->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) { - SDL_SetError("Unknown texture format"); - return -1; - } + texture->driverdata = + SDL_CreateRGBSurface(0, texture->w, texture->h, bpp, Rmask, Gmask, + Bmask, Amask); + SDL_SetSurfaceColorMod(texture->driverdata, texture->r, texture->g, + texture->b); + SDL_SetSurfaceAlphaMod(texture->driverdata, texture->a); + SDL_SetSurfaceBlendMode(texture->driverdata, texture->blendMode); - texture->driverdata = - 
SDL_CreateRGBSurface(0, texture->w, texture->h, bpp, Rmask, Gmask, - Bmask, Amask); - SDL_SetSurfaceColorMod(texture->driverdata, texture->r, texture->g, - texture->b); - SDL_SetSurfaceAlphaMod(texture->driverdata, texture->a); - SDL_SetSurfaceBlendMode(texture->driverdata, texture->blendMode); - - if (texture->access == SDL_TEXTUREACCESS_STATIC) { - SDL_SetSurfaceRLE(texture->driverdata, 1); - } + if (texture->access == SDL_TEXTUREACCESS_STATIC) { + SDL_SetSurfaceRLE(texture->driverdata, 1); } if (!texture->driverdata) { @@ -301,23 +287,6 @@ } static int -SW_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, - void **pixels, int *pitch) -{ - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - return SDL_SW_QueryYUVTexturePixels((SDL_SW_YUVTexture *) - texture->driverdata, pixels, - pitch); - } else { - SDL_Surface *surface = (SDL_Surface *) texture->driverdata; - - *pixels = surface->pixels; - *pitch = surface->pitch; - return 0; - } -} - -static int SW_SetTextureColorMod(SDL_Renderer * renderer, SDL_Texture * texture) { SDL_Surface *surface = (SDL_Surface *) texture->driverdata; @@ -343,56 +312,40 @@ SW_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch) { - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - return SDL_SW_UpdateYUVTexture((SDL_SW_YUVTexture *) - texture->driverdata, rect, pixels, - pitch); - } else { - SDL_Surface *surface = (SDL_Surface *) texture->driverdata; - Uint8 *src, *dst; - int row; - size_t length; + SDL_Surface *surface = (SDL_Surface *) texture->driverdata; + Uint8 *src, *dst; + int row; + size_t length; - src = (Uint8 *) pixels; - dst = - (Uint8 *) surface->pixels + rect->y * surface->pitch + - rect->x * surface->format->BytesPerPixel; - length = rect->w * surface->format->BytesPerPixel; - for (row = 0; row < rect->h; ++row) { - SDL_memcpy(dst, src, length); - src += pitch; - dst += surface->pitch; - } - return 0; + src = (Uint8 *) pixels; + dst = (Uint8 
*) surface->pixels + + rect->y * surface->pitch + + rect->x * surface->format->BytesPerPixel; + length = rect->w * surface->format->BytesPerPixel; + for (row = 0; row < rect->h; ++row) { + SDL_memcpy(dst, src, length); + src += pitch; + dst += surface->pitch; } + return 0; } static int SW_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, - const SDL_Rect * rect, int markDirty, void **pixels, - int *pitch) + const SDL_Rect * rect, void **pixels, int *pitch) { - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - return SDL_SW_LockYUVTexture((SDL_SW_YUVTexture *) - texture->driverdata, rect, markDirty, - pixels, pitch); - } else { - SDL_Surface *surface = (SDL_Surface *) texture->driverdata; + SDL_Surface *surface = (SDL_Surface *) texture->driverdata; - *pixels = - (void *) ((Uint8 *) surface->pixels + rect->y * surface->pitch + - rect->x * surface->format->BytesPerPixel); - *pitch = surface->pitch; - return 0; - } + *pixels = + (void *) ((Uint8 *) surface->pixels + rect->y * surface->pitch + + rect->x * surface->format->BytesPerPixel); + *pitch = surface->pitch; + return 0; } static void SW_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture) { - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - SDL_SW_UnlockYUVTexture((SDL_SW_YUVTexture *) texture->driverdata); - } } static int @@ -420,7 +373,7 @@ return 0; } - if (data->renderer->LockTexture(data->renderer, texture, &rect, 1, + if (data->renderer->LockTexture(data->renderer, texture, &rect, &data->surface.pixels, &data->surface.pitch) < 0) { return -1; @@ -484,7 +437,7 @@ return 0; } - if (data->renderer->LockTexture(data->renderer, texture, &rect, 1, + if (data->renderer->LockTexture(data->renderer, texture, &rect, &data->surface.pixels, &data->surface.pitch) < 0) { return -1; @@ -558,7 +511,7 @@ continue; } - if (data->renderer->LockTexture(data->renderer, texture, &rect, 1, + if (data->renderer->LockTexture(data->renderer, texture, &rect, &data->surface.pixels, &data->surface.pitch) < 0) { 
return -1; @@ -586,38 +539,31 @@ const SDL_Rect * srcrect, const SDL_Rect * dstrect) { SW_RenderData *data = (SW_RenderData *) renderer->driverdata; + SDL_Surface *surface; + SDL_Rect real_srcrect; + SDL_Rect real_dstrect; int status; if (!SW_ActivateRenderer(renderer)) { return -1; } - if (data->renderer->LockTexture(data->renderer, data->texture, - dstrect, 1, &data->surface.pixels, + if (data->renderer->LockTexture(data->renderer, data->texture, dstrect, + &data->surface.pixels, &data->surface.pitch) < 0) { return -1; } - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - status = - SDL_SW_CopyYUVToRGB((SDL_SW_YUVTexture *) texture->driverdata, - srcrect, data->format, dstrect->w, dstrect->h, - data->surface.pixels, data->surface.pitch); - } else { - SDL_Surface *surface = (SDL_Surface *) texture->driverdata; - SDL_Rect real_srcrect = *srcrect; - SDL_Rect real_dstrect; + surface = (SDL_Surface *) texture->driverdata; + real_srcrect = *srcrect; - data->surface.w = dstrect->w; - data->surface.h = dstrect->h; - data->surface.clip_rect.w = dstrect->w; - data->surface.clip_rect.h = dstrect->h; - real_dstrect = data->surface.clip_rect; + data->surface.w = dstrect->w; + data->surface.h = dstrect->h; + data->surface.clip_rect.w = dstrect->w; + data->surface.clip_rect.h = dstrect->h; + real_dstrect = data->surface.clip_rect; - status = - SDL_LowerBlit(surface, &real_srcrect, &data->surface, - &real_dstrect); - } + status = SDL_LowerBlit(surface, &real_srcrect, &data->surface, &real_dstrect); data->renderer->UnlockTexture(data->renderer, data->texture); return status; } @@ -632,8 +578,8 @@ return -1; } - if (data->renderer->LockTexture(data->renderer, data->texture, - rect, 0, &data->surface.pixels, + if (data->renderer->LockTexture(data->renderer, data->texture, rect, + &data->surface.pixels, &data->surface.pitch) < 0) { return -1; } @@ -656,8 +602,8 @@ return -1; } - if (data->renderer->LockTexture(data->renderer, data->texture, - rect, 1, &data->surface.pixels, + if 
(data->renderer->LockTexture(data->renderer, data->texture, rect, + &data->surface.pixels, &data->surface.pitch) < 0) { return -1; } @@ -692,13 +638,9 @@ static void SW_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture) { - if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) { - SDL_SW_DestroyYUVTexture((SDL_SW_YUVTexture *) texture->driverdata); - } else { - SDL_Surface *surface = (SDL_Surface *) texture->driverdata; + SDL_Surface *surface = (SDL_Surface *) texture->driverdata; - SDL_FreeSurface(surface); - } + SDL_FreeSurface(surface); } static void @@ -717,7 +659,6 @@ if (data->renderer) { data->renderer->DestroyRenderer(data->renderer); } - SDL_FreeDirtyRects(&data->dirty); SDL_free(data); } SDL_free(renderer);
--- a/src/video/SDL_leaks.h Wed Feb 02 22:55:12 2011 -0800 +++ b/src/video/SDL_leaks.h Thu Feb 03 00:19:40 2011 -0800 @@ -29,4 +29,5 @@ #ifdef CHECK_LEAKS extern int surfaces_allocated; #endif + /* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_rect.c Wed Feb 02 22:55:12 2011 -0800 +++ b/src/video/SDL_rect.c Thu Feb 03 00:19:40 2011 -0800 @@ -339,66 +339,4 @@ return SDL_TRUE; } -void -SDL_AddDirtyRect(SDL_DirtyRectList * list, const SDL_Rect * rect) -{ - SDL_DirtyRect *dirty; - - /* FIXME: At what point is this optimization too expensive? */ - for (dirty = list->list; dirty; dirty = dirty->next) { - if (SDL_HasIntersection(&dirty->rect, rect)) { - SDL_UnionRect(&dirty->rect, rect, &dirty->rect); - return; - } - } - - if (list->free) { - dirty = list->free; - list->free = dirty->next; - } else { - dirty = (SDL_DirtyRect *) SDL_malloc(sizeof(*dirty)); - if (!dirty) { - return; - } - } - dirty->rect = *rect; - dirty->next = list->list; - list->list = dirty; -} - -void -SDL_ClearDirtyRects(SDL_DirtyRectList * list) -{ - SDL_DirtyRect *prev, *curr; - - /* Skip to the end of the free list */ - prev = NULL; - for (curr = list->free; curr; curr = curr->next) { - prev = curr; - } - - /* Add the list entries to the end */ - if (prev) { - prev->next = list->list; - } else { - list->free = list->list; - } - list->list = NULL; -} - -void -SDL_FreeDirtyRects(SDL_DirtyRectList * list) -{ - while (list->list) { - SDL_DirtyRect *elem = list->list; - list->list = elem->next; - SDL_free(elem); - } - while (list->free) { - SDL_DirtyRect *elem = list->free; - list->free = elem->next; - SDL_free(elem); - } -} - /* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_mmx.c Wed Feb 02 22:55:12 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,432 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES - -#include "SDL_stdinc.h" - -#include "mmx.h" - -/* *INDENT-OFF* */ - -static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} }; -static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} }; -static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} }; - -static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} }; - -static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} }; -static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} }; -static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} }; -static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} }; - -static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} }; -static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} }; -static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} }; -static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} }; - -static mmx_t 
MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} }; -static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} }; - -/** - This MMX assembler is my first assembler/MMX program ever. - Thus it maybe buggy. - Send patches to: - mvogt@rhrk.uni-kl.de - - After it worked fine I have "obfuscated" the code a bit to have - more parallism in the MMX units. This means I moved - initilisation around and delayed other instruction. - Performance measurement did not show that this brought any advantage - but in theory it _should_ be faster this way. - - The overall performanve gain to the C based dither was 30%-40%. - The MMX routine calculates 256bit=8RGB values in each cycle - (4 for row1 & 4 for row2) - - The red/green/blue.. coefficents are taken from the mpeg_play - player. They look nice, but I dont know if you can have - better values, to avoid integer rounding errors. - - - IMPORTANT: - ========== - - It is a requirement that the cr/cb/lum are 8 byte aligned and - the out are 16byte aligned or you will/may get segfaults - -*/ - -void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod ) -{ - Uint32 *row1; - Uint32 *row2; - - unsigned char* y = lum +cols*rows; // Pointer to the end - int x = 0; - row1 = (Uint32 *)out; // 32 bit target - row2 = (Uint32 *)out+cols+mod; // start of second row - mod = (mod+cols+mod)*4; // increment for row1 in byte - - __asm__ __volatile__ ( - // tap dance to workaround the inability to use %%ebx at will... - // move one thing to the stack... - "pushl $0\n" // save a slot on the stack. - "pushl %%ebx\n" // save %%ebx. - "movl %0, %%ebx\n" // put the thing in ebx. - "movl %%ebx,4(%%esp)\n" // put the thing in the stack slot. - "popl %%ebx\n" // get back %%ebx (the PIC register). 
- - ".align 8\n" - "1:\n" - - // create Cr (result in mm1) - "pushl %%ebx\n" - "movl 4(%%esp),%%ebx\n" - "movd (%%ebx),%%mm1\n" // 0 0 0 0 v3 v2 v1 v0 - "popl %%ebx\n" - "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00 - "movd (%2), %%mm2\n" // 0 0 0 0 l3 l2 l1 l0 - "punpcklbw %%mm7,%%mm1\n" // 0 v3 0 v2 00 v1 00 v0 - "punpckldq %%mm1,%%mm1\n" // 00 v1 00 v0 00 v1 00 v0 - "psubw %9,%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 - - // create Cr_g (result in mm0) - "movq %%mm1,%%mm0\n" // r1 r1 r0 r0 r1 r1 r0 r0 - "pmullw %10,%%mm0\n" // red*-46dec=0.7136*64 - "pmullw %11,%%mm1\n" // red*89dec=1.4013*64 - "psraw $6, %%mm0\n" // red=red/64 - "psraw $6, %%mm1\n" // red=red/64 - - // create L1 L2 (result in mm2,mm4) - // L2=lum+cols - "movq (%2,%4),%%mm3\n" // 0 0 0 0 L3 L2 L1 L0 - "punpckldq %%mm3,%%mm2\n" // L3 L2 L1 L0 l3 l2 l1 l0 - "movq %%mm2,%%mm4\n" // L3 L2 L1 L0 l3 l2 l1 l0 - "pand %12,%%mm2\n" // L3 0 L1 0 l3 0 l1 0 - "pand %13,%%mm4\n" // 0 L2 0 L0 0 l2 0 l0 - "psrlw $8,%%mm2\n" // 0 L3 0 L1 0 l3 0 l1 - - // create R (result in mm6) - "movq %%mm2,%%mm5\n" // 0 L3 0 L1 0 l3 0 l1 - "movq %%mm4,%%mm6\n" // 0 L2 0 L0 0 l2 0 l0 - "paddsw %%mm1, %%mm5\n" // lum1+red:x R3 x R1 x r3 x r1 - "paddsw %%mm1, %%mm6\n" // lum1+red:x R2 x R0 x r2 x r0 - "packuswb %%mm5,%%mm5\n" // R3 R1 r3 r1 R3 R1 r3 r1 - "packuswb %%mm6,%%mm6\n" // R2 R0 r2 r0 R2 R0 r2 r0 - "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00 - "punpcklbw %%mm5,%%mm6\n" // R3 R2 R1 R0 r3 r2 r1 r0 - - // create Cb (result in mm1) - "movd (%1), %%mm1\n" // 0 0 0 0 u3 u2 u1 u0 - "punpcklbw %%mm7,%%mm1\n" // 0 u3 0 u2 00 u1 00 u0 - "punpckldq %%mm1,%%mm1\n" // 00 u1 00 u0 00 u1 00 u0 - "psubw %9,%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 - - // create Cb_g (result in mm5) - "movq %%mm1,%%mm5\n" // u1 u1 u0 u0 u1 u1 u0 u0 - "pmullw %14,%%mm5\n" // blue*-109dec=1.7129*64 - "pmullw %15,%%mm1\n" // blue*114dec=1.78125*64 - "psraw $6, %%mm5\n" // blue=red/64 - "psraw $6, %%mm1\n" // blue=blue/64 - - // create G (result 
in mm7) - "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1 - "movq %%mm4,%%mm7\n" // 0 L2 0 L0 0 l2 0 l1 - "paddsw %%mm5, %%mm3\n" // lum1+Cb_g:x G3t x G1t x g3t x g1t - "paddsw %%mm5, %%mm7\n" // lum1+Cb_g:x G2t x G0t x g2t x g0t - "paddsw %%mm0, %%mm3\n" // lum1+Cr_g:x G3 x G1 x g3 x g1 - "paddsw %%mm0, %%mm7\n" // lum1+blue:x G2 x G0 x g2 x g0 - "packuswb %%mm3,%%mm3\n" // G3 G1 g3 g1 G3 G1 g3 g1 - "packuswb %%mm7,%%mm7\n" // G2 G0 g2 g0 G2 G0 g2 g0 - "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0 - - // create B (result in mm5) - "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1 - "movq %%mm4,%%mm5\n" // 0 L2 0 L0 0 l2 0 l1 - "paddsw %%mm1, %%mm3\n" // lum1+blue:x B3 x B1 x b3 x b1 - "paddsw %%mm1, %%mm5\n" // lum1+blue:x B2 x B0 x b2 x b0 - "packuswb %%mm3,%%mm3\n" // B3 B1 b3 b1 B3 B1 b3 b1 - "packuswb %%mm5,%%mm5\n" // B2 B0 b2 b0 B2 B0 b2 b0 - "punpcklbw %%mm3,%%mm5\n" // B3 B2 B1 B0 b3 b2 b1 b0 - - // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb) - - "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 - "pxor %%mm4,%%mm4\n" // 0 0 0 0 0 0 0 0 - "movq %%mm6,%%mm1\n" // R3 R2 R1 R0 r3 r2 r1 r0 - "movq %%mm5,%%mm3\n" // B3 B2 B1 B0 b3 b2 b1 b0 - - // process lower lum - "punpcklbw %%mm4,%%mm1\n" // 0 r3 0 r2 0 r1 0 r0 - "punpcklbw %%mm4,%%mm3\n" // 0 b3 0 b2 0 b1 0 b0 - "movq %%mm1,%%mm2\n" // 0 r3 0 r2 0 r1 0 r0 - "movq %%mm3,%%mm0\n" // 0 b3 0 b2 0 b1 0 b0 - "punpcklwd %%mm1,%%mm3\n" // 0 r1 0 b1 0 r0 0 b0 - "punpckhwd %%mm2,%%mm0\n" // 0 r3 0 b3 0 r2 0 b2 - - "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 - "movq %%mm7,%%mm1\n" // G3 G2 G1 G0 g3 g2 g1 g0 - "punpcklbw %%mm1,%%mm2\n" // g3 0 g2 0 g1 0 g0 0 - "punpcklwd %%mm4,%%mm2\n" // 0 0 g1 0 0 0 g0 0 - "por %%mm3, %%mm2\n" // 0 r1 g1 b1 0 r0 g0 b0 - "movq %%mm2,(%3)\n" // wrote out ! row1 - - "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 - "punpcklbw %%mm1,%%mm4\n" // g3 0 g2 0 g1 0 g0 0 - "punpckhwd %%mm2,%%mm4\n" // 0 0 g3 0 0 0 g2 0 - "por %%mm0, %%mm4\n" // 0 r3 g3 b3 0 r2 g2 b2 - "movq %%mm4,8(%3)\n" // wrote out ! 
row1 - - // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb) - // this can be done "destructive" - "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 - "punpckhbw %%mm2,%%mm6\n" // 0 R3 0 R2 0 R1 0 R0 - "punpckhbw %%mm1,%%mm5\n" // G3 B3 G2 B2 G1 B1 G0 B0 - "movq %%mm5,%%mm1\n" // G3 B3 G2 B2 G1 B1 G0 B0 - "punpcklwd %%mm6,%%mm1\n" // 0 R1 G1 B1 0 R0 G0 B0 - "movq %%mm1,(%5)\n" // wrote out ! row2 - "punpckhwd %%mm6,%%mm5\n" // 0 R3 G3 B3 0 R2 G2 B2 - "movq %%mm5,8(%5)\n" // wrote out ! row2 - - "addl $4,%2\n" // lum+4 - "leal 16(%3),%3\n" // row1+16 - "leal 16(%5),%5\n" // row2+16 - "addl $2,(%%esp)\n" // cr+2 - "addl $2,%1\n" // cb+2 - - "addl $4,%6\n" // x+4 - "cmpl %4,%6\n" - - "jl 1b\n" - "addl %4,%2\n" // lum += cols - "addl %8,%3\n" // row1+= mod - "addl %8,%5\n" // row2+= mod - "movl $0,%6\n" // x=0 - "cmpl %7,%2\n" - "jl 1b\n" - - "addl $4,%%esp\n" // get rid of the stack slot we reserved. - "emms\n" // reset MMX registers. - : - : "m" (cr), "r"(cb),"r"(lum), - "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod), - "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB), - "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB), - "m"(MMX_UbluRGB) - ); -} - -void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod ) -{ - Uint16 *row1; - Uint16 *row2; - - unsigned char* y = lum +cols*rows; /* Pointer to the end */ - int x = 0; - row1 = (Uint16 *)out; /* 16 bit target */ - row2 = (Uint16 *)out+cols+mod; /* start of second row */ - mod = (mod+cols+mod)*2; /* increment for row1 in byte */ - - __asm__ __volatile__( - // tap dance to workaround the inability to use %%ebx at will... - // move one thing to the stack... - "pushl $0\n" // save a slot on the stack. - "pushl %%ebx\n" // save %%ebx. - "movl %0, %%ebx\n" // put the thing in ebx. - "movl %%ebx, 4(%%esp)\n" // put the thing in the stack slot. - "popl %%ebx\n" // get back %%ebx (the PIC register). 
- - ".align 8\n" - "1:\n" - - "movd (%1), %%mm0\n" // 4 Cb 0 0 0 0 u3 u2 u1 u0 - "pxor %%mm7, %%mm7\n" - "pushl %%ebx\n" - "movl 4(%%esp), %%ebx\n" - "movd (%%ebx), %%mm1\n" // 4 Cr 0 0 0 0 v3 v2 v1 v0 - "popl %%ebx\n" - - "punpcklbw %%mm7, %%mm0\n" // 4 W cb 0 u3 0 u2 0 u1 0 u0 - "punpcklbw %%mm7, %%mm1\n" // 4 W cr 0 v3 0 v2 0 v1 0 v0 - "psubw %9, %%mm0\n" - "psubw %9, %%mm1\n" - "movq %%mm0, %%mm2\n" // Cb 0 u3 0 u2 0 u1 0 u0 - "movq %%mm1, %%mm3\n" // Cr - "pmullw %10, %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0 - "movq (%2), %%mm6\n" // L1 l7 L6 L5 L4 L3 L2 L1 L0 - "pmullw %11, %%mm0\n" // Cb2blue - "pand %12, %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0 - "pmullw %13, %%mm3\n" // Cr2green - "movq (%2), %%mm7\n" // L2 - "pmullw %14, %%mm1\n" // Cr2red - "psrlw $8, %%mm7\n" // L2 00 L7 00 L5 00 L3 00 L1 - "pmullw %15, %%mm6\n" // lum1 - "paddw %%mm3, %%mm2\n" // Cb2green + Cr2green == green - "pmullw %15, %%mm7\n" // lum2 - - "movq %%mm6, %%mm4\n" // lum1 - "paddw %%mm0, %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0 - "movq %%mm4, %%mm5\n" // lum1 - "paddw %%mm1, %%mm4\n" // lum1 +red 00 R6 00 R4 00 R2 00 R0 - "paddw %%mm2, %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0 - "psraw $6, %%mm4\n" // R1 0 .. 64 - "movq %%mm7, %%mm3\n" // lum2 00 L7 00 L5 00 L3 00 L1 - "psraw $6, %%mm5\n" // G1 - .. + - "paddw %%mm0, %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1 - "psraw $6, %%mm6\n" // B1 0 .. 
64 - "packuswb %%mm4, %%mm4\n" // R1 R1 - "packuswb %%mm5, %%mm5\n" // G1 G1 - "packuswb %%mm6, %%mm6\n" // B1 B1 - "punpcklbw %%mm4, %%mm4\n" - "punpcklbw %%mm5, %%mm5\n" - - "pand %16, %%mm4\n" - "psllw $3, %%mm5\n" // GREEN 1 - "punpcklbw %%mm6, %%mm6\n" - "pand %17, %%mm5\n" - "pand %16, %%mm6\n" - "por %%mm5, %%mm4\n" // - "psrlw $11, %%mm6\n" // BLUE 1 - "movq %%mm3, %%mm5\n" // lum2 - "paddw %%mm1, %%mm3\n" // lum2 +red 00 R7 00 R5 00 R3 00 R1 - "paddw %%mm2, %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1 - "psraw $6, %%mm3\n" // R2 - "por %%mm6, %%mm4\n" // MM4 - "psraw $6, %%mm5\n" // G2 - "movq (%2, %4), %%mm6\n" // L3 load lum2 - "psraw $6, %%mm7\n" - "packuswb %%mm3, %%mm3\n" - "packuswb %%mm5, %%mm5\n" - "packuswb %%mm7, %%mm7\n" - "pand %12, %%mm6\n" // L3 - "punpcklbw %%mm3, %%mm3\n" - "punpcklbw %%mm5, %%mm5\n" - "pmullw %15, %%mm6\n" // lum3 - "punpcklbw %%mm7, %%mm7\n" - "psllw $3, %%mm5\n" // GREEN 2 - "pand %16, %%mm7\n" - "pand %16, %%mm3\n" - "psrlw $11, %%mm7\n" // BLUE 2 - "pand %17, %%mm5\n" - "por %%mm7, %%mm3\n" - "movq (%2,%4), %%mm7\n" // L4 load lum2 - "por %%mm5, %%mm3\n" // - "psrlw $8, %%mm7\n" // L4 - "movq %%mm4, %%mm5\n" - "punpcklwd %%mm3, %%mm4\n" - "pmullw %15, %%mm7\n" // lum4 - "punpckhwd %%mm3, %%mm5\n" - - "movq %%mm4, (%3)\n" // write row1 - "movq %%mm5, 8(%3)\n" // write row1 - - "movq %%mm6, %%mm4\n" // Lum3 - "paddw %%mm0, %%mm6\n" // Lum3 +blue - - "movq %%mm4, %%mm5\n" // Lum3 - "paddw %%mm1, %%mm4\n" // Lum3 +red - "paddw %%mm2, %%mm5\n" // Lum3 +green - "psraw $6, %%mm4\n" - "movq %%mm7, %%mm3\n" // Lum4 - "psraw $6, %%mm5\n" - "paddw %%mm0, %%mm7\n" // Lum4 +blue - "psraw $6, %%mm6\n" // Lum3 +blue - "movq %%mm3, %%mm0\n" // Lum4 - "packuswb %%mm4, %%mm4\n" - "paddw %%mm1, %%mm3\n" // Lum4 +red - "packuswb %%mm5, %%mm5\n" - "paddw %%mm2, %%mm0\n" // Lum4 +green - "packuswb %%mm6, %%mm6\n" - "punpcklbw %%mm4, %%mm4\n" - "punpcklbw %%mm5, %%mm5\n" - "punpcklbw %%mm6, %%mm6\n" - "psllw $3, %%mm5\n" // GREEN 3 - 
"pand %16, %%mm4\n" - "psraw $6, %%mm3\n" // psr 6 - "psraw $6, %%mm0\n" - "pand %16, %%mm6\n" // BLUE - "pand %17, %%mm5\n" - "psrlw $11, %%mm6\n" // BLUE 3 - "por %%mm5, %%mm4\n" - "psraw $6, %%mm7\n" - "por %%mm6, %%mm4\n" - "packuswb %%mm3, %%mm3\n" - "packuswb %%mm0, %%mm0\n" - "packuswb %%mm7, %%mm7\n" - "punpcklbw %%mm3, %%mm3\n" - "punpcklbw %%mm0, %%mm0\n" - "punpcklbw %%mm7, %%mm7\n" - "pand %16, %%mm3\n" - "pand %16, %%mm7\n" // BLUE - "psllw $3, %%mm0\n" // GREEN 4 - "psrlw $11, %%mm7\n" - "pand %17, %%mm0\n" - "por %%mm7, %%mm3\n" - "por %%mm0, %%mm3\n" - - "movq %%mm4, %%mm5\n" - - "punpcklwd %%mm3, %%mm4\n" - "punpckhwd %%mm3, %%mm5\n" - - "movq %%mm4, (%5)\n" - "movq %%mm5, 8(%5)\n" - - "addl $8, %6\n" - "addl $8, %2\n" - "addl $4, (%%esp)\n" - "addl $4, %1\n" - "cmpl %4, %6\n" - "leal 16(%3), %3\n" - "leal 16(%5),%5\n" // row2+16 - - "jl 1b\n" - "addl %4, %2\n" // lum += cols - "addl %8, %3\n" // row1+= mod - "addl %8, %5\n" // row2+= mod - "movl $0, %6\n" // x=0 - "cmpl %7, %2\n" - "jl 1b\n" - "addl $4, %%esp\n" // get rid of the stack slot we reserved. - "emms\n" - : - : "m" (cr), "r"(cb),"r"(lum), - "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod), - "m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5), - "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5), - "m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565) - ); -} - -/* *INDENT-ON* */ - -#endif /* GCC3 i386 inline assembly */ - -/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_sw.c Wed Feb 02 22:55:12 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1322 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -/* This is the software implementation of the YUV texture support */ - -/* This code was derived from code carrying the following copyright notices: - - * Copyright (c) 1995 The Regents of the University of California. - * All rights reserved. - * - * Permission to use, copy, modify, and distribute this software and its - * documentation for any purpose, without fee, and without written agreement is - * hereby granted, provided that the above copyright notice and the following - * two paragraphs appear in all copies of this software. - * - * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT - * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF - * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO - * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - - * Copyright (c) 1995 Erik Corry - * All rights reserved. - * - * Permission to use, copy, modify, and distribute this software and its - * documentation for any purpose, without fee, and without written agreement is - * hereby granted, provided that the above copyright notice and the following - * two paragraphs appear in all copies of this software. - * - * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, - * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF - * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" - * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, - * UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - - * Portions of this software Copyright (c) 1995 Brown University. - * All rights reserved. - * - * Permission to use, copy, modify, and distribute this software and its - * documentation for any purpose, without fee, and without written agreement - * is hereby granted, provided that the above copyright notice and the - * following two paragraphs appear in all copies of this software. 
- * - * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT - * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN - * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" - * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, - * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - */ - -#include "SDL_video.h" -#include "SDL_cpuinfo.h" -#include "SDL_yuv_sw_c.h" - - -/* The colorspace conversion functions */ - -#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES -extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod); -extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod); -#endif - -static void -Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned short *row1; - unsigned short *row2; - unsigned char *lum2; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - row1 = (unsigned short *) out; - row2 = row1 + cols + mod; - lum2 = lum + cols; - - mod += cols + mod; - - y = rows / 2; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - ++cr; - ++cb; - - L = *lum++; - *row1++ = (unsigned 
short) (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - - L = *lum++; - *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - - - /* Now, do second row. */ - - L = *lum2++; - *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - - L = *lum2++; - *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - } - - /* - * These values are at the start of the next line, (due - * to the ++'s above),but they need to be at the start - * of the line after that. - */ - lum += cols; - lum2 += cols; - row1 += mod; - row2 += mod; - } -} - -static void -Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int value; - unsigned char *row1; - unsigned char *row2; - unsigned char *lum2; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - row1 = out; - row2 = row1 + cols * 3 + mod * 3; - lum2 = lum + cols; - - mod += cols + mod; - mod *= 3; - - y = rows / 2; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - ++cr; - ++cb; - - L = *lum++; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - *row1++ = (value) & 0xFF; - *row1++ = (value >> 8) & 0xFF; - *row1++ = (value >> 16) & 0xFF; - - L = *lum++; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - *row1++ = (value) & 0xFF; - *row1++ = (value >> 8) & 0xFF; - *row1++ = (value >> 16) & 0xFF; - - - /* Now, do second row. 
*/ - - L = *lum2++; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - *row2++ = (value) & 0xFF; - *row2++ = (value >> 8) & 0xFF; - *row2++ = (value >> 16) & 0xFF; - - L = *lum2++; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - *row2++ = (value) & 0xFF; - *row2++ = (value >> 8) & 0xFF; - *row2++ = (value >> 16) & 0xFF; - } - - /* - * These values are at the start of the next line, (due - * to the ++'s above),but they need to be at the start - * of the line after that. - */ - lum += cols; - lum2 += cols; - row1 += mod; - row2 += mod; - } -} - -static void -Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int *row1; - unsigned int *row2; - unsigned char *lum2; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - row1 = (unsigned int *) out; - row2 = row1 + cols + mod; - lum2 = lum + cols; - - mod += cols + mod; - - y = rows / 2; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - ++cr; - ++cb; - - L = *lum++; - *row1++ = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - - L = *lum++; - *row1++ = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - - - /* Now, do second row. */ - - L = *lum2++; - *row2++ = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - - L = *lum2++; - *row2++ = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - } - - /* - * These values are at the start of the next line, (due - * to the ++'s above),but they need to be at the start - * of the line after that. 
- */ - lum += cols; - lum2 += cols; - row1 += mod; - row2 += mod; - } -} - -/* - * In this function I make use of a nasty trick. The tables have the lower - * 16 bits replicated in the upper 16. This means I can write ints and get - * the horisontal doubling for free (almost). - */ -static void -Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int *row1 = (unsigned int *) out; - const int next_row = cols + (mod / 2); - unsigned int *row2 = row1 + 2 * next_row; - unsigned char *lum2; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - lum2 = lum + cols; - - mod = (next_row * 3) + (mod / 2); - - y = rows / 2; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - ++cr; - ++cb; - - L = *lum++; - row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - row1++; - - L = *lum++; - row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - row1++; - - - /* Now, do second row. */ - - L = *lum2++; - row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - row2++; - - L = *lum2++; - row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - row2++; - } - - /* - * These values are at the start of the next line, (due - * to the ++'s above),but they need to be at the start - * of the line after that. 
- */ - lum += cols; - lum2 += cols; - row1 += mod; - row2 += mod; - } -} - -static void -Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int value; - unsigned char *row1 = out; - const int next_row = (cols * 2 + mod) * 3; - unsigned char *row2 = row1 + 2 * next_row; - unsigned char *lum2; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - lum2 = lum + cols; - - mod = next_row * 3 + mod * 3; - - y = rows / 2; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - ++cr; - ++cb; - - L = *lum++; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] = - row1[next_row + 3 + 0] = (value) & 0xFF; - row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] = - row1[next_row + 3 + 1] = (value >> 8) & 0xFF; - row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] = - row1[next_row + 3 + 2] = (value >> 16) & 0xFF; - row1 += 2 * 3; - - L = *lum++; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] = - row1[next_row + 3 + 0] = (value) & 0xFF; - row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] = - row1[next_row + 3 + 1] = (value >> 8) & 0xFF; - row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] = - row1[next_row + 3 + 2] = (value >> 16) & 0xFF; - row1 += 2 * 3; - - - /* Now, do second row. 
*/ - - L = *lum2++; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] = - row2[next_row + 3 + 0] = (value) & 0xFF; - row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] = - row2[next_row + 3 + 1] = (value >> 8) & 0xFF; - row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] = - row2[next_row + 3 + 2] = (value >> 16) & 0xFF; - row2 += 2 * 3; - - L = *lum2++; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] = - row2[next_row + 3 + 0] = (value) & 0xFF; - row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] = - row2[next_row + 3 + 1] = (value >> 8) & 0xFF; - row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] = - row2[next_row + 3 + 2] = (value >> 16) & 0xFF; - row2 += 2 * 3; - } - - /* - * These values are at the start of the next line, (due - * to the ++'s above),but they need to be at the start - * of the line after that. - */ - lum += cols; - lum2 += cols; - row1 += mod; - row2 += mod; - } -} - -static void -Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int *row1 = (unsigned int *) out; - const int next_row = cols * 2 + mod; - unsigned int *row2 = row1 + 2 * next_row; - unsigned char *lum2; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - lum2 = lum + cols; - - mod = (next_row * 3) + mod; - - y = rows / 2; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - ++cr; - ++cb; - - L = *lum++; - row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] = - (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row1 += 2; - - L = *lum++; - row1[0] = row1[1] = 
row1[next_row] = row1[next_row + 1] = - (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row1 += 2; - - - /* Now, do second row. */ - - L = *lum2++; - row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] = - (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row2 += 2; - - L = *lum2++; - row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] = - (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row2 += 2; - } - - /* - * These values are at the start of the next line, (due - * to the ++'s above),but they need to be at the start - * of the line after that. - */ - lum += cols; - lum2 += cols; - row1 += mod; - row2 += mod; - } -} - -static void -Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned short *row; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - row = (unsigned short *) out; - - y = rows; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - cr += 4; - cb += 4; - - L = *lum; - lum += 2; - *row++ = (unsigned short) (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - - L = *lum; - lum += 2; - *row++ = (unsigned short) (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - - } - - row += mod; - } -} - -static void -Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int value; - unsigned char *row; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - row = (unsigned char *) out; - mod *= 3; - y = rows; - while (y--) { - x = cols_2; - while (x--) { - 
register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - cr += 4; - cb += 4; - - L = *lum; - lum += 2; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - *row++ = (value) & 0xFF; - *row++ = (value >> 8) & 0xFF; - *row++ = (value >> 16) & 0xFF; - - L = *lum; - lum += 2; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - *row++ = (value) & 0xFF; - *row++ = (value >> 8) & 0xFF; - *row++ = (value >> 16) & 0xFF; - - } - row += mod; - } -} - -static void -Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int *row; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - row = (unsigned int *) out; - y = rows; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - cr += 4; - cb += 4; - - L = *lum; - lum += 2; - *row++ = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - - L = *lum; - lum += 2; - *row++ = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - - - } - row += mod; - } -} - -/* - * In this function I make use of a nasty trick. The tables have the lower - * 16 bits replicated in the upper 16. This means I can write ints and get - * the horisontal doubling for free (almost). 
- */ -static void -Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int *row = (unsigned int *) out; - const int next_row = cols + (mod / 2); - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - - y = rows; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - cr += 4; - cb += 4; - - L = *lum; - lum += 2; - row[0] = row[next_row] = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - row++; - - L = *lum; - lum += 2; - row[0] = row[next_row] = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | - rgb_2_pix[L + cb_b]); - row++; - - } - row += next_row; - } -} - -static void -Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int value; - unsigned char *row = out; - const int next_row = (cols * 2 + mod) * 3; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - y = rows; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - cr += 4; - cb += 4; - - L = *lum; - lum += 2; - value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row[0 + 0] = row[3 + 0] = row[next_row + 0] = - row[next_row + 3 + 0] = (value) & 0xFF; - row[0 + 1] = row[3 + 1] = row[next_row + 1] = - row[next_row + 3 + 1] = (value >> 8) & 0xFF; - row[0 + 2] = row[3 + 2] = row[next_row + 2] = - row[next_row + 3 + 2] = (value >> 16) & 0xFF; - row += 2 * 3; - - L = *lum; - lum += 2; 
- value = (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row[0 + 0] = row[3 + 0] = row[next_row + 0] = - row[next_row + 3 + 0] = (value) & 0xFF; - row[0 + 1] = row[3 + 1] = row[next_row + 1] = - row[next_row + 3 + 1] = (value >> 8) & 0xFF; - row[0 + 2] = row[3 + 2] = row[next_row + 2] = - row[next_row + 3 + 2] = (value >> 16) & 0xFF; - row += 2 * 3; - - } - row += next_row; - } -} - -static void -Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod) -{ - unsigned int *row = (unsigned int *) out; - const int next_row = cols * 2 + mod; - int x, y; - int cr_r; - int crb_g; - int cb_b; - int cols_2 = cols / 2; - mod += mod; - y = rows; - while (y--) { - x = cols_2; - while (x--) { - register int L; - - cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256]; - crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256] - + colortab[*cb + 2 * 256]; - cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256]; - cr += 4; - cb += 4; - - L = *lum; - lum += 2; - row[0] = row[1] = row[next_row] = row[next_row + 1] = - (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row += 2; - - L = *lum; - lum += 2; - row[0] = row[1] = row[next_row] = row[next_row + 1] = - (rgb_2_pix[L + cr_r] | - rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]); - row += 2; - - - } - - row += next_row; - } -} - -/* - * How many 1 bits are there in the Uint32. - * Low performance, do not call often. - */ -static int -number_of_bits_set(Uint32 a) -{ - if (!a) - return 0; - if (a & 1) - return 1 + number_of_bits_set(a >> 1); - return (number_of_bits_set(a >> 1)); -} - -/* - * How many 0 bits are there at least significant end of Uint32. - * Low performance, do not call often. 
- */ -static int -free_bits_at_bottom(Uint32 a) -{ - /* assume char is 8 bits */ - if (!a) - return sizeof(Uint32) * 8; - if (((Sint32) a) & 1l) - return 0; - return 1 + free_bits_at_bottom(a >> 1); -} - -static int -SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format) -{ - Uint32 *r_2_pix_alloc; - Uint32 *g_2_pix_alloc; - Uint32 *b_2_pix_alloc; - int i; - int bpp; - Uint32 Rmask, Gmask, Bmask, Amask; - - if (!SDL_PixelFormatEnumToMasks - (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) { - SDL_SetError("Unsupported YUV destination format"); - return -1; - } - - swdata->target_format = target_format; - r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768]; - g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768]; - b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768]; - - /* - * Set up entries 0-255 in rgb-to-pixel value tables. - */ - for (i = 0; i < 256; ++i) { - r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask)); - r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask); - r_2_pix_alloc[i + 256] |= Amask; - g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask)); - g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask); - g_2_pix_alloc[i + 256] |= Amask; - b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask)); - b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask); - b_2_pix_alloc[i + 256] |= Amask; - } - - /* - * If we have 16-bit output depth, then we double the value - * in the top word. This means that we can write out both - * pixels in the pixel doubling mode with one op. It is - * harmless in the normal case as storing a 32-bit value - * through a short pointer will lose the top bits anyway. 
- */ - if (SDL_BYTESPERPIXEL(target_format) == 2) { - for (i = 0; i < 256; ++i) { - r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16; - g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16; - b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16; - } - } - - /* - * Spread out the values we have to the rest of the array so that - * we do not need to check for overflow. - */ - for (i = 0; i < 256; ++i) { - r_2_pix_alloc[i] = r_2_pix_alloc[256]; - r_2_pix_alloc[i + 512] = r_2_pix_alloc[511]; - g_2_pix_alloc[i] = g_2_pix_alloc[256]; - g_2_pix_alloc[i + 512] = g_2_pix_alloc[511]; - b_2_pix_alloc[i] = b_2_pix_alloc[256]; - b_2_pix_alloc[i + 512] = b_2_pix_alloc[511]; - } - - /* You have chosen wisely... */ - switch (swdata->format) { - case SDL_PIXELFORMAT_YV12: - case SDL_PIXELFORMAT_IYUV: - if (SDL_BYTESPERPIXEL(target_format) == 2) { -#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES - /* inline assembly functions */ - if (SDL_HasMMX() && (Rmask == 0xF800) && - (Gmask == 0x07E0) && (Bmask == 0x001F) - && (swdata->w & 15) == 0) { -/*printf("Using MMX 16-bit 565 dither\n");*/ - swdata->Display1X = Color565DitherYV12MMX1X; - } else { -/*printf("Using C 16-bit dither\n");*/ - swdata->Display1X = Color16DitherYV12Mod1X; - } -#else - swdata->Display1X = Color16DitherYV12Mod1X; -#endif - swdata->Display2X = Color16DitherYV12Mod2X; - } - if (SDL_BYTESPERPIXEL(target_format) == 3) { - swdata->Display1X = Color24DitherYV12Mod1X; - swdata->Display2X = Color24DitherYV12Mod2X; - } - if (SDL_BYTESPERPIXEL(target_format) == 4) { -#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES - /* inline assembly functions */ - if (SDL_HasMMX() && (Rmask == 0x00FF0000) && - (Gmask == 0x0000FF00) && - (Bmask == 0x000000FF) && (swdata->w & 15) == 0) { -/*printf("Using MMX 32-bit dither\n");*/ - swdata->Display1X = ColorRGBDitherYV12MMX1X; - } else { -/*printf("Using C 32-bit dither\n");*/ - swdata->Display1X = 
Color32DitherYV12Mod1X; - } -#else - swdata->Display1X = Color32DitherYV12Mod1X; -#endif - swdata->Display2X = Color32DitherYV12Mod2X; - } - break; - case SDL_PIXELFORMAT_YUY2: - case SDL_PIXELFORMAT_UYVY: - case SDL_PIXELFORMAT_YVYU: - if (SDL_BYTESPERPIXEL(target_format) == 2) { - swdata->Display1X = Color16DitherYUY2Mod1X; - swdata->Display2X = Color16DitherYUY2Mod2X; - } - if (SDL_BYTESPERPIXEL(target_format) == 3) { - swdata->Display1X = Color24DitherYUY2Mod1X; - swdata->Display2X = Color24DitherYUY2Mod2X; - } - if (SDL_BYTESPERPIXEL(target_format) == 4) { - swdata->Display1X = Color32DitherYUY2Mod1X; - swdata->Display2X = Color32DitherYUY2Mod2X; - } - break; - default: - /* We should never get here (caught above) */ - break; - } - - if (swdata->display) { - SDL_FreeSurface(swdata->display); - swdata->display = NULL; - } - return 0; -} - -SDL_SW_YUVTexture * -SDL_SW_CreateYUVTexture(Uint32 format, int w, int h) -{ - SDL_SW_YUVTexture *swdata; - int *Cr_r_tab; - int *Cr_g_tab; - int *Cb_g_tab; - int *Cb_b_tab; - int i; - int CR, CB; - - swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata)); - if (!swdata) { - SDL_OutOfMemory(); - return NULL; - } - - switch (format) { - case SDL_PIXELFORMAT_YV12: - case SDL_PIXELFORMAT_IYUV: - case SDL_PIXELFORMAT_YUY2: - case SDL_PIXELFORMAT_UYVY: - case SDL_PIXELFORMAT_YVYU: - break; - default: - SDL_SetError("Unsupported YUV format"); - return NULL; - } - - swdata->format = format; - swdata->target_format = SDL_PIXELFORMAT_UNKNOWN; - swdata->w = w; - swdata->h = h; - swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2); - swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int)); - swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32)); - if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) { - SDL_OutOfMemory(); - SDL_SW_DestroyYUVTexture(swdata); - return NULL; - } - - /* Generate the tables for the display surface */ - Cr_r_tab = &swdata->colortab[0 * 256]; - Cr_g_tab = &swdata->colortab[1 * 
256]; - Cb_g_tab = &swdata->colortab[2 * 256]; - Cb_b_tab = &swdata->colortab[3 * 256]; - for (i = 0; i < 256; i++) { - /* Gamma correction (luminescence table) and chroma correction - would be done here. See the Berkeley mpeg_play sources. - */ - CB = CR = (i - 128); - Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR); - Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR); - Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB); - Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB); - } - - /* Find the pitch and offset values for the overlay */ - switch (format) { - case SDL_PIXELFORMAT_YV12: - case SDL_PIXELFORMAT_IYUV: - swdata->pitches[0] = w; - swdata->pitches[1] = swdata->pitches[0] / 2; - swdata->pitches[2] = swdata->pitches[0] / 2; - swdata->planes[0] = swdata->pixels; - swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h; - swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2; - break; - case SDL_PIXELFORMAT_YUY2: - case SDL_PIXELFORMAT_UYVY: - case SDL_PIXELFORMAT_YVYU: - swdata->pitches[0] = w * 2; - swdata->planes[0] = swdata->pixels; - break; - default: - /* We should never get here (caught above) */ - break; - } - - /* We're all done.. 
*/ - return (swdata); -} - -int -SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels, - int *pitch) -{ - *pixels = swdata->planes[0]; - *pitch = swdata->pitches[0]; - return 0; -} - -int -SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, - const void *pixels, int pitch) -{ - switch (swdata->format) { - case SDL_PIXELFORMAT_YV12: - case SDL_PIXELFORMAT_IYUV: - if (rect - && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w - || rect->h != swdata->h)) { - SDL_SetError - ("YV12 and IYUV textures only support full surface updates"); - return -1; - } - SDL_memcpy(swdata->pixels, pixels, swdata->h * swdata->w * 2); - break; - case SDL_PIXELFORMAT_YUY2: - case SDL_PIXELFORMAT_UYVY: - case SDL_PIXELFORMAT_YVYU: - { - Uint8 *src, *dst; - int row; - size_t length; - - src = (Uint8 *) pixels; - dst = - swdata->planes[0] + rect->y * swdata->pitches[0] + - rect->x * 2; - length = rect->w * 2; - for (row = 0; row < rect->h; ++row) { - SDL_memcpy(dst, src, length); - src += pitch; - dst += swdata->pitches[0]; - } - } - break; - } - return 0; -} - -int -SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, - int markDirty, void **pixels, int *pitch) -{ - switch (swdata->format) { - case SDL_PIXELFORMAT_YV12: - case SDL_PIXELFORMAT_IYUV: - if (rect - && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w - || rect->h != swdata->h)) { - SDL_SetError - ("YV12 and IYUV textures only support full surface locks"); - return -1; - } - break; - } - - *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2; - *pitch = swdata->pitches[0]; - return 0; -} - -void -SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata) -{ -} - -int -SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect, - Uint32 target_format, int w, int h, void *pixels, - int pitch) -{ - int stretch; - int scale_2x; - Uint8 *lum, *Cr, *Cb; - int mod; - - /* Make sure we're set up to display in the desired format */ - if 
(target_format != swdata->target_format) { - if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) { - return -1; - } - } - - stretch = 0; - scale_2x = 0; - if (srcrect->x || srcrect->y || srcrect->w < swdata->w - || srcrect->h < swdata->h) { - /* The source rectangle has been clipped. - Using a scratch surface is easier than adding clipped - source support to all the blitters, plus that would - slow them down in the general unclipped case. - */ - stretch = 1; - } else if ((srcrect->w != w) || (srcrect->h != h)) { - if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) { - scale_2x = 1; - } else { - stretch = 1; - } - } - if (stretch) { - int bpp; - Uint32 Rmask, Gmask, Bmask, Amask; - - if (swdata->display) { - swdata->display->w = w; - swdata->display->h = h; - swdata->display->pixels = pixels; - swdata->display->pitch = pitch; - } else { - /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */ - SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask, - &Bmask, &Amask); - swdata->display = - SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask, - Gmask, Bmask, Amask); - if (!swdata->display) { - return (-1); - } - } - if (!swdata->stretch) { - /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */ - SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask, - &Bmask, &Amask); - swdata->stretch = - SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask, - Gmask, Bmask, Amask); - if (!swdata->stretch) { - return (-1); - } - } - pixels = swdata->stretch->pixels; - pitch = swdata->stretch->pitch; - } - switch (swdata->format) { - case SDL_PIXELFORMAT_YV12: - lum = swdata->planes[0]; - Cr = swdata->planes[1]; - Cb = swdata->planes[2]; - break; - case SDL_PIXELFORMAT_IYUV: - lum = swdata->planes[0]; - Cr = swdata->planes[2]; - Cb = swdata->planes[1]; - break; - case SDL_PIXELFORMAT_YUY2: - lum = swdata->planes[0]; - Cr = lum + 3; - Cb = lum + 1; - break; - case SDL_PIXELFORMAT_UYVY: - lum = swdata->planes[0] + 1; - Cr = lum + 
1; - Cb = lum - 1; - break; - case SDL_PIXELFORMAT_YVYU: - lum = swdata->planes[0]; - Cr = lum + 1; - Cb = lum + 3; - break; - default: - SDL_SetError("Unsupported YUV format in copy"); - return (-1); - } - mod = (pitch / SDL_BYTESPERPIXEL(target_format)); - - if (scale_2x) { - mod -= (swdata->w * 2); - swdata->Display2X(swdata->colortab, swdata->rgb_2_pix, - lum, Cr, Cb, pixels, swdata->h, swdata->w, mod); - } else { - mod -= swdata->w; - swdata->Display1X(swdata->colortab, swdata->rgb_2_pix, - lum, Cr, Cb, pixels, swdata->h, swdata->w, mod); - } - if (stretch) { - SDL_Rect rect = *srcrect; - SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL); - } - return 0; -} - -void -SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata) -{ - if (swdata) { - if (swdata->pixels) { - SDL_free(swdata->pixels); - } - if (swdata->colortab) { - SDL_free(swdata->colortab); - } - if (swdata->rgb_2_pix) { - SDL_free(swdata->rgb_2_pix); - } - if (swdata->stretch) { - SDL_FreeSurface(swdata->stretch); - } - if (swdata->display) { - SDL_FreeSurface(swdata->display); - } - SDL_free(swdata); - } -} - -/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/SDL_yuv_sw_c.h Wed Feb 02 22:55:12 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -/* - SDL - Simple DirectMedia Layer - Copyright (C) 1997-2010 Sam Lantinga - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - Sam Lantinga - slouken@libsdl.org -*/ -#include "SDL_config.h" - -#include "SDL_video.h" -#include "SDL_sysvideo.h" - -/* This is the software implementation of the YUV texture support */ - -struct SDL_SW_YUVTexture -{ - Uint32 format; - Uint32 target_format; - int w, h; - Uint8 *pixels; - int *colortab; - Uint32 *rgb_2_pix; - void (*Display1X) (int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod); - void (*Display2X) (int *colortab, Uint32 * rgb_2_pix, - unsigned char *lum, unsigned char *cr, - unsigned char *cb, unsigned char *out, - int rows, int cols, int mod); - - /* These are just so we don't have to allocate them separately */ - Uint16 pitches[3]; - Uint8 *planes[3]; - - /* This is a temporary surface in case we have to stretch copy */ - SDL_Surface *stretch; - SDL_Surface *display; -}; - -typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture; - -SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h); -int 
SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels, - int *pitch); -int SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, - const void *pixels, int pitch); -int SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect, - int markDirty, void **pixels, int *pitch); -void SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata); -int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect, - Uint32 target_format, int w, int h, void *pixels, - int pitch); -void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata); - -/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/mmx.h Wed Feb 02 22:55:12 2011 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,642 +0,0 @@ -/* mmx.h - - MultiMedia eXtensions GCC interface library for IA32. - - To use this library, simply include this header file - and compile with GCC. You MUST have inlining enabled - in order for mmx_ok() to work; this can be done by - simply using -O on the GCC command line. - - Compiling with -DMMX_TRACE will cause detailed trace - output to be sent to stderr for each mmx operation. - This adds lots of code, and obviously slows execution to - a crawl, but can be very useful for debugging. - - THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY - EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT - LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY - AND FITNESS FOR ANY PARTICULAR PURPOSE. - - 1997-99 by H. Dietz and R. Fisher - - Notes: - It appears that the latest gas has the pand problem fixed, therefore - I'll undefine BROKEN_PAND by default. -*/ - -#ifndef _MMX_H -#define _MMX_H - - -/* Warning: at this writing, the version of GAS packaged - with most Linux distributions does not handle the - parallel AND operation mnemonic correctly. If the - symbol BROKEN_PAND is defined, a slower alternative - coding will be used. If execution of mmxtest results - in an illegal instruction fault, define this symbol. 
-*/ -#undef BROKEN_PAND - - -/* The type of an value that fits in an MMX register - (note that long long constant values MUST be suffixed - by LL and unsigned long long values by ULL, lest - they be truncated by the compiler) -*/ -typedef union -{ - long long q; /* Quadword (64-bit) value */ - unsigned long long uq; /* Unsigned Quadword */ - int d[2]; /* 2 Doubleword (32-bit) values */ - unsigned int ud[2]; /* 2 Unsigned Doubleword */ - short w[4]; /* 4 Word (16-bit) values */ - unsigned short uw[4]; /* 4 Unsigned Word */ - char b[8]; /* 8 Byte (8-bit) values */ - unsigned char ub[8]; /* 8 Unsigned Byte */ - float s[2]; /* Single-precision (32-bit) value */ -} __attribute__ ((aligned(8))) mmx_t; /* On an 8-byte (64-bit) boundary */ - - -#if 0 -/* Function to test if multimedia instructions are supported... -*/ -inline extern int -mm_support(void) -{ - /* Returns 1 if MMX instructions are supported, - 3 if Cyrix MMX and Extended MMX instructions are supported - 5 if AMD MMX and 3DNow! instructions are supported - 0 if hardware does not support any of these - */ - register int rval = 0; - - __asm__ __volatile__( - /* See if CPUID instruction is supported ... */ - /* ... Get copies of EFLAGS into eax and ecx */ - "pushf\n\t" - "popl %%eax\n\t" "movl %%eax, %%ecx\n\t" - /* ... Toggle the ID bit in one copy and store */ - /* to the EFLAGS reg */ - "xorl $0x200000, %%eax\n\t" - "push %%eax\n\t" "popf\n\t" - /* ... Get the (hopefully modified) EFLAGS */ - "pushf\n\t" "popl %%eax\n\t" - /* ... 
Compare and test result */ - "xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */ - /* Get standard CPUID information, and - go to a specific vendor section */ - "movl $0, %%eax\n\t" "cpuid\n\t" - /* Check for Intel */ - "cmpl $0x756e6547, %%ebx\n\t" - "jne TryAMD\n\t" - "cmpl $0x49656e69, %%edx\n\t" - "jne TryAMD\n\t" - "cmpl $0x6c65746e, %%ecx\n" - "jne TryAMD\n\t" "jmp Intel\n\t" - /* Check for AMD */ - "\nTryAMD:\n\t" - "cmpl $0x68747541, %%ebx\n\t" - "jne TryCyrix\n\t" - "cmpl $0x69746e65, %%edx\n\t" - "jne TryCyrix\n\t" - "cmpl $0x444d4163, %%ecx\n" - "jne TryCyrix\n\t" "jmp AMD\n\t" - /* Check for Cyrix */ - "\nTryCyrix:\n\t" - "cmpl $0x69727943, %%ebx\n\t" - "jne NotSupported2\n\t" - "cmpl $0x736e4978, %%edx\n\t" - "jne NotSupported3\n\t" - "cmpl $0x64616574, %%ecx\n\t" - "jne NotSupported4\n\t" - /* Drop through to Cyrix... */ - /* Cyrix Section */ - /* See if extended CPUID level 80000001 is supported */ - /* The value of CPUID/80000001 for the 6x86MX is undefined - according to the Cyrix CPU Detection Guide (Preliminary - Rev. 1.01 table 1), so we'll check the value of eax for - CPUID/0 to see if standard CPUID level 2 is supported. - According to the table, the only CPU which supports level - 2 is also the only one which supports extended CPUID levels. 
- */ - "cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t" /* Use standard CPUID instead */ - /* Extended CPUID supported (in theory), so get extended - features */ - "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t" /* Test for MMX */ - "jz NotSupported5\n\t" /* MMX not supported */ - "testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */ - "jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */ - "jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */ - "jmp Return\n\t" - /* AMD Section */ - "AMD:\n\t" - /* See if extended CPUID is supported */ - "movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t" /* Use standard CPUID instead */ - /* Extended CPUID supported, so get extended features */ - "movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */ - "jz NotSupported6\n\t" /* MMX not supported */ - "testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */ - "jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */ - "jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */ - "jmp Return\n\t" - /* Intel Section */ - "Intel:\n\t" - /* Check for MMX */ - "MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */ - "jz NotSupported7\n\t" /* MMX Not supported */ - "movl $1, %0:\n\n\t" /* MMX Supported */ - "jmp Return\n\t" - /* Nothing supported */ - "\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval): /* no input */ - :"eax", "ebx", "ecx", "edx"); - - /* Return */ - return (rval); -} - -/* Function to test if mmx instructions are supported... 
-*/ -inline extern int -mmx_ok(void) -{ - /* Returns 1 if MMX instructions are supported, 0 otherwise */ - return (mm_support() & 0x1); -} -#endif - -/* Helper functions for the instruction macros that follow... - (note that memory-to-register, m2r, instructions are nearly - as efficient as register-to-register, r2r, instructions; - however, memory-to-memory instructions are really simulated - as a convenience, and are only 1/3 as efficient) -*/ -#ifdef MMX_TRACE - -/* Include the stuff for printing a trace to stderr... -*/ - -#define mmx_i2r(op, imm, reg) \ - { \ - mmx_t mmx_trace; \ - mmx_trace.uq = (imm); \ - printf(#op "_i2r(" #imm "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */ ); \ - printf(#reg "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (imm)); \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */ ); \ - printf(#reg "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#define mmx_m2r(op, mem, reg) \ - { \ - mmx_t mmx_trace; \ - mmx_trace = (mem); \ - printf(#op "_m2r(" #mem "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */ ); \ - printf(#reg "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (mem)); \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */ ); \ - printf(#reg "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#define mmx_r2m(op, reg, mem) \ - { \ - mmx_t mmx_trace; \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */ ); \ - printf(#op "_r2m(" #reg "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - mmx_trace = (mem); \ - printf(#mem "=0x%08x%08x) => ", \ 
- mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ (#op " %%" #reg ", %0" \ - : "=X" (mem) \ - : /* nothing */ ); \ - mmx_trace = (mem); \ - printf(#mem "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#define mmx_r2r(op, regs, regd) \ - { \ - mmx_t mmx_trace; \ - __asm__ __volatile__ ("movq %%" #regs ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */ ); \ - printf(#op "_r2r(" #regs "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ ("movq %%" #regd ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */ ); \ - printf(#regd "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ (#op " %" #regs ", %" #regd); \ - __asm__ __volatile__ ("movq %%" #regd ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */ ); \ - printf(#regd "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#define mmx_m2m(op, mems, memd) \ - { \ - mmx_t mmx_trace; \ - mmx_trace = (mems); \ - printf(#op "_m2m(" #mems "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - mmx_trace = (memd); \ - printf(#memd "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ - #op " %1, %%mm0\n\t" \ - "movq %%mm0, %0" \ - : "=X" (memd) \ - : "X" (mems)); \ - mmx_trace = (memd); \ - printf(#memd "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#else - -/* These macros are a lot simpler without the tracing... 
-*/ - -#define mmx_i2r(op, imm, reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (imm) ) - -#define mmx_m2r(op, mem, reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "m" (mem)) - -#define mmx_r2m(op, reg, mem) \ - __asm__ __volatile__ (#op " %%" #reg ", %0" \ - : "=m" (mem) \ - : /* nothing */ ) - -#define mmx_r2r(op, regs, regd) \ - __asm__ __volatile__ (#op " %" #regs ", %" #regd) - -#define mmx_m2m(op, mems, memd) \ - __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ - #op " %1, %%mm0\n\t" \ - "movq %%mm0, %0" \ - : "=X" (memd) \ - : "X" (mems)) - -#endif - - -/* 1x64 MOVe Quadword - (this is both a load and a store... - in fact, it is the only way to store) -*/ -#define movq_m2r(var, reg) mmx_m2r(movq, var, reg) -#define movq_r2m(reg, var) mmx_r2m(movq, reg, var) -#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd) -#define movq(vars, vard) \ - __asm__ __volatile__ ("movq %1, %%mm0\n\t" \ - "movq %%mm0, %0" \ - : "=X" (vard) \ - : "X" (vars)) - - -/* 1x32 MOVe Doubleword - (like movq, this is both load and store... 
- but is most useful for moving things between - mmx registers and ordinary registers) -*/ -#define movd_m2r(var, reg) mmx_m2r(movd, var, reg) -#define movd_r2m(reg, var) mmx_r2m(movd, reg, var) -#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd) -#define movd(vars, vard) \ - __asm__ __volatile__ ("movd %1, %%mm0\n\t" \ - "movd %%mm0, %0" \ - : "=X" (vard) \ - : "X" (vars)) - - -/* 2x32, 4x16, and 8x8 Parallel ADDs -*/ -#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg) -#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd) -#define paddd(vars, vard) mmx_m2m(paddd, vars, vard) - -#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg) -#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd) -#define paddw(vars, vard) mmx_m2m(paddw, vars, vard) - -#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg) -#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd) -#define paddb(vars, vard) mmx_m2m(paddb, vars, vard) - - -/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic -*/ -#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg) -#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd) -#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard) - -#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg) -#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd) -#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard) - - -/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic -*/ -#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg) -#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd) -#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard) - -#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg) -#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd) -#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard) - - -/* 2x32, 4x16, and 8x8 Parallel SUBs -*/ -#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg) -#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd) -#define psubd(vars, vard) mmx_m2m(psubd, vars, vard) - -#define 
psubw_m2r(var, reg) mmx_m2r(psubw, var, reg) -#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd) -#define psubw(vars, vard) mmx_m2m(psubw, vars, vard) - -#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg) -#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd) -#define psubb(vars, vard) mmx_m2m(psubb, vars, vard) - - -/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic -*/ -#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg) -#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd) -#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard) - -#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg) -#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd) -#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard) - - -/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic -*/ -#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg) -#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd) -#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard) - -#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg) -#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd) -#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard) - - -/* 4x16 Parallel MULs giving Low 4x16 portions of results -*/ -#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg) -#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd) -#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard) - - -/* 4x16 Parallel MULs giving High 4x16 portions of results -*/ -#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg) -#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd) -#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard) - - -/* 4x16->2x32 Parallel Mul-ADD - (muls like pmullw, then adds adjacent 16-bit fields - in the multiply result to make the final 2x32 result) -*/ -#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg) -#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd) -#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard) - - -/* 1x64 bitwise 
AND -*/ -#ifdef BROKEN_PAND -#define pand_m2r(var, reg) \ - { \ - mmx_m2r(pandn, (mmx_t) -1LL, reg); \ - mmx_m2r(pandn, var, reg); \ - } -#define pand_r2r(regs, regd) \ - { \ - mmx_m2r(pandn, (mmx_t) -1LL, regd); \ - mmx_r2r(pandn, regs, regd) \ - } -#define pand(vars, vard) \ - { \ - movq_m2r(vard, mm0); \ - mmx_m2r(pandn, (mmx_t) -1LL, mm0); \ - mmx_m2r(pandn, vars, mm0); \ - movq_r2m(mm0, vard); \ - } -#else -#define pand_m2r(var, reg) mmx_m2r(pand, var, reg) -#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd) -#define pand(vars, vard) mmx_m2m(pand, vars, vard) -#endif - - -/* 1x64 bitwise AND with Not the destination -*/ -#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg) -#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd) -#define pandn(vars, vard) mmx_m2m(pandn, vars, vard) - - -/* 1x64 bitwise OR -*/ -#define por_m2r(var, reg) mmx_m2r(por, var, reg) -#define por_r2r(regs, regd) mmx_r2r(por, regs, regd) -#define por(vars, vard) mmx_m2m(por, vars, vard) - - -/* 1x64 bitwise eXclusive OR -*/ -#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg) -#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd) -#define pxor(vars, vard) mmx_m2m(pxor, vars, vard) - - -/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality - (resulting fields are either 0 or -1) -*/ -#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg) -#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd) -#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard) - -#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg) -#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd) -#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard) - -#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg) -#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd) -#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard) - - -/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than - (resulting fields are either 0 or -1) -*/ -#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg) -#define 
pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd) -#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard) - -#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg) -#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd) -#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard) - -#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg) -#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd) -#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard) - - -/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical -*/ -#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg) -#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg) -#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd) -#define psllq(vars, vard) mmx_m2m(psllq, vars, vard) - -#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg) -#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg) -#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd) -#define pslld(vars, vard) mmx_m2m(pslld, vars, vard) - -#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg) -#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg) -#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd) -#define psllw(vars, vard) mmx_m2m(psllw, vars, vard) - - -/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical -*/ -#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg) -#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg) -#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd) -#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard) - -#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg) -#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg) -#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd) -#define psrld(vars, vard) mmx_m2m(psrld, vars, vard) - -#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg) -#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg) -#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd) -#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard) - - -/* 2x32 and 4x16 Parallel Shift Right Arithmetic -*/ -#define 
psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg) -#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg) -#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd) -#define psrad(vars, vard) mmx_m2m(psrad, vars, vard) - -#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg) -#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg) -#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd) -#define psraw(vars, vard) mmx_m2m(psraw, vars, vard) - - -/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate - (packs source and dest fields into dest in that order) -*/ -#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg) -#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd) -#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard) - -#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg) -#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd) -#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard) - - -/* 4x16->8x8 PACK and Unsigned Saturate - (packs source and dest fields into dest in that order) -*/ -#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg) -#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd) -#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard) - - -/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low - (interleaves low half of dest with low half of source - as padding in each result field) -*/ -#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg) -#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd) -#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard) - -#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg) -#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd) -#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard) - -#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg) -#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd) -#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard) - - -/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High - 
(interleaves high half of dest with high half of source - as padding in each result field) -*/ -#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg) -#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd) -#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard) - -#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg) -#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd) -#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard) - -#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg) -#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd) -#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard) - - -/* Empty MMx State - (used to clean-up when going from mmx to float use - of the registers that are shared by both; note that - there is no float-to-mmx operation needed, because - only the float tag word info is corruptible) -*/ -#ifdef MMX_TRACE - -#define emms() \ - { \ - printf("emms()\n"); \ - __asm__ __volatile__ ("emms"); \ - } - -#else - -#define emms() __asm__ __volatile__ ("emms") - -#endif - -#endif -/* vi: set ts=4 sw=4 expandtab: */