view src/hermes/HeadMMX.h @ 1542:a8bf1aa21020

Fixed bug #15 SDL_blit_A.mmx-speed.patch.txt -- Speed improvements and a bugfix for the current GCC inline mmx asm code: - Changed some ops and removed some resulting useless ones. - Added some instruction parallelism (some gain). The resulting speed on my Xeon improved up to 35% depending on the function (measured in fps). - Fixed a bug where BlitRGBtoRGBSurfaceAlphaMMX() was setting the alpha component on the destination surfaces (to opaque-alpha) even when the surface had none. SDL_blit_A.mmx-msvc.patch.txt -- MSVC mmx intrinsics version of the same GCC asm code. The MSVC compiler tries to parallelize the code and to avoid register stalls, but does not always do a very good job. Per-surface blending MSVC functions run quite a bit faster than their pure-asm counterparts (up to 55% faster for 16-bit ones), but the per-pixel blending runs somewhat slower than asm. - BlitRGBtoRGBSurfaceAlphaMMX and BlitRGBtoRGBPixelAlphaMMX (and all variants) can now also handle formats other than (A)RGB8888. Formats like RGBA8888 and some quite exotic ones are allowed -- like RAGB8888, or actually anything having channels aligned on an 8-bit boundary and full 8-bit alpha (for per-pixel alpha blending). The performance cost of this change is virtually 0 for per-surface alpha blending (no extra ops inside the loop) and a single non-MMX op inside the loop for per-pixel blending. In testing, the per-pixel alpha blending takes a ~2% performance hit, but it still runs much faster than the current code in CVS. If necessary, a separate function with this functionality can be made. This code requires the Processor Pack for VC6.
author Sam Lantinga <slouken@libsdl.org>
date Wed, 15 Mar 2006 15:39:29 +0000
parents d910939febfa
children 782fd950bd46 c121d94672cb 39b9405d3cb6
line wrap: on
line source

/*
   Header definitions for the MMX routines for the HERMES library
   Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
   This source code is licensed under the GNU LGPL
  
   Please refer to the file COPYING.LIB contained in the distribution for
   licensing conditions
*/
#include "SDL_config.h"

#ifndef __HERMES_HEAD_MMX__
#define __HERMES_HEAD_MMX__


/* If you cannot stand ifdefs, then please do not look into this file, it's
   going to end your life :) */

#ifdef X86_ASSEMBLER


#ifdef __cplusplus
extern "C" {
#endif

/*
   MMX entry points.  This header only declares them; no bodies appear here.
   The whole section is compiled only when X86_ASSEMBLER is defined, so the
   definitions are presumably supplied by the assembler sources.
*/

/* Generic MMX conversion entry point; receives the converter interface. */
void STACKCALL ConvertMMX(HermesConverterInterface *);

/* MMX clear routines, one per variant (the numeric suffix presumably is the
   destination depth in bits per pixel); each receives the clear interface. */
void STACKCALL ClearMMX_32(HermesClearInterface *);
void STACKCALL ClearMMX_24(HermesClearInterface *);
void STACKCALL ClearMMX_16(HermesClearInterface *);
void STACKCALL ClearMMX_8(HermesClearInterface *);

/*
   Format-specific conversion routines.  The empty parameter lists are kept
   as-is: in C they declare the functions without a prototype, so the
   compiler does not check arguments and the names stay usable wherever the
   existing callers reference them.  Each routine is declared exactly once.
*/
void ConvertMMXpII32_24RGB888();
void ConvertMMXpII32_16RGB565();
void ConvertMMXpII32_16BGR565();
void ConvertMMXpII32_16RGB555();
void ConvertMMXpII32_16BGR555();

void ConvertMMXp32_16RGB555();

#ifdef __cplusplus
}
#endif



/*
   ELF + GCC builds: redirect the plain routine names to symbols carrying a
   leading underscore.  Each underscore-prefixed symbol is declared and then
   the plain name is mapped onto it with a macro.

   NOTE(review): only ConvertMMX and the ConvertMMXpII32_* names are remapped
   here; the ClearMMX_* routines and ConvertMMXp32_16RGB555 keep their plain
   names.  _ConvertMMX is also declared without STACKCALL, unlike the
   ConvertMMX declaration earlier in this header -- confirm both are intended.
*/

#if defined(__GNUC__) && defined(__ELF__)
#ifdef __cplusplus
extern "C" {
#endif

void _ConvertMMX(HermesConverterInterface *);
#define ConvertMMX _ConvertMMX

void _ConvertMMXpII32_24RGB888();
#define ConvertMMXpII32_24RGB888 _ConvertMMXpII32_24RGB888

void _ConvertMMXpII32_16RGB565();
#define ConvertMMXpII32_16RGB565 _ConvertMMXpII32_16RGB565

void _ConvertMMXpII32_16BGR565();
#define ConvertMMXpII32_16BGR565 _ConvertMMXpII32_16BGR565

void _ConvertMMXpII32_16RGB555();
#define ConvertMMXpII32_16RGB555 _ConvertMMXpII32_16RGB555

void _ConvertMMXpII32_16BGR555();
#define ConvertMMXpII32_16BGR555 _ConvertMMXpII32_16BGR555

#ifdef __cplusplus
}
#endif
#endif /* ELF and GNUC */




/* Make it work with Watcom: the auxiliary pragmas below tell the Watcom
   compiler how the MMX routines are named at link level and, for some of
   them, which registers they change. */
#ifdef __WATCOMC__
/* NOTE(review): presumably moves Watcom diagnostic 601 to level 9 so that it
   is not reported for this header -- confirm against the compiler manual. */
#pragma warning 601 9

/* "_*" is the Watcom naming pattern in which '*' stands for the declared
   name, so the linker symbol gets a leading underscore.  The modify list
   names the registers the routine may change. */
#pragma aux ConvertMMX "_*" modify [EAX EBX ECX EDX ESI EDI]

/* Same naming pattern and modified-register list for the clear routines. */
#pragma aux ClearMMX_32 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearMMX_24 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearMMX_16 "_*" modify [EAX EBX ECX EDX ESI EDI]
#pragma aux ClearMMX_8 "_*" modify [EAX EBX ECX EDX ESI EDI]

/* Format-specific converters: only the symbol name is specified here; no
   modify list is given for these. */
#pragma aux ConvertMMXpII32_24RGB888 "_*"
#pragma aux ConvertMMXpII32_16RGB565 "_*"
#pragma aux ConvertMMXpII32_16BGR565 "_*"
#pragma aux ConvertMMXpII32_16RGB555 "_*"
#pragma aux ConvertMMXpII32_16BGR555 "_*"
#pragma aux ConvertMMXp32_16RGB555 "_*"

#endif /* WATCOM */

#endif /* X86_ASSEMBLER */


#endif