Mercurial > sdl-ios-xcode
annotate src/video/dga/SDL_dgavideo.h @ 1542:a8bf1aa21020
Fixed bug #15
SDL_blit_A.mmx-speed.patch.txt --
Speed improvements and a bugfix for the current GCC inline mmx
asm code:
- Changed some ops and removed some resulting useless ones.
- Added some instruction parallelism (some gain)
The resulting speed on my Xeon improved up to 35% depending on
the function (measured in fps).
- Fixed a bug where BlitRGBtoRGBSurfaceAlphaMMX() was
setting the alpha component on the destination surfaces (to
opaque-alpha) even when the surface had none.
SDL_blit_A.mmx-msvc.patch.txt --
MSVC mmx intrinsics version of the same GCC asm code.
The MSVC compiler tries to parallelize the code and to avoid
register stalls, but does not always do a very good job.
Per-surface blending MSVC functions run quite a bit faster
than their pure-asm counterparts (up to 55% faster for 16-bit
ones), but the per-pixel blending runs somewhat slower than asm.
- BlitRGBtoRGBSurfaceAlphaMMX and BlitRGBtoRGBPixelAlphaMMX (and all
variants) can now also handle formats other than (A)RGB8888. Formats
like RGBA8888 and some quite exotic ones are allowed -- like
RAGB8888, or actually anything having channels aligned on an 8-bit
boundary and a full 8-bit alpha (for per-pixel alpha blending).
The performance cost of this change is virtually 0 for per-surface
alpha blending (no extra ops inside the loop) and a single non-MMX
op inside the loop for per-pixel blending. In testing, the per-pixel
alpha blending takes a ~2% performance hit, but it still runs much
faster than the current code in CVS. If necessary, a separate function
with this functionality can be made.
This code requires Processor Pack for VC6.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Wed, 15 Mar 2006 15:39:29 +0000 |
parents | d910939febfa |
children | 14717b52abc0 45669d4efd02 |
rev | line source |
---|---|
0 | 1 /* |
2 SDL - Simple DirectMedia Layer | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
3 Copyright (C) 1997-2006 Sam Lantinga |
0 | 4 |
5 This library is free software; you can redistribute it and/or | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
6 modify it under the terms of the GNU Lesser General Public |
0 | 7 License as published by the Free Software Foundation; either |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
8 version 2.1 of the License, or (at your option) any later version. |
0 | 9 |
10 This library is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
13 Lesser General Public License for more details. |
0 | 14 |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
15 You should have received a copy of the GNU Lesser General Public |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
16 License along with this library; if not, write to the Free Software |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 18 |
19 Sam Lantinga | |
252
e8157fcb3114
Updated the source with the correct e-mail address
Sam Lantinga <slouken@libsdl.org>
parents:
105
diff
changeset
|
20 slouken@libsdl.org |
0 | 21 */ |
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
22 #include "SDL_config.h" |
0 | 23 |
24 #ifndef _SDL_dgavideo_h | |
25 #define _SDL_dgavideo_h | |
26 | |
27 #include <X11/Xlib.h> | |
28 | |
29 #include "SDL_mouse.h" | |
30 #include "SDL_mutex.h" | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1312
diff
changeset
|
31 #include "../SDL_sysvideo.h" |
0 | 32 |
33 /* Hidden "this" pointer for the video functions */ | |
34 #define _THIS SDL_VideoDevice *this | |
35 | |
101
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
36 /* Define this if you need the DGA driver to be thread-safe */ |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
37 #define LOCK_DGA_DISPLAY |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
38 #ifdef LOCK_DGA_DISPLAY |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
39 #define LOCK_DISPLAY() SDL_mutexP(event_lock) |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
40 #define UNLOCK_DISPLAY() SDL_mutexV(event_lock) |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
41 #else |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
42 #define LOCK_DISPLAY() |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
43 #define UNLOCK_DISPLAY() |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
44 #endif |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
45 |
0 | 46 |
47 /* This is the structure we use to keep track of video memory */ | |
48 typedef struct vidmem_bucket { | |
49 struct vidmem_bucket *prev; | |
101
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
50 int used; |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
51 int dirty; |
0 | 52 Uint8 *base; |
53 unsigned int size; | |
54 struct vidmem_bucket *next; | |
55 } vidmem_bucket; | |
56 | |
57 /* Private display data */ | |
58 struct SDL_PrivateVideoData { | |
59 Display *DGA_Display; | |
60 Colormap DGA_colormap; | |
61 int visualClass; | |
62 | |
63 #define NUM_MODELISTS 4 /* 8, 16, 24, and 32 bits-per-pixel */ | |
64 int SDL_nummodes[NUM_MODELISTS]; | |
65 SDL_Rect **SDL_modelist[NUM_MODELISTS]; | |
66 | |
67 /* Information for the video surface */ | |
68 Uint8 *memory_base; | |
69 int memory_pitch; | |
70 SDL_mutex *hw_lock; | |
71 int sync_needed; | |
72 int was_flipped; | |
73 | |
74 /* Information for hardware surfaces */ | |
75 vidmem_bucket surfaces; | |
76 int surfaces_memtotal; | |
77 int surfaces_memleft; | |
78 | |
79 /* Information for double-buffering */ | |
80 int flip_page; | |
81 int flip_yoffset[2]; | |
82 Uint8 *flip_address[2]; | |
83 | |
84 /* Used to handle DGA events */ | |
85 int event_base; | |
101
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
86 #ifdef LOCK_DGA_DISPLAY |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
87 SDL_mutex *event_lock; |
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
88 #endif |
0 | 89 }; |
90 /* Old variable names */ | |
91 #define DGA_Display (this->hidden->DGA_Display) | |
92 #define DGA_Screen DefaultScreen(DGA_Display) | |
93 #define DGA_colormap (this->hidden->DGA_colormap) | |
94 #define DGA_visualClass (this->hidden->visualClass) | |
95 #define memory_base (this->hidden->memory_base) | |
96 #define memory_pitch (this->hidden->memory_pitch) | |
97 #define flip_page (this->hidden->flip_page) | |
98 #define flip_yoffset (this->hidden->flip_yoffset) | |
99 #define flip_address (this->hidden->flip_address) | |
100 #define sync_needed (this->hidden->sync_needed) | |
101 #define was_flipped (this->hidden->was_flipped) | |
102 #define SDL_nummodes (this->hidden->SDL_nummodes) | |
103 #define SDL_modelist (this->hidden->SDL_modelist) | |
104 #define surfaces (this->hidden->surfaces) | |
105 #define surfaces_memtotal (this->hidden->surfaces_memtotal) | |
106 #define surfaces_memleft (this->hidden->surfaces_memleft) | |
107 #define hw_lock (this->hidden->hw_lock) | |
108 #define DGA_event_base (this->hidden->event_base) | |
101
825b2fa28e2e
DGA video driver is now thread-safe
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
109 #define event_lock (this->hidden->event_lock) |
0 | 110 |
111 #endif /* _SDL_dgavideo_h */ |