comparison src/video/SDL_blit.c @ 2247:93994f65c74c

Removed Hermes since it's LGPL and not compatible with a commercial license. Prepping for using MMX and SSE intrinsics instead of inline assembly ... except for memcpy equivalents, which only get faster if they can exploit the parallelism of loading into multiple SIMD registers. :)
author Sam Lantinga <slouken@libsdl.org>
date Wed, 15 Aug 2007 08:21:10 +0000
parents c121d94672cb
children 5a58b57b6724
comparison
equal deleted inserted replaced
2246:75daa0792bd1 2247:93994f65c74c
22 #include "SDL_config.h" 22 #include "SDL_config.h"
23 23
24 #include "SDL_video.h" 24 #include "SDL_video.h"
25 #include "SDL_sysvideo.h" 25 #include "SDL_sysvideo.h"
26 #include "SDL_blit.h" 26 #include "SDL_blit.h"
27 #include "SDL_blit_copy.h"
27 #include "SDL_RLEaccel_c.h" 28 #include "SDL_RLEaccel_c.h"
28 #include "SDL_pixels_c.h" 29 #include "SDL_pixels_c.h"
29 30
30 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES 31 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES
31 #define MMX_ASMBLIT 32 #define MMX_ASMBLIT
104 } 105 }
105 /* Blit is done! */ 106 /* Blit is done! */
106 return (okay ? 0 : -1); 107 return (okay ? 0 : -1);
107 } 108 }
108 109
109 #ifdef MMX_ASMBLIT 110 #ifdef __MACOSX__
110 static __inline__ void 111 #include <sys/sysctl.h>
111 SDL_memcpyMMX(Uint8 * to, const Uint8 * from, int len) 112
113 static SDL_bool SDL_UseAltivecPrefetch()
114 {
115 const char key[] = "hw.l3cachesize";
116 u_int64_t result = 0;
117 size_t typeSize = sizeof(result);
118
119 if (sysctlbyname(key, &result, &typeSize, NULL, 0) == 0 && result > 0) {
120 return SDL_TRUE;
121 } else {
122 return SDL_FALSE;
123 }
124 }
125 #else
126 static SDL_bool SDL_UseAltivecPrefetch()
127 {
128 /* Just guess G4 */
129 return SDL_TRUE;
130 }
131 #endif /* __MACOSX__ */
132
133 static SDL_loblit SDL_ChooseBlitFunc(SDL_BlitEntry *entries, int count)
112 { 134 {
113 int i; 135 int i;
114 136 static Uint32 features = 0xffffffff;
115 for (i = 0; i < len / 8; i++) { 137
116 __asm__ __volatile__(" movq (%0), %%mm0\n" 138 if (features == 0xffffffff) {
117 " movq %%mm0, (%1)\n"::"r"(from), 139 features = SDL_BLIT_ANY;
118 "r"(to):"memory"); 140
119 from += 8; 141 /* Provide an override for testing .. */
120 to += 8; 142 const char *override = SDL_getenv("SDL_BLIT_FEATURES");
121 } 143 if (override) {
122 if (len & 7) 144 SDL_sscanf(override, "%u", &features);
123 SDL_memcpy(to, from, len & 7); 145 } else {
124 } 146 if (SDL_HasMMX()) {
125 147 features |= SDL_BLIT_MMX;
126 static __inline__ void 148 }
127 SDL_memcpySSE(Uint8 * to, const Uint8 * from, int len) 149 if (SDL_HasSSE()) {
128 { 150 features |= SDL_BLIT_SSE;
129 int i; 151 }
130 152 if (SDL_HasAltivec()) {
131 __asm__ __volatile__(" prefetchnta (%0)\n" 153 if (SDL_UseAltivecPrefetch()) {
132 " prefetchnta 64(%0)\n" 154 features |= SDL_BLIT_ALTIVEC_PREFETCH;
133 " prefetchnta 128(%0)\n" 155 } else {
134 " prefetchnta 192(%0)\n"::"r"(from)); 156 features |= SDL_BLIT_ALTIVEC_NOPREFETCH;
135 157 }
136 for (i = 0; i < len / 8; i++) { 158 }
137 __asm__ __volatile__(" prefetchnta 256(%0)\n" 159 }
138 " movq (%0), %%mm0\n" 160 }
139 " movntq %%mm0, (%1)\n"::"r"(from), 161
140 "r"(to):"memory"); 162 for (i = count; i > 0; --i) {
141 from += 8; 163 if (features & entries[i].features) {
142 to += 8; 164 return entries[i].blit;
143 } 165 }
144 if (len & 7) 166 }
145 SDL_memcpy(to, from, len & 7); 167 return entries[0].blit;
146 }
147 #endif
148
149 static void
150 SDL_BlitCopy(SDL_BlitInfo * info)
151 {
152 Uint8 *src, *dst;
153 int w, h;
154 int srcskip, dstskip;
155
156 w = info->d_width * info->dst->BytesPerPixel;
157 h = info->d_height;
158 src = info->s_pixels;
159 dst = info->d_pixels;
160 srcskip = w + info->s_skip;
161 dstskip = w + info->d_skip;
162 #ifdef MMX_ASMBLIT
163 if (SDL_HasSSE()) {
164 while (h--) {
165 SDL_memcpySSE(dst, src, w);
166 src += srcskip;
167 dst += dstskip;
168 }
169 __asm__ __volatile__(" emms\n"::);
170 } else if (SDL_HasMMX()) {
171 while (h--) {
172 SDL_memcpyMMX(dst, src, w);
173 src += srcskip;
174 dst += dstskip;
175 }
176 __asm__ __volatile__(" emms\n"::);
177 } else
178 #endif
179 while (h--) {
180 SDL_memcpy(dst, src, w);
181 src += srcskip;
182 dst += dstskip;
183 }
184 }
185
186 static void
187 SDL_BlitCopyOverlap(SDL_BlitInfo * info)
188 {
189 Uint8 *src, *dst;
190 int w, h;
191 int srcskip, dstskip;
192
193 w = info->d_width * info->dst->BytesPerPixel;
194 h = info->d_height;
195 src = info->s_pixels;
196 dst = info->d_pixels;
197 srcskip = w + info->s_skip;
198 dstskip = w + info->d_skip;
199 if (dst < src) {
200 while (h--) {
201 SDL_memcpy(dst, src, w);
202 src += srcskip;
203 dst += dstskip;
204 }
205 } else {
206 src += ((h - 1) * srcskip);
207 dst += ((h - 1) * dstskip);
208 while (h--) {
209 SDL_revcpy(dst, src, w);
210 src -= srcskip;
211 dst -= dstskip;
212 }
213 }
214 } 168 }
215 169
216 /* Figure out which of many blit routines to set up on a surface */ 170 /* Figure out which of many blit routines to set up on a surface */
217 int 171 int
218 SDL_CalculateBlit(SDL_Surface * surface) 172 SDL_CalculateBlit(SDL_Surface * surface)
235 blit_index |= 2; 189 blit_index |= 2;
236 } 190 }
237 191
238 /* Check for special "identity" case -- copy blit */ 192 /* Check for special "identity" case -- copy blit */
239 if (surface->map->identity && blit_index == 0) { 193 if (surface->map->identity && blit_index == 0) {
240 surface->map->sw_data->blit = SDL_BlitCopy;
241
242 /* Handle overlapping blits on the same surface */ 194 /* Handle overlapping blits on the same surface */
243 if (surface == surface->map->dst) { 195 if (surface == surface->map->dst) {
244 surface->map->sw_data->blit = SDL_BlitCopyOverlap; 196 surface->map->sw_data->blit = SDL_BlitCopyOverlap;
197 } else {
198 surface->map->sw_data->blit = SDL_BlitCopy;
245 } 199 }
246 } else { 200 } else {
247 if (surface->format->BitsPerPixel < 8) { 201 if (surface->format->BitsPerPixel < 8) {
248 surface->map->sw_data->blit = 202 surface->map->sw_data->blit =
249 SDL_CalculateBlit0(surface, blit_index); 203 SDL_CalculateBlit0(surface, blit_index);