comparison src/video/SDL_blit_N.c @ 1668:4da1ee79c9af SDL-1.3

more tweaking indent options
author Sam Lantinga <slouken@libsdl.org>
date Mon, 29 May 2006 04:04:35 +0000
parents 782fd950bd46
children (none)
comparison of 1667:1fddae038bc8 (old, shown first on each line) with 1668:4da1ee79c9af (new, shown second)
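
Every hunk in this comparison is a whitespace-only reindent; no logic changes. A representative before/after pair, taken from the GetL3CacheSize() hunk below, shows the space between a function name and its argument list being dropped (long argument lists are also re-wrapped under the new settings):

    /* rev 1667 (old) */
    int err = sysctlbyname (key, &result, &typeSize, NULL, 0);

    /* rev 1668 (new) */
    int err = sysctlbyname(key, &result, &typeSize, NULL, 0);

If the reindent was done with GNU indent, this is the effect of switching from -pcs to -npcs (an assumption; the exact indent invocation is not recorded in this changeset).
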
37 #endif 37 #endif
38 #define assert(X) 38 #define assert(X)
39 #ifdef __MACOSX__ 39 #ifdef __MACOSX__
40 #include <sys/sysctl.h> 40 #include <sys/sysctl.h>
41 static size_t 41 static size_t
42 GetL3CacheSize (void) 42 GetL3CacheSize(void)
43 { 43 {
44 const char key[] = "hw.l3cachesize"; 44 const char key[] = "hw.l3cachesize";
45 u_int64_t result = 0; 45 u_int64_t result = 0;
46 size_t typeSize = sizeof (result); 46 size_t typeSize = sizeof(result);
47 47
48 48
49 int err = sysctlbyname (key, &result, &typeSize, NULL, 0); 49 int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
50 if (0 != err) 50 if (0 != err)
51 return 0; 51 return 0;
52 52
53 return result; 53 return result;
54 } 54 }
55 #else 55 #else
56 static size_t 56 static size_t
57 GetL3CacheSize (void) 57 GetL3CacheSize(void)
58 { 58 {
59 /* XXX: Just guess G4 */ 59 /* XXX: Just guess G4 */
60 return 2097152; 60 return 2097152;
61 } 61 }
62 #endif /* __MACOSX__ */ 62 #endif /* __MACOSX__ */
103 ? vec_lvsl(0, src) \ 103 ? vec_lvsl(0, src) \
104 : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) 104 : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
105 105
106 /* Calculate the permute vector used for 32->32 swizzling */ 106 /* Calculate the permute vector used for 32->32 swizzling */
107 static vector unsigned char 107 static vector unsigned char
108 calc_swizzle32 (const SDL_PixelFormat * srcfmt, 108 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
109 const SDL_PixelFormat * dstfmt)
110 { 109 {
111 /* 110 /*
112 * We have to assume that the bits that aren't used by other 111 * We have to assume that the bits that aren't used by other
113 * colors is alpha, and it's one complete byte, since some formats 112 * colors is alpha, and it's one complete byte, since some formats
114 * leave alpha with a zero mask, but we should still swizzle the bits. 113 * leave alpha with a zero mask, but we should still swizzle the bits.
125 srcfmt = &default_pixel_format; 124 srcfmt = &default_pixel_format;
126 } 125 }
127 if (!dstfmt) { 126 if (!dstfmt) {
128 dstfmt = &default_pixel_format; 127 dstfmt = &default_pixel_format;
129 } 128 }
130 const vector unsigned char plus = 129 const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
131 VECUINT8_LITERAL (0x00, 0x00, 0x00, 0x00, 130 0x04, 0x04, 0x04, 0x04,
132 0x04, 0x04, 0x04, 0x04, 131 0x08, 0x08, 0x08, 0x08,
133 0x08, 0x08, 0x08, 0x08, 132 0x0C, 0x0C, 0x0C,
134 0x0C, 0x0C, 0x0C, 0x0C); 133 0x0C);
135 vector unsigned char vswiz; 134 vector unsigned char vswiz;
136 vector unsigned int srcvec; 135 vector unsigned int srcvec;
137 #define RESHIFT(X) (3 - ((X) >> 3)) 136 #define RESHIFT(X) (3 - ((X) >> 3))
138 Uint32 rmask = RESHIFT (srcfmt->Rshift) << (dstfmt->Rshift); 137 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
139 Uint32 gmask = RESHIFT (srcfmt->Gshift) << (dstfmt->Gshift); 138 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
140 Uint32 bmask = RESHIFT (srcfmt->Bshift) << (dstfmt->Bshift); 139 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
141 Uint32 amask; 140 Uint32 amask;
142 /* Use zero for alpha if either surface doesn't have alpha */ 141 /* Use zero for alpha if either surface doesn't have alpha */
143 if (dstfmt->Amask) { 142 if (dstfmt->Amask) {
144 amask = 143 amask =
145 ((srcfmt->Amask) ? RESHIFT (srcfmt->Ashift) : 0x10) << (dstfmt-> 144 ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->
146 Ashift); 145 Ashift);
147 } else { 146 } else {
148 amask = 147 amask =
149 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 148 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
150 0xFFFFFFFF); 149 0xFFFFFFFF);
151 } 150 }
152 #undef RESHIFT 151 #undef RESHIFT
153 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask); 152 ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
154 vswiz = vec_add (plus, (vector unsigned char) vec_splat (srcvec, 0)); 153 vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
155 return (vswiz); 154 return (vswiz);
156 } 155 }
157 156
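A minimal sketch of how the permute vector from calc_swizzle32() is consumed (it mirrors the blitters that follow; src, dst, srcfmt and dstfmt stand for whatever the caller has, src/dst are assumed 16-byte aligned, and the zero alpha fill matches Blit_RGB888_RGB565Altivec below):

    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    vector unsigned char valpha = vec_splat_u8(0);  /* indices 0x10-0x1f in the permute map select these bytes */
    vector unsigned char vsrc = vec_ld(0, src);     /* four 32-bit pixels */
    vsrc = vec_perm(vsrc, valpha, vpermute);        /* reorder each pixel's R/G/B/A bytes into the dst layout */
    vec_st(vsrc, 0, dst);                           /* store the four swizzled pixels */
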
158 static void Blit_RGB888_RGB565 (SDL_BlitInfo * info); 157 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
159 static void 158 static void
160 Blit_RGB888_RGB565Altivec (SDL_BlitInfo * info) 159 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
161 { 160 {
162 int height = info->d_height; 161 int height = info->d_height;
163 Uint8 *src = (Uint8 *) info->s_pixels; 162 Uint8 *src = (Uint8 *) info->s_pixels;
164 int srcskip = info->s_skip; 163 int srcskip = info->s_skip;
165 Uint8 *dst = (Uint8 *) info->d_pixels; 164 Uint8 *dst = (Uint8 *) info->d_pixels;
166 int dstskip = info->d_skip; 165 int dstskip = info->d_skip;
167 SDL_PixelFormat *srcfmt = info->src; 166 SDL_PixelFormat *srcfmt = info->src;
168 vector unsigned char valpha = vec_splat_u8 (0); 167 vector unsigned char valpha = vec_splat_u8(0);
169 vector unsigned char vpermute = calc_swizzle32 (srcfmt, NULL); 168 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
170 vector unsigned char vgmerge = VECUINT8_LITERAL (0x00, 0x02, 0x00, 0x06, 169 vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
171 0x00, 0x0a, 0x00, 0x0e, 170 0x00, 0x0a, 0x00, 0x0e,
172 0x00, 0x12, 0x00, 0x16, 171 0x00, 0x12, 0x00, 0x16,
173 0x00, 0x1a, 0x00, 0x1e); 172 0x00, 0x1a, 0x00, 0x1e);
174 vector unsigned short v1 = vec_splat_u16 (1); 173 vector unsigned short v1 = vec_splat_u16(1);
175 vector unsigned short v3 = vec_splat_u16 (3); 174 vector unsigned short v3 = vec_splat_u16(3);
176 vector unsigned short v3f = 175 vector unsigned short v3f =
177 VECUINT16_LITERAL (0x003f, 0x003f, 0x003f, 0x003f, 176 VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
178 0x003f, 0x003f, 0x003f, 0x003f); 177 0x003f, 0x003f, 0x003f, 0x003f);
179 vector unsigned short vfc = 178 vector unsigned short vfc =
180 VECUINT16_LITERAL (0x00fc, 0x00fc, 0x00fc, 0x00fc, 179 VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
181 0x00fc, 0x00fc, 0x00fc, 0x00fc); 180 0x00fc, 0x00fc, 0x00fc, 0x00fc);
182 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8 (-7); 181 vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
183 vf800 = vec_sl (vf800, vec_splat_u16 (8)); 182 vf800 = vec_sl(vf800, vec_splat_u16(8));
184 183
185 while (height--) { 184 while (height--) {
186 vector unsigned char valigner; 185 vector unsigned char valigner;
187 vector unsigned char voverflow; 186 vector unsigned char voverflow;
188 vector unsigned char vsrc; 187 vector unsigned char vsrc;
203 dst += 2; \ 202 dst += 2; \
204 src += 4; \ 203 src += 4; \
205 widthvar--; \ 204 widthvar--; \
206 } 205 }
207 206
208 ONE_PIXEL_BLEND (((UNALIGNED_PTR (dst)) && (width)), width); 207 ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
209 208
210 /* After all that work, here's the vector part! */ 209 /* After all that work, here's the vector part! */
211 extrawidth = (width % 8); /* trailing unaligned stores */ 210 extrawidth = (width % 8); /* trailing unaligned stores */
212 width -= extrawidth; 211 width -= extrawidth;
213 vsrc = vec_ld (0, src); 212 vsrc = vec_ld(0, src);
214 valigner = VEC_ALIGNER (src); 213 valigner = VEC_ALIGNER(src);
215 214
216 while (width) { 215 while (width) {
217 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; 216 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
218 vector unsigned int vsrc1, vsrc2; 217 vector unsigned int vsrc1, vsrc2;
219 vector unsigned char vdst; 218 vector unsigned char vdst;
220 219
221 voverflow = vec_ld (15, src); 220 voverflow = vec_ld(15, src);
222 vsrc = vec_perm (vsrc, voverflow, valigner); 221 vsrc = vec_perm(vsrc, voverflow, valigner);
223 vsrc1 = (vector unsigned int) vec_perm (vsrc, valpha, vpermute); 222 vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
224 src += 16; 223 src += 16;
225 vsrc = voverflow; 224 vsrc = voverflow;
226 voverflow = vec_ld (15, src); 225 voverflow = vec_ld(15, src);
227 vsrc = vec_perm (vsrc, voverflow, valigner); 226 vsrc = vec_perm(vsrc, voverflow, valigner);
228 vsrc2 = (vector unsigned int) vec_perm (vsrc, valpha, vpermute); 227 vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
229 /* 1555 */ 228 /* 1555 */
230 vpixel = (vector unsigned short) vec_packpx (vsrc1, vsrc2); 229 vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
231 vgpixel = 230 vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
232 (vector unsigned short) vec_perm (vsrc1, vsrc2, vgmerge); 231 vgpixel = vec_and(vgpixel, vfc);
233 vgpixel = vec_and (vgpixel, vfc); 232 vgpixel = vec_sl(vgpixel, v3);
234 vgpixel = vec_sl (vgpixel, v3); 233 vrpixel = vec_sl(vpixel, v1);
235 vrpixel = vec_sl (vpixel, v1); 234 vrpixel = vec_and(vrpixel, vf800);
236 vrpixel = vec_and (vrpixel, vf800); 235 vbpixel = vec_and(vpixel, v3f);
237 vbpixel = vec_and (vpixel, v3f);
238 vdst = 236 vdst =
239 vec_or ((vector unsigned char) vrpixel, 237 vec_or((vector unsigned char) vrpixel,
240 (vector unsigned char) vgpixel); 238 (vector unsigned char) vgpixel);
241 /* 565 */ 239 /* 565 */
242 vdst = vec_or (vdst, (vector unsigned char) vbpixel); 240 vdst = vec_or(vdst, (vector unsigned char) vbpixel);
243 vec_st (vdst, 0, dst); 241 vec_st(vdst, 0, dst);
244 242
245 width -= 8; 243 width -= 8;
246 src += 16; 244 src += 16;
247 dst += 16; 245 dst += 16;
248 vsrc = voverflow; 246 vsrc = voverflow;
249 } 247 }
250 248
251 assert (width == 0); 249 assert(width == 0);
252 250
253 /* do scalar until we can align... */ 251 /* do scalar until we can align... */
254 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 252 ONE_PIXEL_BLEND((extrawidth), extrawidth);
255 #undef ONE_PIXEL_BLEND 253 #undef ONE_PIXEL_BLEND
256 254
257 src += srcskip; /* move to next row, accounting for pitch. */ 255 src += srcskip; /* move to next row, accounting for pitch. */
258 dst += dstskip; 256 dst += dstskip;
259 } 257 }
260 258
261 259
262 } 260 }
263 261
264 static void 262 static void
265 Blit_RGB565_32Altivec (SDL_BlitInfo * info) 263 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
266 { 264 {
267 int height = info->d_height; 265 int height = info->d_height;
268 Uint8 *src = (Uint8 *) info->s_pixels; 266 Uint8 *src = (Uint8 *) info->s_pixels;
269 int srcskip = info->s_skip; 267 int srcskip = info->s_skip;
270 Uint8 *dst = (Uint8 *) info->d_pixels; 268 Uint8 *dst = (Uint8 *) info->d_pixels;
273 SDL_PixelFormat *dstfmt = info->dst; 271 SDL_PixelFormat *dstfmt = info->dst;
274 unsigned alpha; 272 unsigned alpha;
275 vector unsigned char valpha; 273 vector unsigned char valpha;
276 vector unsigned char vpermute; 274 vector unsigned char vpermute;
277 vector unsigned short vf800; 275 vector unsigned short vf800;
278 vector unsigned int v8 = vec_splat_u32 (8); 276 vector unsigned int v8 = vec_splat_u32(8);
279 vector unsigned int v16 = vec_add (v8, v8); 277 vector unsigned int v16 = vec_add(v8, v8);
280 vector unsigned short v2 = vec_splat_u16 (2); 278 vector unsigned short v2 = vec_splat_u16(2);
281 vector unsigned short v3 = vec_splat_u16 (3); 279 vector unsigned short v3 = vec_splat_u16(3);
282 /* 280 /*
283 0x10 - 0x1f is the alpha 281 0x10 - 0x1f is the alpha
284 0x00 - 0x0e evens are the red 282 0x00 - 0x0e evens are the red
285 0x01 - 0x0f odds are zero 283 0x01 - 0x0f odds are zero
286 */ 284 */
287 vector unsigned char vredalpha1 = 285 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
288 VECUINT8_LITERAL (0x10, 0x00, 0x01, 0x01, 286 0x10, 0x02, 0x01, 0x01,
289 0x10, 0x02, 0x01, 0x01, 287 0x10, 0x04, 0x01, 0x01,
290 0x10, 0x04, 0x01, 0x01, 288 0x10, 0x06, 0x01,
291 0x10, 0x06, 0x01, 0x01); 289 0x01);
292 vector unsigned char vredalpha2 = 290 vector unsigned char vredalpha2 =
293 (vector unsigned 291 (vector unsigned
294 char) (vec_add ((vector unsigned int) vredalpha1, vec_sl (v8, v16)) 292 char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
295 ); 293 );
296 /* 294 /*
297 0x00 - 0x0f is ARxx ARxx ARxx ARxx 295 0x00 - 0x0f is ARxx ARxx ARxx ARxx
298 0x11 - 0x0f odds are blue 296 0x11 - 0x0f odds are blue
299 */ 297 */
300 vector unsigned char vblue1 = VECUINT8_LITERAL (0x00, 0x01, 0x02, 0x11, 298 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
301 0x04, 0x05, 0x06, 0x13, 299 0x04, 0x05, 0x06, 0x13,
302 0x08, 0x09, 0x0a, 0x15, 300 0x08, 0x09, 0x0a, 0x15,
303 0x0c, 0x0d, 0x0e, 0x17); 301 0x0c, 0x0d, 0x0e, 0x17);
304 vector unsigned char vblue2 = 302 vector unsigned char vblue2 =
305 (vector unsigned char) (vec_add ((vector unsigned int) vblue1, v8) 303 (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
306 ); 304 );
307 /* 305 /*
308 0x00 - 0x0f is ARxB ARxB ARxB ARxB 306 0x00 - 0x0f is ARxB ARxB ARxB ARxB
309 0x10 - 0x0e evens are green 307 0x10 - 0x0e evens are green
310 */ 308 */
311 vector unsigned char vgreen1 = VECUINT8_LITERAL (0x00, 0x01, 0x10, 0x03, 309 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
312 0x04, 0x05, 0x12, 0x07, 310 0x04, 0x05, 0x12, 0x07,
313 0x08, 0x09, 0x14, 0x0b, 311 0x08, 0x09, 0x14, 0x0b,
314 0x0c, 0x0d, 0x16, 0x0f); 312 0x0c, 0x0d, 0x16, 0x0f);
315 vector unsigned char vgreen2 = 313 vector unsigned char vgreen2 =
316 (vector unsigned 314 (vector unsigned
317 char) (vec_add ((vector unsigned int) vgreen1, vec_sl (v8, v8)) 315 char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
318 ); 316 );
319 317
320 318
321 assert (srcfmt->BytesPerPixel == 2); 319 assert(srcfmt->BytesPerPixel == 2);
322 assert (dstfmt->BytesPerPixel == 4); 320 assert(dstfmt->BytesPerPixel == 4);
323 321
324 vf800 = (vector unsigned short) vec_splat_u8 (-7); 322 vf800 = (vector unsigned short) vec_splat_u8(-7);
325 vf800 = vec_sl (vf800, vec_splat_u16 (8)); 323 vf800 = vec_sl(vf800, vec_splat_u16(8));
326 324
327 if (dstfmt->Amask && srcfmt->alpha) { 325 if (dstfmt->Amask && srcfmt->alpha) {
328 ((unsigned char *) &valpha)[0] = alpha = srcfmt->alpha; 326 ((unsigned char *) &valpha)[0] = alpha = srcfmt->alpha;
329 valpha = vec_splat (valpha, 0); 327 valpha = vec_splat(valpha, 0);
330 } else { 328 } else {
331 alpha = 0; 329 alpha = 0;
332 valpha = vec_splat_u8 (0); 330 valpha = vec_splat_u8(0);
333 } 331 }
334 332
335 vpermute = calc_swizzle32 (NULL, dstfmt); 333 vpermute = calc_swizzle32(NULL, dstfmt);
336 while (height--) { 334 while (height--) {
337 vector unsigned char valigner; 335 vector unsigned char valigner;
338 vector unsigned char voverflow; 336 vector unsigned char voverflow;
339 vector unsigned char vsrc; 337 vector unsigned char vsrc;
340 338
352 ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \ 350 ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
353 src += 2; \ 351 src += 2; \
354 dst += 4; \ 352 dst += 4; \
355 widthvar--; \ 353 widthvar--; \
356 } 354 }
357 ONE_PIXEL_BLEND (((UNALIGNED_PTR (dst)) && (width)), width); 355 ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
358 356
359 /* After all that work, here's the vector part! */ 357 /* After all that work, here's the vector part! */
360 extrawidth = (width % 8); /* trailing unaligned stores */ 358 extrawidth = (width % 8); /* trailing unaligned stores */
361 width -= extrawidth; 359 width -= extrawidth;
362 vsrc = vec_ld (0, src); 360 vsrc = vec_ld(0, src);
363 valigner = VEC_ALIGNER (src); 361 valigner = VEC_ALIGNER(src);
364 362
365 while (width) { 363 while (width) {
366 vector unsigned short vR, vG, vB; 364 vector unsigned short vR, vG, vB;
367 vector unsigned char vdst1, vdst2; 365 vector unsigned char vdst1, vdst2;
368 366
369 voverflow = vec_ld (15, src); 367 voverflow = vec_ld(15, src);
370 vsrc = vec_perm (vsrc, voverflow, valigner); 368 vsrc = vec_perm(vsrc, voverflow, valigner);
371 369
372 vR = vec_and ((vector unsigned short) vsrc, vf800); 370 vR = vec_and((vector unsigned short) vsrc, vf800);
373 vB = vec_sl ((vector unsigned short) vsrc, v3); 371 vB = vec_sl((vector unsigned short) vsrc, v3);
374 vG = vec_sl (vB, v2); 372 vG = vec_sl(vB, v2);
375 373
376 vdst1 = 374 vdst1 =
377 (vector unsigned char) vec_perm ((vector unsigned char) vR, 375 (vector unsigned char) vec_perm((vector unsigned char) vR,
378 valpha, vredalpha1); 376 valpha, vredalpha1);
379 vdst1 = vec_perm (vdst1, (vector unsigned char) vB, vblue1); 377 vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
380 vdst1 = vec_perm (vdst1, (vector unsigned char) vG, vgreen1); 378 vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
381 vdst1 = vec_perm (vdst1, valpha, vpermute); 379 vdst1 = vec_perm(vdst1, valpha, vpermute);
382 vec_st (vdst1, 0, dst); 380 vec_st(vdst1, 0, dst);
383 381
384 vdst2 = 382 vdst2 =
385 (vector unsigned char) vec_perm ((vector unsigned char) vR, 383 (vector unsigned char) vec_perm((vector unsigned char) vR,
386 valpha, vredalpha2); 384 valpha, vredalpha2);
387 vdst2 = vec_perm (vdst2, (vector unsigned char) vB, vblue2); 385 vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
388 vdst2 = vec_perm (vdst2, (vector unsigned char) vG, vgreen2); 386 vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
389 vdst2 = vec_perm (vdst2, valpha, vpermute); 387 vdst2 = vec_perm(vdst2, valpha, vpermute);
390 vec_st (vdst2, 16, dst); 388 vec_st(vdst2, 16, dst);
391 389
392 width -= 8; 390 width -= 8;
393 dst += 32; 391 dst += 32;
394 src += 16; 392 src += 16;
395 vsrc = voverflow; 393 vsrc = voverflow;
396 } 394 }
397 395
398 assert (width == 0); 396 assert(width == 0);
399 397
400 398
401 /* do scalar until we can align... */ 399 /* do scalar until we can align... */
402 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 400 ONE_PIXEL_BLEND((extrawidth), extrawidth);
403 #undef ONE_PIXEL_BLEND 401 #undef ONE_PIXEL_BLEND
404 402
405 src += srcskip; /* move to next row, accounting for pitch. */ 403 src += srcskip; /* move to next row, accounting for pitch. */
406 dst += dstskip; 404 dst += dstskip;
407 } 405 }
408 406
409 } 407 }
410 408
411 409
412 static void 410 static void
413 Blit_RGB555_32Altivec (SDL_BlitInfo * info) 411 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
414 { 412 {
415 int height = info->d_height; 413 int height = info->d_height;
416 Uint8 *src = (Uint8 *) info->s_pixels; 414 Uint8 *src = (Uint8 *) info->s_pixels;
417 int srcskip = info->s_skip; 415 int srcskip = info->s_skip;
418 Uint8 *dst = (Uint8 *) info->d_pixels; 416 Uint8 *dst = (Uint8 *) info->d_pixels;
421 SDL_PixelFormat *dstfmt = info->dst; 419 SDL_PixelFormat *dstfmt = info->dst;
422 unsigned alpha; 420 unsigned alpha;
423 vector unsigned char valpha; 421 vector unsigned char valpha;
424 vector unsigned char vpermute; 422 vector unsigned char vpermute;
425 vector unsigned short vf800; 423 vector unsigned short vf800;
426 vector unsigned int v8 = vec_splat_u32 (8); 424 vector unsigned int v8 = vec_splat_u32(8);
427 vector unsigned int v16 = vec_add (v8, v8); 425 vector unsigned int v16 = vec_add(v8, v8);
428 vector unsigned short v1 = vec_splat_u16 (1); 426 vector unsigned short v1 = vec_splat_u16(1);
429 vector unsigned short v3 = vec_splat_u16 (3); 427 vector unsigned short v3 = vec_splat_u16(3);
430 /* 428 /*
431 0x10 - 0x1f is the alpha 429 0x10 - 0x1f is the alpha
432 0x00 - 0x0e evens are the red 430 0x00 - 0x0e evens are the red
433 0x01 - 0x0f odds are zero 431 0x01 - 0x0f odds are zero
434 */ 432 */
435 vector unsigned char vredalpha1 = 433 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
436 VECUINT8_LITERAL (0x10, 0x00, 0x01, 0x01, 434 0x10, 0x02, 0x01, 0x01,
437 0x10, 0x02, 0x01, 0x01, 435 0x10, 0x04, 0x01, 0x01,
438 0x10, 0x04, 0x01, 0x01, 436 0x10, 0x06, 0x01,
439 0x10, 0x06, 0x01, 0x01); 437 0x01);
440 vector unsigned char vredalpha2 = 438 vector unsigned char vredalpha2 =
441 (vector unsigned 439 (vector unsigned
442 char) (vec_add ((vector unsigned int) vredalpha1, vec_sl (v8, v16)) 440 char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
443 ); 441 );
444 /* 442 /*
445 0x00 - 0x0f is ARxx ARxx ARxx ARxx 443 0x00 - 0x0f is ARxx ARxx ARxx ARxx
446 0x11 - 0x0f odds are blue 444 0x11 - 0x0f odds are blue
447 */ 445 */
448 vector unsigned char vblue1 = VECUINT8_LITERAL (0x00, 0x01, 0x02, 0x11, 446 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
449 0x04, 0x05, 0x06, 0x13, 447 0x04, 0x05, 0x06, 0x13,
450 0x08, 0x09, 0x0a, 0x15, 448 0x08, 0x09, 0x0a, 0x15,
451 0x0c, 0x0d, 0x0e, 0x17); 449 0x0c, 0x0d, 0x0e, 0x17);
452 vector unsigned char vblue2 = 450 vector unsigned char vblue2 =
453 (vector unsigned char) (vec_add ((vector unsigned int) vblue1, v8) 451 (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
454 ); 452 );
455 /* 453 /*
456 0x00 - 0x0f is ARxB ARxB ARxB ARxB 454 0x00 - 0x0f is ARxB ARxB ARxB ARxB
457 0x10 - 0x0e evens are green 455 0x10 - 0x0e evens are green
458 */ 456 */
459 vector unsigned char vgreen1 = VECUINT8_LITERAL (0x00, 0x01, 0x10, 0x03, 457 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
460 0x04, 0x05, 0x12, 0x07, 458 0x04, 0x05, 0x12, 0x07,
461 0x08, 0x09, 0x14, 0x0b, 459 0x08, 0x09, 0x14, 0x0b,
462 0x0c, 0x0d, 0x16, 0x0f); 460 0x0c, 0x0d, 0x16, 0x0f);
463 vector unsigned char vgreen2 = 461 vector unsigned char vgreen2 =
464 (vector unsigned 462 (vector unsigned
465 char) (vec_add ((vector unsigned int) vgreen1, vec_sl (v8, v8)) 463 char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
466 ); 464 );
467 465
468 466
469 assert (srcfmt->BytesPerPixel == 2); 467 assert(srcfmt->BytesPerPixel == 2);
470 assert (dstfmt->BytesPerPixel == 4); 468 assert(dstfmt->BytesPerPixel == 4);
471 469
472 vf800 = (vector unsigned short) vec_splat_u8 (-7); 470 vf800 = (vector unsigned short) vec_splat_u8(-7);
473 vf800 = vec_sl (vf800, vec_splat_u16 (8)); 471 vf800 = vec_sl(vf800, vec_splat_u16(8));
474 472
475 if (dstfmt->Amask && srcfmt->alpha) { 473 if (dstfmt->Amask && srcfmt->alpha) {
476 ((unsigned char *) &valpha)[0] = alpha = srcfmt->alpha; 474 ((unsigned char *) &valpha)[0] = alpha = srcfmt->alpha;
477 valpha = vec_splat (valpha, 0); 475 valpha = vec_splat(valpha, 0);
478 } else { 476 } else {
479 alpha = 0; 477 alpha = 0;
480 valpha = vec_splat_u8 (0); 478 valpha = vec_splat_u8(0);
481 } 479 }
482 480
483 vpermute = calc_swizzle32 (NULL, dstfmt); 481 vpermute = calc_swizzle32(NULL, dstfmt);
484 while (height--) { 482 while (height--) {
485 vector unsigned char valigner; 483 vector unsigned char valigner;
486 vector unsigned char voverflow; 484 vector unsigned char voverflow;
487 vector unsigned char vsrc; 485 vector unsigned char vsrc;
488 486
500 ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \ 498 ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
501 src += 2; \ 499 src += 2; \
502 dst += 4; \ 500 dst += 4; \
503 widthvar--; \ 501 widthvar--; \
504 } 502 }
505 ONE_PIXEL_BLEND (((UNALIGNED_PTR (dst)) && (width)), width); 503 ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
506 504
507 /* After all that work, here's the vector part! */ 505 /* After all that work, here's the vector part! */
508 extrawidth = (width % 8); /* trailing unaligned stores */ 506 extrawidth = (width % 8); /* trailing unaligned stores */
509 width -= extrawidth; 507 width -= extrawidth;
510 vsrc = vec_ld (0, src); 508 vsrc = vec_ld(0, src);
511 valigner = VEC_ALIGNER (src); 509 valigner = VEC_ALIGNER(src);
512 510
513 while (width) { 511 while (width) {
514 vector unsigned short vR, vG, vB; 512 vector unsigned short vR, vG, vB;
515 vector unsigned char vdst1, vdst2; 513 vector unsigned char vdst1, vdst2;
516 514
517 voverflow = vec_ld (15, src); 515 voverflow = vec_ld(15, src);
518 vsrc = vec_perm (vsrc, voverflow, valigner); 516 vsrc = vec_perm(vsrc, voverflow, valigner);
519 517
520 vR = vec_and (vec_sl ((vector unsigned short) vsrc, v1), vf800); 518 vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
521 vB = vec_sl ((vector unsigned short) vsrc, v3); 519 vB = vec_sl((vector unsigned short) vsrc, v3);
522 vG = vec_sl (vB, v3); 520 vG = vec_sl(vB, v3);
523 521
524 vdst1 = 522 vdst1 =
525 (vector unsigned char) vec_perm ((vector unsigned char) vR, 523 (vector unsigned char) vec_perm((vector unsigned char) vR,
526 valpha, vredalpha1); 524 valpha, vredalpha1);
527 vdst1 = vec_perm (vdst1, (vector unsigned char) vB, vblue1); 525 vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
528 vdst1 = vec_perm (vdst1, (vector unsigned char) vG, vgreen1); 526 vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
529 vdst1 = vec_perm (vdst1, valpha, vpermute); 527 vdst1 = vec_perm(vdst1, valpha, vpermute);
530 vec_st (vdst1, 0, dst); 528 vec_st(vdst1, 0, dst);
531 529
532 vdst2 = 530 vdst2 =
533 (vector unsigned char) vec_perm ((vector unsigned char) vR, 531 (vector unsigned char) vec_perm((vector unsigned char) vR,
534 valpha, vredalpha2); 532 valpha, vredalpha2);
535 vdst2 = vec_perm (vdst2, (vector unsigned char) vB, vblue2); 533 vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
536 vdst2 = vec_perm (vdst2, (vector unsigned char) vG, vgreen2); 534 vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
537 vdst2 = vec_perm (vdst2, valpha, vpermute); 535 vdst2 = vec_perm(vdst2, valpha, vpermute);
538 vec_st (vdst2, 16, dst); 536 vec_st(vdst2, 16, dst);
539 537
540 width -= 8; 538 width -= 8;
541 dst += 32; 539 dst += 32;
542 src += 16; 540 src += 16;
543 vsrc = voverflow; 541 vsrc = voverflow;
544 } 542 }
545 543
546 assert (width == 0); 544 assert(width == 0);
547 545
548 546
549 /* do scalar until we can align... */ 547 /* do scalar until we can align... */
550 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 548 ONE_PIXEL_BLEND((extrawidth), extrawidth);
551 #undef ONE_PIXEL_BLEND 549 #undef ONE_PIXEL_BLEND
552 550
553 src += srcskip; /* move to next row, accounting for pitch. */ 551 src += srcskip; /* move to next row, accounting for pitch. */
554 dst += dstskip; 552 dst += dstskip;
555 } 553 }
556 554
557 } 555 }
558 556
559 static void BlitNtoNKey (SDL_BlitInfo * info); 557 static void BlitNtoNKey(SDL_BlitInfo * info);
560 static void BlitNtoNKeyCopyAlpha (SDL_BlitInfo * info); 558 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
561 static void 559 static void
562 Blit32to32KeyAltivec (SDL_BlitInfo * info) 560 Blit32to32KeyAltivec(SDL_BlitInfo * info)
563 { 561 {
564 int height = info->d_height; 562 int height = info->d_height;
565 Uint32 *srcp = (Uint32 *) info->s_pixels; 563 Uint32 *srcp = (Uint32 *) info->s_pixels;
566 int srcskip = info->s_skip; 564 int srcskip = info->s_skip;
567 Uint32 *dstp = (Uint32 *) info->d_pixels; 565 Uint32 *dstp = (Uint32 *) info->d_pixels;
577 vector unsigned int valpha; 575 vector unsigned int valpha;
578 vector unsigned char vpermute; 576 vector unsigned char vpermute;
579 vector unsigned char vzero; 577 vector unsigned char vzero;
580 vector unsigned int vckey; 578 vector unsigned int vckey;
581 vector unsigned int vrgbmask; 579 vector unsigned int vrgbmask;
582 vpermute = calc_swizzle32 (srcfmt, dstfmt); 580 vpermute = calc_swizzle32(srcfmt, dstfmt);
583 if (info->d_width < 16) { 581 if (info->d_width < 16) {
584 if (copy_alpha) { 582 if (copy_alpha) {
585 BlitNtoNKeyCopyAlpha (info); 583 BlitNtoNKeyCopyAlpha(info);
586 } else { 584 } else {
587 BlitNtoNKey (info); 585 BlitNtoNKey(info);
588 } 586 }
589 return; 587 return;
590 } 588 }
591 vzero = vec_splat_u8 (0); 589 vzero = vec_splat_u8(0);
592 if (alpha) { 590 if (alpha) {
593 ((unsigned char *) &valpha)[0] = (unsigned char) alpha; 591 ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
594 valpha = 592 valpha =
595 (vector unsigned int) vec_splat ((vector unsigned char) valpha, 593 (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
596 0);
597 } else { 594 } else {
598 valpha = (vector unsigned int) vzero; 595 valpha = (vector unsigned int) vzero;
599 } 596 }
600 ckey &= rgbmask; 597 ckey &= rgbmask;
601 ((unsigned int *) (char *) &vckey)[0] = ckey; 598 ((unsigned int *) (char *) &vckey)[0] = ckey;
602 vckey = vec_splat (vckey, 0); 599 vckey = vec_splat(vckey, 0);
603 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask; 600 ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
604 vrgbmask = vec_splat (vrgbmask, 0); 601 vrgbmask = vec_splat(vrgbmask, 0);
605 602
606 while (height--) { 603 while (height--) {
607 #define ONE_PIXEL_BLEND(condition, widthvar) \ 604 #define ONE_PIXEL_BLEND(condition, widthvar) \
608 if (copy_alpha) { \ 605 if (copy_alpha) { \
609 while (condition) { \ 606 while (condition) { \
633 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \ 630 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
634 widthvar--; \ 631 widthvar--; \
635 } \ 632 } \
636 } 633 }
637 int width = info->d_width; 634 int width = info->d_width;
638 ONE_PIXEL_BLEND ((UNALIGNED_PTR (dstp)) && (width), width); 635 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
639 assert (width > 0); 636 assert(width > 0);
640 if (width > 0) { 637 if (width > 0) {
641 int extrawidth = (width % 4); 638 int extrawidth = (width % 4);
642 vector unsigned char valigner = VEC_ALIGNER (srcp); 639 vector unsigned char valigner = VEC_ALIGNER(srcp);
643 vector unsigned int vs = vec_ld (0, srcp); 640 vector unsigned int vs = vec_ld(0, srcp);
644 width -= extrawidth; 641 width -= extrawidth;
645 assert (width >= 4); 642 assert(width >= 4);
646 while (width) { 643 while (width) {
647 vector unsigned char vsel; 644 vector unsigned char vsel;
648 vector unsigned int vd; 645 vector unsigned int vd;
649 vector unsigned int voverflow = vec_ld (15, srcp); 646 vector unsigned int voverflow = vec_ld(15, srcp);
650 /* load the source vec */ 647 /* load the source vec */
651 vs = vec_perm (vs, voverflow, valigner); 648 vs = vec_perm(vs, voverflow, valigner);
652 /* vsel is set for items that match the key */ 649 /* vsel is set for items that match the key */
653 vsel = (vector unsigned char) vec_and (vs, vrgbmask); 650 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
654 vsel = (vector unsigned char) vec_cmpeq (vs, vckey); 651 vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
655 /* permute the src vec to the dest format */ 652 /* permute the src vec to the dest format */
656 vs = vec_perm (vs, valpha, vpermute); 653 vs = vec_perm(vs, valpha, vpermute);
657 /* load the destination vec */ 654 /* load the destination vec */
658 vd = vec_ld (0, dstp); 655 vd = vec_ld(0, dstp);
659 /* select the source and dest into vs */ 656 /* select the source and dest into vs */
660 vd = (vector unsigned int) vec_sel ((vector unsigned char) vs, 657 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
661 (vector unsigned char) vd, 658 (vector unsigned char) vd,
662 vsel); 659 vsel);
663 660
664 vec_st (vd, 0, dstp); 661 vec_st(vd, 0, dstp);
665 srcp += 4; 662 srcp += 4;
666 width -= 4; 663 width -= 4;
667 dstp += 4; 664 dstp += 4;
668 vs = voverflow; 665 vs = voverflow;
669 } 666 }
670 ONE_PIXEL_BLEND ((extrawidth), extrawidth); 667 ONE_PIXEL_BLEND((extrawidth), extrawidth);
671 #undef ONE_PIXEL_BLEND 668 #undef ONE_PIXEL_BLEND
672 srcp += srcskip >> 2; 669 srcp += srcskip >> 2;
673 dstp += dstskip >> 2; 670 dstp += dstskip >> 2;
674 } 671 }
675 } 672 }
676 } 673 }
677 674
678 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */ 675 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
679 /* Use this on a G5 */ 676 /* Use this on a G5 */
680 static void 677 static void
681 ConvertAltivec32to32_noprefetch (SDL_BlitInfo * info) 678 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
682 { 679 {
683 int height = info->d_height; 680 int height = info->d_height;
684 Uint32 *src = (Uint32 *) info->s_pixels; 681 Uint32 *src = (Uint32 *) info->s_pixels;
685 int srcskip = info->s_skip; 682 int srcskip = info->s_skip;
686 Uint32 *dst = (Uint32 *) info->d_pixels; 683 Uint32 *dst = (Uint32 *) info->d_pixels;
687 int dstskip = info->d_skip; 684 int dstskip = info->d_skip;
688 SDL_PixelFormat *srcfmt = info->src; 685 SDL_PixelFormat *srcfmt = info->src;
689 SDL_PixelFormat *dstfmt = info->dst; 686 SDL_PixelFormat *dstfmt = info->dst;
690 vector unsigned int vzero = vec_splat_u32 (0); 687 vector unsigned int vzero = vec_splat_u32(0);
691 vector unsigned char vpermute = calc_swizzle32 (srcfmt, dstfmt); 688 vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
692 if (dstfmt->Amask && !srcfmt->Amask) { 689 if (dstfmt->Amask && !srcfmt->Amask) {
693 if (srcfmt->alpha) { 690 if (srcfmt->alpha) {
694 vector unsigned char valpha; 691 vector unsigned char valpha;
695 ((unsigned char *) &valpha)[0] = srcfmt->alpha; 692 ((unsigned char *) &valpha)[0] = srcfmt->alpha;
696 vzero = (vector unsigned int) vec_splat (valpha, 0); 693 vzero = (vector unsigned int) vec_splat(valpha, 0);
697 } 694 }
698 } 695 }
699 696
700 assert (srcfmt->BytesPerPixel == 4); 697 assert(srcfmt->BytesPerPixel == 4);
701 assert (dstfmt->BytesPerPixel == 4); 698 assert(dstfmt->BytesPerPixel == 4);
702 699
703 while (height--) { 700 while (height--) {
704 vector unsigned char valigner; 701 vector unsigned char valigner;
705 vector unsigned int vbits; 702 vector unsigned int vbits;
706 vector unsigned int voverflow; 703 vector unsigned int voverflow;
709 706
710 int width = info->d_width; 707 int width = info->d_width;
711 int extrawidth; 708 int extrawidth;
712 709
713 /* do scalar until we can align... */ 710 /* do scalar until we can align... */
714 while ((UNALIGNED_PTR (dst)) && (width)) { 711 while ((UNALIGNED_PTR(dst)) && (width)) {
715 bits = *(src++); 712 bits = *(src++);
716 RGBA_FROM_8888 (bits, srcfmt, r, g, b, a); 713 RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
717 *(dst++) = MAKE8888 (dstfmt, r, g, b, a); 714 *(dst++) = MAKE8888(dstfmt, r, g, b, a);
718 width--; 715 width--;
719 } 716 }
720 717
721 /* After all that work, here's the vector part! */ 718 /* After all that work, here's the vector part! */
722 extrawidth = (width % 4); 719 extrawidth = (width % 4);
723 width -= extrawidth; 720 width -= extrawidth;
724 valigner = VEC_ALIGNER (src); 721 valigner = VEC_ALIGNER(src);
725 vbits = vec_ld (0, src); 722 vbits = vec_ld(0, src);
726 723
727 while (width) { 724 while (width) {
728 voverflow = vec_ld (15, src); 725 voverflow = vec_ld(15, src);
729 src += 4; 726 src += 4;
730 width -= 4; 727 width -= 4;
731 vbits = vec_perm (vbits, voverflow, valigner); /* src is ready. */ 728 vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
732 vbits = vec_perm (vbits, vzero, vpermute); /* swizzle it. */ 729 vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
733 vec_st (vbits, 0, dst); /* store it back out. */ 730 vec_st(vbits, 0, dst); /* store it back out. */
734 dst += 4; 731 dst += 4;
735 vbits = voverflow; 732 vbits = voverflow;
736 } 733 }
737 734
738 assert (width == 0); 735 assert(width == 0);
739 736
740 /* cover pixels at the end of the row that didn't fit in 16 bytes. */ 737 /* cover pixels at the end of the row that didn't fit in 16 bytes. */
741 while (extrawidth) { 738 while (extrawidth) {
742 bits = *(src++); /* max 7 pixels, don't bother with prefetch. */ 739 bits = *(src++); /* max 7 pixels, don't bother with prefetch. */
743 RGBA_FROM_8888 (bits, srcfmt, r, g, b, a); 740 RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
744 *(dst++) = MAKE8888 (dstfmt, r, g, b, a); 741 *(dst++) = MAKE8888(dstfmt, r, g, b, a);
745 extrawidth--; 742 extrawidth--;
746 } 743 }
747 744
748 src += srcskip >> 2; /* move to next row, accounting for pitch. */ 745 src += srcskip >> 2; /* move to next row, accounting for pitch. */
749 dst += dstskip >> 2; 746 dst += dstskip >> 2;
752 } 749 }
753 750
754 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */ 751 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
755 /* Use this on a G4 */ 752 /* Use this on a G4 */
756 static void 753 static void
757 ConvertAltivec32to32_prefetch (SDL_BlitInfo * info) 754 ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
758 { 755 {
759 const int scalar_dst_lead = sizeof (Uint32) * 4; 756 const int scalar_dst_lead = sizeof(Uint32) * 4;
760 const int vector_dst_lead = sizeof (Uint32) * 16; 757 const int vector_dst_lead = sizeof(Uint32) * 16;
761 758
762 int height = info->d_height; 759 int height = info->d_height;
763 Uint32 *src = (Uint32 *) info->s_pixels; 760 Uint32 *src = (Uint32 *) info->s_pixels;
764 int srcskip = info->s_skip; 761 int srcskip = info->s_skip;
765 Uint32 *dst = (Uint32 *) info->d_pixels; 762 Uint32 *dst = (Uint32 *) info->d_pixels;
766 int dstskip = info->d_skip; 763 int dstskip = info->d_skip;
767 SDL_PixelFormat *srcfmt = info->src; 764 SDL_PixelFormat *srcfmt = info->src;
768 SDL_PixelFormat *dstfmt = info->dst; 765 SDL_PixelFormat *dstfmt = info->dst;
769 vector unsigned int vzero = vec_splat_u32 (0); 766 vector unsigned int vzero = vec_splat_u32(0);
770 vector unsigned char vpermute = calc_swizzle32 (srcfmt, dstfmt); 767 vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
771 if (dstfmt->Amask && !srcfmt->Amask) { 768 if (dstfmt->Amask && !srcfmt->Amask) {
772 if (srcfmt->alpha) { 769 if (srcfmt->alpha) {
773 vector unsigned char valpha; 770 vector unsigned char valpha;
774 ((unsigned char *) &valpha)[0] = srcfmt->alpha; 771 ((unsigned char *) &valpha)[0] = srcfmt->alpha;
775 vzero = (vector unsigned int) vec_splat (valpha, 0); 772 vzero = (vector unsigned int) vec_splat(valpha, 0);
776 } 773 }
777 } 774 }
778 775
779 assert (srcfmt->BytesPerPixel == 4); 776 assert(srcfmt->BytesPerPixel == 4);
780 assert (dstfmt->BytesPerPixel == 4); 777 assert(dstfmt->BytesPerPixel == 4);
781 778
782 while (height--) { 779 while (height--) {
783 vector unsigned char valigner; 780 vector unsigned char valigner;
784 vector unsigned int vbits; 781 vector unsigned int vbits;
785 vector unsigned int voverflow; 782 vector unsigned int voverflow;
788 785
789 int width = info->d_width; 786 int width = info->d_width;
790 int extrawidth; 787 int extrawidth;
791 788
792 /* do scalar until we can align... */ 789 /* do scalar until we can align... */
793 while ((UNALIGNED_PTR (dst)) && (width)) { 790 while ((UNALIGNED_PTR(dst)) && (width)) {
794 vec_dstt (src + scalar_dst_lead, DST_CTRL (2, 32, 1024), 791 vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
795 DST_CHAN_SRC); 792 DST_CHAN_SRC);
796 vec_dstst (dst + scalar_dst_lead, DST_CTRL (2, 32, 1024), 793 vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
797 DST_CHAN_DEST); 794 DST_CHAN_DEST);
798 bits = *(src++); 795 bits = *(src++);
799 RGBA_FROM_8888 (bits, srcfmt, r, g, b, a); 796 RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
800 *(dst++) = MAKE8888 (dstfmt, r, g, b, a); 797 *(dst++) = MAKE8888(dstfmt, r, g, b, a);
801 width--; 798 width--;
802 } 799 }
803 800
804 /* After all that work, here's the vector part! */ 801 /* After all that work, here's the vector part! */
805 extrawidth = (width % 4); 802 extrawidth = (width % 4);
806 width -= extrawidth; 803 width -= extrawidth;
807 valigner = VEC_ALIGNER (src); 804 valigner = VEC_ALIGNER(src);
808 vbits = vec_ld (0, src); 805 vbits = vec_ld(0, src);
809 806
810 while (width) { 807 while (width) {
811 vec_dstt (src + vector_dst_lead, DST_CTRL (2, 32, 1024), 808 vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
812 DST_CHAN_SRC); 809 DST_CHAN_SRC);
813 vec_dstst (dst + vector_dst_lead, DST_CTRL (2, 32, 1024), 810 vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
814 DST_CHAN_DEST); 811 DST_CHAN_DEST);
815 voverflow = vec_ld (15, src); 812 voverflow = vec_ld(15, src);
816 src += 4; 813 src += 4;
817 width -= 4; 814 width -= 4;
818 vbits = vec_perm (vbits, voverflow, valigner); /* src is ready. */ 815 vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
819 vbits = vec_perm (vbits, vzero, vpermute); /* swizzle it. */ 816 vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
820 vec_st (vbits, 0, dst); /* store it back out. */ 817 vec_st(vbits, 0, dst); /* store it back out. */
821 dst += 4; 818 dst += 4;
822 vbits = voverflow; 819 vbits = voverflow;
823 } 820 }
824 821
825 assert (width == 0); 822 assert(width == 0);
826 823
827 /* cover pixels at the end of the row that didn't fit in 16 bytes. */ 824 /* cover pixels at the end of the row that didn't fit in 16 bytes. */
828 while (extrawidth) { 825 while (extrawidth) {
829 bits = *(src++); /* max 7 pixels, don't bother with prefetch. */ 826 bits = *(src++); /* max 7 pixels, don't bother with prefetch. */
830 RGBA_FROM_8888 (bits, srcfmt, r, g, b, a); 827 RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
831 *(dst++) = MAKE8888 (dstfmt, r, g, b, a); 828 *(dst++) = MAKE8888(dstfmt, r, g, b, a);
832 extrawidth--; 829 extrawidth--;
833 } 830 }
834 831
835 src += srcskip >> 2; /* move to next row, accounting for pitch. */ 832 src += srcskip >> 2; /* move to next row, accounting for pitch. */
836 dst += dstskip >> 2; 833 dst += dstskip >> 2;
837 } 834 }
838 835
839 vec_dss (DST_CHAN_SRC); 836 vec_dss(DST_CHAN_SRC);
840 vec_dss (DST_CHAN_DEST); 837 vec_dss(DST_CHAN_DEST);
841 } 838 }
842 839
843 static Uint32 840 static Uint32
844 GetBlitFeatures (void) 841 GetBlitFeatures(void)
845 { 842 {
846 static Uint32 features = 0xffffffff; 843 static Uint32 features = 0xffffffff;
847 if (features == 0xffffffff) { 844 if (features == 0xffffffff) {
848 /* Provide an override for testing .. */ 845 /* Provide an override for testing .. */
849 char *override = SDL_getenv ("SDL_ALTIVEC_BLIT_FEATURES"); 846 char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
850 if (override) { 847 if (override) {
851 features = 0; 848 features = 0;
852 SDL_sscanf (override, "%u", &features); 849 SDL_sscanf(override, "%u", &features);
853 } else { 850 } else {
854 features = (0 851 features = (0
855 /* Feature 1 is has-MMX */ 852 /* Feature 1 is has-MMX */
856 | ((SDL_HasMMX ())? 1 : 0) 853 | ((SDL_HasMMX())? 1 : 0)
857 /* Feature 2 is has-AltiVec */ 854 /* Feature 2 is has-AltiVec */
858 | ((SDL_HasAltiVec ())? 2 : 0) 855 | ((SDL_HasAltiVec())? 2 : 0)
859 /* Feature 4 is dont-use-prefetch */ 856 /* Feature 4 is dont-use-prefetch */
860 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */ 857 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
861 | ((GetL3CacheSize () == 0) ? 4 : 0) 858 | ((GetL3CacheSize() == 0) ? 4 : 0)
862 ); 859 );
863 } 860 }
864 } 861 }
865 return features; 862 return features;
866 } 863 }
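A hypothetical caller (not part of this hunk) might consult the feature mask like this; the bit values follow the comments above, and the prefetch split matches the "Use this on a G5" / "Use this on a G4" notes on the two converters:

    void (*blitfun)(SDL_BlitInfo *) = NULL;   /* illustrative; SDL's real dispatch table is elsewhere */
    Uint32 features = GetBlitFeatures();
    if (features & 2) {                       /* has-AltiVec */
        /* Feature 4 (dont-use-prefetch) is set when GetL3CacheSize() returns 0,
           which the code above treats as "probably a G5". */
        blitfun = (features & 4) ? ConvertAltivec32to32_noprefetch
                                 : ConvertAltivec32to32_prefetch;
    }
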
900 dst = (Uint8)((((src)&0x00E00000)>>16)| \ 897 dst = (Uint8)((((src)&0x00E00000)>>16)| \
901 (((src)&0x0000E000)>>11)| \ 898 (((src)&0x0000E000)>>11)| \
902 (((src)&0x000000C0)>>6)); \ 899 (((src)&0x000000C0)>>6)); \
903 } 900 }
904 static void 901 static void
905 Blit_RGB888_index8 (SDL_BlitInfo * info) 902 Blit_RGB888_index8(SDL_BlitInfo * info)
906 { 903 {
907 #ifndef USE_DUFFS_LOOP 904 #ifndef USE_DUFFS_LOOP
908 int c; 905 int c;
909 #endif 906 #endif
910 int width, height; 907 int width, height;
932 /* *INDENT-ON* */ 929 /* *INDENT-ON* */
933 #else 930 #else
934 for (c = width / 4; c; --c) { 931 for (c = width / 4; c; --c) {
935 /* Pack RGB into 8bit pixel */ 932 /* Pack RGB into 8bit pixel */
936 ++src; 933 ++src;
937 RGB888_RGB332 (*dst++, *src); 934 RGB888_RGB332(*dst++, *src);
938 ++src; 935 ++src;
939 RGB888_RGB332 (*dst++, *src); 936 RGB888_RGB332(*dst++, *src);
940 ++src; 937 ++src;
941 RGB888_RGB332 (*dst++, *src); 938 RGB888_RGB332(*dst++, *src);
942 ++src; 939 ++src;
943 } 940 }
944 switch (width & 3) { 941 switch (width & 3) {
945 case 3: 942 case 3:
946 RGB888_RGB332 (*dst++, *src); 943 RGB888_RGB332(*dst++, *src);
947 ++src; 944 ++src;
948 case 2: 945 case 2:
949 RGB888_RGB332 (*dst++, *src); 946 RGB888_RGB332(*dst++, *src);
950 ++src; 947 ++src;
951 case 1: 948 case 1:
952 RGB888_RGB332 (*dst++, *src); 949 RGB888_RGB332(*dst++, *src);
953 ++src; 950 ++src;
954 } 951 }
955 #endif /* USE_DUFFS_LOOP */ 952 #endif /* USE_DUFFS_LOOP */
956 src += srcskip; 953 src += srcskip;
957 dst += dstskip; 954 dst += dstskip;
969 , width); 966 , width);
970 /* *INDENT-ON* */ 967 /* *INDENT-ON* */
971 #else 968 #else
972 for (c = width / 4; c; --c) { 969 for (c = width / 4; c; --c) {
973 /* Pack RGB into 8bit pixel */ 970 /* Pack RGB into 8bit pixel */
974 RGB888_RGB332 (Pixel, *src); 971 RGB888_RGB332(Pixel, *src);
975 *dst++ = map[Pixel]; 972 *dst++ = map[Pixel];
976 ++src; 973 ++src;
977 RGB888_RGB332 (Pixel, *src); 974 RGB888_RGB332(Pixel, *src);
978 *dst++ = map[Pixel]; 975 *dst++ = map[Pixel];
979 ++src; 976 ++src;
980 RGB888_RGB332 (Pixel, *src); 977 RGB888_RGB332(Pixel, *src);
981 *dst++ = map[Pixel]; 978 *dst++ = map[Pixel];
982 ++src; 979 ++src;
983 RGB888_RGB332 (Pixel, *src); 980 RGB888_RGB332(Pixel, *src);
984 *dst++ = map[Pixel]; 981 *dst++ = map[Pixel];
985 ++src; 982 ++src;
986 } 983 }
987 switch (width & 3) { 984 switch (width & 3) {
988 case 3: 985 case 3:
989 RGB888_RGB332 (Pixel, *src); 986 RGB888_RGB332(Pixel, *src);
990 *dst++ = map[Pixel]; 987 *dst++ = map[Pixel];
991 ++src; 988 ++src;
992 case 2: 989 case 2:
993 RGB888_RGB332 (Pixel, *src); 990 RGB888_RGB332(Pixel, *src);
994 *dst++ = map[Pixel]; 991 *dst++ = map[Pixel];
995 ++src; 992 ++src;
996 case 1: 993 case 1:
997 RGB888_RGB332 (Pixel, *src); 994 RGB888_RGB332(Pixel, *src);
998 *dst++ = map[Pixel]; 995 *dst++ = map[Pixel];
999 ++src; 996 ++src;
1000 } 997 }
1001 #endif /* USE_DUFFS_LOOP */ 998 #endif /* USE_DUFFS_LOOP */
1002 src += srcskip; 999 src += srcskip;
1018 (((src[LO])&0x00F80000)>>9)| \ 1015 (((src[LO])&0x00F80000)>>9)| \
1019 (((src[LO])&0x0000F800)>>6)| \ 1016 (((src[LO])&0x0000F800)>>6)| \
1020 (((src[LO])&0x000000F8)>>3); \ 1017 (((src[LO])&0x000000F8)>>3); \
1021 } 1018 }
1022 static void 1019 static void
1023 Blit_RGB888_RGB555 (SDL_BlitInfo * info) 1020 Blit_RGB888_RGB555(SDL_BlitInfo * info)
1024 { 1021 {
1025 #ifndef USE_DUFFS_LOOP 1022 #ifndef USE_DUFFS_LOOP
1026 int c; 1023 int c;
1027 #endif 1024 #endif
1028 int width, height; 1025 int width, height;
1059 } 1056 }
1060 --width; 1057 --width;
1061 1058
1062 while (height--) { 1059 while (height--) {
1063 /* Perform copy alignment */ 1060 /* Perform copy alignment */
1064 RGB888_RGB555 (dst, src); 1061 RGB888_RGB555(dst, src);
1065 ++src; 1062 ++src;
1066 ++dst; 1063 ++dst;
1067 1064
1068 /* Copy in 4 pixel chunks */ 1065 /* Copy in 4 pixel chunks */
1069 for (c = width / 4; c; --c) { 1066 for (c = width / 4; c; --c) {
1070 RGB888_RGB555_TWO (dst, src); 1067 RGB888_RGB555_TWO(dst, src);
1071 src += 2; 1068 src += 2;
1072 dst += 2; 1069 dst += 2;
1073 RGB888_RGB555_TWO (dst, src); 1070 RGB888_RGB555_TWO(dst, src);
1074 src += 2; 1071 src += 2;
1075 dst += 2; 1072 dst += 2;
1076 } 1073 }
1077 /* Get any leftovers */ 1074 /* Get any leftovers */
1078 switch (width & 3) { 1075 switch (width & 3) {
1079 case 3: 1076 case 3:
1080 RGB888_RGB555 (dst, src); 1077 RGB888_RGB555(dst, src);
1081 ++src; 1078 ++src;
1082 ++dst; 1079 ++dst;
1083 case 2: 1080 case 2:
1084 RGB888_RGB555_TWO (dst, src); 1081 RGB888_RGB555_TWO(dst, src);
1085 src += 2; 1082 src += 2;
1086 dst += 2; 1083 dst += 2;
1087 break; 1084 break;
1088 case 1: 1085 case 1:
1089 RGB888_RGB555 (dst, src); 1086 RGB888_RGB555(dst, src);
1090 ++src; 1087 ++src;
1091 ++dst; 1088 ++dst;
1092 break; 1089 break;
1093 } 1090 }
1094 src += srcskip; 1091 src += srcskip;
1096 } 1093 }
1097 } else { 1094 } else {
1098 while (height--) { 1095 while (height--) {
1099 /* Copy in 4 pixel chunks */ 1096 /* Copy in 4 pixel chunks */
1100 for (c = width / 4; c; --c) { 1097 for (c = width / 4; c; --c) {
1101 RGB888_RGB555_TWO (dst, src); 1098 RGB888_RGB555_TWO(dst, src);
1102 src += 2; 1099 src += 2;
1103 dst += 2; 1100 dst += 2;
1104 RGB888_RGB555_TWO (dst, src); 1101 RGB888_RGB555_TWO(dst, src);
1105 src += 2; 1102 src += 2;
1106 dst += 2; 1103 dst += 2;
1107 } 1104 }
1108 /* Get any leftovers */ 1105 /* Get any leftovers */
1109 switch (width & 3) { 1106 switch (width & 3) {
1110 case 3: 1107 case 3:
1111 RGB888_RGB555 (dst, src); 1108 RGB888_RGB555(dst, src);
1112 ++src; 1109 ++src;
1113 ++dst; 1110 ++dst;
1114 case 2: 1111 case 2:
1115 RGB888_RGB555_TWO (dst, src); 1112 RGB888_RGB555_TWO(dst, src);
1116 src += 2; 1113 src += 2;
1117 dst += 2; 1114 dst += 2;
1118 break; 1115 break;
1119 case 1: 1116 case 1:
1120 RGB888_RGB555 (dst, src); 1117 RGB888_RGB555(dst, src);
1121 ++src; 1118 ++src;
1122 ++dst; 1119 ++dst;
1123 break; 1120 break;
1124 } 1121 }
1125 src += srcskip; 1122 src += srcskip;
1142 (((src[LO])&0x00F80000)>>8)| \ 1139 (((src[LO])&0x00F80000)>>8)| \
1143 (((src[LO])&0x0000FC00)>>5)| \ 1140 (((src[LO])&0x0000FC00)>>5)| \
1144 (((src[LO])&0x000000F8)>>3); \ 1141 (((src[LO])&0x000000F8)>>3); \
1145 } 1142 }
1146 static void 1143 static void
1147 Blit_RGB888_RGB565 (SDL_BlitInfo * info) 1144 Blit_RGB888_RGB565(SDL_BlitInfo * info)
1148 { 1145 {
1149 #ifndef USE_DUFFS_LOOP 1146 #ifndef USE_DUFFS_LOOP
1150 int c; 1147 int c;
1151 #endif 1148 #endif
1152 int width, height; 1149 int width, height;
1183 } 1180 }
1184 --width; 1181 --width;
1185 1182
1186 while (height--) { 1183 while (height--) {
1187 /* Perform copy alignment */ 1184 /* Perform copy alignment */
1188 RGB888_RGB565 (dst, src); 1185 RGB888_RGB565(dst, src);
1189 ++src; 1186 ++src;
1190 ++dst; 1187 ++dst;
1191 1188
1192 /* Copy in 4 pixel chunks */ 1189 /* Copy in 4 pixel chunks */
1193 for (c = width / 4; c; --c) { 1190 for (c = width / 4; c; --c) {
1194 RGB888_RGB565_TWO (dst, src); 1191 RGB888_RGB565_TWO(dst, src);
1195 src += 2; 1192 src += 2;
1196 dst += 2; 1193 dst += 2;
1197 RGB888_RGB565_TWO (dst, src); 1194 RGB888_RGB565_TWO(dst, src);
1198 src += 2; 1195 src += 2;
1199 dst += 2; 1196 dst += 2;
1200 } 1197 }
1201 /* Get any leftovers */ 1198 /* Get any leftovers */
1202 switch (width & 3) { 1199 switch (width & 3) {
1203 case 3: 1200 case 3:
1204 RGB888_RGB565 (dst, src); 1201 RGB888_RGB565(dst, src);
1205 ++src; 1202 ++src;
1206 ++dst; 1203 ++dst;
1207 case 2: 1204 case 2:
1208 RGB888_RGB565_TWO (dst, src); 1205 RGB888_RGB565_TWO(dst, src);
1209 src += 2; 1206 src += 2;
1210 dst += 2; 1207 dst += 2;
1211 break; 1208 break;
1212 case 1: 1209 case 1:
1213 RGB888_RGB565 (dst, src); 1210 RGB888_RGB565(dst, src);
1214 ++src; 1211 ++src;
1215 ++dst; 1212 ++dst;
1216 break; 1213 break;
1217 } 1214 }
1218 src += srcskip; 1215 src += srcskip;
1220 } 1217 }
1221 } else { 1218 } else {
1222 while (height--) { 1219 while (height--) {
1223 /* Copy in 4 pixel chunks */ 1220 /* Copy in 4 pixel chunks */
1224 for (c = width / 4; c; --c) { 1221 for (c = width / 4; c; --c) {
1225 RGB888_RGB565_TWO (dst, src); 1222 RGB888_RGB565_TWO(dst, src);
1226 src += 2; 1223 src += 2;
1227 dst += 2; 1224 dst += 2;
1228 RGB888_RGB565_TWO (dst, src); 1225 RGB888_RGB565_TWO(dst, src);
1229 src += 2; 1226 src += 2;
1230 dst += 2; 1227 dst += 2;
1231 } 1228 }
1232 /* Get any leftovers */ 1229 /* Get any leftovers */
1233 switch (width & 3) { 1230 switch (width & 3) {
1234 case 3: 1231 case 3:
1235 RGB888_RGB565 (dst, src); 1232 RGB888_RGB565(dst, src);
1236 ++src; 1233 ++src;
1237 ++dst; 1234 ++dst;
1238 case 2: 1235 case 2:
1239 RGB888_RGB565_TWO (dst, src); 1236 RGB888_RGB565_TWO(dst, src);
1240 src += 2; 1237 src += 2;
1241 dst += 2; 1238 dst += 2;
1242 break; 1239 break;
1243 case 1: 1240 case 1:
1244 RGB888_RGB565 (dst, src); 1241 RGB888_RGB565(dst, src);
1245 ++src; 1242 ++src;
1246 ++dst; 1243 ++dst;
1247 break; 1244 break;
1248 } 1245 }
1249 src += srcskip; 1246 src += srcskip;
1257 1254
1258 1255
1259 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */ 1256 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
1260 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1]) 1257 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
1261 static void 1258 static void
1262 Blit_RGB565_32 (SDL_BlitInfo * info, const Uint32 * map) 1259 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
1263 { 1260 {
1264 #ifndef USE_DUFFS_LOOP 1261 #ifndef USE_DUFFS_LOOP
1265 int c; 1262 int c;
1266 #endif 1263 #endif
1267 int width, height; 1264 int width, height;
1292 } 1289 }
1293 #else 1290 #else
1294 while (height--) { 1291 while (height--) {
1295 /* Copy in 4 pixel chunks */ 1292 /* Copy in 4 pixel chunks */
1296 for (c = width / 4; c; --c) { 1293 for (c = width / 4; c; --c) {
1297 *dst++ = RGB565_32 (dst, src, map); 1294 *dst++ = RGB565_32(dst, src, map);
1298 src += 2; 1295 src += 2;
1299 *dst++ = RGB565_32 (dst, src, map); 1296 *dst++ = RGB565_32(dst, src, map);
1300 src += 2; 1297 src += 2;
1301 *dst++ = RGB565_32 (dst, src, map); 1298 *dst++ = RGB565_32(dst, src, map);
1302 src += 2; 1299 src += 2;
1303 *dst++ = RGB565_32 (dst, src, map); 1300 *dst++ = RGB565_32(dst, src, map);
1304 src += 2; 1301 src += 2;
1305 } 1302 }
1306 /* Get any leftovers */ 1303 /* Get any leftovers */
1307 switch (width & 3) { 1304 switch (width & 3) {
1308 case 3: 1305 case 3:
1309 *dst++ = RGB565_32 (dst, src, map); 1306 *dst++ = RGB565_32(dst, src, map);
1310 src += 2; 1307 src += 2;
1311 case 2: 1308 case 2:
1312 *dst++ = RGB565_32 (dst, src, map); 1309 *dst++ = RGB565_32(dst, src, map);
1313 src += 2; 1310 src += 2;
1314 case 1: 1311 case 1:
1315 *dst++ = RGB565_32 (dst, src, map); 1312 *dst++ = RGB565_32(dst, src, map);
1316 src += 2; 1313 src += 2;
1317 break; 1314 break;
1318 } 1315 }
1319 src += srcskip; 1316 src += srcskip;
1320 dst += dstskip; 1317 dst += dstskip;
1452 0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100, 1449 0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
1453 0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100, 1450 0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
1454 0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200 1451 0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
1455 }; 1452 };
1456 static void 1453 static void
1457 Blit_RGB565_ARGB8888 (SDL_BlitInfo * info) 1454 Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
1458 { 1455 {
1459 Blit_RGB565_32 (info, RGB565_ARGB8888_LUT); 1456 Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
1460 } 1457 }
1461 1458
1462 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */ 1459 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
1463 static const Uint32 RGB565_ABGR8888_LUT[512] = { 1460 static const Uint32 RGB565_ABGR8888_LUT[512] = {
1464 0xff000000, 0x00000000, 0xff080000, 0x00002000, 1461 0xff000000, 0x00000000, 0xff080000, 0x00002000,
1589 0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff, 1586 0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
1590 0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff, 1587 0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
1591 0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff 1588 0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
1592 }; 1589 };
1593 static void 1590 static void
1594 Blit_RGB565_ABGR8888 (SDL_BlitInfo * info) 1591 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
1595 { 1592 {
1596 Blit_RGB565_32 (info, RGB565_ABGR8888_LUT); 1593 Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
1597 } 1594 }
1598 1595
1599 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */ 1596 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
1600 static const Uint32 RGB565_RGBA8888_LUT[512] = { 1597 static const Uint32 RGB565_RGBA8888_LUT[512] = {
1601 0x000000ff, 0x00000000, 0x000008ff, 0x00200000, 1598 0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
1726 0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000, 1723 0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
1727 0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000, 1724 0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
1728 0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000, 1725 0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
1729 }; 1726 };
1730 static void 1727 static void
1731 Blit_RGB565_RGBA8888 (SDL_BlitInfo * info) 1728 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
1732 { 1729 {
1733 Blit_RGB565_32 (info, RGB565_RGBA8888_LUT); 1730 Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
1734 } 1731 }
1735 1732
1736 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */ 1733 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */
1737 static const Uint32 RGB565_BGRA8888_LUT[512] = { 1734 static const Uint32 RGB565_BGRA8888_LUT[512] = {
1738 0x00000000, 0x000000ff, 0x08000000, 0x002000ff, 1735 0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
1863 0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff, 1860 0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
1864 0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff, 1861 0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
1865 0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff 1862 0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
1866 }; 1863 };
1867 static void 1864 static void
1868 Blit_RGB565_BGRA8888 (SDL_BlitInfo * info) 1865 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
1869 { 1866 {
1870 Blit_RGB565_32 (info, RGB565_BGRA8888_LUT); 1867 Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
1871 } 1868 }
1872 1869
1873 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */ 1870 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
1874 #ifndef RGB888_RGB332 1871 #ifndef RGB888_RGB332
1875 #define RGB888_RGB332(dst, src) { \ 1872 #define RGB888_RGB332(dst, src) { \
1876 dst = (((src)&0x00E00000)>>16)| \ 1873 dst = (((src)&0x00E00000)>>16)| \
1877 (((src)&0x0000E000)>>11)| \ 1874 (((src)&0x0000E000)>>11)| \
1878 (((src)&0x000000C0)>>6); \ 1875 (((src)&0x000000C0)>>6); \
1879 } 1876 }
1880 #endif 1877 #endif
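As a quick sanity check of the 3-3-2 packing above: red keeps its top three bits in positions 7-5, green its top three in 4-2, and blue its top two in 1-0. A hypothetical use (not part of the file):

    Uint8 d;
    Uint32 s = 0x00FF0000;      /* pure red in RGB 8-8-8 */
    RGB888_RGB332(d, s);        /* d == 0xE0: only the red bits survive */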
1881 static void 1878 static void
1882 Blit_RGB888_index8_map (SDL_BlitInfo * info) 1879 Blit_RGB888_index8_map(SDL_BlitInfo * info)
1883 { 1880 {
1884 #ifndef USE_DUFFS_LOOP 1881 #ifndef USE_DUFFS_LOOP
1885 int c; 1882 int c;
1886 #endif 1883 #endif
1887 int Pixel; 1884 int Pixel;
1914 } 1911 }
1915 #else 1912 #else
1916 while (height--) { 1913 while (height--) {
1917 for (c = width / 4; c; --c) { 1914 for (c = width / 4; c; --c) {
1918 /* Pack RGB into 8bit pixel */ 1915 /* Pack RGB into 8bit pixel */
1919 RGB888_RGB332 (Pixel, *src); 1916 RGB888_RGB332(Pixel, *src);
1920 *dst++ = map[Pixel]; 1917 *dst++ = map[Pixel];
1921 ++src; 1918 ++src;
1922 RGB888_RGB332 (Pixel, *src); 1919 RGB888_RGB332(Pixel, *src);
1923 *dst++ = map[Pixel]; 1920 *dst++ = map[Pixel];
1924 ++src; 1921 ++src;
1925 RGB888_RGB332 (Pixel, *src); 1922 RGB888_RGB332(Pixel, *src);
1926 *dst++ = map[Pixel]; 1923 *dst++ = map[Pixel];
1927 ++src; 1924 ++src;
1928 RGB888_RGB332 (Pixel, *src); 1925 RGB888_RGB332(Pixel, *src);
1929 *dst++ = map[Pixel]; 1926 *dst++ = map[Pixel];
1930 ++src; 1927 ++src;
1931 } 1928 }
1932 switch (width & 3) { 1929 switch (width & 3) {
1933 case 3: 1930 case 3:
1934 RGB888_RGB332 (Pixel, *src); 1931 RGB888_RGB332(Pixel, *src);
1935 *dst++ = map[Pixel]; 1932 *dst++ = map[Pixel];
1936 ++src; 1933 ++src;
1937 case 2: 1934 case 2:
1938 RGB888_RGB332 (Pixel, *src); 1935 RGB888_RGB332(Pixel, *src);
1939 *dst++ = map[Pixel]; 1936 *dst++ = map[Pixel];
1940 ++src; 1937 ++src;
1941 case 1: 1938 case 1:
1942 RGB888_RGB332 (Pixel, *src); 1939 RGB888_RGB332(Pixel, *src);
1943 *dst++ = map[Pixel]; 1940 *dst++ = map[Pixel];
1944 ++src; 1941 ++src;
1945 } 1942 }
1946 src += srcskip; 1943 src += srcskip;
1947 dst += dstskip; 1944 dst += dstskip;
1948 } 1945 }
1949 #endif /* USE_DUFFS_LOOP */ 1946 #endif /* USE_DUFFS_LOOP */
1950 } 1947 }
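When USE_DUFFS_LOOP is not defined, the loop above handles four pixels per pass and finishes the remaining width & 3 pixels with an intentionally fall-through switch. A minimal self-contained sketch of that idiom (the helper name and plain byte-copy body are illustrative, not from the file):

static void
copy_unrolled_sketch(Uint8 * dst, const Uint8 * src, int width)
{
    int c;
    for (c = width / 4; c; --c) {       /* main body: four items per pass */
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
    }
    switch (width & 3) {                /* tail: cases fall through */
    case 3:
        *dst++ = *src++;
    case 2:
        *dst++ = *src++;
    case 1:
        *dst++ = *src++;
    }
}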
1951 static void 1948 static void
1952 BlitNto1 (SDL_BlitInfo * info) 1949 BlitNto1(SDL_BlitInfo * info)
1953 { 1950 {
1954 #ifndef USE_DUFFS_LOOP 1951 #ifndef USE_DUFFS_LOOP
1955 int c; 1952 int c;
1956 #endif 1953 #endif
1957 int width, height; 1954 int width, height;
1992 src += srcbpp; 1989 src += srcbpp;
1993 , width); 1990 , width);
1994 /* *INDENT-ON* */ 1991 /* *INDENT-ON* */
1995 #else 1992 #else
1996 for (c = width; c; --c) { 1993 for (c = width; c; --c) {
1997 DISEMBLE_RGB (src, srcbpp, srcfmt, Pixel, sR, sG, sB); 1994 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
1998 if (1) { 1995 if (1) {
1999 /* Pack RGB into 8bit pixel */ 1996 /* Pack RGB into 8bit pixel */
2000 *dst = ((sR >> 5) << (3 + 2)) | 1997 *dst = ((sR >> 5) << (3 + 2)) |
2001 ((sG >> 5) << (2)) | ((sB >> 6) << (0)); 1998 ((sG >> 5) << (2)) | ((sB >> 6) << (0));
2002 } 1999 }
2024 src += srcbpp; 2021 src += srcbpp;
2025 , width); 2022 , width);
2026 /* *INDENT-ON* */ 2023 /* *INDENT-ON* */
2027 #else 2024 #else
2028 for (c = width; c; --c) { 2025 for (c = width; c; --c) {
2029 DISEMBLE_RGB (src, srcbpp, srcfmt, Pixel, sR, sG, sB); 2026 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
2030 if (1) { 2027 if (1) {
2031 /* Pack RGB into 8bit pixel */ 2028 /* Pack RGB into 8bit pixel */
2032 *dst = map[((sR >> 5) << (3 + 2)) | 2029 *dst = map[((sR >> 5) << (3 + 2)) |
2033 ((sG >> 5) << (2)) | ((sB >> 6) << (0))]; 2030 ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
2034 } 2031 }
2042 } 2039 }
2043 } 2040 }
2044 2041
2045 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */ 2042 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
2046 static void 2043 static void
2047 Blit4to4MaskAlpha (SDL_BlitInfo * info) 2044 Blit4to4MaskAlpha(SDL_BlitInfo * info)
2048 { 2045 {
2049 int width = info->d_width; 2046 int width = info->d_width;
2050 int height = info->d_height; 2047 int height = info->d_height;
2051 Uint32 *src = (Uint32 *) info->s_pixels; 2048 Uint32 *src = (Uint32 *) info->s_pixels;
2052 int srcskip = info->s_skip; 2049 int srcskip = info->s_skip;
2091 } 2088 }
2092 } 2089 }
2093 } 2090 }
2094 2091
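Only the declarations of Blit4to4MaskAlpha are visible in this hunk; because both formats share the same R, G and B fields, the conversion presumably reduces to copying whole 32-bit pixels and fixing up just the alpha bits. A sketch of the RGB -> RGBA direction under that assumption (helper name and parameters are illustrative; skips are taken to be in bytes, as elsewhere in this file):

static void
copy32_set_alpha_sketch(Uint32 * dst, const Uint32 * src,
                        int width, int height,
                        int srcskip, int dstskip, Uint32 amask)
{
    while (height--) {
        int c;
        for (c = width; c; --c) {
            *dst++ = *src++ | amask;    /* force the destination alpha bits */
        }
        src = (const Uint32 *) ((const Uint8 *) src + srcskip);
        dst = (Uint32 *) ((Uint8 *) dst + dstskip);
    }
}

The opposite direction (RGBA -> RGB) would instead mask the alpha bits off, e.g. *dst++ = *src++ & (Rmask | Gmask | Bmask); the alpha value the real function ORs in may also be derived from the per-surface alpha rather than the full mask.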
2095 static void 2092 static void
2096 BlitNtoN (SDL_BlitInfo * info) 2093 BlitNtoN(SDL_BlitInfo * info)
2097 { 2094 {
2098 int width = info->d_width; 2095 int width = info->d_width;
2099 int height = info->d_height; 2096 int height = info->d_height;
2100 Uint8 *src = info->s_pixels; 2097 Uint8 *src = info->s_pixels;
2101 int srcskip = info->s_skip; 2098 int srcskip = info->s_skip;
2126 dst += dstskip; 2123 dst += dstskip;
2127 } 2124 }
2128 } 2125 }
2129 2126
2130 static void 2127 static void
2131 BlitNtoNCopyAlpha (SDL_BlitInfo * info) 2128 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
2132 { 2129 {
2133 int width = info->d_width; 2130 int width = info->d_width;
2134 int height = info->d_height; 2131 int height = info->d_height;
2135 Uint8 *src = info->s_pixels; 2132 Uint8 *src = info->s_pixels;
2136 int srcskip = info->s_skip; 2133 int srcskip = info->s_skip;
2145 /* FIXME: should map alpha to [0..255] correctly! */ 2142 /* FIXME: should map alpha to [0..255] correctly! */
2146 while (height--) { 2143 while (height--) {
2147 for (c = width; c; --c) { 2144 for (c = width; c; --c) {
2148 Uint32 Pixel; 2145 Uint32 Pixel;
2149 unsigned sR, sG, sB, sA; 2146 unsigned sR, sG, sB, sA;
2150 DISEMBLE_RGBA (src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); 2147 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
2151 ASSEMBLE_RGBA (dst, dstbpp, dstfmt, sR, sG, sB, sA); 2148 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
2152 dst += dstbpp; 2149 dst += dstbpp;
2153 src += srcbpp; 2150 src += srcbpp;
2154 } 2151 }
2155 src += srcskip; 2152 src += srcskip;
2156 dst += dstskip; 2153 dst += dstskip;
2157 } 2154 }
2158 } 2155 }
2159 2156
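BlitNtoNCopyAlpha leans entirely on the DISEMBLE_RGBA / ASSEMBLE_RGBA macros from SDL_blit.h, which unpack a pixel into channel values using each format's shift and loss fields and repack them for the destination. A conceptual, 32-bit-only expansion (the real macros also handle 1-, 2- and 3-byte pixels, so treat this as an approximation):

static Uint32
convert_pixel32_sketch(Uint32 pixel,
                       const SDL_PixelFormat * srcfmt,
                       const SDL_PixelFormat * dstfmt)
{
    unsigned sR = ((pixel & srcfmt->Rmask) >> srcfmt->Rshift) << srcfmt->Rloss;
    unsigned sG = ((pixel & srcfmt->Gmask) >> srcfmt->Gshift) << srcfmt->Gloss;
    unsigned sB = ((pixel & srcfmt->Bmask) >> srcfmt->Bshift) << srcfmt->Bloss;
    unsigned sA = ((pixel & srcfmt->Amask) >> srcfmt->Ashift) << srcfmt->Aloss;

    return ((Uint32) (sR >> dstfmt->Rloss) << dstfmt->Rshift) |
           ((Uint32) (sG >> dstfmt->Gloss) << dstfmt->Gshift) |
           ((Uint32) (sB >> dstfmt->Bloss) << dstfmt->Bshift) |
           ((Uint32) (sA >> dstfmt->Aloss) << dstfmt->Ashift);
}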
2160 static void 2157 static void
2161 BlitNto1Key (SDL_BlitInfo * info) 2158 BlitNto1Key(SDL_BlitInfo * info)
2162 { 2159 {
2163 int width = info->d_width; 2160 int width = info->d_width;
2164 int height = info->d_height; 2161 int height = info->d_height;
2165 Uint8 *src = info->s_pixels; 2162 Uint8 *src = info->s_pixels;
2166 int srcskip = info->s_skip; 2163 int srcskip = info->s_skip;
2222 } 2219 }
2223 } 2220 }
2224 } 2221 }
2225 2222
2226 static void 2223 static void
2227 Blit2to2Key (SDL_BlitInfo * info) 2224 Blit2to2Key(SDL_BlitInfo * info)
2228 { 2225 {
2229 int width = info->d_width; 2226 int width = info->d_width;
2230 int height = info->d_height; 2227 int height = info->d_height;
2231 Uint16 *srcp = (Uint16 *) info->s_pixels; 2228 Uint16 *srcp = (Uint16 *) info->s_pixels;
2232 int srcskip = info->s_skip; 2229 int srcskip = info->s_skip;
2256 dstp += dstskip; 2253 dstp += dstskip;
2257 } 2254 }
2258 } 2255 }
2259 2256
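The colorkey inner loops are not shown in this hunk; the idea is to compare each source pixel with the transparent key and copy only the pixels that differ. A self-contained sketch for the 16-bit case (the helper name and the explicit key parameter are assumptions; the real function presumably reads the key from the blit info and uses DUFFS_LOOP):

static void
blit16_keyed_sketch(Uint16 * dstp, const Uint16 * srcp,
                    int width, int height,
                    int srcskip, int dstskip, Uint16 key)
{
    while (height--) {
        int c;
        for (c = width; c; --c) {
            if (*srcp != key)           /* keyed pixels stay untouched */
                *dstp = *srcp;
            ++srcp;
            ++dstp;
        }
        srcp = (const Uint16 *) ((const Uint8 *) srcp + srcskip);
        dstp = (Uint16 *) ((Uint8 *) dstp + dstskip);
    }
}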
2260 static void 2257 static void
2261 BlitNtoNKey (SDL_BlitInfo * info) 2258 BlitNtoNKey(SDL_BlitInfo * info)
2262 { 2259 {
2263 int width = info->d_width; 2260 int width = info->d_width;
2264 int height = info->d_height; 2261 int height = info->d_height;
2265 Uint8 *src = info->s_pixels; 2262 Uint8 *src = info->s_pixels;
2266 int srcskip = info->s_skip; 2263 int srcskip = info->s_skip;
2300 dst += dstskip; 2297 dst += dstskip;
2301 } 2298 }
2302 } 2299 }
2303 2300
2304 static void 2301 static void
2305 BlitNtoNKeyCopyAlpha (SDL_BlitInfo * info) 2302 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
2306 { 2303 {
2307 int width = info->d_width; 2304 int width = info->d_width;
2308 int height = info->d_height; 2305 int height = info->d_height;
2309 Uint8 *src = info->s_pixels; 2306 Uint8 *src = info->s_pixels;
2310 int srcskip = info->s_skip; 2307 int srcskip = info->s_skip;
2476 2473
2477 /* Mask matches table, or table entry is zero */ 2474 /* Mask matches table, or table entry is zero */
2478 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000)) 2475 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
2479 2476
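MASKOK is the wildcard test used by the table search in SDL_CalculateBlitN below: a zero mask in the blit table matches any surface mask, otherwise the masks must be identical. For instance:

/* MASKOK(0x00FF0000, 0x00FF0000) -> true   (exact match)
 * MASKOK(0x00FF0000, 0x00000000) -> true   (zero entry acts as a wildcard)
 * MASKOK(0x00FF0000, 0x000000FF) -> false  (masks differ) */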
2480 SDL_loblit 2477 SDL_loblit
2481 SDL_CalculateBlitN (SDL_Surface * surface, int blit_index) 2478 SDL_CalculateBlitN(SDL_Surface * surface, int blit_index)
2482 { 2479 {
2483 struct private_swaccel *sdata; 2480 struct private_swaccel *sdata;
2484 SDL_PixelFormat *srcfmt; 2481 SDL_PixelFormat *srcfmt;
2485 SDL_PixelFormat *dstfmt; 2482 SDL_PixelFormat *dstfmt;
2486 const struct blit_table *table; 2483 const struct blit_table *table;
2492 srcfmt = surface->format; 2489 srcfmt = surface->format;
2493 dstfmt = surface->map->dst->format; 2490 dstfmt = surface->map->dst->format;
2494 2491
2495 if (blit_index & 2) { 2492 if (blit_index & 2) {
2496 /* alpha or alpha+colorkey */ 2493 /* alpha or alpha+colorkey */
2497 return SDL_CalculateAlphaBlit (surface, blit_index); 2494 return SDL_CalculateAlphaBlit(surface, blit_index);
2498 } 2495 }
2499 2496
2500 /* We don't support destinations less than 8-bits */ 2497 /* We don't support destinations less than 8-bits */
2501 if (dstfmt->BitsPerPixel < 8) { 2498 if (dstfmt->BitsPerPixel < 8) {
2502 return (NULL); 2499 return (NULL);
2512 else if (dstfmt->BytesPerPixel == 1) 2509 else if (dstfmt->BytesPerPixel == 1)
2513 return BlitNto1Key; 2510 return BlitNto1Key;
2514 else { 2511 else {
2515 #if SDL_ALTIVEC_BLITTERS 2512 #if SDL_ALTIVEC_BLITTERS
2516 if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) 2513 if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
2517 && SDL_HasAltiVec ()) { 2514 && SDL_HasAltiVec()) {
2518 return Blit32to32KeyAltivec; 2515 return Blit32to32KeyAltivec;
2519 } else 2516 } else
2520 #endif 2517 #endif
2521 2518
2522 if (srcfmt->Amask && dstfmt->Amask) 2519 if (srcfmt->Amask && dstfmt->Amask)
2550 int a_need = NO_ALPHA; 2547 int a_need = NO_ALPHA;
2551 if (dstfmt->Amask) 2548 if (dstfmt->Amask)
2552 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA; 2549 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
2553 table = normal_blit[srcfmt->BytesPerPixel - 1]; 2550 table = normal_blit[srcfmt->BytesPerPixel - 1];
2554 for (which = 0; table[which].dstbpp; ++which) { 2551 for (which = 0; table[which].dstbpp; ++which) {
2555 if (MASKOK (srcfmt->Rmask, table[which].srcR) && 2552 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
2556 MASKOK (srcfmt->Gmask, table[which].srcG) && 2553 MASKOK(srcfmt->Gmask, table[which].srcG) &&
2557 MASKOK (srcfmt->Bmask, table[which].srcB) && 2554 MASKOK(srcfmt->Bmask, table[which].srcB) &&
2558 MASKOK (dstfmt->Rmask, table[which].dstR) && 2555 MASKOK(dstfmt->Rmask, table[which].dstR) &&
2559 MASKOK (dstfmt->Gmask, table[which].dstG) && 2556 MASKOK(dstfmt->Gmask, table[which].dstG) &&
2560 MASKOK (dstfmt->Bmask, table[which].dstB) && 2557 MASKOK(dstfmt->Bmask, table[which].dstB) &&
2561 dstfmt->BytesPerPixel == table[which].dstbpp && 2558 dstfmt->BytesPerPixel == table[which].dstbpp &&
2562 (a_need & table[which].alpha) == a_need && 2559 (a_need & table[which].alpha) == a_need &&
2563 ((table[which].blit_features & GetBlitFeatures ()) == 2560 ((table[which].blit_features & GetBlitFeatures()) ==
2564 table[which].blit_features)) 2561 table[which].blit_features))
2565 break; 2562 break;
2566 } 2563 }
2567 sdata->aux_data = table[which].aux_data; 2564 sdata->aux_data = table[which].aux_data;
2568 blitfun = table[which].blitfunc; 2565 blitfun = table[which].blitfunc;
2581 } 2578 }
2582 2579
2583 #ifdef DEBUG_ASM 2580 #ifdef DEBUG_ASM
2584 #if SDL_HERMES_BLITTERS 2581 #if SDL_HERMES_BLITTERS
2585 if (blitfun == ConvertMMX) 2582 if (blitfun == ConvertMMX)
2586 fprintf (stderr, "Using mmx blit\n"); 2583 fprintf(stderr, "Using mmx blit\n");
2587 else if (blitfun == ConvertX86) 2584 else if (blitfun == ConvertX86)
2588 fprintf (stderr, "Using asm blit\n"); 2585 fprintf(stderr, "Using asm blit\n");
2589 else 2586 else
2590 #endif 2587 #endif
2591 if ((blitfun == BlitNtoN) || (blitfun == BlitNto1)) 2588 if ((blitfun == BlitNtoN) || (blitfun == BlitNto1))
2592 fprintf (stderr, "Using C blit\n"); 2589 fprintf(stderr, "Using C blit\n");
2593 else 2590 else
2594 fprintf (stderr, "Using optimized C blit\n"); 2591 fprintf(stderr, "Using optimized C blit\n");
2595 #endif /* DEBUG_ASM */ 2592 #endif /* DEBUG_ASM */
2596 2593
2597 return (blitfun); 2594 return (blitfun);
2598 } 2595 }
2599 2596