Mercurial > sdl-ios-xcode
comparison src/video/SDL_blit.h @ 689:5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
From: Stephane Marchesin
Subject: Re: [SDL] [patch] MMX alpha blit patches with MMX detection
I think everything is correct now. I've done as much testing as I could,
but some real-world testing wouldn't hurt, I think.
The patch is here: http://icps.u-strasbg.fr/~marchesin/sdl_mmxblit.patch
If you do byte-by-byte comparison of the output between C and MMX
functions, you'll notice that the results for 555 and 565 RGB alpha
blits aren't exactly the same. This is because the MMX functions for 555 and
565 RGB have a higher accuracy. If you want the exact same behaviour,
that's possible by masking the three lower alpha bits in the MMX
functions. Just ask!
I removed one MMX function because, after I fixed it to match its C
equivalent, it turned out to be slower than the C version on a PIII
(although a bit faster on an Athlon XP).
I've also added MMX and PIII replacements for SDL_memcpy. Those provide
some speedup in testvidinfo -benchmark (at least for me, under Linux &
X11).
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Fri, 22 Aug 2003 05:51:19 +0000 |
parents | 417f8709e648 |
children | b8d311d90021 |
comparison
equal
deleted
inserted
replaced
688:c0522010bb6d | 689:5bb080d35049 |
---|---|
408 case 1: pixel_copy_increment; \ | 408 case 1: pixel_copy_increment; \ |
409 } while ( --n > 0 ); \ | 409 } while ( --n > 0 ); \ |
410 } \ | 410 } \ |
411 } | 411 } |
412 | 412 |
413 /* 2 - times unrolled loop */ | |
414 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \ | |
415 double_pixel_copy_increment, width) \ | |
416 { int n, w = width; \ | |
417 if( w & 1 ) { \ | |
418 pixel_copy_increment; \ | |
419 w--; \ | |
420 } \ | |
421 if ( w > 0 ) { \ | |
422 n = ( w + 2) / 4; \ | |
423 switch( w & 2 ) { \ | |
424 case 0: do { double_pixel_copy_increment; \ | |
425 case 2: double_pixel_copy_increment; \ | |
426 } while ( --n > 0 ); \ | |
427 } \ | |
428 } \ | |
429 } | |
430 | |
431 /* 2 - times unrolled loop 4 pixels */ | |
432 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \ | |
433 double_pixel_copy_increment, \ | |
434 quatro_pixel_copy_increment, width) \ | |
435 { int n, w = width; \ | |
436 if(w & 1) { \ | |
437 pixel_copy_increment; \ | |
438 w--; \ | |
439 } \ | |
440 if(w & 2) { \ | |
441 double_pixel_copy_increment; \ | |
442 w -= 2; \ | |
443 } \ | |
444 if ( w > 0 ) { \ | |
445 n = ( w + 7 ) / 8; \ | |
446 switch( w & 4 ) { \ | |
447 case 0: do { quatro_pixel_copy_increment; \ | |
448 case 4: quatro_pixel_copy_increment; \ | |
449 } while ( --n > 0 ); \ | |
450 } \ | |
451 } \ | |
452 } | |
453 | |
413 /* Use the 8-times version of the loop by default */ | 454 /* Use the 8-times version of the loop by default */ |
414 #define DUFFS_LOOP(pixel_copy_increment, width) \ | 455 #define DUFFS_LOOP(pixel_copy_increment, width) \ |
415 DUFFS_LOOP8(pixel_copy_increment, width) | 456 DUFFS_LOOP8(pixel_copy_increment, width) |
416 | 457 |
417 #else | 458 #else |
459 | |
460 /* Don't use Duff's device to unroll loops */ | |
461 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \ | |
462 double_pixel_copy_increment, width) \ | |
463 { int n = width; \ | |
464 if( n & 1 ) { \ | |
465 pixel_copy_increment; \ | |
466 n--; \ | |
467 } \ | |
468 n=n>>1; \ | |
469 for(; n > 0; --n) { \ | |
470 double_pixel_copy_increment; \ | |
471 } \ | |
472 } | |
473 | |
474 /* Don't use Duff's device to unroll loops */ | |
475 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \ | |
476 double_pixel_copy_increment, \ | |
477 quatro_pixel_copy_increment, width) \ | |
478 { int n = width; \ | |
479 if(n & 1) { \ | |
480 pixel_copy_increment; \ | |
481 n--; \ | |
482 } \ | |
483 if(n & 2) { \ | |
484 double_pixel_copy_increment; \ | |
485 n -= 2; \ | |
486 } \ | |
487 n=n>>2; \ | |
488 for(; n > 0; --n) { \ | |
489 quatro_pixel_copy_increment; \ | |
490 } \ | |
491 } | |
418 | 492 |
419 /* Don't use Duff's device to unroll loops */ | 493 /* Don't use Duff's device to unroll loops */ |
420 #define DUFFS_LOOP(pixel_copy_increment, width) \ | 494 #define DUFFS_LOOP(pixel_copy_increment, width) \ |
421 { int n; \ | 495 { int n; \ |
422 for ( n=width; n > 0; --n ) { \ | 496 for ( n=width; n > 0; --n ) { \ |