comparison src/video/SDL_blit.h @ 689:5bb080d35049

Date: Tue, 19 Aug 2003 17:57:00 +0200 From: Stephane Marchesin Subject: Re: [SDL] [patch] MMX alpha blit patches with MMX detection I think everything is correct now. I've done as much testing as I could, but some real-world testing wouldn't hurt, I think. The patch is here: http://icps.u-strasbg.fr/~marchesin/sdl_mmxblit.patch If you do a byte-by-byte comparison of the output between the C and MMX functions, you'll notice that the results for 555 and 565 RGB alpha blits aren't exactly the same. This is because the MMX functions for 555 and 565 RGB have a higher accuracy. If you want the exact same behaviour, that's possible by masking the three lower alpha bits in the MMX functions. Just ask! I removed one MMX function because after I fixed it to match its C equivalent, it turned out to be slower than the C version on a PIII (although a bit faster on an Athlon XP). I've also added MMX and PIII replacements for SDL_memcpy. Those provide some speedup in testvidinfo -benchmark (at least for me, under Linux & X11).
author Sam Lantinga <slouken@libsdl.org>
date Fri, 22 Aug 2003 05:51:19 +0000
parents 417f8709e648
children b8d311d90021
comparison
equal deleted inserted replaced
688:c0522010bb6d 689:5bb080d35049
408 case 1: pixel_copy_increment; \ 408 case 1: pixel_copy_increment; \
409 } while ( --n > 0 ); \ 409 } while ( --n > 0 ); \
410 } \ 410 } \
411 } 411 }
412 412
413 /* 2 - times unrolled loop: runs pixel_copy_increment once when width is odd, then runs double_pixel_copy_increment once for every remaining two units of width (Duff's device, two double steps per loop pass). Does nothing when width is 0. */
414 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
415 double_pixel_copy_increment, width) \
416 { int n, w = width; \
417 if( w & 1 ) { /* odd width: take one single step first so w becomes even */ \
418 pixel_copy_increment; \
419 w--; \
420 } \
421 if ( w > 0 ) { \
422 n = ( w + 2) / 4; /* number of do-while passes: w / 4 rounded up (w is even here) */ \
423 switch( w & 2 ) { /* w not a multiple of 4: jump to case 2 so the first pass runs only one double step */ \
424 case 0: do { double_pixel_copy_increment; \
425 case 2: double_pixel_copy_increment; \
426 } while ( --n > 0 ); \
427 } \
428 } \
429 }
430
431 /* 2 - times unrolled loop 4 pixels: runs pixel_copy_increment once when bit 0 of width is set, double_pixel_copy_increment once when bit 1 is set, then quatro_pixel_copy_increment once for every remaining four units of width (Duff's device, two quad steps per loop pass). Does nothing when width is 0. */
432 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
433 double_pixel_copy_increment, \
434 quatro_pixel_copy_increment, width) \
435 { int n, w = width; \
436 if(w & 1) { /* odd width: take one single step first */ \
437 pixel_copy_increment; \
438 w--; \
439 } \
440 if(w & 2) { /* leftover pair: take one double step so w becomes a multiple of 4 */ \
441 double_pixel_copy_increment; \
442 w -= 2; \
443 } \
444 if ( w > 0 ) { \
445 n = ( w + 7 ) / 8; /* number of do-while passes: w / 8 rounded up */ \
446 switch( w & 4 ) { /* w not a multiple of 8: jump to case 4 so the first pass runs only one quad step */ \
447 case 0: do { quatro_pixel_copy_increment; \
448 case 4: quatro_pixel_copy_increment; \
449 } while ( --n > 0 ); \
450 } \
451 } \
452 }
453
413 /* Use the 8-times version of the loop by default */ 454 /* Use the 8-times version of the loop by default */
414 #define DUFFS_LOOP(pixel_copy_increment, width) \ 455 #define DUFFS_LOOP(pixel_copy_increment, width) \
415 DUFFS_LOOP8(pixel_copy_increment, width) 456 DUFFS_LOOP8(pixel_copy_increment, width)
416 457
417 #else 458 #else
459
460 /* Don't use Duff's device to unroll loops: plain-loop DUFFS_LOOP_DOUBLE2 that runs pixel_copy_increment once when width is odd, then double_pixel_copy_increment once for every remaining two units of width */
461 #define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
462 double_pixel_copy_increment, width) \
463 { int n = width; \
464 if( n & 1 ) { /* odd width: take one single step first */ \
465 pixel_copy_increment; \
466 n--; \
467 } \
468 n=n>>1; /* remaining width -> number of double steps */ \
469 for(; n > 0; --n) { \
470 double_pixel_copy_increment; \
471 } \
472 }
473
474 /* Don't use Duff's device to unroll loops: plain-loop DUFFS_LOOP_QUATRO2 that runs pixel_copy_increment once when bit 0 of width is set, double_pixel_copy_increment once when bit 1 is set, then quatro_pixel_copy_increment once for every remaining four units of width */
475 #define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
476 double_pixel_copy_increment, \
477 quatro_pixel_copy_increment, width) \
478 { int n = width; \
479 if(n & 1) { /* odd width: take one single step first */ \
480 pixel_copy_increment; \
481 n--; \
482 } \
483 if(n & 2) { /* leftover pair: take one double step so n becomes a multiple of 4 */ \
484 double_pixel_copy_increment; \
485 n -= 2; \
486 } \
487 n=n>>2; /* remaining width -> number of quad steps */ \
488 for(; n > 0; --n) { \
489 quatro_pixel_copy_increment; \
490 } \
491 }
418 492
419 /* Don't use Duff's device to unroll loops */ 493 /* Don't use Duff's device to unroll loops */
420 #define DUFFS_LOOP(pixel_copy_increment, width) \ 494 #define DUFFS_LOOP(pixel_copy_increment, width) \
421 { int n; \ 495 { int n; \
422 for ( n=width; n > 0; --n ) { \ 496 for ( n=width; n > 0; --n ) { \