comparison src/video/SDL_surface.c @ 2239:31835fd24b2b

Added SSE version of SDL_FillRect() for 32-bit ARGB surfaces
author Sam Lantinga <slouken@libsdl.org>
date Mon, 13 Aug 2007 06:24:56 +0000
parents 55a8bebf9ba6
children 5a58b57b6724
comparison
equal deleted inserted replaced
2238:93ea84f77d83 2239:31835fd24b2b
585 } 585 }
586 } 586 }
587 } else { 587 } else {
588 switch (dst->format->BytesPerPixel) { 588 switch (dst->format->BytesPerPixel) {
589 case 2: 589 case 2:
590 for (y = dstrect->h; y; --y) { 590 {
591 Uint16 *pixels = (Uint16 *) row;
592 Uint16 c = (Uint16) color; 591 Uint16 c = (Uint16) color;
593 Uint32 cc = (Uint32) c << 16 | c; 592 Uint32 cc = (Uint32) c << 16 | c;
594 int n = dstrect->w; 593 for (y = dstrect->h; y; --y) {
595 if ((uintptr_t) pixels & 3) { 594 Uint16 *pixels = (Uint16 *) row;
596 *pixels++ = c; 595 int n = dstrect->w;
597 n--; 596 if ((uintptr_t) pixels & 3) {
597 *pixels++ = c;
598 n--;
599 }
600 if (n >> 1)
601 SDL_memset4(pixels, cc, n >> 1);
602 if (n & 1)
603 pixels[n - 1] = c;
604 row += dst->pitch;
598 } 605 }
599 if (n >> 1)
600 SDL_memset4(pixels, cc, n >> 1);
601 if (n & 1)
602 pixels[n - 1] = c;
603 row += dst->pitch;
604 } 606 }
605 break; 607 break;
606 608
607 case 3: 609 case 3:
608 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 610 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
617 row += dst->pitch; 619 row += dst->pitch;
618 } 620 }
619 break; 621 break;
620 622
621 case 4: 623 case 4:
624 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES
625 if (SDL_HasSSE() && !((uintptr_t) row & 15) && !(dstrect->w & 3)) {
626 Uint32 cccc[4] __attribute__ ((aligned(16))) = {
627 color, color, color, color};
628 int i, n = dstrect->w / 4;
629 __asm__ __volatile__(" movdqa (%0), %%xmm0\n"::
630 "r"(cccc):"memory");
631 for (y = dstrect->h; y; --y) {
632 Uint8 *pixels = row;
633 for (i = n / 2; i--;) {
634 /* *INDENT-OFF* */
635 __asm__ __volatile__(" prefetchnta 256(%0)\n"
636 " movdqa %%xmm0, (%0)\n"
637 " movdqa %%xmm0, 16(%0)\n"::"r"(pixels):"memory");
638 /* *INDENT-ON* */
639 pixels += 32;
640 }
641 if (n & 1) {
642 __asm__ __volatile__(" movdqa %%xmm0, (%0)\n"::
643 "r"(pixels):"memory");
644 }
645 row += dst->pitch;
646 }
647 __asm__ __volatile__(" emms\n"::);
648 break;
649 }
650 #endif
622 for (y = dstrect->h; y; --y) { 651 for (y = dstrect->h; y; --y) {
623 SDL_memset4(row, color, dstrect->w); 652 SDL_memset4(row, color, dstrect->w);
624 row += dst->pitch; 653 row += dst->pitch;
625 } 654 }
626 break; 655 break;