Mercurial > sdl-ios-xcode
comparison src/video/SDL_surface.c @ 2239:31835fd24b2b
Added SSE version of SDL_FillRect() for 32-bit ARGB surfaces
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 13 Aug 2007 06:24:56 +0000 |
parents | 55a8bebf9ba6 |
children | 5a58b57b6724 |
comparison legend: equal | deleted | inserted | replaced
2238:93ea84f77d83 | 2239:31835fd24b2b |
---|---|
585 } | 585 } |
586 } | 586 } |
587 } else { | 587 } else { |
588 switch (dst->format->BytesPerPixel) { | 588 switch (dst->format->BytesPerPixel) { |
589 case 2: | 589 case 2: |
590 for (y = dstrect->h; y; --y) { | 590 { |
591 Uint16 *pixels = (Uint16 *) row; | |
592 Uint16 c = (Uint16) color; | 591 Uint16 c = (Uint16) color; |
593 Uint32 cc = (Uint32) c << 16 | c; | 592 Uint32 cc = (Uint32) c << 16 | c; |
594 int n = dstrect->w; | 593 for (y = dstrect->h; y; --y) { |
595 if ((uintptr_t) pixels & 3) { | 594 Uint16 *pixels = (Uint16 *) row; |
596 *pixels++ = c; | 595 int n = dstrect->w; |
597 n--; | 596 if ((uintptr_t) pixels & 3) { |
597 *pixels++ = c; | |
598 n--; | |
599 } | |
600 if (n >> 1) | |
601 SDL_memset4(pixels, cc, n >> 1); | |
602 if (n & 1) | |
603 pixels[n - 1] = c; | |
604 row += dst->pitch; | |
598 } | 605 } |
599 if (n >> 1) | |
600 SDL_memset4(pixels, cc, n >> 1); | |
601 if (n & 1) | |
602 pixels[n - 1] = c; | |
603 row += dst->pitch; | |
604 } | 606 } |
605 break; | 607 break; |
606 | 608 |
607 case 3: | 609 case 3: |
608 #if SDL_BYTEORDER == SDL_BIG_ENDIAN | 610 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
617 row += dst->pitch; | 619 row += dst->pitch; |
618 } | 620 } |
619 break; | 621 break; |
620 | 622 |
621 case 4: | 623 case 4: |
624 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES | |
625 if (SDL_HasSSE() && !((uintptr_t) row & 15) && !(dstrect->w & 3)) { | |
626 Uint32 cccc[4] __attribute__ ((aligned(16))) = { | |
627 color, color, color, color}; | |
628 int i, n = dstrect->w / 4; | |
629 __asm__ __volatile__(" movdqa (%0), %%xmm0\n":: | |
630 "r"(cccc):"memory"); | |
631 for (y = dstrect->h; y; --y) { | |
632 Uint8 *pixels = row; | |
633 for (i = n / 2; i--;) { | |
634 /* *INDENT-OFF* */ | |
635 __asm__ __volatile__(" prefetchnta 256(%0)\n" | |
636 " movdqa %%xmm0, (%0)\n" | |
637 " movdqa %%xmm0, 16(%0)\n"::"r"(pixels):"memory"); | |
638 /* *INDENT-ON* */ | |
639 pixels += 32; | |
640 } | |
641 if (n & 1) { | |
642 __asm__ __volatile__(" movdqa %%xmm0, (%0)\n":: | |
643 "r"(pixels):"memory"); | |
644 } | |
645 row += dst->pitch; | |
646 } | |
647 __asm__ __volatile__(" emms\n"::); | |
648 break; | |
649 } | |
650 #endif | |
622 for (y = dstrect->h; y; --y) { | 651 for (y = dstrect->h; y; --y) { |
623 SDL_memset4(row, color, dstrect->w); | 652 SDL_memset4(row, color, dstrect->w); |
624 row += dst->pitch; | 653 row += dst->pitch; |
625 } | 654 } |
626 break; | 655 break; |