Mercurial > sdl-ios-xcode
annotate src/video/SDL_blit_A.c @ 3896:1a327643e741 SDL-1.2
X11 backend: tell app that mouse focus has been obtained when grabbing the
input. This allows us to enable DGA mode, even when the system cursor was
outside of the window when the grab was initiated.
Fixes Bugzilla #299, which has a more detailed explanation of the issue.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Mon, 13 Nov 2006 03:58:22 +0000 |
parents | 571c75f3d093 |
children | 081aecdb0911 |
rev | line source |
---|---|
0 | 1 /* |
2 SDL - Simple DirectMedia Layer | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
3 Copyright (C) 1997-2006 Sam Lantinga |
0 | 4 |
5 This library is free software; you can redistribute it and/or | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
6 modify it under the terms of the GNU Lesser General Public |
0 | 7 License as published by the Free Software Foundation; either |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
8 version 2.1 of the License, or (at your option) any later version. |
0 | 9 |
10 This library is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
13 Lesser General Public License for more details. |
0 | 14 |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
15 You should have received a copy of the GNU Lesser General Public |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
16 License along with this library; if not, write to the Free Software |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 18 |
19 Sam Lantinga | |
252
e8157fcb3114
Updated the source with the correct e-mail address
Sam Lantinga <slouken@libsdl.org>
parents:
1
diff
changeset
|
20 slouken@libsdl.org |
0 | 21 */ |
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
22 #include "SDL_config.h" |
0 | 23 |
24 #include "SDL_video.h" | |
25 #include "SDL_blit.h" | |
26 | |
1542 | 27 #if SDL_ASSEMBLY_ROUTINES |
28 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
29 #define MMX_ASMBLIT 1 |
1542 | 30 #define GCC_ASMBLIT 1 |
31 #elif defined(_MSC_VER) && (_MSC_VER >= 1200) && defined(_M_IX86) | |
32 #define MMX_ASMBLIT 1 | |
33 #define MSVC_ASMBLIT 1 | |
880
9ef41050100c
Date: Tue, 30 Mar 2004 21:26:47 -0600
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
34 #endif |
1542 | 35 #endif /* SDL_ASSEMBLY_ROUTINES */ |
880
9ef41050100c
Date: Tue, 30 Mar 2004 21:26:47 -0600
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
36 |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
37 /* Function to check the CPU flags */ |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
38 #include "SDL_cpuinfo.h" |
1542 | 39 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
40 #include "mmx.h" |
1542 | 41 #elif MSVC_ASMBLIT |
42 #include <mmintrin.h> | |
43 #include <mm3dnow.h> | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
44 #endif |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
45 |
0 | 46 /* Functions to perform alpha blended blitting */ |
47 | |
48 /* N->1 blending with per-surface alpha */ | |
49 static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) | |
50 { | |
51 int width = info->d_width; | |
52 int height = info->d_height; | |
53 Uint8 *src = info->s_pixels; | |
54 int srcskip = info->s_skip; | |
55 Uint8 *dst = info->d_pixels; | |
56 int dstskip = info->d_skip; | |
57 Uint8 *palmap = info->table; | |
58 SDL_PixelFormat *srcfmt = info->src; | |
59 SDL_PixelFormat *dstfmt = info->dst; | |
60 int srcbpp = srcfmt->BytesPerPixel; | |
61 | |
62 const unsigned A = srcfmt->alpha; | |
63 | |
64 while ( height-- ) { | |
65 DUFFS_LOOP4( | |
66 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
67 Uint32 Pixel; |
0 | 68 unsigned sR; |
69 unsigned sG; | |
70 unsigned sB; | |
71 unsigned dR; | |
72 unsigned dG; | |
73 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
74 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
0 | 75 dR = dstfmt->palette->colors[*dst].r; |
76 dG = dstfmt->palette->colors[*dst].g; | |
77 dB = dstfmt->palette->colors[*dst].b; | |
78 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); | |
79 dR &= 0xff; | |
80 dG &= 0xff; | |
81 dB &= 0xff; | |
82 /* Pack RGB into 8bit pixel */ | |
83 if ( palmap == NULL ) { | |
84 *dst =((dR>>5)<<(3+2))| | |
85 ((dG>>5)<<(2))| | |
86 ((dB>>6)<<(0)); | |
87 } else { | |
88 *dst = palmap[((dR>>5)<<(3+2))| | |
89 ((dG>>5)<<(2)) | | |
90 ((dB>>6)<<(0))]; | |
91 } | |
92 dst++; | |
93 src += srcbpp; | |
94 }, | |
95 width); | |
96 src += srcskip; | |
97 dst += dstskip; | |
98 } | |
99 } | |
100 | |
101 /* N->1 blending with pixel alpha */ | |
102 static void BlitNto1PixelAlpha(SDL_BlitInfo *info) | |
103 { | |
104 int width = info->d_width; | |
105 int height = info->d_height; | |
106 Uint8 *src = info->s_pixels; | |
107 int srcskip = info->s_skip; | |
108 Uint8 *dst = info->d_pixels; | |
109 int dstskip = info->d_skip; | |
110 Uint8 *palmap = info->table; | |
111 SDL_PixelFormat *srcfmt = info->src; | |
112 SDL_PixelFormat *dstfmt = info->dst; | |
113 int srcbpp = srcfmt->BytesPerPixel; | |
114 | |
115 /* FIXME: fix alpha bit field expansion here too? */ | |
116 while ( height-- ) { | |
117 DUFFS_LOOP4( | |
118 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
119 Uint32 Pixel; |
0 | 120 unsigned sR; |
121 unsigned sG; | |
122 unsigned sB; | |
123 unsigned sA; | |
124 unsigned dR; | |
125 unsigned dG; | |
126 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
127 DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); |
0 | 128 dR = dstfmt->palette->colors[*dst].r; |
129 dG = dstfmt->palette->colors[*dst].g; | |
130 dB = dstfmt->palette->colors[*dst].b; | |
131 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); | |
132 dR &= 0xff; | |
133 dG &= 0xff; | |
134 dB &= 0xff; | |
135 /* Pack RGB into 8bit pixel */ | |
136 if ( palmap == NULL ) { | |
137 *dst =((dR>>5)<<(3+2))| | |
138 ((dG>>5)<<(2))| | |
139 ((dB>>6)<<(0)); | |
140 } else { | |
141 *dst = palmap[((dR>>5)<<(3+2))| | |
142 ((dG>>5)<<(2)) | | |
143 ((dB>>6)<<(0)) ]; | |
144 } | |
145 dst++; | |
146 src += srcbpp; | |
147 }, | |
148 width); | |
149 src += srcskip; | |
150 dst += dstskip; | |
151 } | |
152 } | |
153 | |
154 /* colorkeyed N->1 blending with per-surface alpha */ | |
155 static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) | |
156 { | |
157 int width = info->d_width; | |
158 int height = info->d_height; | |
159 Uint8 *src = info->s_pixels; | |
160 int srcskip = info->s_skip; | |
161 Uint8 *dst = info->d_pixels; | |
162 int dstskip = info->d_skip; | |
163 Uint8 *palmap = info->table; | |
164 SDL_PixelFormat *srcfmt = info->src; | |
165 SDL_PixelFormat *dstfmt = info->dst; | |
166 int srcbpp = srcfmt->BytesPerPixel; | |
167 Uint32 ckey = srcfmt->colorkey; | |
168 | |
169 const int A = srcfmt->alpha; | |
170 | |
171 while ( height-- ) { | |
172 DUFFS_LOOP( | |
173 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
174 Uint32 Pixel; |
0 | 175 unsigned sR; |
176 unsigned sG; | |
177 unsigned sB; | |
178 unsigned dR; | |
179 unsigned dG; | |
180 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
181 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
182 if ( Pixel != ckey ) { |
0 | 183 dR = dstfmt->palette->colors[*dst].r; |
184 dG = dstfmt->palette->colors[*dst].g; | |
185 dB = dstfmt->palette->colors[*dst].b; | |
186 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); | |
187 dR &= 0xff; | |
188 dG &= 0xff; | |
189 dB &= 0xff; | |
190 /* Pack RGB into 8bit pixel */ | |
191 if ( palmap == NULL ) { | |
192 *dst =((dR>>5)<<(3+2))| | |
193 ((dG>>5)<<(2)) | | |
194 ((dB>>6)<<(0)); | |
195 } else { | |
196 *dst = palmap[((dR>>5)<<(3+2))| | |
197 ((dG>>5)<<(2)) | | |
198 ((dB>>6)<<(0)) ]; | |
199 } | |
200 } | |
201 dst++; | |
202 src += srcbpp; | |
203 }, | |
204 width); | |
205 src += srcskip; | |
206 dst += dstskip; | |
207 } | |
208 } | |
209 | |
1542 | 210 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
211 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
212 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
213 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
214 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
215 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
216 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
217 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
218 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
219 int dstskip = info->d_skip >> 2; |
1542 | 220 Uint32 dalpha = info->dst->Amask; |
221 Uint8 load[8]; | |
222 | |
223 *(Uint64 *)load = 0x00fefefe00fefefeULL;/* alpha128 mask */ | |
224 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ | |
225 *(Uint64 *)load = 0x0001010100010101ULL;/* !alpha128 mask */ | |
226 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ | |
227 movd_m2r(dalpha, mm7); /* dst alpha mask */ | |
228 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
229 while(height--) { |
1542 | 230 DUFFS_LOOP_DOUBLE2( |
231 { | |
232 Uint32 s = *srcp++; | |
233 Uint32 d = *dstp; | |
234 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | |
235 + (s & d & 0x00010101)) | dalpha; | |
236 },{ | |
237 movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ | |
238 movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ | |
239 | |
240 movq_m2r((*srcp), mm1);/* 2 x src -> mm1(ARGBARGB) */ | |
241 movq_r2r(mm1, mm5); /* 2 x src -> mm5(ARGBARGB) */ | |
242 | |
243 pand_r2r(mm4, mm6); /* dst & mask -> mm6 */ | |
244 pand_r2r(mm4, mm5); /* src & mask -> mm5 */ | |
245 paddd_r2r(mm6, mm5); /* mm6 + mm5 -> mm5 */ | |
246 pand_r2r(mm1, mm2); /* src & dst -> mm2 */ | |
247 psrld_i2r(1, mm5); /* mm5 >> 1 -> mm5 */ | |
248 pand_r2r(mm3, mm2); /* mm2 & !mask -> mm2 */ | |
249 paddd_r2r(mm5, mm2); /* mm5 + mm2 -> mm2 */ | |
250 | |
251 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | |
252 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ | |
253 dstp += 2; | |
254 srcp += 2; | |
255 }, width); | |
256 srcp += srcskip; | |
257 dstp += dstskip; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
258 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
259 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
260 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
261 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
262 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
263 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
264 { |
1542 | 265 SDL_PixelFormat* df = info->dst; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
266 unsigned alpha = info->src->alpha; |
1542 | 267 |
268 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | |
269 /* only call a128 version when R,G,B occupy lower bits */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
270 BlitRGBtoRGBSurfaceAlpha128MMX(info); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
271 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
272 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
273 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
274 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
275 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
276 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
277 int dstskip = info->d_skip >> 2; |
1542 | 278 |
279 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ | |
280 /* form the alpha mult */ | |
281 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ | |
282 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | |
283 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | |
284 alpha = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); | |
285 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ | |
286 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ | |
287 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ | |
288 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ | |
289 movd_m2r(df->Amask, mm7); /* dst alpha mask */ | |
290 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
291 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
292 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
293 DUFFS_LOOP_DOUBLE2({ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
294 /* One Pixel Blend */ |
1542 | 295 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ |
296 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
297 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ | |
298 punpcklbw_r2r(mm5, mm2); /* 0A0R0G0B -> mm2(dst) */ | |
299 | |
300 psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ | |
301 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
302 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | |
303 paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | |
304 | |
305 packuswb_r2r(mm5, mm2); /* ARGBARGB -> mm2 */ | |
306 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | |
307 movd_r2m(mm2, *dstp);/* mm2 -> pixel */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
308 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
309 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
310 },{ |
1542 | 311 /* Two Pixels Blend */ |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
312 movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ |
1542 | 313 movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ |
314 movq_r2r(mm0, mm1); /* 2 x src -> mm1(ARGBARGB) */ | |
315 movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ | |
316 | |
317 punpcklbw_r2r(mm5, mm0); /* low - 0A0R0G0B -> mm0(src1) */ | |
318 punpckhbw_r2r(mm5, mm1); /* high - 0A0R0G0B -> mm1(src2) */ | |
319 punpcklbw_r2r(mm5, mm2); /* low - 0A0R0G0B -> mm2(dst1) */ | |
320 punpckhbw_r2r(mm5, mm6); /* high - 0A0R0G0B -> mm6(dst2) */ | |
321 | |
322 psubw_r2r(mm2, mm0);/* src1 - dst1 -> mm0 */ | |
323 pmullw_r2r(mm4, mm0); /* mm0 * alpha -> mm0 */ | |
324 psrlw_i2r(8, mm0); /* mm0 >> 8 -> mm1 */ | |
325 paddb_r2r(mm0, mm2); /* mm0 + mm2(dst1) -> mm2 */ | |
326 | |
327 psubw_r2r(mm6, mm1);/* src2 - dst2 -> mm1 */ | |
328 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
329 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | |
330 paddb_r2r(mm1, mm6); /* mm1 + mm6(dst2) -> mm6 */ | |
331 | |
332 packuswb_r2r(mm6, mm2); /* ARGBARGB -> mm2 */ | |
333 por_r2r(mm7, mm2); /* mm7(dst alpha) | mm2 -> mm2 */ | |
334 | |
335 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ | |
336 | |
337 srcp += 2; | |
338 dstp += 2; | |
339 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
340 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
341 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
342 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
343 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
344 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
345 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
346 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
347 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
348 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
349 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
350 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
351 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
352 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
353 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
354 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
355 int dstskip = info->d_skip >> 2; |
1542 | 356 SDL_PixelFormat* sf = info->src; |
357 Uint32 amask = sf->Amask; | |
358 | |
359 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ | |
360 /* form multiplication mask */ | |
361 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ | |
362 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ | |
363 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ | |
364 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ | |
365 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ | |
366 /* form channel masks */ | |
367 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ | |
368 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ | |
369 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ | |
370 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ | |
371 /* get alpha channel shift */ | |
372 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */ | |
373 | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
374 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
375 DUFFS_LOOP4({ |
1542 | 376 Uint32 alpha = *srcp & amask; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
377 /* FIXME: Here we special-case opaque alpha since the |
1542 | 378 compositioning used (>>8 instead of /255) doesn't handle |
379 it correctly. Also special-case alpha=0 for speed? | |
380 Benchmark this! */ | |
381 if(alpha == 0) { | |
382 /* do nothing */ | |
383 } else if(alpha == amask) { | |
384 /* opaque alpha -- copy RGB, keep dst alpha */ | |
385 /* using MMX here to free up regular registers for other things */ | |
386 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
387 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
388 pand_r2r(mm0, mm1); /* src & chanmask -> mm1 */ | |
389 pand_r2r(mm3, mm2); /* dst & ~chanmask -> mm2 */ | |
390 por_r2r(mm1, mm2); /* src | dst -> mm2 */ | |
391 movd_r2m(mm2, (*dstp)); /* mm2 -> dst */ | |
392 } else { | |
393 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
394 punpcklbw_r2r(mm6, mm1); /* 0A0R0G0B -> mm1 */ | |
395 | |
396 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
397 punpcklbw_r2r(mm6, mm2); /* 0A0R0G0B -> mm2 */ | |
398 | |
399 __asm__ __volatile__ ( | |
400 "movd %0, %%mm4" | |
401 : : "r" (alpha) ); /* 0000A000 -> mm4 */ | |
402 psrld_r2r(mm5, mm4); /* mm4 >> mm5 -> mm4 (0000000A) */ | |
403 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | |
404 punpcklwd_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | |
405 pand_r2r(mm7, mm4); /* 000A0A0A -> mm4, preserve dst alpha on add */ | |
406 | |
407 /* blend */ | |
408 psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ | |
409 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
410 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1(000R0G0B) */ | |
411 paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | |
412 | |
413 packuswb_r2r(mm6, mm2); /* 0000ARGB -> mm2 */ | |
414 movd_r2m(mm2, *dstp);/* mm2 -> dst */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
415 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
416 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
417 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
418 }, width); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
419 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
420 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
421 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
422 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
423 } |
1542 | 424 /* End GCC_ASMBLIT */ |
425 | |
426 #elif MSVC_ASMBLIT | |
427 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | |
428 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) | |
429 { | |
430 int width = info->d_width; | |
431 int height = info->d_height; | |
432 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
433 int srcskip = info->s_skip >> 2; | |
434 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
435 int dstskip = info->d_skip >> 2; | |
436 Uint32 dalpha = info->dst->Amask; | |
437 | |
438 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; | |
439 | |
440 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ | |
441 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ | |
442 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ | |
443 | |
444 while (height--) { | |
445 int n = width; | |
446 if ( n & 1 ) { | |
447 Uint32 s = *srcp++; | |
448 Uint32 d = *dstp; | |
449 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | |
450 + (s & d & 0x00010101)) | dalpha; | |
451 n--; | |
452 } | |
453 | |
454 for (n >>= 1; n > 0; --n) { | |
455 dst1 = *(__m64*)dstp; /* 2 x dst -> dst1(ARGBARGB) */ | |
456 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | |
457 | |
458 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB) */ | |
459 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | |
460 | |
461 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ | |
462 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ | |
463 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ | |
464 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ | |
465 | |
466 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ | |
467 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ | |
468 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ | |
469 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ | |
470 | |
471 *(__m64*)dstp = dst1; /* dst1 -> 2 x dst pixels */ | |
472 dstp += 2; | |
473 srcp += 2; | |
474 } | |
475 | |
476 srcp += srcskip; | |
477 dstp += dstskip; | |
478 } | |
479 _mm_empty(); | |
480 } | |
481 | |
482 /* fast RGB888->(A)RGB888 blending with surface alpha */ | |
483 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) | |
484 { | |
485 SDL_PixelFormat* df = info->dst; | |
486 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; | |
487 unsigned alpha = info->src->alpha; | |
488 | |
489 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | |
490 /* only call a128 version when R,G,B occupy lower bits */ | |
491 BlitRGBtoRGBSurfaceAlpha128MMX(info); | |
492 } else { | |
493 int width = info->d_width; | |
494 int height = info->d_height; | |
495 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
496 int srcskip = info->s_skip >> 2; | |
497 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
498 int dstskip = info->d_skip >> 2; | |
499 Uint32 dalpha = df->Amask; | |
500 Uint32 amult; | |
501 | |
502 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; | |
503 | |
504 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
505 /* form the alpha mult */ | |
506 amult = alpha | (alpha << 8); | |
507 amult = amult | (amult << 16); | |
508 chanmask = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); | |
509 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ | |
510 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ | |
511 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ | |
512 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ | |
513 | |
514 while (height--) { | |
515 int n = width; | |
516 if (n & 1) { | |
517 /* One Pixel Blend */ | |
518 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB)*/ | |
519 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ | |
520 | |
521 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
522 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
523 | |
524 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ | |
525 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
526 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ | |
527 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ | |
528 | |
529 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
530 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ | |
531 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
532 | |
533 ++srcp; | |
534 ++dstp; | |
535 | |
536 n--; | |
537 } | |
538 | |
539 for (n >>= 1; n > 0; --n) { | |
540 /* Two Pixels Blend */ | |
541 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB)*/ | |
542 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | |
543 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ | |
544 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ | |
545 | |
546 dst1 = *(__m64*)dstp;/* 2 x dst -> dst1(ARGBARGB) */ | |
547 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | |
548 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ | |
549 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ | |
550 | |
551 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ | |
552 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ | |
553 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ | |
554 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ | |
555 | |
556 src2 = _mm_sub_pi16(src2, dst2);/* src2 - dst2 -> src2 */ | |
557 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
558 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ | |
559 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ | |
560 | |
561 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ | |
562 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ | |
563 | |
564 *(__m64*)dstp = dst1; /* dst1 -> 2 x pixel */ | |
565 | |
566 srcp += 2; | |
567 dstp += 2; | |
568 } | |
569 srcp += srcskip; | |
570 dstp += dstskip; | |
571 } | |
572 _mm_empty(); | |
573 } | |
574 } | |
575 | |
576 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | |
577 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) | |
578 { | |
579 int width = info->d_width; | |
580 int height = info->d_height; | |
581 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
582 int srcskip = info->s_skip >> 2; | |
583 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
584 int dstskip = info->d_skip >> 2; | |
585 SDL_PixelFormat* sf = info->src; | |
586 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | |
587 Uint32 amask = sf->Amask; | |
588 Uint32 ashift = sf->Ashift; | |
589 Uint64 multmask; | |
590 | |
591 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | |
592 | |
593 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
594 multmask = ~(0xFFFFi64 << (ashift * 2)); | |
595 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ | |
596 | |
597 while(height--) { | |
598 DUFFS_LOOP4({ | |
599 Uint32 alpha = *srcp & amask; | |
600 if (alpha == 0) { | |
601 /* do nothing */ | |
602 } else if (alpha == amask) { | |
603 /* opaque alpha -- copy RGB, keep dst alpha */ | |
604 *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); | |
605 } else { | |
606 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ | |
607 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ | |
608 | |
609 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
610 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
611 | |
612 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ | |
613 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ | |
614 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
615 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
616 mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ | |
617 | |
618 /* blend */ | |
619 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ | |
620 src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src1 - dst1) * alpha -> src1 */ | |
621 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ | |
622 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1 -> dst1(0A0R0G0B) */ | |
623 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
624 | |
625 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
626 } | |
627 ++srcp; | |
628 ++dstp; | |
629 }, width); | |
630 srcp += srcskip; | |
631 dstp += dstskip; | |
632 } | |
633 _mm_empty(); | |
634 } | |
635 /* End MSVC_ASMBLIT */ | |
636 | |
637 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
638 |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
639 #if SDL_ALTIVEC_BLITTERS |
1795 | 640 #if __MWERKS__ |
641 #pragma altivec_model on | |
642 #endif | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
643 #if HAVE_ALTIVEC_H |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
644 #include <altivec.h> |
1175
867f521591e5
Fixed Altivec support on Mac OS X.
Ryan C. Gordon <icculus@icculus.org>
parents:
1162
diff
changeset
|
645 #endif |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
646 #include <assert.h> |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
647 |
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
/*
 * Vector-constant spelling.  Mac OS X builds with GCC older than 4 get the
 * parenthesised literal form; every other configuration gets the
 * brace-initialiser form.  Both macros simply wrap their arguments in the
 * matching cast.
 */
#if defined(__MACOSX__) && (__GNUC__ < 4)
#  define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
#  define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
#  define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
#  define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#endif
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
659 |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
/*
 * Low four bits of a pointer's address: non-zero when the pointer is not
 * 16-byte aligned.  The argument is parenthesised so that a pointer
 * expression such as `p + n` is converted as a whole instead of casting
 * `p` and then adding `n` as a plain integer.
 */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
/*
 * Debug aid: reinterpret `v` as four 32-bit words and print them in hex,
 * prefixed with the string `msg`.
 */
#define VECPRINT(msg, v) do { \
        vector unsigned int tmpvec = (vector unsigned int)(v); \
        unsigned int *vp = (unsigned int *)&tmpvec; \
        printf("%s = %08X %08X %08X %08X\n", \
               msg, vp[0], vp[1], vp[2], vp[3]); \
    } while (0)
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
666 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
/*
 * The permutation vector that takes the high bytes out of all the
 * appropriate shorts:
 *
 *   (vector unsigned char)(
 *       0x00, 0x10, 0x02, 0x12,
 *       0x04, 0x14, 0x06, 0x16,
 *       0x08, 0x18, 0x0A, 0x1A,
 *       0x0C, 0x1C, 0x0E, 0x1E );
 *
 * It is produced at run time by adding a splatted 16-bit 0x000F to the
 * vec_lvsl() result for address 0.
 */
#define VEC_MERGE_PERMUTE() \
    (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))

/* 24 in every 32-bit element, formed as 12 + 12. */
#define VEC_U32_24() \
    (vec_add(vec_splat_u32(12), vec_splat_u32(12)))

/* All-ones words shifted left by 24: only the top byte of each word is set. */
#define VEC_ALPHA_MASK() \
    ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
/*
 * Pick the permute vector used to realign data read from `src`:
 *   - src not 16-byte aligned: vec_lvsl(0, src);
 *   - src aligned:             vec_lvsl(8, src) with 8 added to every byte.
 *
 * `src` is parenthesised wherever it is forwarded so that a pointer
 * expression (e.g. `p + n`) is tested and used as a single value.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR((src))) \
        ? vec_lvsl(0, (src)) \
        : vec_add(vec_lvsl(8, (src)), vec_splat_u8(8)))
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
680 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
681 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
/*
 * Blend source bytes `vs` over destination bytes `vd` using per-byte weights
 * `valpha`, storing the result back into `vd`:
 *
 *     t  = vs * valpha + vd * (255 - valpha) + 1
 *     vd = high byte of (t + (t >> 8))
 *
 * v1_16 and v8_16 supply the 16-bit constants added and shifted by (the
 * names suggest 1 and 8 -- confirm at the call site).  mergePermute selects
 * the result bytes from the two 16-bit product vectors; it is presumably
 * VEC_MERGE_PERMUTE().  Channel letters below follow the ARGB byte order
 * the original comments assume.
 */
#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
    /* even bytes of source (A, G) times alpha */ \
    vector unsigned short vsum_ag = vec_mule(vs, valpha); \
    /* odd bytes of source (R, B) times alpha */ \
    vector unsigned short vsum_rb = vec_mulo(vs, valpha); \
    /* destination weight: 255 - alpha */ \
    vector unsigned char vinvalpha = vec_nor(valpha, valpha); \
    /* even bytes of destination (A, G) times (255 - alpha) */ \
    vector unsigned short vtmp_ag = vec_mule(vd, vinvalpha); \
    /* odd bytes of destination (R, B) times (255 - alpha) */ \
    vector unsigned short vtmp_rb = vec_mulo(vd, vinvalpha); \
    /* weighted source + weighted destination */ \
    vsum_ag = vec_add(vsum_ag, vtmp_ag); \
    vsum_rb = vec_add(vsum_rb, vtmp_rb); \
    /* A/G lanes: sum = (sum + 1) + ((sum + 1) >> 8) */ \
    vsum_ag = vec_add(vsum_ag, v1_16); \
    vtmp_ag = vec_sr(vsum_ag, v8_16); \
    vsum_ag = vec_add(vsum_ag, vtmp_ag); \
    /* R/B lanes: sum = (sum + 1) + ((sum + 1) >> 8) */ \
    vsum_rb = vec_add(vsum_rb, v1_16); \
    vtmp_rb = vec_sr(vsum_rb, v8_16); \
    vsum_rb = vec_add(vsum_rb, vtmp_rb); \
    /* take the (>>8) bytes and interleave back to ARGBARGBARGBARGB */ \
    vd = (vector unsigned char)vec_perm(vsum_ag, vsum_rb, mergePermute); \
} while (0)
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
707 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
708 /* Calculate the permute vector used for 32->32 swizzling */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
709 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
710 const SDL_PixelFormat *dstfmt) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
711 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
712 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
713 * We have to assume that the bits that aren't used by other |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
714 * colors is alpha, and it's one complete byte, since some formats |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
715 * leave alpha with a zero mask, but we should still swizzle the bits. |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
716 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
717 /* ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
718 const static struct SDL_PixelFormat default_pixel_format = { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
719 NULL, 0, 0, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
720 0, 0, 0, 0, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
721 16, 8, 0, 24, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
722 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
723 0, 0}; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
724 if (!srcfmt) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
725 srcfmt = &default_pixel_format; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
726 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
727 if (!dstfmt) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
728 dstfmt = &default_pixel_format; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
729 } |
1487
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
730 const vector unsigned char plus = VECUINT8_LITERAL |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
731 ( 0x00, 0x00, 0x00, 0x00, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
732 0x04, 0x04, 0x04, 0x04, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
733 0x08, 0x08, 0x08, 0x08, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
734 0x0C, 0x0C, 0x0C, 0x0C ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
735 vector unsigned char vswiz; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
736 vector unsigned int srcvec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
737 #define RESHIFT(X) (3 - ((X) >> 3)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
738 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
739 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
740 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
741 Uint32 amask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
742 /* Use zero for alpha if either surface doesn't have alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
743 if (dstfmt->Amask) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
744 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
745 } else { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
746 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
747 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
748 #undef RESHIFT |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
749 ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
750 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
751 return(vswiz); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
752 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
753 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
754 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
755 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
756 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
757 Uint8 *src = (Uint8 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
758 int srcskip = info->s_skip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
759 Uint8 *dst = (Uint8 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
760 int dstskip = info->d_skip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
761 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
762 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
763 vector unsigned char v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
764 vector unsigned short v8_16 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
765 vector unsigned short v1_16 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
766 vector unsigned short v2_16 = vec_splat_u16(2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
767 vector unsigned short v3_16 = vec_splat_u16(3); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
768 vector unsigned int v8_32 = vec_splat_u32(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
769 vector unsigned int v16_32 = vec_add(v8_32, v8_32); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
770 vector unsigned short v3f = VECUINT16_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
771 0x003f, 0x003f, 0x003f, 0x003f, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
772 0x003f, 0x003f, 0x003f, 0x003f); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
773 vector unsigned short vfc = VECUINT16_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
774 0x00fc, 0x00fc, 0x00fc, 0x00fc, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
775 0x00fc, 0x00fc, 0x00fc, 0x00fc); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
776 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
777 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
778 0x10 - 0x1f is the alpha |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
779 0x00 - 0x0e evens are the red |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
780 0x01 - 0x0f odds are zero |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
781 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
782 vector unsigned char vredalpha1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
783 0x10, 0x00, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
784 0x10, 0x02, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
785 0x10, 0x04, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
786 0x10, 0x06, 0x01, 0x01 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
787 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
788 vector unsigned char vredalpha2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
789 vec_add((vector unsigned int)vredalpha1, vec_sl(v8_32, v16_32)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
790 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
791 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
792 0x00 - 0x0f is ARxx ARxx ARxx ARxx |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
793 0x11 - 0x1f odds are blue |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
794 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
795 vector unsigned char vblue1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
796 0x00, 0x01, 0x02, 0x11, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
797 0x04, 0x05, 0x06, 0x13, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
798 0x08, 0x09, 0x0a, 0x15, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
799 0x0c, 0x0d, 0x0e, 0x17 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
800 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
801 vector unsigned char vblue2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
802 vec_add((vector unsigned int)vblue1, v8_32) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
803 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
804 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
805 0x00 - 0x0f is ARxB ARxB ARxB ARxB |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
806 0x10 - 0x1e evens are green |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
807 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
808 vector unsigned char vgreen1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
809 0x00, 0x01, 0x10, 0x03, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
810 0x04, 0x05, 0x12, 0x07, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
811 0x08, 0x09, 0x14, 0x0b, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
812 0x0c, 0x0d, 0x16, 0x0f |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
813 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
814 vector unsigned char vgreen2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
815 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
816 ); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
817 vector unsigned char vgmerge = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
818 0x00, 0x02, 0x00, 0x06, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
819 0x00, 0x0a, 0x00, 0x0e, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
820 0x00, 0x12, 0x00, 0x16, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
821 0x00, 0x1a, 0x00, 0x1e); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
822 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
823 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
824 vector unsigned char valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
825 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
826 vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
827 vf800 = vec_sl(vf800, vec_splat_u16(8)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
828 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
829 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
830 int extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
831 vector unsigned char valigner; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
832 vector unsigned char vsrc; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
833 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
834 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
835 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
836 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
837 while (condition) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
838 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
839 unsigned sR, sG, sB, dR, dG, dB, sA; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
840 DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
841 if(sA) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
842 unsigned short dstpixel = *((unsigned short *)dst); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
843 dR = (dstpixel >> 8) & 0xf8; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
844 dG = (dstpixel >> 3) & 0xfc; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
845 dB = (dstpixel << 3) & 0xf8; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
846 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
847 *((unsigned short *)dst) = ( \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
848 ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
849 ); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
850 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
851 src += 4; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
852 dst += 2; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
853 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
854 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
855 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
856 extrawidth = (width % 8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
857 valigner = VEC_ALIGNER(src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
858 vsrc = (vector unsigned char)vec_ld(0, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
859 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
860 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
861 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
862 vector unsigned char vsrc1, vsrc2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
863 vector unsigned char vdst1, vdst2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
864 vector unsigned short vR, vG, vB; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
865 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
866 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
867 /* Load 8 pixels from src as ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
868 voverflow = (vector unsigned char)vec_ld(15, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
869 vsrc = vec_perm(vsrc, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
870 vsrc1 = vec_perm(vsrc, vsrc, vpermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
871 src += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
872 vsrc = (vector unsigned char)vec_ld(15, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
873 voverflow = vec_perm(voverflow, vsrc, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
874 vsrc2 = vec_perm(voverflow, voverflow, vpermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
875 src += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
876 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
877 /* Load 8 pixels from dst as XRGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
878 voverflow = vec_ld(0, dst); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
879 vR = vec_and((vector unsigned short)voverflow, vf800); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
880 vB = vec_sl((vector unsigned short)voverflow, v3_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
881 vG = vec_sl(vB, v2_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
882 vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
883 vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
884 vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
885 vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
886 vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
887 vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
888 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
889 /* Alpha blend 8 pixels as ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
890 valpha = vec_perm(vsrc1, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
891 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, v8_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
892 valpha = vec_perm(vsrc2, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
893 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, v8_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
894 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
895 /* Convert 8 pixels to 565 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
896 vpixel = (vector unsigned short)vec_packpx((vector unsigned int)vdst1, (vector unsigned int)vdst2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
897 vgpixel = (vector unsigned short)vec_perm(vdst1, vdst2, vgmerge); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
898 vgpixel = vec_and(vgpixel, vfc); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
899 vgpixel = vec_sl(vgpixel, v3_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
900 vrpixel = vec_sl(vpixel, v1_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
901 vrpixel = vec_and(vrpixel, vf800); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
902 vbpixel = vec_and(vpixel, v3f); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
903 vdst1 = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
904 vdst1 = vec_or(vdst1, (vector unsigned char)vbpixel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
905 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
906 /* Store 8 pixels */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
907 vec_st(vdst1, 0, dst); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
908 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
909 width -= 8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
910 dst += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
911 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
912 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
913 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
914 src += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
915 dst += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
916 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
917 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
918 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
919 static void Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
920 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
921 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
922 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
923 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
924 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
925 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
926 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
927 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
928 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
929 unsigned sA = srcfmt->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
930 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
931 Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
932 Uint32 ckey = info->src->colorkey; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
933 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
934 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
935 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
936 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
937 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
938 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
939 vector unsigned char vbits; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
940 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
941 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
942 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
943 vector unsigned int vckey; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
944 vector unsigned int vrgbmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
945 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
946 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
947 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
948 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
949 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
950 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
951 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
952 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
953 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
954 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
955 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
956 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
957 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
958 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
959 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
960 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
961 vbits = (vector unsigned char)vec_splat_s8(-1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
962 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
963 ckey &= rgbmask; |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
964 ((unsigned int *)(char*)&vckey)[0] = ckey; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
965 vckey = vec_splat(vckey, 0); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
966 ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
967 vrgbmask = vec_splat(vrgbmask, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
968 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
969 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
970 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
971 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
972 while (condition) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
973 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
974 unsigned sR, sG, sB, dR, dG, dB; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
975 RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
976 if(sA && Pixel != ckey) { \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
977 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
978 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
979 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
980 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
981 } \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
982 dstp++; \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
983 srcp++; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
984 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
985 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
986 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
987 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
988 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
989 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
990 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
991 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
992 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
993 vector unsigned char vsel; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
994 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
995 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
996 vector unsigned char vd_orig; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
997 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
998 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
999 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1000 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1001 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1002 /* vsel is set for items that match the key */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1003 vsel = (vector unsigned char)vec_and((vector unsigned int)vs, vrgbmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1004 vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1005 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1006 /* permute to source format */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1007 vs = vec_perm(vs, valpha, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1008 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1009 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1010 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1011 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1012 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1013 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1014 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1015 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1016 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1017 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1018 /* mask out color key */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1019 vd = vec_sel(vd, vd_orig, vsel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1020 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1021 /* permute to dest format */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1022 vd = vec_perm(vd, vbits, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1023 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1024 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1025 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1026 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1027 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1028 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1029 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1030 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1031 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1032 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1033 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1034 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1035 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1036 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1037 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1038 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1039 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1040 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1041 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1042 static void Blit32to32PixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1043 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1044 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1045 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1046 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1047 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1048 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1049 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1050 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1051 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1052 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1053 vector unsigned char valphaPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1054 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1055 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1056 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1057 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1058 vector unsigned char vpixelmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1059 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1060 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1061 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1062 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1063 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1064 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1065 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1066 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1067 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1068 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1069 vpixelmask = vec_nor(valphamask, v0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1070 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1071 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1072 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1073 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1074 while ( height-- ) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1075 width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1076 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1077 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1078 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1079 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1080 if(sA) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1081 DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1082 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1083 ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1084 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1085 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1086 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1087 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1088 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1089 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1090 if (width > 0) { |
1487
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
1091 /* vsrcPermute */ |
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
1092 /* vdstPermute */ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1093 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1094 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1095 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1096 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1097 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1098 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1099 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1100 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1101 vector unsigned char vdstalpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1102 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1103 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1104 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1105 vs = vec_perm(vs, v0, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1106 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1107 valpha = vec_perm(vs, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1108 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1109 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1110 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1111 vd = vec_perm(vd, v0, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1112 vdstalpha = vec_and(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1113 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1114 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1115 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1116 /* set the alpha to the dest alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1117 vd = vec_and(vd, vpixelmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1118 vd = vec_or(vd, vdstalpha); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1119 vd = vec_perm(vd, v0, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1120 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1121 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1122 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1123 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1124 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1125 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1126 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1127 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1128 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1129 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1130 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1131 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1132 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1133 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1134 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1135 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1136 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1137 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1138 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1139 static void BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1140 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1141 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1142 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1143 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1144 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1145 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1146 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1147 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1148 vector unsigned char valphaPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1149 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1150 vector unsigned char vpixelmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1151 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1152 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1153 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1154 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1155 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1156 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1157 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1158 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1159 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1160 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1161 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1162 vpixelmask = vec_nor(valphamask, v0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1163 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1164 width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1165 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1166 while ((condition)) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1167 Uint32 dalpha; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1168 Uint32 d; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1169 Uint32 s1; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1170 Uint32 d1; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1171 Uint32 s = *srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1172 Uint32 alpha = s >> 24; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1173 if(alpha) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1174 if(alpha == SDL_ALPHA_OPAQUE) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1175 *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1176 } else { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1177 d = *dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1178 dalpha = d & 0xff000000; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1179 s1 = s & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1180 d1 = d & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1181 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1182 s &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1183 d &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1184 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1185 *dstp = d1 | d | dalpha; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1186 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1187 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1188 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1189 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1190 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1191 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1192 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1193 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1194 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1195 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1196 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1197 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1198 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1199 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1200 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1201 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1202 vector unsigned char vdstalpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1203 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1204 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1205 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1206 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1207 valpha = vec_perm(vs, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1208 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1209 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1210 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1211 vdstalpha = vec_and(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1212 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1213 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1214 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1215 /* set the alpha to the dest alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1216 vd = vec_and(vd, vpixelmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1217 vd = vec_or(vd, vdstalpha); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1218 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1219 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1220 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1221 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1222 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1223 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1224 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1225 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1226 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1227 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1228 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1229 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1230 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1231 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1232 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1233 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1234 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1235 static void Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1236 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1237 /* XXX : 6 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1238 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1239 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1240 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1241 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1242 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1243 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1244 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1245 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1246 unsigned sA = srcfmt->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1247 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1248 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1249 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1250 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1251 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1252 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1253 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1254 vector unsigned char vbits; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1255 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1256 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1257 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1258 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1259 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1260 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1261 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1262 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1263 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1264 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1265 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1266 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1267 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1268 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1269 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1270 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1271 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1272 vbits = (vector unsigned char)vec_splat_s8(-1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1273 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1274 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1275 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1276 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1277 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1278 unsigned sR, sG, sB, dR, dG, dB; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1279 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1280 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1281 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1282 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1283 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1284 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1285 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1286 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1287 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1288 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1289 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1290 vector unsigned char valigner = vec_lvsl(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1291 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1292 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1293 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1294 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1295 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1296 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1297 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1298 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1299 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1300 vs = vec_perm(vs, valpha, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1301 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1302 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1303 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1304 vd = vec_perm(vd, vd, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1305 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1306 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1307 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1308 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1309 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1310 vd = vec_perm(vd, vbits, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1311 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1312 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1313 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1314 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1315 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1316 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1317 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1318 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1319 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1320 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1321 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1322 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1323 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1324 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1325 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1326 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1327 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1328 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1329 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1330 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1331 /* fast RGB888->(A)RGB888 blending */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1332 static void BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1333 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1334 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1335 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1336 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1337 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1338 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1339 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1340 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1341 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1342 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1343 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1344 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1345 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1346 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1347 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1348 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1349 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1350 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1351 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1352 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1353 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1354 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1355 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1356 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1357 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1358 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1359 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1360 Uint32 s = *srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1361 Uint32 d = *dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1362 Uint32 s1 = s & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1363 Uint32 d1 = d & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1364 d1 = (d1 + ((s1 - d1) * alpha >> 8)) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1365 & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1366 s &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1367 d &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1368 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1369 *dstp = d1 | d | 0xff000000; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1370 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1371 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1372 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1373 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1374 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1375 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1376 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1377 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1378 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1379 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1380 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1381 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1382 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1383 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1384 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1385 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1386 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1387 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1388 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1389 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1390 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1391 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1392 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1393 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1394 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1395 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1396 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1397 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1398 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1399 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1400 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1401 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1402 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1403 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1404 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1405 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1406 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1407 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1408 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1409 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1410 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1411 } |
1795 | 1412 #if __MWERKS__ |
1413 #pragma altivec_model off | |
1414 #endif | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
1415 #endif /* SDL_ALTIVEC_BLITTERS */ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1416 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1417 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1418 static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) |
0 | 1419 { |
1420 int width = info->d_width; | |
1421 int height = info->d_height; | |
1422 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1423 int srcskip = info->s_skip >> 2; | |
1424 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1425 int dstskip = info->d_skip >> 2; | |
1426 | |
1427 while(height--) { | |
1428 DUFFS_LOOP4({ | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1429 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1430 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1431 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1432 + (s & d & 0x00010101)) | 0xff000000; |
0 | 1433 }, width); |
1434 srcp += srcskip; | |
1435 dstp += dstskip; | |
1436 } | |
1437 } | |
1438 | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1439 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1440 static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1441 { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1442 unsigned alpha = info->src->alpha; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1443 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1444 BlitRGBtoRGBSurfaceAlpha128(info); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1445 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1446 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1447 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1448 Uint32 *srcp = (Uint32 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1449 int srcskip = info->s_skip >> 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1450 Uint32 *dstp = (Uint32 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1451 int dstskip = info->d_skip >> 2; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1452 Uint32 s; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1453 Uint32 d; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1454 Uint32 s1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1455 Uint32 d1; |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1456 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1457 while(height--) { |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1458 DUFFS_LOOP_DOUBLE2({ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1459 /* One Pixel Blend */ |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1460 s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1461 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1462 s1 = s & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1463 d1 = d & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1464 d1 = (d1 + ((s1 - d1) * alpha >> 8)) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1465 & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1466 s &= 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1467 d &= 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1468 d = (d + ((s - d) * alpha >> 8)) & 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1469 *dstp = d1 | d | 0xff000000; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1470 ++srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1471 ++dstp; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1472 },{ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1473 /* Two Pixels Blend */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1474 s = *srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1475 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1476 s1 = s & 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1477 d1 = d & 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1478 d1 += (s1 - d1) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1479 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1480 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1481 s = ((s & 0xff00) >> 8) | |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1482 ((srcp[1] & 0xff00) << 8); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1483 d = ((d & 0xff00) >> 8) | |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1484 ((dstp[1] & 0xff00) << 8); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1485 d += (s - d) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1486 d &= 0x00ff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1487 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1488 *dstp++ = d1 | ((d << 8) & 0xff00) | 0xff000000; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1489 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1490 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1491 s1 = *srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1492 d1 = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1493 s1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1494 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1495 d1 += (s1 - d1) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1496 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1497 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1498 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1499 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1500 ++dstp; |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1501 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1502 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1503 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1504 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1505 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1506 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1507 |
0 | 1508 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
1509 static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info) | |
1510 { | |
1511 int width = info->d_width; | |
1512 int height = info->d_height; | |
1513 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1514 int srcskip = info->s_skip >> 2; | |
1515 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1516 int dstskip = info->d_skip >> 2; | |
1517 | |
1518 while(height--) { | |
1519 DUFFS_LOOP4({ | |
1520 Uint32 dalpha; | |
1521 Uint32 d; | |
1522 Uint32 s1; | |
1523 Uint32 d1; | |
1524 Uint32 s = *srcp; | |
1525 Uint32 alpha = s >> 24; | |
1526 /* FIXME: Here we special-case opaque alpha since the | |
1527 compositioning used (>>8 instead of /255) doesn't handle | |
1528 it correctly. Also special-case alpha=0 for speed? | |
1529 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1530 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1531 if(alpha == SDL_ALPHA_OPAQUE) { |
0 | 1532 *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1533 } else { |
0 | 1534 /* |
1535 * take out the middle component (green), and process | |
1536 * the other two in parallel. One multiply less. | |
1537 */ | |
1538 d = *dstp; | |
1539 dalpha = d & 0xff000000; | |
1540 s1 = s & 0xff00ff; | |
1541 d1 = d & 0xff00ff; | |
1542 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; | |
1543 s &= 0xff00; | |
1544 d &= 0xff00; | |
1545 d = (d + ((s - d) * alpha >> 8)) & 0xff00; | |
1546 *dstp = d1 | d | dalpha; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1547 } |
0 | 1548 } |
1549 ++srcp; | |
1550 ++dstp; | |
1551 }, width); | |
1552 srcp += srcskip; | |
1553 dstp += dstskip; | |
1554 } | |
1555 } | |
1556 | |
1542 | 1557 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1558 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ |
3870 | 1559 static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1560 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1561 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1562 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1563 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1564 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1565 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1566 int dstskip = info->d_skip >> 2; |
1542 | 1567 SDL_PixelFormat* sf = info->src; |
1568 Uint32 amask = sf->Amask; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1569 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1570 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1571 /* make mm6 all zeros. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1572 "pxor %%mm6, %%mm6\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1573 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1574 /* Make a mask to preserve the alpha. */ |
1542 | 1575 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ |
1576 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ | |
1577 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ | |
1578 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ | |
1579 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1580 |
1542 | 1581 /* form channel masks */ |
1582 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ | |
1583 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ | |
1584 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ | |
1585 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ | |
1586 | |
1587 /* get alpha channel shift */ | |
1588 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ | |
1589 | |
1590 : /* nothing */ : "m" (sf->Amask), "m" (sf->Ashift) ); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1591 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1592 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1593 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1594 DUFFS_LOOP4({ |
1542 | 1595 Uint32 alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1596 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1597 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1598 "prefetch 64(%0)\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1599 "prefetch 64(%1)\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1600 : : "r" (srcp), "r" (dstp) ); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1601 |
1542 | 1602 alpha = *srcp & amask; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1603 /* FIXME: Here we special-case opaque alpha since the |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1604 compositioning used (>>8 instead of /255) doesn't handle |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1605 it correctly. Also special-case alpha=0 for speed? |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1606 Benchmark this! */ |
1542 | 1607 if(alpha == 0) { |
1608 /* do nothing */ | |
1609 } | |
1610 else if(alpha == amask) { | |
1611 /* opaque alpha -- copy RGB, keep dst alpha */ | |
1612 /* using MMX here to free up regular registers for other things */ | |
1613 __asm__ ( | |
1614 "movd (%0), %%mm0\n\t" /* src(ARGB) -> mm0 (0000ARGB)*/ | |
1615 "movd (%1), %%mm1\n\t" /* dst(ARGB) -> mm1 (0000ARGB)*/ | |
1616 "pand %%mm4, %%mm0\n\t" /* src & chanmask -> mm0 */ | |
1617 "pand %%mm3, %%mm1\n\t" /* dst & ~chanmask -> mm2 */ | |
1618 "por %%mm0, %%mm1\n\t" /* src | dst -> mm1 */ | |
1619 "movd %%mm1, (%1) \n\t" /* mm1 -> dst */ | |
1620 | |
1621 : : "r" (srcp), "r" (dstp) ); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1622 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1623 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1624 else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1625 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1626 /* load in the source, and dst. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1627 "movd (%0), %%mm0\n" /* mm0(s) = 0 0 0 0 | As Rs Gs Bs */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1628 "movd (%1), %%mm1\n" /* mm1(d) = 0 0 0 0 | Ad Rd Gd Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1629 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1630 /* Move the src alpha into mm2 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1631 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1632 /* if supporting pshufw */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1633 /*"pshufw $0x55, %%mm0, %%mm2\n" */ /* mm2 = 0 As 0 As | 0 As 0 As */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1634 /*"psrlw $8, %%mm2\n" */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1635 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1636 /* else: */ |
1542 | 1637 "movd %2, %%mm2\n" |
1638 "psrld %%mm5, %%mm2\n" /* mm2 = 0 0 0 0 | 0 0 0 As */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1639 "punpcklwd %%mm2, %%mm2\n" /* mm2 = 0 0 0 0 | 0 As 0 As */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1640 "punpckldq %%mm2, %%mm2\n" /* mm2 = 0 As 0 As | 0 As 0 As */ |
1542 | 1641 "pand %%mm7, %%mm2\n" /* to preserve dest alpha */ |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1642 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1643 /* move the colors into words. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1644 "punpcklbw %%mm6, %%mm0\n" /* mm0 = 0 As 0 Rs | 0 Gs 0 Bs */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1645 "punpcklbw %%mm6, %%mm1\n" /* mm0 = 0 Ad 0 Rd | 0 Gd 0 Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1646 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1647 /* src - dst */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1648 "psubw %%mm1, %%mm0\n" /* mm0 = As-Ad Rs-Rd | Gs-Gd Bs-Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1649 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1650 /* A * (src-dst) */ |
1542 | 1651 "pmullw %%mm2, %%mm0\n" /* mm0 = 0*As-d As*Rs-d | As*Gs-d As*Bs-d */ |
1652 "psrlw $8, %%mm0\n" /* mm0 = 0>>8 Rc>>8 | Gc>>8 Bc>>8 */ | |
1653 "paddb %%mm1, %%mm0\n" /* mm0 = 0+Ad Rc+Rd | Gc+Gd Bc+Bd */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1654 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1655 "packuswb %%mm0, %%mm0\n" /* mm0 = | Ac Rc Gc Bc */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1656 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1657 "movd %%mm0, (%1)\n" /* result in mm0 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1658 |
1542 | 1659 : : "r" (srcp), "r" (dstp), "r" (alpha) ); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1660 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1661 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1662 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1663 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1664 }, width); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1665 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1666 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1667 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1668 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1669 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1670 "emms\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1671 : ); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1672 } |
1542 | 1673 /* End GCC_ASMBLIT*/ |
1674 | |
1675 #elif MSVC_ASMBLIT | |
1676 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | |
1677 static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) | |
1678 { | |
1679 int width = info->d_width; | |
1680 int height = info->d_height; | |
1681 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1682 int srcskip = info->s_skip >> 2; | |
1683 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1684 int dstskip = info->d_skip >> 2; | |
1685 SDL_PixelFormat* sf = info->src; | |
1686 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | |
1687 Uint32 amask = sf->Amask; | |
1688 Uint32 ashift = sf->Ashift; | |
1689 Uint64 multmask; | |
1690 | |
1691 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | |
1692 | |
1693 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
1694 multmask = ~(0xFFFFi64 << (ashift * 2)); | |
1695 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ | |
1696 | |
1697 while(height--) { | |
1698 DUFFS_LOOP4({ | |
1699 Uint32 alpha; | |
1700 | |
1701 _m_prefetch(srcp + 16); | |
1702 _m_prefetch(dstp + 16); | |
1703 | |
1704 alpha = *srcp & amask; | |
1705 if (alpha == 0) { | |
1706 /* do nothing */ | |
1707 } else if (alpha == amask) { | |
1708 /* copy RGB, keep dst alpha */ | |
1709 *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); | |
1710 } else { | |
1711 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ | |
1712 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ | |
1713 | |
1714 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
1715 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
1716 | |
1717 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ | |
1718 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ | |
1719 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
1720 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
1721 mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ | |
1722 | |
1723 /* blend */ | |
1724 src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */ | |
1725 src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */ | |
1726 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ | |
1727 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */ | |
1728 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
1729 | |
1730 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
1731 } | |
1732 ++srcp; | |
1733 ++dstp; | |
1734 }, width); | |
1735 srcp += srcskip; | |
1736 dstp += dstskip; | |
1737 } | |
1738 _mm_empty(); | |
1739 } | |
1740 /* End MSVC_ASMBLIT */ | |
1741 | |
1742 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1743 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1744 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1745 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
/*
 * blend a single 16 bit pixel at 50%
 *
 * `mask` selects every channel bit except the lowest bit of each
 * channel, so the masked sum can be halved without one channel leaking
 * into its neighbour; the dropped low bits are restored when both
 * pixels have them set.  Arguments are fully parenthesized so that
 * expressions may be passed safely (each is evaluated more than once).
 */
#define BLEND16_50(d, s, mask)						\
	(((((s) & (mask)) + ((d) & (mask))) >> 1) +			\
	 ((s) & (d) & (~(mask) & 0xffff)))
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1749 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
/*
 * blend two 16 bit pixels at 50%
 *
 * `d` and `s` each hold two packed 16-bit pixels in one 32-bit word;
 * `mask` is the single-pixel mask and is replicated into both halves.
 * The replication is done in Uint32 so that shifting a mask with bit 15
 * set (e.g. 0xf7de) does not overflow a signed int.  Arguments are
 * fully parenthesized (each is evaluated more than once).
 */
#define BLEND2x16_50(d, s, mask)					     \
	((((s) & ((Uint32)(mask) | ((Uint32)(mask) << 16))) >> 1) +	     \
	 (((d) & ((Uint32)(mask) | ((Uint32)(mask) << 16))) >> 1) +	     \
	 ((s) & (d) & ~((Uint32)(mask) | ((Uint32)(mask) << 16))))
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1754 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1755 static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask) |
0 | 1756 { |
1757 int width = info->d_width; | |
1758 int height = info->d_height; | |
1759 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
1760 int srcskip = info->s_skip >> 1; | |
1761 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
1762 int dstskip = info->d_skip >> 1; | |
1763 | |
1764 while(height--) { | |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1765 if(((uintptr_t)srcp ^ (uintptr_t)dstp) & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1766 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1767 * Source and destination not aligned, pipeline it. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1768 * This is mostly a win for big blits but no loss for |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1769 * small ones |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1770 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1771 Uint32 prev_sw; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1772 int w = width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1773 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1774 /* handle odd destination */ |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1775 if((uintptr_t)dstp & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1776 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1777 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1778 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1779 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1780 w--; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1781 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1782 srcp++; /* srcp is now 32-bit aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1783 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1784 /* bootstrap pipeline with first halfword */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1785 prev_sw = ((Uint32 *)srcp)[-1]; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1786 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1787 while(w > 1) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1788 Uint32 sw, dw, s; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1789 sw = *(Uint32 *)srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1790 dw = *(Uint32 *)dstp; |
1443
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1791 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1792 s = (prev_sw << 16) + (sw >> 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1793 #else |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1794 s = (prev_sw >> 16) + (sw << 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1795 #endif |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1796 prev_sw = sw; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1797 *(Uint32 *)dstp = BLEND2x16_50(dw, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1798 dstp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1799 srcp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1800 w -= 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1801 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1802 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1803 /* final pixel if any */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1804 if(w) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1805 Uint16 d = *dstp, s; |
1443
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1806 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1807 s = (Uint16)prev_sw; |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1808 #else |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1809 s = (Uint16)(prev_sw >> 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1810 #endif |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1811 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1812 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1813 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1814 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1815 srcp += srcskip - 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1816 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1817 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1818 /* source and destination are aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1819 int w = width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1820 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1821 /* first odd pixel? */ |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1822 if((uintptr_t)srcp & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1823 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1824 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1825 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1826 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1827 w--; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1828 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1829 /* srcp and dstp are now 32-bit aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1830 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1831 while(w > 1) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1832 Uint32 sw = *(Uint32 *)srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1833 Uint32 dw = *(Uint32 *)dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1834 *(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1835 srcp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1836 dstp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1837 w -= 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1838 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1839 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1840 /* last odd pixel? */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1841 if(w) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1842 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1843 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1844 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1845 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1846 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1847 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1848 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1849 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1850 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1851 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1852 |
1542 | 1853 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1854 /* fast RGB565->RGB565 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1855 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1856 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1857 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1858 if(alpha == 128) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1859 Blit16to16SurfaceAlpha128(info, 0xf7de); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1860 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1861 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1862 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1863 Uint16 *srcp = (Uint16 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1864 int srcskip = info->s_skip >> 1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1865 Uint16 *dstp = (Uint16 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1866 int dstskip = info->d_skip >> 1; |
1542 | 1867 Uint32 s, d; |
1868 Uint8 load[8]; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1869 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1870 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ |
1542 | 1871 *(Uint64 *)load = alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1872 alpha >>= 3; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1873 |
1542 | 1874 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
1875 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | |
1876 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | |
1877 /* position alpha to allow for mullo and mulhi on diff channels | |
1878 to reduce the number of operations */ | |
1879 psllq_i2r(3, mm0); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1880 |
1542 | 1881 /* Setup the 565 color channel masks */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
1882 *(Uint64 *)load = 0x07E007E007E007E0ULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1883 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
1884 *(Uint64 *)load = 0x001F001F001F001FULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1885 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1886 while(height--) { |
1542 | 1887 DUFFS_LOOP_QUATRO2( |
1888 { | |
1889 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1890 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1891 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1892 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1893 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1894 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1895 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1896 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1897 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1898 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1899 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1900 *dstp++ = d | d >> 16; |
1542 | 1901 },{ |
1902 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1903 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1904 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1905 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1906 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1907 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1908 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1909 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1910 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1911 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1912 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1913 *dstp++ = d | d >> 16; |
1542 | 1914 s = *srcp++; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1915 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1916 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1917 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1918 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1919 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1920 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1921 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1922 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1923 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1924 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1925 *dstp++ = d | d >> 16; |
1542 | 1926 },{ |
1927 movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ | |
1928 movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ | |
1929 | |
1930 /* red -- does not need a mask since the right shift clears | |
1931 the uninteresting bits */ | |
1932 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1933 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1934 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 [000r 000r 000r 000r] */ | |
1935 psrlw_i2r(11, mm6); /* mm6 >> 11 -> mm6 [000r 000r 000r 000r] */ | |
1936 | |
1937 /* blend */ | |
1938 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1939 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1940 /* alpha used is actually 11 bits | |
1941 11 + 5 = 16 bits, so the sign bits are lost */ | |
1942 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
1943 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1944 psllw_i2r(11, mm6); /* mm6 << 11 -> mm6 */ | |
1945 | |
1946 movq_r2r(mm6, mm1); /* save new reds in dsts */ | |
1947 | |
1948 /* green -- process the bits in place */ | |
1949 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1950 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1951 pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ | |
1952 pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ | |
1953 | |
1954 /* blend */ | |
1955 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1956 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1957 /* 11 + 11 - 16 = 6 bits, so all the lower uninteresting | |
1958 bits are gone and the sign bits present */ | |
1959 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
1960 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1961 | |
1962 por_r2r(mm6, mm1); /* save new greens in dsts */ | |
1963 | |
1964 /* blue */ | |
1965 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1966 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1967 pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ | |
1968 pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
1969 | |
1970 /* blend */ | |
1971 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1972 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1973 /* 11 + 5 = 16 bits, so the sign bits are lost and | |
1974 the interesting bits will need to be MASKed */ | |
1975 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
1976 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1977 pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
1978 | |
1979 por_r2r(mm6, mm1); /* save new blues in dsts */ | |
1980 | |
1981 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ | |
1982 | |
1983 srcp += 4; | |
1984 dstp += 4; | |
1985 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1986 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1987 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1988 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1989 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1990 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1991 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1992 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1993 /* fast RGB555->RGB555 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1994 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1995 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1996 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1997 if(alpha == 128) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1998 Blit16to16SurfaceAlpha128(info, 0xfbde); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1999 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2000 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2001 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2002 Uint16 *srcp = (Uint16 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2003 int srcskip = info->s_skip >> 1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2004 Uint16 *dstp = (Uint16 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2005 int dstskip = info->d_skip >> 1; |
1542 | 2006 Uint32 s, d; |
2007 Uint8 load[8]; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2008 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2009 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ |
1542 | 2010 *(Uint64 *)load = alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2011 alpha >>= 3; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2012 |
1542 | 2013 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
2014 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | |
2015 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | |
2016 /* position alpha to allow for mullo and mulhi on diff channels | |
2017 to reduce the number of operations */ | |
2018 psllq_i2r(3, mm0); | |
2019 | |
2020 /* Setup the 555 color channel masks */ | |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
2021 *(Uint64 *)load = 0x03E003E003E003E0ULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2022 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
2023 *(Uint64 *)load = 0x001F001F001F001FULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2024 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2025 while(height--) { |
1542 | 2026 DUFFS_LOOP_QUATRO2( |
2027 { | |
2028 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2029 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2030 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2031 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2032 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2033 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2034 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2035 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2036 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2037 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2038 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2039 *dstp++ = d | d >> 16; |
1542 | 2040 },{ |
2041 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2042 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2043 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2044 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2045 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2046 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2047 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2048 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2049 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2050 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2051 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2052 *dstp++ = d | d >> 16; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2053 s = *srcp++; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2054 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2055 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2056 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2057 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2058 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2059 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2060 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2061 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2062 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2063 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2064 *dstp++ = d | d >> 16; |
1542 | 2065 },{ |
2066 movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ | |
2067 movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ | |
2068 | |
2069 /* red -- process the bits in place */ | |
2070 psllq_i2r(5, mm4); /* turn MASKGREEN into MASKRED */ | |
2071 /* by reusing the GREEN mask we free up another mmx | |
2072 register to accumulate the result */ | |
2073 | |
2074 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2075 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2076 pand_r2r(mm4, mm5); /* src & MASKRED -> mm5 */ | |
2077 pand_r2r(mm4, mm6); /* dst & MASKRED -> mm6 */ | |
2078 | |
2079 /* blend */ | |
2080 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2081 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2082 /* 11 + 15 - 16 = 10 bits, uninteresting bits will be | |
2083 cleared by a MASK below */ | |
2084 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2085 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2086 pand_r2r(mm4, mm6); /* mm6 & MASKRED -> mm6 */ | |
2087 | |
2088 psrlq_i2r(5, mm4); /* turn MASKRED back into MASKGREEN */ | |
2089 | |
2090 movq_r2r(mm6, mm1); /* save new reds in dsts */ | |
2091 | |
2092 /* green -- process the bits in place */ | |
2093 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2094 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2095 pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ | |
2096 pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ | |
2097 | |
2098 /* blend */ | |
2099 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2100 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2101 /* 11 + 10 - 16 = 5 bits, so all the lower uninteresting | |
2102 bits are gone and the sign bits present */ | |
2103 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2104 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2105 | |
2106 por_r2r(mm6, mm1); /* save new greens in dsts */ | |
2107 | |
2108 /* blue */ | |
2109 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2110 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2111 pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ | |
2112 pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2113 | |
2114 /* blend */ | |
2115 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2116 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2117 /* 11 + 5 = 16 bits, so the sign bits are lost and | |
2118 the interesting bits will need to be MASKed */ | |
2119 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
2120 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2121 pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2122 | |
2123 por_r2r(mm6, mm1); /* save new blues in dsts */ | |
2124 | |
2125 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ | |
2126 | |
2127 srcp += 4; | |
2128 dstp += 4; | |
2129 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2130 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2131 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2132 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2133 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2134 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2135 } |
1542 | 2136 /* End GCC_ASMBLIT */ |
2137 | |
2138 #elif MSVC_ASMBLIT | |
2139 /* fast RGB565->RGB565 blending with surface alpha */ | |
2140 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) | |
2141 { | |
2142 unsigned alpha = info->src->alpha; | |
2143 if(alpha == 128) { | |
2144 Blit16to16SurfaceAlpha128(info, 0xf7de); | |
2145 } else { | |
2146 int width = info->d_width; | |
2147 int height = info->d_height; | |
2148 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
2149 int srcskip = info->s_skip >> 1; | |
2150 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2151 int dstskip = info->d_skip >> 1; | |
2152 Uint32 s, d; | |
2153 | |
2154 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; | |
2155 | |
2156 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | |
2157 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ | |
2158 alpha >>= 3; /* downscale alpha to 5 bits */ | |
2159 | |
2160 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
2161 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
2162 /* position alpha to allow for mullo and mulhi on diff channels | |
2163 to reduce the number of operations */ | |
2164 mm_alpha = _mm_slli_si64(mm_alpha, 3); | |
2165 | |
2166 /* Setup the 565 color channel masks */ | |
2167 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ | |
2168 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | |
2169 | |
2170 while(height--) { | |
2171 DUFFS_LOOP_QUATRO2( | |
2172 { | |
2173 s = *srcp++; | |
2174 d = *dstp; | |
2175 /* | |
2176 * shift out the middle component (green) to | |
2177 * the high 16 bits, and process all three RGB | |
2178 * components at the same time. | |
2179 */ | |
2180 s = (s | s << 16) & 0x07e0f81f; | |
2181 d = (d | d << 16) & 0x07e0f81f; | |
2182 d += (s - d) * alpha >> 5; | |
2183 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2184 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2185 },{ |
2186 s = *srcp++; | |
2187 d = *dstp; | |
2188 /* | |
2189 * shift out the middle component (green) to | |
2190 * the high 16 bits, and process all three RGB | |
2191 * components at the same time. | |
2192 */ | |
2193 s = (s | s << 16) & 0x07e0f81f; | |
2194 d = (d | d << 16) & 0x07e0f81f; | |
2195 d += (s - d) * alpha >> 5; | |
2196 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2197 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2198 s = *srcp++; |
2199 d = *dstp; | |
2200 /* | |
2201 * shift out the middle component (green) to | |
2202 * the high 16 bits, and process all three RGB | |
2203 * components at the same time. | |
2204 */ | |
2205 s = (s | s << 16) & 0x07e0f81f; | |
2206 d = (d | d << 16) & 0x07e0f81f; | |
2207 d += (s - d) * alpha >> 5; | |
2208 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2209 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2210 },{ |
2211 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ | |
2212 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ | |
2213 | |
2214 /* red */ | |
2215 src2 = src1; | |
2216 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */ | |
2217 | |
2218 dst2 = dst1; | |
2219 dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */ | |
2220 | |
2221 /* blend */ | |
2222 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2223 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2224 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2225 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2226 dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */ | |
2227 | |
2228 mm_res = dst2; /* RED -> mm_res */ | |
2229 | |
2230 /* green -- process the bits in place */ | |
2231 src2 = src1; | |
2232 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ | |
2233 | |
2234 dst2 = dst1; | |
2235 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ | |
2236 | |
2237 /* blend */ | |
2238 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2239 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2240 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2241 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2242 | |
2243 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ | |
2244 | |
2245 /* blue */ | |
2246 src2 = src1; | |
2247 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ | |
2248 | |
2249 dst2 = dst1; | |
2250 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ | |
2251 | |
2252 /* blend */ | |
2253 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2254 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2255 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2256 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2257 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ | |
2258 | |
2259 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ | |
2260 | |
2261 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ | |
2262 | |
2263 srcp += 4; | |
2264 dstp += 4; | |
2265 }, width); | |
2266 srcp += srcskip; | |
2267 dstp += dstskip; | |
2268 } | |
2269 _mm_empty(); | |
2270 } | |
2271 } | |
2272 | |
2273 /* fast RGB555->RGB555 blending with surface alpha */ | |
2274 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) | |
2275 { | |
2276 unsigned alpha = info->src->alpha; | |
2277 if(alpha == 128) { | |
2278 Blit16to16SurfaceAlpha128(info, 0xfbde); | |
2279 } else { | |
2280 int width = info->d_width; | |
2281 int height = info->d_height; | |
2282 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
2283 int srcskip = info->s_skip >> 1; | |
2284 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2285 int dstskip = info->d_skip >> 1; | |
2286 Uint32 s, d; | |
2287 | |
2288 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; | |
2289 | |
2290 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | |
2291 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ | |
2292 alpha >>= 3; /* downscale alpha to 5 bits */ | |
2293 | |
2294 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
2295 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
2296 /* position alpha to allow for mullo and mulhi on diff channels | |
2297 to reduce the number of operations */ | |
2298 mm_alpha = _mm_slli_si64(mm_alpha, 3); | |
2299 | |
2300 /* Setup the 555 color channel masks */ | |
2301 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ | |
2302 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ | |
2303 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | |
2304 | |
2305 while(height--) { | |
2306 DUFFS_LOOP_QUATRO2( | |
2307 { | |
2308 s = *srcp++; | |
2309 d = *dstp; | |
2310 /* | |
2311 * shift out the middle component (green) to | |
2312 * the high 16 bits, and process all three RGB | |
2313 * components at the same time. | |
2314 */ | |
2315 s = (s | s << 16) & 0x03e07c1f; | |
2316 d = (d | d << 16) & 0x03e07c1f; | |
2317 d += (s - d) * alpha >> 5; | |
2318 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2319 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2320 },{ |
2321 s = *srcp++; | |
2322 d = *dstp; | |
2323 /* | |
2324 * shift out the middle component (green) to | |
2325 * the high 16 bits, and process all three RGB | |
2326 * components at the same time. | |
2327 */ | |
2328 s = (s | s << 16) & 0x03e07c1f; | |
2329 d = (d | d << 16) & 0x03e07c1f; | |
2330 d += (s - d) * alpha >> 5; | |
2331 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2332 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2333 s = *srcp++; |
2334 d = *dstp; | |
2335 /* | |
2336 * shift out the middle component (green) to | |
2337 * the high 16 bits, and process all three RGB | |
2338 * components at the same time. | |
2339 */ | |
2340 s = (s | s << 16) & 0x03e07c1f; | |
2341 d = (d | d << 16) & 0x03e07c1f; | |
2342 d += (s - d) * alpha >> 5; | |
2343 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2344 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2345 },{ |
2346 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ | |
2347 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ | |
2348 | |
2349 /* red -- process the bits in place */ | |
2350 src2 = src1; | |
2351 src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */ | |
2352 | |
2353 dst2 = dst1; | |
2354 dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */ | |
2355 | |
2356 /* blend */ | |
2357 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2358 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2359 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2360 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2361 dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */ | |
2362 | |
2363 mm_res = dst2; /* RED -> mm_res */ | |
2364 | |
2365 /* green -- process the bits in place */ | |
2366 src2 = src1; | |
2367 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ | |
2368 | |
2369 dst2 = dst1; | |
2370 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ | |
2371 | |
2372 /* blend */ | |
2373 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2374 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2375 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2376 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2377 | |
2378 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ | |
2379 | |
2380 /* blue */ | |
2381 src2 = src1; /* src -> src2 */ | |
2382 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ | |
2383 | |
2384 dst2 = dst1; /* dst -> dst2 */ | |
2385 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ | |
2386 | |
2387 /* blend */ | |
2388 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2389 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2390 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2391 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2392 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ | |
2393 | |
2394 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ | |
2395 | |
2396 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ | |
2397 | |
2398 srcp += 4; | |
2399 dstp += 4; | |
2400 }, width); | |
2401 srcp += srcskip; | |
2402 dstp += dstskip; | |
2403 } | |
2404 _mm_empty(); | |
2405 } | |
2406 } | |
2407 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2408 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2409 /* fast RGB565->RGB565 blending with surface alpha */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2410 static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2411 { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2412 unsigned alpha = info->src->alpha; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2413 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2414 Blit16to16SurfaceAlpha128(info, 0xf7de); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2415 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2416 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2417 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2418 Uint16 *srcp = (Uint16 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2419 int srcskip = info->s_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2420 Uint16 *dstp = (Uint16 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2421 int dstskip = info->d_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2422 alpha >>= 3; /* downscale alpha to 5 bits */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2423 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2424 while(height--) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2425 DUFFS_LOOP4({ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2426 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2427 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2428 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2429 * shift out the middle component (green) to |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2430 * the high 16 bits, and process all three RGB |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2431 * components at the same time. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2432 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2433 s = (s | s << 16) & 0x07e0f81f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2434 d = (d | d << 16) & 0x07e0f81f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2435 d += (s - d) * alpha >> 5; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2436 d &= 0x07e0f81f; |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2437 *dstp++ = (Uint16)(d | d >> 16); |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2438 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2439 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2440 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2441 } |
0 | 2442 } |
2443 } | |
2444 | |
2445 /* fast RGB555->RGB555 blending with surface alpha */ | |
2446 static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) | |
2447 { | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2448 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2449 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2450 Blit16to16SurfaceAlpha128(info, 0xfbde); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2451 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2452 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2453 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2454 Uint16 *srcp = (Uint16 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2455 int srcskip = info->s_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2456 Uint16 *dstp = (Uint16 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2457 int dstskip = info->d_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2458 alpha >>= 3; /* downscale alpha to 5 bits */ |
0 | 2459 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2460 while(height--) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2461 DUFFS_LOOP4({ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2462 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2463 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2464 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2465 * shift out the middle component (green) to |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2466 * the high 16 bits, and process all three RGB |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2467 * components at the same time. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2468 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2469 s = (s | s << 16) & 0x03e07c1f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2470 d = (d | d << 16) & 0x03e07c1f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2471 d += (s - d) * alpha >> 5; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2472 d &= 0x03e07c1f; |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2473 *dstp++ = (Uint16)(d | d >> 16); |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2474 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2475 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2476 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2477 } |
0 | 2478 } |
2479 } | |
2480 | |
2481 /* fast ARGB8888->RGB565 blending with pixel alpha */ | |
2482 static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) | |
2483 { | |
2484 int width = info->d_width; | |
2485 int height = info->d_height; | |
2486 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
2487 int srcskip = info->s_skip >> 2; | |
2488 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2489 int dstskip = info->d_skip >> 1; | |
2490 | |
2491 while(height--) { | |
2492 DUFFS_LOOP4({ | |
2493 Uint32 s = *srcp; | |
2494 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ | |
2495 /* FIXME: Here we special-case opaque alpha since the | |
2496 compositioning used (>>8 instead of /255) doesn't handle | |
2497 it correctly. Also special-case alpha=0 for speed? | |
2498 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2499 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2500 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2501 *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f)); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2502 } else { |
0 | 2503 Uint32 d = *dstp; |
2504 /* | |
2505 * convert source and destination to G0RAB65565 | |
2506 * and blend all components at the same time | |
2507 */ | |
2508 s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800) | |
2509 + (s >> 3 & 0x1f); | |
2510 d = (d | d << 16) & 0x07e0f81f; | |
2511 d += (s - d) * alpha >> 5; | |
2512 d &= 0x07e0f81f; | |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2513 *dstp = (Uint16)(d | d >> 16); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2514 } |
0 | 2515 } |
2516 srcp++; | |
2517 dstp++; | |
2518 }, width); | |
2519 srcp += srcskip; | |
2520 dstp += dstskip; | |
2521 } | |
2522 } | |
2523 | |
2524 /* fast ARGB8888->RGB555 blending with pixel alpha */ | |
2525 static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) | |
2526 { | |
2527 int width = info->d_width; | |
2528 int height = info->d_height; | |
2529 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
2530 int srcskip = info->s_skip >> 2; | |
2531 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2532 int dstskip = info->d_skip >> 1; | |
2533 | |
2534 while(height--) { | |
2535 DUFFS_LOOP4({ | |
2536 unsigned alpha; | |
2537 Uint32 s = *srcp; | |
2538 alpha = s >> 27; /* downscale alpha to 5 bits */ | |
2539 /* FIXME: Here we special-case opaque alpha since the | |
2540 compositioning used (>>8 instead of /255) doesn't handle | |
2541 it correctly. Also special-case alpha=0 for speed? | |
2542 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2543 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2544 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2545 *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f)); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2546 } else { |
0 | 2547 Uint32 d = *dstp; |
2548 /* | |
2549 * convert source and destination to G0RAB65565 | |
2550 * and blend all components at the same time | |
2551 */ | |
2552 s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00) | |
2553 + (s >> 3 & 0x1f); | |
2554 d = (d | d << 16) & 0x03e07c1f; | |
2555 d += (s - d) * alpha >> 5; | |
2556 d &= 0x03e07c1f; | |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2557 *dstp = (Uint16)(d | d >> 16); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2558 } |
0 | 2559 } |
2560 srcp++; | |
2561 dstp++; | |
2562 }, width); | |
2563 srcp += srcskip; | |
2564 dstp += dstskip; | |
2565 } | |
2566 } | |
2567 | |
2568 /* General (slow) N->N blending with per-surface alpha */ | |
2569 static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) | |
2570 { | |
2571 int width = info->d_width; | |
2572 int height = info->d_height; | |
2573 Uint8 *src = info->s_pixels; | |
2574 int srcskip = info->s_skip; | |
2575 Uint8 *dst = info->d_pixels; | |
2576 int dstskip = info->d_skip; | |
2577 SDL_PixelFormat *srcfmt = info->src; | |
2578 SDL_PixelFormat *dstfmt = info->dst; | |
2579 int srcbpp = srcfmt->BytesPerPixel; | |
2580 int dstbpp = dstfmt->BytesPerPixel; | |
2581 unsigned sA = srcfmt->alpha; | |
2582 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | |
2583 | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2584 if(sA) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2585 while ( height-- ) { |
0 | 2586 DUFFS_LOOP4( |
2587 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2588 Uint32 Pixel; |
0 | 2589 unsigned sR; |
2590 unsigned sG; | |
2591 unsigned sB; | |
2592 unsigned dR; | |
2593 unsigned dG; | |
2594 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2595 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2596 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); |
0 | 2597 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2598 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | |
2599 src += srcbpp; | |
2600 dst += dstbpp; | |
2601 }, | |
2602 width); | |
2603 src += srcskip; | |
2604 dst += dstskip; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2605 } |
0 | 2606 } |
2607 } | |
2608 | |
2609 /* General (slow) colorkeyed N->N blending with per-surface alpha */ | |
2610 static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) | |
2611 { | |
2612 int width = info->d_width; | |
2613 int height = info->d_height; | |
2614 Uint8 *src = info->s_pixels; | |
2615 int srcskip = info->s_skip; | |
2616 Uint8 *dst = info->d_pixels; | |
2617 int dstskip = info->d_skip; | |
2618 SDL_PixelFormat *srcfmt = info->src; | |
2619 SDL_PixelFormat *dstfmt = info->dst; | |
2620 Uint32 ckey = srcfmt->colorkey; | |
2621 int srcbpp = srcfmt->BytesPerPixel; | |
2622 int dstbpp = dstfmt->BytesPerPixel; | |
2623 unsigned sA = srcfmt->alpha; | |
2624 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | |
2625 | |
2626 while ( height-- ) { | |
2627 DUFFS_LOOP4( | |
2628 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2629 Uint32 Pixel; |
0 | 2630 unsigned sR; |
2631 unsigned sG; | |
2632 unsigned sB; | |
2633 unsigned dR; | |
2634 unsigned dG; | |
2635 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2636 RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2637 if(sA && Pixel != ckey) { |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2638 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2639 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); |
0 | 2640 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2641 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | |
2642 } | |
2643 src += srcbpp; | |
2644 dst += dstbpp; | |
2645 }, | |
2646 width); | |
2647 src += srcskip; | |
2648 dst += dstskip; | |
2649 } | |
2650 } | |
2651 | |
2652 /* General (slow) N->N blending with pixel alpha */ | |
2653 static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) | |
2654 { | |
2655 int width = info->d_width; | |
2656 int height = info->d_height; | |
2657 Uint8 *src = info->s_pixels; | |
2658 int srcskip = info->s_skip; | |
2659 Uint8 *dst = info->d_pixels; | |
2660 int dstskip = info->d_skip; | |
2661 SDL_PixelFormat *srcfmt = info->src; | |
2662 SDL_PixelFormat *dstfmt = info->dst; | |
2663 | |
2664 int srcbpp; | |
2665 int dstbpp; | |
2666 | |
2667 /* Set up some basic variables */ | |
2668 srcbpp = srcfmt->BytesPerPixel; | |
2669 dstbpp = dstfmt->BytesPerPixel; | |
2670 | |
2671 /* FIXME: for 8bpp source alpha, this doesn't get opaque values | |
2672 quite right. for <8bpp source alpha, it gets them very wrong | |
2673 (check all macros!) | |
2674 It is unclear whether there is a good general solution that doesn't | |
2675 need a branch (or a divide). */ | |
2676 while ( height-- ) { | |
2677 DUFFS_LOOP4( | |
2678 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2679 Uint32 Pixel; |
0 | 2680 unsigned sR; |
2681 unsigned sG; | |
2682 unsigned sB; | |
2683 unsigned dR; | |
2684 unsigned dG; | |
2685 unsigned dB; | |
2686 unsigned sA; | |
2687 unsigned dA; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2688 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2689 if(sA) { |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2690 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2691 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2692 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2693 } |
0 | 2694 src += srcbpp; |
2695 dst += dstbpp; | |
2696 }, | |
2697 width); | |
2698 src += srcskip; | |
2699 dst += dstskip; | |
2700 } | |
2701 } | |
2702 | |
2703 | |
2704 SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index) | |
2705 { | |
2706 SDL_PixelFormat *sf = surface->format; | |
2707 SDL_PixelFormat *df = surface->map->dst->format; | |
2708 | |
2709 if(sf->Amask == 0) { | |
2710 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { | |
2711 if(df->BytesPerPixel == 1) | |
2712 return BlitNto1SurfaceAlphaKey; | |
2713 else | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2714 #if SDL_ALTIVEC_BLITTERS |
1240 | 2715 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && |
2716 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2717 return Blit32to32SurfaceAlphaKeyAltivec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2718 else |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2719 #endif |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2720 return BlitNtoNSurfaceAlphaKey; |
0 | 2721 } else { |
2722 /* Per-surface alpha blits */ | |
2723 switch(df->BytesPerPixel) { | |
2724 case 1: | |
2725 return BlitNto1SurfaceAlpha; | |
2726 | |
2727 case 2: | |
2728 if(surface->map->identity) { | |
2729 if(df->Gmask == 0x7e0) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2730 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2731 #if MMX_ASMBLIT |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
2732 if(SDL_HasMMX()) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2733 return Blit565to565SurfaceAlphaMMX; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2734 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2735 #endif |
0 | 2736 return Blit565to565SurfaceAlpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2737 } |
0 | 2738 else if(df->Gmask == 0x3e0) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2739 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2740 #if MMX_ASMBLIT |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
2741 if(SDL_HasMMX()) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2742 return Blit555to555SurfaceAlphaMMX; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2743 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2744 #endif |
0 | 2745 return Blit555to555SurfaceAlpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2746 } |
0 | 2747 } |
2748 return BlitNtoNSurfaceAlpha; | |
2749 | |
2750 case 4: | |
2751 if(sf->Rmask == df->Rmask | |
2752 && sf->Gmask == df->Gmask | |
2753 && sf->Bmask == df->Bmask | |
2754 && sf->BytesPerPixel == 4) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2755 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2756 #if MMX_ASMBLIT |
1542 | 2757 if(sf->Rshift % 8 == 0 |
2758 && sf->Gshift % 8 == 0 | |
2759 && sf->Bshift % 8 == 0 | |
2760 && SDL_HasMMX()) | |
2761 return BlitRGBtoRGBSurfaceAlphaMMX; | |
2762 #endif | |
2763 if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) | |
2764 { | |
1617
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2765 #if SDL_ALTIVEC_BLITTERS |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2766 if(!(surface->map->dst->flags & SDL_HWSURFACE) |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2767 && SDL_HasAltiVec()) |
1542 | 2768 return BlitRGBtoRGBSurfaceAlphaAltivec; |
2769 #endif | |
2770 return BlitRGBtoRGBSurfaceAlpha; | |
2771 } | |
2772 } | |
2773 #if SDL_ALTIVEC_BLITTERS | |
2774 if((sf->BytesPerPixel == 4) && | |
2775 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
2776 return Blit32to32SurfaceAlphaAltivec; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2777 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2778 #endif |
1542 | 2779 return BlitNtoNSurfaceAlpha; |
0 | 2780 |
2781 case 3: | |
2782 default: | |
2783 return BlitNtoNSurfaceAlpha; | |
2784 } | |
2785 } | |
2786 } else { | |
2787 /* Per-pixel alpha blits */ | |
2788 switch(df->BytesPerPixel) { | |
2789 case 1: | |
2790 return BlitNto1PixelAlpha; | |
2791 | |
2792 case 2: | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2793 #if SDL_ALTIVEC_BLITTERS |
1240 | 2794 if(sf->BytesPerPixel == 4 && !(surface->map->dst->flags & SDL_HWSURFACE) && |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2795 df->Gmask == 0x7e0 && |
1240 | 2796 df->Bmask == 0x1f && SDL_HasAltiVec()) |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2797 return Blit32to565PixelAlphaAltivec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2798 else |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2799 #endif |
0 | 2800 if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 |
2801 && sf->Gmask == 0xff00 | |
2802 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) | |
2803 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { | |
2804 if(df->Gmask == 0x7e0) | |
2805 return BlitARGBto565PixelAlpha; | |
2806 else if(df->Gmask == 0x3e0) | |
2807 return BlitARGBto555PixelAlpha; | |
2808 } | |
2809 return BlitNtoNPixelAlpha; | |
2810 | |
2811 case 4: | |
1542 | 2812 if(sf->Rmask == df->Rmask |
0 | 2813 && sf->Gmask == df->Gmask |
2814 && sf->Bmask == df->Bmask | |
2815 && sf->BytesPerPixel == 4) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2816 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2817 #if MMX_ASMBLIT |
1542 | 2818 if(sf->Rshift % 8 == 0 |
2819 && sf->Gshift % 8 == 0 | |
2820 && sf->Bshift % 8 == 0 | |
2821 && sf->Ashift % 8 == 0 | |
2822 && sf->Aloss == 0) | |
2823 { | |
2824 if(SDL_Has3DNow()) | |
2825 return BlitRGBtoRGBPixelAlphaMMX3DNOW; | |
2826 if(SDL_HasMMX()) | |
2827 return BlitRGBtoRGBPixelAlphaMMX; | |
2828 } | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2829 #endif |
1542 | 2830 if(sf->Amask == 0xff000000) |
2831 { | |
1617
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2832 #if SDL_ALTIVEC_BLITTERS |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2833 if(!(surface->map->dst->flags & SDL_HWSURFACE) |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2834 && SDL_HasAltiVec()) |
1542 | 2835 return BlitRGBtoRGBPixelAlphaAltivec; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2836 #endif |
1542 | 2837 return BlitRGBtoRGBPixelAlpha; |
2838 } | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2839 } |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2840 #if SDL_ALTIVEC_BLITTERS |
1542 | 2841 if (sf->Amask && sf->BytesPerPixel == 4 && |
2842 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
2843 return Blit32to32PixelAlphaAltivec; | |
2844 else | |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2845 #endif |
1542 | 2846 return BlitNtoNPixelAlpha; |
0 | 2847 |
2848 case 3: | |
2849 default: | |
2850 return BlitNtoNPixelAlpha; | |
2851 } | |
2852 } | |
2853 } | |
2854 |