Mercurial > sdl-ios-xcode
annotate src/video/SDL_blit_A.c @ 1643:51038e80ae59
More general fix for bug #189
The clipping is done at a higher level, and the low level functions are
passed clipped rectangles. Drivers which don't support source clipping
have not been changed, so the image will be squished instead of clipped,
but at least they will no longer crash when the destination rect was out
of bounds.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 17 Apr 2006 06:47:23 +0000 |
parents | b255b4058d37 |
children | 14717b52abc0 |
rev | line source |
---|---|
0 | 1 /* |
2 SDL - Simple DirectMedia Layer | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
3 Copyright (C) 1997-2006 Sam Lantinga |
0 | 4 |
5 This library is free software; you can redistribute it and/or | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
6 modify it under the terms of the GNU Lesser General Public |
0 | 7 License as published by the Free Software Foundation; either |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
8 version 2.1 of the License, or (at your option) any later version. |
0 | 9 |
10 This library is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
13 Lesser General Public License for more details. |
0 | 14 |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
15 You should have received a copy of the GNU Lesser General Public |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
16 License along with this library; if not, write to the Free Software |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 18 |
19 Sam Lantinga | |
252
e8157fcb3114
Updated the source with the correct e-mail address
Sam Lantinga <slouken@libsdl.org>
parents:
1
diff
changeset
|
20 slouken@libsdl.org |
0 | 21 */ |
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
22 #include "SDL_config.h" |
0 | 23 |
24 #include "SDL_video.h" | |
25 #include "SDL_blit.h" | |
26 | |
1542 | 27 #if SDL_ASSEMBLY_ROUTINES |
28 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
29 #define MMX_ASMBLIT 1 |
1542 | 30 #define GCC_ASMBLIT 1 |
31 #elif defined(_MSC_VER) && (_MSC_VER >= 1200) && defined(_M_IX86) | |
32 #define MMX_ASMBLIT 1 | |
33 #define MSVC_ASMBLIT 1 | |
880
9ef41050100c
Date: Tue, 30 Mar 2004 21:26:47 -0600
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
34 #endif |
1542 | 35 #endif /* SDL_ASSEMBLY_ROUTINES */ |
880
9ef41050100c
Date: Tue, 30 Mar 2004 21:26:47 -0600
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
36 |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
37 /* Function to check the CPU flags */ |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
38 #include "SDL_cpuinfo.h" |
1542 | 39 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
40 #include "mmx.h" |
1542 | 41 #elif MSVC_ASMBLIT |
42 #include <mmintrin.h> | |
43 #include <mm3dnow.h> | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
44 #endif |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
45 |
0 | 46 /* Functions to perform alpha blended blitting */ |
47 | |
48 /* N->1 blending with per-surface alpha */ | |
49 static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) | |
50 { | |
51 int width = info->d_width; | |
52 int height = info->d_height; | |
53 Uint8 *src = info->s_pixels; | |
54 int srcskip = info->s_skip; | |
55 Uint8 *dst = info->d_pixels; | |
56 int dstskip = info->d_skip; | |
57 Uint8 *palmap = info->table; | |
58 SDL_PixelFormat *srcfmt = info->src; | |
59 SDL_PixelFormat *dstfmt = info->dst; | |
60 int srcbpp = srcfmt->BytesPerPixel; | |
61 | |
62 const unsigned A = srcfmt->alpha; | |
63 | |
64 while ( height-- ) { | |
65 DUFFS_LOOP4( | |
66 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
67 Uint32 Pixel; |
0 | 68 unsigned sR; |
69 unsigned sG; | |
70 unsigned sB; | |
71 unsigned dR; | |
72 unsigned dG; | |
73 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
74 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
0 | 75 dR = dstfmt->palette->colors[*dst].r; |
76 dG = dstfmt->palette->colors[*dst].g; | |
77 dB = dstfmt->palette->colors[*dst].b; | |
78 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); | |
79 dR &= 0xff; | |
80 dG &= 0xff; | |
81 dB &= 0xff; | |
82 /* Pack RGB into 8bit pixel */ | |
83 if ( palmap == NULL ) { | |
84 *dst =((dR>>5)<<(3+2))| | |
85 ((dG>>5)<<(2))| | |
86 ((dB>>6)<<(0)); | |
87 } else { | |
88 *dst = palmap[((dR>>5)<<(3+2))| | |
89 ((dG>>5)<<(2)) | | |
90 ((dB>>6)<<(0))]; | |
91 } | |
92 dst++; | |
93 src += srcbpp; | |
94 }, | |
95 width); | |
96 src += srcskip; | |
97 dst += dstskip; | |
98 } | |
99 } | |
100 | |
101 /* N->1 blending with pixel alpha */ | |
102 static void BlitNto1PixelAlpha(SDL_BlitInfo *info) | |
103 { | |
104 int width = info->d_width; | |
105 int height = info->d_height; | |
106 Uint8 *src = info->s_pixels; | |
107 int srcskip = info->s_skip; | |
108 Uint8 *dst = info->d_pixels; | |
109 int dstskip = info->d_skip; | |
110 Uint8 *palmap = info->table; | |
111 SDL_PixelFormat *srcfmt = info->src; | |
112 SDL_PixelFormat *dstfmt = info->dst; | |
113 int srcbpp = srcfmt->BytesPerPixel; | |
114 | |
115 /* FIXME: fix alpha bit field expansion here too? */ | |
116 while ( height-- ) { | |
117 DUFFS_LOOP4( | |
118 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
119 Uint32 Pixel; |
0 | 120 unsigned sR; |
121 unsigned sG; | |
122 unsigned sB; | |
123 unsigned sA; | |
124 unsigned dR; | |
125 unsigned dG; | |
126 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
127 DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); |
0 | 128 dR = dstfmt->palette->colors[*dst].r; |
129 dG = dstfmt->palette->colors[*dst].g; | |
130 dB = dstfmt->palette->colors[*dst].b; | |
131 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); | |
132 dR &= 0xff; | |
133 dG &= 0xff; | |
134 dB &= 0xff; | |
135 /* Pack RGB into 8bit pixel */ | |
136 if ( palmap == NULL ) { | |
137 *dst =((dR>>5)<<(3+2))| | |
138 ((dG>>5)<<(2))| | |
139 ((dB>>6)<<(0)); | |
140 } else { | |
141 *dst = palmap[((dR>>5)<<(3+2))| | |
142 ((dG>>5)<<(2)) | | |
143 ((dB>>6)<<(0)) ]; | |
144 } | |
145 dst++; | |
146 src += srcbpp; | |
147 }, | |
148 width); | |
149 src += srcskip; | |
150 dst += dstskip; | |
151 } | |
152 } | |
153 | |
154 /* colorkeyed N->1 blending with per-surface alpha */ | |
155 static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) | |
156 { | |
157 int width = info->d_width; | |
158 int height = info->d_height; | |
159 Uint8 *src = info->s_pixels; | |
160 int srcskip = info->s_skip; | |
161 Uint8 *dst = info->d_pixels; | |
162 int dstskip = info->d_skip; | |
163 Uint8 *palmap = info->table; | |
164 SDL_PixelFormat *srcfmt = info->src; | |
165 SDL_PixelFormat *dstfmt = info->dst; | |
166 int srcbpp = srcfmt->BytesPerPixel; | |
167 Uint32 ckey = srcfmt->colorkey; | |
168 | |
169 const int A = srcfmt->alpha; | |
170 | |
171 while ( height-- ) { | |
172 DUFFS_LOOP( | |
173 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
174 Uint32 Pixel; |
0 | 175 unsigned sR; |
176 unsigned sG; | |
177 unsigned sB; | |
178 unsigned dR; | |
179 unsigned dG; | |
180 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
181 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
182 if ( Pixel != ckey ) { |
0 | 183 dR = dstfmt->palette->colors[*dst].r; |
184 dG = dstfmt->palette->colors[*dst].g; | |
185 dB = dstfmt->palette->colors[*dst].b; | |
186 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); | |
187 dR &= 0xff; | |
188 dG &= 0xff; | |
189 dB &= 0xff; | |
190 /* Pack RGB into 8bit pixel */ | |
191 if ( palmap == NULL ) { | |
192 *dst =((dR>>5)<<(3+2))| | |
193 ((dG>>5)<<(2)) | | |
194 ((dB>>6)<<(0)); | |
195 } else { | |
196 *dst = palmap[((dR>>5)<<(3+2))| | |
197 ((dG>>5)<<(2)) | | |
198 ((dB>>6)<<(0)) ]; | |
199 } | |
200 } | |
201 dst++; | |
202 src += srcbpp; | |
203 }, | |
204 width); | |
205 src += srcskip; | |
206 dst += dstskip; | |
207 } | |
208 } | |
209 | |
1542 | 210 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
211 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
212 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
213 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
214 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
215 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
216 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
217 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
218 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
219 int dstskip = info->d_skip >> 2; |
1542 | 220 Uint32 dalpha = info->dst->Amask; |
221 Uint8 load[8]; | |
222 | |
223 *(Uint64 *)load = 0x00fefefe00fefefeULL;/* alpha128 mask */ | |
224 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ | |
225 *(Uint64 *)load = 0x0001010100010101ULL;/* !alpha128 mask */ | |
226 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ | |
227 movd_m2r(dalpha, mm7); /* dst alpha mask */ | |
228 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
229 while(height--) { |
1542 | 230 DUFFS_LOOP_DOUBLE2( |
231 { | |
232 Uint32 s = *srcp++; | |
233 Uint32 d = *dstp; | |
234 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | |
235 + (s & d & 0x00010101)) | dalpha; | |
236 },{ | |
237 movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ | |
238 movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ | |
239 | |
240 movq_m2r((*srcp), mm1);/* 2 x src -> mm1(ARGBARGB) */ | |
241 movq_r2r(mm1, mm5); /* 2 x src -> mm5(ARGBARGB) */ | |
242 | |
243 pand_r2r(mm4, mm6); /* dst & mask -> mm6 */ | |
244 pand_r2r(mm4, mm5); /* src & mask -> mm5 */ | |
245 paddd_r2r(mm6, mm5); /* mm6 + mm5 -> mm5 */ | |
246 pand_r2r(mm1, mm2); /* src & dst -> mm2 */ | |
247 psrld_i2r(1, mm5); /* mm5 >> 1 -> mm5 */ | |
248 pand_r2r(mm3, mm2); /* mm2 & !mask -> mm2 */ | |
249 paddd_r2r(mm5, mm2); /* mm5 + mm2 -> mm2 */ | |
250 | |
251 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | |
252 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ | |
253 dstp += 2; | |
254 srcp += 2; | |
255 }, width); | |
256 srcp += srcskip; | |
257 dstp += dstskip; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
258 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
259 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
260 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
261 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
262 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
263 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
264 { |
1542 | 265 SDL_PixelFormat* df = info->dst; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
266 unsigned alpha = info->src->alpha; |
1542 | 267 |
268 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | |
269 /* only call a128 version when R,G,B occupy lower bits */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
270 BlitRGBtoRGBSurfaceAlpha128MMX(info); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
271 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
272 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
273 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
274 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
275 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
276 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
277 int dstskip = info->d_skip >> 2; |
1542 | 278 |
279 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ | |
280 /* form the alpha mult */ | |
281 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ | |
282 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | |
283 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | |
284 alpha = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); | |
285 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ | |
286 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ | |
287 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ | |
288 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ | |
289 movd_m2r(df->Amask, mm7); /* dst alpha mask */ | |
290 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
291 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
292 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
293 DUFFS_LOOP_DOUBLE2({ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
294 /* One Pixel Blend */ |
1542 | 295 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ |
296 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
297 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ | |
298 punpcklbw_r2r(mm5, mm2); /* 0A0R0G0B -> mm2(dst) */ | |
299 | |
300 psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ | |
301 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
302 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | |
303 paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | |
304 | |
305 packuswb_r2r(mm5, mm2); /* ARGBARGB -> mm2 */ | |
306 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | |
307 movd_r2m(mm2, *dstp);/* mm2 -> pixel */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
308 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
309 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
310 },{ |
1542 | 311 /* Two Pixels Blend */ |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
312 movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ |
1542 | 313 movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ |
314 movq_r2r(mm0, mm1); /* 2 x src -> mm1(ARGBARGB) */ | |
315 movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ | |
316 | |
317 punpcklbw_r2r(mm5, mm0); /* low - 0A0R0G0B -> mm0(src1) */ | |
318 punpckhbw_r2r(mm5, mm1); /* high - 0A0R0G0B -> mm1(src2) */ | |
319 punpcklbw_r2r(mm5, mm2); /* low - 0A0R0G0B -> mm2(dst1) */ | |
320 punpckhbw_r2r(mm5, mm6); /* high - 0A0R0G0B -> mm6(dst2) */ | |
321 | |
322 psubw_r2r(mm2, mm0);/* src1 - dst1 -> mm0 */ | |
323 pmullw_r2r(mm4, mm0); /* mm0 * alpha -> mm0 */ | |
324 psrlw_i2r(8, mm0); /* mm0 >> 8 -> mm1 */ | |
325 paddb_r2r(mm0, mm2); /* mm0 + mm2(dst1) -> mm2 */ | |
326 | |
327 psubw_r2r(mm6, mm1);/* src2 - dst2 -> mm1 */ | |
328 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
329 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | |
330 paddb_r2r(mm1, mm6); /* mm1 + mm6(dst2) -> mm6 */ | |
331 | |
332 packuswb_r2r(mm6, mm2); /* ARGBARGB -> mm2 */ | |
333 por_r2r(mm7, mm2); /* mm7(dst alpha) | mm2 -> mm2 */ | |
334 | |
335 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ | |
336 | |
337 srcp += 2; | |
338 dstp += 2; | |
339 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
340 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
341 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
342 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
343 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
344 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
345 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
346 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
347 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
348 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
349 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
350 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
351 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
352 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
353 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
354 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
355 int dstskip = info->d_skip >> 2; |
1542 | 356 SDL_PixelFormat* sf = info->src; |
357 Uint32 amask = sf->Amask; | |
358 | |
359 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ | |
360 /* form multiplication mask */ | |
361 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ | |
362 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ | |
363 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ | |
364 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ | |
365 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ | |
366 /* form channel masks */ | |
367 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ | |
368 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ | |
369 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ | |
370 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ | |
371 /* get alpha channel shift */ | |
372 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */ | |
373 | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
374 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
375 DUFFS_LOOP4({ |
1542 | 376 Uint32 alpha = *srcp & amask; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
377 /* FIXME: Here we special-case opaque alpha since the |
1542 | 378 compositioning used (>>8 instead of /255) doesn't handle |
379 it correctly. Also special-case alpha=0 for speed? | |
380 Benchmark this! */ | |
381 if(alpha == 0) { | |
382 /* do nothing */ | |
383 } else if(alpha == amask) { | |
384 /* opaque alpha -- copy RGB, keep dst alpha */ | |
385 /* using MMX here to free up regular registers for other things */ | |
386 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
387 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
388 pand_r2r(mm0, mm1); /* src & chanmask -> mm1 */ | |
389 pand_r2r(mm3, mm2); /* dst & ~chanmask -> mm2 */ | |
390 por_r2r(mm1, mm2); /* src | dst -> mm2 */ | |
391 movd_r2m(mm2, (*dstp)); /* mm2 -> dst */ | |
392 } else { | |
393 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
394 punpcklbw_r2r(mm6, mm1); /* 0A0R0G0B -> mm1 */ | |
395 | |
396 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
397 punpcklbw_r2r(mm6, mm2); /* 0A0R0G0B -> mm2 */ | |
398 | |
399 __asm__ __volatile__ ( | |
400 "movd %0, %%mm4" | |
401 : : "r" (alpha) ); /* 0000A000 -> mm4 */ | |
402 psrld_r2r(mm5, mm4); /* mm4 >> mm5 -> mm4 (0000000A) */ | |
403 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | |
404 punpcklwd_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | |
405 pand_r2r(mm7, mm4); /* 000A0A0A -> mm4, preserve dst alpha on add */ | |
406 | |
407 /* blend */ | |
408 psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ | |
409 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
410 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1(000R0G0B) */ | |
411 paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | |
412 | |
413 packuswb_r2r(mm6, mm2); /* 0000ARGB -> mm2 */ | |
414 movd_r2m(mm2, *dstp);/* mm2 -> dst */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
415 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
416 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
417 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
418 }, width); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
419 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
420 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
421 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
422 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
423 } |
1542 | 424 /* End GCC_ASMBLIT */ |
425 | |
426 #elif MSVC_ASMBLIT | |
427 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | |
428 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) | |
429 { | |
430 int width = info->d_width; | |
431 int height = info->d_height; | |
432 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
433 int srcskip = info->s_skip >> 2; | |
434 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
435 int dstskip = info->d_skip >> 2; | |
436 Uint32 dalpha = info->dst->Amask; | |
437 | |
438 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; | |
439 | |
440 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ | |
441 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ | |
442 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ | |
443 | |
444 while (height--) { | |
445 int n = width; | |
446 if ( n & 1 ) { | |
447 Uint32 s = *srcp++; | |
448 Uint32 d = *dstp; | |
449 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | |
450 + (s & d & 0x00010101)) | dalpha; | |
451 n--; | |
452 } | |
453 | |
454 for (n >>= 1; n > 0; --n) { | |
455 dst1 = *(__m64*)dstp; /* 2 x dst -> dst1(ARGBARGB) */ | |
456 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | |
457 | |
458 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB) */ | |
459 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | |
460 | |
461 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ | |
462 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ | |
463 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ | |
464 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ | |
465 | |
466 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ | |
467 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ | |
468 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ | |
469 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ | |
470 | |
471 *(__m64*)dstp = dst1; /* dst1 -> 2 x dst pixels */ | |
472 dstp += 2; | |
473 srcp += 2; | |
474 } | |
475 | |
476 srcp += srcskip; | |
477 dstp += dstskip; | |
478 } | |
479 _mm_empty(); | |
480 } | |
481 | |
482 /* fast RGB888->(A)RGB888 blending with surface alpha */ | |
483 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) | |
484 { | |
485 SDL_PixelFormat* df = info->dst; | |
486 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; | |
487 unsigned alpha = info->src->alpha; | |
488 | |
489 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | |
490 /* only call a128 version when R,G,B occupy lower bits */ | |
491 BlitRGBtoRGBSurfaceAlpha128MMX(info); | |
492 } else { | |
493 int width = info->d_width; | |
494 int height = info->d_height; | |
495 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
496 int srcskip = info->s_skip >> 2; | |
497 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
498 int dstskip = info->d_skip >> 2; | |
499 Uint32 dalpha = df->Amask; | |
500 Uint32 amult; | |
501 | |
502 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; | |
503 | |
504 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
505 /* form the alpha mult */ | |
506 amult = alpha | (alpha << 8); | |
507 amult = amult | (amult << 16); | |
508 chanmask = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); | |
509 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ | |
510 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ | |
511 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ | |
512 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ | |
513 | |
514 while (height--) { | |
515 int n = width; | |
516 if (n & 1) { | |
517 /* One Pixel Blend */ | |
518 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB)*/ | |
519 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ | |
520 | |
521 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
522 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
523 | |
524 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ | |
525 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
526 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ | |
527 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ | |
528 | |
529 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
530 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ | |
531 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
532 | |
533 ++srcp; | |
534 ++dstp; | |
535 | |
536 n--; | |
537 } | |
538 | |
539 for (n >>= 1; n > 0; --n) { | |
540 /* Two Pixels Blend */ | |
541 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB)*/ | |
542 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | |
543 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ | |
544 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ | |
545 | |
546 dst1 = *(__m64*)dstp;/* 2 x dst -> dst1(ARGBARGB) */ | |
547 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | |
548 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ | |
549 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ | |
550 | |
551 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ | |
552 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ | |
553 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ | |
554 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ | |
555 | |
556 src2 = _mm_sub_pi16(src2, dst2);/* src2 - dst2 -> src2 */ | |
557 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
558 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ | |
559 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ | |
560 | |
561 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ | |
562 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ | |
563 | |
564 *(__m64*)dstp = dst1; /* dst1 -> 2 x pixel */ | |
565 | |
566 srcp += 2; | |
567 dstp += 2; | |
568 } | |
569 srcp += srcskip; | |
570 dstp += dstskip; | |
571 } | |
572 _mm_empty(); | |
573 } | |
574 } | |
575 | |
576 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | |
577 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) | |
578 { | |
579 int width = info->d_width; | |
580 int height = info->d_height; | |
581 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
582 int srcskip = info->s_skip >> 2; | |
583 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
584 int dstskip = info->d_skip >> 2; | |
585 SDL_PixelFormat* sf = info->src; | |
586 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | |
587 Uint32 amask = sf->Amask; | |
588 Uint32 ashift = sf->Ashift; | |
589 Uint64 multmask; | |
590 | |
591 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | |
592 | |
593 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
594 multmask = ~(0xFFFFi64 << (ashift * 2)); | |
595 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ | |
596 | |
597 while(height--) { | |
598 DUFFS_LOOP4({ | |
599 Uint32 alpha = *srcp & amask; | |
600 if (alpha == 0) { | |
601 /* do nothing */ | |
602 } else if (alpha == amask) { | |
603 /* opaque alpha -- copy RGB, keep dst alpha */ | |
604 *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); | |
605 } else { | |
606 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ | |
607 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ | |
608 | |
609 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
610 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
611 | |
612 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ | |
613 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ | |
614 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
615 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
616 mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ | |
617 | |
618 /* blend */ | |
619 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ | |
620 src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src1 - dst1) * alpha -> src1 */ | |
621 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ | |
622 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1 -> dst1(0A0R0G0B) */ | |
623 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
624 | |
625 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
626 } | |
627 ++srcp; | |
628 ++dstp; | |
629 }, width); | |
630 srcp += srcskip; | |
631 dstp += dstskip; | |
632 } | |
633 _mm_empty(); | |
634 } | |
635 /* End MSVC_ASMBLIT */ | |
636 | |
637 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
638 |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
639 #if SDL_ALTIVEC_BLITTERS |
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
640 #if HAVE_ALTIVEC_H |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
641 #include <altivec.h> |
1175
867f521591e5
Fixed Altivec support on Mac OS X.
Ryan C. Gordon <icculus@icculus.org>
parents:
1162
diff
changeset
|
642 #endif |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
643 #include <assert.h> |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
644 |
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
645 #if (defined(__MACOSX__) && (__GNUC__ < 4)) |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
646 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
647 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
648 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
649 (vector unsigned short) ( a,b,c,d,e,f,g,h ) |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
650 #else |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
651 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
652 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
653 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
654 (vector unsigned short) { a,b,c,d,e,f,g,h } |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
655 #endif |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
656 |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
657 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
658 #define VECPRINT(msg, v) do { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
659 vector unsigned int tmpvec = (vector unsigned int)(v); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
660 unsigned int *vp = (unsigned int *)&tmpvec; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
661 printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
662 } while (0) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
663 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
664 /* the permuation vector that takes the high bytes out of all the appropriate shorts |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
665 (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
666 0x00, 0x10, 0x02, 0x12, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
667 0x04, 0x14, 0x06, 0x16, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
668 0x08, 0x18, 0x0A, 0x1A, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
669 0x0C, 0x1C, 0x0E, 0x1E ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
670 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
671 #define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F))) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
672 #define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12))) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
673 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
674 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
675 ? vec_lvsl(0, src) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
676 : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
677 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
678 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
679 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
680 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
681 vector unsigned short vtemp1 = vec_mule(vs, valpha); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
682 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
683 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
684 /* valpha2 is 255-alpha */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
685 vector unsigned char valpha2 = vec_nor(valpha, valpha); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
686 /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
687 vector unsigned short vtemp3 = vec_mule(vd, valpha2); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
688 /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
689 vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
690 /* add source and dest */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
691 vtemp1 = vec_add(vtemp1, vtemp3); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
692 vtemp2 = vec_add(vtemp2, vtemp4); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
693 /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
694 vtemp1 = vec_add(vtemp1, v1_16); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
695 vtemp3 = vec_sr(vtemp1, v8_16); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
696 vtemp1 = vec_add(vtemp1, vtemp3); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
697 /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
698 vtemp2 = vec_add(vtemp2, v1_16); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
699 vtemp4 = vec_sr(vtemp2, v8_16); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
700 vtemp2 = vec_add(vtemp2, vtemp4); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
701 /* (>>8) and get ARGBARGBARGBARGB */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
702 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
703 } while (0) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
704 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
705 /* Calculate the permute vector used for 32->32 swizzling */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
706 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
707 const SDL_PixelFormat *dstfmt) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
708 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
709 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
710 * We have to assume that the bits that aren't used by other |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
711 * colors is alpha, and it's one complete byte, since some formats |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
712 * leave alpha with a zero mask, but we should still swizzle the bits. |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
713 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
714 /* ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
715 const static struct SDL_PixelFormat default_pixel_format = { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
716 NULL, 0, 0, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
717 0, 0, 0, 0, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
718 16, 8, 0, 24, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
719 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
720 0, 0}; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
721 if (!srcfmt) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
722 srcfmt = &default_pixel_format; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
723 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
724 if (!dstfmt) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
725 dstfmt = &default_pixel_format; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
726 } |
1487
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
727 const vector unsigned char plus = VECUINT8_LITERAL |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
728 ( 0x00, 0x00, 0x00, 0x00, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
729 0x04, 0x04, 0x04, 0x04, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
730 0x08, 0x08, 0x08, 0x08, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
731 0x0C, 0x0C, 0x0C, 0x0C ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
732 vector unsigned char vswiz; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
733 vector unsigned int srcvec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
734 #define RESHIFT(X) (3 - ((X) >> 3)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
735 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
736 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
737 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
738 Uint32 amask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
739 /* Use zero for alpha if either surface doesn't have alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
740 if (dstfmt->Amask) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
741 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
742 } else { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
743 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
744 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
745 #undef RESHIFT |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
746 ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
747 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
748 return(vswiz); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
749 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
750 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
751 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
752 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
753 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
754 Uint8 *src = (Uint8 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
755 int srcskip = info->s_skip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
756 Uint8 *dst = (Uint8 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
757 int dstskip = info->d_skip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
758 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
759 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
760 vector unsigned char v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
761 vector unsigned short v8_16 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
762 vector unsigned short v1_16 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
763 vector unsigned short v2_16 = vec_splat_u16(2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
764 vector unsigned short v3_16 = vec_splat_u16(3); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
765 vector unsigned int v8_32 = vec_splat_u32(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
766 vector unsigned int v16_32 = vec_add(v8_32, v8_32); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
767 vector unsigned short v3f = VECUINT16_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
768 0x003f, 0x003f, 0x003f, 0x003f, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
769 0x003f, 0x003f, 0x003f, 0x003f); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
770 vector unsigned short vfc = VECUINT16_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
771 0x00fc, 0x00fc, 0x00fc, 0x00fc, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
772 0x00fc, 0x00fc, 0x00fc, 0x00fc); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
773 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
774 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
775 0x10 - 0x1f is the alpha |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
776 0x00 - 0x0e evens are the red |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
777 0x01 - 0x0f odds are zero |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
778 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
779 vector unsigned char vredalpha1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
780 0x10, 0x00, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
781 0x10, 0x02, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
782 0x10, 0x04, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
783 0x10, 0x06, 0x01, 0x01 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
784 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
785 vector unsigned char vredalpha2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
786 vec_add((vector unsigned int)vredalpha1, vec_sl(v8_32, v16_32)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
787 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
788 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
789 0x00 - 0x0f is ARxx ARxx ARxx ARxx |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
790 0x11 - 0x0f odds are blue |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
791 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
792 vector unsigned char vblue1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
793 0x00, 0x01, 0x02, 0x11, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
794 0x04, 0x05, 0x06, 0x13, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
795 0x08, 0x09, 0x0a, 0x15, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
796 0x0c, 0x0d, 0x0e, 0x17 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
797 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
798 vector unsigned char vblue2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
799 vec_add((vector unsigned int)vblue1, v8_32) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
800 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
801 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
802 0x00 - 0x0f is ARxB ARxB ARxB ARxB |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
803 0x10 - 0x0e evens are green |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
804 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
805 vector unsigned char vgreen1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
806 0x00, 0x01, 0x10, 0x03, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
807 0x04, 0x05, 0x12, 0x07, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
808 0x08, 0x09, 0x14, 0x0b, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
809 0x0c, 0x0d, 0x16, 0x0f |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
810 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
811 vector unsigned char vgreen2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
812 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
813 ); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
814 vector unsigned char vgmerge = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
815 0x00, 0x02, 0x00, 0x06, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
816 0x00, 0x0a, 0x00, 0x0e, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
817 0x00, 0x12, 0x00, 0x16, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
818 0x00, 0x1a, 0x00, 0x1e); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
819 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
820 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
821 vector unsigned char valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
822 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
823 vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
824 vf800 = vec_sl(vf800, vec_splat_u16(8)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
825 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
826 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
827 int extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
828 vector unsigned char valigner; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
829 vector unsigned char vsrc; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
830 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
831 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
832 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
833 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
834 while (condition) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
835 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
836 unsigned sR, sG, sB, dR, dG, dB, sA; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
837 DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
838 if(sA) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
839 unsigned short dstpixel = *((unsigned short *)dst); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
840 dR = (dstpixel >> 8) & 0xf8; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
841 dG = (dstpixel >> 3) & 0xfc; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
842 dB = (dstpixel << 3) & 0xf8; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
843 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
844 *((unsigned short *)dst) = ( \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
845 ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
846 ); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
847 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
848 src += 4; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
849 dst += 2; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
850 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
851 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
852 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
853 extrawidth = (width % 8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
854 valigner = VEC_ALIGNER(src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
855 vsrc = (vector unsigned char)vec_ld(0, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
856 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
857 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
858 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
859 vector unsigned char vsrc1, vsrc2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
860 vector unsigned char vdst1, vdst2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
861 vector unsigned short vR, vG, vB; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
862 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
863 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
864 /* Load 8 pixels from src as ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
865 voverflow = (vector unsigned char)vec_ld(15, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
866 vsrc = vec_perm(vsrc, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
867 vsrc1 = vec_perm(vsrc, vsrc, vpermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
868 src += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
869 vsrc = (vector unsigned char)vec_ld(15, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
870 voverflow = vec_perm(voverflow, vsrc, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
871 vsrc2 = vec_perm(voverflow, voverflow, vpermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
872 src += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
873 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
874 /* Load 8 pixels from dst as XRGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
875 voverflow = vec_ld(0, dst); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
876 vR = vec_and((vector unsigned short)voverflow, vf800); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
877 vB = vec_sl((vector unsigned short)voverflow, v3_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
878 vG = vec_sl(vB, v2_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
879 vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
880 vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
881 vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
882 vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
883 vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
884 vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
885 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
886 /* Alpha blend 8 pixels as ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
887 valpha = vec_perm(vsrc1, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
888 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, v8_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
889 valpha = vec_perm(vsrc2, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
890 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, v8_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
891 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
892 /* Convert 8 pixels to 565 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
893 vpixel = (vector unsigned short)vec_packpx((vector unsigned int)vdst1, (vector unsigned int)vdst2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
894 vgpixel = (vector unsigned short)vec_perm(vdst1, vdst2, vgmerge); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
895 vgpixel = vec_and(vgpixel, vfc); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
896 vgpixel = vec_sl(vgpixel, v3_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
897 vrpixel = vec_sl(vpixel, v1_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
898 vrpixel = vec_and(vrpixel, vf800); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
899 vbpixel = vec_and(vpixel, v3f); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
900 vdst1 = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
901 vdst1 = vec_or(vdst1, (vector unsigned char)vbpixel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
902 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
903 /* Store 8 pixels */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
904 vec_st(vdst1, 0, dst); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
905 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
906 width -= 8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
907 dst += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
908 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
909 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
910 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
911 src += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
912 dst += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
913 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
914 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
915 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
916 static void Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
917 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
918 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
919 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
920 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
921 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
922 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
923 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
924 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
925 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
926 unsigned sA = srcfmt->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
927 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
928 Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
929 Uint32 ckey = info->src->colorkey; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
930 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
931 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
932 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
933 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
934 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
935 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
936 vector unsigned char vbits; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
937 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
938 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
939 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
940 vector unsigned int vckey; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
941 vector unsigned int vrgbmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
942 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
943 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
944 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
945 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
946 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
947 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
948 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
949 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
950 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
951 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
952 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
953 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
954 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
955 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
956 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
957 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
958 vbits = (vector unsigned char)vec_splat_s8(-1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
959 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
960 ckey &= rgbmask; |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
961 ((unsigned int *)(char*)&vckey)[0] = ckey; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
962 vckey = vec_splat(vckey, 0); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
963 ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
964 vrgbmask = vec_splat(vrgbmask, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
965 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
966 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
967 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
968 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
969 while (condition) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
970 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
971 unsigned sR, sG, sB, dR, dG, dB; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
972 RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
973 if(sA && Pixel != ckey) { \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
974 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
975 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
976 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
977 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
978 } \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
979 dstp++; \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
980 srcp++; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
981 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
982 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
983 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
984 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
985 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
986 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
987 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
988 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
989 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
990 vector unsigned char vsel; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
991 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
992 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
993 vector unsigned char vd_orig; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
994 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
995 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
996 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
997 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
998 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
999 /* vsel is set for items that match the key */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1000 vsel = (vector unsigned char)vec_and((vector unsigned int)vs, vrgbmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1001 vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1002 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1003 /* permute to source format */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1004 vs = vec_perm(vs, valpha, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1005 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1006 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1007 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1008 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1009 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1010 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1011 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1012 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1013 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1014 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1015 /* mask out color key */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1016 vd = vec_sel(vd, vd_orig, vsel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1017 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1018 /* permute to dest format */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1019 vd = vec_perm(vd, vbits, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1020 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1021 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1022 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1023 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1024 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1025 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1026 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1027 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1028 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1029 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1030 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1031 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1032 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1033 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1034 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1035 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1036 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1037 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1038 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1039 static void Blit32to32PixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1040 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1041 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1042 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1043 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1044 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1045 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1046 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1047 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1048 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1049 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1050 vector unsigned char valphaPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1051 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1052 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1053 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1054 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1055 vector unsigned char vpixelmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1056 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1057 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1058 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1059 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1060 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1061 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1062 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1063 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1064 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1065 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1066 vpixelmask = vec_nor(valphamask, v0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1067 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1068 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1069 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1070 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1071 while ( height-- ) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1072 width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1073 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1074 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1075 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1076 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1077 if(sA) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1078 DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1079 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1080 ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1081 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1082 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1083 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1084 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1085 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1086 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1087 if (width > 0) { |
1487
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
1088 /* vsrcPermute */ |
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
1089 /* vdstPermute */ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1090 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1091 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1092 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1093 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1094 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1095 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1096 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1097 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1098 vector unsigned char vdstalpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1099 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1100 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1101 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1102 vs = vec_perm(vs, v0, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1103 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1104 valpha = vec_perm(vs, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1105 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1106 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1107 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1108 vd = vec_perm(vd, v0, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1109 vdstalpha = vec_and(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1110 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1111 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1112 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1113 /* set the alpha to the dest alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1114 vd = vec_and(vd, vpixelmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1115 vd = vec_or(vd, vdstalpha); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1116 vd = vec_perm(vd, v0, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1117 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1118 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1119 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1120 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1121 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1122 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1123 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1124 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1125 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1126 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1127 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1128 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1129 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1130 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1131 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1132 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1133 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1134 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1135 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1136 static void BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1137 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1138 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1139 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1140 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1141 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1142 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1143 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1144 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1145 vector unsigned char valphaPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1146 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1147 vector unsigned char vpixelmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1148 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1149 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1150 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1151 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1152 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1153 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1154 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1155 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1156 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1157 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1158 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1159 vpixelmask = vec_nor(valphamask, v0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1160 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1161 width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1162 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1163 while ((condition)) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1164 Uint32 dalpha; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1165 Uint32 d; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1166 Uint32 s1; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1167 Uint32 d1; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1168 Uint32 s = *srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1169 Uint32 alpha = s >> 24; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1170 if(alpha) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1171 if(alpha == SDL_ALPHA_OPAQUE) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1172 *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1173 } else { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1174 d = *dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1175 dalpha = d & 0xff000000; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1176 s1 = s & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1177 d1 = d & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1178 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1179 s &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1180 d &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1181 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1182 *dstp = d1 | d | dalpha; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1183 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1184 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1185 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1186 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1187 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1188 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1189 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1190 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1191 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1192 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1193 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1194 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1195 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1196 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1197 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1198 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1199 vector unsigned char vdstalpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1200 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1201 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1202 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1203 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1204 valpha = vec_perm(vs, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1205 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1206 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1207 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1208 vdstalpha = vec_and(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1209 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1210 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1211 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1212 /* set the alpha to the dest alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1213 vd = vec_and(vd, vpixelmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1214 vd = vec_or(vd, vdstalpha); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1215 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1216 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1217 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1218 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1219 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1220 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1221 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1222 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1223 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1224 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1225 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1226 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1227 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1228 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1229 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1230 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1231 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1232 static void Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1233 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1234 /* XXX : 6 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1235 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1236 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1237 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1238 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1239 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1240 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1241 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1242 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1243 unsigned sA = srcfmt->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1244 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1245 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1246 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1247 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1248 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1249 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1250 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1251 vector unsigned char vbits; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1252 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1253 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1254 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1255 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1256 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1257 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1258 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1259 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1260 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1261 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1262 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1263 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1264 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1265 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1266 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1267 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1268 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1269 vbits = (vector unsigned char)vec_splat_s8(-1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1270 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1271 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1272 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1273 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1274 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1275 unsigned sR, sG, sB, dR, dG, dB; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1276 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1277 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1278 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1279 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1280 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1281 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1282 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1283 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1284 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1285 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1286 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1287 vector unsigned char valigner = vec_lvsl(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1288 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1289 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1290 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1291 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1292 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1293 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1294 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1295 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1296 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1297 vs = vec_perm(vs, valpha, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1298 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1299 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1300 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1301 vd = vec_perm(vd, vd, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1302 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1303 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1304 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1305 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1306 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1307 vd = vec_perm(vd, vbits, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1308 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1309 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1310 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1311 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1312 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1313 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1314 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1315 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1316 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1317 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1318 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1319 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1320 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1321 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1322 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1323 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1324 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1325 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1326 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1327 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1328 /* fast RGB888->(A)RGB888 blending */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1329 static void BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1330 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1331 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1332 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1333 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1334 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1335 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1336 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1337 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1338 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1339 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1340 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1341 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1342 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1343 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1344 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1345 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1346 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1347 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1348 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1349 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1350 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1351 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1352 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1353 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1354 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1355 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1356 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1357 Uint32 s = *srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1358 Uint32 d = *dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1359 Uint32 s1 = s & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1360 Uint32 d1 = d & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1361 d1 = (d1 + ((s1 - d1) * alpha >> 8)) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1362 & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1363 s &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1364 d &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1365 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1366 *dstp = d1 | d | 0xff000000; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1367 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1368 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1369 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1370 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1371 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1372 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1373 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1374 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1375 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1376 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1377 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1378 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1379 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1380 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1381 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1382 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1383 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1384 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1385 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1386 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1387 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1388 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1389 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1390 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1391 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1392 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1393 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1394 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1395 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1396 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1397 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1398 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1399 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1400 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1401 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1402 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1403 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1404 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1405 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1406 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1407 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1408 } |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
1409 #endif /* SDL_ALTIVEC_BLITTERS */ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1410 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1411 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1412 static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) |
0 | 1413 { |
1414 int width = info->d_width; | |
1415 int height = info->d_height; | |
1416 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1417 int srcskip = info->s_skip >> 2; | |
1418 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1419 int dstskip = info->d_skip >> 2; | |
1420 | |
1421 while(height--) { | |
1422 DUFFS_LOOP4({ | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1423 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1424 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1425 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1426 + (s & d & 0x00010101)) | 0xff000000; |
0 | 1427 }, width); |
1428 srcp += srcskip; | |
1429 dstp += dstskip; | |
1430 } | |
1431 } | |
1432 | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1433 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1434 static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1435 { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1436 unsigned alpha = info->src->alpha; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1437 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1438 BlitRGBtoRGBSurfaceAlpha128(info); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1439 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1440 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1441 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1442 Uint32 *srcp = (Uint32 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1443 int srcskip = info->s_skip >> 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1444 Uint32 *dstp = (Uint32 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1445 int dstskip = info->d_skip >> 2; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1446 Uint32 s; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1447 Uint32 d; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1448 Uint32 s1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1449 Uint32 d1; |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1450 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1451 while(height--) { |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1452 DUFFS_LOOP_DOUBLE2({ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1453 /* One Pixel Blend */ |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1454 s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1455 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1456 s1 = s & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1457 d1 = d & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1458 d1 = (d1 + ((s1 - d1) * alpha >> 8)) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1459 & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1460 s &= 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1461 d &= 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1462 d = (d + ((s - d) * alpha >> 8)) & 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1463 *dstp = d1 | d | 0xff000000; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1464 ++srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1465 ++dstp; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1466 },{ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1467 /* Two Pixels Blend */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1468 s = *srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1469 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1470 s1 = s & 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1471 d1 = d & 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1472 d1 += (s1 - d1) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1473 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1474 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1475 s = ((s & 0xff00) >> 8) | |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1476 ((srcp[1] & 0xff00) << 8); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1477 d = ((d & 0xff00) >> 8) | |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1478 ((dstp[1] & 0xff00) << 8); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1479 d += (s - d) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1480 d &= 0x00ff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1481 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1482 *dstp++ = d1 | ((d << 8) & 0xff00) | 0xff000000; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1483 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1484 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1485 s1 = *srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1486 d1 = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1487 s1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1488 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1489 d1 += (s1 - d1) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1490 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1491 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1492 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1493 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1494 ++dstp; |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1495 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1496 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1497 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1498 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1499 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1500 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1501 |
0 | 1502 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
1503 static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info) | |
1504 { | |
1505 int width = info->d_width; | |
1506 int height = info->d_height; | |
1507 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1508 int srcskip = info->s_skip >> 2; | |
1509 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1510 int dstskip = info->d_skip >> 2; | |
1511 | |
1512 while(height--) { | |
1513 DUFFS_LOOP4({ | |
1514 Uint32 dalpha; | |
1515 Uint32 d; | |
1516 Uint32 s1; | |
1517 Uint32 d1; | |
1518 Uint32 s = *srcp; | |
1519 Uint32 alpha = s >> 24; | |
1520 /* FIXME: Here we special-case opaque alpha since the | |
1521 compositioning used (>>8 instead of /255) doesn't handle | |
1522 it correctly. Also special-case alpha=0 for speed? | |
1523 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1524 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1525 if(alpha == SDL_ALPHA_OPAQUE) { |
0 | 1526 *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1527 } else { |
0 | 1528 /* |
1529 * take out the middle component (green), and process | |
1530 * the other two in parallel. One multiply less. | |
1531 */ | |
1532 d = *dstp; | |
1533 dalpha = d & 0xff000000; | |
1534 s1 = s & 0xff00ff; | |
1535 d1 = d & 0xff00ff; | |
1536 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; | |
1537 s &= 0xff00; | |
1538 d &= 0xff00; | |
1539 d = (d + ((s - d) * alpha >> 8)) & 0xff00; | |
1540 *dstp = d1 | d | dalpha; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1541 } |
0 | 1542 } |
1543 ++srcp; | |
1544 ++dstp; | |
1545 }, width); | |
1546 srcp += srcskip; | |
1547 dstp += dstskip; | |
1548 } | |
1549 } | |
1550 | |
1542 | 1551 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1552 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1553 inline static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1554 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1555 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1556 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1557 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1558 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1559 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1560 int dstskip = info->d_skip >> 2; |
1542 | 1561 SDL_PixelFormat* sf = info->src; |
1562 Uint32 amask = sf->Amask; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1563 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1564 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1565 /* make mm6 all zeros. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1566 "pxor %%mm6, %%mm6\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1567 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1568 /* Make a mask to preserve the alpha. */ |
1542 | 1569 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ |
1570 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ | |
1571 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ | |
1572 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ | |
1573 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1574 |
1542 | 1575 /* form channel masks */ |
1576 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ | |
1577 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ | |
1578 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ | |
1579 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ | |
1580 | |
1581 /* get alpha channel shift */ | |
1582 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ | |
1583 | |
1584 : /* nothing */ : "m" (sf->Amask), "m" (sf->Ashift) ); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1585 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1586 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1587 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1588 DUFFS_LOOP4({ |
1542 | 1589 Uint32 alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1590 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1591 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1592 "prefetch 64(%0)\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1593 "prefetch 64(%1)\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1594 : : "r" (srcp), "r" (dstp) ); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1595 |
1542 | 1596 alpha = *srcp & amask; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1597 /* FIXME: Here we special-case opaque alpha since the |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1598 compositioning used (>>8 instead of /255) doesn't handle |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1599 it correctly. Also special-case alpha=0 for speed? |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1600 Benchmark this! */ |
1542 | 1601 if(alpha == 0) { |
1602 /* do nothing */ | |
1603 } | |
1604 else if(alpha == amask) { | |
1605 /* opaque alpha -- copy RGB, keep dst alpha */ | |
1606 /* using MMX here to free up regular registers for other things */ | |
1607 __asm__ ( | |
1608 "movd (%0), %%mm0\n\t" /* src(ARGB) -> mm0 (0000ARGB)*/ | |
1609 "movd (%1), %%mm1\n\t" /* dst(ARGB) -> mm1 (0000ARGB)*/ | |
1610 "pand %%mm4, %%mm0\n\t" /* src & chanmask -> mm0 */ | |
1611 "pand %%mm3, %%mm1\n\t" /* dst & ~chanmask -> mm2 */ | |
1612 "por %%mm0, %%mm1\n\t" /* src | dst -> mm1 */ | |
1613 "movd %%mm1, (%1) \n\t" /* mm1 -> dst */ | |
1614 | |
1615 : : "r" (srcp), "r" (dstp) ); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1616 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1617 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1618 else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1619 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1620 /* load in the source, and dst. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1621 "movd (%0), %%mm0\n" /* mm0(s) = 0 0 0 0 | As Rs Gs Bs */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1622 "movd (%1), %%mm1\n" /* mm1(d) = 0 0 0 0 | Ad Rd Gd Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1623 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1624 /* Move the src alpha into mm2 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1625 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1626 /* if supporting pshufw */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1627 /*"pshufw $0x55, %%mm0, %%mm2\n" */ /* mm2 = 0 As 0 As | 0 As 0 As */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1628 /*"psrlw $8, %%mm2\n" */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1629 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1630 /* else: */ |
1542 | 1631 "movd %2, %%mm2\n" |
1632 "psrld %%mm5, %%mm2\n" /* mm2 = 0 0 0 0 | 0 0 0 As */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1633 "punpcklwd %%mm2, %%mm2\n" /* mm2 = 0 0 0 0 | 0 As 0 As */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1634 "punpckldq %%mm2, %%mm2\n" /* mm2 = 0 As 0 As | 0 As 0 As */ |
1542 | 1635 "pand %%mm7, %%mm2\n" /* to preserve dest alpha */ |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1636 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1637 /* move the colors into words. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1638 "punpcklbw %%mm6, %%mm0\n" /* mm0 = 0 As 0 Rs | 0 Gs 0 Bs */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1639 "punpcklbw %%mm6, %%mm1\n" /* mm0 = 0 Ad 0 Rd | 0 Gd 0 Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1640 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1641 /* src - dst */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1642 "psubw %%mm1, %%mm0\n" /* mm0 = As-Ad Rs-Rd | Gs-Gd Bs-Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1643 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1644 /* A * (src-dst) */ |
1542 | 1645 "pmullw %%mm2, %%mm0\n" /* mm0 = 0*As-d As*Rs-d | As*Gs-d As*Bs-d */ |
1646 "psrlw $8, %%mm0\n" /* mm0 = 0>>8 Rc>>8 | Gc>>8 Bc>>8 */ | |
1647 "paddb %%mm1, %%mm0\n" /* mm0 = 0+Ad Rc+Rd | Gc+Gd Bc+Bd */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1648 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1649 "packuswb %%mm0, %%mm0\n" /* mm0 = | Ac Rc Gc Bc */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1650 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1651 "movd %%mm0, (%1)\n" /* result in mm0 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1652 |
1542 | 1653 : : "r" (srcp), "r" (dstp), "r" (alpha) ); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1654 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1655 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1656 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1657 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1658 }, width); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1659 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1660 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1661 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1662 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1663 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1664 "emms\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1665 : ); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1666 } |
1542 | 1667 /* End GCC_ASMBLIT*/ |
1668 | |
1669 #elif MSVC_ASMBLIT | |
1670 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | |
1671 static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) | |
1672 { | |
1673 int width = info->d_width; | |
1674 int height = info->d_height; | |
1675 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1676 int srcskip = info->s_skip >> 2; | |
1677 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1678 int dstskip = info->d_skip >> 2; | |
1679 SDL_PixelFormat* sf = info->src; | |
1680 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | |
1681 Uint32 amask = sf->Amask; | |
1682 Uint32 ashift = sf->Ashift; | |
1683 Uint64 multmask; | |
1684 | |
1685 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | |
1686 | |
1687 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
1688 multmask = ~(0xFFFFi64 << (ashift * 2)); | |
1689 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ | |
1690 | |
1691 while(height--) { | |
1692 DUFFS_LOOP4({ | |
1693 Uint32 alpha; | |
1694 | |
1695 _m_prefetch(srcp + 16); | |
1696 _m_prefetch(dstp + 16); | |
1697 | |
1698 alpha = *srcp & amask; | |
1699 if (alpha == 0) { | |
1700 /* do nothing */ | |
1701 } else if (alpha == amask) { | |
1702 /* copy RGB, keep dst alpha */ | |
1703 *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); | |
1704 } else { | |
1705 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ | |
1706 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ | |
1707 | |
1708 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
1709 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
1710 | |
1711 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ | |
1712 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ | |
1713 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
1714 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
1715 mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ | |
1716 | |
1717 /* blend */ | |
1718 src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */ | |
1719 src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */ | |
1720 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ | |
1721 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */ | |
1722 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
1723 | |
1724 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
1725 } | |
1726 ++srcp; | |
1727 ++dstp; | |
1728 }, width); | |
1729 srcp += srcskip; | |
1730 dstp += dstskip; | |
1731 } | |
1732 _mm_empty(); | |
1733 } | |
1734 /* End MSVC_ASMBLIT */ | |
1735 | |
1736 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1737 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1738 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1739 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1740 /* blend a single 16 bit pixel at 50% */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1741 #define BLEND16_50(d, s, mask) \ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1742 ((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff))) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1743 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1744 /* blend two 16 bit pixels at 50% */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1745 #define BLEND2x16_50(d, s, mask) \ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1746 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1747 + (s & d & (~(mask | mask << 16)))) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1748 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1749 static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask) |
0 | 1750 { |
1751 int width = info->d_width; | |
1752 int height = info->d_height; | |
1753 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
1754 int srcskip = info->s_skip >> 1; | |
1755 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
1756 int dstskip = info->d_skip >> 1; | |
1757 | |
1758 while(height--) { | |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1759 if(((uintptr_t)srcp ^ (uintptr_t)dstp) & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1760 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1761 * Source and destination not aligned, pipeline it. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1762 * This is mostly a win for big blits but no loss for |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1763 * small ones |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1764 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1765 Uint32 prev_sw; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1766 int w = width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1767 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1768 /* handle odd destination */ |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1769 if((uintptr_t)dstp & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1770 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1771 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1772 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1773 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1774 w--; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1775 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1776 srcp++; /* srcp is now 32-bit aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1777 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1778 /* bootstrap pipeline with first halfword */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1779 prev_sw = ((Uint32 *)srcp)[-1]; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1780 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1781 while(w > 1) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1782 Uint32 sw, dw, s; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1783 sw = *(Uint32 *)srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1784 dw = *(Uint32 *)dstp; |
1443
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1785 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1786 s = (prev_sw << 16) + (sw >> 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1787 #else |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1788 s = (prev_sw >> 16) + (sw << 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1789 #endif |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1790 prev_sw = sw; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1791 *(Uint32 *)dstp = BLEND2x16_50(dw, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1792 dstp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1793 srcp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1794 w -= 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1795 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1796 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1797 /* final pixel if any */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1798 if(w) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1799 Uint16 d = *dstp, s; |
1443
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1800 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1801 s = (Uint16)prev_sw; |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1802 #else |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1803 s = (Uint16)(prev_sw >> 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1804 #endif |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1805 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1806 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1807 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1808 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1809 srcp += srcskip - 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1810 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1811 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1812 /* source and destination are aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1813 int w = width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1814 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1815 /* first odd pixel? */ |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1816 if((uintptr_t)srcp & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1817 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1818 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1819 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1820 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1821 w--; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1822 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1823 /* srcp and dstp are now 32-bit aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1824 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1825 while(w > 1) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1826 Uint32 sw = *(Uint32 *)srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1827 Uint32 dw = *(Uint32 *)dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1828 *(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1829 srcp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1830 dstp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1831 w -= 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1832 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1833 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1834 /* last odd pixel? */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1835 if(w) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1836 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1837 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1838 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1839 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1840 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1841 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1842 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1843 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1844 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1845 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1846 |
1542 | 1847 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1848 /* fast RGB565->RGB565 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1849 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1850 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1851 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1852 if(alpha == 128) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1853 Blit16to16SurfaceAlpha128(info, 0xf7de); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1854 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1855 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1856 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1857 Uint16 *srcp = (Uint16 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1858 int srcskip = info->s_skip >> 1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1859 Uint16 *dstp = (Uint16 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1860 int dstskip = info->d_skip >> 1; |
1542 | 1861 Uint32 s, d; |
1862 Uint8 load[8]; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1863 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1864 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ |
1542 | 1865 *(Uint64 *)load = alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1866 alpha >>= 3; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1867 |
1542 | 1868 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
1869 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | |
1870 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | |
1871 /* position alpha to allow for mullo and mulhi on diff channels | |
1872 to reduce the number of operations */ | |
1873 psllq_i2r(3, mm0); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1874 |
1542 | 1875 /* Setup the 565 color channel masks */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
1876 *(Uint64 *)load = 0x07E007E007E007E0ULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1877 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
1878 *(Uint64 *)load = 0x001F001F001F001FULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1879 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1880 while(height--) { |
1542 | 1881 DUFFS_LOOP_QUATRO2( |
1882 { | |
1883 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1884 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1885 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1886 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1887 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1888 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1889 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1890 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1891 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1892 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1893 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1894 *dstp++ = d | d >> 16; |
1542 | 1895 },{ |
1896 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1897 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1898 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1899 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1900 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1901 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1902 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1903 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1904 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1905 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1906 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1907 *dstp++ = d | d >> 16; |
1542 | 1908 s = *srcp++; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1909 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1910 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1911 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1912 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1913 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1914 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1915 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1916 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1917 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1918 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1919 *dstp++ = d | d >> 16; |
1542 | 1920 },{ |
1921 movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ | |
1922 movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ | |
1923 | |
1924 /* red -- does not need a mask since the right shift clears | |
1925 the uninteresting bits */ | |
1926 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1927 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1928 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 [000r 000r 000r 000r] */ | |
1929 psrlw_i2r(11, mm6); /* mm6 >> 11 -> mm6 [000r 000r 000r 000r] */ | |
1930 | |
1931 /* blend */ | |
1932 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1933 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1934 /* alpha used is actually 11 bits | |
1935 11 + 5 = 16 bits, so the sign bits are lost */ | |
1936 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
1937 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1938 psllw_i2r(11, mm6); /* mm6 << 11 -> mm6 */ | |
1939 | |
1940 movq_r2r(mm6, mm1); /* save new reds in dsts */ | |
1941 | |
1942 /* green -- process the bits in place */ | |
1943 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1944 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1945 pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ | |
1946 pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ | |
1947 | |
1948 /* blend */ | |
1949 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1950 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1951 /* 11 + 11 - 16 = 6 bits, so all the lower uninteresting | |
1952 bits are gone and the sign bits present */ | |
1953 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
1954 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1955 | |
1956 por_r2r(mm6, mm1); /* save new greens in dsts */ | |
1957 | |
1958 /* blue */ | |
1959 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1960 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1961 pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ | |
1962 pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
1963 | |
1964 /* blend */ | |
1965 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1966 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1967 /* 11 + 5 = 16 bits, so the sign bits are lost and | |
1968 the interesting bits will need to be MASKed */ | |
1969 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
1970 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1971 pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
1972 | |
1973 por_r2r(mm6, mm1); /* save new blues in dsts */ | |
1974 | |
1975 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ | |
1976 | |
1977 srcp += 4; | |
1978 dstp += 4; | |
1979 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1980 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1981 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1982 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1983 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1984 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1985 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1986 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1987 /* fast RGB555->RGB555 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1988 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1989 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1990 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1991 if(alpha == 128) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1992 Blit16to16SurfaceAlpha128(info, 0xfbde); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1993 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1994 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1995 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1996 Uint16 *srcp = (Uint16 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1997 int srcskip = info->s_skip >> 1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1998 Uint16 *dstp = (Uint16 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1999 int dstskip = info->d_skip >> 1; |
1542 | 2000 Uint32 s, d; |
2001 Uint8 load[8]; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2002 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2003 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ |
1542 | 2004 *(Uint64 *)load = alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2005 alpha >>= 3; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2006 |
1542 | 2007 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
2008 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | |
2009 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | |
2010 /* position alpha to allow for mullo and mulhi on diff channels | |
2011 to reduce the number of operations */ | |
2012 psllq_i2r(3, mm0); | |
2013 | |
2014 /* Setup the 555 color channel masks */ | |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
2015 *(Uint64 *)load = 0x03E003E003E003E0ULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2016 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
2017 *(Uint64 *)load = 0x001F001F001F001FULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2018 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2019 while(height--) { |
1542 | 2020 DUFFS_LOOP_QUATRO2( |
2021 { | |
2022 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2023 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2024 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2025 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2026 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2027 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2028 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2029 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2030 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2031 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2032 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2033 *dstp++ = d | d >> 16; |
1542 | 2034 },{ |
2035 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2036 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2037 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2038 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2039 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2040 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2041 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2042 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2043 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2044 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2045 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2046 *dstp++ = d | d >> 16; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2047 s = *srcp++; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2048 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2049 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2050 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2051 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2052 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2053 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2054 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2055 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2056 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2057 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2058 *dstp++ = d | d >> 16; |
1542 | 2059 },{ |
2060 movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ | |
2061 movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ | |
2062 | |
2063 /* red -- process the bits in place */ | |
2064 psllq_i2r(5, mm4); /* turn MASKGREEN into MASKRED */ | |
2065 /* by reusing the GREEN mask we free up another mmx | |
2066 register to accumulate the result */ | |
2067 | |
2068 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2069 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2070 pand_r2r(mm4, mm5); /* src & MASKRED -> mm5 */ | |
2071 pand_r2r(mm4, mm6); /* dst & MASKRED -> mm6 */ | |
2072 | |
2073 /* blend */ | |
2074 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2075 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2076 /* 11 + 15 - 16 = 10 bits, uninteresting bits will be | |
2077 cleared by a MASK below */ | |
2078 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2079 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2080 pand_r2r(mm4, mm6); /* mm6 & MASKRED -> mm6 */ | |
2081 | |
2082 psrlq_i2r(5, mm4); /* turn MASKRED back into MASKGREEN */ | |
2083 | |
2084 movq_r2r(mm6, mm1); /* save new reds in dsts */ | |
2085 | |
2086 /* green -- process the bits in place */ | |
2087 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2088 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2089 pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ | |
2090 pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ | |
2091 | |
2092 /* blend */ | |
2093 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2094 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2095 /* 11 + 10 - 16 = 5 bits, so all the lower uninteresting | |
2096 bits are gone and the sign bits present */ | |
2097 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2098 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2099 | |
2100 por_r2r(mm6, mm1); /* save new greens in dsts */ | |
2101 | |
2102 /* blue */ | |
2103 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2104 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2105 pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ | |
2106 pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2107 | |
2108 /* blend */ | |
2109 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2110 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2111 /* 11 + 5 = 16 bits, so the sign bits are lost and | |
2112 the interesting bits will need to be MASKed */ | |
2113 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
2114 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2115 pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2116 | |
2117 por_r2r(mm6, mm1); /* save new blues in dsts */ | |
2118 | |
2119 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ | |
2120 | |
2121 srcp += 4; | |
2122 dstp += 4; | |
2123 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2124 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2125 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2126 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2127 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2128 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2129 } |
1542 | 2130 /* End GCC_ASMBLIT */ |
2131 | |
2132 #elif MSVC_ASMBLIT | |
2133 /* fast RGB565->RGB565 blending with surface alpha */ | |
2134 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) | |
2135 { | |
2136 unsigned alpha = info->src->alpha; | |
2137 if(alpha == 128) { | |
2138 Blit16to16SurfaceAlpha128(info, 0xf7de); | |
2139 } else { | |
2140 int width = info->d_width; | |
2141 int height = info->d_height; | |
2142 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
2143 int srcskip = info->s_skip >> 1; | |
2144 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2145 int dstskip = info->d_skip >> 1; | |
2146 Uint32 s, d; | |
2147 | |
2148 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; | |
2149 | |
2150 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | |
2151 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ | |
2152 alpha >>= 3; /* downscale alpha to 5 bits */ | |
2153 | |
2154 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
2155 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
2156 /* position alpha to allow for mullo and mulhi on diff channels | |
2157 to reduce the number of operations */ | |
2158 mm_alpha = _mm_slli_si64(mm_alpha, 3); | |
2159 | |
2160 /* Setup the 565 color channel masks */ | |
2161 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ | |
2162 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | |
2163 | |
2164 while(height--) { | |
2165 DUFFS_LOOP_QUATRO2( | |
2166 { | |
2167 s = *srcp++; | |
2168 d = *dstp; | |
2169 /* | |
2170 * shift out the middle component (green) to | |
2171 * the high 16 bits, and process all three RGB | |
2172 * components at the same time. | |
2173 */ | |
2174 s = (s | s << 16) & 0x07e0f81f; | |
2175 d = (d | d << 16) & 0x07e0f81f; | |
2176 d += (s - d) * alpha >> 5; | |
2177 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2178 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2179 },{ |
2180 s = *srcp++; | |
2181 d = *dstp; | |
2182 /* | |
2183 * shift out the middle component (green) to | |
2184 * the high 16 bits, and process all three RGB | |
2185 * components at the same time. | |
2186 */ | |
2187 s = (s | s << 16) & 0x07e0f81f; | |
2188 d = (d | d << 16) & 0x07e0f81f; | |
2189 d += (s - d) * alpha >> 5; | |
2190 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2191 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2192 s = *srcp++; |
2193 d = *dstp; | |
2194 /* | |
2195 * shift out the middle component (green) to | |
2196 * the high 16 bits, and process all three RGB | |
2197 * components at the same time. | |
2198 */ | |
2199 s = (s | s << 16) & 0x07e0f81f; | |
2200 d = (d | d << 16) & 0x07e0f81f; | |
2201 d += (s - d) * alpha >> 5; | |
2202 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2203 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2204 },{ |
2205 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ | |
2206 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ | |
2207 | |
2208 /* red */ | |
2209 src2 = src1; | |
2210 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */ | |
2211 | |
2212 dst2 = dst1; | |
2213 dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */ | |
2214 | |
2215 /* blend */ | |
2216 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2217 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2218 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2219 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2220 dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */ | |
2221 | |
2222 mm_res = dst2; /* RED -> mm_res */ | |
2223 | |
2224 /* green -- process the bits in place */ | |
2225 src2 = src1; | |
2226 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ | |
2227 | |
2228 dst2 = dst1; | |
2229 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ | |
2230 | |
2231 /* blend */ | |
2232 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2233 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2234 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2235 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2236 | |
2237 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ | |
2238 | |
2239 /* blue */ | |
2240 src2 = src1; | |
2241 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ | |
2242 | |
2243 dst2 = dst1; | |
2244 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ | |
2245 | |
2246 /* blend */ | |
2247 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2248 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2249 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2250 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2251 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ | |
2252 | |
2253 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ | |
2254 | |
2255 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ | |
2256 | |
2257 srcp += 4; | |
2258 dstp += 4; | |
2259 }, width); | |
2260 srcp += srcskip; | |
2261 dstp += dstskip; | |
2262 } | |
2263 _mm_empty(); | |
2264 } | |
2265 } | |
2266 | |
2267 /* fast RGB555->RGB555 blending with surface alpha */ | |
2268 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) | |
2269 { | |
2270 unsigned alpha = info->src->alpha; | |
2271 if(alpha == 128) { | |
2272 Blit16to16SurfaceAlpha128(info, 0xfbde); | |
2273 } else { | |
2274 int width = info->d_width; | |
2275 int height = info->d_height; | |
2276 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
2277 int srcskip = info->s_skip >> 1; | |
2278 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2279 int dstskip = info->d_skip >> 1; | |
2280 Uint32 s, d; | |
2281 | |
2282 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; | |
2283 | |
2284 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | |
2285 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ | |
2286 alpha >>= 3; /* downscale alpha to 5 bits */ | |
2287 | |
2288 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
2289 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
2290 /* position alpha to allow for mullo and mulhi on diff channels | |
2291 to reduce the number of operations */ | |
2292 mm_alpha = _mm_slli_si64(mm_alpha, 3); | |
2293 | |
2294 /* Setup the 555 color channel masks */ | |
2295 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ | |
2296 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ | |
2297 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | |
2298 | |
2299 while(height--) { | |
2300 DUFFS_LOOP_QUATRO2( | |
2301 { | |
2302 s = *srcp++; | |
2303 d = *dstp; | |
2304 /* | |
2305 * shift out the middle component (green) to | |
2306 * the high 16 bits, and process all three RGB | |
2307 * components at the same time. | |
2308 */ | |
2309 s = (s | s << 16) & 0x03e07c1f; | |
2310 d = (d | d << 16) & 0x03e07c1f; | |
2311 d += (s - d) * alpha >> 5; | |
2312 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2313 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2314 },{ |
2315 s = *srcp++; | |
2316 d = *dstp; | |
2317 /* | |
2318 * shift out the middle component (green) to | |
2319 * the high 16 bits, and process all three RGB | |
2320 * components at the same time. | |
2321 */ | |
2322 s = (s | s << 16) & 0x03e07c1f; | |
2323 d = (d | d << 16) & 0x03e07c1f; | |
2324 d += (s - d) * alpha >> 5; | |
2325 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2326 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2327 s = *srcp++; |
2328 d = *dstp; | |
2329 /* | |
2330 * shift out the middle component (green) to | |
2331 * the high 16 bits, and process all three RGB | |
2332 * components at the same time. | |
2333 */ | |
2334 s = (s | s << 16) & 0x03e07c1f; | |
2335 d = (d | d << 16) & 0x03e07c1f; | |
2336 d += (s - d) * alpha >> 5; | |
2337 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2338 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2339 },{ |
2340 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ | |
2341 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ | |
2342 | |
2343 /* red -- process the bits in place */ | |
2344 src2 = src1; | |
2345 src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */ | |
2346 | |
2347 dst2 = dst1; | |
2348 dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */ | |
2349 | |
2350 /* blend */ | |
2351 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2352 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2353 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2354 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2355 dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */ | |
2356 | |
2357 mm_res = dst2; /* RED -> mm_res */ | |
2358 | |
2359 /* green -- process the bits in place */ | |
2360 src2 = src1; | |
2361 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ | |
2362 | |
2363 dst2 = dst1; | |
2364 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ | |
2365 | |
2366 /* blend */ | |
2367 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2368 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2369 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2370 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2371 | |
2372 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ | |
2373 | |
2374 /* blue */ | |
2375 src2 = src1; /* src -> src2 */ | |
2376 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ | |
2377 | |
2378 dst2 = dst1; /* dst -> dst2 */ | |
2379 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ | |
2380 | |
2381 /* blend */ | |
2382 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2383 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2384 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2385 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2386 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ | |
2387 | |
2388 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ | |
2389 | |
2390 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ | |
2391 | |
2392 srcp += 4; | |
2393 dstp += 4; | |
2394 }, width); | |
2395 srcp += srcskip; | |
2396 dstp += dstskip; | |
2397 } | |
2398 _mm_empty(); | |
2399 } | |
2400 } | |
2401 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2402 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2403 /* fast RGB565->RGB565 blending with surface alpha */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2404 static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2405 { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2406 unsigned alpha = info->src->alpha; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2407 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2408 Blit16to16SurfaceAlpha128(info, 0xf7de); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2409 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2410 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2411 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2412 Uint16 *srcp = (Uint16 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2413 int srcskip = info->s_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2414 Uint16 *dstp = (Uint16 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2415 int dstskip = info->d_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2416 alpha >>= 3; /* downscale alpha to 5 bits */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2417 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2418 while(height--) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2419 DUFFS_LOOP4({ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2420 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2421 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2422 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2423 * shift out the middle component (green) to |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2424 * the high 16 bits, and process all three RGB |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2425 * components at the same time. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2426 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2427 s = (s | s << 16) & 0x07e0f81f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2428 d = (d | d << 16) & 0x07e0f81f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2429 d += (s - d) * alpha >> 5; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2430 d &= 0x07e0f81f; |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2431 *dstp++ = (Uint16)(d | d >> 16); |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2432 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2433 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2434 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2435 } |
0 | 2436 } |
2437 } | |
2438 | |
2439 /* fast RGB555->RGB555 blending with surface alpha */ | |
2440 static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) | |
2441 { | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2442 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2443 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2444 Blit16to16SurfaceAlpha128(info, 0xfbde); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2445 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2446 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2447 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2448 Uint16 *srcp = (Uint16 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2449 int srcskip = info->s_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2450 Uint16 *dstp = (Uint16 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2451 int dstskip = info->d_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2452 alpha >>= 3; /* downscale alpha to 5 bits */ |
0 | 2453 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2454 while(height--) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2455 DUFFS_LOOP4({ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2456 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2457 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2458 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2459 * shift out the middle component (green) to |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2460 * the high 16 bits, and process all three RGB |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2461 * components at the same time. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2462 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2463 s = (s | s << 16) & 0x03e07c1f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2464 d = (d | d << 16) & 0x03e07c1f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2465 d += (s - d) * alpha >> 5; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2466 d &= 0x03e07c1f; |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2467 *dstp++ = (Uint16)(d | d >> 16); |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2468 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2469 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2470 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2471 } |
0 | 2472 } |
2473 } | |
2474 | |
2475 /* fast ARGB8888->RGB565 blending with pixel alpha */ | |
2476 static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) | |
2477 { | |
2478 int width = info->d_width; | |
2479 int height = info->d_height; | |
2480 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
2481 int srcskip = info->s_skip >> 2; | |
2482 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2483 int dstskip = info->d_skip >> 1; | |
2484 | |
2485 while(height--) { | |
2486 DUFFS_LOOP4({ | |
2487 Uint32 s = *srcp; | |
2488 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ | |
2489 /* FIXME: Here we special-case opaque alpha since the | |
2490 compositioning used (>>8 instead of /255) doesn't handle | |
2491 it correctly. Also special-case alpha=0 for speed? | |
2492 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2493 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2494 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2495 *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f)); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2496 } else { |
0 | 2497 Uint32 d = *dstp; |
2498 /* | |
2499 * convert source and destination to G0RAB65565 | |
2500 * and blend all components at the same time | |
2501 */ | |
2502 s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800) | |
2503 + (s >> 3 & 0x1f); | |
2504 d = (d | d << 16) & 0x07e0f81f; | |
2505 d += (s - d) * alpha >> 5; | |
2506 d &= 0x07e0f81f; | |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2507 *dstp = (Uint16)(d | d >> 16); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2508 } |
0 | 2509 } |
2510 srcp++; | |
2511 dstp++; | |
2512 }, width); | |
2513 srcp += srcskip; | |
2514 dstp += dstskip; | |
2515 } | |
2516 } | |
2517 | |
2518 /* fast ARGB8888->RGB555 blending with pixel alpha */ | |
2519 static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) | |
2520 { | |
2521 int width = info->d_width; | |
2522 int height = info->d_height; | |
2523 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
2524 int srcskip = info->s_skip >> 2; | |
2525 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2526 int dstskip = info->d_skip >> 1; | |
2527 | |
2528 while(height--) { | |
2529 DUFFS_LOOP4({ | |
2530 unsigned alpha; | |
2531 Uint32 s = *srcp; | |
2532 alpha = s >> 27; /* downscale alpha to 5 bits */ | |
2533 /* FIXME: Here we special-case opaque alpha since the | |
2534 compositioning used (>>8 instead of /255) doesn't handle | |
2535 it correctly. Also special-case alpha=0 for speed? | |
2536 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2537 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2538 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2539 *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f)); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2540 } else { |
0 | 2541 Uint32 d = *dstp; |
2542 /* | |
2543 * convert source and destination to G0RAB65565 | |
2544 * and blend all components at the same time | |
2545 */ | |
2546 s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00) | |
2547 + (s >> 3 & 0x1f); | |
2548 d = (d | d << 16) & 0x03e07c1f; | |
2549 d += (s - d) * alpha >> 5; | |
2550 d &= 0x03e07c1f; | |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2551 *dstp = (Uint16)(d | d >> 16); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2552 } |
0 | 2553 } |
2554 srcp++; | |
2555 dstp++; | |
2556 }, width); | |
2557 srcp += srcskip; | |
2558 dstp += dstskip; | |
2559 } | |
2560 } | |
2561 | |
2562 /* General (slow) N->N blending with per-surface alpha */ | |
2563 static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) | |
2564 { | |
2565 int width = info->d_width; | |
2566 int height = info->d_height; | |
2567 Uint8 *src = info->s_pixels; | |
2568 int srcskip = info->s_skip; | |
2569 Uint8 *dst = info->d_pixels; | |
2570 int dstskip = info->d_skip; | |
2571 SDL_PixelFormat *srcfmt = info->src; | |
2572 SDL_PixelFormat *dstfmt = info->dst; | |
2573 int srcbpp = srcfmt->BytesPerPixel; | |
2574 int dstbpp = dstfmt->BytesPerPixel; | |
2575 unsigned sA = srcfmt->alpha; | |
2576 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | |
2577 | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2578 if(sA) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2579 while ( height-- ) { |
0 | 2580 DUFFS_LOOP4( |
2581 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2582 Uint32 Pixel; |
0 | 2583 unsigned sR; |
2584 unsigned sG; | |
2585 unsigned sB; | |
2586 unsigned dR; | |
2587 unsigned dG; | |
2588 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2589 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2590 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); |
0 | 2591 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2592 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | |
2593 src += srcbpp; | |
2594 dst += dstbpp; | |
2595 }, | |
2596 width); | |
2597 src += srcskip; | |
2598 dst += dstskip; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2599 } |
0 | 2600 } |
2601 } | |
2602 | |
2603 /* General (slow) colorkeyed N->N blending with per-surface alpha */ | |
2604 static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) | |
2605 { | |
2606 int width = info->d_width; | |
2607 int height = info->d_height; | |
2608 Uint8 *src = info->s_pixels; | |
2609 int srcskip = info->s_skip; | |
2610 Uint8 *dst = info->d_pixels; | |
2611 int dstskip = info->d_skip; | |
2612 SDL_PixelFormat *srcfmt = info->src; | |
2613 SDL_PixelFormat *dstfmt = info->dst; | |
2614 Uint32 ckey = srcfmt->colorkey; | |
2615 int srcbpp = srcfmt->BytesPerPixel; | |
2616 int dstbpp = dstfmt->BytesPerPixel; | |
2617 unsigned sA = srcfmt->alpha; | |
2618 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | |
2619 | |
2620 while ( height-- ) { | |
2621 DUFFS_LOOP4( | |
2622 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2623 Uint32 Pixel; |
0 | 2624 unsigned sR; |
2625 unsigned sG; | |
2626 unsigned sB; | |
2627 unsigned dR; | |
2628 unsigned dG; | |
2629 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2630 RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2631 if(sA && Pixel != ckey) { |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2632 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2633 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); |
0 | 2634 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2635 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | |
2636 } | |
2637 src += srcbpp; | |
2638 dst += dstbpp; | |
2639 }, | |
2640 width); | |
2641 src += srcskip; | |
2642 dst += dstskip; | |
2643 } | |
2644 } | |
2645 | |
2646 /* General (slow) N->N blending with pixel alpha */ | |
2647 static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) | |
2648 { | |
2649 int width = info->d_width; | |
2650 int height = info->d_height; | |
2651 Uint8 *src = info->s_pixels; | |
2652 int srcskip = info->s_skip; | |
2653 Uint8 *dst = info->d_pixels; | |
2654 int dstskip = info->d_skip; | |
2655 SDL_PixelFormat *srcfmt = info->src; | |
2656 SDL_PixelFormat *dstfmt = info->dst; | |
2657 | |
2658 int srcbpp; | |
2659 int dstbpp; | |
2660 | |
2661 /* Set up some basic variables */ | |
2662 srcbpp = srcfmt->BytesPerPixel; | |
2663 dstbpp = dstfmt->BytesPerPixel; | |
2664 | |
2665 /* FIXME: for 8bpp source alpha, this doesn't get opaque values | |
2666 quite right. for <8bpp source alpha, it gets them very wrong | |
2667 (check all macros!) | |
2668 It is unclear whether there is a good general solution that doesn't | |
2669 need a branch (or a divide). */ | |
2670 while ( height-- ) { | |
2671 DUFFS_LOOP4( | |
2672 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2673 Uint32 Pixel; |
0 | 2674 unsigned sR; |
2675 unsigned sG; | |
2676 unsigned sB; | |
2677 unsigned dR; | |
2678 unsigned dG; | |
2679 unsigned dB; | |
2680 unsigned sA; | |
2681 unsigned dA; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2682 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2683 if(sA) { |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2684 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2685 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2686 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2687 } |
0 | 2688 src += srcbpp; |
2689 dst += dstbpp; | |
2690 }, | |
2691 width); | |
2692 src += srcskip; | |
2693 dst += dstskip; | |
2694 } | |
2695 } | |
2696 | |
2697 | |
2698 SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index) | |
2699 { | |
2700 SDL_PixelFormat *sf = surface->format; | |
2701 SDL_PixelFormat *df = surface->map->dst->format; | |
2702 | |
2703 if(sf->Amask == 0) { | |
2704 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { | |
2705 if(df->BytesPerPixel == 1) | |
2706 return BlitNto1SurfaceAlphaKey; | |
2707 else | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2708 #if SDL_ALTIVEC_BLITTERS |
1240 | 2709 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && |
2710 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2711 return Blit32to32SurfaceAlphaKeyAltivec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2712 else |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2713 #endif |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2714 return BlitNtoNSurfaceAlphaKey; |
0 | 2715 } else { |
2716 /* Per-surface alpha blits */ | |
2717 switch(df->BytesPerPixel) { | |
2718 case 1: | |
2719 return BlitNto1SurfaceAlpha; | |
2720 | |
2721 case 2: | |
2722 if(surface->map->identity) { | |
2723 if(df->Gmask == 0x7e0) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2724 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2725 #if MMX_ASMBLIT |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
2726 if(SDL_HasMMX()) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2727 return Blit565to565SurfaceAlphaMMX; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2728 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2729 #endif |
0 | 2730 return Blit565to565SurfaceAlpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2731 } |
0 | 2732 else if(df->Gmask == 0x3e0) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2733 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2734 #if MMX_ASMBLIT |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
2735 if(SDL_HasMMX()) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2736 return Blit555to555SurfaceAlphaMMX; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2737 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2738 #endif |
0 | 2739 return Blit555to555SurfaceAlpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2740 } |
0 | 2741 } |
2742 return BlitNtoNSurfaceAlpha; | |
2743 | |
2744 case 4: | |
2745 if(sf->Rmask == df->Rmask | |
2746 && sf->Gmask == df->Gmask | |
2747 && sf->Bmask == df->Bmask | |
2748 && sf->BytesPerPixel == 4) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2749 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2750 #if MMX_ASMBLIT |
1542 | 2751 if(sf->Rshift % 8 == 0 |
2752 && sf->Gshift % 8 == 0 | |
2753 && sf->Bshift % 8 == 0 | |
2754 && SDL_HasMMX()) | |
2755 return BlitRGBtoRGBSurfaceAlphaMMX; | |
2756 #endif | |
2757 if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) | |
2758 { | |
1617
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2759 #if SDL_ALTIVEC_BLITTERS |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2760 if(!(surface->map->dst->flags & SDL_HWSURFACE) |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2761 && SDL_HasAltiVec()) |
1542 | 2762 return BlitRGBtoRGBSurfaceAlphaAltivec; |
2763 #endif | |
2764 return BlitRGBtoRGBSurfaceAlpha; | |
2765 } | |
2766 } | |
2767 #if SDL_ALTIVEC_BLITTERS | |
2768 if((sf->BytesPerPixel == 4) && | |
2769 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
2770 return Blit32to32SurfaceAlphaAltivec; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2771 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2772 #endif |
1542 | 2773 return BlitNtoNSurfaceAlpha; |
0 | 2774 |
2775 case 3: | |
2776 default: | |
2777 return BlitNtoNSurfaceAlpha; | |
2778 } | |
2779 } | |
2780 } else { | |
2781 /* Per-pixel alpha blits */ | |
2782 switch(df->BytesPerPixel) { | |
2783 case 1: | |
2784 return BlitNto1PixelAlpha; | |
2785 | |
2786 case 2: | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2787 #if SDL_ALTIVEC_BLITTERS |
1240 | 2788 if(sf->BytesPerPixel == 4 && !(surface->map->dst->flags & SDL_HWSURFACE) && |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2789 df->Gmask == 0x7e0 && |
1240 | 2790 df->Bmask == 0x1f && SDL_HasAltiVec()) |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2791 return Blit32to565PixelAlphaAltivec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2792 else |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2793 #endif |
0 | 2794 if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 |
2795 && sf->Gmask == 0xff00 | |
2796 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) | |
2797 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { | |
2798 if(df->Gmask == 0x7e0) | |
2799 return BlitARGBto565PixelAlpha; | |
2800 else if(df->Gmask == 0x3e0) | |
2801 return BlitARGBto555PixelAlpha; | |
2802 } | |
2803 return BlitNtoNPixelAlpha; | |
2804 | |
2805 case 4: | |
1542 | 2806 if(sf->Rmask == df->Rmask |
0 | 2807 && sf->Gmask == df->Gmask |
2808 && sf->Bmask == df->Bmask | |
2809 && sf->BytesPerPixel == 4) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2810 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2811 #if MMX_ASMBLIT |
1542 | 2812 if(sf->Rshift % 8 == 0 |
2813 && sf->Gshift % 8 == 0 | |
2814 && sf->Bshift % 8 == 0 | |
2815 && sf->Ashift % 8 == 0 | |
2816 && sf->Aloss == 0) | |
2817 { | |
2818 if(SDL_Has3DNow()) | |
2819 return BlitRGBtoRGBPixelAlphaMMX3DNOW; | |
2820 if(SDL_HasMMX()) | |
2821 return BlitRGBtoRGBPixelAlphaMMX; | |
2822 } | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2823 #endif |
1542 | 2824 if(sf->Amask == 0xff000000) |
2825 { | |
1617
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2826 #if SDL_ALTIVEC_BLITTERS |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2827 if(!(surface->map->dst->flags & SDL_HWSURFACE) |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2828 && SDL_HasAltiVec()) |
1542 | 2829 return BlitRGBtoRGBPixelAlphaAltivec; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2830 #endif |
1542 | 2831 return BlitRGBtoRGBPixelAlpha; |
2832 } | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2833 } |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2834 #if SDL_ALTIVEC_BLITTERS |
1542 | 2835 if (sf->Amask && sf->BytesPerPixel == 4 && |
2836 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
2837 return Blit32to32PixelAlphaAltivec; | |
2838 else | |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2839 #endif |
1542 | 2840 return BlitNtoNPixelAlpha; |
0 | 2841 |
2842 case 3: | |
2843 default: | |
2844 return BlitNtoNPixelAlpha; | |
2845 } | |
2846 } | |
2847 } | |
2848 |