Mercurial > sdl-ios-xcode
annotate src/video/SDL_blit_A.c @ 3922:4e02435ad2be SDL-1.2
Removed some debug printf() calls I accidentally added to Subversion.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 23 Feb 2007 00:37:07 +0000 |
parents | af4d584e0edb |
children | d65b4a73c991 |
rev | line source |
---|---|
0 | 1 /* |
2 SDL - Simple DirectMedia Layer | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
3 Copyright (C) 1997-2006 Sam Lantinga |
0 | 4 |
5 This library is free software; you can redistribute it and/or | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
6 modify it under the terms of the GNU Lesser General Public |
0 | 7 License as published by the Free Software Foundation; either |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
8 version 2.1 of the License, or (at your option) any later version. |
0 | 9 |
10 This library is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
13 Lesser General Public License for more details. |
0 | 14 |
1312
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
15 You should have received a copy of the GNU Lesser General Public |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
16 License along with this library; if not, write to the Free Software |
c9b51268668f
Updated copyright information and removed rcs id lines (problematic in branch merges)
Sam Lantinga <slouken@libsdl.org>
parents:
1240
diff
changeset
|
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 18 |
19 Sam Lantinga | |
252
e8157fcb3114
Updated the source with the correct e-mail address
Sam Lantinga <slouken@libsdl.org>
parents:
1
diff
changeset
|
20 slouken@libsdl.org |
0 | 21 */ |
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
22 #include "SDL_config.h" |
0 | 23 |
24 #include "SDL_video.h" | |
25 #include "SDL_blit.h" | |
26 | |
1542 | 27 #if SDL_ASSEMBLY_ROUTINES |
28 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
29 #define MMX_ASMBLIT 1 |
1542 | 30 #define GCC_ASMBLIT 1 |
31 #elif defined(_MSC_VER) && (_MSC_VER >= 1200) && defined(_M_IX86) | |
32 #define MMX_ASMBLIT 1 | |
33 #define MSVC_ASMBLIT 1 | |
880
9ef41050100c
Date: Tue, 30 Mar 2004 21:26:47 -0600
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
34 #endif |
1542 | 35 #endif /* SDL_ASSEMBLY_ROUTINES */ |
880
9ef41050100c
Date: Tue, 30 Mar 2004 21:26:47 -0600
Sam Lantinga <slouken@libsdl.org>
parents:
769
diff
changeset
|
36 |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
37 /* Function to check the CPU flags */ |
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
38 #include "SDL_cpuinfo.h" |
1542 | 39 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
40 #include "mmx.h" |
1542 | 41 #elif MSVC_ASMBLIT |
42 #include <mmintrin.h> | |
43 #include <mm3dnow.h> | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
44 #endif |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
45 |
0 | 46 /* Functions to perform alpha blended blitting */ |
47 | |
48 /* N->1 blending with per-surface alpha */ | |
49 static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) | |
50 { | |
51 int width = info->d_width; | |
52 int height = info->d_height; | |
53 Uint8 *src = info->s_pixels; | |
54 int srcskip = info->s_skip; | |
55 Uint8 *dst = info->d_pixels; | |
56 int dstskip = info->d_skip; | |
57 Uint8 *palmap = info->table; | |
58 SDL_PixelFormat *srcfmt = info->src; | |
59 SDL_PixelFormat *dstfmt = info->dst; | |
60 int srcbpp = srcfmt->BytesPerPixel; | |
61 | |
62 const unsigned A = srcfmt->alpha; | |
63 | |
64 while ( height-- ) { | |
65 DUFFS_LOOP4( | |
66 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
67 Uint32 Pixel; |
0 | 68 unsigned sR; |
69 unsigned sG; | |
70 unsigned sB; | |
71 unsigned dR; | |
72 unsigned dG; | |
73 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
74 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
0 | 75 dR = dstfmt->palette->colors[*dst].r; |
76 dG = dstfmt->palette->colors[*dst].g; | |
77 dB = dstfmt->palette->colors[*dst].b; | |
78 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); | |
79 dR &= 0xff; | |
80 dG &= 0xff; | |
81 dB &= 0xff; | |
82 /* Pack RGB into 8bit pixel */ | |
83 if ( palmap == NULL ) { | |
84 *dst =((dR>>5)<<(3+2))| | |
85 ((dG>>5)<<(2))| | |
86 ((dB>>6)<<(0)); | |
87 } else { | |
88 *dst = palmap[((dR>>5)<<(3+2))| | |
89 ((dG>>5)<<(2)) | | |
90 ((dB>>6)<<(0))]; | |
91 } | |
92 dst++; | |
93 src += srcbpp; | |
94 }, | |
95 width); | |
96 src += srcskip; | |
97 dst += dstskip; | |
98 } | |
99 } | |
100 | |
101 /* N->1 blending with pixel alpha */ | |
102 static void BlitNto1PixelAlpha(SDL_BlitInfo *info) | |
103 { | |
104 int width = info->d_width; | |
105 int height = info->d_height; | |
106 Uint8 *src = info->s_pixels; | |
107 int srcskip = info->s_skip; | |
108 Uint8 *dst = info->d_pixels; | |
109 int dstskip = info->d_skip; | |
110 Uint8 *palmap = info->table; | |
111 SDL_PixelFormat *srcfmt = info->src; | |
112 SDL_PixelFormat *dstfmt = info->dst; | |
113 int srcbpp = srcfmt->BytesPerPixel; | |
114 | |
115 /* FIXME: fix alpha bit field expansion here too? */ | |
116 while ( height-- ) { | |
117 DUFFS_LOOP4( | |
118 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
119 Uint32 Pixel; |
0 | 120 unsigned sR; |
121 unsigned sG; | |
122 unsigned sB; | |
123 unsigned sA; | |
124 unsigned dR; | |
125 unsigned dG; | |
126 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
127 DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); |
0 | 128 dR = dstfmt->palette->colors[*dst].r; |
129 dG = dstfmt->palette->colors[*dst].g; | |
130 dB = dstfmt->palette->colors[*dst].b; | |
131 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); | |
132 dR &= 0xff; | |
133 dG &= 0xff; | |
134 dB &= 0xff; | |
135 /* Pack RGB into 8bit pixel */ | |
136 if ( palmap == NULL ) { | |
137 *dst =((dR>>5)<<(3+2))| | |
138 ((dG>>5)<<(2))| | |
139 ((dB>>6)<<(0)); | |
140 } else { | |
141 *dst = palmap[((dR>>5)<<(3+2))| | |
142 ((dG>>5)<<(2)) | | |
143 ((dB>>6)<<(0)) ]; | |
144 } | |
145 dst++; | |
146 src += srcbpp; | |
147 }, | |
148 width); | |
149 src += srcskip; | |
150 dst += dstskip; | |
151 } | |
152 } | |
153 | |
154 /* colorkeyed N->1 blending with per-surface alpha */ | |
155 static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) | |
156 { | |
157 int width = info->d_width; | |
158 int height = info->d_height; | |
159 Uint8 *src = info->s_pixels; | |
160 int srcskip = info->s_skip; | |
161 Uint8 *dst = info->d_pixels; | |
162 int dstskip = info->d_skip; | |
163 Uint8 *palmap = info->table; | |
164 SDL_PixelFormat *srcfmt = info->src; | |
165 SDL_PixelFormat *dstfmt = info->dst; | |
166 int srcbpp = srcfmt->BytesPerPixel; | |
167 Uint32 ckey = srcfmt->colorkey; | |
168 | |
169 const int A = srcfmt->alpha; | |
170 | |
171 while ( height-- ) { | |
172 DUFFS_LOOP( | |
173 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
174 Uint32 Pixel; |
0 | 175 unsigned sR; |
176 unsigned sG; | |
177 unsigned sB; | |
178 unsigned dR; | |
179 unsigned dG; | |
180 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
181 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
182 if ( Pixel != ckey ) { |
0 | 183 dR = dstfmt->palette->colors[*dst].r; |
184 dG = dstfmt->palette->colors[*dst].g; | |
185 dB = dstfmt->palette->colors[*dst].b; | |
186 ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); | |
187 dR &= 0xff; | |
188 dG &= 0xff; | |
189 dB &= 0xff; | |
190 /* Pack RGB into 8bit pixel */ | |
191 if ( palmap == NULL ) { | |
192 *dst =((dR>>5)<<(3+2))| | |
193 ((dG>>5)<<(2)) | | |
194 ((dB>>6)<<(0)); | |
195 } else { | |
196 *dst = palmap[((dR>>5)<<(3+2))| | |
197 ((dG>>5)<<(2)) | | |
198 ((dB>>6)<<(0)) ]; | |
199 } | |
200 } | |
201 dst++; | |
202 src += srcbpp; | |
203 }, | |
204 width); | |
205 src += srcskip; | |
206 dst += dstskip; | |
207 } | |
208 } | |
209 | |
1542 | 210 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
211 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
212 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
213 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
214 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
215 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
216 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
217 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
218 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
219 int dstskip = info->d_skip >> 2; |
1542 | 220 Uint32 dalpha = info->dst->Amask; |
221 Uint8 load[8]; | |
222 | |
223 *(Uint64 *)load = 0x00fefefe00fefefeULL;/* alpha128 mask */ | |
224 movq_m2r(*load, mm4); /* alpha128 mask -> mm4 */ | |
225 *(Uint64 *)load = 0x0001010100010101ULL;/* !alpha128 mask */ | |
226 movq_m2r(*load, mm3); /* !alpha128 mask -> mm3 */ | |
227 movd_m2r(dalpha, mm7); /* dst alpha mask */ | |
228 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
229 while(height--) { |
1542 | 230 DUFFS_LOOP_DOUBLE2( |
231 { | |
232 Uint32 s = *srcp++; | |
233 Uint32 d = *dstp; | |
234 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | |
235 + (s & d & 0x00010101)) | dalpha; | |
236 },{ | |
237 movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ | |
238 movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ | |
239 | |
240 movq_m2r((*srcp), mm1);/* 2 x src -> mm1(ARGBARGB) */ | |
241 movq_r2r(mm1, mm5); /* 2 x src -> mm5(ARGBARGB) */ | |
242 | |
243 pand_r2r(mm4, mm6); /* dst & mask -> mm6 */ | |
244 pand_r2r(mm4, mm5); /* src & mask -> mm5 */ | |
245 paddd_r2r(mm6, mm5); /* mm6 + mm5 -> mm5 */ | |
246 pand_r2r(mm1, mm2); /* src & dst -> mm2 */ | |
247 psrld_i2r(1, mm5); /* mm5 >> 1 -> mm5 */ | |
248 pand_r2r(mm3, mm2); /* mm2 & !mask -> mm2 */ | |
249 paddd_r2r(mm5, mm2); /* mm5 + mm2 -> mm2 */ | |
250 | |
251 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | |
252 movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ | |
253 dstp += 2; | |
254 srcp += 2; | |
255 }, width); | |
256 srcp += srcskip; | |
257 dstp += dstskip; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
258 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
259 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
260 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
261 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
262 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
263 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
264 { |
1542 | 265 SDL_PixelFormat* df = info->dst; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
266 unsigned alpha = info->src->alpha; |
1542 | 267 |
268 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | |
269 /* only call a128 version when R,G,B occupy lower bits */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
270 BlitRGBtoRGBSurfaceAlpha128MMX(info); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
271 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
272 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
273 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
274 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
275 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
276 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
277 int dstskip = info->d_skip >> 2; |
1542 | 278 |
279 pxor_r2r(mm5, mm5); /* 0 -> mm5 */ | |
280 /* form the alpha mult */ | |
281 movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ | |
282 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | |
283 punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | |
284 alpha = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); | |
285 movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ | |
286 punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ | |
287 pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ | |
288 /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ | |
289 movd_m2r(df->Amask, mm7); /* dst alpha mask */ | |
290 punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
291 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
292 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
293 DUFFS_LOOP_DOUBLE2({ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
294 /* One Pixel Blend */ |
1542 | 295 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ |
296 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
297 punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ | |
298 punpcklbw_r2r(mm5, mm2); /* 0A0R0G0B -> mm2(dst) */ | |
299 | |
300 psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ | |
301 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
302 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | |
303 paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | |
304 | |
305 packuswb_r2r(mm5, mm2); /* ARGBARGB -> mm2 */ | |
306 por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ | |
307 movd_r2m(mm2, *dstp);/* mm2 -> pixel */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
308 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
309 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
310 },{ |
1542 | 311 /* Two Pixels Blend */ |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
312 movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ |
1542 | 313 movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ |
314 movq_r2r(mm0, mm1); /* 2 x src -> mm1(ARGBARGB) */ | |
315 movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ | |
316 | |
317 punpcklbw_r2r(mm5, mm0); /* low - 0A0R0G0B -> mm0(src1) */ | |
318 punpckhbw_r2r(mm5, mm1); /* high - 0A0R0G0B -> mm1(src2) */ | |
319 punpcklbw_r2r(mm5, mm2); /* low - 0A0R0G0B -> mm2(dst1) */ | |
320 punpckhbw_r2r(mm5, mm6); /* high - 0A0R0G0B -> mm6(dst2) */ | |
321 | |
322 psubw_r2r(mm2, mm0);/* src1 - dst1 -> mm0 */ | |
323 pmullw_r2r(mm4, mm0); /* mm0 * alpha -> mm0 */ | |
324 psrlw_i2r(8, mm0); /* mm0 >> 8 -> mm1 */ | |
325 paddb_r2r(mm0, mm2); /* mm0 + mm2(dst1) -> mm2 */ | |
326 | |
327 psubw_r2r(mm6, mm1);/* src2 - dst2 -> mm1 */ | |
328 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
329 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ | |
330 paddb_r2r(mm1, mm6); /* mm1 + mm6(dst2) -> mm6 */ | |
331 | |
332 packuswb_r2r(mm6, mm2); /* ARGBARGB -> mm2 */ | |
333 por_r2r(mm7, mm2); /* mm7(dst alpha) | mm2 -> mm2 */ | |
334 | |
335 movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ | |
336 | |
337 srcp += 2; | |
338 dstp += 2; | |
339 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
340 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
341 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
342 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
343 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
344 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
345 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
346 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
347 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
348 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
349 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
350 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
351 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
352 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
353 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
354 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
355 int dstskip = info->d_skip >> 2; |
1542 | 356 SDL_PixelFormat* sf = info->src; |
357 Uint32 amask = sf->Amask; | |
358 | |
359 pxor_r2r(mm6, mm6); /* 0 -> mm6 */ | |
360 /* form multiplication mask */ | |
361 movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ | |
362 punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ | |
363 pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ | |
364 movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ | |
365 pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ | |
366 /* form channel masks */ | |
367 movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ | |
368 packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ | |
369 packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ | |
370 pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ | |
371 /* get alpha channel shift */ | |
372 movd_m2r(sf->Ashift, mm5); /* Ashift -> mm5 */ | |
373 | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
374 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
375 DUFFS_LOOP4({ |
1542 | 376 Uint32 alpha = *srcp & amask; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
377 /* FIXME: Here we special-case opaque alpha since the |
1542 | 378 compositioning used (>>8 instead of /255) doesn't handle |
379 it correctly. Also special-case alpha=0 for speed? | |
380 Benchmark this! */ | |
381 if(alpha == 0) { | |
382 /* do nothing */ | |
383 } else if(alpha == amask) { | |
384 /* opaque alpha -- copy RGB, keep dst alpha */ | |
385 /* using MMX here to free up regular registers for other things */ | |
386 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
387 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
388 pand_r2r(mm0, mm1); /* src & chanmask -> mm1 */ | |
389 pand_r2r(mm3, mm2); /* dst & ~chanmask -> mm2 */ | |
390 por_r2r(mm1, mm2); /* src | dst -> mm2 */ | |
391 movd_r2m(mm2, (*dstp)); /* mm2 -> dst */ | |
392 } else { | |
393 movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ | |
394 punpcklbw_r2r(mm6, mm1); /* 0A0R0G0B -> mm1 */ | |
395 | |
396 movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ | |
397 punpcklbw_r2r(mm6, mm2); /* 0A0R0G0B -> mm2 */ | |
398 | |
399 __asm__ __volatile__ ( | |
400 "movd %0, %%mm4" | |
401 : : "r" (alpha) ); /* 0000A000 -> mm4 */ | |
402 psrld_r2r(mm5, mm4); /* mm4 >> mm5 -> mm4 (0000000A) */ | |
403 punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ | |
404 punpcklwd_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ | |
405 pand_r2r(mm7, mm4); /* 000A0A0A -> mm4, preserve dst alpha on add */ | |
406 | |
407 /* blend */ | |
408 psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ | |
409 pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ | |
410 psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1(000R0G0B) */ | |
411 paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ | |
412 | |
413 packuswb_r2r(mm6, mm2); /* 0000ARGB -> mm2 */ | |
414 movd_r2m(mm2, *dstp);/* mm2 -> dst */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
415 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
416 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
417 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
418 }, width); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
419 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
420 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
421 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
422 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
423 } |
1542 | 424 /* End GCC_ASMBLIT */ |
425 | |
426 #elif MSVC_ASMBLIT | |
427 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ | |
428 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) | |
429 { | |
430 int width = info->d_width; | |
431 int height = info->d_height; | |
432 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
433 int srcskip = info->s_skip >> 2; | |
434 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
435 int dstskip = info->d_skip >> 2; | |
436 Uint32 dalpha = info->dst->Amask; | |
437 | |
438 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; | |
439 | |
440 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ | |
441 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ | |
442 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ | |
443 | |
444 while (height--) { | |
445 int n = width; | |
446 if ( n & 1 ) { | |
447 Uint32 s = *srcp++; | |
448 Uint32 d = *dstp; | |
449 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) | |
450 + (s & d & 0x00010101)) | dalpha; | |
451 n--; | |
452 } | |
453 | |
454 for (n >>= 1; n > 0; --n) { | |
455 dst1 = *(__m64*)dstp; /* 2 x dst -> dst1(ARGBARGB) */ | |
456 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | |
457 | |
458 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB) */ | |
459 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | |
460 | |
461 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ | |
462 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ | |
463 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ | |
464 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ | |
465 | |
466 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ | |
467 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ | |
468 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ | |
469 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ | |
470 | |
471 *(__m64*)dstp = dst1; /* dst1 -> 2 x dst pixels */ | |
472 dstp += 2; | |
473 srcp += 2; | |
474 } | |
475 | |
476 srcp += srcskip; | |
477 dstp += dstskip; | |
478 } | |
479 _mm_empty(); | |
480 } | |
481 | |
482 /* fast RGB888->(A)RGB888 blending with surface alpha */ | |
483 static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) | |
484 { | |
485 SDL_PixelFormat* df = info->dst; | |
486 Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; | |
487 unsigned alpha = info->src->alpha; | |
488 | |
489 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { | |
490 /* only call a128 version when R,G,B occupy lower bits */ | |
491 BlitRGBtoRGBSurfaceAlpha128MMX(info); | |
492 } else { | |
493 int width = info->d_width; | |
494 int height = info->d_height; | |
495 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
496 int srcskip = info->s_skip >> 2; | |
497 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
498 int dstskip = info->d_skip >> 2; | |
499 Uint32 dalpha = df->Amask; | |
500 Uint32 amult; | |
501 | |
502 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; | |
503 | |
504 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
505 /* form the alpha mult */ | |
506 amult = alpha | (alpha << 8); | |
507 amult = amult | (amult << 16); | |
508 chanmask = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); | |
509 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ | |
510 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ | |
511 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ | |
512 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ | |
513 | |
514 while (height--) { | |
515 int n = width; | |
516 if (n & 1) { | |
517 /* One Pixel Blend */ | |
518 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB)*/ | |
519 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ | |
520 | |
521 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
522 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
523 | |
524 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ | |
525 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
526 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ | |
527 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ | |
528 | |
529 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
530 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ | |
531 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
532 | |
533 ++srcp; | |
534 ++dstp; | |
535 | |
536 n--; | |
537 } | |
538 | |
539 for (n >>= 1; n > 0; --n) { | |
540 /* Two Pixels Blend */ | |
541 src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB)*/ | |
542 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ | |
543 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ | |
544 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ | |
545 | |
546 dst1 = *(__m64*)dstp;/* 2 x dst -> dst1(ARGBARGB) */ | |
547 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ | |
548 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ | |
549 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ | |
550 | |
551 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ | |
552 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ | |
553 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ | |
554 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ | |
555 | |
556 src2 = _mm_sub_pi16(src2, dst2);/* src2 - dst2 -> src2 */ | |
557 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
558 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ | |
559 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ | |
560 | |
561 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ | |
562 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ | |
563 | |
564 *(__m64*)dstp = dst1; /* dst1 -> 2 x pixel */ | |
565 | |
566 srcp += 2; | |
567 dstp += 2; | |
568 } | |
569 srcp += srcskip; | |
570 dstp += dstskip; | |
571 } | |
572 _mm_empty(); | |
573 } | |
574 } | |
575 | |
576 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ | |
577 static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) | |
578 { | |
579 int width = info->d_width; | |
580 int height = info->d_height; | |
581 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
582 int srcskip = info->s_skip >> 2; | |
583 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
584 int dstskip = info->d_skip >> 2; | |
585 SDL_PixelFormat* sf = info->src; | |
586 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | |
587 Uint32 amask = sf->Amask; | |
588 Uint32 ashift = sf->Ashift; | |
589 Uint64 multmask; | |
590 | |
591 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | |
592 | |
593 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
594 multmask = ~(0xFFFFi64 << (ashift * 2)); | |
595 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ | |
596 | |
597 while(height--) { | |
598 DUFFS_LOOP4({ | |
599 Uint32 alpha = *srcp & amask; | |
600 if (alpha == 0) { | |
601 /* do nothing */ | |
602 } else if (alpha == amask) { | |
603 /* opaque alpha -- copy RGB, keep dst alpha */ | |
604 *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); | |
605 } else { | |
606 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ | |
607 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ | |
608 | |
609 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
610 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
611 | |
612 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ | |
613 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ | |
614 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
615 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
616 mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ | |
617 | |
618 /* blend */ | |
619 src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ | |
620 src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src1 - dst1) * alpha -> src1 */ | |
621 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ | |
622 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1 -> dst1(0A0R0G0B) */ | |
623 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
624 | |
625 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
626 } | |
627 ++srcp; | |
628 ++dstp; | |
629 }, width); | |
630 srcp += srcskip; | |
631 dstp += dstskip; | |
632 } | |
633 _mm_empty(); | |
634 } | |
635 /* End MSVC_ASMBLIT */ | |
636 | |
637 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
638 |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
639 #if SDL_ALTIVEC_BLITTERS |
1795 | 640 #if __MWERKS__ |
641 #pragma altivec_model on | |
642 #endif | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
643 #if HAVE_ALTIVEC_H |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
644 #include <altivec.h> |
1175
867f521591e5
Fixed Altivec support on Mac OS X.
Ryan C. Gordon <icculus@icculus.org>
parents:
1162
diff
changeset
|
645 #endif |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
646 #include <assert.h> |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
647 |
1402
d910939febfa
Use consistent identifiers for the various platforms we support.
Sam Lantinga <slouken@libsdl.org>
parents:
1361
diff
changeset
|
648 #if (defined(__MACOSX__) && (__GNUC__ < 4)) |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
649 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
650 (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
651 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
652 (vector unsigned short) ( a,b,c,d,e,f,g,h ) |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
653 #else |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
654 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
655 (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
656 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
657 (vector unsigned short) { a,b,c,d,e,f,g,h } |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
658 #endif |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
659 |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
660 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
661 #define VECPRINT(msg, v) do { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
662 vector unsigned int tmpvec = (vector unsigned int)(v); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
663 unsigned int *vp = (unsigned int *)&tmpvec; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
664 printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
665 } while (0) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
666 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
667 /* the permuation vector that takes the high bytes out of all the appropriate shorts |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
668 (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
669 0x00, 0x10, 0x02, 0x12, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
670 0x04, 0x14, 0x06, 0x16, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
671 0x08, 0x18, 0x0A, 0x1A, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
672 0x0C, 0x1C, 0x0E, 0x1E ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
673 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
674 #define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F))) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
675 #define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12))) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
676 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
677 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
678 ? vec_lvsl(0, src) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
679 : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
680 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
681 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
682 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
683 /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
684 vector unsigned short vtemp1 = vec_mule(vs, valpha); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
685 /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
686 vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
687 /* valpha2 is 255-alpha */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
688 vector unsigned char valpha2 = vec_nor(valpha, valpha); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
689 /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
690 vector unsigned short vtemp3 = vec_mule(vd, valpha2); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
691 /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
692 vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
693 /* add source and dest */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
694 vtemp1 = vec_add(vtemp1, vtemp3); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
695 vtemp2 = vec_add(vtemp2, vtemp4); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
696 /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
697 vtemp1 = vec_add(vtemp1, v1_16); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
698 vtemp3 = vec_sr(vtemp1, v8_16); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
699 vtemp1 = vec_add(vtemp1, vtemp3); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
700 /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
701 vtemp2 = vec_add(vtemp2, v1_16); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
702 vtemp4 = vec_sr(vtemp2, v8_16); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
703 vtemp2 = vec_add(vtemp2, vtemp4); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
704 /* (>>8) and get ARGBARGBARGBARGB */ \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
705 vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
706 } while (0) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
707 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
708 /* Calculate the permute vector used for 32->32 swizzling */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
709 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
710 const SDL_PixelFormat *dstfmt) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
711 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
712 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
713 * We have to assume that the bits that aren't used by other |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
714 * colors is alpha, and it's one complete byte, since some formats |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
715 * leave alpha with a zero mask, but we should still swizzle the bits. |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
716 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
717 /* ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
718 const static struct SDL_PixelFormat default_pixel_format = { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
719 NULL, 0, 0, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
720 0, 0, 0, 0, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
721 16, 8, 0, 24, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
722 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
723 0, 0}; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
724 if (!srcfmt) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
725 srcfmt = &default_pixel_format; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
726 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
727 if (!dstfmt) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
728 dstfmt = &default_pixel_format; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
729 } |
1487
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
730 const vector unsigned char plus = VECUINT8_LITERAL |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
731 ( 0x00, 0x00, 0x00, 0x00, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
732 0x04, 0x04, 0x04, 0x04, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
733 0x08, 0x08, 0x08, 0x08, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
734 0x0C, 0x0C, 0x0C, 0x0C ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
735 vector unsigned char vswiz; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
736 vector unsigned int srcvec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
737 #define RESHIFT(X) (3 - ((X) >> 3)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
738 Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
739 Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
740 Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
741 Uint32 amask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
742 /* Use zero for alpha if either surface doesn't have alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
743 if (dstfmt->Amask) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
744 amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
745 } else { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
746 amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
747 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
748 #undef RESHIFT |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
749 ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
750 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
751 return(vswiz); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
752 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
753 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
754 static void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
755 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
756 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
757 Uint8 *src = (Uint8 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
758 int srcskip = info->s_skip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
759 Uint8 *dst = (Uint8 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
760 int dstskip = info->d_skip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
761 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
762 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
763 vector unsigned char v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
764 vector unsigned short v8_16 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
765 vector unsigned short v1_16 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
766 vector unsigned short v2_16 = vec_splat_u16(2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
767 vector unsigned short v3_16 = vec_splat_u16(3); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
768 vector unsigned int v8_32 = vec_splat_u32(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
769 vector unsigned int v16_32 = vec_add(v8_32, v8_32); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
770 vector unsigned short v3f = VECUINT16_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
771 0x003f, 0x003f, 0x003f, 0x003f, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
772 0x003f, 0x003f, 0x003f, 0x003f); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
773 vector unsigned short vfc = VECUINT16_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
774 0x00fc, 0x00fc, 0x00fc, 0x00fc, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
775 0x00fc, 0x00fc, 0x00fc, 0x00fc); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
776 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
777 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
778 0x10 - 0x1f is the alpha |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
779 0x00 - 0x0e evens are the red |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
780 0x01 - 0x0f odds are zero |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
781 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
782 vector unsigned char vredalpha1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
783 0x10, 0x00, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
784 0x10, 0x02, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
785 0x10, 0x04, 0x01, 0x01, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
786 0x10, 0x06, 0x01, 0x01 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
787 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
788 vector unsigned char vredalpha2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
789 vec_add((vector unsigned int)vredalpha1, vec_sl(v8_32, v16_32)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
790 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
791 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
792 0x00 - 0x0f is ARxx ARxx ARxx ARxx |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
793 0x11 - 0x0f odds are blue |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
794 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
795 vector unsigned char vblue1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
796 0x00, 0x01, 0x02, 0x11, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
797 0x04, 0x05, 0x06, 0x13, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
798 0x08, 0x09, 0x0a, 0x15, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
799 0x0c, 0x0d, 0x0e, 0x17 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
800 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
801 vector unsigned char vblue2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
802 vec_add((vector unsigned int)vblue1, v8_32) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
803 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
804 /* |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
805 0x00 - 0x0f is ARxB ARxB ARxB ARxB |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
806 0x10 - 0x0e evens are green |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
807 */ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
808 vector unsigned char vgreen1 = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
809 0x00, 0x01, 0x10, 0x03, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
810 0x04, 0x05, 0x12, 0x07, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
811 0x08, 0x09, 0x14, 0x0b, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
812 0x0c, 0x0d, 0x16, 0x0f |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
813 ); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
814 vector unsigned char vgreen2 = (vector unsigned char)( |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
815 vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
816 ); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
817 vector unsigned char vgmerge = VECUINT8_LITERAL( |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
818 0x00, 0x02, 0x00, 0x06, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
819 0x00, 0x0a, 0x00, 0x0e, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
820 0x00, 0x12, 0x00, 0x16, |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
821 0x00, 0x1a, 0x00, 0x1e); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
822 vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
823 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
824 vector unsigned char valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
825 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
826 vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
827 vf800 = vec_sl(vf800, vec_splat_u16(8)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
828 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
829 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
830 int extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
831 vector unsigned char valigner; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
832 vector unsigned char vsrc; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
833 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
834 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
835 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
836 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
837 while (condition) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
838 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
839 unsigned sR, sG, sB, dR, dG, dB, sA; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
840 DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
841 if(sA) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
842 unsigned short dstpixel = *((unsigned short *)dst); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
843 dR = (dstpixel >> 8) & 0xf8; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
844 dG = (dstpixel >> 3) & 0xfc; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
845 dB = (dstpixel << 3) & 0xf8; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
846 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
847 *((unsigned short *)dst) = ( \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
848 ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
849 ); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
850 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
851 src += 4; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
852 dst += 2; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
853 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
854 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
855 ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
856 extrawidth = (width % 8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
857 valigner = VEC_ALIGNER(src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
858 vsrc = (vector unsigned char)vec_ld(0, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
859 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
860 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
861 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
862 vector unsigned char vsrc1, vsrc2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
863 vector unsigned char vdst1, vdst2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
864 vector unsigned short vR, vG, vB; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
865 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
866 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
867 /* Load 8 pixels from src as ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
868 voverflow = (vector unsigned char)vec_ld(15, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
869 vsrc = vec_perm(vsrc, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
870 vsrc1 = vec_perm(vsrc, vsrc, vpermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
871 src += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
872 vsrc = (vector unsigned char)vec_ld(15, src); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
873 voverflow = vec_perm(voverflow, vsrc, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
874 vsrc2 = vec_perm(voverflow, voverflow, vpermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
875 src += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
876 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
877 /* Load 8 pixels from dst as XRGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
878 voverflow = vec_ld(0, dst); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
879 vR = vec_and((vector unsigned short)voverflow, vf800); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
880 vB = vec_sl((vector unsigned short)voverflow, v3_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
881 vG = vec_sl(vB, v2_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
882 vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
883 vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
884 vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
885 vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
886 vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
887 vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
888 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
889 /* Alpha blend 8 pixels as ARGB */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
890 valpha = vec_perm(vsrc1, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
891 VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, v8_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
892 valpha = vec_perm(vsrc2, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
893 VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, v8_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
894 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
895 /* Convert 8 pixels to 565 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
896 vpixel = (vector unsigned short)vec_packpx((vector unsigned int)vdst1, (vector unsigned int)vdst2); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
897 vgpixel = (vector unsigned short)vec_perm(vdst1, vdst2, vgmerge); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
898 vgpixel = vec_and(vgpixel, vfc); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
899 vgpixel = vec_sl(vgpixel, v3_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
900 vrpixel = vec_sl(vpixel, v1_16); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
901 vrpixel = vec_and(vrpixel, vf800); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
902 vbpixel = vec_and(vpixel, v3f); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
903 vdst1 = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
904 vdst1 = vec_or(vdst1, (vector unsigned char)vbpixel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
905 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
906 /* Store 8 pixels */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
907 vec_st(vdst1, 0, dst); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
908 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
909 width -= 8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
910 dst += 16; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
911 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
912 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
913 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
914 src += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
915 dst += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
916 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
917 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
918 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
919 static void Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
920 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
921 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
922 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
923 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
924 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
925 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
926 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
927 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
928 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
929 unsigned sA = srcfmt->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
930 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
931 Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
932 Uint32 ckey = info->src->colorkey; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
933 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
934 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
935 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
936 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
937 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
938 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
939 vector unsigned char vbits; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
940 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
941 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
942 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
943 vector unsigned int vckey; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
944 vector unsigned int vrgbmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
945 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
946 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
947 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
948 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
949 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
950 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
951 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
952 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
953 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
954 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
955 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
956 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
957 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
958 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
959 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
960 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
961 vbits = (vector unsigned char)vec_splat_s8(-1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
962 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
963 ckey &= rgbmask; |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
964 ((unsigned int *)(char*)&vckey)[0] = ckey; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
965 vckey = vec_splat(vckey, 0); |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
966 ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
967 vrgbmask = vec_splat(vrgbmask, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
968 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
969 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
970 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
971 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
972 while (condition) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
973 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
974 unsigned sR, sG, sB, dR, dG, dB; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
975 RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
976 if(sA && Pixel != ckey) { \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
977 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
978 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
979 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
980 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
981 } \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
982 dstp++; \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
983 srcp++; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
984 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
985 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
986 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
987 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
988 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
989 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
990 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
991 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
992 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
993 vector unsigned char vsel; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
994 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
995 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
996 vector unsigned char vd_orig; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
997 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
998 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
999 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1000 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1001 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1002 /* vsel is set for items that match the key */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1003 vsel = (vector unsigned char)vec_and((vector unsigned int)vs, vrgbmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1004 vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1005 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1006 /* permute to source format */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1007 vs = vec_perm(vs, valpha, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1008 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1009 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1010 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1011 vd_orig = vd = vec_perm(vd, v0, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1012 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1013 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1014 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1015 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1016 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1017 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1018 /* mask out color key */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1019 vd = vec_sel(vd, vd_orig, vsel); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1020 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1021 /* permute to dest format */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1022 vd = vec_perm(vd, vbits, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1023 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1024 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1025 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1026 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1027 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1028 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1029 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1030 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1031 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1032 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1033 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1034 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1035 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1036 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1037 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1038 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1039 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1040 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1041 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1042 static void Blit32to32PixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1043 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1044 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1045 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1046 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1047 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1048 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1049 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1050 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1051 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1052 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1053 vector unsigned char valphaPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1054 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1055 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1056 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1057 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1058 vector unsigned char vpixelmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1059 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1060 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1061 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1062 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1063 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1064 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1065 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1066 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1067 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1068 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1069 vpixelmask = vec_nor(valphamask, v0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1070 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1071 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1072 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1073 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1074 while ( height-- ) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1075 width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1076 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1077 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1078 unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1079 DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1080 if(sA) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1081 DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1082 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1083 ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1084 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1085 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1086 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1087 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1088 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1089 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1090 if (width > 0) { |
1487
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
1091 /* vsrcPermute */ |
dc6b59e925a2
Cleaning up warnings on MacOS X
Sam Lantinga <slouken@libsdl.org>
parents:
1456
diff
changeset
|
1092 /* vdstPermute */ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1093 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1094 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1095 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1096 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1097 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1098 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1099 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1100 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1101 vector unsigned char vdstalpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1102 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1103 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1104 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1105 vs = vec_perm(vs, v0, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1106 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1107 valpha = vec_perm(vs, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1108 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1109 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1110 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1111 vd = vec_perm(vd, v0, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1112 vdstalpha = vec_and(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1113 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1114 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1115 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1116 /* set the alpha to the dest alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1117 vd = vec_and(vd, vpixelmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1118 vd = vec_or(vd, vdstalpha); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1119 vd = vec_perm(vd, v0, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1120 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1121 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1122 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1123 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1124 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1125 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1126 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1127 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1128 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1129 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1130 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1131 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1132 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1133 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1134 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1135 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1136 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1137 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1138 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1139 static void BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1140 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1141 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1142 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1143 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1144 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1145 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1146 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1147 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1148 vector unsigned char valphaPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1149 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1150 vector unsigned char vpixelmask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1151 vector unsigned char v0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1152 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1153 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1154 v0 = vec_splat_u8(0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1155 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1156 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1157 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1158 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1159 valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1160 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1161 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1162 vpixelmask = vec_nor(valphamask, v0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1163 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1164 width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1165 #define ONE_PIXEL_BLEND(condition, widthvar) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1166 while ((condition)) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1167 Uint32 dalpha; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1168 Uint32 d; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1169 Uint32 s1; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1170 Uint32 d1; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1171 Uint32 s = *srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1172 Uint32 alpha = s >> 24; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1173 if(alpha) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1174 if(alpha == SDL_ALPHA_OPAQUE) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1175 *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1176 } else { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1177 d = *dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1178 dalpha = d & 0xff000000; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1179 s1 = s & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1180 d1 = d & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1181 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1182 s &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1183 d &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1184 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1185 *dstp = d1 | d | dalpha; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1186 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1187 } \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1188 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1189 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1190 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1191 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1192 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1193 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1194 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1195 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1196 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1197 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1198 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1199 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1200 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1201 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1202 vector unsigned char vdstalpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1203 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1204 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1205 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1206 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1207 valpha = vec_perm(vs, v0, valphaPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1208 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1209 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1210 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1211 vdstalpha = vec_and(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1212 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1213 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1214 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1215 /* set the alpha to the dest alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1216 vd = vec_and(vd, vpixelmask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1217 vd = vec_or(vd, vdstalpha); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1218 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1219 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1220 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1221 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1222 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1223 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1224 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1225 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1226 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1227 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1228 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1229 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1230 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1231 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1232 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1233 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1234 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1235 static void Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1236 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1237 /* XXX : 6 */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1238 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1239 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1240 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1241 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1242 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1243 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1244 SDL_PixelFormat *srcfmt = info->src; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1245 SDL_PixelFormat *dstfmt = info->dst; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1246 unsigned sA = srcfmt->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1247 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1248 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1249 vector unsigned char vsrcPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1250 vector unsigned char vdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1251 vector unsigned char vsdstPermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1252 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1253 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1254 vector unsigned char vbits; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1255 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1256 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1257 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1258 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1259 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1260 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1261 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1262 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1263 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1264 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1265 vsrcPermute = calc_swizzle32(srcfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1266 vdstPermute = calc_swizzle32(NULL, dstfmt); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1267 vsdstPermute = calc_swizzle32(dstfmt, NULL); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1268 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1269 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1270 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1271 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1272 vbits = (vector unsigned char)vec_splat_s8(-1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1273 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1274 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1275 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1276 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1277 Uint32 Pixel; \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1278 unsigned sR, sG, sB, dR, dG, dB; \ |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1279 DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
1280 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1281 ACCURATE_ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1282 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1283 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1284 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1285 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1286 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1287 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1288 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1289 int extrawidth = (width % 4); |
3910
af4d584e0edb
Handle source data alignment correctly in Blit32to32SurfaceAlphaAltivec().
Ryan C. Gordon <icculus@icculus.org>
parents:
3899
diff
changeset
|
1290 vector unsigned char valigner = VEC_ALIGNER(srcp); |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1291 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1292 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1293 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1294 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1295 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1296 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1297 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1298 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1299 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1300 vs = vec_perm(vs, valpha, vsrcPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1301 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1302 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1303 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1304 vd = vec_perm(vd, vd, vsdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1305 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1306 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1307 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1308 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1309 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1310 vd = vec_perm(vd, vbits, vdstPermute); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1311 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1312 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1313 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1314 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1315 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1316 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1317 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1318 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1319 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1320 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1321 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1322 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1323 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1324 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1325 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1326 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1327 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1328 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1329 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1330 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1331 /* fast RGB888->(A)RGB888 blending */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1332 static void BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo *info) |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1333 { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1334 unsigned alpha = info->src->alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1335 int height = info->d_height; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1336 Uint32 *srcp = (Uint32 *)info->s_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1337 int srcskip = info->s_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1338 Uint32 *dstp = (Uint32 *)info->d_pixels; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1339 int dstskip = info->d_skip >> 2; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1340 vector unsigned char mergePermute; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1341 vector unsigned char valpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1342 vector unsigned char valphamask; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1343 vector unsigned short v1; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1344 vector unsigned short v8; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1345 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1346 mergePermute = VEC_MERGE_PERMUTE(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1347 v1 = vec_splat_u16(1); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1348 v8 = vec_splat_u16(8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1349 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1350 /* set the alpha to 255 on the destination surf */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1351 valphamask = VEC_ALPHA_MASK(); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1352 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1353 /* set a vector full of alpha and 255-alpha */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1354 ((unsigned char *)&valpha)[0] = alpha; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1355 valpha = vec_splat(valpha, 0); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1356 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1357 while(height--) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1358 int width = info->d_width; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1359 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1360 Uint32 s = *srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1361 Uint32 d = *dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1362 Uint32 s1 = s & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1363 Uint32 d1 = d & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1364 d1 = (d1 + ((s1 - d1) * alpha >> 8)) \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1365 & 0xff00ff; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1366 s &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1367 d &= 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1368 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1369 *dstp = d1 | d | 0xff000000; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1370 ++srcp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1371 ++dstp; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1372 widthvar--; \ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1373 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1374 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1375 if (width > 0) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1376 int extrawidth = (width % 4); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1377 vector unsigned char valigner = VEC_ALIGNER(srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1378 vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1379 width -= extrawidth; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1380 while (width) { |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1381 vector unsigned char voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1382 vector unsigned char vd; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1383 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1384 /* s = *srcp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1385 voverflow = (vector unsigned char)vec_ld(15, srcp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1386 vs = vec_perm(vs, voverflow, valigner); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1387 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1388 /* d = *dstp */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1389 vd = (vector unsigned char)vec_ld(0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1390 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1391 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1392 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1393 /* set the alpha channel to full on */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1394 vd = vec_or(vd, valphamask); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1395 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1396 /* *dstp = res */ |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1397 vec_st((vector unsigned int)vd, 0, dstp); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1398 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1399 srcp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1400 dstp += 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1401 width -= 4; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1402 vs = voverflow; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1403 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1404 ONE_PIXEL_BLEND((extrawidth), extrawidth); |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1405 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1406 #undef ONE_PIXEL_BLEND |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1407 |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1408 srcp += srcskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1409 dstp += dstskip; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1410 } |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1411 } |
1795 | 1412 #if __MWERKS__ |
1413 #pragma altivec_model off | |
1414 #endif | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
1415 #endif /* SDL_ALTIVEC_BLITTERS */ |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
1416 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1417 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1418 static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) |
0 | 1419 { |
1420 int width = info->d_width; | |
1421 int height = info->d_height; | |
1422 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1423 int srcskip = info->s_skip >> 2; | |
1424 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1425 int dstskip = info->d_skip >> 2; | |
1426 | |
1427 while(height--) { | |
1428 DUFFS_LOOP4({ | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1429 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1430 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1431 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1432 + (s & d & 0x00010101)) | 0xff000000; |
0 | 1433 }, width); |
1434 srcp += srcskip; | |
1435 dstp += dstskip; | |
1436 } | |
1437 } | |
1438 | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1439 /* fast RGB888->(A)RGB888 blending with surface alpha */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1440 static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1441 { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1442 unsigned alpha = info->src->alpha; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1443 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1444 BlitRGBtoRGBSurfaceAlpha128(info); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1445 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1446 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1447 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1448 Uint32 *srcp = (Uint32 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1449 int srcskip = info->s_skip >> 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1450 Uint32 *dstp = (Uint32 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1451 int dstskip = info->d_skip >> 2; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1452 Uint32 s; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1453 Uint32 d; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1454 Uint32 s1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1455 Uint32 d1; |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1456 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1457 while(height--) { |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1458 DUFFS_LOOP_DOUBLE2({ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1459 /* One Pixel Blend */ |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1460 s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1461 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1462 s1 = s & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1463 d1 = d & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1464 d1 = (d1 + ((s1 - d1) * alpha >> 8)) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1465 & 0xff00ff; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1466 s &= 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1467 d &= 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1468 d = (d + ((s - d) * alpha >> 8)) & 0xff00; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1469 *dstp = d1 | d | 0xff000000; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1470 ++srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1471 ++dstp; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1472 },{ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1473 /* Two Pixels Blend */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1474 s = *srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1475 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1476 s1 = s & 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1477 d1 = d & 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1478 d1 += (s1 - d1) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1479 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1480 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1481 s = ((s & 0xff00) >> 8) | |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1482 ((srcp[1] & 0xff00) << 8); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1483 d = ((d & 0xff00) >> 8) | |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1484 ((dstp[1] & 0xff00) << 8); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1485 d += (s - d) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1486 d &= 0x00ff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1487 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1488 *dstp++ = d1 | ((d << 8) & 0xff00) | 0xff000000; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1489 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1490 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1491 s1 = *srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1492 d1 = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1493 s1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1494 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1495 d1 += (s1 - d1) * alpha >> 8; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1496 d1 &= 0xff00ff; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1497 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1498 *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1499 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1500 ++dstp; |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1501 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1502 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1503 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1504 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1505 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1506 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1507 |
0 | 1508 /* fast ARGB888->(A)RGB888 blending with pixel alpha */ |
1509 static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info) | |
1510 { | |
1511 int width = info->d_width; | |
1512 int height = info->d_height; | |
1513 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1514 int srcskip = info->s_skip >> 2; | |
1515 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1516 int dstskip = info->d_skip >> 2; | |
1517 | |
1518 while(height--) { | |
1519 DUFFS_LOOP4({ | |
1520 Uint32 dalpha; | |
1521 Uint32 d; | |
1522 Uint32 s1; | |
1523 Uint32 d1; | |
1524 Uint32 s = *srcp; | |
1525 Uint32 alpha = s >> 24; | |
1526 /* FIXME: Here we special-case opaque alpha since the | |
1527 compositioning used (>>8 instead of /255) doesn't handle | |
1528 it correctly. Also special-case alpha=0 for speed? | |
1529 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1530 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1531 if(alpha == SDL_ALPHA_OPAQUE) { |
0 | 1532 *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1533 } else { |
0 | 1534 /* |
1535 * take out the middle component (green), and process | |
1536 * the other two in parallel. One multiply less. | |
1537 */ | |
1538 d = *dstp; | |
1539 dalpha = d & 0xff000000; | |
1540 s1 = s & 0xff00ff; | |
1541 d1 = d & 0xff00ff; | |
1542 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; | |
1543 s &= 0xff00; | |
1544 d &= 0xff00; | |
1545 d = (d + ((s - d) * alpha >> 8)) & 0xff00; | |
1546 *dstp = d1 | d | dalpha; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1547 } |
0 | 1548 } |
1549 ++srcp; | |
1550 ++dstp; | |
1551 }, width); | |
1552 srcp += srcskip; | |
1553 dstp += dstskip; | |
1554 } | |
1555 } | |
1556 | |
1542 | 1557 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1558 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ |
3870 | 1559 static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1560 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1561 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1562 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1563 Uint32 *srcp = (Uint32 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1564 int srcskip = info->s_skip >> 2; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1565 Uint32 *dstp = (Uint32 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1566 int dstskip = info->d_skip >> 2; |
1542 | 1567 SDL_PixelFormat* sf = info->src; |
1568 Uint32 amask = sf->Amask; | |
3899
081aecdb0911
From: Gabriel Gambetta
Ryan C. Gordon <icculus@icculus.org>
parents:
3870
diff
changeset
|
1569 Uint32 ashift = sf->Ashift; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1570 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1571 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1572 /* make mm6 all zeros. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1573 "pxor %%mm6, %%mm6\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1574 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1575 /* Make a mask to preserve the alpha. */ |
1542 | 1576 "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ |
1577 "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ | |
1578 "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ | |
1579 "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ | |
1580 "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1581 |
1542 | 1582 /* form channel masks */ |
1583 "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ | |
1584 "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ | |
1585 "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ | |
1586 "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ | |
1587 | |
1588 /* get alpha channel shift */ | |
1589 "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ | |
1590 | |
3899
081aecdb0911
From: Gabriel Gambetta
Ryan C. Gordon <icculus@icculus.org>
parents:
3870
diff
changeset
|
1591 : /* nothing */ : "m" (amask), "m" (ashift) ); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1592 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1593 while(height--) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1594 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1595 DUFFS_LOOP4({ |
1542 | 1596 Uint32 alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1597 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1598 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1599 "prefetch 64(%0)\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1600 "prefetch 64(%1)\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1601 : : "r" (srcp), "r" (dstp) ); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1602 |
1542 | 1603 alpha = *srcp & amask; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1604 /* FIXME: Here we special-case opaque alpha since the |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1605 compositioning used (>>8 instead of /255) doesn't handle |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1606 it correctly. Also special-case alpha=0 for speed? |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1607 Benchmark this! */ |
1542 | 1608 if(alpha == 0) { |
1609 /* do nothing */ | |
1610 } | |
1611 else if(alpha == amask) { | |
1612 /* opaque alpha -- copy RGB, keep dst alpha */ | |
1613 /* using MMX here to free up regular registers for other things */ | |
1614 __asm__ ( | |
1615 "movd (%0), %%mm0\n\t" /* src(ARGB) -> mm0 (0000ARGB)*/ | |
1616 "movd (%1), %%mm1\n\t" /* dst(ARGB) -> mm1 (0000ARGB)*/ | |
1617 "pand %%mm4, %%mm0\n\t" /* src & chanmask -> mm0 */ | |
1618 "pand %%mm3, %%mm1\n\t" /* dst & ~chanmask -> mm2 */ | |
1619 "por %%mm0, %%mm1\n\t" /* src | dst -> mm1 */ | |
1620 "movd %%mm1, (%1) \n\t" /* mm1 -> dst */ | |
1621 | |
1622 : : "r" (srcp), "r" (dstp) ); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1623 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1624 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1625 else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1626 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1627 /* load in the source, and dst. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1628 "movd (%0), %%mm0\n" /* mm0(s) = 0 0 0 0 | As Rs Gs Bs */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1629 "movd (%1), %%mm1\n" /* mm1(d) = 0 0 0 0 | Ad Rd Gd Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1630 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1631 /* Move the src alpha into mm2 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1632 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1633 /* if supporting pshufw */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1634 /*"pshufw $0x55, %%mm0, %%mm2\n" */ /* mm2 = 0 As 0 As | 0 As 0 As */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1635 /*"psrlw $8, %%mm2\n" */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1636 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1637 /* else: */ |
1542 | 1638 "movd %2, %%mm2\n" |
1639 "psrld %%mm5, %%mm2\n" /* mm2 = 0 0 0 0 | 0 0 0 As */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1640 "punpcklwd %%mm2, %%mm2\n" /* mm2 = 0 0 0 0 | 0 As 0 As */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1641 "punpckldq %%mm2, %%mm2\n" /* mm2 = 0 As 0 As | 0 As 0 As */ |
1542 | 1642 "pand %%mm7, %%mm2\n" /* to preserve dest alpha */ |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1643 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1644 /* move the colors into words. */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1645 "punpcklbw %%mm6, %%mm0\n" /* mm0 = 0 As 0 Rs | 0 Gs 0 Bs */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1646 "punpcklbw %%mm6, %%mm1\n" /* mm0 = 0 Ad 0 Rd | 0 Gd 0 Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1647 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1648 /* src - dst */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1649 "psubw %%mm1, %%mm0\n" /* mm0 = As-Ad Rs-Rd | Gs-Gd Bs-Bd */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1650 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1651 /* A * (src-dst) */ |
1542 | 1652 "pmullw %%mm2, %%mm0\n" /* mm0 = 0*As-d As*Rs-d | As*Gs-d As*Bs-d */ |
1653 "psrlw $8, %%mm0\n" /* mm0 = 0>>8 Rc>>8 | Gc>>8 Bc>>8 */ | |
1654 "paddb %%mm1, %%mm0\n" /* mm0 = 0+Ad Rc+Rd | Gc+Gd Bc+Bd */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1655 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1656 "packuswb %%mm0, %%mm0\n" /* mm0 = | Ac Rc Gc Bc */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1657 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1658 "movd %%mm0, (%1)\n" /* result in mm0 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1659 |
1542 | 1660 : : "r" (srcp), "r" (dstp), "r" (alpha) ); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1661 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1662 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1663 ++srcp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1664 ++dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1665 }, width); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1666 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1667 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1668 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1669 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1670 __asm__ ( |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1671 "emms\n" |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1672 : ); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1673 } |
1542 | 1674 /* End GCC_ASMBLIT*/ |
1675 | |
1676 #elif MSVC_ASMBLIT | |
1677 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ | |
1678 static void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) | |
1679 { | |
1680 int width = info->d_width; | |
1681 int height = info->d_height; | |
1682 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
1683 int srcskip = info->s_skip >> 2; | |
1684 Uint32 *dstp = (Uint32 *)info->d_pixels; | |
1685 int dstskip = info->d_skip >> 2; | |
1686 SDL_PixelFormat* sf = info->src; | |
1687 Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; | |
1688 Uint32 amask = sf->Amask; | |
1689 Uint32 ashift = sf->Ashift; | |
1690 Uint64 multmask; | |
1691 | |
1692 __m64 src1, dst1, mm_alpha, mm_zero, dmask; | |
1693 | |
1694 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ | |
1695 multmask = ~(0xFFFFi64 << (ashift * 2)); | |
1696 dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ | |
1697 | |
1698 while(height--) { | |
1699 DUFFS_LOOP4({ | |
1700 Uint32 alpha; | |
1701 | |
1702 _m_prefetch(srcp + 16); | |
1703 _m_prefetch(dstp + 16); | |
1704 | |
1705 alpha = *srcp & amask; | |
1706 if (alpha == 0) { | |
1707 /* do nothing */ | |
1708 } else if (alpha == amask) { | |
1709 /* copy RGB, keep dst alpha */ | |
1710 *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); | |
1711 } else { | |
1712 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ | |
1713 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ | |
1714 | |
1715 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ | |
1716 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ | |
1717 | |
1718 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ | |
1719 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ | |
1720 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
1721 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
1722 mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ | |
1723 | |
1724 /* blend */ | |
1725 src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */ | |
1726 src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */ | |
1727 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ | |
1728 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */ | |
1729 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ | |
1730 | |
1731 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ | |
1732 } | |
1733 ++srcp; | |
1734 ++dstp; | |
1735 }, width); | |
1736 srcp += srcskip; | |
1737 dstp += dstskip; | |
1738 } | |
1739 _mm_empty(); | |
1740 } | |
1741 /* End MSVC_ASMBLIT */ | |
1742 | |
1743 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1744 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1745 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1746 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1747 /* blend a single 16 bit pixel at 50% */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1748 #define BLEND16_50(d, s, mask) \ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1749 ((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff))) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1750 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1751 /* blend two 16 bit pixels at 50% */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1752 #define BLEND2x16_50(d, s, mask) \ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1753 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1754 + (s & d & (~(mask | mask << 16)))) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1755 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1756 static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask) |
0 | 1757 { |
1758 int width = info->d_width; | |
1759 int height = info->d_height; | |
1760 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
1761 int srcskip = info->s_skip >> 1; | |
1762 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
1763 int dstskip = info->d_skip >> 1; | |
1764 | |
1765 while(height--) { | |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1766 if(((uintptr_t)srcp ^ (uintptr_t)dstp) & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1767 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1768 * Source and destination not aligned, pipeline it. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1769 * This is mostly a win for big blits but no loss for |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1770 * small ones |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1771 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1772 Uint32 prev_sw; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1773 int w = width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1774 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1775 /* handle odd destination */ |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1776 if((uintptr_t)dstp & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1777 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1778 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1779 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1780 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1781 w--; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1782 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1783 srcp++; /* srcp is now 32-bit aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1784 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1785 /* bootstrap pipeline with first halfword */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1786 prev_sw = ((Uint32 *)srcp)[-1]; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1787 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1788 while(w > 1) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1789 Uint32 sw, dw, s; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1790 sw = *(Uint32 *)srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1791 dw = *(Uint32 *)dstp; |
1443
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1792 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1793 s = (prev_sw << 16) + (sw >> 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1794 #else |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1795 s = (prev_sw >> 16) + (sw << 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1796 #endif |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1797 prev_sw = sw; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1798 *(Uint32 *)dstp = BLEND2x16_50(dw, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1799 dstp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1800 srcp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1801 w -= 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1802 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1803 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1804 /* final pixel if any */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1805 if(w) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1806 Uint16 d = *dstp, s; |
1443
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1807 #if SDL_BYTEORDER == SDL_BIG_ENDIAN |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1808 s = (Uint16)prev_sw; |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1809 #else |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1810 s = (Uint16)(prev_sw >> 16); |
9ebbbb4ae53b
Fixed some OpenWatcom warnings
Sam Lantinga <slouken@libsdl.org>
parents:
1428
diff
changeset
|
1811 #endif |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1812 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1813 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1814 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1815 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1816 srcp += srcskip - 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1817 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1818 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1819 /* source and destination are aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1820 int w = width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1821 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1822 /* first odd pixel? */ |
1456
84de7511f79f
Fixed a bunch of 64-bit compatibility problems
Sam Lantinga <slouken@libsdl.org>
parents:
1443
diff
changeset
|
1823 if((uintptr_t)srcp & 2) { |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1824 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1825 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1826 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1827 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1828 w--; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1829 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1830 /* srcp and dstp are now 32-bit aligned */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1831 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1832 while(w > 1) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1833 Uint32 sw = *(Uint32 *)srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1834 Uint32 dw = *(Uint32 *)dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1835 *(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1836 srcp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1837 dstp += 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1838 w -= 2; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1839 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1840 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1841 /* last odd pixel? */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1842 if(w) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1843 Uint16 d = *dstp, s = *srcp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1844 *dstp = BLEND16_50(d, s, mask); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1845 srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1846 dstp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1847 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1848 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1849 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1850 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1851 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1852 } |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
1853 |
1542 | 1854 #if GCC_ASMBLIT |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1855 /* fast RGB565->RGB565 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1856 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1857 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1858 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1859 if(alpha == 128) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1860 Blit16to16SurfaceAlpha128(info, 0xf7de); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1861 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1862 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1863 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1864 Uint16 *srcp = (Uint16 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1865 int srcskip = info->s_skip >> 1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1866 Uint16 *dstp = (Uint16 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1867 int dstskip = info->d_skip >> 1; |
1542 | 1868 Uint32 s, d; |
1869 Uint8 load[8]; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1870 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1871 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ |
1542 | 1872 *(Uint64 *)load = alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1873 alpha >>= 3; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1874 |
1542 | 1875 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
1876 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | |
1877 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | |
1878 /* position alpha to allow for mullo and mulhi on diff channels | |
1879 to reduce the number of operations */ | |
1880 psllq_i2r(3, mm0); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1881 |
1542 | 1882 /* Setup the 565 color channel masks */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
1883 *(Uint64 *)load = 0x07E007E007E007E0ULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1884 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
1885 *(Uint64 *)load = 0x001F001F001F001FULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1886 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1887 while(height--) { |
1542 | 1888 DUFFS_LOOP_QUATRO2( |
1889 { | |
1890 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1891 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1892 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1893 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1894 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1895 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1896 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1897 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1898 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1899 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1900 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1901 *dstp++ = d | d >> 16; |
1542 | 1902 },{ |
1903 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1904 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1905 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1906 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1907 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1908 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1909 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1910 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1911 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1912 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1913 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1914 *dstp++ = d | d >> 16; |
1542 | 1915 s = *srcp++; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1916 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1917 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1918 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1919 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1920 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1921 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1922 s = (s | s << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1923 d = (d | d << 16) & 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1924 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1925 d &= 0x07e0f81f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1926 *dstp++ = d | d >> 16; |
1542 | 1927 },{ |
1928 movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ | |
1929 movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ | |
1930 | |
1931 /* red -- does not need a mask since the right shift clears | |
1932 the uninteresting bits */ | |
1933 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1934 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1935 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 [000r 000r 000r 000r] */ | |
1936 psrlw_i2r(11, mm6); /* mm6 >> 11 -> mm6 [000r 000r 000r 000r] */ | |
1937 | |
1938 /* blend */ | |
1939 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1940 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1941 /* alpha used is actually 11 bits | |
1942 11 + 5 = 16 bits, so the sign bits are lost */ | |
1943 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
1944 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1945 psllw_i2r(11, mm6); /* mm6 << 11 -> mm6 */ | |
1946 | |
1947 movq_r2r(mm6, mm1); /* save new reds in dsts */ | |
1948 | |
1949 /* green -- process the bits in place */ | |
1950 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1951 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1952 pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ | |
1953 pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ | |
1954 | |
1955 /* blend */ | |
1956 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1957 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1958 /* 11 + 11 - 16 = 6 bits, so all the lower uninteresting | |
1959 bits are gone and the sign bits present */ | |
1960 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
1961 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1962 | |
1963 por_r2r(mm6, mm1); /* save new greens in dsts */ | |
1964 | |
1965 /* blue */ | |
1966 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
1967 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
1968 pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ | |
1969 pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
1970 | |
1971 /* blend */ | |
1972 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
1973 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
1974 /* 11 + 5 = 16 bits, so the sign bits are lost and | |
1975 the interesting bits will need to be MASKed */ | |
1976 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
1977 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
1978 pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
1979 | |
1980 por_r2r(mm6, mm1); /* save new blues in dsts */ | |
1981 | |
1982 movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ | |
1983 | |
1984 srcp += 4; | |
1985 dstp += 4; | |
1986 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1987 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1988 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1989 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1990 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1991 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1992 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1993 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1994 /* fast RGB555->RGB555 blending with surface alpha */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1995 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1996 { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1997 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1998 if(alpha == 128) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
1999 Blit16to16SurfaceAlpha128(info, 0xfbde); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2000 } else { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2001 int width = info->d_width; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2002 int height = info->d_height; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2003 Uint16 *srcp = (Uint16 *)info->s_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2004 int srcskip = info->s_skip >> 1; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2005 Uint16 *dstp = (Uint16 *)info->d_pixels; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2006 int dstskip = info->d_skip >> 1; |
1542 | 2007 Uint32 s, d; |
2008 Uint8 load[8]; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2009 |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2010 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ |
1542 | 2011 *(Uint64 *)load = alpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2012 alpha >>= 3; /* downscale alpha to 5 bits */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2013 |
1542 | 2014 movq_m2r(*load, mm0); /* alpha(0000000A) -> mm0 */ |
2015 punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ | |
2016 punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ | |
2017 /* position alpha to allow for mullo and mulhi on diff channels | |
2018 to reduce the number of operations */ | |
2019 psllq_i2r(3, mm0); | |
2020 | |
2021 /* Setup the 555 color channel masks */ | |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
2022 *(Uint64 *)load = 0x03E003E003E003E0ULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2023 movq_m2r(*load, mm4); /* MASKGREEN -> mm4 */ |
720
f90d80d68071
N Sep 17 8791 Sam Lantinga Re: tks source released
Sam Lantinga <slouken@libsdl.org>
parents:
689
diff
changeset
|
2024 *(Uint64 *)load = 0x001F001F001F001FULL; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2025 movq_m2r(*load, mm7); /* MASKBLUE -> mm7 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2026 while(height--) { |
1542 | 2027 DUFFS_LOOP_QUATRO2( |
2028 { | |
2029 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2030 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2031 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2032 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2033 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2034 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2035 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2036 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2037 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2038 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2039 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2040 *dstp++ = d | d >> 16; |
1542 | 2041 },{ |
2042 s = *srcp++; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2043 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2044 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2045 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2046 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2047 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2048 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2049 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2050 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2051 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2052 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2053 *dstp++ = d | d >> 16; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2054 s = *srcp++; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2055 d = *dstp; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2056 /* |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2057 * shift out the middle component (green) to |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2058 * the high 16 bits, and process all three RGB |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2059 * components at the same time. |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2060 */ |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2061 s = (s | s << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2062 d = (d | d << 16) & 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2063 d += (s - d) * alpha >> 5; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2064 d &= 0x03e07c1f; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2065 *dstp++ = d | d >> 16; |
1542 | 2066 },{ |
2067 movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ | |
2068 movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ | |
2069 | |
2070 /* red -- process the bits in place */ | |
2071 psllq_i2r(5, mm4); /* turn MASKGREEN into MASKRED */ | |
2072 /* by reusing the GREEN mask we free up another mmx | |
2073 register to accumulate the result */ | |
2074 | |
2075 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2076 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2077 pand_r2r(mm4, mm5); /* src & MASKRED -> mm5 */ | |
2078 pand_r2r(mm4, mm6); /* dst & MASKRED -> mm6 */ | |
2079 | |
2080 /* blend */ | |
2081 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2082 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2083 /* 11 + 15 - 16 = 10 bits, uninteresting bits will be | |
2084 cleared by a MASK below */ | |
2085 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2086 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2087 pand_r2r(mm4, mm6); /* mm6 & MASKRED -> mm6 */ | |
2088 | |
2089 psrlq_i2r(5, mm4); /* turn MASKRED back into MASKGREEN */ | |
2090 | |
2091 movq_r2r(mm6, mm1); /* save new reds in dsts */ | |
2092 | |
2093 /* green -- process the bits in place */ | |
2094 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2095 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2096 pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ | |
2097 pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ | |
2098 | |
2099 /* blend */ | |
2100 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2101 pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2102 /* 11 + 10 - 16 = 5 bits, so all the lower uninteresting | |
2103 bits are gone and the sign bits present */ | |
2104 psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ | |
2105 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2106 | |
2107 por_r2r(mm6, mm1); /* save new greens in dsts */ | |
2108 | |
2109 /* blue */ | |
2110 movq_r2r(mm2, mm5); /* src -> mm5 */ | |
2111 movq_r2r(mm3, mm6); /* dst -> mm6 */ | |
2112 pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ | |
2113 pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2114 | |
2115 /* blend */ | |
2116 psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ | |
2117 pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ | |
2118 /* 11 + 5 = 16 bits, so the sign bits are lost and | |
2119 the interesting bits will need to be MASKed */ | |
2120 psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ | |
2121 paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ | |
2122 pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ | |
2123 | |
2124 por_r2r(mm6, mm1); /* save new blues in dsts */ | |
2125 | |
2126 movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ | |
2127 | |
2128 srcp += 4; | |
2129 dstp += 4; | |
2130 }, width); | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2131 srcp += srcskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2132 dstp += dstskip; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2133 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2134 emms(); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2135 } |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2136 } |
1542 | 2137 /* End GCC_ASMBLIT */ |
2138 | |
2139 #elif MSVC_ASMBLIT | |
2140 /* fast RGB565->RGB565 blending with surface alpha */ | |
2141 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) | |
2142 { | |
2143 unsigned alpha = info->src->alpha; | |
2144 if(alpha == 128) { | |
2145 Blit16to16SurfaceAlpha128(info, 0xf7de); | |
2146 } else { | |
2147 int width = info->d_width; | |
2148 int height = info->d_height; | |
2149 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
2150 int srcskip = info->s_skip >> 1; | |
2151 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2152 int dstskip = info->d_skip >> 1; | |
2153 Uint32 s, d; | |
2154 | |
2155 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; | |
2156 | |
2157 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | |
2158 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ | |
2159 alpha >>= 3; /* downscale alpha to 5 bits */ | |
2160 | |
2161 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
2162 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
2163 /* position alpha to allow for mullo and mulhi on diff channels | |
2164 to reduce the number of operations */ | |
2165 mm_alpha = _mm_slli_si64(mm_alpha, 3); | |
2166 | |
2167 /* Setup the 565 color channel masks */ | |
2168 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ | |
2169 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | |
2170 | |
2171 while(height--) { | |
2172 DUFFS_LOOP_QUATRO2( | |
2173 { | |
2174 s = *srcp++; | |
2175 d = *dstp; | |
2176 /* | |
2177 * shift out the middle component (green) to | |
2178 * the high 16 bits, and process all three RGB | |
2179 * components at the same time. | |
2180 */ | |
2181 s = (s | s << 16) & 0x07e0f81f; | |
2182 d = (d | d << 16) & 0x07e0f81f; | |
2183 d += (s - d) * alpha >> 5; | |
2184 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2185 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2186 },{ |
2187 s = *srcp++; | |
2188 d = *dstp; | |
2189 /* | |
2190 * shift out the middle component (green) to | |
2191 * the high 16 bits, and process all three RGB | |
2192 * components at the same time. | |
2193 */ | |
2194 s = (s | s << 16) & 0x07e0f81f; | |
2195 d = (d | d << 16) & 0x07e0f81f; | |
2196 d += (s - d) * alpha >> 5; | |
2197 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2198 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2199 s = *srcp++; |
2200 d = *dstp; | |
2201 /* | |
2202 * shift out the middle component (green) to | |
2203 * the high 16 bits, and process all three RGB | |
2204 * components at the same time. | |
2205 */ | |
2206 s = (s | s << 16) & 0x07e0f81f; | |
2207 d = (d | d << 16) & 0x07e0f81f; | |
2208 d += (s - d) * alpha >> 5; | |
2209 d &= 0x07e0f81f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2210 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2211 },{ |
2212 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ | |
2213 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ | |
2214 | |
2215 /* red */ | |
2216 src2 = src1; | |
2217 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */ | |
2218 | |
2219 dst2 = dst1; | |
2220 dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */ | |
2221 | |
2222 /* blend */ | |
2223 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2224 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2225 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2226 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2227 dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */ | |
2228 | |
2229 mm_res = dst2; /* RED -> mm_res */ | |
2230 | |
2231 /* green -- process the bits in place */ | |
2232 src2 = src1; | |
2233 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ | |
2234 | |
2235 dst2 = dst1; | |
2236 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ | |
2237 | |
2238 /* blend */ | |
2239 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2240 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2241 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2242 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2243 | |
2244 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ | |
2245 | |
2246 /* blue */ | |
2247 src2 = src1; | |
2248 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ | |
2249 | |
2250 dst2 = dst1; | |
2251 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ | |
2252 | |
2253 /* blend */ | |
2254 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2255 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2256 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2257 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2258 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ | |
2259 | |
2260 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ | |
2261 | |
2262 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ | |
2263 | |
2264 srcp += 4; | |
2265 dstp += 4; | |
2266 }, width); | |
2267 srcp += srcskip; | |
2268 dstp += dstskip; | |
2269 } | |
2270 _mm_empty(); | |
2271 } | |
2272 } | |
2273 | |
2274 /* fast RGB555->RGB555 blending with surface alpha */ | |
2275 static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) | |
2276 { | |
2277 unsigned alpha = info->src->alpha; | |
2278 if(alpha == 128) { | |
2279 Blit16to16SurfaceAlpha128(info, 0xfbde); | |
2280 } else { | |
2281 int width = info->d_width; | |
2282 int height = info->d_height; | |
2283 Uint16 *srcp = (Uint16 *)info->s_pixels; | |
2284 int srcskip = info->s_skip >> 1; | |
2285 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2286 int dstskip = info->d_skip >> 1; | |
2287 Uint32 s, d; | |
2288 | |
2289 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; | |
2290 | |
2291 alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ | |
2292 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ | |
2293 alpha >>= 3; /* downscale alpha to 5 bits */ | |
2294 | |
2295 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ | |
2296 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ | |
2297 /* position alpha to allow for mullo and mulhi on diff channels | |
2298 to reduce the number of operations */ | |
2299 mm_alpha = _mm_slli_si64(mm_alpha, 3); | |
2300 | |
2301 /* Setup the 555 color channel masks */ | |
2302 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ | |
2303 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ | |
2304 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ | |
2305 | |
2306 while(height--) { | |
2307 DUFFS_LOOP_QUATRO2( | |
2308 { | |
2309 s = *srcp++; | |
2310 d = *dstp; | |
2311 /* | |
2312 * shift out the middle component (green) to | |
2313 * the high 16 bits, and process all three RGB | |
2314 * components at the same time. | |
2315 */ | |
2316 s = (s | s << 16) & 0x03e07c1f; | |
2317 d = (d | d << 16) & 0x03e07c1f; | |
2318 d += (s - d) * alpha >> 5; | |
2319 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2320 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2321 },{ |
2322 s = *srcp++; | |
2323 d = *dstp; | |
2324 /* | |
2325 * shift out the middle component (green) to | |
2326 * the high 16 bits, and process all three RGB | |
2327 * components at the same time. | |
2328 */ | |
2329 s = (s | s << 16) & 0x03e07c1f; | |
2330 d = (d | d << 16) & 0x03e07c1f; | |
2331 d += (s - d) * alpha >> 5; | |
2332 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2333 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2334 s = *srcp++; |
2335 d = *dstp; | |
2336 /* | |
2337 * shift out the middle component (green) to | |
2338 * the high 16 bits, and process all three RGB | |
2339 * components at the same time. | |
2340 */ | |
2341 s = (s | s << 16) & 0x03e07c1f; | |
2342 d = (d | d << 16) & 0x03e07c1f; | |
2343 d += (s - d) * alpha >> 5; | |
2344 d &= 0x03e07c1f; | |
1546
4b835e36633d
*** empty log message ***
Sam Lantinga <slouken@libsdl.org>
parents:
1542
diff
changeset
|
2345 *dstp++ = (Uint16)(d | d >> 16); |
1542 | 2346 },{ |
2347 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ | |
2348 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ | |
2349 | |
2350 /* red -- process the bits in place */ | |
2351 src2 = src1; | |
2352 src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */ | |
2353 | |
2354 dst2 = dst1; | |
2355 dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */ | |
2356 | |
2357 /* blend */ | |
2358 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2359 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2360 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2361 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2362 dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */ | |
2363 | |
2364 mm_res = dst2; /* RED -> mm_res */ | |
2365 | |
2366 /* green -- process the bits in place */ | |
2367 src2 = src1; | |
2368 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ | |
2369 | |
2370 dst2 = dst1; | |
2371 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ | |
2372 | |
2373 /* blend */ | |
2374 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2375 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2376 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ | |
2377 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2378 | |
2379 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ | |
2380 | |
2381 /* blue */ | |
2382 src2 = src1; /* src -> src2 */ | |
2383 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ | |
2384 | |
2385 dst2 = dst1; /* dst -> dst2 */ | |
2386 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ | |
2387 | |
2388 /* blend */ | |
2389 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ | |
2390 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ | |
2391 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ | |
2392 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ | |
2393 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ | |
2394 | |
2395 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ | |
2396 | |
2397 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ | |
2398 | |
2399 srcp += 4; | |
2400 dstp += 4; | |
2401 }, width); | |
2402 srcp += srcskip; | |
2403 dstp += dstskip; | |
2404 } | |
2405 _mm_empty(); | |
2406 } | |
2407 } | |
2408 #endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2409 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2410 /* fast RGB565->RGB565 blending with surface alpha */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2411 static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2412 { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2413 unsigned alpha = info->src->alpha; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2414 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2415 Blit16to16SurfaceAlpha128(info, 0xf7de); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2416 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2417 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2418 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2419 Uint16 *srcp = (Uint16 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2420 int srcskip = info->s_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2421 Uint16 *dstp = (Uint16 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2422 int dstskip = info->d_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2423 alpha >>= 3; /* downscale alpha to 5 bits */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2424 |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2425 while(height--) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2426 DUFFS_LOOP4({ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2427 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2428 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2429 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2430 * shift out the middle component (green) to |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2431 * the high 16 bits, and process all three RGB |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2432 * components at the same time. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2433 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2434 s = (s | s << 16) & 0x07e0f81f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2435 d = (d | d << 16) & 0x07e0f81f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2436 d += (s - d) * alpha >> 5; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2437 d &= 0x07e0f81f; |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2438 *dstp++ = (Uint16)(d | d >> 16); |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2439 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2440 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2441 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2442 } |
0 | 2443 } |
2444 } | |
2445 | |
2446 /* fast RGB555->RGB555 blending with surface alpha */ | |
2447 static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) | |
2448 { | |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2449 unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2450 if(alpha == 128) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2451 Blit16to16SurfaceAlpha128(info, 0xfbde); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2452 } else { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2453 int width = info->d_width; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2454 int height = info->d_height; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2455 Uint16 *srcp = (Uint16 *)info->s_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2456 int srcskip = info->s_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2457 Uint16 *dstp = (Uint16 *)info->d_pixels; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2458 int dstskip = info->d_skip >> 1; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2459 alpha >>= 3; /* downscale alpha to 5 bits */ |
0 | 2460 |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2461 while(height--) { |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2462 DUFFS_LOOP4({ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2463 Uint32 s = *srcp++; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2464 Uint32 d = *dstp; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2465 /* |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2466 * shift out the middle component (green) to |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2467 * the high 16 bits, and process all three RGB |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2468 * components at the same time. |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2469 */ |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2470 s = (s | s << 16) & 0x03e07c1f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2471 d = (d | d << 16) & 0x03e07c1f; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2472 d += (s - d) * alpha >> 5; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2473 d &= 0x03e07c1f; |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2474 *dstp++ = (Uint16)(d | d >> 16); |
1
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2475 }, width); |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2476 srcp += srcskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2477 dstp += dstskip; |
cf2af46e9e2a
Changes since SDL 1.2.0 release
Sam Lantinga <slouken@lokigames.com>
parents:
0
diff
changeset
|
2478 } |
0 | 2479 } |
2480 } | |
2481 | |
2482 /* fast ARGB8888->RGB565 blending with pixel alpha */ | |
2483 static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) | |
2484 { | |
2485 int width = info->d_width; | |
2486 int height = info->d_height; | |
2487 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
2488 int srcskip = info->s_skip >> 2; | |
2489 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2490 int dstskip = info->d_skip >> 1; | |
2491 | |
2492 while(height--) { | |
2493 DUFFS_LOOP4({ | |
2494 Uint32 s = *srcp; | |
2495 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ | |
2496 /* FIXME: Here we special-case opaque alpha since the | |
2497 compositioning used (>>8 instead of /255) doesn't handle | |
2498 it correctly. Also special-case alpha=0 for speed? | |
2499 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2500 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2501 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2502 *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f)); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2503 } else { |
0 | 2504 Uint32 d = *dstp; |
2505 /* | |
2506 * convert source and destination to G0RAB65565 | |
2507 * and blend all components at the same time | |
2508 */ | |
2509 s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800) | |
2510 + (s >> 3 & 0x1f); | |
2511 d = (d | d << 16) & 0x07e0f81f; | |
2512 d += (s - d) * alpha >> 5; | |
2513 d &= 0x07e0f81f; | |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2514 *dstp = (Uint16)(d | d >> 16); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2515 } |
0 | 2516 } |
2517 srcp++; | |
2518 dstp++; | |
2519 }, width); | |
2520 srcp += srcskip; | |
2521 dstp += dstskip; | |
2522 } | |
2523 } | |
2524 | |
2525 /* fast ARGB8888->RGB555 blending with pixel alpha */ | |
2526 static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) | |
2527 { | |
2528 int width = info->d_width; | |
2529 int height = info->d_height; | |
2530 Uint32 *srcp = (Uint32 *)info->s_pixels; | |
2531 int srcskip = info->s_skip >> 2; | |
2532 Uint16 *dstp = (Uint16 *)info->d_pixels; | |
2533 int dstskip = info->d_skip >> 1; | |
2534 | |
2535 while(height--) { | |
2536 DUFFS_LOOP4({ | |
2537 unsigned alpha; | |
2538 Uint32 s = *srcp; | |
2539 alpha = s >> 27; /* downscale alpha to 5 bits */ | |
2540 /* FIXME: Here we special-case opaque alpha since the | |
2541 compositioning used (>>8 instead of /255) doesn't handle | |
2542 it correctly. Also special-case alpha=0 for speed? | |
2543 Benchmark this! */ | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2544 if(alpha) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2545 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2546 *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f)); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2547 } else { |
0 | 2548 Uint32 d = *dstp; |
2549 /* | |
2550 * convert source and destination to G0RAB65565 | |
2551 * and blend all components at the same time | |
2552 */ | |
2553 s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00) | |
2554 + (s >> 3 & 0x1f); | |
2555 d = (d | d << 16) & 0x03e07c1f; | |
2556 d += (s - d) * alpha >> 5; | |
2557 d &= 0x03e07c1f; | |
1428
5f52867ba65c
Update for Visual C++ 6.0
Sam Lantinga <slouken@libsdl.org>
parents:
1402
diff
changeset
|
2558 *dstp = (Uint16)(d | d >> 16); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2559 } |
0 | 2560 } |
2561 srcp++; | |
2562 dstp++; | |
2563 }, width); | |
2564 srcp += srcskip; | |
2565 dstp += dstskip; | |
2566 } | |
2567 } | |
2568 | |
2569 /* General (slow) N->N blending with per-surface alpha */ | |
2570 static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) | |
2571 { | |
2572 int width = info->d_width; | |
2573 int height = info->d_height; | |
2574 Uint8 *src = info->s_pixels; | |
2575 int srcskip = info->s_skip; | |
2576 Uint8 *dst = info->d_pixels; | |
2577 int dstskip = info->d_skip; | |
2578 SDL_PixelFormat *srcfmt = info->src; | |
2579 SDL_PixelFormat *dstfmt = info->dst; | |
2580 int srcbpp = srcfmt->BytesPerPixel; | |
2581 int dstbpp = dstfmt->BytesPerPixel; | |
2582 unsigned sA = srcfmt->alpha; | |
2583 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | |
2584 | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2585 if(sA) { |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2586 while ( height-- ) { |
0 | 2587 DUFFS_LOOP4( |
2588 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2589 Uint32 Pixel; |
0 | 2590 unsigned sR; |
2591 unsigned sG; | |
2592 unsigned sB; | |
2593 unsigned dR; | |
2594 unsigned dG; | |
2595 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2596 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2597 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); |
0 | 2598 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2599 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | |
2600 src += srcbpp; | |
2601 dst += dstbpp; | |
2602 }, | |
2603 width); | |
2604 src += srcskip; | |
2605 dst += dstskip; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2606 } |
0 | 2607 } |
2608 } | |
2609 | |
2610 /* General (slow) colorkeyed N->N blending with per-surface alpha */ | |
2611 static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) | |
2612 { | |
2613 int width = info->d_width; | |
2614 int height = info->d_height; | |
2615 Uint8 *src = info->s_pixels; | |
2616 int srcskip = info->s_skip; | |
2617 Uint8 *dst = info->d_pixels; | |
2618 int dstskip = info->d_skip; | |
2619 SDL_PixelFormat *srcfmt = info->src; | |
2620 SDL_PixelFormat *dstfmt = info->dst; | |
2621 Uint32 ckey = srcfmt->colorkey; | |
2622 int srcbpp = srcfmt->BytesPerPixel; | |
2623 int dstbpp = dstfmt->BytesPerPixel; | |
2624 unsigned sA = srcfmt->alpha; | |
2625 unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; | |
2626 | |
2627 while ( height-- ) { | |
2628 DUFFS_LOOP4( | |
2629 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2630 Uint32 Pixel; |
0 | 2631 unsigned sR; |
2632 unsigned sG; | |
2633 unsigned sB; | |
2634 unsigned dR; | |
2635 unsigned dG; | |
2636 unsigned dB; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2637 RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2638 if(sA && Pixel != ckey) { |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2639 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); |
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2640 DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); |
0 | 2641 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
2642 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); | |
2643 } | |
2644 src += srcbpp; | |
2645 dst += dstbpp; | |
2646 }, | |
2647 width); | |
2648 src += srcskip; | |
2649 dst += dstskip; | |
2650 } | |
2651 } | |
2652 | |
2653 /* General (slow) N->N blending with pixel alpha */ | |
2654 static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) | |
2655 { | |
2656 int width = info->d_width; | |
2657 int height = info->d_height; | |
2658 Uint8 *src = info->s_pixels; | |
2659 int srcskip = info->s_skip; | |
2660 Uint8 *dst = info->d_pixels; | |
2661 int dstskip = info->d_skip; | |
2662 SDL_PixelFormat *srcfmt = info->src; | |
2663 SDL_PixelFormat *dstfmt = info->dst; | |
2664 | |
2665 int srcbpp; | |
2666 int dstbpp; | |
2667 | |
2668 /* Set up some basic variables */ | |
2669 srcbpp = srcfmt->BytesPerPixel; | |
2670 dstbpp = dstfmt->BytesPerPixel; | |
2671 | |
2672 /* FIXME: for 8bpp source alpha, this doesn't get opaque values | |
2673 quite right. for <8bpp source alpha, it gets them very wrong | |
2674 (check all macros!) | |
2675 It is unclear whether there is a good general solution that doesn't | |
2676 need a branch (or a divide). */ | |
2677 while ( height-- ) { | |
2678 DUFFS_LOOP4( | |
2679 { | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2680 Uint32 Pixel; |
0 | 2681 unsigned sR; |
2682 unsigned sG; | |
2683 unsigned sB; | |
2684 unsigned dR; | |
2685 unsigned dG; | |
2686 unsigned dB; | |
2687 unsigned sA; | |
2688 unsigned dA; | |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2689 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2690 if(sA) { |
1162
2651158f59b8
Enable altivec blitters on PowerPC Linux, and some fixes for recent
Ryan C. Gordon <icculus@icculus.org>
parents:
1047
diff
changeset
|
2691 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2692 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2693 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2694 } |
0 | 2695 src += srcbpp; |
2696 dst += dstbpp; | |
2697 }, | |
2698 width); | |
2699 src += srcskip; | |
2700 dst += dstskip; | |
2701 } | |
2702 } | |
2703 | |
2704 | |
2705 SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index) | |
2706 { | |
2707 SDL_PixelFormat *sf = surface->format; | |
2708 SDL_PixelFormat *df = surface->map->dst->format; | |
2709 | |
2710 if(sf->Amask == 0) { | |
2711 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { | |
2712 if(df->BytesPerPixel == 1) | |
2713 return BlitNto1SurfaceAlphaKey; | |
2714 else | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2715 #if SDL_ALTIVEC_BLITTERS |
1240 | 2716 if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && |
2717 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2718 return Blit32to32SurfaceAlphaKeyAltivec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2719 else |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2720 #endif |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2721 return BlitNtoNSurfaceAlphaKey; |
0 | 2722 } else { |
2723 /* Per-surface alpha blits */ | |
2724 switch(df->BytesPerPixel) { | |
2725 case 1: | |
2726 return BlitNto1SurfaceAlpha; | |
2727 | |
2728 case 2: | |
2729 if(surface->map->identity) { | |
2730 if(df->Gmask == 0x7e0) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2731 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2732 #if MMX_ASMBLIT |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
2733 if(SDL_HasMMX()) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2734 return Blit565to565SurfaceAlphaMMX; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2735 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2736 #endif |
0 | 2737 return Blit565to565SurfaceAlpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2738 } |
0 | 2739 else if(df->Gmask == 0x3e0) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2740 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2741 #if MMX_ASMBLIT |
739
22dbf364c017
Added SDL_HasMMX(), SDL_Has3DNow(), SDL_HasSSE() in SDL_cpuinfo.h
Sam Lantinga <slouken@libsdl.org>
parents:
720
diff
changeset
|
2742 if(SDL_HasMMX()) |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2743 return Blit555to555SurfaceAlphaMMX; |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2744 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2745 #endif |
0 | 2746 return Blit555to555SurfaceAlpha; |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2747 } |
0 | 2748 } |
2749 return BlitNtoNSurfaceAlpha; | |
2750 | |
2751 case 4: | |
2752 if(sf->Rmask == df->Rmask | |
2753 && sf->Gmask == df->Gmask | |
2754 && sf->Bmask == df->Bmask | |
2755 && sf->BytesPerPixel == 4) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2756 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2757 #if MMX_ASMBLIT |
1542 | 2758 if(sf->Rshift % 8 == 0 |
2759 && sf->Gshift % 8 == 0 | |
2760 && sf->Bshift % 8 == 0 | |
2761 && SDL_HasMMX()) | |
2762 return BlitRGBtoRGBSurfaceAlphaMMX; | |
2763 #endif | |
2764 if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) | |
2765 { | |
1617
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2766 #if SDL_ALTIVEC_BLITTERS |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2767 if(!(surface->map->dst->flags & SDL_HWSURFACE) |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2768 && SDL_HasAltiVec()) |
1542 | 2769 return BlitRGBtoRGBSurfaceAlphaAltivec; |
2770 #endif | |
2771 return BlitRGBtoRGBSurfaceAlpha; | |
2772 } | |
2773 } | |
2774 #if SDL_ALTIVEC_BLITTERS | |
2775 if((sf->BytesPerPixel == 4) && | |
2776 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
2777 return Blit32to32SurfaceAlphaAltivec; | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2778 else |
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2779 #endif |
1542 | 2780 return BlitNtoNSurfaceAlpha; |
0 | 2781 |
2782 case 3: | |
2783 default: | |
2784 return BlitNtoNSurfaceAlpha; | |
2785 } | |
2786 } | |
2787 } else { | |
2788 /* Per-pixel alpha blits */ | |
2789 switch(df->BytesPerPixel) { | |
2790 case 1: | |
2791 return BlitNto1PixelAlpha; | |
2792 | |
2793 case 2: | |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2794 #if SDL_ALTIVEC_BLITTERS |
1240 | 2795 if(sf->BytesPerPixel == 4 && !(surface->map->dst->flags & SDL_HWSURFACE) && |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2796 df->Gmask == 0x7e0 && |
1240 | 2797 df->Bmask == 0x1f && SDL_HasAltiVec()) |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2798 return Blit32to565PixelAlphaAltivec; |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2799 else |
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2800 #endif |
0 | 2801 if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 |
2802 && sf->Gmask == 0xff00 | |
2803 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) | |
2804 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { | |
2805 if(df->Gmask == 0x7e0) | |
2806 return BlitARGBto565PixelAlpha; | |
2807 else if(df->Gmask == 0x3e0) | |
2808 return BlitARGBto555PixelAlpha; | |
2809 } | |
2810 return BlitNtoNPixelAlpha; | |
2811 | |
2812 case 4: | |
1542 | 2813 if(sf->Rmask == df->Rmask |
0 | 2814 && sf->Gmask == df->Gmask |
2815 && sf->Bmask == df->Bmask | |
2816 && sf->BytesPerPixel == 4) | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2817 { |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2818 #if MMX_ASMBLIT |
1542 | 2819 if(sf->Rshift % 8 == 0 |
2820 && sf->Gshift % 8 == 0 | |
2821 && sf->Bshift % 8 == 0 | |
2822 && sf->Ashift % 8 == 0 | |
2823 && sf->Aloss == 0) | |
2824 { | |
2825 if(SDL_Has3DNow()) | |
2826 return BlitRGBtoRGBPixelAlphaMMX3DNOW; | |
2827 if(SDL_HasMMX()) | |
2828 return BlitRGBtoRGBPixelAlphaMMX; | |
2829 } | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2830 #endif |
1542 | 2831 if(sf->Amask == 0xff000000) |
2832 { | |
1617
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2833 #if SDL_ALTIVEC_BLITTERS |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2834 if(!(surface->map->dst->flags & SDL_HWSURFACE) |
b255b4058d37
Patch from Alex to fix reverted code
Sam Lantinga <slouken@libsdl.org>
parents:
1546
diff
changeset
|
2835 && SDL_HasAltiVec()) |
1542 | 2836 return BlitRGBtoRGBPixelAlphaAltivec; |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2837 #endif |
1542 | 2838 return BlitRGBtoRGBPixelAlpha; |
2839 } | |
689
5bb080d35049
Date: Tue, 19 Aug 2003 17:57:00 +0200
Sam Lantinga <slouken@libsdl.org>
parents:
297
diff
changeset
|
2840 } |
1361
19418e4422cb
New configure-based build system. Still work in progress, but much improved
Sam Lantinga <slouken@libsdl.org>
parents:
1358
diff
changeset
|
2841 #if SDL_ALTIVEC_BLITTERS |
1542 | 2842 if (sf->Amask && sf->BytesPerPixel == 4 && |
2843 !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) | |
2844 return Blit32to32PixelAlphaAltivec; | |
2845 else | |
1047
ffaaf7ecf685
Altivec-optimized blitters!
Ryan C. Gordon <icculus@icculus.org>
parents:
880
diff
changeset
|
2846 #endif |
1542 | 2847 return BlitNtoNPixelAlpha; |
0 | 2848 |
2849 case 3: | |
2850 default: | |
2851 return BlitNtoNPixelAlpha; | |
2852 } | |
2853 } | |
2854 } | |
2855 |