Mercurial > sdl-ios-xcode
annotate src/hermes/mmxp2_32.asm @ 1690:43ba677b4f62 SDL-1.3
Fixed bug #241
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Tue, 20 Jun 2006 05:27:03 +0000 |
parents | 88c2d6aed428 |
children | 393092a3ebf6 |
rev | line source |
---|---|
0 | 1 ; |
2 ; pII-optimised MMX format converters for HERMES | |
3 ; Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk) | |
4 ; and (c) 1999 Jonathan Matthew (jmatthew@uq.net.au) | |
5 ; This source code is licensed under the GNU LGPL | |
6 ; | |
7 ; Please refer to the file COPYING.LIB contained in the distribution for | |
8 ; licensing conditions | |
9 ; | |
10 ; COPYRIGHT NOTICE | |
11 ; | |
12 ; This file partly contains code that is (c) Intel Corporation, specifically | |
13 ; the mode detection routine, and the converter to 15 bit (8 pixel | |
14 ; conversion routine from the mmx programming tutorial pages). | |
15 ; | |
16 ; | |
17 ; These routines aren't exactly pII optimised - it's just that as they | |
18 ; are, they're terrible on p5 MMXs, but less so on pIIs. Someone needs to | |
19 ; optimise them for p5 MMXs.. | |
20 | |
21 BITS 32 | |
22 | |
23 | |
24 GLOBAL _ConvertMMXpII32_24RGB888 | |
25 GLOBAL _ConvertMMXpII32_16RGB565 | |
26 GLOBAL _ConvertMMXpII32_16BGR565 | |
27 GLOBAL _ConvertMMXpII32_16RGB555 | |
28 GLOBAL _ConvertMMXpII32_16BGR555 | |
29 | |
30 EXTERN _mmxreturn | |
31 | |
1230
88c2d6aed428
From Mike Frysinger and/or Gentoo:
Ryan C. Gordon <icculus@icculus.org>
parents:
1199
diff
changeset
|
;; Macros for conversion routines
;;
;; movq and pand cannot take an immediate operand, so 64-bit constants are
;; built on the stack: a 32-bit value is pushed twice, which leaves an
;; 8-byte quantity at [esp] holding that value in both dwords.  Every use
;; of load_immq / pand_immq therefore lowers esp by 8 and must be balanced
;; later with CLEANUP_IMMQ_LOADS.

; _push_immq_mask imm32
;   Push imm32 twice (esp -= 8); [esp] then holds imm32:imm32.
%macro _push_immq_mask 1
        push    dword %1
        push    dword %1
%endmacro

; load_immq mmreg, imm32
;   mmreg = imm32 replicated into both dwords.  Leaves 8 bytes on the stack.
%macro load_immq 2
        _push_immq_mask %2
        movq    %1, [esp]
%endmacro

; pand_immq mmreg, imm32
;   mmreg &= imm32 replicated into both dwords.  Leaves 8 bytes on the stack.
%macro pand_immq 2
        _push_immq_mask %2
        pand    %1, [esp]
%endmacro

; CLEANUP_IMMQ_LOADS(num)
;   Release the stack space left behind by `num` load_immq / pand_immq uses.
;   The argument is parenthesised so that an expression such as (1+1) is
;   scaled as a whole.  The `byte` immediate limits 8*num to 127, i.e.
;   num <= 15.  Note that `add` modifies the flags.
%define CLEANUP_IMMQ_LOADS(num) \
        add esp, byte 8 * (num)
0 | 51 |
1230
88c2d6aed428
From Mike Frysinger and/or Gentoo:
Ryan C. Gordon <icculus@icculus.org>
parents:
1199
diff
changeset
|
; Per-pixel (32-bit) constants.  load_immq / pand_immq replicate each one
; into both halves of an MMX register, so two pixels are handled at once.

; 32 -> 24 bit: keep the low three bytes of each pixel, drop the top byte.
%define mmx32_rgb888_mask       0x00ffffff

; 32 -> 16 bit (5-6-5): top 5 bits of byte 0, top 6 bits of byte 1,
; top 5 bits of byte 2.
%define mmx32_rgb565_b          0x000000f8
%define mmx32_rgb565_g          0x0000fc00
%define mmx32_rgb565_r          0x00f80000

; 32 -> 15 bit (5-5-5): bytes 0 and 2 are masked together (rb) and moved
; into place with pmaddwd; byte 1 is masked on its own (g).
%define mmx32_rgb555_rb         0x00f800f8
%define mmx32_rgb555_g          0x0000f800
; pmaddwd multipliers, one 16-bit factor per masked word:
;   rgb555: low word * 0x0008, high word * 0x2000
;   bgr555: low word * 0x2000, high word * 0x0008
%define mmx32_rgb555_mul        0x20000008
%define mmx32_bgr555_mul        0x00082000
0 | 61 |
62 SECTION .text | |
63 | |
;-----------------------------------------------------------------------
; _ConvertMMXpII32_24RGB888
;   Packs 32-bit pixels into 24-bit pixels by discarding the top byte of
;   every source pixel.
; In:    esi = source pixels (4 bytes each)
;        edi = destination (3 bytes each)
;        ecx = number of pixels
; Exit:  jumps to the external label _mmxreturn
;        NOTE(review): presumably _mmxreturn restores the caller's
;        registers and MMX state - confirm in the dispatcher.
; Uses:  al, bl, dl, ecx, edx, esi, edi, mm0-mm4, mm6, mm7, flags
;
; Lane comments list the 8 bytes of an MMX register from high to low;
; capital letters belong to the pixel in the upper dword.
;-----------------------------------------------------------------------
_ConvertMMXpII32_24RGB888:
        load_immq mm6, mmx32_rgb888_mask    ; mm6 = 00ffffff:00ffffff
        CLEANUP_IMMQ_LOADS(1)               ; release the 8 bytes pushed above
        pxor      mm7, mm7                  ; mm7 = 0 (zero source for unpacks)

        mov       edx, ecx                  ; remember the full pixel count
        and       ecx, 0fffffffch           ; ecx = count rounded down to x4
        jz        .tail                     ; fewer than four pixels

        ; Four pixels (16 bytes) in, 12 bytes out per iteration.
.quad_loop:
        movq      mm0, [esi]                ; A R G B a r g b   pixels 1,0
        movq      mm1, [esi+8]              ; A R G B a r g b   pixels 3,2
        pand      mm0, mm6                  ; 0 R G B 0 r g b
        pand      mm1, mm6                  ; 0 R G B 0 r g b

        ; output bytes 0-5: pixel 0 followed by pixel 1
        movq      mm2, mm0
        punpckhdq mm2, mm7                  ; 0 0 0 0 0 R G B
        punpckldq mm0, mm7                  ; 0 0 0 0 0 r g b
        psllq     mm2, 24                   ; 0 0 R G B 0 0 0
        por       mm0, mm2                  ; 0 0 R G B r g b

        ; output bytes 6-7: the two low bytes of pixel 2
        movq      mm3, mm1
        psllq     mm3, 48                   ; g b 0 0 0 0 0 0
        por       mm0, mm3                  ; g b R G B r g b

        ; output bytes 8-11: last byte of pixel 2, then pixel 3
        movq      mm4, mm1
        punpckhdq mm4, mm7                  ; 0 0 0 0 0 R G B
        punpckldq mm1, mm7                  ; 0 0 0 0 0 r g b
        psrlq     mm1, 16                   ; 0 0 0 0 0 0 0 r
        psllq     mm4, 8                    ; 0 0 0 0 R G B 0
        por       mm1, mm4                  ; 0 0 0 0 R G B r

        movq      [edi], mm0
        movd      [edi+8], mm1
        add       esi, byte 16
        add       edi, byte 12
        sub       ecx, byte 4
        jnz       .quad_loop

        ; Remaining count & 3 pixels, copied one byte at a time.
.tail:
        mov       ecx, edx
        and       ecx, byte 3
        jz        .done
.tail_loop:
        mov       al, [esi]
        mov       bl, [esi+1]
        mov       dl, [esi+2]               ; edx is free: count is in ecx now
        mov       [edi], al
        mov       [edi+1], bl
        mov       [edi+2], dl
        add       esi, byte 4
        add       edi, byte 3
        dec       ecx
        jnz       .tail_loop
.done:
        jmp       _mmxreturn
124 | |
125 | |
126 | |
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16RGB565
;   Converts 32-bit pixels to 16-bit 5-6-5 pixels: bits 0-4 come from
;   source byte 0, bits 5-10 from byte 1, bits 11-15 from byte 2.
; In:    esi = source pixels (4 bytes each)
;        edi = destination (2 bytes each)
;        ecx = number of pixels
; Exit:  jumps to the external label _mmxreturn
;        NOTE(review): presumably _mmxreturn restores the caller's
;        registers and MMX state - confirm in the dispatcher.
; Uses:  eax, ebx, ecx, edx, esi, edi, mm0-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16RGB565:
        load_immq mm5, mmx32_rgb565_b       ; mm5 = byte-0 mask, both dwords
        load_immq mm6, mmx32_rgb565_g       ; mm6 = byte-1 mask
        load_immq mm7, mmx32_rgb565_r       ; mm7 = byte-2 mask
        CLEANUP_IMMQ_LOADS(3)               ; release the 24 bytes pushed above

        mov       edx, ecx                  ; remember the full pixel count
        shr       ecx, 2                    ; ecx = number of 4-pixel groups
        jz        .tail

        ; Four pixels in, four 16-bit pixels out per iteration.
.quad_loop:
        ; pixels 0,1 -> low 11 bits (g,b) in mm0, byte-2 field in mm3
        movq      mm0, [esi]                ; a r g b (per dword)
        movq      mm1, mm0
        movq      mm3, mm0
        pand      mm0, mm6                  ; 0 0 g 0
        pand      mm1, mm5                  ; 0 0 0 b
        pand      mm3, mm7                  ; 0 r 0 0
        pslld     mm1, 2                    ; 000000bb bbb00000
        por       mm0, mm1                  ; ggggggbb bbb00000
        psrld     mm0, 5                    ; 00000ggg gggbbbbb

        ; pixels 2,3 -> low 11 bits in mm4, byte-2 field in mm1
        movq      mm4, [esi+8]
        movq      mm2, mm4
        movq      mm1, mm4
        pand      mm4, mm6                  ; 0 0 g 0
        pand      mm2, mm5                  ; 0 0 0 b
        pand      mm1, mm7                  ; 0 r 0 0
        pslld     mm2, 2
        por       mm4, mm2
        psrld     mm4, 5                    ; 00000ggg gggbbbbb

        ; Each masked byte-2 field is the word 00F8h-or-less, so packing the
        ; words to bytes puts it in the high byte of a 16-bit lane.
        packuswb  mm3, mm1                  ; rrrrr000 00000000  x4
        packssdw  mm0, mm4                  ; 00000ggg gggbbbbb  x4
        por       mm0, mm3                  ; rrrrrggg gggbbbbb  x4
        movq      [edi], mm0

        add       esi, byte 16
        add       edi, byte 8
        dec       ecx
        jnz       .quad_loop

        ; Remaining count & 3 pixels, one at a time in integer registers.
.tail:
        mov       ecx, edx
        and       ecx, byte 3
        jz        .done
.tail_loop:
        mov       al, [esi]                 ; al = byte 0
        mov       ah, [esi+2]               ; ah = byte 2
        mov       bh, [esi+1]               ; bh = byte 1 (rest of ebx is masked off)
        shr       al, 3
        and       eax, 0F81Fh               ; rrrrr000 000bbbbb
        shr       ebx, 5
        and       ebx, 07E0h                ; 00000ggg ggg00000
        add       eax, ebx
        mov       [edi], ax
        add       esi, byte 4
        add       edi, byte 2
        dec       ecx
        jnz       .tail_loop
.done:
        jmp       _mmxreturn
193 | |
194 | |
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16BGR565
;   Converts 32-bit pixels to 16-bit 5-6-5 pixels with the outer fields
;   swapped: bits 0-4 come from source byte 2, bits 5-10 from byte 1,
;   bits 11-15 from byte 0.
; In:    esi = source pixels (4 bytes each)
;        edi = destination (2 bytes each)
;        ecx = number of pixels
; Exit:  jumps to the external label _mmxreturn
;        NOTE(review): presumably _mmxreturn restores the caller's
;        registers and MMX state - confirm in the dispatcher.
; Uses:  eax, ebx, ecx, edx, esi, edi, mm0-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16BGR565:
        load_immq mm5, mmx32_rgb565_r       ; mm5 = byte-2 mask, both dwords
        load_immq mm6, mmx32_rgb565_g       ; mm6 = byte-1 mask
        load_immq mm7, mmx32_rgb565_b       ; mm7 = byte-0 mask
        CLEANUP_IMMQ_LOADS(3)               ; release the 24 bytes pushed above

        mov       edx, ecx                  ; remember the full pixel count
        shr       ecx, 2                    ; ecx = number of 4-pixel groups
        jz        .tail

        ; Four pixels in, four 16-bit pixels out per iteration.
.quad_loop:
        ; pixels 0,1 -> low 11 bits (g,r) in mm0, byte-0 field in mm3
        movq      mm0, [esi]                ; a r g b (per dword)
        movq      mm1, mm0
        movq      mm3, mm0
        pand      mm0, mm6                  ; 0 0 g 0
        pand      mm1, mm5                  ; 0 r 0 0
        pand      mm3, mm7                  ; 0 0 0 b
        psllq     mm3, 16                   ; 0 b 0 0
        psrld     mm1, 14                   ; 000000rr rrr00000
        por       mm0, mm1                  ; ggggggrr rrr00000
        psrld     mm0, 5                    ; 00000ggg gggrrrrr

        ; pixels 2,3 -> low 11 bits in mm4, byte-0 field in mm1
        movq      mm4, [esi+8]
        movq      mm2, mm4
        movq      mm1, mm4
        pand      mm4, mm6                  ; 0 0 g 0
        pand      mm2, mm5                  ; 0 r 0 0
        pand      mm1, mm7                  ; 0 0 0 b
        psllq     mm1, 16                   ; 0 b 0 0
        psrld     mm2, 14
        por       mm4, mm2
        psrld     mm4, 5                    ; 00000ggg gggrrrrr

        packuswb  mm3, mm1                  ; bbbbb000 00000000  x4
        packssdw  mm0, mm4                  ; 00000ggg gggrrrrr  x4
        por       mm0, mm3                  ; bbbbbggg gggrrrrr  x4
        movq      [edi], mm0

        add       esi, byte 16
        add       edi, byte 8
        dec       ecx
        jnz       .quad_loop

        ; Remaining count & 3 pixels; edx itself is the loop counter here.
.tail:
        and       edx, byte 3
        jz        .done
.tail_loop:
        mov       al, [esi+2]               ; al = byte 2
        mov       ah, [esi]                 ; ah = byte 0
        mov       bh, [esi+1]               ; bh = byte 1 (rest of ebx is masked off)
        shr       al, 3
        and       eax, 0F81Fh               ; bbbbb000 000rrrrr
        shr       ebx, 5
        and       ebx, 07E0h                ; 00000ggg ggg00000
        add       eax, ebx
        mov       [edi], ax
        add       esi, byte 4
        add       edi, byte 2
        dec       edx
        jnz       .tail_loop
.done:
        jmp       _mmxreturn
263 | |
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16BGR555 / _ConvertMMXpII32_16RGB555
;   Convert 32-bit pixels to 15-bit 5-5-5 pixels.  The two entry points
;   differ only in the pmaddwd multiplier loaded into mm7, which decides
;   whether source byte 0 or source byte 2 ends up in bits 0-4; both then
;   share the code at _convert_bgr555_cheat.
;
;   Per pixel:  t   = pmaddwd(pixel & 00f800f8h, mm7)
;               out = ((pixel & 0000f800h) | t) >> 6      (low 16 bits)
;     rgb555 multiplier: byte0*8 + byte2*2000h -> byte 2 in bits 10-14
;     bgr555 multiplier: byte0*2000h + byte2*8 -> byte 0 in bits 10-14
;
; In:    esi = source pixels (4 bytes each)
;        edi = destination (2 bytes each)
;        ecx = number of pixels
; Exit:  jumps to the external label _mmxreturn
;        NOTE(review): presumably _mmxreturn restores the caller's
;        registers and MMX state - confirm in the dispatcher.
; Uses:  eax, ecx, edx, esi, edi, mm0-mm3, mm5-mm7, flags
;
; Differences from the earlier software-pipelined version:
;   * The leftover pixels are converted with the same mask/pmaddwd
;     sequence as the main loop.  The old scalar tail always produced
;     RGB555 ordering, so the BGR555 entry point swapped the outer
;     fields of the last (count & 7) pixels.
;   * Nothing is loaded ahead of the pixels being converted.  The old
;     loop fetched the next 16 source bytes before testing the counter
;     and so read past the converted data on its final iteration.
;   * The 00f800f8h mask lives in mm5 for the whole routine instead of
;     being pushed on the stack four times per iteration.
;-----------------------------------------------------------------------
_ConvertMMXpII32_16BGR555:
        load_immq mm7, mmx32_bgr555_mul     ; 8 bytes stay on the stack until
        jmp       _convert_bgr555_cheat     ; the shared CLEANUP below

_ConvertMMXpII32_16RGB555:
        load_immq mm7, mmx32_rgb555_mul     ; falls through into shared code

_convert_bgr555_cheat:
        load_immq mm6, mmx32_rgb555_g       ; mm6 = byte-1 mask, both dwords
        load_immq mm5, mmx32_rgb555_rb      ; mm5 = byte-0/byte-2 mask
        CLEANUP_IMMQ_LOADS(3)               ; mm7 + mm6 + mm5 loads = 24 bytes

        mov       edx, ecx                  ; remember the full pixel count
        shr       ecx, 2                    ; ecx = number of 4-pixel groups
        jz        .tail

        ; Four pixels (16 bytes) in, four 16-bit pixels (8 bytes) out.
.quad_loop:
        movq      mm0, [esi]                ; pixels 1,0
        movq      mm2, [esi+8]              ; pixels 3,2
        movq      mm1, mm0
        movq      mm3, mm2
        pand      mm1, mm5                  ; keep top 5 bits of bytes 0 and 2
        pand      mm3, mm5
        pmaddwd   mm1, mm7                  ; shift both fields into place
        pmaddwd   mm3, mm7
        pand      mm0, mm6                  ; keep top 5 bits of byte 1
        pand      mm2, mm6
        por       mm1, mm0                  ; 15 significant bits at 6-20
        por       mm3, mm2
        psrld     mm1, 6                    ; ... now at bits 0-14
        psrld     mm3, 6
        packssdw  mm1, mm3                  ; values <= 7FFFh: no saturation
        movq      [edi], mm1

        add       esi, byte 16
        add       edi, byte 8
        dec       ecx
        jnz       .quad_loop

        ; Remaining count & 3 pixels, one at a time, using the same
        ; multiplier so the result matches the selected channel order.
.tail:
        and       edx, byte 3
        jz        .done
.tail_loop:
        movd      mm0, [esi]                ; one pixel, upper dword zeroed
        movq      mm1, mm0
        pand      mm1, mm5
        pmaddwd   mm1, mm7
        pand      mm0, mm6
        por       mm0, mm1
        psrld     mm0, 6
        movd      eax, mm0
        mov       [edi], ax
        add       esi, byte 4
        add       edi, byte 2
        dec       edx
        jnz       .tail_loop
.done:
        jmp       _mmxreturn
403 | |
1199
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
; ELF output only: emit an empty .note.GNU-stack section so that GNU
; linkers do not mark the stack executable because of this object file.
%ifidn __OUTPUT_FORMAT__, elf
        section .note.GNU-stack noalloc noexec nowrite progbits
%endif