Mercurial > sdl-ios-xcode
annotate src/hermes/x86p_16.asm @ 1230:88c2d6aed428
From Mike Frysinger and/or Gentoo:
- libsdl-PIC-load-mmx-masks-from-stack.patch
This one may be a little controversial. The fix here is again that you can't
reference memory addresses like this to load into an MMX register, so the
way to do it is to push two 32-bit words onto the stack, load the 64-bit value
off of the stack into the MMX register, and then adjust the stack so that
it's back to normal.
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Thu, 05 Jan 2006 15:25:19 +0000 |
parents | 2d6dc7de1145 |
children | 393092a3ebf6 |
rev | line source |
---|---|
0 | 1 ; |
2 ; x86 format converters for HERMES | |
3 ; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org) | |
4 ; This source code is licensed under the GNU LGPL | |
5 ; | |
6 ; Please refer to the file COPYING.LIB contained in the distribution for | |
7 ; licensing conditions | |
8 ; | |
9 ; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at) | |
10 ; Used with permission. | |
11 ; | |
12 | |
13 | |
; Assembler setup: 32-bit code generation, the exported converter entry
; points, and the external symbols this file refers to.
14 BITS 32 | |
15 | |
; Exported entry points.
; NOTE(review): only the 16BGR565, 16RGB555, 16BGR555 and 8RGB332 labels are
; defined in the portion of the file reviewed here; confirm where (or whether)
; the 32-bit and 24-bit converters listed below are defined.
16 GLOBAL _ConvertX86p16_32RGB888 | |
17 GLOBAL _ConvertX86p16_32BGR888 | |
18 GLOBAL _ConvertX86p16_32RGBA888 | |
19 GLOBAL _ConvertX86p16_32BGRA888 | |
20 GLOBAL _ConvertX86p16_24RGB888 | |
21 GLOBAL _ConvertX86p16_24BGR888 | |
22 GLOBAL _ConvertX86p16_16BGR565 | |
23 GLOBAL _ConvertX86p16_16RGB555 | |
24 GLOBAL _ConvertX86p16_16BGR555 | |
25 GLOBAL _ConvertX86p16_8RGB332 | |
26 | |
; _x86return is the label every converter in this file jumps to when done.
; NOTE(review): _ConvertX86 is declared but not referenced in the reviewed
; portion of the file.
27 EXTERN _ConvertX86 | |
28 EXTERN _x86return | |
29 | |
30 SECTION .text | |
31 | |
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR565
; Converts a run of 16-bit values to 16-bit values with the top 5-bit
; field and the bottom 5-bit field exchanged and the middle 6-bit field
; kept in place:
;     out = ((in >> 11) & 0x1F) | (in & 0x07E0) | (in << 11)   (low 16 bits)
; In:    esi = source pointer, edi = destination pointer,
;        ecx = number of 16-bit elements to convert
; Out:   esi/edi advanced past the converted data
; Clobb: eax, ebx, edx, ecx, flags
; Exit:  jumps to the external label _x86return (not a ret)
; NOTE(review): presumably the source is RGB565 pixels and the register
; convention is set up by the caller of these routines - confirm.
; NOTE(review): ecx = 0 is not guarded; the short loop runs its body before
; testing the count - confirm callers never pass 0.
;-----------------------------------------------------------------------
32 _ConvertX86p16_16BGR565: | |
33 | |
; counts of 16 or fewer are handled entirely by the one-element loop
34 ; check short | |
35 cmp ecx,BYTE 16 | |
36 ja .L3 | |
37 | |
38 | |
; one element per iteration, loaded and stored bytewise
39 .L1 ; short loop | |
40 mov al,[esi] | |
41 mov ah,[esi+1] | |
42 mov ebx,eax | |
43 mov edx,eax | |
; eax = top field moved to bits 0-4
44 shr eax,11 | |
45 and eax,BYTE 11111b | |
; ebx = middle 6-bit field, unchanged position
46 and ebx,11111100000b | |
; edx = bottom field moved to bits 11-15; bits above 15 are never stored
47 shl edx,11 | |
48 add eax,ebx | |
49 add eax,edx | |
; only the low 16 bits of eax are written out
50 mov [edi],al | |
51 mov [edi+1],ah | |
52 add esi,BYTE 2 | |
53 add edi,BYTE 2 | |
54 dec ecx | |
55 jnz .L1 | |
56 .L2 | |
57 jmp _x86return | |
58 | |
; if the destination is not 4-byte aligned, convert one element first
; (assumes edi is at least 2-byte aligned so that one step reaches
; 4-byte alignment - TODO confirm)
59 .L3 ; head | |
60 mov eax,edi | |
61 and eax,BYTE 11b | |
62 jz .L4 | |
63 mov al,[esi] | |
64 mov ah,[esi+1] | |
65 mov ebx,eax | |
66 mov edx,eax | |
67 shr eax,11 | |
68 and eax,BYTE 11111b | |
69 and ebx,11111100000b | |
70 shl edx,11 | |
71 add eax,ebx | |
72 add eax,edx | |
73 mov [edi],al | |
74 mov [edi+1],ah | |
75 add esi,BYTE 2 | |
76 add edi,BYTE 2 | |
77 dec ecx | |
78 | |
; the remaining count is kept on the stack so the tail can test its low bit
79 .L4 ; save count | |
80 push ecx | |
81 | |
; ecx = number of 32-bit words (two elements each)
82 ; unroll twice | |
83 shr ecx,1 | |
84 | |
85 ; point arrays to end | |
86 lea esi,[esi+ecx*4] | |
87 lea edi,[edi+ecx*4] | |
88 | |
; ecx counts up from -words to 0 and doubles as the array index
89 ; negative counter | |
90 neg ecx | |
91 jmp SHORT .L6 | |
92 | |
; the store of each result is deferred to the top of the next iteration
; (ecx has already been incremented, hence the -4)
93 .L5 mov [edi+ecx*4-4],eax | |
94 .L6 mov eax,[esi+ecx*4] | |
95 | |
; the same source word is loaded three times, once per field
96 mov ebx,[esi+ecx*4] | |
; eax = middle fields of both elements
97 and eax,07E007E0h | |
98 | |
99 mov edx,[esi+ecx*4] | |
; ebx = top fields of both elements, moved down below
100 and ebx,0F800F800h | |
101 | |
102 shr ebx,11 | |
; edx = bottom fields of both elements, moved up below
103 and edx,001F001Fh | |
104 | |
105 shl edx,11 | |
106 add eax,ebx | |
107 | |
108 add eax,edx | |
109 inc ecx | |
110 | |
111 jnz .L5 | |
112 | |
; store the last word; ecx is 0 here so this writes [edi-4]
113 mov [edi+ecx*4-4],eax | |
114 | |
; convert one more element if the saved count was odd
115 ; tail | |
116 pop ecx | |
117 and ecx,BYTE 1 | |
118 jz .L7 | |
119 mov al,[esi] | |
120 mov ah,[esi+1] | |
121 mov ebx,eax | |
122 mov edx,eax | |
123 shr eax,11 | |
124 and eax,BYTE 11111b | |
125 and ebx,11111100000b | |
126 shl edx,11 | |
127 add eax,ebx | |
128 add eax,edx | |
129 mov [edi],al | |
130 mov [edi+1],ah | |
131 add esi,BYTE 2 | |
132 add edi,BYTE 2 | |
133 | |
134 .L7 | |
135 jmp _x86return | |
136 | |
137 | |
138 | |
139 | |
140 | |
141 | |
;-----------------------------------------------------------------------
; _ConvertX86p16_16RGB555
; Converts a run of 16-bit values by keeping the bottom 5 bits and moving
; bits 6-15 down by one position (bit 5 is dropped):
;     out = ((in >> 1) & 0x7FE0) | (in & 0x001F)
; In:    esi = source pointer, edi = destination pointer,
;        ecx = number of 16-bit elements to convert
; Out:   esi/edi advanced past the converted data
; Clobb: eax, ebx, edx, ecx, flags (ebp is used but saved and restored)
; Exit:  jumps to the external label _x86return (not a ret)
; NOTE(review): presumably the source is RGB565 and bit 5 is the low green
; bit - confirm against the caller.
; NOTE(review): ecx = 0 is not guarded; the short loop runs its body before
; testing the count - confirm callers never pass 0.
;-----------------------------------------------------------------------
142 _ConvertX86p16_16RGB555: | |
143 | |
; counts of 32 or fewer are handled entirely by the one-element loop
144 ; check short | |
145 cmp ecx,BYTE 32 | |
146 ja .L3 | |
147 | |
148 | |
; one element per iteration, loaded and stored bytewise
149 .L1 ; short loop | |
150 mov al,[esi] | |
151 mov ah,[esi+1] | |
152 mov ebx,eax | |
; ebx = upper ten bits shifted down one place
153 shr ebx,1 | |
154 and ebx, 0111111111100000b | |
; eax = bottom five bits unchanged
155 and eax,BYTE 0000000000011111b | |
156 add eax,ebx | |
157 mov [edi],al | |
158 mov [edi+1],ah | |
159 add esi,BYTE 2 | |
160 add edi,BYTE 2 | |
161 dec ecx | |
162 jnz .L1 | |
163 .L2 | |
164 jmp _x86return | |
165 | |
; if the destination is not 4-byte aligned, convert one element first
; (assumes edi is at least 2-byte aligned - TODO confirm)
166 .L3 ; head | |
167 mov eax,edi | |
168 and eax,BYTE 11b | |
169 jz .L4 | |
170 mov al,[esi] | |
171 mov ah,[esi+1] | |
172 mov ebx,eax | |
173 shr ebx,1 | |
174 and ebx, 0111111111100000b | |
175 and eax,BYTE 0000000000011111b | |
176 add eax,ebx | |
177 mov [edi],al | |
178 mov [edi+1],ah | |
179 add esi,BYTE 2 | |
180 add edi,BYTE 2 | |
181 dec ecx | |
182 | |
; ebp serves as the loop index below, so its incoming value is preserved
183 .L4 ; save ebp | |
184 push ebp | |
185 | |
; the remaining count is kept on the stack for the tail
186 ; save count | |
187 push ecx | |
188 | |
; ecx = number of 8-byte groups (four elements each)
189 ; unroll four times | |
190 shr ecx,2 | |
191 | |
192 ; point arrays to end | |
193 lea esi,[esi+ecx*8] | |
194 lea edi,[edi+ecx*8] | |
195 | |
; ebp = -groups; it counts up to 0 and doubles as the array index,
; which frees ecx to hold data inside the loop
196 ; negative counter | |
197 xor ebp,ebp | |
198 sub ebp,ecx | |
199 | |
; two 32-bit words (eax, ecx) are processed side by side per iteration
200 .L5 mov eax,[esi+ebp*8] ; agi? | |
201 mov ecx,[esi+ebp*8+4] | |
202 | |
203 mov ebx,eax | |
204 mov edx,ecx | |
205 | |
; upper ten bits of each element, shifted down one place below
206 and eax,0FFC0FFC0h | |
207 and ecx,0FFC0FFC0h | |
208 | |
209 shr eax,1 | |
; bottom five bits of each element
210 and ebx,001F001Fh | |
211 | |
212 shr ecx,1 | |
213 and edx,001F001Fh | |
214 | |
215 add eax,ebx | |
216 add ecx,edx | |
217 | |
218 mov [edi+ebp*8],eax | |
219 mov [edi+ebp*8+4],ecx | |
220 | |
221 inc ebp | |
222 jnz .L5 | |
223 | |
; convert the leftover (saved count mod 4) elements one at a time;
; the mask is re-applied on every pass and the loop ends when it yields 0
224 ; tail | |
225 pop ecx | |
226 .L6 and ecx,BYTE 11b | |
227 jz .L7 | |
228 mov al,[esi] | |
229 mov ah,[esi+1] | |
230 mov ebx,eax | |
231 shr ebx,1 | |
232 and ebx, 0111111111100000b | |
233 and eax,BYTE 0000000000011111b | |
234 add eax,ebx | |
235 mov [edi],al | |
236 mov [edi+1],ah | |
237 add esi,BYTE 2 | |
238 add edi,BYTE 2 | |
239 dec ecx | |
240 jmp SHORT .L6 | |
241 | |
; restore the ebp value saved at .L4 before leaving
242 .L7 pop ebp | |
243 jmp _x86return | |
244 | |
245 | |
246 | |
247 | |
248 | |
249 | |
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR555
; Converts a run of 16-bit values by moving the top 5-bit field to the
; bottom, the bottom 5-bit field to bits 10-14, and bits 6-10 down to
; bits 5-9 (bit 5 of the input is dropped, bit 15 of the output is 0):
;     out = ((in >> 11) & 0x001F) | ((in >> 1) & 0x03E0) | ((in << 10) & 0x7C00)
; In:    esi = source pointer, edi = destination pointer,
;        ecx = number of 16-bit elements to convert
; Out:   esi/edi advanced past the converted data
; Clobb: eax, ebx, edx, ecx, flags
; Exit:  jumps to the external label _x86return (not a ret)
; NOTE(review): presumably the source is RGB565 pixels - confirm.
; NOTE(review): ecx = 0 is not guarded; the short loop runs its body before
; testing the count - confirm callers never pass 0.
;-----------------------------------------------------------------------
250 _ConvertX86p16_16BGR555: | |
251 | |
; counts of 16 or fewer are handled entirely by the one-element loop
252 ; check short | |
253 cmp ecx,BYTE 16 | |
254 ja .L3 | |
255 | |
256 | |
; one element per iteration, loaded and stored bytewise
257 .L1 ; short loop | |
258 mov al,[esi] | |
259 mov ah,[esi+1] | |
260 mov ebx,eax | |
261 mov edx,eax | |
; eax = top field moved to bits 0-4
262 shr eax,11 | |
263 and eax,BYTE 11111b | |
; ebx = input bits 6-10 moved to bits 5-9
264 shr ebx,1 | |
265 and ebx,1111100000b | |
; edx = bottom field moved to bits 10-14
266 shl edx,10 | |
267 and edx,0111110000000000b | |
268 add eax,ebx | |
269 add eax,edx | |
270 mov [edi],al | |
271 mov [edi+1],ah | |
272 add esi,BYTE 2 | |
273 add edi,BYTE 2 | |
274 dec ecx | |
275 jnz .L1 | |
276 .L2 | |
277 jmp _x86return | |
278 | |
; if the destination is not 4-byte aligned, convert one element first
; (assumes edi is at least 2-byte aligned - TODO confirm)
279 .L3 ; head | |
280 mov eax,edi | |
281 and eax,BYTE 11b | |
282 jz .L4 | |
283 mov al,[esi] | |
284 mov ah,[esi+1] | |
285 mov ebx,eax | |
286 mov edx,eax | |
287 shr eax,11 | |
288 and eax,BYTE 11111b | |
289 shr ebx,1 | |
290 and ebx,1111100000b | |
291 shl edx,10 | |
292 and edx,0111110000000000b | |
293 add eax,ebx | |
294 add eax,edx | |
295 mov [edi],al | |
296 mov [edi+1],ah | |
297 add esi,BYTE 2 | |
298 add edi,BYTE 2 | |
299 dec ecx | |
300 | |
; the remaining count is kept on the stack so the tail can test its low bit
301 .L4 ; save count | |
302 push ecx | |
303 | |
; ecx = number of 32-bit words (two elements each)
304 ; unroll twice | |
305 shr ecx,1 | |
306 | |
307 ; point arrays to end | |
308 lea esi,[esi+ecx*4] | |
309 lea edi,[edi+ecx*4] | |
310 | |
; ecx counts up from -words to 0 and doubles as the array index
311 ; negative counter | |
312 neg ecx | |
313 jmp SHORT .L6 | |
314 | |
; the store of each result is deferred to the top of the next iteration
; (ecx has already been incremented, hence the -4)
315 .L5 mov [edi+ecx*4-4],eax | |
316 .L6 mov eax,[esi+ecx*4] | |
317 | |
; eax = bits 6-10 of both elements moved to bits 5-9
318 shr eax,1 | |
319 mov ebx,[esi+ecx*4] | |
320 | |
321 and eax,03E003E0h | |
322 mov edx,[esi+ecx*4] | |
323 | |
; ebx = top fields of both elements, moved down below
324 and ebx,0F800F800h | |
325 | |
326 shr ebx,11 | |
; edx = bottom fields of both elements, moved up below
327 and edx,001F001Fh | |
328 | |
329 shl edx,10 | |
330 add eax,ebx | |
331 | |
332 add eax,edx | |
333 inc ecx | |
334 | |
335 jnz .L5 | |
336 | |
; store the last word; ecx is 0 here so this writes [edi-4]
337 mov [edi+ecx*4-4],eax | |
338 | |
; convert one more element if the saved count was odd
339 ; tail | |
340 pop ecx | |
341 and ecx,BYTE 1 | |
342 jz .L7 | |
343 mov al,[esi] | |
344 mov ah,[esi+1] | |
345 mov ebx,eax | |
346 mov edx,eax | |
347 shr eax,11 | |
348 and eax,BYTE 11111b | |
349 shr ebx,1 | |
350 and ebx,1111100000b | |
351 shl edx,10 | |
352 and edx,0111110000000000b | |
353 add eax,ebx | |
354 add eax,edx | |
355 mov [edi],al | |
356 mov [edi+1],ah | |
357 add esi,BYTE 2 | |
358 add edi,BYTE 2 | |
359 | |
360 .L7 | |
361 jmp _x86return | |
362 | |
363 | |
364 | |
365 | |
366 | |
367 | |
;-----------------------------------------------------------------------
; _ConvertX86p16_8RGB332
; Converts a run of 16-bit values to 8-bit values built from input bits
; 3-4, 8-10 and 13-15:
;     out = ((in & 0x0018) >> 3) | ((in & 0x0700) >> 6) | ((in & 0xE000) >> 8)
; In:    esi = source pointer (2 bytes per element),
;        edi = destination pointer (1 byte per element),
;        ecx = number of elements to convert
; Out:   esi/edi advanced past the converted data
; Clobb: eax, ebx, edx, ecx, flags
; Exit:  jumps to the external label _x86return (not a ret)
; NOTE(review): presumably the source is RGB565 pixels and the selected bits
; are the top 2/3/3 bits of blue/green/red - confirm.
; NOTE(review): ecx = 0 is not guarded; the short loop runs its body before
; testing the count - confirm callers never pass 0.
;-----------------------------------------------------------------------
368 _ConvertX86p16_8RGB332: | |
369 | |
; counts of 16 or fewer are handled entirely by the one-element loop
370 ; check short | |
371 cmp ecx,BYTE 16 | |
372 ja .L3 | |
373 | |
374 | |
; one element per iteration: two source bytes in, one destination byte out
375 .L1 ; short loop | |
376 mov al,[esi+0] | |
377 mov ah,[esi+1] | |
378 mov ebx,eax | |
379 mov edx,eax | |
380 and eax,BYTE 11000b ; blue | |
381 shr eax,3 | |
382 and ebx,11100000000b ; green | |
383 shr ebx,6 | |
384 and edx,1110000000000000b ; red | |
385 shr edx,8 | |
386 add eax,ebx | |
387 add eax,edx | |
388 mov [edi],al | |
389 add esi,BYTE 2 | |
390 inc edi | |
391 dec ecx | |
392 jnz .L1 | |
393 .L2 | |
394 jmp _x86return | |
395 | |
; head: convert single elements until the destination is 4-byte aligned
; (this is a loop - it jumps back to .L3 after each element)
396 .L3 mov eax,edi | |
397 and eax,BYTE 11b | |
398 jz .L4 | |
399 mov al,[esi+0] | |
400 mov ah,[esi+1] | |
401 mov ebx,eax | |
402 mov edx,eax | |
403 and eax,BYTE 11000b ; blue | |
404 shr eax,3 | |
405 and ebx,11100000000b ; green | |
406 shr ebx,6 | |
407 and edx,1110000000000000b ; red | |
408 shr edx,8 | |
409 add eax,ebx | |
410 add eax,edx | |
411 mov [edi],al | |
412 add esi,BYTE 2 | |
413 inc edi | |
414 dec ecx | |
415 jmp SHORT .L3 | |
416 | |
; NOTE(review): ebp is pushed here and popped at .L7 but is not otherwise
; referenced in this routine.
417 .L4 ; save ebp | |
418 push ebp | |
419 | |
; the remaining count is kept on the stack for the tail
420 ; save count | |
421 push ecx | |
422 | |
; ecx = number of four-element groups (8 source bytes -> 4 output bytes)
423 ; unroll 4 times | |
424 shr ecx,2 | |
425 | |
; pre-load the first three bytes of the first group: edx collects the
; low bytes of the elements, ebx collects the high bytes
426 ; prestep | |
427 mov dl,[esi+0] | |
428 mov bl,[esi+1] | |
429 mov dh,[esi+2] | |
430 | |
; move elements 0 and 1 into the upper halves of edx/ebx, then load
; elements 2 and 3 into the lower halves
431 .L5 shl edx,16 | |
432 mov bh,[esi+3] | |
433 | |
434 shl ebx,16 | |
435 mov dl,[esi+4] | |
436 | |
437 mov dh,[esi+6] | |
438 mov bl,[esi+5] | |
439 | |
; keep bits 3-4 of each of the four low bytes
440 and edx,00011000000110000001100000011000b | |
441 mov bh,[esi+7] | |
442 | |
; rotate by 16 to put element 0 in the lowest byte, plus 3 to move the
; kept bits down to bits 0-1 of each byte
443 ror edx,16+3 | |
444 mov eax,ebx ; setup eax for reds | |
445 | |
; keep bits 0-2 of each high byte in ebx and bits 5-7 in eax
446 and ebx,00000111000001110000011100000111b | |
447 and eax,11100000111000001110000011100000b ; reds | |
448 | |
; rotate by 16 for byte order, less 2 so the bits land in bits 2-4
449 ror ebx,16-2 | |
450 add esi,BYTE 8 | |
451 | |
452 ror eax,16 | |
453 add edi,BYTE 4 | |
454 | |
; combine the three fields; the loads interleaved here pre-load the first
; three bytes of the NEXT group (esi has already been advanced by 8)
; NOTE(review): these pre-loads also run on the final iteration, so three
; bytes following the last converted group are read; when the tail count
; is 0 they lie beyond the ecx-element source span - confirm the source
; buffer is always readable there.
455 add eax,ebx | |
456 mov bl,[esi+1] ; greens | |
457 | |
458 add eax,edx | |
459 mov dl,[esi+0] ; blues | |
460 | |
; edi was already advanced by 4, hence the -4
461 mov [edi-4],eax | |
462 mov dh,[esi+2] | |
463 | |
464 dec ecx | |
465 jnz .L5 | |
466 | |
; ecx = saved count mod 4 = number of leftover elements
467 ; check tail | |
468 pop ecx | |
469 and ecx,BYTE 11b | |
470 jz .L7 | |
471 | |
; convert the leftover elements one at a time
472 .L6 ; tail | |
473 mov al,[esi+0] | |
474 mov ah,[esi+1] | |
475 mov ebx,eax | |
476 mov edx,eax | |
477 and eax,BYTE 11000b ; blue | |
478 shr eax,3 | |
479 and ebx,11100000000b ; green | |
480 shr ebx,6 | |
481 and edx,1110000000000000b ; red | |
482 shr edx,8 | |
483 add eax,ebx | |
484 add eax,edx | |
485 mov [edi],al | |
486 add esi,BYTE 2 | |
487 inc edi | |
488 dec ecx | |
489 jnz .L6 | |
490 | |
; restore the ebp value saved at .L4 before leaving
491 .L7 pop ebp | |
492 jmp _x86return | |
493 | |
1199
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
494 %ifidn __OUTPUT_FORMAT__,elf |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
495 section .note.GNU-stack noalloc noexec nowrite progbits |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
496 %endif |