Mercurial > sdl-ios-xcode
annotate src/hermes/x86p_32.asm @ 2079:1ed2155b7ee4
From: Torsten Giebl
Subject: ALLOCA Patch for SDL-1.2
Newsgroups: gmane.comp.lib.sdl
Date: 2006-10-24 00:31:16 GMT
Hello !
As alloca.h is not available on CYGWIN in MinGW mode or on MinGW32,
it should either not be checked for there, or it should be checked
for while using -mno-cygwin.
I am a total configure.in newbie, but i found the way
that things are handled in configure.in pretty bad for the
case CYGWIN with MinGW Mode or not.
Maybe this is not possible, but I would like to
have a way to detect, for example, CYGWIN at the start
and then additionally ask whether configure was called with --enable-cygwin,
for example. --enable-cygwin should be optional and disabled
by default, as we want to have MinGW mode by default.
Only if the user wants it he should be able to use it.
Then the whole configure.in would get add. questions
okay the system is CYGWIN with or without MinGW Mode.
The alloca.h thing for example is only
available under CYGWIN ( without MinGW Mode ).
CU
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Fri, 01 Dec 2006 20:25:03 +0000 |
parents | eb4d9d99849b |
children | 180fa05e98e2 |
rev | line source |
---|---|
0 | 1 ; |
2 ; x86 format converters for HERMES | |
3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) | |
4 ; This source code is licensed under the GNU LGPL | |
5 ; | |
6 ; Please refer to the file COPYING.LIB contained in the distribution for | |
7 ; licensing conditions | |
8 ; | |
9 ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission | |
10 ; | |
11 | |
12 BITS 32 | |
13 | |
1873
eb4d9d99849b
Renamed, per Mike's comment on bug #157
Sam Lantinga <slouken@libsdl.org>
parents:
1871
diff
changeset
|
14 %include "common.inc" |
1871 | 15 |
16 SDL_FUNC _ConvertX86p32_32BGR888 | |
17 SDL_FUNC _ConvertX86p32_32RGBA888 | |
18 SDL_FUNC _ConvertX86p32_32BGRA888 | |
19 SDL_FUNC _ConvertX86p32_24RGB888 | |
20 SDL_FUNC _ConvertX86p32_24BGR888 | |
21 SDL_FUNC _ConvertX86p32_16RGB565 | |
22 SDL_FUNC _ConvertX86p32_16BGR565 | |
23 SDL_FUNC _ConvertX86p32_16RGB555 | |
24 SDL_FUNC _ConvertX86p32_16BGR555 | |
25 SDL_FUNC _ConvertX86p32_8RGB332 | |
0 | 26 |
27 EXTERN _x86return | |
1166
da33b7e6d181
Date: Tue, 1 Nov 2005 20:25:10 +0100
Sam Lantinga <slouken@libsdl.org>
parents:
0
diff
changeset
|
28 |
0 | 29 SECTION .text |
30 | |
31 ;; _Convert_* | |
32 ;; Parameters: | |
33 ;; ESI = source | |
34 ;; EDI = dest | |
35 ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) | |
36 ;; Destroys: | |
37 ;; EAX, EBX, EDX | |
38 | |
39 | |
;; _ConvertX86p32_32BGR888
;; Converts ECX 32-bit pixels from [ESI] to [EDI]. Each dword is byte-swapped
;; (bswap) and then rotated right by 8, which exchanges byte 0 with byte 2
;; and leaves bytes 1 and 3 where they were.
;; Counts of 32 or fewer use a per-pixel loop; larger counts run a loop
;; unrolled four times, followed by a per-pixel tail for (count mod 4).
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX. EBP is pushed and
;; popped on the unrolled path. Exits by jumping to _x86return.
40 _ConvertX86p32_32BGR888: | |
41 | |
42 ; check short: counts of 32 or fewer take the per-pixel loop | |
43 cmp ecx,BYTE 32 | |
44 ja .L3 | |
45 | |
46 .L1 ; short loop: one pixel per iteration | |
47 mov edx,[esi] | |
48 bswap edx | |
49 ror edx,8 | |
50 mov [edi],edx | |
51 add esi,BYTE 4 | |
52 add edi,BYTE 4 | |
53 dec ecx | |
54 jnz .L1 | |
55 .L2 | |
56 jmp _x86return | |
57 | |
58 .L3 ; save ebp (used below as the group counter) | |
59 push ebp | |
60 | |
61 ; unroll four times: ebp = number of 4-pixel groups | |
62 mov ebp,ecx | |
63 shr ebp,2 | |
64 | |
65 ; save count (ecx is reused as a data register in the unrolled loop) | |
66 push ecx | |
67 | |
68 .L4 mov eax,[esi] | |
69 mov ebx,[esi+4] | |
70 | |
71 bswap eax | |
72 | |
73 bswap ebx | |
74 | |
75 ror eax,8 | |
76 mov ecx,[esi+8] | |
77 | |
78 ror ebx,8 | |
79 mov edx,[esi+12] | |
80 | |
81 bswap ecx | |
82 | |
83 bswap edx | |
84 | |
85 ror ecx,8 | |
86 mov [edi+0],eax | |
87 | |
88 ror edx,8 | |
89 mov [edi+4],ebx | |
90 | |
91 mov [edi+8],ecx | |
92 mov [edi+12],edx | |
93 | |
94 add esi,BYTE 16 | |
95 add edi,BYTE 16 | |
96 | |
97 dec ebp | |
98 jnz .L4 | |
99 | |
100 ; check tail: remaining pixels = saved count mod 4 | |
101 pop ecx | |
102 and ecx,BYTE 11b | |
103 jz .L6 | |
104 | |
105 .L5 ; tail loop: one pixel per iteration | |
106 mov edx,[esi] | |
107 bswap edx | |
108 ror edx,8 | |
109 mov [edi],edx | |
110 add esi,BYTE 4 | |
111 add edi,BYTE 4 | |
112 dec ecx | |
113 jnz .L5 | |
114 | |
115 .L6 pop ebp | |
116 jmp _x86return | |
117 | |
118 | |
119 | |
120 | |
;; _ConvertX86p32_32RGBA888
;; Converts ECX 32-bit pixels from [ESI] to [EDI] by rotating each dword
;; left by 8 bits (the top byte moves to the bottom, the other three move up).
;; Counts of 32 or fewer use a per-pixel loop; larger counts run a loop
;; unrolled four times, followed by a per-pixel tail for (count mod 4).
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX. EBP is pushed and
;; popped on the unrolled path. Exits by jumping to _x86return.
121 _ConvertX86p32_32RGBA888: | |
122 | |
123 ; check short: counts of 32 or fewer take the per-pixel loop | |
124 cmp ecx,BYTE 32 | |
125 ja .L3 | |
126 | |
127 .L1 ; short loop: one pixel per iteration | |
128 mov edx,[esi] | |
129 rol edx,8 | |
130 mov [edi],edx | |
131 add esi,BYTE 4 | |
132 add edi,BYTE 4 | |
133 dec ecx | |
134 jnz .L1 | |
135 .L2 | |
136 jmp _x86return | |
137 | |
138 .L3 ; save ebp (used below as the group counter) | |
139 push ebp | |
140 | |
141 ; unroll four times: ebp = number of 4-pixel groups | |
142 mov ebp,ecx | |
143 shr ebp,2 | |
144 | |
145 ; save count (ecx is reused as a data register in the unrolled loop) | |
146 push ecx | |
147 | |
148 .L4 mov eax,[esi] | |
149 mov ebx,[esi+4] | |
150 | |
151 rol eax,8 | |
152 mov ecx,[esi+8] | |
153 | |
154 rol ebx,8 | |
155 mov edx,[esi+12] | |
156 | |
157 rol ecx,8 | |
158 mov [edi+0],eax | |
159 | |
160 rol edx,8 | |
161 mov [edi+4],ebx | |
162 | |
163 mov [edi+8],ecx | |
164 mov [edi+12],edx | |
165 | |
166 add esi,BYTE 16 | |
167 add edi,BYTE 16 | |
168 | |
169 dec ebp | |
170 jnz .L4 | |
171 | |
172 ; check tail: remaining pixels = saved count mod 4 | |
173 pop ecx | |
174 and ecx,BYTE 11b | |
175 jz .L6 | |
176 | |
177 .L5 ; tail loop: one pixel per iteration | |
178 mov edx,[esi] | |
179 rol edx,8 | |
180 mov [edi],edx | |
181 add esi,BYTE 4 | |
182 add edi,BYTE 4 | |
183 dec ecx | |
184 jnz .L5 | |
185 | |
186 .L6 pop ebp | |
187 jmp _x86return | |
188 | |
189 | |
190 | |
191 | |
;; _ConvertX86p32_32BGRA888
;; Converts ECX 32-bit pixels from [ESI] to [EDI] by reversing the byte
;; order of each dword (bswap).
;; Counts of 32 or fewer use a per-pixel loop; larger counts run a loop
;; unrolled four times, followed by a per-pixel tail for (count mod 4).
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX. EBP is pushed and
;; popped on the unrolled path. Exits by jumping to _x86return.
192 _ConvertX86p32_32BGRA888: | |
193 | |
194 ; check short: counts of 32 or fewer take the per-pixel loop | |
195 cmp ecx,BYTE 32 | |
196 ja .L3 | |
197 | |
198 .L1 ; short loop: one pixel per iteration | |
199 mov edx,[esi] | |
200 bswap edx | |
201 mov [edi],edx | |
202 add esi,BYTE 4 | |
203 add edi,BYTE 4 | |
204 dec ecx | |
205 jnz .L1 | |
206 .L2 | |
207 jmp _x86return | |
208 | |
209 .L3 ; save ebp (used below as the group counter) | |
210 push ebp | |
211 | |
212 ; unroll four times: ebp = number of 4-pixel groups | |
213 mov ebp,ecx | |
214 shr ebp,2 | |
215 | |
216 ; save count (ecx is reused as a data register in the unrolled loop) | |
217 push ecx | |
218 | |
219 .L4 mov eax,[esi] | |
220 mov ebx,[esi+4] | |
221 | |
222 mov ecx,[esi+8] | |
223 mov edx,[esi+12] | |
224 | |
225 bswap eax | |
226 | |
227 bswap ebx | |
228 | |
229 bswap ecx | |
230 | |
231 bswap edx | |
232 | |
233 mov [edi+0],eax | |
234 mov [edi+4],ebx | |
235 | |
236 mov [edi+8],ecx | |
237 mov [edi+12],edx | |
238 | |
239 add esi,BYTE 16 | |
240 add edi,BYTE 16 | |
241 | |
242 dec ebp | |
243 jnz .L4 | |
244 | |
245 ; check tail: remaining pixels = saved count mod 4 | |
246 pop ecx | |
247 and ecx,BYTE 11b | |
248 jz .L6 | |
249 | |
250 .L5 ; tail loop: one pixel per iteration | |
251 mov edx,[esi] | |
252 bswap edx | |
253 mov [edi],edx | |
254 add esi,BYTE 4 | |
255 add edi,BYTE 4 | |
256 dec ecx | |
257 jnz .L5 | |
258 | |
259 .L6 pop ebp | |
260 jmp _x86return | |
261 | |
262 | |
263 | |
264 | |
265 ;; 32 bit RGB 888 to 24 BIT RGB 888 | |
;; Converts ECX 4-byte source pixels at [ESI] to 3-byte destination pixels
;; at [EDI]: source bytes 0, 1 and 2 are copied in the same order and
;; source byte 3 is dropped.
;; Counts of 32 or fewer use a per-pixel byte loop. Larger counts first copy
;; single pixels until EDI is 4-byte aligned, then convert four source
;; pixels (16 bytes) into three destination dwords (12 bytes) per iteration,
;; and finish with a per-pixel tail for (remaining count mod 4).
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX. EBP is pushed and
;; popped on the unrolled path. Exits by jumping to _x86return.
266 | |
267 _ConvertX86p32_24RGB888: | |
268 | |
269 ; check short: counts of 32 or fewer take the per-pixel loop | |
270 cmp ecx,BYTE 32 | |
271 ja .L3 | |
272 | |
273 .L1 ; short loop: copy three bytes, skip the fourth source byte | |
274 mov al,[esi] | |
275 mov bl,[esi+1] | |
276 mov dl,[esi+2] | |
277 mov [edi],al | |
278 mov [edi+1],bl | |
279 mov [edi+2],dl | |
280 add esi,BYTE 4 | |
281 add edi,BYTE 3 | |
282 dec ecx | |
283 jnz .L1 | |
284 .L2 | |
285 jmp _x86return | |
286 | |
287 .L3 ; head: copy single pixels until edi is 4-byte aligned (ecx is not tested here; it is above 32 on entry) | |
288 mov edx,edi | |
289 and edx,BYTE 11b | |
290 jz .L4 | |
291 mov al,[esi] | |
292 mov bl,[esi+1] | |
293 mov dl,[esi+2] | |
294 mov [edi],al | |
295 mov [edi+1],bl | |
296 mov [edi+2],dl | |
297 add esi,BYTE 4 | |
298 add edi,BYTE 3 | |
299 dec ecx | |
300 jmp SHORT .L3 | |
301 | |
302 .L4 ; unroll 4 times: ebp = number of 4-pixel groups | |
303 push ebp | |
304 mov ebp,ecx | |
305 shr ebp,2 | |
306 | |
307 ; save count (ecx is reused as a data register in the unrolled loop) | |
308 push ecx | |
309 | |
310 .L5 mov eax,[esi] ; first dword eax = [A][R][G][B] | |
311 mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] | |
312 | |
313 shl eax,8 ; eax = [R][G][B][.] | |
314 mov ecx,[esi+12] ; fourth dword ecx = [a][r][g][b] | |
315 | |
316 shl ebx,8 ; ebx = [r][g][b][.] | |
317 mov al,[esi+4] ; eax = [R][G][B][b] (b from the second pixel) | |
318 | |
319 ror eax,8 ; eax = [b][R][G][B] (done) | |
320 mov bh,[esi+8+1] ; ebx = [r][g][G][.] (G from the third pixel) | |
321 | |
322 mov [edi],eax | |
323 add edi,BYTE 3*4 | |
324 | |
325 shl ecx,8 ; ecx = [r][g][b][.] | |
326 mov bl,[esi+8+0] ; ebx = [r][g][G][B] (B from the third pixel) | |
327 | |
328 rol ebx,16 ; ebx = [G][B][r][g] (done) | |
329 mov cl,[esi+8+2] ; ecx = [r][g][b][R] (done; R from the third pixel) | |
330 | |
331 mov [edi+4-3*4],ebx | |
332 add esi,BYTE 4*4 | |
333 | |
334 mov [edi+8-3*4],ecx | |
335 dec ebp | |
336 | |
337 jnz .L5 | |
338 | |
339 ; check tail: remaining pixels = saved count mod 4 | |
340 pop ecx | |
341 and ecx,BYTE 11b | |
342 jz .L7 | |
343 | |
344 .L6 ; tail loop: one pixel per iteration | |
345 mov al,[esi] | |
346 mov bl,[esi+1] | |
347 mov dl,[esi+2] | |
348 mov [edi],al | |
349 mov [edi+1],bl | |
350 mov [edi+2],dl | |
351 add esi,BYTE 4 | |
352 add edi,BYTE 3 | |
353 dec ecx | |
354 jnz .L6 | |
355 | |
356 .L7 pop ebp | |
357 jmp _x86return | |
358 | |
359 | |
360 | |
361 | |
362 ;; 32 bit RGB 888 to 24 bit BGR 888 | |
;; Converts ECX 4-byte source pixels at [ESI] to 3-byte destination pixels
;; at [EDI]: source bytes 0, 1 and 2 are written in reversed order (2, 1, 0)
;; and source byte 3 is dropped.
;; Counts of 32 or fewer use a per-pixel byte loop. Larger counts first copy
;; single pixels until EDI is 4-byte aligned, then convert four source
;; pixels (16 bytes) into three destination dwords (12 bytes) per iteration,
;; and finish with a per-pixel tail for (remaining count mod 4).
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX. EBP is pushed and
;; popped on the unrolled path. Exits by jumping to _x86return.
363 | |
364 _ConvertX86p32_24BGR888: | |
365 | |
366 ; check short: counts of 32 or fewer take the per-pixel loop | |
367 cmp ecx,BYTE 32 | |
368 ja .L3 | |
369 | |
370 | |
371 .L1 ; short loop: write source bytes 2,1,0 and skip the fourth source byte | |
372 mov dl,[esi] | |
373 mov bl,[esi+1] | |
374 mov al,[esi+2] | |
375 mov [edi],al | |
376 mov [edi+1],bl | |
377 mov [edi+2],dl | |
378 add esi,BYTE 4 | |
379 add edi,BYTE 3 | |
380 dec ecx | |
381 jnz .L1 | |
382 .L2 | |
383 jmp _x86return | |
384 | |
385 .L3 ; head: convert single pixels until edi is 4-byte aligned (ecx is not tested here; it is above 32 on entry) | |
386 mov edx,edi | |
387 and edx,BYTE 11b | |
388 jz .L4 | |
389 mov dl,[esi] | |
390 mov bl,[esi+1] | |
391 mov al,[esi+2] | |
392 mov [edi],al | |
393 mov [edi+1],bl | |
394 mov [edi+2],dl | |
395 add esi,BYTE 4 | |
396 add edi,BYTE 3 | |
397 dec ecx | |
398 jmp SHORT .L3 | |
399 | |
400 .L4 ; unroll 4 times: ebp = number of 4-pixel groups | |
401 push ebp | |
402 mov ebp,ecx | |
403 shr ebp,2 | |
404 | |
405 ; save count (ecx is reused as a data register in the unrolled loop) | |
406 push ecx | |
407 | |
408 .L5 | |
409 mov eax,[esi] ; first dword eax = [A][R][G][B] | |
410 mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] | |
411 | |
412 bswap eax ; eax = [B][G][R][A] | |
413 | |
414 bswap ebx ; ebx = [b][g][r][a] | |
415 | |
416 mov al,[esi+4+2] ; eax = [B][G][R][r] (r from the second pixel) | |
417 mov bh,[esi+4+4+1] ; ebx = [b][g][G][a] (G from the third pixel) | |
418 | |
419 ror eax,8 ; eax = [r][B][G][R] (done) | |
420 mov bl,[esi+4+4+2] ; ebx = [b][g][G][R] (R from the third pixel) | |
421 | |
422 ror ebx,16 ; ebx = [G][R][b][g] (done) | |
423 mov [edi],eax | |
424 | |
425 mov [edi+4],ebx | |
426 mov ecx,[esi+12] ; fourth dword ecx = [a][r][g][b] | |
427 | |
428 bswap ecx ; ecx = [b][g][r][a] | |
429 | |
430 mov cl,[esi+8] ; ecx = [b][g][r][B] (done; B from the third pixel) | |
431 add esi,BYTE 4*4 | |
432 | |
433 mov [edi+8],ecx | |
434 add edi,BYTE 3*4 | |
435 | |
436 dec ebp | |
437 jnz .L5 | |
438 | |
439 ; check tail: remaining pixels = saved count mod 4 | |
440 pop ecx | |
441 and ecx,BYTE 11b | |
442 jz .L7 | |
443 | |
444 .L6 ; tail loop: one pixel per iteration | |
445 mov dl,[esi] | |
446 mov bl,[esi+1] | |
447 mov al,[esi+2] | |
448 mov [edi],al | |
449 mov [edi+1],bl | |
450 mov [edi+2],dl | |
451 add esi,BYTE 4 | |
452 add edi,BYTE 3 | |
453 dec ecx | |
454 jnz .L6 | |
455 | |
456 .L7 | |
457 pop ebp | |
458 jmp _x86return | |
459 | |
460 | |
461 | |
462 | |
463 ;; 32 bit RGB 888 to 16 BIT RGB 565 | |
;; Converts ECX 4-byte source pixels at [ESI] to 2-byte pixels at [EDI].
;; With blue = source byte 0, green = byte 1, red = byte 2, each output word
;; is (red>>3)<<11 | (green>>2)<<5 | (blue>>3).
;; Counts of 16 or fewer use a per-pixel loop. Larger counts convert one
;; pixel first if EDI is not 4-byte aligned, then handle two pixels per
;; iteration with one dword store, and finally convert one leftover pixel
;; if the remaining count was odd.
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX.
;; Exits by jumping to _x86return.
464 | |
465 _ConvertX86p32_16RGB565: | |
466 ; check short: counts of 16 or fewer take the per-pixel loop | |
467 cmp ecx,BYTE 16 | |
468 ja .L3 | |
469 | |
470 .L1 ; short loop: one pixel per iteration | |
471 mov bl,[esi+0] ; blue | |
472 mov al,[esi+1] ; green | |
473 mov ah,[esi+2] ; red | |
474 shr ah,3 | |
475 and al,11111100b | |
476 shl eax,3 | |
477 shr bl,3 | |
478 add al,bl | |
479 mov [edi+0],al | |
480 mov [edi+1],ah | |
481 add esi,BYTE 4 | |
482 add edi,BYTE 2 | |
483 dec ecx | |
484 jnz .L1 | |
485 | |
486 .L2: ; End of short loop | |
487 jmp _x86return | |
488 | |
489 | |
490 .L3 ; head: if edi is not 4-byte aligned, convert one pixel first | |
491 mov ebx,edi | |
492 and ebx,BYTE 11b | |
493 jz .L4 | |
494 | |
495 mov bl,[esi+0] ; blue | |
496 mov al,[esi+1] ; green | |
497 mov ah,[esi+2] ; red | |
498 shr ah,3 | |
499 and al,11111100b | |
500 shl eax,3 | |
501 shr bl,3 | |
502 add al,bl | |
503 mov [edi+0],al | |
504 mov [edi+1],ah | |
505 add esi,BYTE 4 | |
506 add edi,BYTE 2 | |
507 dec ecx | |
508 | |
509 .L4: | |
510 ; save count (its low bit is tested for the tail) | |
511 push ecx | |
512 | |
513 ; unroll twice: ecx = number of pixel pairs | |
514 shr ecx,1 | |
515 | |
516 ; point arrays to end of the paired region | |
517 lea esi,[esi+ecx*8] | |
518 lea edi,[edi+ecx*4] | |
519 | |
520 ; negative counter: counts up to zero; first pass skips the deferred store | |
521 neg ecx | |
522 jmp SHORT .L6 | |
523 | |
524 .L5: | |
525 mov [edi+ecx*4-4],eax | |
526 .L6: | |
527 mov eax,[esi+ecx*8] | |
528 | |
529 shr ah,2 | |
530 mov ebx,[esi+ecx*8+4] | |
531 | |
532 shr eax,3 | |
533 mov edx,[esi+ecx*8+4] | |
534 | |
535 shr bh,2 | |
536 mov dl,[esi+ecx*8+2] | |
537 | |
538 shl ebx,13 | |
539 and eax,000007FFh | |
540 | |
541 shl edx,8 | |
542 and ebx,07FF0000h | |
543 | |
544 and edx,0F800F800h | |
545 add eax,ebx | |
546 | |
547 add eax,edx | |
548 inc ecx | |
549 | |
550 jnz .L5 | |
551 | |
552 mov [edi+ecx*4-4],eax | |
553 | |
554 ; tail: one more pixel if the saved count was odd | |
555 pop ecx | |
556 test cl,1 | |
557 jz .L7 | |
558 | |
559 mov bl,[esi+0] ; blue | |
560 mov al,[esi+1] ; green | |
561 mov ah,[esi+2] ; red | |
562 shr ah,3 | |
563 and al,11111100b | |
564 shl eax,3 | |
565 shr bl,3 | |
566 add al,bl | |
567 mov [edi+0],al | |
568 mov [edi+1],ah | |
569 add esi,BYTE 4 | |
570 add edi,BYTE 2 | |
571 | |
572 .L7: | |
573 jmp _x86return | |
574 | |
575 | |
576 | |
577 | |
578 ;; 32 bit RGB 888 to 16 BIT BGR 565 | |
;; Converts ECX 4-byte source pixels at [ESI] to 2-byte pixels at [EDI].
;; With blue = source byte 0, green = byte 1, red = byte 2, each output word
;; is (blue>>3)<<11 | (green>>2)<<5 | (red>>3).
;; Counts of 16 or fewer use a per-pixel loop. Larger counts convert one
;; pixel first if EDI is not 4-byte aligned, then handle two pixels per
;; iteration with one dword store, and finally convert one leftover pixel
;; if the remaining count was odd.
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX.
;; Exits by jumping to _x86return.
579 | |
580 _ConvertX86p32_16BGR565: | |
581 | |
582 ; check short: counts of 16 or fewer take the per-pixel loop | |
583 cmp ecx,BYTE 16 | |
584 ja .L3 | |
585 | |
586 .L1 ; short loop: one pixel per iteration | |
587 mov ah,[esi+0] ; blue | |
588 mov al,[esi+1] ; green | |
589 mov bl,[esi+2] ; red | |
590 shr ah,3 | |
591 and al,11111100b | |
592 shl eax,3 | |
593 shr bl,3 | |
594 add al,bl | |
595 mov [edi+0],al | |
596 mov [edi+1],ah | |
597 add esi,BYTE 4 | |
598 add edi,BYTE 2 | |
599 dec ecx | |
600 jnz .L1 | |
601 .L2 | |
602 jmp _x86return | |
603 | |
604 .L3 ; head: if edi is not 4-byte aligned, convert one pixel first | |
605 mov ebx,edi | |
606 and ebx,BYTE 11b | |
607 jz .L4 | |
608 mov ah,[esi+0] ; blue | |
609 mov al,[esi+1] ; green | |
610 mov bl,[esi+2] ; red | |
611 shr ah,3 | |
612 and al,11111100b | |
613 shl eax,3 | |
614 shr bl,3 | |
615 add al,bl | |
616 mov [edi+0],al | |
617 mov [edi+1],ah | |
618 add esi,BYTE 4 | |
619 add edi,BYTE 2 | |
620 dec ecx | |
621 | |
622 .L4 ; save count (its low bit is tested for the tail) | |
623 push ecx | |
624 | |
625 ; unroll twice: ecx = number of pixel pairs | |
626 shr ecx,1 | |
627 | |
628 ; point arrays to end of the paired region | |
629 lea esi,[esi+ecx*8] | |
630 lea edi,[edi+ecx*4] | |
631 | |
632 ; negative count: counts up to zero; first pass skips the deferred store | |
633 neg ecx | |
634 jmp SHORT .L6 | |
635 | |
636 .L5 | |
637 mov [edi+ecx*4-4],eax | |
638 .L6 | |
639 mov edx,[esi+ecx*8+4] | |
640 | |
641 mov bh,[esi+ecx*8+4] | |
642 mov ah,[esi+ecx*8] | |
643 | |
644 shr bh,3 | |
645 mov al,[esi+ecx*8+1] | |
646 | |
647 shr ah,3 | |
648 mov bl,[esi+ecx*8+5] | |
649 | |
650 shl eax,3 | |
651 mov dl,[esi+ecx*8+2] | |
652 | |
653 shl ebx,19 | |
654 and eax,0000FFE0h | |
655 | |
656 shr edx,3 | |
657 and ebx,0FFE00000h | |
658 | |
659 and edx,001F001Fh | |
660 add eax,ebx | |
661 | |
662 add eax,edx | |
663 inc ecx | |
664 | |
665 jnz .L5 | |
666 | |
667 mov [edi+ecx*4-4],eax | |
668 | |
669 ; tail: one more pixel if the saved count was odd | |
670 pop ecx | |
671 and ecx,BYTE 1 | |
672 jz .L7 | |
673 mov ah,[esi+0] ; blue | |
674 mov al,[esi+1] ; green | |
675 mov bl,[esi+2] ; red | |
676 shr ah,3 | |
677 and al,11111100b | |
678 shl eax,3 | |
679 shr bl,3 | |
680 add al,bl | |
681 mov [edi+0],al | |
682 mov [edi+1],ah | |
683 add esi,BYTE 4 | |
684 add edi,BYTE 2 | |
685 | |
686 .L7 | |
687 jmp _x86return | |
688 | |
689 | |
690 | |
691 | |
692 ;; 32 BIT RGB TO 16 BIT RGB 555 | |
;; Converts ECX 4-byte source pixels at [ESI] to 2-byte pixels at [EDI].
;; With blue = source byte 0, green = byte 1, red = byte 2, each output word
;; is (red>>3)<<10 | (green>>3)<<5 | (blue>>3); bit 15 is zero.
;; Counts of 16 or fewer use a per-pixel loop. Larger counts convert one
;; pixel first if EDI is not 4-byte aligned, then handle two pixels per
;; iteration with one dword store, and finally convert one leftover pixel
;; if the remaining count was odd.
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX.
;; Exits by jumping to _x86return.
693 | |
694 _ConvertX86p32_16RGB555: | |
695 | |
696 ; check short: counts of 16 or fewer take the per-pixel loop | |
697 cmp ecx,BYTE 16 | |
698 ja .L3 | |
699 | |
700 .L1 ; short loop: one pixel per iteration | |
701 mov bl,[esi+0] ; blue | |
702 mov al,[esi+1] ; green | |
703 mov ah,[esi+2] ; red | |
704 shr ah,3 | |
705 and al,11111000b | |
706 shl eax,2 | |
707 shr bl,3 | |
708 add al,bl | |
709 mov [edi+0],al | |
710 mov [edi+1],ah | |
711 add esi,BYTE 4 | |
712 add edi,BYTE 2 | |
713 dec ecx | |
714 jnz .L1 | |
715 .L2 | |
716 jmp _x86return | |
717 | |
718 .L3 ; head: if edi is not 4-byte aligned, convert one pixel first | |
719 mov ebx,edi | |
720 and ebx,BYTE 11b | |
721 jz .L4 | |
722 mov bl,[esi+0] ; blue | |
723 mov al,[esi+1] ; green | |
724 mov ah,[esi+2] ; red | |
725 shr ah,3 | |
726 and al,11111000b | |
727 shl eax,2 | |
728 shr bl,3 | |
729 add al,bl | |
730 mov [edi+0],al | |
731 mov [edi+1],ah | |
732 add esi,BYTE 4 | |
733 add edi,BYTE 2 | |
734 dec ecx | |
735 | |
736 .L4 ; save count (its low bit is tested for the tail) | |
737 push ecx | |
738 | |
739 ; unroll twice: ecx = number of pixel pairs | |
740 shr ecx,1 | |
741 | |
742 ; point arrays to end of the paired region | |
743 lea esi,[esi+ecx*8] | |
744 lea edi,[edi+ecx*4] | |
745 | |
746 ; negative counter: counts up to zero; first pass skips the deferred store | |
747 neg ecx | |
748 jmp SHORT .L6 | |
749 | |
750 .L5 | |
751 mov [edi+ecx*4-4],eax | |
752 .L6 | |
753 mov eax,[esi+ecx*8] | |
754 | |
755 shr ah,3 | |
756 mov ebx,[esi+ecx*8+4] | |
757 | |
758 shr eax,3 | |
759 mov edx,[esi+ecx*8+4] | |
760 | |
761 shr bh,3 | |
762 mov dl,[esi+ecx*8+2] | |
763 | |
764 shl ebx,13 | |
765 and eax,000007FFh | |
766 | |
767 shl edx,7 | |
768 and ebx,07FF0000h | |
769 | |
770 and edx,07C007C00h | |
771 add eax,ebx | |
772 | |
773 add eax,edx | |
774 inc ecx | |
775 | |
776 jnz .L5 | |
777 | |
778 mov [edi+ecx*4-4],eax | |
779 | |
780 ; tail: one more pixel if the saved count was odd | |
781 pop ecx | |
782 and ecx,BYTE 1 | |
783 jz .L7 | |
784 mov bl,[esi+0] ; blue | |
785 mov al,[esi+1] ; green | |
786 mov ah,[esi+2] ; red | |
787 shr ah,3 | |
788 and al,11111000b | |
789 shl eax,2 | |
790 shr bl,3 | |
791 add al,bl | |
792 mov [edi+0],al | |
793 mov [edi+1],ah | |
794 add esi,BYTE 4 | |
795 add edi,BYTE 2 | |
796 | |
797 .L7 | |
798 jmp _x86return | |
799 | |
800 | |
801 | |
802 | |
803 ;; 32 BIT RGB TO 16 BIT BGR 555 | |
;; Converts ECX 4-byte source pixels at [ESI] to 2-byte pixels at [EDI].
;; With blue = source byte 0, green = byte 1, red = byte 2, each output word
;; is (blue>>3)<<10 | (green>>3)<<5 | (red>>3); bit 15 is zero.
;; Counts of 16 or fewer use a per-pixel loop. Larger counts convert one
;; pixel first if EDI is not 4-byte aligned, then handle two pixels per
;; iteration with one dword store, and finally convert one leftover pixel
;; if the remaining count was odd.
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX.
;; Exits by jumping to _x86return.
804 | |
805 _ConvertX86p32_16BGR555: | |
806 | |
807 ; check short: counts of 16 or fewer take the per-pixel loop | |
808 cmp ecx,BYTE 16 | |
809 ja .L3 | |
810 | |
811 | |
812 .L1 ; short loop: one pixel per iteration | |
813 mov ah,[esi+0] ; blue | |
814 mov al,[esi+1] ; green | |
815 mov bl,[esi+2] ; red | |
816 shr ah,3 | |
817 and al,11111000b | |
818 shl eax,2 | |
819 shr bl,3 | |
820 add al,bl | |
821 mov [edi+0],al | |
822 mov [edi+1],ah | |
823 add esi,BYTE 4 | |
824 add edi,BYTE 2 | |
825 dec ecx | |
826 jnz .L1 | |
827 .L2 | |
828 jmp _x86return | |
829 | |
830 .L3 ; head: if edi is not 4-byte aligned, convert one pixel first | |
831 mov ebx,edi | |
832 and ebx,BYTE 11b | |
833 jz .L4 | |
834 mov ah,[esi+0] ; blue | |
835 mov al,[esi+1] ; green | |
836 mov bl,[esi+2] ; red | |
837 shr ah,3 | |
838 and al,11111000b | |
839 shl eax,2 | |
840 shr bl,3 | |
841 add al,bl | |
842 mov [edi+0],al | |
843 mov [edi+1],ah | |
844 add esi,BYTE 4 | |
845 add edi,BYTE 2 | |
846 dec ecx | |
847 | |
848 .L4 ; save count (its low bit is tested for the tail) | |
849 push ecx | |
850 | |
851 ; unroll twice: ecx = number of pixel pairs | |
852 shr ecx,1 | |
853 | |
854 ; point arrays to end of the paired region | |
855 lea esi,[esi+ecx*8] | |
856 lea edi,[edi+ecx*4] | |
857 | |
858 ; negative counter: counts up to zero; first pass skips the deferred store | |
859 neg ecx | |
860 jmp SHORT .L6 | |
861 | |
862 .L5 | |
863 mov [edi+ecx*4-4],eax | |
864 .L6 | |
865 mov edx,[esi+ecx*8+4] | |
866 | |
867 mov bh,[esi+ecx*8+4] | |
868 mov ah,[esi+ecx*8] | |
869 | |
870 shr bh,3 | |
871 mov al,[esi+ecx*8+1] | |
872 | |
873 shr ah,3 | |
874 mov bl,[esi+ecx*8+5] | |
875 | |
876 shl eax,2 | |
877 mov dl,[esi+ecx*8+2] | |
878 | |
879 shl ebx,18 | |
880 and eax,00007FE0h | |
881 | |
882 shr edx,3 | |
883 and ebx,07FE00000h | |
884 | |
885 and edx,001F001Fh | |
886 add eax,ebx | |
887 | |
888 add eax,edx | |
889 inc ecx | |
890 | |
891 jnz .L5 | |
892 | |
893 mov [edi+ecx*4-4],eax | |
894 | |
895 ; tail: one more pixel if the saved count was odd | |
896 pop ecx | |
897 and ecx,BYTE 1 | |
898 jz .L7 | |
899 mov ah,[esi+0] ; blue | |
900 mov al,[esi+1] ; green | |
901 mov bl,[esi+2] ; red | |
902 shr ah,3 | |
903 and al,11111000b | |
904 shl eax,2 | |
905 shr bl,3 | |
906 add al,bl | |
907 mov [edi+0],al | |
908 mov [edi+1],ah | |
909 add esi,BYTE 4 | |
910 add edi,BYTE 2 | |
911 | |
912 .L7 | |
913 jmp _x86return | |
914 | |
915 | |
916 | |
917 | |
918 | |
919 ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb) | |
920 ;; This routine writes FOUR pixels at once (dword) and then, if any remain, | |
921 ;; up to three trailing pixels one byte at a time | |
;; Converts ECX 4-byte source pixels at [ESI] to 1-byte pixels at [EDI].
;; Each output byte is (byte2 & 0E0h) | ((byte1 & 0E0h) >> 3) | (byte0 >> 6),
;; where byte0..byte2 are the three low bytes of the source pixel.
;; A count below four skips the dword loop and goes straight to the tail.
;; Uses EAX, EBX, EDX; advances ESI/EDI and consumes ECX.
;; Exits by jumping to _x86return.
922 _ConvertX86p32_8RGB332: | |
923 | |
924 | |
925 .L_ALIGNED | |
926 push ecx | |
927 | |
928 shr ecx,2 ; We will draw 4 pixels at once | |
929 jnz .L1 | |
930 | |
931 jmp .L2 ; short jump out of range :( | |
932 | |
933 .L1: | |
934 mov eax,[esi] ; first pair of pixels | |
935 mov edx,[esi+4] | |
936 | |
937 shr dl,6 | |
938 mov ebx,eax | |
939 | |
940 shr al,6 | |
941 and ah,0e0h | |
942 | |
943 shr ebx,16 | |
944 and dh,0e0h | |
945 | |
946 shr ah,3 | |
947 and bl,0e0h | |
948 | |
949 shr dh,3 | |
950 | |
951 or al,bl | |
952 | |
953 mov ebx,edx | |
954 or al,ah | |
955 | |
956 shr ebx,16 | |
957 or dl,dh | |
958 | |
959 and bl,0e0h | |
960 | |
961 or dl,bl | |
962 | |
963 mov ah,dl | |
964 | |
965 | |
966 | |
967 mov ebx,[esi+8] ; second pair of pixels | |
968 | |
969 mov edx,ebx | |
970 and bh,0e0h | |
971 | |
972 shr bl,6 | |
973 and edx,0e00000h | |
974 | |
975 shr edx,16 | |
976 | |
977 shr bh,3 | |
978 | |
979 ror eax,16 | |
980 or bl,dl | |
981 | |
982 mov edx,[esi+12] | |
983 or bl,bh | |
984 | |
985 mov al,bl | |
986 | |
987 mov ebx,edx | |
988 and dh,0e0h | |
989 | |
990 shr dl,6 | |
991 and ebx,0e00000h | |
992 | |
993 shr dh,3 | |
994 mov ah,dl | |
995 | |
996 shr ebx,16 | |
997 or ah,dh | |
998 | |
999 or ah,bl | |
1000 | |
1001 rol eax,16 | |
1002 add esi,BYTE 16 | |
1003 | |
1004 mov [edi],eax | |
1005 add edi,BYTE 4 | |
1006 | |
1007 dec ecx | |
1008 jz .L2 ; L1 out of range for short jump :( | |
1009 | |
1010 jmp .L1 | |
1011 .L2: | |
1012 | |
1013 pop ecx | |
1014 and ecx,BYTE 3 ; mask out number of pixels to draw | |
1015 | |
1016 jz .L4 ; Nothing to do anymore | |
1017 | |
1018 .L3: | |
1019 mov eax,[esi] ; single pixel conversion for trailing pixels | |
1020 | |
1021 mov ebx,eax | |
1022 | |
1023 shr al,6 | |
1024 and ah,0e0h | |
1025 | |
1026 shr ebx,16 | |
1027 | |
1028 shr ah,3 | |
1029 and bl,0e0h | |
1030 | |
1031 or al,ah | |
1032 or al,bl | |
1033 | |
1034 mov [edi],al | |
1035 | |
1036 inc edi | |
1037 add esi,BYTE 4 | |
1038 | |
1039 dec ecx | |
1040 jnz .L3 | |
1041 | |
1042 .L4: | |
1043 jmp _x86return | |
1199
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1044 |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1045 %ifidn __OUTPUT_FORMAT__,elf |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1046 section .note.GNU-stack noalloc noexec nowrite progbits |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1047 %endif |