Mercurial > sdl-ios-xcode
annotate src/hermes/x86p_32.asm @ 1166:da33b7e6d181
Date: Tue, 1 Nov 2005 20:25:10 +0100
From: Dirk Mueller
Subject: [PATCH] build SDL with nonexecutable stack
libSDL is by default marked with an executable stack, which it doesn't
actually need. the reason for this is that there are assembler files in the
source tree not properly annotated with the "noexec stack" section. As such
the linker does a safe-fallback and marks the whole lib as "requires
executable stack".
the patch below removes this by adding annotations. As far as I can see it
shouldn't break anything.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Tue, 01 Nov 2005 23:19:59 +0000 |
parents | 74212992fb08 |
children | 2d6dc7de1145 |
rev | line source |
---|---|
0 | 1 ; |
2 ; x86 format converters for HERMES | |
3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) | |
4 ; This source code is licensed under the GNU LGPL | |
5 ; | |
6 ; Please refer to the file COPYING.LIB contained in the distribution for | |
7 ; licensing conditions | |
8 ; | |
9 ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission | |
10 ; | |
11 | |
12 | |
bits 32

; Converter entry points made visible to the linker.
global  _ConvertX86p32_32BGR888
global  _ConvertX86p32_32RGBA888
global  _ConvertX86p32_32BGRA888
global  _ConvertX86p32_24RGB888
global  _ConvertX86p32_24BGR888
global  _ConvertX86p32_16RGB565
global  _ConvertX86p32_16BGR565
global  _ConvertX86p32_16RGB555
global  _ConvertX86p32_16BGR555
global  _ConvertX86p32_8RGB332

; Every routine in this file finishes with `jmp _x86return`; the symbol is
; defined outside this file.
extern  _x86return
1166
da33b7e6d181
Date: Tue, 1 Nov 2005 20:25:10 +0100
Sam Lantinga <slouken@libsdl.org>
parents:
0
diff
changeset
|
27 |
da33b7e6d181
Date: Tue, 1 Nov 2005 20:25:10 +0100
Sam Lantinga <slouken@libsdl.org>
parents:
0
diff
changeset
|
; Tell the GNU linker that this object does not need an executable stack.
; The .note.GNU-stack section and its attribute keywords are specific to the
; ELF output format, so the declaration is emitted only when NASM is
; producing ELF; for other object formats it is skipped.
; NOTE(review): assumes the assembler defines __OUTPUT_FORMAT__; if it does
; not, the comparison is simply false and the note is omitted - confirm
; against the NASM version used by the build.
%ifidn __OUTPUT_FORMAT__,elf
SECTION .note.GNU-stack noalloc progbits noexec nowrite
%elifidn __OUTPUT_FORMAT__,elf32
SECTION .note.GNU-stack noalloc progbits noexec nowrite
%endif

SECTION .text
30 | |
31 | |
32 ;; _Convert_* | |
33 ;; Parameters: | |
34 ;; ESI = source | |
35 ;; EDI = dest | |
36 ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) | |
37 ;; Destroys: | |
38 ;; EAX, EBX, EDX | |
39 | |
40 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGR888
;   Copies 32-bit pixels while exchanging byte 0 and byte 2 of each one
;   (bswap followed by ror 8); bytes 1 and 3 keep their position.
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags; ebp is saved and restored
;-----------------------------------------------------------------------
_ConvertX86p32_32BGR888:
        cmp     ecx, byte 32            ; more than 32 pixels: unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per pass
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.unrolled:
        push    ebp                     ; ebp becomes the group counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups

        push    ecx                     ; full count is needed again for the tail

.quad_loop:                             ; four pixels per pass, ecx is scratch here
        mov     eax, [esi]
        mov     ebx, [esi+4]

        bswap   eax

        bswap   ebx

        ror     eax, 8
        mov     ecx, [esi+8]

        ror     ebx, 8
        mov     edx, [esi+12]

        bswap   ecx

        bswap   edx

        ror     ecx, 8
        mov     [edi], eax

        ror     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16

        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; original count
        and     ecx, byte 3             ; 0..3 pixels left over
        jz      .done

.tail_loop:                             ; remaining pixels, one per pass
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
118 | |
119 | |
120 | |
121 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_32RGBA888
;   Copies 32-bit pixels, rotating each one left by 8 bits so that the
;   top byte moves to the bottom and the other three move up one byte.
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags; ebp is saved and restored
;-----------------------------------------------------------------------
_ConvertX86p32_32RGBA888:
        cmp     ecx, byte 32            ; more than 32 pixels: unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per pass
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.unrolled:
        push    ebp                     ; ebp becomes the group counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups

        push    ecx                     ; full count is needed again for the tail

.quad_loop:                             ; four pixels per pass, ecx is scratch here
        mov     eax, [esi]
        mov     ebx, [esi+4]

        rol     eax, 8
        mov     ecx, [esi+8]

        rol     ebx, 8
        mov     edx, [esi+12]

        rol     ecx, 8
        mov     [edi], eax

        rol     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16

        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; original count
        and     ecx, byte 3             ; 0..3 pixels left over
        jz      .done

.tail_loop:                             ; remaining pixels, one per pass
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
189 | |
190 | |
191 | |
192 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGRA888
;   Copies 32-bit pixels with the byte order of each pixel reversed
;   (a single bswap per pixel).
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags; ebp is saved and restored
;-----------------------------------------------------------------------
_ConvertX86p32_32BGRA888:
        cmp     ecx, byte 32            ; more than 32 pixels: unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per pass
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.unrolled:
        push    ebp                     ; ebp becomes the group counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups

        push    ecx                     ; full count is needed again for the tail

.quad_loop:                             ; four pixels per pass, ecx is scratch here
        mov     eax, [esi]
        mov     ebx, [esi+4]

        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax

        bswap   ebx

        bswap   ecx

        bswap   edx

        mov     [edi], eax
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16

        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; original count
        and     ecx, byte 3             ; 0..3 pixels left over
        jz      .done

.tail_loop:                             ; remaining pixels, one per pass
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
262 | |
263 | |
264 | |
265 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_24RGB888  -  32-bit RGB 888 to packed 24-bit RGB 888
;   The low three bytes of every source dword are copied in the same
;   order; the top byte is dropped.
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags; ebp is saved and restored
;
; Register diagrams below list bytes from most to least significant;
; the digit is the pixel index inside the current group of four.
;-----------------------------------------------------------------------
_ConvertX86p32_24RGB888:
        cmp     ecx, byte 32            ; more than 32 pixels: aligned/unrolled path
        ja      .align_dest

.short_loop:                            ; byte-wise copy, one pixel per pass
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.align_dest:                            ; copy single pixels until edi is dword aligned
        mov     edx, edi
        and     edx, byte 3
        jz      .aligned
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jmp     short .align_dest

.aligned:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups

        push    ecx                     ; full count is needed again for the tail

.quad_loop:                             ; 4 source dwords -> 3 destination dwords
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        shl     eax, 8                  ; eax = [R0][G0][B0][00]
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]

        shl     ebx, 8                  ; ebx = [R1][G1][B1][00]
        mov     al, [esi+4]             ; eax = [R0][G0][B0][B1]

        ror     eax, 8                  ; eax = [B1][R0][G0][B0]  (dest dword 0)
        mov     bh, [esi+9]             ; ebx = [R1][G1][G2][00]

        mov     [edi], eax
        add     edi, byte 12            ; edi already points past this group

        shl     ecx, 8                  ; ecx = [R3][G3][B3][00]
        mov     bl, [esi+8]             ; ebx = [R1][G1][G2][B2]

        rol     ebx, 16                 ; ebx = [G2][B2][R1][G1]  (dest dword 1)
        mov     cl, [esi+10]            ; ecx = [R3][G3][B3][R2]  (dest dword 2)

        mov     [edi-8], ebx
        add     esi, byte 16

        mov     [edi-4], ecx
        dec     ebp

        jnz     .quad_loop

        pop     ecx                     ; count as it was on entry to .aligned
        and     ecx, byte 3             ; 0..3 pixels left over
        jz      .done

.tail_loop:                             ; byte-wise copy of the leftovers
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
359 | |
360 | |
361 | |
362 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_24BGR888  -  32-bit RGB 888 to packed 24-bit BGR 888
;   The low three bytes of every source dword are written in reverse
;   order; the top byte is dropped.
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags; ebp is saved and restored
;
; Register diagrams below list bytes from most to least significant;
; the digit is the pixel index inside the current group of four.
;-----------------------------------------------------------------------
_ConvertX86p32_24BGR888:
        cmp     ecx, byte 32            ; more than 32 pixels: aligned/unrolled path
        ja      .align_dest

.short_loop:                            ; byte-wise swap, one pixel per pass
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.align_dest:                            ; convert single pixels until edi is dword aligned
        mov     edx, edi
        and     edx, byte 3
        jz      .aligned
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jmp     short .align_dest

.aligned:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups

        push    ecx                     ; full count is needed again for the tail

.quad_loop:                             ; 4 source dwords -> 3 destination dwords
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        bswap   eax                     ; eax = [B0][G0][R0][A0]

        bswap   ebx                     ; ebx = [B1][G1][R1][A1]

        mov     al, [esi+6]             ; eax = [B0][G0][R0][R1]
        mov     bh, [esi+9]             ; ebx = [B1][G1][G2][A1]

        ror     eax, 8                  ; eax = [R1][B0][G0][R0]  (dest dword 0)
        mov     bl, [esi+10]            ; ebx = [B1][G1][G2][R2]

        ror     ebx, 16                 ; ebx = [G2][R2][B1][G1]  (dest dword 1)
        mov     [edi], eax

        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]

        bswap   ecx                     ; ecx = [B3][G3][R3][A3]

        mov     cl, [esi+8]             ; ecx = [B3][G3][R3][B2]  (dest dword 2)
        add     esi, byte 16

        mov     [edi+8], ecx
        add     edi, byte 12

        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; count as it was on entry to .aligned
        and     ecx, byte 3             ; 0..3 pixels left over
        jz      .done

.tail_loop:                             ; byte-wise swap of the leftovers
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
460 | |
461 | |
462 | |
463 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB565  -  32-bit RGB 888 to 16-bit RGB 565
;   Output word layout: rrrrrggg gggbbbbb.
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags
;-----------------------------------------------------------------------
_ConvertX86p32_16RGB565:
        cmp     ecx, byte 16            ; more than 16 pixels: paired path
        ja      .align_dest

.short_loop:                            ; one pixel per pass
        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 0FCh                ; green -> top 6 bits
        shl     eax, 3                  ; ax = rrrrrggg ggg00000
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl                  ; ax = rrrrrggg gggbbbbb
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.align_dest:                            ; dest not dword aligned: do one pixel first
        mov     ebx, edi
        and     ebx, byte 3
        jz      .paired

        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 0FCh
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx

.paired:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; point both arrays past the paired data
        lea     edi, [edi+ecx*4]

        neg     ecx                     ; index runs from -pairs up to 0
        jmp     short .pair_build

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair built in the previous pass
.pair_build:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair

        shr     ah, 2                   ; green -> 6 bits
        mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair

        shr     eax, 3                  ; blue in bits 0-4, green in bits 5-10
        mov     edx, [esi+ecx*8+4]      ; second pixel again, used for the reds

        shr     bh, 2                   ; second green -> 6 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red of the first pixel

        shl     ebx, 13                 ; second blue/green -> bits 16-26
        and     eax, 000007FFh

        shl     edx, 8                  ; reds now in bits 8-15 and 24-31
        and     ebx, 07FF0000h

        and     edx, 0F800F800h         ; keep the top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two finished 565 pixels
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; ecx is 0 here: store the final pair

        pop     ecx
        test    cl, 1                   ; odd count -> one pixel still to do
        jz      .done

        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 0FCh
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2

.done:
        jmp     _x86return
575 | |
576 | |
577 | |
578 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR565  -  32-bit RGB 888 to 16-bit BGR 565
;   Output word layout: bbbbbggg gggrrrrr.
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags
;-----------------------------------------------------------------------
_ConvertX86p32_16BGR565:
        cmp     ecx, byte 16            ; more than 16 pixels: paired path
        ja      .align_dest

.short_loop:                            ; one pixel per pass
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 0FCh                ; green -> top 6 bits
        shl     eax, 3                  ; ax = bbbbbggg ggg00000
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl                  ; ax = bbbbbggg gggrrrrr
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.align_dest:                            ; dest not dword aligned: do one pixel first
        mov     ebx, edi
        and     ebx, byte 3
        jz      .paired
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 0FCh
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx

.paired:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; point both arrays past the paired data
        lea     edi, [edi+ecx*4]

        neg     ecx                     ; index runs from -pairs up to 0
        jmp     short .pair_build

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair built in the previous pass
.pair_build:
        mov     edx, [esi+ecx*8+4]      ; second pixel, used for the reds

        mov     bh, [esi+ecx*8+4]       ; second blue
        mov     ah, [esi+ecx*8]         ; first blue

        shr     bh, 3                   ; second blue -> 5 bits
        mov     al, [esi+ecx*8+1]       ; first green

        shr     ah, 3                   ; first blue -> 5 bits
        mov     bl, [esi+ecx*8+5]       ; second green

        shl     eax, 3                  ; first green/blue -> bits 5-15 after masking
        mov     dl, [esi+ecx*8+2]       ; dl = red of the first pixel

        shl     ebx, 19                 ; second green/blue -> bits 21-31 after masking
        and     eax, 0000FFE0h

        shr     edx, 3                  ; reds now end at bits 0-4 and 16-20
        and     ebx, 0FFE00000h

        and     edx, 001F001Fh          ; keep 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two finished 565 pixels
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; ecx is 0 here: store the final pair

        pop     ecx
        and     ecx, byte 1             ; odd count -> one pixel still to do
        jz      .done
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 0FCh
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2

.done:
        jmp     _x86return
689 | |
690 | |
691 | |
692 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB555  -  32-bit RGB 888 to 16-bit RGB 555
;   Output word layout: 0rrrrrgg gggbbbbb.
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags
;-----------------------------------------------------------------------
_ConvertX86p32_16RGB555:
        cmp     ecx, byte 16            ; more than 16 pixels: paired path
        ja      .align_dest

.short_loop:                            ; one pixel per pass
        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 0F8h                ; green -> top 5 bits
        shl     eax, 2                  ; ax = 0rrrrrgg ggg00000
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl                  ; ax = 0rrrrrgg gggbbbbb
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.align_dest:                            ; dest not dword aligned: do one pixel first
        mov     ebx, edi
        and     ebx, byte 3
        jz      .paired
        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 0F8h
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx

.paired:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; point both arrays past the paired data
        lea     edi, [edi+ecx*4]

        neg     ecx                     ; index runs from -pairs up to 0
        jmp     short .pair_build

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair built in the previous pass
.pair_build:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair

        shr     ah, 3                   ; green -> 5 bits
        mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair

        shr     eax, 3                  ; blue in bits 0-4, green in bits 5-9
        mov     edx, [esi+ecx*8+4]      ; second pixel again, used for the reds

        shr     bh, 3                   ; second green -> 5 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red of the first pixel

        shl     ebx, 13                 ; second blue/green -> bits 16-25
        and     eax, 000007FFh

        shl     edx, 7                  ; reds now in bits 7-14 and 23-30
        and     ebx, 07FF0000h

        and     edx, 07C007C00h         ; keep the top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two finished 555 pixels
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; ecx is 0 here: store the final pair

        pop     ecx
        and     ecx, byte 1             ; odd count -> one pixel still to do
        jz      .done
        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 0F8h
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2

.done:
        jmp     _x86return
800 | |
801 | |
802 | |
803 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR555  -  32-bit RGB 888 to 16-bit BGR 555
;   Output word layout: 0bbbbbgg gggrrrrr.
; In:    esi = source, edi = dest, ecx = pixel count (must not be 0)
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags
;-----------------------------------------------------------------------
_ConvertX86p32_16BGR555:
        cmp     ecx, byte 16            ; more than 16 pixels: paired path
        ja      .align_dest

.short_loop:                            ; one pixel per pass
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 0F8h                ; green -> top 5 bits
        shl     eax, 2                  ; ax = 0bbbbbgg ggg00000
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl                  ; ax = 0bbbbbgg gggrrrrr
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.align_dest:                            ; dest not dword aligned: do one pixel first
        mov     ebx, edi
        and     ebx, byte 3
        jz      .paired
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 0F8h
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx

.paired:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; point both arrays past the paired data
        lea     edi, [edi+ecx*4]

        neg     ecx                     ; index runs from -pairs up to 0
        jmp     short .pair_build

.pair_store:
        mov     [edi+ecx*4-4], eax      ; store the pair built in the previous pass
.pair_build:
        mov     edx, [esi+ecx*8+4]      ; second pixel, used for the reds

        mov     bh, [esi+ecx*8+4]       ; second blue
        mov     ah, [esi+ecx*8]         ; first blue

        shr     bh, 3                   ; second blue -> 5 bits
        mov     al, [esi+ecx*8+1]       ; first green

        shr     ah, 3                   ; first blue -> 5 bits
        mov     bl, [esi+ecx*8+5]       ; second green

        shl     eax, 2                  ; first green/blue -> bits 5-14 after masking
        mov     dl, [esi+ecx*8+2]       ; dl = red of the first pixel

        shl     ebx, 18                 ; second green/blue -> bits 21-30 after masking
        and     eax, 00007FE0h

        shr     edx, 3                  ; reds now end at bits 0-4 and 16-20
        and     ebx, 07FE00000h

        and     edx, 001F001Fh          ; keep 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two finished 555 pixels
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; ecx is 0 here: store the final pair

        pop     ecx
        and     ecx, byte 1             ; odd count -> one pixel still to do
        jz      .done
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 0F8h
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2

.done:
        jmp     _x86return
915 | |
916 | |
917 | |
918 | |
919 | |
;-----------------------------------------------------------------------
; _ConvertX86p32_8RGB332  -  32-bit RGB 888 to 8-bit RGB 332 (rrrgggbb)
;   Converts four pixels per pass and writes them as one dword, then
;   handles up to three trailing pixels one byte at a time.
; In:    esi = source, edi = dest, ecx = pixel count
; Exit:  jmp _x86return
; Uses:  eax, ebx, ecx, edx, esi, edi, flags
;
; The loop body is too long for a short conditional jump, so the two
; branches around it are written as explicit NEAR jumps instead of a
; short jump over an unconditional jmp.
;-----------------------------------------------------------------------
_ConvertX86p32_8RGB332:
        push    ecx                     ; full count is needed again for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      near .tail              ; fewer than four pixels: tail only

.quad_loop:
        mov     eax, [esi]              ; pixel 0
        mov     edx, [esi+4]            ; pixel 1

        shr     dl, 6                   ; p1 blue  -> bits 0-1
        mov     ebx, eax

        shr     al, 6                   ; p0 blue  -> bits 0-1
        and     ah, 0E0h                ; p0 green: top 3 bits

        shr     ebx, 16                 ; bl = p0 red
        and     dh, 0E0h                ; p1 green: top 3 bits

        shr     ah, 3                   ; p0 green -> bits 2-4
        and     bl, 0E0h                ; p0 red stays in bits 5-7

        shr     dh, 3                   ; p1 green -> bits 2-4

        or      al, bl

        mov     ebx, edx
        or      al, ah                  ; al = pixel 0 as rrrgggbb

        shr     ebx, 16                 ; bl = p1 red
        or      dl, dh

        and     bl, 0E0h

        or      dl, bl                  ; dl = pixel 1 as rrrgggbb

        mov     ah, dl                  ; ax = [p1][p0]

        mov     ebx, [esi+8]            ; pixel 2

        mov     edx, ebx
        and     bh, 0E0h                ; p2 green: top 3 bits

        shr     bl, 6                   ; p2 blue  -> bits 0-1
        and     edx, 00E00000h          ; p2 red: top 3 bits

        shr     edx, 16                 ; dl = p2 red bits 5-7

        shr     bh, 3                   ; p2 green -> bits 2-4

        ror     eax, 16                 ; park [p1][p0] in the upper half
        or      bl, dl

        mov     edx, [esi+12]           ; pixel 3
        or      bl, bh

        mov     al, bl                  ; al = pixel 2 as rrrgggbb

        mov     ebx, edx
        and     dh, 0E0h                ; p3 green: top 3 bits

        shr     dl, 6                   ; p3 blue  -> bits 0-1
        and     ebx, 00E00000h          ; p3 red: top 3 bits

        shr     dh, 3                   ; p3 green -> bits 2-4
        mov     ah, dl

        shr     ebx, 16                 ; bl = p3 red bits 5-7
        or      ah, dh

        or      ah, bl                  ; ah = pixel 3 as rrrgggbb

        rol     eax, 16                 ; eax = [p3][p2][p1][p0]
        add     esi, byte 16

        mov     [edi], eax
        add     edi, byte 4

        dec     ecx
        jnz     near .quad_loop

.tail:
        pop     ecx
        and     ecx, byte 3             ; 0..3 pixels left over

        jz      .done

.tail_loop:                             ; one pixel per pass
        mov     eax, [esi]

        mov     ebx, eax

        shr     al, 6                   ; blue  -> bits 0-1
        and     ah, 0E0h                ; green: top 3 bits

        shr     ebx, 16                 ; bl = red

        shr     ah, 3                   ; green -> bits 2-4
        and     bl, 0E0h                ; red stays in bits 5-7

        or      al, ah
        or      al, bl

        mov     [edi], al

        inc     edi
        add     esi, byte 4

        dec     ecx
        jnz     .tail_loop

.done:
        jmp     _x86return