Mercurial > sdl-ios-xcode
annotate src/hermes/x86p_32.asm @ 1676:e136f3ffdc1b SDL-1.3
Adding software renderer implementation
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 12 Jun 2006 09:10:06 +0000 |
parents | 2d6dc7de1145 |
children | 393092a3ebf6 |
rev | line source |
---|---|
0 | 1 ; |
2 ; x86 format converters for HERMES | |
3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) | |
4 ; This source code is licensed under the GNU LGPL | |
5 ; | |
6 ; Please refer to the file COPYING.LIB contained in the distribution for | |
7 ; licensing conditions | |
8 ; | |
9 ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission | |
10 ; | |
11 | |
12 | |
13 BITS 32 | |
14 | |
15 GLOBAL _ConvertX86p32_32BGR888 | |
16 GLOBAL _ConvertX86p32_32RGBA888 | |
17 GLOBAL _ConvertX86p32_32BGRA888 | |
18 GLOBAL _ConvertX86p32_24RGB888 | |
19 GLOBAL _ConvertX86p32_24BGR888 | |
20 GLOBAL _ConvertX86p32_16RGB565 | |
21 GLOBAL _ConvertX86p32_16BGR565 | |
22 GLOBAL _ConvertX86p32_16RGB555 | |
23 GLOBAL _ConvertX86p32_16BGR555 | |
24 GLOBAL _ConvertX86p32_8RGB332 | |
25 | |
26 EXTERN _x86return | |
1166
da33b7e6d181
Date: Tue, 1 Nov 2005 20:25:10 +0100
Sam Lantinga <slouken@libsdl.org>
parents:
0
diff
changeset
|
27 |
0 | 28 SECTION .text |
29 | |
30 ;; _Convert_* | |
31 ;; Parameters: | |
32 ;; ESI = source | |
33 ;; EDI = dest | |
34 ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) | |
35 ;; Destroys: | |
36 ;; EAX, EBX, EDX (ECX is counted down and ESI/EDI are advanced as well) | |
37 | |
38 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_32BGR888
;;   Rewrites every 32-bit pixel [b3][b2][b1][b0] as [b3][b0][b1][b2], i.e.
;;   bytes 0 and 2 trade places (0xAARRGGBB -> 0xAABBGGRR).
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags (EBP is saved and restored)
;; ---------------------------------------------------------------------------

; One pixel: load, swap bytes 0 and 2, store, advance both pointers.
%macro P32_BGR888_PIXEL 0
        mov     edx, [esi]
        bswap   edx                     ; [3][2][1][0] -> [0][1][2][3]
        ror     edx, 8                  ;              -> [3][0][1][2]
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
%endmacro

_ConvertX86p32_32BGR888:

        ; Runs of 32 pixels or fewer take the simple one-at-a-time loop.
        cmp     ecx, byte 32
        ja      .bulk

.short_loop:
        P32_BGR888_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.bulk:
        push    ebp                     ; EBP becomes the group-of-four counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = count / 4
        push    ecx                     ; keep the full count for the tail

        ; Four pixels per iteration; every load is issued before any store.
.quad_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]

        bswap   eax
        bswap   ebx

        ror     eax, 8
        mov     ecx, [esi+8]

        ror     ebx, 8
        mov     edx, [esi+12]

        bswap   ecx
        bswap   edx

        ror     ecx, 8
        mov     [edi+0], eax

        ror     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16

        dec     ebp
        jnz     .quad_loop

        ; 0..3 pixels may be left over.
        pop     ecx
        and     ecx, byte 11b
        jz      .done

.tail_loop:
        P32_BGR888_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
116 | |
117 | |
118 | |
119 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_32RGBA888
;;   Rotates every 32-bit pixel left by one byte:
;;   [b3][b2][b1][b0] -> [b2][b1][b0][b3]  (0xAARRGGBB -> 0xRRGGBBAA).
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags (EBP is saved and restored)
;; ---------------------------------------------------------------------------

; One pixel: load, rotate left 8, store, advance both pointers.
%macro P32_RGBA888_PIXEL 0
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
%endmacro

_ConvertX86p32_32RGBA888:

        ; Runs of 32 pixels or fewer take the simple one-at-a-time loop.
        cmp     ecx, byte 32
        ja      .bulk

.short_loop:
        P32_RGBA888_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.bulk:
        push    ebp                     ; EBP becomes the group-of-four counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = count / 4
        push    ecx                     ; keep the full count for the tail

        ; Four pixels per iteration; every load is issued before any store.
.quad_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]

        rol     eax, 8
        mov     ecx, [esi+8]

        rol     ebx, 8
        mov     edx, [esi+12]

        rol     ecx, 8
        mov     [edi+0], eax

        rol     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16

        dec     ebp
        jnz     .quad_loop

        ; 0..3 pixels may be left over.
        pop     ecx
        and     ecx, byte 11b
        jz      .done

.tail_loop:
        P32_RGBA888_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
187 | |
188 | |
189 | |
190 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_32BGRA888
;;   Reverses the byte order of every 32-bit pixel:
;;   [b3][b2][b1][b0] -> [b0][b1][b2][b3]  (0xAARRGGBB -> 0xBBGGRRAA).
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags (EBP is saved and restored)
;; ---------------------------------------------------------------------------

; One pixel: load, byte-swap, store, advance both pointers.
%macro P32_BGRA888_PIXEL 0
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, byte 4
        add     edi, byte 4
%endmacro

_ConvertX86p32_32BGRA888:

        ; Runs of 32 pixels or fewer take the simple one-at-a-time loop.
        cmp     ecx, byte 32
        ja      .bulk

.short_loop:
        P32_BGRA888_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.bulk:
        push    ebp                     ; EBP becomes the group-of-four counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = count / 4
        push    ecx                     ; keep the full count for the tail

        ; Four pixels per iteration: load all, swap all, store all.
.quad_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]

        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax
        bswap   ebx
        bswap   ecx
        bswap   edx

        mov     [edi+0], eax
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, byte 16
        add     edi, byte 16

        dec     ebp
        jnz     .quad_loop

        ; 0..3 pixels may be left over.
        pop     ecx
        and     ecx, byte 11b
        jz      .done

.tail_loop:
        P32_BGRA888_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
260 | |
261 | |
262 | |
263 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_24RGB888  --  32 bit RGB 888 to 24 bit RGB 888
;;   Copies the low three bytes of every 4-byte source pixel to a packed
;;   3-byte destination pixel, keeping their order; the fourth source byte
;;   is dropped.
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags (EBP is saved and restored)
;;
;;   Register diagrams below are written most-significant byte first;
;;   B/G/R/A are source bytes 0/1/2/3 and the digit is the pixel's index
;;   inside the current group of four.
;; ---------------------------------------------------------------------------

; One pixel, byte by byte: copy source bytes 0..2, skip byte 3.
%macro P32_24RGB888_PIXEL 0
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
%endmacro

_ConvertX86p32_24RGB888:

        ; Runs of 32 pixels or fewer take the simple byte-wise loop.
        cmp     ecx, byte 32
        ja      .align_head

.short_loop:
        P32_24RGB888_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; Emit single pixels until EDI is a multiple of 4, so the dword
        ; stores in the unrolled loop are aligned.
.align_head:
        mov     edx, edi
        and     edx, byte 11b
        jz      .aligned
        P32_24RGB888_PIXEL
        dec     ecx
        jmp     short .align_head

.aligned:
        push    ebp                     ; EBP becomes the group-of-four counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = remaining count / 4
        push    ecx                     ; keep the remaining count for the tail

        ; Four source pixels (16 bytes) become three destination dwords
        ; (12 bytes).  Instruction order is kept as originally scheduled.
.quad_loop:
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        shl     eax, 8                  ; eax = [R0][G0][B0][ 0]
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]

        shl     ebx, 8                  ; ebx = [R1][G1][B1][ 0]
        mov     al, [esi+4]             ; eax = [R0][G0][B0][B1]

        ror     eax, 8                  ; eax = [B1][R0][G0][B0]  (dword 0)
        mov     bh, [esi+8+1]           ; ebx = [R1][G1][G2][ 0]

        mov     [edi], eax
        add     edi, byte 3*4           ; advanced early; later stores compensate

        shl     ecx, 8                  ; ecx = [R3][G3][B3][ 0]
        mov     bl, [esi+8+0]           ; ebx = [R1][G1][G2][B2]

        rol     ebx, 16                 ; ebx = [G2][B2][R1][G1]  (dword 1)
        mov     cl, [esi+8+2]           ; ecx = [R3][G3][B3][R2]  (dword 2)

        mov     [edi+4-3*4], ebx
        add     esi, byte 4*4

        mov     [edi+8-3*4], ecx
        dec     ebp

        jnz     .quad_loop

        ; 0..3 pixels may be left over.
        pop     ecx
        and     ecx, byte 11b
        jz      .done

.tail_loop:
        P32_24RGB888_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
357 | |
358 | |
359 | |
360 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_24BGR888  --  32 bit RGB 888 to 24 bit BGR 888
;;   Writes the low three bytes of every 4-byte source pixel to a packed
;;   3-byte destination pixel in reversed order (source byte 2 first); the
;;   fourth source byte is dropped.
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags (EBP is saved and restored)
;;
;;   Register diagrams below are written most-significant byte first;
;;   B/G/R/A are source bytes 0/1/2/3 and the digit is the pixel's index
;;   inside the current group of four.
;; ---------------------------------------------------------------------------

; One pixel, byte by byte: source bytes 2,1,0 -> dest bytes 0,1,2.
%macro P32_24BGR888_PIXEL 0
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
%endmacro

_ConvertX86p32_24BGR888:

        ; Runs of 32 pixels or fewer take the simple byte-wise loop.
        cmp     ecx, byte 32
        ja      .align_head

.short_loop:
        P32_24BGR888_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; Emit single pixels until EDI is a multiple of 4, so the dword
        ; stores in the unrolled loop are aligned.
.align_head:
        mov     edx, edi
        and     edx, byte 11b
        jz      .aligned
        P32_24BGR888_PIXEL
        dec     ecx
        jmp     short .align_head

.aligned:
        push    ebp                     ; EBP becomes the group-of-four counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = remaining count / 4
        push    ecx                     ; keep the remaining count for the tail

        ; Four source pixels (16 bytes) become three destination dwords
        ; (12 bytes).  Instruction order is kept as originally scheduled.
.quad_loop:
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        bswap   eax                     ; eax = [B0][G0][R0][A0]
        bswap   ebx                     ; ebx = [B1][G1][R1][A1]

        mov     al, [esi+4+2]           ; eax = [B0][G0][R0][R1]
        mov     bh, [esi+4+4+1]         ; ebx = [B1][G1][G2][A1]

        ror     eax, 8                  ; eax = [R1][B0][G0][R0]  (dword 0)
        mov     bl, [esi+4+4+2]         ; ebx = [B1][G1][G2][R2]

        ror     ebx, 16                 ; ebx = [G2][R2][B1][G1]  (dword 1)
        mov     [edi], eax

        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]

        bswap   ecx                     ; ecx = [B3][G3][R3][A3]

        mov     cl, [esi+8]             ; ecx = [B3][G3][R3][B2]  (dword 2)
        add     esi, byte 4*4

        mov     [edi+8], ecx
        add     edi, byte 3*4

        dec     ebp
        jnz     .quad_loop

        ; 0..3 pixels may be left over.
        pop     ecx
        and     ecx, byte 11b
        jz      .done

.tail_loop:
        P32_24BGR888_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
458 | |
459 | |
460 | |
461 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_16RGB565  --  32 bit RGB 888 to 16 bit RGB 565
;;   For each 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red) writes
;;   the 16-bit value  (red >> 3) << 11 | (green >> 2) << 5 | (blue >> 3).
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; ---------------------------------------------------------------------------

; One pixel, byte-wise: build the 565 word in AX, store it, advance pointers.
%macro P32_16RGB565_PIXEL 0
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 11111100b           ; green -> top 6 bits
        shl     eax, 3                  ; ax = R5:G6:00000
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl                  ; low five bits are clear, no carry
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

_ConvertX86p32_16RGB565:

        ; Runs of 16 pixels or fewer take the simple one-at-a-time loop.
        cmp     ecx, byte 16
        ja      .align_head

.short_loop:
        P32_16RGB565_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; If EDI is not a multiple of 4, emit one pixel first.
        ; NOTE(review): this only reaches dword alignment when EDI started
        ; out 2-byte aligned -- confirm against the caller.
.align_head:
        mov     ebx, edi
        and     ebx, byte 11b
        jz      .pairs

        P32_16RGB565_PIXEL
        dec     ecx

.pairs:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point both arrays at the end of the paired region and count a
        ; negative index up towards zero.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        neg     ecx
        jmp     short .pack_pair

        ; Each pass stores the dword packed by the previous pass, then packs
        ; the next pair.  Instruction order is kept as originally scheduled.
.store_pair:
        mov     [edi+ecx*4-4], eax
.pack_pair:
        mov     eax, [esi+ecx*8]        ; eax = first pixel of the pair

        shr     ah, 2                   ; green0 -> 6 bits
        mov     ebx, [esi+ecx*8+4]      ; ebx = second pixel

        shr     eax, 3                  ; bits 0..10 = G6:B5 of pixel 0
        mov     edx, [esi+ecx*8+4]      ; edx = second pixel again (for red1)

        shr     bh, 2                   ; green1 -> 6 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red of pixel 0

        shl     ebx, 13                 ; bits 16..26 = G6:B5 of pixel 1
        and     eax, 000007FFh

        shl     edx, 8                  ; red0 -> bits 8..15, red1 -> bits 24..31
        and     ebx, 07FF0000h

        and     edx, 0F800F800h         ; keep the top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = pixel1:pixel0 as two 565 words
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4], eax      ; flush the last packed pair (ecx = 0)

        ; An odd count leaves exactly one pixel.
        pop     ecx
        test    cl, 1
        jz      .done

        P32_16RGB565_PIXEL

.done:
        jmp     _x86return
573 | |
574 | |
575 | |
576 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_16BGR565  --  32 bit RGB 888 to 16 bit BGR 565
;;   For each 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red) writes
;;   the 16-bit value  (blue >> 3) << 11 | (green >> 2) << 5 | (red >> 3).
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; ---------------------------------------------------------------------------

; One pixel, byte-wise: build the 565 word in AX, store it, advance pointers.
%macro P32_16BGR565_PIXEL 0
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 11111100b           ; green -> top 6 bits
        shl     eax, 3                  ; ax = B5:G6:00000
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl                  ; low five bits are clear, no carry
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

_ConvertX86p32_16BGR565:

        ; Runs of 16 pixels or fewer take the simple one-at-a-time loop.
        cmp     ecx, byte 16
        ja      .align_head

.short_loop:
        P32_16BGR565_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; If EDI is not a multiple of 4, emit one pixel first.
        ; NOTE(review): this only reaches dword alignment when EDI started
        ; out 2-byte aligned -- confirm against the caller.
.align_head:
        mov     ebx, edi
        and     ebx, byte 11b
        jz      .pairs
        P32_16BGR565_PIXEL
        dec     ecx

.pairs:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point both arrays at the end of the paired region and count a
        ; negative index up towards zero.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        neg     ecx
        jmp     short .pack_pair

        ; Each pass stores the dword packed by the previous pass, then packs
        ; the next pair.  Instruction order is kept as originally scheduled.
.store_pair:
        mov     [edi+ecx*4-4], eax
.pack_pair:
        mov     edx, [esi+ecx*8+4]      ; edx = second pixel (for red1)

        mov     bh, [esi+ecx*8+4]       ; bh = blue of pixel 1
        mov     ah, [esi+ecx*8]         ; ah = blue of pixel 0

        shr     bh, 3                   ; blue1 -> 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green of pixel 0

        shr     ah, 3                   ; blue0 -> 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green of pixel 1

        shl     eax, 3                  ; bits 3..15 = B5:G8 of pixel 0
        mov     dl, [esi+ecx*8+2]       ; dl = red of pixel 0

        shl     ebx, 19                 ; bits 19..31 = B5:G8 of pixel 1
        and     eax, 0000FFE0h          ; keep B5:G6, clear the red field

        shr     edx, 3                  ; red0 -> bits 0..4, red1 -> bits 16..20
        and     ebx, 0FFE00000h

        and     edx, 001F001Fh
        add     eax, ebx

        add     eax, edx                ; eax = pixel1:pixel0 as two 565 words
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4], eax      ; flush the last packed pair (ecx = 0)

        ; An odd count leaves exactly one pixel.
        pop     ecx
        and     ecx, byte 1
        jz      .done
        P32_16BGR565_PIXEL

.done:
        jmp     _x86return
687 | |
688 | |
689 | |
690 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_16RGB555  --  32 bit RGB 888 to 16 bit RGB 555
;;   For each 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red) writes
;;   the 16-bit value  (red >> 3) << 10 | (green >> 3) << 5 | (blue >> 3).
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; ---------------------------------------------------------------------------

; One pixel, byte-wise: build the 555 word in AX, store it, advance pointers.
%macro P32_16RGB555_PIXEL 0
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 11111000b           ; green -> top 5 bits
        shl     eax, 2                  ; ax = 0:R5:G5:00000
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl                  ; low five bits are clear, no carry
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

_ConvertX86p32_16RGB555:

        ; Runs of 16 pixels or fewer take the simple one-at-a-time loop.
        cmp     ecx, byte 16
        ja      .align_head

.short_loop:
        P32_16RGB555_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; If EDI is not a multiple of 4, emit one pixel first.
        ; NOTE(review): this only reaches dword alignment when EDI started
        ; out 2-byte aligned -- confirm against the caller.
.align_head:
        mov     ebx, edi
        and     ebx, byte 11b
        jz      .pairs
        P32_16RGB555_PIXEL
        dec     ecx

.pairs:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point both arrays at the end of the paired region and count a
        ; negative index up towards zero.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        neg     ecx
        jmp     short .pack_pair

        ; Each pass stores the dword packed by the previous pass, then packs
        ; the next pair.  Instruction order is kept as originally scheduled.
.store_pair:
        mov     [edi+ecx*4-4], eax
.pack_pair:
        mov     eax, [esi+ecx*8]        ; eax = first pixel of the pair

        shr     ah, 3                   ; green0 -> 5 bits
        mov     ebx, [esi+ecx*8+4]      ; ebx = second pixel

        shr     eax, 3                  ; bits 0..9 = G5:B5 of pixel 0
        mov     edx, [esi+ecx*8+4]      ; edx = second pixel again (for red1)

        shr     bh, 3                   ; green1 -> 5 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red of pixel 0

        shl     ebx, 13                 ; bits 16..25 = G5:B5 of pixel 1
        and     eax, 000007FFh

        shl     edx, 7                  ; red0 -> bits 7..14, red1 -> bits 23..30
        and     ebx, 07FF0000h

        and     edx, 07C007C00h         ; keep the top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = pixel1:pixel0 as two 555 words
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4], eax      ; flush the last packed pair (ecx = 0)

        ; An odd count leaves exactly one pixel.
        pop     ecx
        and     ecx, byte 1
        jz      .done
        P32_16RGB555_PIXEL

.done:
        jmp     _x86return
798 | |
799 | |
800 | |
801 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_16BGR555  --  32 bit RGB 888 to 16 bit BGR 555
;;   For each 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red) writes
;;   the 16-bit value  (blue >> 3) << 10 | (green >> 3) << 5 | (red >> 3).
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; ---------------------------------------------------------------------------

; One pixel, byte-wise: build the 555 word in AX, store it, advance pointers.
%macro P32_16BGR555_PIXEL 0
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 11111000b           ; green -> top 5 bits
        shl     eax, 2                  ; ax = 0:B5:G5:00000
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl                  ; low five bits are clear, no carry
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
%endmacro

_ConvertX86p32_16BGR555:

        ; Runs of 16 pixels or fewer take the simple one-at-a-time loop.
        cmp     ecx, byte 16
        ja      .align_head

.short_loop:
        P32_16BGR555_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; If EDI is not a multiple of 4, emit one pixel first.
        ; NOTE(review): this only reaches dword alignment when EDI started
        ; out 2-byte aligned -- confirm against the caller.
.align_head:
        mov     ebx, edi
        and     ebx, byte 11b
        jz      .pairs
        P32_16BGR555_PIXEL
        dec     ecx

.pairs:
        push    ecx                     ; keep the count for the odd-pixel test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point both arrays at the end of the paired region and count a
        ; negative index up towards zero.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        neg     ecx
        jmp     short .pack_pair

        ; Each pass stores the dword packed by the previous pass, then packs
        ; the next pair.  Instruction order is kept as originally scheduled.
.store_pair:
        mov     [edi+ecx*4-4], eax
.pack_pair:
        mov     edx, [esi+ecx*8+4]      ; edx = second pixel (for red1)

        mov     bh, [esi+ecx*8+4]       ; bh = blue of pixel 1
        mov     ah, [esi+ecx*8]         ; ah = blue of pixel 0

        shr     bh, 3                   ; blue1 -> 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green of pixel 0

        shr     ah, 3                   ; blue0 -> 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green of pixel 1

        shl     eax, 2                  ; bits 2..14 = B5:G8 of pixel 0
        mov     dl, [esi+ecx*8+2]       ; dl = red of pixel 0

        shl     ebx, 18                 ; bits 18..30 = B5:G8 of pixel 1
        and     eax, 00007FE0h          ; keep B5:G5, clear the red field

        shr     edx, 3                  ; red0 -> bits 0..4, red1 -> bits 16..20
        and     ebx, 07FE00000h

        and     edx, 001F001Fh
        add     eax, ebx

        add     eax, edx                ; eax = pixel1:pixel0 as two 555 words
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4], eax      ; flush the last packed pair (ecx = 0)

        ; An odd count leaves exactly one pixel.
        pop     ecx
        and     ecx, byte 1
        jz      .done
        P32_16BGR555_PIXEL

.done:
        jmp     _x86return
913 | |
914 | |
915 | |
916 | |
917 | |
;; ---------------------------------------------------------------------------
;; _ConvertX86p32_8RGB332  --  32 bit RGB to 8 bit RGB (rrrgggbb)
;;   For each 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red) writes
;;   the byte  (red & 0E0h) | ((green & 0E0h) >> 3) | (blue >> 6).
;;   Four pixels are packed and written as one dword per iteration; up to
;;   three trailing pixels are then written one byte at a time.
;;
;;   In:       ESI = source, EDI = dest, ECX = pixel count
;;   Exit:     jmp _x86return, ESI/EDI advanced past the processed pixels
;;   Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; ---------------------------------------------------------------------------
_ConvertX86p32_8RGB332:

        push    ecx                     ; keep the full count for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jnz     .quad_loop

        jmp     .tail                   ; fewer than four pixels (target too far
                                        ; away for a short conditional jump)

        ; Instruction order in this loop is kept as originally scheduled.
.quad_loop:
        ; ---- pixels 0 and 1 -> AL and AH --------------------------------
        mov     eax, [esi]              ; pixel 0
        mov     edx, [esi+4]            ; pixel 1

        shr     dl, 6                   ; blue1  -> bits 0..1
        mov     ebx, eax

        shr     al, 6                   ; blue0  -> bits 0..1
        and     ah, 0e0h                ; green0 top 3 bits

        shr     ebx, 16                 ; bl = red0
        and     dh, 0e0h                ; green1 top 3 bits

        shr     ah, 3                   ; green0 -> bits 2..4
        and     bl, 0e0h                ; red0   -> bits 5..7

        shr     dh, 3                   ; green1 -> bits 2..4

        or      al, bl

        mov     ebx, edx                ; bits 16..23 still hold red1
        or      al, ah                  ; al = packed pixel 0

        shr     ebx, 16                 ; bl = red1
        or      dl, dh

        and     bl, 0e0h

        or      dl, bl                  ; dl = packed pixel 1

        mov     ah, dl                  ; ax = pixel1:pixel0

        ; ---- pixels 2 and 3 -> AL and AH of the rotated register --------
        mov     ebx, [esi+8]            ; pixel 2

        mov     edx, ebx
        and     bh, 0e0h                ; green2 top 3 bits

        shr     bl, 6                   ; blue2  -> bits 0..1
        and     edx, 0e00000h           ; red2 top 3 bits, still in bits 21..23

        shr     edx, 16                 ; dl = red2 & 0E0h

        shr     bh, 3                   ; green2 -> bits 2..4

        ror     eax, 16                 ; park pixel1:pixel0 in the high word
        or      bl, dl

        mov     edx, [esi+12]           ; pixel 3
        or      bl, bh                  ; bl = packed pixel 2

        mov     al, bl

        mov     ebx, edx
        and     dh, 0e0h                ; green3 top 3 bits

        shr     dl, 6                   ; blue3  -> bits 0..1
        and     ebx, 0e00000h           ; red3 top 3 bits, still in bits 21..23

        shr     dh, 3                   ; green3 -> bits 2..4
        mov     ah, dl

        shr     ebx, 16                 ; bl = red3 & 0E0h
        or      ah, dh

        or      ah, bl                  ; ah = packed pixel 3

        rol     eax, 16                 ; eax = p3:p2:p1:p0
        add     esi, byte 16

        mov     [edi], eax
        add     edi, byte 4

        dec     ecx
        jz      .tail                   ; loop head is out of short-jump range,
                                        ; so branch out and jmp back instead
        jmp     .quad_loop

.tail:
        pop     ecx
        and     ecx, byte 3             ; 0..3 pixels still to convert

        jz      .done

.tail_loop:
        mov     eax, [esi]              ; one pixel (whole dword is read)

        mov     ebx, eax

        shr     al, 6                   ; blue  -> bits 0..1
        and     ah, 0e0h                ; green top 3 bits

        shr     ebx, 16                 ; bl = red

        shr     ah, 3                   ; green -> bits 2..4
        and     bl, 0e0h                ; red   -> bits 5..7

        or      al, ah
        or      al, bl

        mov     [edi], al

        inc     edi
        add     esi, byte 4

        dec     ecx
        jnz     .tail_loop

.done:
        jmp     _x86return
1199
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1043 |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1044 %ifidn __OUTPUT_FORMAT__,elf |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1045 section .note.GNU-stack noalloc noexec nowrite progbits |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1046 %endif |