Mercurial > sdl-ios-xcode
annotate src/hermes/x86p_32.asm @ 1795:398ac0f88e4d
Fixed bug #220
The AltiVec blitters don't compile, since they require __VEC__ to be enabled in
order for the compiler to understand "vector" and friends (i.e. do AltiVec)
But you don't want to turn AltiVec on globally, since then the code would only
run on a G4 (there are already runtime tests, before using the AltiVec
variants)
The solution here is to enable AltiVec locally, for the actual AltiVec code.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Tue, 09 May 2006 15:09:47 +0000 |
parents | 2d6dc7de1145 |
children | 393092a3ebf6 |
rev | line source |
---|---|
0 | 1 ; |
2 ; x86 format converters for HERMES | |
3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) | |
4 ; This source code is licensed under the GNU LGPL | |
5 ; | |
6 ; Please refer to the file COPYING.LIB contained in the distribution for | |
7 ; licensing conditions | |
8 ; | |
9 ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission | |
10 ; | |
11 | |
12 | |
13 BITS 32 | |
14 | |
15 GLOBAL _ConvertX86p32_32BGR888 | |
16 GLOBAL _ConvertX86p32_32RGBA888 | |
17 GLOBAL _ConvertX86p32_32BGRA888 | |
18 GLOBAL _ConvertX86p32_24RGB888 | |
19 GLOBAL _ConvertX86p32_24BGR888 | |
20 GLOBAL _ConvertX86p32_16RGB565 | |
21 GLOBAL _ConvertX86p32_16BGR565 | |
22 GLOBAL _ConvertX86p32_16RGB555 | |
23 GLOBAL _ConvertX86p32_16BGR555 | |
24 GLOBAL _ConvertX86p32_8RGB332 | |
25 | |
26 EXTERN _x86return | |
1166
da33b7e6d181
Date: Tue, 1 Nov 2005 20:25:10 +0100
Sam Lantinga <slouken@libsdl.org>
parents:
0
diff
changeset
|
27 |
0 | 28 SECTION .text |
29 | |
30 ;; _Convert_* | |
31 ;; Parameters: | |
32 ;; ESI = source | |
33 ;; EDI = dest | |
34 ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) | |
35 ;; Destroys: | |
36 ;; EAX, EBX, EDX | |
37 | |
38 | |
39 _ConvertX86p32_32BGR888: | |
; Converts ECX 32-bit pixels read from [ESI] and writes them to [EDI].
; Each dword has its bytes 0 and 2 exchanged (bswap followed by ror 8);
; bytes 1 and 3 keep their positions.
; Counts of 32 or fewer are handled by the one-pixel loop at .L1. Larger
; counts run the four-pixel unrolled loop at .L4, then the tail loop at .L5
; for the remaining (count & 3) pixels.
; ESI and EDI are advanced past the processed pixels; EBP is saved and
; restored around the unrolled path. Exits by jumping to _x86return.
40 | |
41 ; check short | |
42 cmp ecx,BYTE 32 | |
43 ja .L3 | |
44 | |
45 .L1 ; short loop | |
46 mov edx,[esi] | |
47 bswap edx | |
48 ror edx,8 | |
49 mov [edi],edx | |
50 add esi,BYTE 4 | |
51 add edi,BYTE 4 | |
52 dec ecx | |
53 jnz .L1 | |
54 .L2 | |
55 jmp _x86return | |
56 | |
57 .L3 ; save ebp | |
58 push ebp | |
59 | |
60 ; unroll four times | |
; EBP = number of four-pixel groups (count / 4)
61 mov ebp,ecx | |
62 shr ebp,2 | |
63 | |
64 ; save count | |
; ECX is reused as a data register inside the unrolled loop, so the
; original count is kept on the stack until the tail check.
65 push ecx | |
66 | |
67 .L4 mov eax,[esi] | |
68 mov ebx,[esi+4] | |
69 | |
70 bswap eax | |
71 | |
72 bswap ebx | |
73 | |
74 ror eax,8 | |
75 mov ecx,[esi+8] | |
76 | |
77 ror ebx,8 | |
78 mov edx,[esi+12] | |
79 | |
80 bswap ecx | |
81 | |
82 bswap edx | |
83 | |
84 ror ecx,8 | |
85 mov [edi+0],eax | |
86 | |
87 ror edx,8 | |
88 mov [edi+4],ebx | |
89 | |
90 mov [edi+8],ecx | |
91 mov [edi+12],edx | |
92 | |
93 add esi,BYTE 16 | |
94 add edi,BYTE 16 | |
95 | |
96 dec ebp | |
97 jnz .L4 | |
98 | |
99 ; check tail | |
; Recover the saved count; its low two bits are the pixels left over
; after the groups of four.
100 pop ecx | |
101 and ecx,BYTE 11b | |
102 jz .L6 | |
103 | |
104 .L5 ; tail loop | |
105 mov edx,[esi] | |
106 bswap edx | |
107 ror edx,8 | |
108 mov [edi],edx | |
109 add esi,BYTE 4 | |
110 add edi,BYTE 4 | |
111 dec ecx | |
112 jnz .L5 | |
113 | |
114 .L6 pop ebp | |
115 jmp _x86return | |
116 | |
117 | |
118 | |
119 | |
120 _ConvertX86p32_32RGBA888: | |
; Converts ECX 32-bit pixels read from [ESI] and writes them to [EDI].
; Each dword is rotated left by 8 bits, so source bytes 0..2 move up one
; byte position and source byte 3 becomes byte 0.
; Counts of 32 or fewer use the one-pixel loop at .L1. Larger counts run
; the four-pixel unrolled loop at .L4, then the tail loop at .L5 for the
; remaining (count & 3) pixels.
; ESI and EDI are advanced past the processed pixels; EBP is saved and
; restored around the unrolled path. Exits by jumping to _x86return.
121 | |
122 ; check short | |
123 cmp ecx,BYTE 32 | |
124 ja .L3 | |
125 | |
126 .L1 ; short loop | |
127 mov edx,[esi] | |
128 rol edx,8 | |
129 mov [edi],edx | |
130 add esi,BYTE 4 | |
131 add edi,BYTE 4 | |
132 dec ecx | |
133 jnz .L1 | |
134 .L2 | |
135 jmp _x86return | |
136 | |
137 .L3 ; save ebp | |
138 push ebp | |
139 | |
140 ; unroll four times | |
; EBP = number of four-pixel groups (count / 4)
141 mov ebp,ecx | |
142 shr ebp,2 | |
143 | |
144 ; save count | |
; ECX is reused as a data register inside the unrolled loop.
145 push ecx | |
146 | |
147 .L4 mov eax,[esi] | |
148 mov ebx,[esi+4] | |
149 | |
150 rol eax,8 | |
151 mov ecx,[esi+8] | |
152 | |
153 rol ebx,8 | |
154 mov edx,[esi+12] | |
155 | |
156 rol ecx,8 | |
157 mov [edi+0],eax | |
158 | |
159 rol edx,8 | |
160 mov [edi+4],ebx | |
161 | |
162 mov [edi+8],ecx | |
163 mov [edi+12],edx | |
164 | |
165 add esi,BYTE 16 | |
166 add edi,BYTE 16 | |
167 | |
168 dec ebp | |
169 jnz .L4 | |
170 | |
171 ; check tail | |
; Low two bits of the saved count = pixels left after the groups of four.
172 pop ecx | |
173 and ecx,BYTE 11b | |
174 jz .L6 | |
175 | |
176 .L5 ; tail loop | |
177 mov edx,[esi] | |
178 rol edx,8 | |
179 mov [edi],edx | |
180 add esi,BYTE 4 | |
181 add edi,BYTE 4 | |
182 dec ecx | |
183 jnz .L5 | |
184 | |
185 .L6 pop ebp | |
186 jmp _x86return | |
187 | |
188 | |
189 | |
190 | |
191 _ConvertX86p32_32BGRA888: | |
; Converts ECX 32-bit pixels read from [ESI] and writes them to [EDI].
; Each dword is byte-reversed with bswap (byte 0 <-> byte 3,
; byte 1 <-> byte 2).
; Counts of 32 or fewer use the one-pixel loop at .L1. Larger counts run
; the four-pixel unrolled loop at .L4, then the tail loop at .L5 for the
; remaining (count & 3) pixels.
; ESI and EDI are advanced past the processed pixels; EBP is saved and
; restored around the unrolled path. Exits by jumping to _x86return.
192 | |
193 ; check short | |
194 cmp ecx,BYTE 32 | |
195 ja .L3 | |
196 | |
197 .L1 ; short loop | |
198 mov edx,[esi] | |
199 bswap edx | |
200 mov [edi],edx | |
201 add esi,BYTE 4 | |
202 add edi,BYTE 4 | |
203 dec ecx | |
204 jnz .L1 | |
205 .L2 | |
206 jmp _x86return | |
207 | |
208 .L3 ; save ebp | |
209 push ebp | |
210 | |
211 ; unroll four times | |
; EBP = number of four-pixel groups (count / 4)
212 mov ebp,ecx | |
213 shr ebp,2 | |
214 | |
215 ; save count | |
; ECX is reused as a data register inside the unrolled loop.
216 push ecx | |
217 | |
218 .L4 mov eax,[esi] | |
219 mov ebx,[esi+4] | |
220 | |
221 mov ecx,[esi+8] | |
222 mov edx,[esi+12] | |
223 | |
224 bswap eax | |
225 | |
226 bswap ebx | |
227 | |
228 bswap ecx | |
229 | |
230 bswap edx | |
231 | |
232 mov [edi+0],eax | |
233 mov [edi+4],ebx | |
234 | |
235 mov [edi+8],ecx | |
236 mov [edi+12],edx | |
237 | |
238 add esi,BYTE 16 | |
239 add edi,BYTE 16 | |
240 | |
241 dec ebp | |
242 jnz .L4 | |
243 | |
244 ; check tail | |
; Low two bits of the saved count = pixels left after the groups of four.
245 pop ecx | |
246 and ecx,BYTE 11b | |
247 jz .L6 | |
248 | |
249 .L5 ; tail loop | |
250 mov edx,[esi] | |
251 bswap edx | |
252 mov [edi],edx | |
253 add esi,BYTE 4 | |
254 add edi,BYTE 4 | |
255 dec ecx | |
256 jnz .L5 | |
257 | |
258 .L6 pop ebp | |
259 jmp _x86return | |
260 | |
261 | |
262 | |
263 | |
264 ;; 32 bit RGB 888 to 24 BIT RGB 888 | |
265 | |
266 _ConvertX86p32_24RGB888: | |
; Converts ECX 32-bit pixels at [ESI] into packed 3-byte pixels at [EDI].
; Source bytes 0, 1 and 2 of each pixel are copied in the same order and
; source byte 3 is dropped.
; Counts of 32 or fewer use the byte-wise loop at .L1. Larger counts:
;   .L3  converts single pixels until EDI is a multiple of 4,
;   .L5  converts four source pixels (16 bytes) into three dwords (12 bytes),
;   .L6  converts the remaining (count & 3) pixels byte-wise.
; EBP is saved and restored around the unrolled path.
; Exits by jumping to _x86return.
267 | |
268 ; check short | |
269 cmp ecx,BYTE 32 | |
270 ja .L3 | |
271 | |
272 .L1 ; short loop | |
273 mov al,[esi] | |
274 mov bl,[esi+1] | |
275 mov dl,[esi+2] | |
276 mov [edi],al | |
277 mov [edi+1],bl | |
278 mov [edi+2],dl | |
279 add esi,BYTE 4 | |
280 add edi,BYTE 3 | |
281 dec ecx | |
282 jnz .L1 | |
283 .L2 | |
284 jmp _x86return | |
285 | |
286 .L3 ; head | |
; Test the low two bits of EDI; loop back here after every head pixel
; until the destination is dword aligned. ECX is decremented per pixel.
287 mov edx,edi | |
288 and edx,BYTE 11b | |
289 jz .L4 | |
290 mov al,[esi] | |
291 mov bl,[esi+1] | |
292 mov dl,[esi+2] | |
293 mov [edi],al | |
294 mov [edi+1],bl | |
295 mov [edi+2],dl | |
296 add esi,BYTE 4 | |
297 add edi,BYTE 3 | |
298 dec ecx | |
299 jmp SHORT .L3 | |
300 | |
301 .L4 ; unroll 4 times | |
; EBP = number of four-pixel groups (remaining count / 4)
302 push ebp | |
303 mov ebp,ecx | |
304 shr ebp,2 | |
305 | |
306 ; save count | |
; ECX is reused as a data register inside the unrolled loop.
307 push ecx | |
308 | |
309 .L5 mov eax,[esi] ; first dword eax = [A][R][G][B] | |
310 mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] | |
311 | |
312 shl eax,8 ; eax = [R][G][B][.] | |
313 mov ecx,[esi+12] ; third dword ecx = [a][r][g][b] | |
314 | |
315 shl ebx,8 ; ebx = [r][g][b][.] | |
316 mov al,[esi+4] ; eax = [R][G][B][b] | |
317 | |
318 ror eax,8 ; eax = [b][R][G][B] (done) | |
319 mov bh,[esi+8+1] ; ebx = [r][g][G][.] | |
320 | |
321 mov [edi],eax | |
; EDI is advanced here, before the other two stores; those stores
; compensate with a -3*4 displacement.
322 add edi,BYTE 3*4 | |
323 | |
324 shl ecx,8 ; ecx = [r][g][b][.] | |
325 mov bl,[esi+8+0] ; ebx = [r][g][G][B] | |
326 | |
327 rol ebx,16 ; ebx = [G][B][r][g] (done) | |
328 mov cl,[esi+8+2] ; ecx = [r][g][b][R] (done) | |
329 | |
330 mov [edi+4-3*4],ebx | |
331 add esi,BYTE 4*4 | |
332 | |
333 mov [edi+8-3*4],ecx | |
334 dec ebp | |
335 | |
336 jnz .L5 | |
337 | |
338 ; check tail | |
; Low two bits of the saved count = pixels left after the groups of four.
339 pop ecx | |
340 and ecx,BYTE 11b | |
341 jz .L7 | |
342 | |
343 .L6 ; tail loop | |
344 mov al,[esi] | |
345 mov bl,[esi+1] | |
346 mov dl,[esi+2] | |
347 mov [edi],al | |
348 mov [edi+1],bl | |
349 mov [edi+2],dl | |
350 add esi,BYTE 4 | |
351 add edi,BYTE 3 | |
352 dec ecx | |
353 jnz .L6 | |
354 | |
355 .L7 pop ebp | |
356 jmp _x86return | |
357 | |
358 | |
359 | |
360 | |
361 ;; 32 bit RGB 888 to 24 bit BGR 888 | |
362 | |
363 _ConvertX86p32_24BGR888: | |
; Converts ECX 32-bit pixels at [ESI] into packed 3-byte pixels at [EDI].
; Source bytes 2, 1 and 0 of each pixel are written in that order (the
; three colour bytes are reversed) and source byte 3 is dropped.
; Counts of 32 or fewer use the byte-wise loop at .L1. Larger counts:
;   .L3  converts single pixels until EDI is a multiple of 4,
;   .L5  converts four source pixels (16 bytes) into three dwords (12 bytes),
;   .L6  converts the remaining (count & 3) pixels byte-wise.
; EBP is saved and restored around the unrolled path.
; Exits by jumping to _x86return.
364 | |
365 ; check short | |
366 cmp ecx,BYTE 32 | |
367 ja .L3 | |
368 | |
369 | |
370 .L1 ; short loop | |
371 mov dl,[esi] | |
372 mov bl,[esi+1] | |
373 mov al,[esi+2] | |
374 mov [edi],al | |
375 mov [edi+1],bl | |
376 mov [edi+2],dl | |
377 add esi,BYTE 4 | |
378 add edi,BYTE 3 | |
379 dec ecx | |
380 jnz .L1 | |
381 .L2 | |
382 jmp _x86return | |
383 | |
384 .L3 ; head | |
; Test the low two bits of EDI; loop back here after every head pixel
; until the destination is dword aligned. ECX is decremented per pixel.
385 mov edx,edi | |
386 and edx,BYTE 11b | |
387 jz .L4 | |
388 mov dl,[esi] | |
389 mov bl,[esi+1] | |
390 mov al,[esi+2] | |
391 mov [edi],al | |
392 mov [edi+1],bl | |
393 mov [edi+2],dl | |
394 add esi,BYTE 4 | |
395 add edi,BYTE 3 | |
396 dec ecx | |
397 jmp SHORT .L3 | |
398 | |
399 .L4 ; unroll 4 times | |
; EBP = number of four-pixel groups (remaining count / 4)
400 push ebp | |
401 mov ebp,ecx | |
402 shr ebp,2 | |
403 | |
404 ; save count | |
; ECX is reused as a data register inside the unrolled loop.
405 push ecx | |
406 | |
407 .L5 | |
408 mov eax,[esi] ; first dword eax = [A][R][G][B] | |
409 mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] | |
410 | |
411 bswap eax ; eax = [B][G][R][A] | |
412 | |
413 bswap ebx ; ebx = [b][g][r][a] | |
414 | |
415 mov al,[esi+4+2] ; eax = [B][G][R][r] | |
416 mov bh,[esi+4+4+1] ; ebx = [b][g][G][a] | |
417 | |
418 ror eax,8 ; eax = [r][B][G][R] (done) | |
419 mov bl,[esi+4+4+2] ; ebx = [b][g][G][R] | |
420 | |
421 ror ebx,16 ; ebx = [G][R][b][g] (done) | |
422 mov [edi],eax | |
423 | |
424 mov [edi+4],ebx | |
425 mov ecx,[esi+12] ; third dword ecx = [a][r][g][b] | |
426 | |
427 bswap ecx ; ecx = [b][g][r][a] | |
428 | |
429 mov cl,[esi+8] ; ecx = [b][g][r][B] (done) | |
430 add esi,BYTE 4*4 | |
431 | |
432 mov [edi+8],ecx | |
433 add edi,BYTE 3*4 | |
434 | |
435 dec ebp | |
436 jnz .L5 | |
437 | |
438 ; check tail | |
; Low two bits of the saved count = pixels left after the groups of four.
439 pop ecx | |
440 and ecx,BYTE 11b | |
441 jz .L7 | |
442 | |
443 .L6 ; tail loop | |
444 mov dl,[esi] | |
445 mov bl,[esi+1] | |
446 mov al,[esi+2] | |
447 mov [edi],al | |
448 mov [edi+1],bl | |
449 mov [edi+2],dl | |
450 add esi,BYTE 4 | |
451 add edi,BYTE 3 | |
452 dec ecx | |
453 jnz .L6 | |
454 | |
455 .L7 | |
456 pop ebp | |
457 jmp _x86return | |
458 | |
459 | |
460 | |
461 | |
462 ;; 32 bit RGB 888 to 16 BIT RGB 565 | |
463 | |
464 _ConvertX86p32_16RGB565: | |
; Converts ECX 32-bit pixels at [ESI] into 16-bit pixels at [EDI].
; Output word layout: bits 11-15 = top 5 bits of source byte 2 (red),
; bits 5-10 = top 6 bits of source byte 1 (green), bits 0-4 = top 5 bits
; of source byte 0 (blue).
; Counts of 16 or fewer use the one-pixel loop at .L1. Larger counts:
;   .L3  converts one pixel if EDI is not a multiple of 4 (no loop),
;   .L5/.L6  convert two pixels per iteration into one dword,
;   tail  converts one more pixel if the remaining count was odd.
; Exits by jumping to _x86return.
465 ; check short | |
466 cmp ecx,BYTE 16 | |
467 ja .L3 | |
468 | |
469 .L1 ; short loop | |
470 mov bl,[esi+0] ; blue | |
471 mov al,[esi+1] ; green | |
472 mov ah,[esi+2] ; red | |
473 shr ah,3 | |
474 and al,11111100b | |
475 shl eax,3 | |
476 shr bl,3 | |
477 add al,bl | |
478 mov [edi+0],al | |
479 mov [edi+1],ah | |
480 add esi,BYTE 4 | |
481 add edi,BYTE 2 | |
482 dec ecx | |
483 jnz .L1 | |
484 | |
485 .L2: ; End of short loop | |
486 jmp _x86return | |
487 | |
488 | |
489 .L3 ; head | |
490 mov ebx,edi | |
491 and ebx,BYTE 11b | |
492 jz .L4 | |
493 | |
494 mov bl,[esi+0] ; blue | |
495 mov al,[esi+1] ; green | |
496 mov ah,[esi+2] ; red | |
497 shr ah,3 | |
498 and al,11111100b | |
499 shl eax,3 | |
500 shr bl,3 | |
501 add al,bl | |
502 mov [edi+0],al | |
503 mov [edi+1],ah | |
504 add esi,BYTE 4 | |
505 add edi,BYTE 2 | |
506 dec ecx | |
507 | |
508 .L4: | |
509 ; save count | |
510 push ecx | |
511 | |
512 ; unroll twice | |
; ECX = number of pixel pairs
513 shr ecx,1 | |
514 | |
515 ; point arrays to end | |
; 8 source bytes and 4 destination bytes per pair
516 lea esi,[esi+ecx*8] | |
517 lea edi,[edi+ecx*4] | |
518 | |
519 ; negative counter | |
; ECX becomes a negative index that counts up to zero. The first pass
; enters at .L6 so the deferred store at .L5 is skipped.
520 neg ecx | |
521 jmp SHORT .L6 | |
522 | |
523 .L5: | |
; Store the dword built by the previous iteration (ECX was already
; incremented, hence the -4 displacement).
524 mov [edi+ecx*4-4],eax | |
525 .L6: | |
; EAX = first pixel of the pair; EBX and EDX both load the second pixel.
526 mov eax,[esi+ecx*8] | |
527 | |
528 shr ah,2 | |
529 mov ebx,[esi+ecx*8+4] | |
530 | |
531 shr eax,3 | |
532 mov edx,[esi+ecx*8+4] | |
533 | |
534 shr bh,2 | |
; DL is replaced with the first pixel's red byte, so EDX holds the red
; bytes of both pixels.
535 mov dl,[esi+ecx*8+2] | |
536 | |
537 shl ebx,13 | |
; keep blue+green of the first pixel (bits 0-10)
538 and eax,000007FFh | |
539 | |
540 shl edx,8 | |
; keep blue+green of the second pixel (bits 16-26)
541 and ebx,07FF0000h | |
542 | |
; keep the 5-bit red fields of both pixels (bits 11-15 and 27-31)
543 and edx,0F800F800h | |
544 add eax,ebx | |
545 | |
546 add eax,edx | |
547 inc ecx | |
548 | |
549 jnz .L5 | |
550 | |
; ECX is zero here: store the dword built by the final iteration.
551 mov [edi+ecx*4-4],eax | |
552 | |
553 ; tail | |
; Bit 0 of the saved count tells whether one unpaired pixel remains;
; ESI/EDI already point just past the paired region.
554 pop ecx | |
555 test cl,1 | |
556 jz .L7 | |
557 | |
558 mov bl,[esi+0] ; blue | |
559 mov al,[esi+1] ; green | |
560 mov ah,[esi+2] ; red | |
561 shr ah,3 | |
562 and al,11111100b | |
563 shl eax,3 | |
564 shr bl,3 | |
565 add al,bl | |
566 mov [edi+0],al | |
567 mov [edi+1],ah | |
568 add esi,BYTE 4 | |
569 add edi,BYTE 2 | |
570 | |
571 .L7: | |
572 jmp _x86return | |
573 | |
574 | |
575 | |
576 | |
577 ;; 32 bit RGB 888 to 16 BIT BGR 565 | |
578 | |
579 _ConvertX86p32_16BGR565: | |
; Converts ECX 32-bit pixels at [ESI] into 16-bit pixels at [EDI].
; Output word layout: bits 11-15 = top 5 bits of source byte 0 (blue),
; bits 5-10 = top 6 bits of source byte 1 (green), bits 0-4 = top 5 bits
; of source byte 2 (red).
; Counts of 16 or fewer use the one-pixel loop at .L1. Larger counts:
;   .L3  converts one pixel if EDI is not a multiple of 4 (no loop),
;   .L5/.L6  convert two pixels per iteration into one dword,
;   tail  converts one more pixel if the remaining count was odd.
; Exits by jumping to _x86return.
580 | |
581 ; check short | |
582 cmp ecx,BYTE 16 | |
583 ja .L3 | |
584 | |
585 .L1 ; short loop | |
586 mov ah,[esi+0] ; blue | |
587 mov al,[esi+1] ; green | |
588 mov bl,[esi+2] ; red | |
589 shr ah,3 | |
590 and al,11111100b | |
591 shl eax,3 | |
592 shr bl,3 | |
593 add al,bl | |
594 mov [edi+0],al | |
595 mov [edi+1],ah | |
596 add esi,BYTE 4 | |
597 add edi,BYTE 2 | |
598 dec ecx | |
599 jnz .L1 | |
600 .L2 | |
601 jmp _x86return | |
602 | |
603 .L3 ; head | |
604 mov ebx,edi | |
605 and ebx,BYTE 11b | |
606 jz .L4 | |
607 mov ah,[esi+0] ; blue | |
608 mov al,[esi+1] ; green | |
609 mov bl,[esi+2] ; red | |
610 shr ah,3 | |
611 and al,11111100b | |
612 shl eax,3 | |
613 shr bl,3 | |
614 add al,bl | |
615 mov [edi+0],al | |
616 mov [edi+1],ah | |
617 add esi,BYTE 4 | |
618 add edi,BYTE 2 | |
619 dec ecx | |
620 | |
621 .L4 ; save count | |
622 push ecx | |
623 | |
624 ; unroll twice | |
; ECX = number of pixel pairs
625 shr ecx,1 | |
626 | |
627 ; point arrays to end | |
; 8 source bytes and 4 destination bytes per pair
628 lea esi,[esi+ecx*8] | |
629 lea edi,[edi+ecx*4] | |
630 | |
631 ; negative count | |
; ECX becomes a negative index that counts up to zero. The first pass
; enters at .L6 so the deferred store at .L5 is skipped.
632 neg ecx | |
633 jmp SHORT .L6 | |
634 | |
635 .L5 | |
; Store the dword built by the previous iteration (ECX was already
; incremented, hence the -4 displacement).
636 mov [edi+ecx*4-4],eax | |
637 .L6 | |
; EDX = second pixel; AH/AL = blue/green of the first pixel;
; BH/BL = blue/green of the second pixel.
638 mov edx,[esi+ecx*8+4] | |
639 | |
640 mov bh,[esi+ecx*8+4] | |
641 mov ah,[esi+ecx*8] | |
642 | |
643 shr bh,3 | |
644 mov al,[esi+ecx*8+1] | |
645 | |
646 shr ah,3 | |
647 mov bl,[esi+ecx*8+5] | |
648 | |
649 shl eax,3 | |
; DL is replaced with the first pixel's red byte, so EDX holds the red
; bytes of both pixels.
650 mov dl,[esi+ecx*8+2] | |
651 | |
652 shl ebx,19 | |
; keep blue+green of the first pixel (bits 5-15)
653 and eax,0000FFE0h | |
654 | |
655 shr edx,3 | |
; keep blue+green of the second pixel (bits 21-31)
656 and ebx,0FFE00000h | |
657 | |
; keep the 5-bit red fields of both pixels (bits 0-4 and 16-20)
658 and edx,001F001Fh | |
659 add eax,ebx | |
660 | |
661 add eax,edx | |
662 inc ecx | |
663 | |
664 jnz .L5 | |
665 | |
; ECX is zero here: store the dword built by the final iteration.
666 mov [edi+ecx*4-4],eax | |
667 | |
668 ; tail | |
; Bit 0 of the saved count tells whether one unpaired pixel remains;
; ESI/EDI already point just past the paired region.
669 pop ecx | |
670 and ecx,BYTE 1 | |
671 jz .L7 | |
672 mov ah,[esi+0] ; blue | |
673 mov al,[esi+1] ; green | |
674 mov bl,[esi+2] ; red | |
675 shr ah,3 | |
676 and al,11111100b | |
677 shl eax,3 | |
678 shr bl,3 | |
679 add al,bl | |
680 mov [edi+0],al | |
681 mov [edi+1],ah | |
682 add esi,BYTE 4 | |
683 add edi,BYTE 2 | |
684 | |
685 .L7 | |
686 jmp _x86return | |
687 | |
688 | |
689 | |
690 | |
691 ;; 32 BIT RGB TO 16 BIT RGB 555 | |
692 | |
693 _ConvertX86p32_16RGB555: | |
; Converts ECX 32-bit pixels at [ESI] into 16-bit pixels at [EDI].
; Output word layout: bits 10-14 = top 5 bits of source byte 2 (red),
; bits 5-9 = top 5 bits of source byte 1 (green), bits 0-4 = top 5 bits
; of source byte 0 (blue); bit 15 is zero.
; Counts of 16 or fewer use the one-pixel loop at .L1. Larger counts:
;   .L3  converts one pixel if EDI is not a multiple of 4 (no loop),
;   .L5/.L6  convert two pixels per iteration into one dword,
;   tail  converts one more pixel if the remaining count was odd.
; Exits by jumping to _x86return.
694 | |
695 ; check short | |
696 cmp ecx,BYTE 16 | |
697 ja .L3 | |
698 | |
699 .L1 ; short loop | |
700 mov bl,[esi+0] ; blue | |
701 mov al,[esi+1] ; green | |
702 mov ah,[esi+2] ; red | |
703 shr ah,3 | |
704 and al,11111000b | |
705 shl eax,2 | |
706 shr bl,3 | |
707 add al,bl | |
708 mov [edi+0],al | |
709 mov [edi+1],ah | |
710 add esi,BYTE 4 | |
711 add edi,BYTE 2 | |
712 dec ecx | |
713 jnz .L1 | |
714 .L2 | |
715 jmp _x86return | |
716 | |
717 .L3 ; head | |
718 mov ebx,edi | |
719 and ebx,BYTE 11b | |
720 jz .L4 | |
721 mov bl,[esi+0] ; blue | |
722 mov al,[esi+1] ; green | |
723 mov ah,[esi+2] ; red | |
724 shr ah,3 | |
725 and al,11111000b | |
726 shl eax,2 | |
727 shr bl,3 | |
728 add al,bl | |
729 mov [edi+0],al | |
730 mov [edi+1],ah | |
731 add esi,BYTE 4 | |
732 add edi,BYTE 2 | |
733 dec ecx | |
734 | |
735 .L4 ; save count | |
736 push ecx | |
737 | |
738 ; unroll twice | |
; ECX = number of pixel pairs
739 shr ecx,1 | |
740 | |
741 ; point arrays to end | |
; 8 source bytes and 4 destination bytes per pair
742 lea esi,[esi+ecx*8] | |
743 lea edi,[edi+ecx*4] | |
744 | |
745 ; negative counter | |
; ECX becomes a negative index that counts up to zero. The first pass
; enters at .L6 so the deferred store at .L5 is skipped.
746 neg ecx | |
747 jmp SHORT .L6 | |
748 | |
749 .L5 | |
; Store the dword built by the previous iteration (ECX was already
; incremented, hence the -4 displacement).
750 mov [edi+ecx*4-4],eax | |
751 .L6 | |
; EAX = first pixel of the pair; EBX and EDX both load the second pixel.
752 mov eax,[esi+ecx*8] | |
753 | |
754 shr ah,3 | |
755 mov ebx,[esi+ecx*8+4] | |
756 | |
757 shr eax,3 | |
758 mov edx,[esi+ecx*8+4] | |
759 | |
760 shr bh,3 | |
; DL is replaced with the first pixel's red byte, so EDX holds the red
; bytes of both pixels.
761 mov dl,[esi+ecx*8+2] | |
762 | |
763 shl ebx,13 | |
; keep blue+green of the first pixel; bit 10 is already clear because
; green was reduced to 5 bits by shr ah,3
764 and eax,000007FFh | |
765 | |
766 shl edx,7 | |
; keep blue+green of the second pixel (bit 26 likewise already clear)
767 and ebx,07FF0000h | |
768 | |
; keep the 5-bit red fields of both pixels (bits 10-14 and 26-30)
769 and edx,07C007C00h | |
770 add eax,ebx | |
771 | |
772 add eax,edx | |
773 inc ecx | |
774 | |
775 jnz .L5 | |
776 | |
; ECX is zero here: store the dword built by the final iteration.
777 mov [edi+ecx*4-4],eax | |
778 | |
779 ; tail | |
; Bit 0 of the saved count tells whether one unpaired pixel remains;
; ESI/EDI already point just past the paired region.
780 pop ecx | |
781 and ecx,BYTE 1 | |
782 jz .L7 | |
783 mov bl,[esi+0] ; blue | |
784 mov al,[esi+1] ; green | |
785 mov ah,[esi+2] ; red | |
786 shr ah,3 | |
787 and al,11111000b | |
788 shl eax,2 | |
789 shr bl,3 | |
790 add al,bl | |
791 mov [edi+0],al | |
792 mov [edi+1],ah | |
793 add esi,BYTE 4 | |
794 add edi,BYTE 2 | |
795 | |
796 .L7 | |
797 jmp _x86return | |
798 | |
799 | |
800 | |
801 | |
802 ;; 32 BIT RGB TO 16 BIT BGR 555 | |
803 | |
804 _ConvertX86p32_16BGR555: | |
; Converts ECX 32-bit pixels at [ESI] into 16-bit pixels at [EDI].
; Output word layout: bits 10-14 = top 5 bits of source byte 0 (blue),
; bits 5-9 = top 5 bits of source byte 1 (green), bits 0-4 = top 5 bits
; of source byte 2 (red); bit 15 is zero.
; Counts of 16 or fewer use the one-pixel loop at .L1. Larger counts:
;   .L3  converts one pixel if EDI is not a multiple of 4 (no loop),
;   .L5/.L6  convert two pixels per iteration into one dword,
;   tail  converts one more pixel if the remaining count was odd.
; Exits by jumping to _x86return.
805 | |
806 ; check short | |
807 cmp ecx,BYTE 16 | |
808 ja .L3 | |
809 | |
810 | |
811 .L1 ; short loop | |
812 mov ah,[esi+0] ; blue | |
813 mov al,[esi+1] ; green | |
814 mov bl,[esi+2] ; red | |
815 shr ah,3 | |
816 and al,11111000b | |
817 shl eax,2 | |
818 shr bl,3 | |
819 add al,bl | |
820 mov [edi+0],al | |
821 mov [edi+1],ah | |
822 add esi,BYTE 4 | |
823 add edi,BYTE 2 | |
824 dec ecx | |
825 jnz .L1 | |
826 .L2 | |
827 jmp _x86return | |
828 | |
829 .L3 ; head | |
830 mov ebx,edi | |
831 and ebx,BYTE 11b | |
832 jz .L4 | |
833 mov ah,[esi+0] ; blue | |
834 mov al,[esi+1] ; green | |
835 mov bl,[esi+2] ; red | |
836 shr ah,3 | |
837 and al,11111000b | |
838 shl eax,2 | |
839 shr bl,3 | |
840 add al,bl | |
841 mov [edi+0],al | |
842 mov [edi+1],ah | |
843 add esi,BYTE 4 | |
844 add edi,BYTE 2 | |
845 dec ecx | |
846 | |
847 .L4 ; save count | |
848 push ecx | |
849 | |
850 ; unroll twice | |
; ECX = number of pixel pairs
851 shr ecx,1 | |
852 | |
853 ; point arrays to end | |
; 8 source bytes and 4 destination bytes per pair
854 lea esi,[esi+ecx*8] | |
855 lea edi,[edi+ecx*4] | |
856 | |
857 ; negative counter | |
; ECX becomes a negative index that counts up to zero. The first pass
; enters at .L6 so the deferred store at .L5 is skipped.
858 neg ecx | |
859 jmp SHORT .L6 | |
860 | |
861 .L5 | |
; Store the dword built by the previous iteration (ECX was already
; incremented, hence the -4 displacement).
862 mov [edi+ecx*4-4],eax | |
863 .L6 | |
; EDX = second pixel; AH/AL = blue/green of the first pixel;
; BH/BL = blue/green of the second pixel.
864 mov edx,[esi+ecx*8+4] | |
865 | |
866 mov bh,[esi+ecx*8+4] | |
867 mov ah,[esi+ecx*8] | |
868 | |
869 shr bh,3 | |
870 mov al,[esi+ecx*8+1] | |
871 | |
872 shr ah,3 | |
873 mov bl,[esi+ecx*8+5] | |
874 | |
875 shl eax,2 | |
; DL is replaced with the first pixel's red byte, so EDX holds the red
; bytes of both pixels.
876 mov dl,[esi+ecx*8+2] | |
877 | |
878 shl ebx,18 | |
; keep blue+green of the first pixel (bits 5-14)
879 and eax,00007FE0h | |
880 | |
881 shr edx,3 | |
; keep blue+green of the second pixel (bits 21-30)
882 and ebx,07FE00000h | |
883 | |
; keep the 5-bit red fields of both pixels (bits 0-4 and 16-20)
884 and edx,001F001Fh | |
885 add eax,ebx | |
886 | |
887 add eax,edx | |
888 inc ecx | |
889 | |
890 jnz .L5 | |
891 | |
; ECX is zero here: store the dword built by the final iteration.
892 mov [edi+ecx*4-4],eax | |
893 | |
894 ; tail | |
; Bit 0 of the saved count tells whether one unpaired pixel remains;
; ESI/EDI already point just past the paired region.
895 pop ecx | |
896 and ecx,BYTE 1 | |
897 jz .L7 | |
898 mov ah,[esi+0] ; blue | |
899 mov al,[esi+1] ; green | |
900 mov bl,[esi+2] ; red | |
901 shr ah,3 | |
902 and al,11111000b | |
903 shl eax,2 | |
904 shr bl,3 | |
905 add al,bl | |
906 mov [edi+0],al | |
907 mov [edi+1],ah | |
908 add esi,BYTE 4 | |
909 add edi,BYTE 2 | |
910 | |
911 .L7 | |
912 jmp _x86return | |
913 | |
914 | |
915 | |
916 | |
917 | |
918 ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb) | |
919 ;; This routine writes FOUR pixels at once (dword) and then, if any remain, | |
920 ;; converts the trailing one to three pixels one byte at a time | |
921 _ConvertX86p32_8RGB332: | |
; Converts ECX 32-bit pixels at [ESI] into 8-bit pixels at [EDI].
; Output byte layout: bits 5-7 = top 3 bits of source byte 2, bits 2-4 =
; top 3 bits of source byte 1, bits 0-1 = top 2 bits of source byte 0.
; The main loop at .L1 packs four source pixels into one dword store;
; the loop at .L3 converts the remaining (count & 3) pixels one byte at a
; time. Unlike the other converters here there is no separate short path,
; and EBP is not used. Exits by jumping to _x86return.
922 | |
923 | |
; NOTE(review): .L_ALIGNED is not referenced anywhere inside this routine.
924 .L_ALIGNED | |
; Keep the full count on the stack; its low two bits are needed for the
; trailing pixels after ECX has been turned into a group counter.
925 push ecx | |
926 | |
927 shr ecx,2 ; We will draw 4 pixels at once | |
928 jnz .L1 | |
929 | |
930 jmp .L2 ; short jump out of range :( | |
931 | |
932 .L1: | |
933 mov eax,[esi] ; first pair of pixels | |
934 mov edx,[esi+4] | |
935 | |
936 shr dl,6 | |
937 mov ebx,eax | |
938 | |
939 shr al,6 | |
940 and ah,0e0h | |
941 | |
942 shr ebx,16 | |
943 and dh,0e0h | |
944 | |
945 shr ah,3 | |
946 and bl,0e0h | |
947 | |
948 shr dh,3 | |
949 | |
950 or al,bl | |
951 | |
952 mov ebx,edx | |
953 or al,ah | |
954 | |
955 shr ebx,16 | |
956 or dl,dh | |
957 | |
958 and bl,0e0h | |
959 | |
960 or dl,bl | |
961 | |
; AX now holds the first two output pixels (AL = pixel 0, AH = pixel 1).
962 mov ah,dl | |
963 | |
964 | |
965 | |
966 mov ebx,[esi+8] ; second pair of pixels | |
967 | |
968 mov edx,ebx | |
969 and bh,0e0h | |
970 | |
971 shr bl,6 | |
972 and edx,0e00000h | |
973 | |
974 shr edx,16 | |
975 | |
976 shr bh,3 | |
977 | |
; Move the finished pair into the upper half of EAX so AL/AH are free
; for pixels 2 and 3; the rol 16 below restores the final order.
978 ror eax,16 | |
979 or bl,dl | |
980 | |
981 mov edx,[esi+12] | |
982 or bl,bh | |
983 | |
984 mov al,bl | |
985 | |
986 mov ebx,edx | |
987 and dh,0e0h | |
988 | |
989 shr dl,6 | |
990 and ebx,0e00000h | |
991 | |
992 shr dh,3 | |
993 mov ah,dl | |
994 | |
995 shr ebx,16 | |
996 or ah,dh | |
997 | |
998 or ah,bl | |
999 | |
1000 rol eax,16 | |
1001 add esi,BYTE 16 | |
1002 | |
1003 mov [edi],eax | |
1004 add edi,BYTE 4 | |
1005 | |
1006 dec ecx | |
1007 jz .L2 ; L1 out of range for short jump :( | |
1008 | |
1009 jmp .L1 | |
1010 .L2: | |
1011 | |
1012 pop ecx | |
1013 and ecx,BYTE 3 ; mask out number of pixels to draw | |
1014 | |
1015 jz .L4 ; Nothing to do anymore | |
1016 | |
1017 .L3: | |
1018 mov eax,[esi] ; single pixel conversion for trailing pixels | |
1019 | |
1020 mov ebx,eax | |
1021 | |
1022 shr al,6 | |
1023 and ah,0e0h | |
1024 | |
1025 shr ebx,16 | |
1026 | |
1027 shr ah,3 | |
1028 and bl,0e0h | |
1029 | |
1030 or al,ah | |
1031 or al,bl | |
1032 | |
1033 mov [edi],al | |
1034 | |
1035 inc edi | |
1036 add esi,BYTE 4 | |
1037 | |
1038 dec ecx | |
1039 jnz .L3 | |
1040 | |
1041 .L4: | |
1042 jmp _x86return | |
1199
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1043 |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1044 %ifidn __OUTPUT_FORMAT__,elf |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1045 section .note.GNU-stack noalloc noexec nowrite progbits |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1046 %endif |