Mercurial > sdl-ios-xcode
annotate src/hermes/x86p_32.asm @ 1901:f1828a500391
Removed libc dependency on Windows again, to fix building with Visual C++ 2005 Express Edition.
Fixed performance problem with testsprite2 on the D3D driver.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Thu, 13 Jul 2006 08:13:02 +0000 |
parents | eb4d9d99849b |
children | 180fa05e98e2 |
rev | line source |
---|---|
0 | 1 ; |
2 ; x86 format converters for HERMES | |
3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) | |
4 ; This source code is licensed under the GNU LGPL | |
5 ; | |
6 ; Please refer to the file COPYING.LIB contained in the distribution for | |
7 ; licensing conditions | |
8 ; | |
9 ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission | |
10 ; | |
11 | |
12 BITS 32 | |
13 | |
1873
eb4d9d99849b
Renamed, per Mike's comment on bug #157
Sam Lantinga <slouken@libsdl.org>
parents:
1871
diff
changeset
|
14 %include "common.inc" |
1871 | 15 |
16 SDL_FUNC _ConvertX86p32_32BGR888 | |
17 SDL_FUNC _ConvertX86p32_32RGBA888 | |
18 SDL_FUNC _ConvertX86p32_32BGRA888 | |
19 SDL_FUNC _ConvertX86p32_24RGB888 | |
20 SDL_FUNC _ConvertX86p32_24BGR888 | |
21 SDL_FUNC _ConvertX86p32_16RGB565 | |
22 SDL_FUNC _ConvertX86p32_16BGR565 | |
23 SDL_FUNC _ConvertX86p32_16RGB555 | |
24 SDL_FUNC _ConvertX86p32_16BGR555 | |
25 SDL_FUNC _ConvertX86p32_8RGB332 | |
0 | 26 |
27 EXTERN _x86return | |
1166
da33b7e6d181
Date: Tue, 1 Nov 2005 20:25:10 +0100
Sam Lantinga <slouken@libsdl.org>
parents:
0
diff
changeset
|
28 |
0 | 29 SECTION .text |
30 | |
31 ;; _Convert_* | |
32 ;; Parameters: | |
33 ;; ESI = source | |
34 ;; EDI = dest | |
35 ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) | |
36 ;; Destroys: | |
37 ;; EAX, EBX, EDX | |
38 | |
39 | |
;; _ConvertX86p32_32BGR888
;; Per pixel: loads one dword, applies bswap followed by ror 8, stores it.
;; bswap turns bytes [b3][b2][b1][b0] into [b0][b1][b2][b3]; ror 8 then
;; yields [b3][b0][b1][b2], so bytes 0 and 2 trade places while bytes 1 and 3
;; keep their positions.
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero: the
;;      loops below decrement before testing).
;; Counts of 32 or fewer use the one-pixel loop at .L1; larger counts use the
;; four-pixel unrolled loop at .L4 followed by a 0..3 pixel tail at .L5.
;; EBP is pushed/popped around the unrolled path; every exit jumps to
;; _x86return.
40 _ConvertX86p32_32BGR888: | |
41 | |
42 ; check short | |
43 cmp ecx,BYTE 32 | |
44 ja .L3 | |
45 | |
46 .L1 ; short loop | |
47 mov edx,[esi] | |
48 bswap edx | |
49 ror edx,8 | |
50 mov [edi],edx | |
51 add esi,BYTE 4 | |
52 add edi,BYTE 4 | |
53 dec ecx | |
54 jnz .L1 | |
55 .L2 | |
56 jmp _x86return | |
57 | |
58 .L3 ; save ebp | |
59 push ebp | |
60 | |
61 ; unroll four times | |
;; EBP = number of four-pixel groups (count / 4); it is the loop counter
;; because ECX is reused as a data register inside .L4.
62 mov ebp,ecx | |
63 shr ebp,2 | |
64 | |
65 ; save count | |
66 push ecx | |
67 | |
;; Four independent pixels per pass in EAX/EBX/ECX/EDX; loads, swaps,
;; rotates and stores of different pixels are interleaved.
68 .L4 mov eax,[esi] | |
69 mov ebx,[esi+4] | |
70 | |
71 bswap eax | |
72 | |
73 bswap ebx | |
74 | |
75 ror eax,8 | |
76 mov ecx,[esi+8] | |
77 | |
78 ror ebx,8 | |
79 mov edx,[esi+12] | |
80 | |
81 bswap ecx | |
82 | |
83 bswap edx | |
84 | |
85 ror ecx,8 | |
86 mov [edi+0],eax | |
87 | |
88 ror edx,8 | |
89 mov [edi+4],ebx | |
90 | |
91 mov [edi+8],ecx | |
92 mov [edi+12],edx | |
93 | |
94 add esi,BYTE 16 | |
95 add edi,BYTE 16 | |
96 | |
97 dec ebp | |
98 jnz .L4 | |
99 | |
100 ; check tail | |
;; Restore the original count and keep only its low two bits: the number of
;; pixels left over after the groups of four.
101 pop ecx | |
102 and ecx,BYTE 11b | |
103 jz .L6 | |
104 | |
105 .L5 ; tail loop | |
106 mov edx,[esi] | |
107 bswap edx | |
108 ror edx,8 | |
109 mov [edi],edx | |
110 add esi,BYTE 4 | |
111 add edi,BYTE 4 | |
112 dec ecx | |
113 jnz .L5 | |
114 | |
115 .L6 pop ebp | |
116 jmp _x86return | |
117 | |
118 | |
119 | |
120 | |
;; _ConvertX86p32_32RGBA888
;; Per pixel: loads one dword, rotates it left by 8 bits, stores it.
;; Bytes [b3][b2][b1][b0] become [b2][b1][b0][b3].
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero: the
;;      loops below decrement before testing).
;; Counts of 32 or fewer use the one-pixel loop at .L1; larger counts use the
;; four-pixel unrolled loop at .L4 followed by a 0..3 pixel tail at .L5.
;; EBP is pushed/popped around the unrolled path; every exit jumps to
;; _x86return.
121 _ConvertX86p32_32RGBA888: | |
122 | |
123 ; check short | |
124 cmp ecx,BYTE 32 | |
125 ja .L3 | |
126 | |
127 .L1 ; short loop | |
128 mov edx,[esi] | |
129 rol edx,8 | |
130 mov [edi],edx | |
131 add esi,BYTE 4 | |
132 add edi,BYTE 4 | |
133 dec ecx | |
134 jnz .L1 | |
135 .L2 | |
136 jmp _x86return | |
137 | |
138 .L3 ; save ebp | |
139 push ebp | |
140 | |
141 ; unroll four times | |
;; EBP = number of four-pixel groups (count / 4); ECX is reused as a data
;; register inside .L4, so the original count is kept on the stack.
142 mov ebp,ecx | |
143 shr ebp,2 | |
144 | |
145 ; save count | |
146 push ecx | |
147 | |
148 .L4 mov eax,[esi] | |
149 mov ebx,[esi+4] | |
150 | |
151 rol eax,8 | |
152 mov ecx,[esi+8] | |
153 | |
154 rol ebx,8 | |
155 mov edx,[esi+12] | |
156 | |
157 rol ecx,8 | |
158 mov [edi+0],eax | |
159 | |
160 rol edx,8 | |
161 mov [edi+4],ebx | |
162 | |
163 mov [edi+8],ecx | |
164 mov [edi+12],edx | |
165 | |
166 add esi,BYTE 16 | |
167 add edi,BYTE 16 | |
168 | |
169 dec ebp | |
170 jnz .L4 | |
171 | |
172 ; check tail | |
;; Low two bits of the saved count = pixels remaining after groups of four.
173 pop ecx | |
174 and ecx,BYTE 11b | |
175 jz .L6 | |
176 | |
177 .L5 ; tail loop | |
178 mov edx,[esi] | |
179 rol edx,8 | |
180 mov [edi],edx | |
181 add esi,BYTE 4 | |
182 add edi,BYTE 4 | |
183 dec ecx | |
184 jnz .L5 | |
185 | |
186 .L6 pop ebp | |
187 jmp _x86return | |
188 | |
189 | |
190 | |
191 | |
;; _ConvertX86p32_32BGRA888
;; Per pixel: loads one dword, reverses its four bytes with bswap, stores it.
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero: the
;;      loops below decrement before testing).
;; Counts of 32 or fewer use the one-pixel loop at .L1; larger counts use the
;; four-pixel unrolled loop at .L4 followed by a 0..3 pixel tail at .L5.
;; EBP is pushed/popped around the unrolled path; every exit jumps to
;; _x86return.
192 _ConvertX86p32_32BGRA888: | |
193 | |
194 ; check short | |
195 cmp ecx,BYTE 32 | |
196 ja .L3 | |
197 | |
198 .L1 ; short loop | |
199 mov edx,[esi] | |
200 bswap edx | |
201 mov [edi],edx | |
202 add esi,BYTE 4 | |
203 add edi,BYTE 4 | |
204 dec ecx | |
205 jnz .L1 | |
206 .L2 | |
207 jmp _x86return | |
208 | |
209 .L3 ; save ebp | |
210 push ebp | |
211 | |
212 ; unroll four times | |
;; EBP = number of four-pixel groups (count / 4); ECX is reused as a data
;; register inside .L4, so the original count is kept on the stack.
213 mov ebp,ecx | |
214 shr ebp,2 | |
215 | |
216 ; save count | |
217 push ecx | |
218 | |
219 .L4 mov eax,[esi] | |
220 mov ebx,[esi+4] | |
221 | |
222 mov ecx,[esi+8] | |
223 mov edx,[esi+12] | |
224 | |
225 bswap eax | |
226 | |
227 bswap ebx | |
228 | |
229 bswap ecx | |
230 | |
231 bswap edx | |
232 | |
233 mov [edi+0],eax | |
234 mov [edi+4],ebx | |
235 | |
236 mov [edi+8],ecx | |
237 mov [edi+12],edx | |
238 | |
239 add esi,BYTE 16 | |
240 add edi,BYTE 16 | |
241 | |
242 dec ebp | |
243 jnz .L4 | |
244 | |
245 ; check tail | |
;; Low two bits of the saved count = pixels remaining after groups of four.
246 pop ecx | |
247 and ecx,BYTE 11b | |
248 jz .L6 | |
249 | |
250 .L5 ; tail loop | |
251 mov edx,[esi] | |
252 bswap edx | |
253 mov [edi],edx | |
254 add esi,BYTE 4 | |
255 add edi,BYTE 4 | |
256 dec ecx | |
257 jnz .L5 | |
258 | |
259 .L6 pop ebp | |
260 jmp _x86return | |
261 | |
262 | |
263 | |
264 | |
265 ;; 32 bit RGB 888 to 24 BIT RGB 888 | |
;; Copies source bytes 0, 1 and 2 of every 4-byte pixel to three consecutive
;; destination bytes; source byte 3 is dropped.
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero).
;; Counts of 32 or fewer use the byte-wise loop at .L1. Larger counts:
;;   .L3  converts single pixels until EDI is 4-byte aligned,
;;   .L5  packs four source pixels into three destination dwords per pass,
;;   .L6  converts the remaining (count & 3) pixels byte-wise.
;; EBP is pushed/popped around the unrolled path; every exit jumps to
;; _x86return.
266 | |
267 _ConvertX86p32_24RGB888: | |
268 | |
269 ; check short | |
270 cmp ecx,BYTE 32 | |
271 ja .L3 | |
272 | |
273 .L1 ; short loop | |
274 mov al,[esi] | |
275 mov bl,[esi+1] | |
276 mov dl,[esi+2] | |
277 mov [edi],al | |
278 mov [edi+1],bl | |
279 mov [edi+2],dl | |
280 add esi,BYTE 4 | |
281 add edi,BYTE 3 | |
282 dec ecx | |
283 jnz .L1 | |
284 .L2 | |
285 jmp _x86return | |
286 | |
;; Alignment head. ECX is decremented without a zero test here: this path is
;; only entered with ECX > 32, and because EDI advances by 3 each pass its
;; low two bits reach zero within at most three passes.
287 .L3 ; head | |
288 mov edx,edi | |
289 and edx,BYTE 11b | |
290 jz .L4 | |
291 mov al,[esi] | |
292 mov bl,[esi+1] | |
293 mov dl,[esi+2] | |
294 mov [edi],al | |
295 mov [edi+1],bl | |
296 mov [edi+2],dl | |
297 add esi,BYTE 4 | |
298 add edi,BYTE 3 | |
299 dec ecx | |
300 jmp SHORT .L3 | |
301 | |
302 .L4 ; unroll 4 times | |
303 push ebp | |
304 mov ebp,ecx | |
305 shr ebp,2 | |
306 | |
307 ; save count | |
308 push ecx | |
309 | |
;; Register pictures in the comments below list bytes from most significant
;; to least significant. EDI is advanced early (by 12), so the later stores
;; use offsets written as "+4-3*4" and "+8-3*4" to address the same group.
310 .L5 mov eax,[esi] ; first dword eax = [A][R][G][B] | |
311 mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] | |
312 | |
313 shl eax,8 ; eax = [R][G][B][.] | |
314 mov ecx,[esi+12] ; fourth source pixel ecx = [a][r][g][b] | |
315 | |
316 shl ebx,8 ; ebx = [r][g][b][.] | |
317 mov al,[esi+4] ; eax = [R][G][B][b] | |
318 | |
319 ror eax,8 ; eax = [b][R][G][B] (done) | |
320 mov bh,[esi+8+1] ; ebx = [r][g][G][.] | |
321 | |
322 mov [edi],eax | |
323 add edi,BYTE 3*4 | |
324 | |
325 shl ecx,8 ; ecx = [r][g][b][.] | |
326 mov bl,[esi+8+0] ; ebx = [r][g][G][B] | |
327 | |
328 rol ebx,16 ; ebx = [G][B][r][g] (done) | |
329 mov cl,[esi+8+2] ; ecx = [r][g][b][R] (done) | |
330 | |
331 mov [edi+4-3*4],ebx | |
332 add esi,BYTE 4*4 | |
333 | |
334 mov [edi+8-3*4],ecx | |
335 dec ebp | |
336 | |
337 jnz .L5 | |
338 | |
339 ; check tail | |
;; Low two bits of the saved (post-head) count = pixels still to convert.
340 pop ecx | |
341 and ecx,BYTE 11b | |
342 jz .L7 | |
343 | |
344 .L6 ; tail loop | |
345 mov al,[esi] | |
346 mov bl,[esi+1] | |
347 mov dl,[esi+2] | |
348 mov [edi],al | |
349 mov [edi+1],bl | |
350 mov [edi+2],dl | |
351 add esi,BYTE 4 | |
352 add edi,BYTE 3 | |
353 dec ecx | |
354 jnz .L6 | |
355 | |
356 .L7 pop ebp | |
357 jmp _x86return | |
358 | |
359 | |
360 | |
361 | |
362 ;; 32 bit RGB 888 to 24 bit BGR 888 | |
;; Writes three destination bytes per 4-byte source pixel in reversed order:
;; dest[0] = src[2], dest[1] = src[1], dest[2] = src[0]; src[3] is dropped.
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero).
;; Counts of 32 or fewer use the byte-wise loop at .L1. Larger counts:
;;   .L3  converts single pixels until EDI is 4-byte aligned,
;;   .L5  packs four source pixels into three destination dwords per pass,
;;   .L6  converts the remaining (count & 3) pixels byte-wise.
;; EBP is pushed/popped around the unrolled path; every exit jumps to
;; _x86return.
363 | |
364 _ConvertX86p32_24BGR888: | |
365 | |
366 ; check short | |
367 cmp ecx,BYTE 32 | |
368 ja .L3 | |
369 | |
370 | |
371 .L1 ; short loop | |
372 mov dl,[esi] | |
373 mov bl,[esi+1] | |
374 mov al,[esi+2] | |
375 mov [edi],al | |
376 mov [edi+1],bl | |
377 mov [edi+2],dl | |
378 add esi,BYTE 4 | |
379 add edi,BYTE 3 | |
380 dec ecx | |
381 jnz .L1 | |
382 .L2 | |
383 jmp _x86return | |
384 | |
;; Alignment head. ECX is decremented without a zero test here: this path is
;; only entered with ECX > 32, and because EDI advances by 3 each pass its
;; low two bits reach zero within at most three passes.
385 .L3 ; head | |
386 mov edx,edi | |
387 and edx,BYTE 11b | |
388 jz .L4 | |
389 mov dl,[esi] | |
390 mov bl,[esi+1] | |
391 mov al,[esi+2] | |
392 mov [edi],al | |
393 mov [edi+1],bl | |
394 mov [edi+2],dl | |
395 add esi,BYTE 4 | |
396 add edi,BYTE 3 | |
397 dec ecx | |
398 jmp SHORT .L3 | |
399 | |
400 .L4 ; unroll 4 times | |
401 push ebp | |
402 mov ebp,ecx | |
403 shr ebp,2 | |
404 | |
405 ; save count | |
406 push ecx | |
407 | |
;; Register pictures in the comments below list bytes from most significant
;; to least significant.
408 .L5 | |
409 mov eax,[esi] ; first dword eax = [A][R][G][B] | |
410 mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] | |
411 | |
412 bswap eax ; eax = [B][G][R][A] | |
413 | |
414 bswap ebx ; ebx = [b][g][r][a] | |
415 | |
416 mov al,[esi+4+2] ; eax = [B][G][R][r] | |
417 mov bh,[esi+4+4+1] ; ebx = [b][g][G][a] | |
418 | |
419 ror eax,8 ; eax = [r][B][G][R] (done) | |
420 mov bl,[esi+4+4+2] ; ebx = [b][g][G][R] | |
421 | |
422 ror ebx,16 ; ebx = [G][R][b][g] (done) | |
423 mov [edi],eax | |
424 | |
425 mov [edi+4],ebx | |
426 mov ecx,[esi+12] ; fourth source pixel ecx = [a][r][g][b] | |
427 | |
428 bswap ecx ; ecx = [b][g][r][a] | |
429 | |
430 mov cl,[esi+8] ; ecx = [b][g][r][B] (done) | |
431 add esi,BYTE 4*4 | |
432 | |
433 mov [edi+8],ecx | |
434 add edi,BYTE 3*4 | |
435 | |
436 dec ebp | |
437 jnz .L5 | |
438 | |
439 ; check tail | |
;; Low two bits of the saved (post-head) count = pixels still to convert.
440 pop ecx | |
441 and ecx,BYTE 11b | |
442 jz .L7 | |
443 | |
444 .L6 ; tail loop | |
445 mov dl,[esi] | |
446 mov bl,[esi+1] | |
447 mov al,[esi+2] | |
448 mov [edi],al | |
449 mov [edi+1],bl | |
450 mov [edi+2],dl | |
451 add esi,BYTE 4 | |
452 add edi,BYTE 3 | |
453 dec ecx | |
454 jnz .L6 | |
455 | |
456 .L7 | |
457 pop ebp | |
458 jmp _x86return | |
459 | |
460 | |
461 | |
462 | |
463 ;; 32 bit RGB 888 to 16 BIT RGB 565 | |
;; Packs each 4-byte source pixel into one 16-bit word:
;;   bits 11-15 = top 5 bits of source byte 2 (red)
;;   bits  5-10 = top 6 bits of source byte 1 (green)
;;   bits  0-4  = top 5 bits of source byte 0 (blue)
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero).
;; Counts of 16 or fewer use the one-pixel loop at .L1. Larger counts:
;;   .L3  converts one pixel if EDI is not 4-byte aligned,
;;   .L6  converts two pixels per pass and writes them as one dword,
;;   tail converts the final pixel when the remaining count was odd.
;; Every exit jumps to _x86return.
464 | |
465 _ConvertX86p32_16RGB565: | |
466 ; check short | |
467 cmp ecx,BYTE 16 | |
468 ja .L3 | |
469 | |
470 .L1 ; short loop | |
471 mov bl,[esi+0] ; blue | |
472 mov al,[esi+1] ; green | |
473 mov ah,[esi+2] ; red | |
474 shr ah,3 | |
475 and al,11111100b | |
476 shl eax,3 | |
477 shr bl,3 | |
478 add al,bl | |
479 mov [edi+0],al | |
480 mov [edi+1],ah | |
481 add esi,BYTE 4 | |
482 add edi,BYTE 2 | |
483 dec ecx | |
484 jnz .L1 | |
485 | |
486 .L2: ; End of short loop | |
487 jmp _x86return | |
488 | |
489 | |
490 .L3 ; head | |
491 mov ebx,edi | |
492 and ebx,BYTE 11b | |
493 jz .L4 | |
494 | |
495 mov bl,[esi+0] ; blue | |
496 mov al,[esi+1] ; green | |
497 mov ah,[esi+2] ; red | |
498 shr ah,3 | |
499 and al,11111100b | |
500 shl eax,3 | |
501 shr bl,3 | |
502 add al,bl | |
503 mov [edi+0],al | |
504 mov [edi+1],ah | |
505 add esi,BYTE 4 | |
506 add edi,BYTE 2 | |
507 dec ecx | |
508 | |
509 .L4: | |
510 ; save count | |
511 push ecx | |
512 | |
513 ; unroll twice | |
514 shr ecx,1 | |
515 | |
516 ; point arrays to end | |
;; ESI/EDI are moved past the last full pair and ECX becomes a negative pair
;; index that counts up to zero, so "inc ecx / jnz" doubles as the loop test.
517 lea esi,[esi+ecx*8] | |
518 lea edi,[edi+ecx*4] | |
519 | |
520 ; negative counter | |
521 neg ecx | |
522 jmp SHORT .L6 | |
523 | |
;; The dword for each pair is stored at the top of the following pass (here,
;; hence the -4 offset after ECX was incremented); the last pair is stored by
;; the identical instruction placed after the loop.
524 .L5: | |
525 mov [edi+ecx*4-4],eax | |
526 .L6: | |
527 mov eax,[esi+ecx*8] | |
528 | |
529 shr ah,2 | |
530 mov ebx,[esi+ecx*8+4] | |
531 | |
532 shr eax,3 | |
533 mov edx,[esi+ecx*8+4] | |
534 | |
535 shr bh,2 | |
536 mov dl,[esi+ecx*8+2] | |
537 | |
538 shl ebx,13 | |
539 and eax,000007FFh | |
540 | |
541 shl edx,8 | |
542 and ebx,07FF0000h | |
543 | |
544 and edx,0F800F800h | |
545 add eax,ebx | |
546 | |
547 add eax,edx | |
548 inc ecx | |
549 | |
550 jnz .L5 | |
551 | |
552 mov [edi+ecx*4-4],eax | |
553 | |
554 ; tail | |
;; Bit 0 of the saved count tells whether one unpaired pixel remains.
555 pop ecx | |
556 test cl,1 | |
557 jz .L7 | |
558 | |
559 mov bl,[esi+0] ; blue | |
560 mov al,[esi+1] ; green | |
561 mov ah,[esi+2] ; red | |
562 shr ah,3 | |
563 and al,11111100b | |
564 shl eax,3 | |
565 shr bl,3 | |
566 add al,bl | |
567 mov [edi+0],al | |
568 mov [edi+1],ah | |
569 add esi,BYTE 4 | |
570 add edi,BYTE 2 | |
571 | |
572 .L7: | |
573 jmp _x86return | |
574 | |
575 | |
576 | |
577 | |
578 ;; 32 bit RGB 888 to 16 BIT BGR 565 | |
;; Packs each 4-byte source pixel into one 16-bit word:
;;   bits 11-15 = top 5 bits of source byte 0 (blue)
;;   bits  5-10 = top 6 bits of source byte 1 (green)
;;   bits  0-4  = top 5 bits of source byte 2 (red)
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero).
;; Counts of 16 or fewer use the one-pixel loop at .L1. Larger counts:
;;   .L3  converts one pixel if EDI is not 4-byte aligned,
;;   .L6  converts two pixels per pass and writes them as one dword,
;;   tail converts the final pixel when the remaining count was odd.
;; Every exit jumps to _x86return.
579 | |
580 _ConvertX86p32_16BGR565: | |
581 | |
582 ; check short | |
583 cmp ecx,BYTE 16 | |
584 ja .L3 | |
585 | |
586 .L1 ; short loop | |
587 mov ah,[esi+0] ; blue | |
588 mov al,[esi+1] ; green | |
589 mov bl,[esi+2] ; red | |
590 shr ah,3 | |
591 and al,11111100b | |
592 shl eax,3 | |
593 shr bl,3 | |
594 add al,bl | |
595 mov [edi+0],al | |
596 mov [edi+1],ah | |
597 add esi,BYTE 4 | |
598 add edi,BYTE 2 | |
599 dec ecx | |
600 jnz .L1 | |
601 .L2 | |
602 jmp _x86return | |
603 | |
604 .L3 ; head | |
605 mov ebx,edi | |
606 and ebx,BYTE 11b | |
607 jz .L4 | |
608 mov ah,[esi+0] ; blue | |
609 mov al,[esi+1] ; green | |
610 mov bl,[esi+2] ; red | |
611 shr ah,3 | |
612 and al,11111100b | |
613 shl eax,3 | |
614 shr bl,3 | |
615 add al,bl | |
616 mov [edi+0],al | |
617 mov [edi+1],ah | |
618 add esi,BYTE 4 | |
619 add edi,BYTE 2 | |
620 dec ecx | |
621 | |
622 .L4 ; save count | |
623 push ecx | |
624 | |
625 ; unroll twice | |
626 shr ecx,1 | |
627 | |
628 ; point arrays to end | |
;; ESI/EDI are moved past the last full pair and ECX becomes a negative pair
;; index that counts up to zero, so "inc ecx / jnz" doubles as the loop test.
629 lea esi,[esi+ecx*8] | |
630 lea edi,[edi+ecx*4] | |
631 | |
632 ; negative count | |
633 neg ecx | |
634 jmp SHORT .L6 | |
635 | |
;; The dword for each pair is stored at the top of the following pass (hence
;; the -4 offset); the last pair is stored by the identical instruction
;; placed after the loop.
636 .L5 | |
637 mov [edi+ecx*4-4],eax | |
638 .L6 | |
639 mov edx,[esi+ecx*8+4] | |
640 | |
641 mov bh,[esi+ecx*8+4] | |
642 mov ah,[esi+ecx*8] | |
643 | |
644 shr bh,3 | |
645 mov al,[esi+ecx*8+1] | |
646 | |
647 shr ah,3 | |
648 mov bl,[esi+ecx*8+5] | |
649 | |
650 shl eax,3 | |
651 mov dl,[esi+ecx*8+2] | |
652 | |
653 shl ebx,19 | |
654 and eax,0000FFE0h | |
655 | |
656 shr edx,3 | |
657 and ebx,0FFE00000h | |
658 | |
659 and edx,001F001Fh | |
660 add eax,ebx | |
661 | |
662 add eax,edx | |
663 inc ecx | |
664 | |
665 jnz .L5 | |
666 | |
667 mov [edi+ecx*4-4],eax | |
668 | |
669 ; tail | |
;; Bit 0 of the saved count tells whether one unpaired pixel remains.
670 pop ecx | |
671 and ecx,BYTE 1 | |
672 jz .L7 | |
673 mov ah,[esi+0] ; blue | |
674 mov al,[esi+1] ; green | |
675 mov bl,[esi+2] ; red | |
676 shr ah,3 | |
677 and al,11111100b | |
678 shl eax,3 | |
679 shr bl,3 | |
680 add al,bl | |
681 mov [edi+0],al | |
682 mov [edi+1],ah | |
683 add esi,BYTE 4 | |
684 add edi,BYTE 2 | |
685 | |
686 .L7 | |
687 jmp _x86return | |
688 | |
689 | |
690 | |
691 | |
692 ;; 32 BIT RGB TO 16 BIT RGB 555 | |
;; Packs each 4-byte source pixel into one 16-bit word:
;;   bits 10-14 = top 5 bits of source byte 2 (red)
;;   bits  5-9  = top 5 bits of source byte 1 (green)
;;   bits  0-4  = top 5 bits of source byte 0 (blue)
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero).
;; Counts of 16 or fewer use the one-pixel loop at .L1. Larger counts:
;;   .L3  converts one pixel if EDI is not 4-byte aligned,
;;   .L6  converts two pixels per pass and writes them as one dword,
;;   tail converts the final pixel when the remaining count was odd.
;; Every exit jumps to _x86return.
693 | |
694 _ConvertX86p32_16RGB555: | |
695 | |
696 ; check short | |
697 cmp ecx,BYTE 16 | |
698 ja .L3 | |
699 | |
700 .L1 ; short loop | |
701 mov bl,[esi+0] ; blue | |
702 mov al,[esi+1] ; green | |
703 mov ah,[esi+2] ; red | |
704 shr ah,3 | |
705 and al,11111000b | |
706 shl eax,2 | |
707 shr bl,3 | |
708 add al,bl | |
709 mov [edi+0],al | |
710 mov [edi+1],ah | |
711 add esi,BYTE 4 | |
712 add edi,BYTE 2 | |
713 dec ecx | |
714 jnz .L1 | |
715 .L2 | |
716 jmp _x86return | |
717 | |
718 .L3 ; head | |
719 mov ebx,edi | |
720 and ebx,BYTE 11b | |
721 jz .L4 | |
722 mov bl,[esi+0] ; blue | |
723 mov al,[esi+1] ; green | |
724 mov ah,[esi+2] ; red | |
725 shr ah,3 | |
726 and al,11111000b | |
727 shl eax,2 | |
728 shr bl,3 | |
729 add al,bl | |
730 mov [edi+0],al | |
731 mov [edi+1],ah | |
732 add esi,BYTE 4 | |
733 add edi,BYTE 2 | |
734 dec ecx | |
735 | |
736 .L4 ; save count | |
737 push ecx | |
738 | |
739 ; unroll twice | |
740 shr ecx,1 | |
741 | |
742 ; point arrays to end | |
;; ESI/EDI are moved past the last full pair and ECX becomes a negative pair
;; index that counts up to zero, so "inc ecx / jnz" doubles as the loop test.
743 lea esi,[esi+ecx*8] | |
744 lea edi,[edi+ecx*4] | |
745 | |
746 ; negative counter | |
747 neg ecx | |
748 jmp SHORT .L6 | |
749 | |
;; The dword for each pair is stored at the top of the following pass (hence
;; the -4 offset); the last pair is stored by the identical instruction
;; placed after the loop.
750 .L5 | |
751 mov [edi+ecx*4-4],eax | |
752 .L6 | |
753 mov eax,[esi+ecx*8] | |
754 | |
755 shr ah,3 | |
756 mov ebx,[esi+ecx*8+4] | |
757 | |
758 shr eax,3 | |
759 mov edx,[esi+ecx*8+4] | |
760 | |
761 shr bh,3 | |
762 mov dl,[esi+ecx*8+2] | |
763 | |
764 shl ebx,13 | |
765 and eax,000007FFh | |
766 | |
767 shl edx,7 | |
768 and ebx,07FF0000h | |
769 | |
770 and edx,07C007C00h | |
771 add eax,ebx | |
772 | |
773 add eax,edx | |
774 inc ecx | |
775 | |
776 jnz .L5 | |
777 | |
778 mov [edi+ecx*4-4],eax | |
779 | |
780 ; tail | |
;; Bit 0 of the saved count tells whether one unpaired pixel remains.
781 pop ecx | |
782 and ecx,BYTE 1 | |
783 jz .L7 | |
784 mov bl,[esi+0] ; blue | |
785 mov al,[esi+1] ; green | |
786 mov ah,[esi+2] ; red | |
787 shr ah,3 | |
788 and al,11111000b | |
789 shl eax,2 | |
790 shr bl,3 | |
791 add al,bl | |
792 mov [edi+0],al | |
793 mov [edi+1],ah | |
794 add esi,BYTE 4 | |
795 add edi,BYTE 2 | |
796 | |
797 .L7 | |
798 jmp _x86return | |
799 | |
800 | |
801 | |
802 | |
803 ;; 32 BIT RGB TO 16 BIT BGR 555 | |
;; Packs each 4-byte source pixel into one 16-bit word:
;;   bits 10-14 = top 5 bits of source byte 0 (blue)
;;   bits  5-9  = top 5 bits of source byte 1 (green)
;;   bits  0-4  = top 5 bits of source byte 2 (red)
;; In:  ESI = source, EDI = dest, ECX = pixel count (must be non-zero).
;; Counts of 16 or fewer use the one-pixel loop at .L1. Larger counts:
;;   .L3  converts one pixel if EDI is not 4-byte aligned,
;;   .L6  converts two pixels per pass and writes them as one dword,
;;   tail converts the final pixel when the remaining count was odd.
;; Every exit jumps to _x86return.
804 | |
805 _ConvertX86p32_16BGR555: | |
806 | |
807 ; check short | |
808 cmp ecx,BYTE 16 | |
809 ja .L3 | |
810 | |
811 | |
812 .L1 ; short loop | |
813 mov ah,[esi+0] ; blue | |
814 mov al,[esi+1] ; green | |
815 mov bl,[esi+2] ; red | |
816 shr ah,3 | |
817 and al,11111000b | |
818 shl eax,2 | |
819 shr bl,3 | |
820 add al,bl | |
821 mov [edi+0],al | |
822 mov [edi+1],ah | |
823 add esi,BYTE 4 | |
824 add edi,BYTE 2 | |
825 dec ecx | |
826 jnz .L1 | |
827 .L2 | |
828 jmp _x86return | |
829 | |
830 .L3 ; head | |
831 mov ebx,edi | |
832 and ebx,BYTE 11b | |
833 jz .L4 | |
834 mov ah,[esi+0] ; blue | |
835 mov al,[esi+1] ; green | |
836 mov bl,[esi+2] ; red | |
837 shr ah,3 | |
838 and al,11111000b | |
839 shl eax,2 | |
840 shr bl,3 | |
841 add al,bl | |
842 mov [edi+0],al | |
843 mov [edi+1],ah | |
844 add esi,BYTE 4 | |
845 add edi,BYTE 2 | |
846 dec ecx | |
847 | |
848 .L4 ; save count | |
849 push ecx | |
850 | |
851 ; unroll twice | |
852 shr ecx,1 | |
853 | |
854 ; point arrays to end | |
;; ESI/EDI are moved past the last full pair and ECX becomes a negative pair
;; index that counts up to zero, so "inc ecx / jnz" doubles as the loop test.
855 lea esi,[esi+ecx*8] | |
856 lea edi,[edi+ecx*4] | |
857 | |
858 ; negative counter | |
859 neg ecx | |
860 jmp SHORT .L6 | |
861 | |
;; The dword for each pair is stored at the top of the following pass (hence
;; the -4 offset); the last pair is stored by the identical instruction
;; placed after the loop.
862 .L5 | |
863 mov [edi+ecx*4-4],eax | |
864 .L6 | |
865 mov edx,[esi+ecx*8+4] | |
866 | |
867 mov bh,[esi+ecx*8+4] | |
868 mov ah,[esi+ecx*8] | |
869 | |
870 shr bh,3 | |
871 mov al,[esi+ecx*8+1] | |
872 | |
873 shr ah,3 | |
874 mov bl,[esi+ecx*8+5] | |
875 | |
876 shl eax,2 | |
877 mov dl,[esi+ecx*8+2] | |
878 | |
879 shl ebx,18 | |
880 and eax,00007FE0h | |
881 | |
882 shr edx,3 | |
883 and ebx,07FE00000h | |
884 | |
885 and edx,001F001Fh | |
886 add eax,ebx | |
887 | |
888 add eax,edx | |
889 inc ecx | |
890 | |
891 jnz .L5 | |
892 | |
893 mov [edi+ecx*4-4],eax | |
894 | |
895 ; tail | |
;; Bit 0 of the saved count tells whether one unpaired pixel remains.
896 pop ecx | |
897 and ecx,BYTE 1 | |
898 jz .L7 | |
899 mov ah,[esi+0] ; blue | |
900 mov al,[esi+1] ; green | |
901 mov bl,[esi+2] ; red | |
902 shr ah,3 | |
903 and al,11111000b | |
904 shl eax,2 | |
905 shr bl,3 | |
906 add al,bl | |
907 mov [edi+0],al | |
908 mov [edi+1],ah | |
909 add esi,BYTE 4 | |
910 add edi,BYTE 2 | |
911 | |
912 .L7 | |
913 jmp _x86return | |
914 | |
915 | |
916 | |
917 | |
918 | |
919 ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb) | |
920 ;; This routine writes FOUR pixels at once (dword) and then, if they exist, | |
921 ;; the (up to three) trailing pixels | |
;; Each output byte is built from one 4-byte source pixel as
;;   (byte2 & 0E0h) | ((byte1 & 0E0h) >> 3) | (byte0 >> 6)
;; In:  ESI = source, EDI = dest, ECX = pixel count.
;; A count below four skips the dword loop (shr ecx,2 yields zero) and goes
;; straight to the trailing-pixel handling at .L2.
;; The .L_ALIGNED label is not referenced anywhere in this routine.
;; Every exit jumps to _x86return.
922 _ConvertX86p32_8RGB332: | |
923 | |
924 | |
925 .L_ALIGNED | |
926 push ecx | |
927 | |
928 shr ecx,2 ; We will draw 4 pixels at once | |
929 jnz .L1 | |
930 | |
931 jmp .L2 ; short jump out of range :( | |
932 | |
;; First half of the pass: output byte 0 ends up in AL, output byte 1 in AH.
933 .L1: | |
934 mov eax,[esi] ; first pair of pixels | |
935 mov edx,[esi+4] | |
936 | |
937 shr dl,6 | |
938 mov ebx,eax | |
939 | |
940 shr al,6 | |
941 and ah,0e0h | |
942 | |
943 shr ebx,16 | |
944 and dh,0e0h | |
945 | |
946 shr ah,3 | |
947 and bl,0e0h | |
948 | |
949 shr dh,3 | |
950 | |
951 or al,bl | |
952 | |
953 mov ebx,edx | |
954 or al,ah | |
955 | |
956 shr ebx,16 | |
957 or dl,dh | |
958 | |
959 and bl,0e0h | |
960 | |
961 or dl,bl | |
962 | |
963 mov ah,dl | |
964 | |
965 | |
966 | |
;; Second half: "ror eax,16" parks the first two output bytes in the upper
;; half of EAX, the next two are assembled in AL/AH, and "rol eax,16" swaps
;; the halves back so the dword holds the four bytes in pixel order.
967 mov ebx,[esi+8] ; second pair of pixels | |
968 | |
969 mov edx,ebx | |
970 and bh,0e0h | |
971 | |
972 shr bl,6 | |
973 and edx,0e00000h | |
974 | |
975 shr edx,16 | |
976 | |
977 shr bh,3 | |
978 | |
979 ror eax,16 | |
980 or bl,dl | |
981 | |
982 mov edx,[esi+12] | |
983 or bl,bh | |
984 | |
985 mov al,bl | |
986 | |
987 mov ebx,edx | |
988 and dh,0e0h | |
989 | |
990 shr dl,6 | |
991 and ebx,0e00000h | |
992 | |
993 shr dh,3 | |
994 mov ah,dl | |
995 | |
996 shr ebx,16 | |
997 or ah,dh | |
998 | |
999 or ah,bl | |
1000 | |
1001 rol eax,16 | |
1002 add esi,BYTE 16 | |
1003 | |
1004 mov [edi],eax | |
1005 add edi,BYTE 4 | |
1006 | |
1007 dec ecx | |
1008 jz .L2 ; L1 out of range for short jump :( | |
1009 | |
1010 jmp .L1 | |
1011 .L2: | |
1012 | |
1013 pop ecx | |
1014 and ecx,BYTE 3 ; mask out number of pixels to draw | |
1015 | |
1016 jz .L4 ; Nothing to do anymore | |
1017 | |
1018 .L3: | |
1019 mov eax,[esi] ; single pixel conversion for trailing pixels | |
1020 | |
1021 mov ebx,eax | |
1022 | |
1023 shr al,6 | |
1024 and ah,0e0h | |
1025 | |
1026 shr ebx,16 | |
1027 | |
1028 shr ah,3 | |
1029 and bl,0e0h | |
1030 | |
1031 or al,ah | |
1032 or al,bl | |
1033 | |
1034 mov [edi],al | |
1035 | |
1036 inc edi | |
1037 add esi,BYTE 4 | |
1038 | |
1039 dec ecx | |
1040 jnz .L3 | |
1041 | |
1042 .L4: | |
1043 jmp _x86return | |
1199
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1044 |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1045 %ifidn __OUTPUT_FORMAT__,elf |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1046 section .note.GNU-stack noalloc noexec nowrite progbits |
2d6dc7de1145
From: Mike Frysinger <vapier@gentoo.org>
Ryan C. Gordon <icculus@icculus.org>
parents:
1166
diff
changeset
|
1047 %endif |