;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;

; ---------------------------------------------------------------------------
; Assembler setup: 32-bit code, exported converter entry points, and the
; external label every converter jumps to when it has finished.
; ---------------------------------------------------------------------------

BITS 32

; 32 bpp -> 32 bpp
GLOBAL _ConvertX86p32_32BGR888
GLOBAL _ConvertX86p32_32RGBA888
GLOBAL _ConvertX86p32_32BGRA888

; 32 bpp -> 24 bpp
GLOBAL _ConvertX86p32_24RGB888
GLOBAL _ConvertX86p32_24BGR888

; 32 bpp -> 16 bpp
GLOBAL _ConvertX86p32_16RGB565
GLOBAL _ConvertX86p32_16BGR565
GLOBAL _ConvertX86p32_16RGB555
GLOBAL _ConvertX86p32_16BGR555

; 32 bpp -> 8 bpp
GLOBAL _ConvertX86p32_8RGB332

; Common exit point, defined outside this file
EXTERN _x86return

SECTION .text

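;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX
;;   (ECX, ESI and EDI are modified as well: the count is consumed and both
;;    pointers are advanced as pixels are converted)
;; Exit:
;;   every routine leaves through "jmp _x86return" instead of "ret"

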
;; 32 bit -> 32 bit with bytes 0 and 2 of every pixel exchanged.
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Per pixel: bswap reverses all four bytes, ror 8 then rotates the low
;; byte back to the top, so bytes 1 and 3 end up where they started.
_ConvertX86p32_32BGR888:

        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled_setup

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.unrolled_setup:
        push    ebp                     ; ebp serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; ecx is reused below; keep the count

.unrolled_loop:                         ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]

        bswap   eax

        bswap   ebx

        ror     eax, 8
        mov     ecx, [esi+8]

        ror     ebx, 8
        mov     edx, [esi+12]

        bswap   ecx

        bswap   edx

        ror     ecx, 8
        mov     [edi+0], eax

        ror     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .unrolled_loop

        pop     ecx                     ; original count
        and     ecx, BYTE 11b           ; 0..3 pixels still to do
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return



;; 32 bit -> 32 bit, every pixel rotated left by one byte (rol 8): the
;; three low bytes move up one position and the top byte becomes the low byte.
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
_ConvertX86p32_32RGBA888:

        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled_setup

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.unrolled_setup:
        push    ebp                     ; ebp serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; ecx is reused below; keep the count

.unrolled_loop:                         ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]

        rol     eax, 8
        mov     ecx, [esi+8]

        rol     ebx, 8
        mov     edx, [esi+12]

        rol     ecx, 8
        mov     [edi+0], eax

        rol     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .unrolled_loop

        pop     ecx                     ; original count
        and     ecx, BYTE 11b           ; 0..3 pixels still to do
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return



;; 32 bit -> 32 bit, the four bytes of every pixel reversed (bswap).
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
_ConvertX86p32_32BGRA888:

        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled_setup

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.unrolled_setup:
        push    ebp                     ; ebp serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; ecx is reused below; keep the count

.unrolled_loop:                         ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]

        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax

        bswap   ebx

        bswap   ecx

        bswap   edx

        mov     [edi+0], eax
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .unrolled_loop

        pop     ecx                     ; original count
        and     ecx, BYTE 11b           ; 0..3 pixels still to do
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return



;; 32 bit RGB 888 to 24 bit RGB 888
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; The low three bytes of every source pixel are copied in order; the
;; fourth byte is dropped.  Register diagrams below are written
;; [byte3][byte2][byte1][byte0]; Bn/Gn/Rn/An are source bytes 0/1/2/3 of
;; pixel n within the current group of four.

_ConvertX86p32_24RGB888:

        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .align_head

.short_loop:                            ; one pixel per iteration
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.align_head:                            ; single pixels until edi is a multiple of 4
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .unrolled_setup
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.unrolled_setup:
        push    ebp                     ; ebp serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; ecx is reused below; keep the count

.unrolled_loop:                         ; 4 pixels in (16 bytes), 3 dwords out
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        shl     eax, 8                  ; eax = [R0][G0][B0][--]
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]

        shl     ebx, 8                  ; ebx = [R1][G1][B1][--]
        mov     al, [esi+4]             ; eax = [R0][G0][B0][B1]

        ror     eax, 8                  ; eax = [B1][R0][G0][B0]  (dword 0)
        mov     bh, [esi+8+1]           ; ebx = [R1][G1][G2][--]

        mov     [edi], eax
        add     edi, BYTE 3*4           ; advance early; stores below compensate

        shl     ecx, 8                  ; ecx = [R3][G3][B3][--]
        mov     bl, [esi+8+0]           ; ebx = [R1][G1][G2][B2]

        rol     ebx, 16                 ; ebx = [G2][B2][R1][G1]  (dword 1)
        mov     cl, [esi+8+2]           ; ecx = [R3][G3][B3][R2]  (dword 2)

        mov     [edi+4-3*4], ebx
        add     esi, BYTE 4*4

        mov     [edi+8-3*4], ecx
        dec     ebp

        jnz     .unrolled_loop

        pop     ecx                     ; count as it was after the head
        and     ecx, BYTE 11b           ; 0..3 pixels still to do
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return



;; 32 bit RGB 888 to 24 bit BGR 888
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Source bytes 2,1,0 of every pixel are written as destination bytes
;; 0,1,2; the fourth source byte is dropped.  Register diagrams below are
;; written [byte3][byte2][byte1][byte0]; Bn/Gn/Rn/An are source bytes
;; 0/1/2/3 of pixel n within the current group of four.

_ConvertX86p32_24BGR888:

        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .align_head

.short_loop:                            ; one pixel per iteration
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.align_head:                            ; single pixels until edi is a multiple of 4
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .unrolled_setup
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.unrolled_setup:
        push    ebp                     ; ebp serves as the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; ecx is reused below; keep the count

.unrolled_loop:                         ; 4 pixels in (16 bytes), 3 dwords out
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]

        bswap   eax                     ; eax = [B0][G0][R0][A0]

        bswap   ebx                     ; ebx = [B1][G1][R1][A1]

        mov     al, [esi+4+2]           ; eax = [B0][G0][R0][R1]
        mov     bh, [esi+4+4+1]         ; ebx = [B1][G1][G2][A1]

        ror     eax, 8                  ; eax = [R1][B0][G0][R0]  (dword 0)
        mov     bl, [esi+4+4+2]         ; ebx = [B1][G1][G2][R2]

        ror     ebx, 16                 ; ebx = [G2][R2][B1][G1]  (dword 1)
        mov     [edi], eax

        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]

        bswap   ecx                     ; ecx = [B3][G3][R3][A3]

        mov     cl, [esi+8]             ; ecx = [B3][G3][R3][B2]  (dword 2)
        add     esi, BYTE 4*4

        mov     [edi+8], ecx
        add     edi, BYTE 3*4

        dec     ebp
        jnz     .unrolled_loop

        pop     ecx                     ; count as it was after the head
        and     ecx, BYTE 11b           ; 0..3 pixels still to do
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return



;; 32 bit RGB 888 to 16 bit RGB 565
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Output word: red (top 5 bits) in bits 15..11, green (top 6 bits) in
;; bits 10..5, blue (top 5 bits) in bits 4..0.

_ConvertX86p32_16RGB565:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .head

.short_loop:                            ; one pixel per iteration
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red: top 5 bits
        and     al, 11111100b           ; green: top 6 bits
        shl     eax, 3                  ; ax = rrrrrggg ggg00000
        shr     bl, 3                   ; blue: top 5 bits
        add     al, bl                  ; blue fills bits 4..0
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.head:                                  ; one single pixel if edi is not a multiple of 4
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .paired_setup

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.paired_setup:
        push    ecx                     ; keep the count for the odd-pixel tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; point both arrays past the paired data
        lea     edi, [edi+ecx*4]

        neg     ecx                     ; index runs from -pairs up to 0
        jmp     SHORT .convert_pair

.store_pair:                            ; store the pair built last iteration
        mov     [edi+ecx*4-4], eax
.convert_pair:
        mov     eax, [esi+ecx*8]        ; eax = pixel 0

        shr     ah, 2                   ; green 0: top 6 bits
        mov     ebx, [esi+ecx*8+4]      ; ebx = pixel 1

        shr     eax, 3                  ; g0:b0 now in bits 10..0
        mov     edx, [esi+ecx*8+4]      ; edx = pixel 1 again (for its red)

        shr     bh, 2                   ; green 1: top 6 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red 0

        shl     ebx, 13                 ; g1:b1 now in bits 26..16
        and     eax, 000007FFh

        shl     edx, 8                  ; red 1 -> bits 31..24, red 0 -> bits 15..8
        and     ebx, 07FF0000h

        and     edx, 0F800F800h         ; top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two packed 565 pixels
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4], eax      ; final pair (ecx is 0 here)

        pop     ecx
        test    cl, 1                   ; odd count -> one pixel left
        jz      .done

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        jmp     _x86return



;; 32 bit RGB 888 to 16 bit BGR 565
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Output word: blue (top 5 bits) in bits 15..11, green (top 6 bits) in
;; bits 10..5, red (top 5 bits) in bits 4..0.

_ConvertX86p32_16BGR565:

        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .head

.short_loop:                            ; one pixel per iteration
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue: top 5 bits
        and     al, 11111100b           ; green: top 6 bits
        shl     eax, 3                  ; ax = bbbbbggg ggg00000
        shr     bl, 3                   ; red: top 5 bits
        add     al, bl                  ; red fills bits 4..0
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.head:                                  ; one single pixel if edi is not a multiple of 4
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .paired_setup
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.paired_setup:
        push    ecx                     ; keep the count for the odd-pixel tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; point both arrays past the paired data
        lea     edi, [edi+ecx*4]

        neg     ecx                     ; index runs from -pairs up to 0
        jmp     SHORT .convert_pair

.store_pair:                            ; store the pair built last iteration
        mov     [edi+ecx*4-4], eax
.convert_pair:
        mov     edx, [esi+ecx*8+4]      ; edx = pixel 1 (for its red)

        mov     bh, [esi+ecx*8+4]       ; bh = blue 1
        mov     ah, [esi+ecx*8]         ; ah = blue 0

        shr     bh, 3                   ; blue 1: top 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green 0

        shr     ah, 3                   ; blue 0: top 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green 1

        shl     eax, 3
        mov     dl, [esi+ecx*8+2]       ; dl = red 0

        shl     ebx, 19
        and     eax, 0000FFE0h          ; b0:g0 in bits 15..5

        shr     edx, 3                  ; red 0 -> bits 4..0, red 1 -> bits 20..16
        and     ebx, 0FFE00000h         ; b1:g1 in bits 31..21

        and     edx, 001F001Fh          ; top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two packed 565 pixels
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4], eax      ; final pair (ecx is 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left
        jz      .done
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        jmp     _x86return



;; 32 bit RGB 888 to 16 bit RGB 555
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Output word: bit 15 clear, red (top 5 bits) in bits 14..10, green
;; (top 5 bits) in bits 9..5, blue (top 5 bits) in bits 4..0.

_ConvertX86p32_16RGB555:

        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .head

.short_loop:                            ; one pixel per iteration
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red: top 5 bits
        and     al, 11111000b           ; green: top 5 bits
        shl     eax, 2                  ; ax = 0rrrrrgg ggg00000
        shr     bl, 3                   ; blue: top 5 bits
        add     al, bl                  ; blue fills bits 4..0
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.head:                                  ; one single pixel if edi is not a multiple of 4
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .paired_setup
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.paired_setup:
        push    ecx                     ; keep the count for the odd-pixel tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; point both arrays past the paired data
        lea     edi, [edi+ecx*4]

        neg     ecx                     ; index runs from -pairs up to 0
        jmp     SHORT .convert_pair

.store_pair:                            ; store the pair built last iteration
        mov     [edi+ecx*4-4], eax
.convert_pair:
        mov     eax, [esi+ecx*8]        ; eax = pixel 0

        shr     ah, 3                   ; green 0: top 5 bits
        mov     ebx, [esi+ecx*8+4]      ; ebx = pixel 1

        shr     eax, 3                  ; g0:b0 now in bits 9..0
        mov     edx, [esi+ecx*8+4]      ; edx = pixel 1 again (for its red)

        shr     bh, 3                   ; green 1: top 5 bits
        mov     dl, [esi+ecx*8+2]       ; dl = red 0

        shl     ebx, 13                 ; g1:b1 now in bits 25..16
        and     eax, 000007FFh

        shl     edx, 7                  ; red 1 -> bits 30..23, red 0 -> bits 14..7
        and     ebx, 07FF0000h

        and     edx, 07C007C00h         ; top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two packed 555 pixels
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4], eax      ; final pair (ecx is 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left
        jz      .done
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        jmp     _x86return



;; 32 bit RGB 888 to 16 bit BGR 555
;; In:  ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Output word: bit 15 clear, blue (top 5 bits) in bits 14..10, green
;; (top 5 bits) in bits 9..5, red (top 5 bits) in bits 4..0.

_ConvertX86p32_16BGR555:

        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .head

.short_loop:                            ; one pixel per iteration
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue: top 5 bits
        and     al, 11111000b           ; green: top 5 bits
        shl     eax, 2                  ; ax = 0bbbbbgg ggg00000
        shr     bl, 3                   ; red: top 5 bits
        add     al, bl                  ; red fills bits 4..0
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop

        jmp     _x86return

.head:                                  ; one single pixel if edi is not a multiple of 4
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .paired_setup
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.paired_setup:
        push    ecx                     ; keep the count for the odd-pixel tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        lea     esi, [esi+ecx*8]        ; point both arrays past the paired data
        lea     edi, [edi+ecx*4]

        neg     ecx                     ; index runs from -pairs up to 0
        jmp     SHORT .convert_pair

.store_pair:                            ; store the pair built last iteration
        mov     [edi+ecx*4-4], eax
.convert_pair:
        mov     edx, [esi+ecx*8+4]      ; edx = pixel 1 (for its red)

        mov     bh, [esi+ecx*8+4]       ; bh = blue 1
        mov     ah, [esi+ecx*8]         ; ah = blue 0

        shr     bh, 3                   ; blue 1: top 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green 0

        shr     ah, 3                   ; blue 0: top 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green 1

        shl     eax, 2
        mov     dl, [esi+ecx*8+2]       ; dl = red 0

        shl     ebx, 18
        and     eax, 00007FE0h          ; b0:g0 in bits 14..5

        shr     edx, 3                  ; red 0 -> bits 4..0, red 1 -> bits 20..16
        and     ebx, 07FE00000h         ; b1:g1 in bits 30..21

        and     edx, 001F001Fh          ; top 5 bits of each red
        add     eax, ebx

        add     eax, edx                ; eax = two packed 555 pixels
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4], eax      ; final pair (ecx is 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left
        jz      .done
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        jmp     _x86return




;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
;; In:  ESI = source, EDI = dest, ECX = pixel count
;; Output byte: top 3 bits of red in bits 7..5, top 3 bits of green in
;; bits 4..2, top 2 bits of blue in bits 1..0.
;; This routine writes FOUR pixels at once (one dword) for as long as it
;; can, then converts the remaining zero to three pixels one byte at a time.
;; The two long-range branches use explicit NEAR conditional jumps, so no
;; "conditional jump over an unconditional jmp" pair is needed to reach
;; targets that may lie outside short-jump range.
_ConvertX86p32_8RGB332:

        push    ecx                     ; keep the full count for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      NEAR .tail_setup        ; fewer than 4 pixels: tail only

.quad_loop:
        mov     eax, [esi]              ; pixel 0
        mov     edx, [esi+4]            ; pixel 1

        shr     dl, 6                   ; blue 1: top 2 bits
        mov     ebx, eax

        shr     al, 6                   ; blue 0: top 2 bits
        and     ah, 0e0h                ; green 0: top 3 bits

        shr     ebx, 16                 ; bl = red 0
        and     dh, 0e0h                ; green 1: top 3 bits

        shr     ah, 3                   ; green 0 -> bits 4..2
        and     bl, 0e0h                ; red 0: top 3 bits

        shr     dh, 3                   ; green 1 -> bits 4..2

        or      al, bl

        mov     ebx, edx
        or      al, ah                  ; al = converted pixel 0

        shr     ebx, 16                 ; bl = red 1
        or      dl, dh

        and     bl, 0e0h                ; red 1: top 3 bits

        or      dl, bl                  ; dl = converted pixel 1

        mov     ah, dl                  ; ax = pixel 1 : pixel 0



        mov     ebx, [esi+8]            ; pixel 2

        mov     edx, ebx
        and     bh, 0e0h                ; green 2: top 3 bits

        shr     bl, 6                   ; blue 2: top 2 bits
        and     edx, 0e00000h           ; red 2: top 3 bits (still in byte 2)

        shr     edx, 16                 ; dl = red 2 bits

        shr     bh, 3                   ; green 2 -> bits 4..2

        ror     eax, 16                 ; park pixels 0/1 in the upper half
        or      bl, dl

        mov     edx, [esi+12]           ; pixel 3
        or      bl, bh                  ; bl = converted pixel 2

        mov     al, bl

        mov     ebx, edx
        and     dh, 0e0h                ; green 3: top 3 bits

        shr     dl, 6                   ; blue 3: top 2 bits
        and     ebx, 0e00000h           ; red 3: top 3 bits (still in byte 2)

        shr     dh, 3                   ; green 3 -> bits 4..2
        mov     ah, dl

        shr     ebx, 16                 ; bl = red 3 bits
        or      ah, dh

        or      ah, bl                  ; ah = converted pixel 3

        rol     eax, 16                 ; eax = pixel 3:2:1:0 (byte 0 = pixel 0)
        add     esi, BYTE 16

        mov     [edi], eax
        add     edi, BYTE 4

        dec     ecx
        jnz     NEAR .quad_loop

.tail_setup:
        pop     ecx
        and     ecx, BYTE 3             ; 0..3 pixels still to do

        jz      .done                   ; nothing left

.tail_loop:
        mov     eax, [esi]              ; single pixel conversion for trailing pixels

        mov     ebx, eax

        shr     al, 6                   ; blue: top 2 bits
        and     ah, 0e0h                ; green: top 3 bits

        shr     ebx, 16                 ; bl = red

        shr     ah, 3                   ; green -> bits 4..2
        and     bl, 0e0h                ; red: top 3 bits

        or      al, ah
        or      al, bl

        mov     [edi], al

        inc     edi
        add     esi, BYTE 4

        dec     ecx
        jnz     .tail_loop

.done:
        jmp     _x86return