view src/hermes/x86p_32.asm @ 1629:ef4a796e7f24

Fixed bug #55 From Christian Walther: When writing my patch for #12, I ended up doing all sorts of changes to the way application/window activating/deactivating is handled in the Quartz backend, resulting in the attached patch. It does make the code a bit cleaner IMHO, but as it might be regarded as a case of "if it ain't broken, don't fix it" I'd like to hear other people's opinion about it. Please shout if some change strikes you as unnecessary or wrong, and I'll explain the reasons behind it. As far as I tested it, it does not introduce any new bugs, but I may well have missed some. - The most fundamental change (that triggered most of the others) is irrelevant for the usual single-window SDL applications, it only affects the people who are crazy enough to display other Cocoa windows alongside the SDL window (I'm actually doing this currently, although the additional window only displays debugging info and won't be present in the final product): Before, some things were done on the application becoming active, some on the window becoming key, and some on the window becoming main. Conceptually, all these actions belong to the window becoming key, so that's what I implemented. However, since in a single-window application these three events always happen together, the previous implementation "ain't broken". - This slightly changed the meaning of the SDL_APPMOUSEFOCUS flag from SDL_GetAppState(): Before, it meant "window is main and mouse is inside window (or mode is fullscreen)". Now, it means "window is key and mouse is inside window (or mode is fullscreen)". It makes more sense to me that way. (See http://developer.apple.com/documentation/Cocoa/Conceptual/WinPanel/Concepts/ChangingMainKeyWindow.html for a discussion of what key and main windows are.) The other two flags are unchanged: SDL_APPACTIVE = application is not hidden and window is not minimized, SDL_APPINPUTFOCUS = window is key (or mode is fullscreen). 
- As a side effect, the reorganization fixes the following two issues (and maybe others) (but they could also be fixed in less invasive ways): * A regression that was introduced in revision 1.42 of SDL_QuartzVideo.m (http://libsdl.org/cgi/cvsweb.cgi/SDL12/src/video/quartz/SDL_QuartzVideo.m.diff?r1=1.41&r2=1.42) (from half-desirable to undesirable behavior): Situation: While in windowed mode, hide the cursor using SDL_ShowCursor(SDL_DISABLE), move the mouse outside of the window so that the cursor becomes visible again, and SDL_SetVideoMode() to a fullscreen mode. What happened before revision 1.42: The cursor is visible, but becomes invisible as soon as the mouse is moved (half-desirable). What happens in revision 1.42 and after (including current CVS): The cursor is visible and stays visible (undesirable). What happens after my patch: The cursor is invisible from the beginning (desirable). * When the cursor is hidden and grabbed, switch away from the application using cmd-tab (which ungrabs and makes the cursor visible), move the cursor outside of the SDL window, then cmd-tab back to the application. In 1.2.8 and in the current CVS, the cursor is re-grabbed, but it stays visible (immovable in the middle of the window). With my patch, the cursor is correctly re-grabbed and hidden. (For some reason, it still doesn't work correctly if you switch back to the application using the dock instead of cmd-tab. I haven't been able to figure out why. I can step over [NSCursor hide] being called in the debugger, but it seems to have no effect.) - The patch includes my patch for #12 (it was easier to obtain using cvs diff that way). If you apply both of them, you will end up with 6 duplicate lines in SDL_QuartzEvents.m.
author Sam Lantinga <slouken@libsdl.org>
date Thu, 13 Apr 2006 14:17:48 +0000
parents 2d6dc7de1145
children 393092a3ebf6
line wrap: on
line source

;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
; 
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions		
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
; 

	
BITS 32

GLOBAL _ConvertX86p32_32BGR888
GLOBAL _ConvertX86p32_32RGBA888
GLOBAL _ConvertX86p32_32BGRA888
GLOBAL _ConvertX86p32_24RGB888	
GLOBAL _ConvertX86p32_24BGR888
GLOBAL _ConvertX86p32_16RGB565
GLOBAL _ConvertX86p32_16BGR565
GLOBAL _ConvertX86p32_16RGB555
GLOBAL _ConvertX86p32_16BGR555
GLOBAL _ConvertX86p32_8RGB332

EXTERN _x86return
	
SECTION .text

;; _Convert_*
;; Parameters:
;;   ESI = source 
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX


_ConvertX86p32_32BGR888:
        ; 32 bit -> 32 bit: exchange byte 0 and byte 2 of every dword,
        ; leaving byte 1 and byte 3 where they are
        ; (bswap reverses all four bytes, ror 8 puts the old top byte back).
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return

        cmp     ecx,BYTE 32             ; more than 32 pixels?
        ja      .unrolled               ; yes: use the 4x unrolled path

.short_loop:                            ; one pixel per iteration
        mov     edx,[esi]
        bswap   edx
        ror     edx,8
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP serves as the block counter below

        mov     ebp,ecx
        shr     ebp,2                   ; EBP = number of 4-pixel blocks

        push    ecx                     ; remember full count for the tail

.quad_loop:                             ; four pixels per iteration
        mov     eax,[esi]
        mov     ebx,[esi+4]

        bswap   eax

        bswap   ebx

        ror     eax,8
        mov     ecx,[esi+8]

        ror     ebx,8
        mov     edx,[esi+12]

        bswap   ecx

        bswap   edx

        ror     ecx,8
        mov     [edi+0],eax

        ror     edx,8
        mov     [edi+4],ebx

        mov     [edi+8],ecx
        mov     [edi+12],edx

        add     esi,BYTE 16
        add     edi,BYTE 16

        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx,BYTE 11b            ; ECX = count mod 4 = leftover pixels
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx,[esi]
        bswap   edx
        ror     edx,8
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp                     ; restore caller's EBP
        jmp     _x86return
	

	
		
_ConvertX86p32_32RGBA888:
        ; 32 bit -> 32 bit: rotate every dword left by 8 bits, so the
        ; top byte of the source becomes the bottom byte of the result.
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return

        cmp     ecx,BYTE 32             ; more than 32 pixels?
        ja      .unrolled               ; yes: use the 4x unrolled path

.short_loop:                            ; one pixel per iteration
        mov     edx,[esi]
        rol     edx,8
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP serves as the block counter below

        mov     ebp,ecx
        shr     ebp,2                   ; EBP = number of 4-pixel blocks

        push    ecx                     ; remember full count for the tail

.quad_loop:                             ; four pixels per iteration
        mov     eax,[esi]
        mov     ebx,[esi+4]

        rol     eax,8
        mov     ecx,[esi+8]

        rol     ebx,8
        mov     edx,[esi+12]

        rol     ecx,8
        mov     [edi+0],eax

        rol     edx,8
        mov     [edi+4],ebx

        mov     [edi+8],ecx
        mov     [edi+12],edx

        add     esi,BYTE 16
        add     edi,BYTE 16

        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx,BYTE 11b            ; ECX = count mod 4 = leftover pixels
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx,[esi]
        rol     edx,8
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp                     ; restore caller's EBP
        jmp     _x86return

	


_ConvertX86p32_32BGRA888:
        ; 32 bit -> 32 bit: reverse the byte order of every dword (bswap).
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return

        cmp     ecx,BYTE 32             ; more than 32 pixels?
        ja      .unrolled               ; yes: use the 4x unrolled path

.short_loop:                            ; one pixel per iteration
        mov     edx,[esi]
        bswap   edx
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP serves as the block counter below

        mov     ebp,ecx
        shr     ebp,2                   ; EBP = number of 4-pixel blocks

        push    ecx                     ; remember full count for the tail

.quad_loop:                             ; four pixels per iteration
        mov     eax,[esi]
        mov     ebx,[esi+4]

        mov     ecx,[esi+8]
        mov     edx,[esi+12]

        bswap   eax

        bswap   ebx

        bswap   ecx

        bswap   edx

        mov     [edi+0],eax
        mov     [edi+4],ebx

        mov     [edi+8],ecx
        mov     [edi+12],edx

        add     esi,BYTE 16
        add     edi,BYTE 16

        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx,BYTE 11b            ; ECX = count mod 4 = leftover pixels
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx,[esi]
        bswap   edx
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp                     ; restore caller's EBP
        jmp     _x86return


	
	
;; 32 bit RGB 888 to 24 BIT RGB 888

_ConvertX86p32_24RGB888:
        ; 32 bit -> 24 bit: copy the low three bytes of every source dword
        ; to three consecutive destination bytes, in the same order.
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return
        ;
        ; Register diagrams below are written [byte3][byte2][byte1][byte0];
        ; the digit is the pixel index (0..3) inside the current group of
        ; four, using the A/R/G/B byte naming from the original comments.

        cmp     ecx,BYTE 32             ; more than 32 pixels?
        ja      .align_head             ; yes: align dest, then unroll

.short_loop:                            ; one pixel per iteration
        mov     al,[esi]
        mov     bl,[esi+1]
        mov     dl,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:                            ; single pixels until (EDI & 3) == 0
        mov     edx,edi
        and     edx,BYTE 11b
        jz      .unrolled
        mov     al,[esi]
        mov     bl,[esi+1]
        mov     dl,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.unrolled:
        push    ebp                     ; EBP serves as the block counter below
        mov     ebp,ecx
        shr     ebp,2                   ; EBP = number of 4-pixel blocks

        push    ecx                     ; remember remaining count for the tail

.quad_loop:                             ; 4 source dwords -> 3 dest dwords
        mov     eax,[esi]               ; eax = [A0][R0][G0][B0]
        mov     ebx,[esi+4]             ; ebx = [A1][R1][G1][B1]

        shl     eax,8                   ; eax = [R0][G0][B0][..]
        mov     ecx,[esi+12]            ; ecx = [A3][R3][G3][B3] (fourth pixel)

        shl     ebx,8                   ; ebx = [R1][G1][B1][..]
        mov     al,[esi+4]              ; eax = [R0][G0][B0][B1]

        ror     eax,8                   ; eax = [B1][R0][G0][B0]  -> dest dword 0
        mov     bh,[esi+8+1]            ; ebx = [R1][G1][G2][..]

        mov     [edi],eax
        add     edi,BYTE 3*4            ; advance early; later stores compensate

        shl     ecx,8                   ; ecx = [R3][G3][B3][..]
        mov     bl,[esi+8+0]            ; ebx = [R1][G1][G2][B2]

        rol     ebx,16                  ; ebx = [G2][B2][R1][G1]  -> dest dword 1
        mov     cl,[esi+8+2]            ; ecx = [R3][G3][B3][R2]  -> dest dword 2

        mov     [edi+4-3*4],ebx
        add     esi,BYTE 4*4

        mov     [edi+8-3*4],ecx
        dec     ebp

        jnz     .quad_loop

        pop     ecx
        and     ecx,BYTE 11b            ; ECX = count mod 4 = leftover pixels
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     al,[esi]
        mov     bl,[esi+1]
        mov     dl,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp                     ; restore caller's EBP
        jmp     _x86return




;; 32 bit RGB 888 to 24 bit BGR 888

_ConvertX86p32_24BGR888:
        ; 32 bit -> 24 bit with reversed channel order: for every source
        ; dword, write source byte 2, byte 1, byte 0 (in that order) to
        ; three consecutive destination bytes.
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return
        ;
        ; Register diagrams below are written [byte3][byte2][byte1][byte0];
        ; the digit is the pixel index (0..3) inside the current group of
        ; four, using the A/R/G/B byte naming from the original comments.

        cmp     ecx,BYTE 32             ; more than 32 pixels?
        ja      .align_head             ; yes: align dest, then unroll

.short_loop:                            ; one pixel per iteration
        mov     dl,[esi]
        mov     bl,[esi+1]
        mov     al,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:                            ; single pixels until (EDI & 3) == 0
        mov     edx,edi
        and     edx,BYTE 11b
        jz      .unrolled
        mov     dl,[esi]
        mov     bl,[esi+1]
        mov     al,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.unrolled:
        push    ebp                     ; EBP serves as the block counter below
        mov     ebp,ecx
        shr     ebp,2                   ; EBP = number of 4-pixel blocks

        push    ecx                     ; remember remaining count for the tail

.quad_loop:                             ; 4 source dwords -> 3 dest dwords
        mov     eax,[esi]               ; eax = [A0][R0][G0][B0]
        mov     ebx,[esi+4]             ; ebx = [A1][R1][G1][B1]

        bswap   eax                     ; eax = [B0][G0][R0][A0]

        bswap   ebx                     ; ebx = [B1][G1][R1][A1]

        mov     al,[esi+4+2]            ; eax = [B0][G0][R0][R1]
        mov     bh,[esi+4+4+1]          ; ebx = [B1][G1][G2][A1]

        ror     eax,8                   ; eax = [R1][B0][G0][R0]  -> dest dword 0
        mov     bl,[esi+4+4+2]          ; ebx = [B1][G1][G2][R2]

        ror     ebx,16                  ; ebx = [G2][R2][B1][G1]  -> dest dword 1
        mov     [edi],eax

        mov     [edi+4],ebx
        mov     ecx,[esi+12]            ; ecx = [A3][R3][G3][B3] (fourth pixel)

        bswap   ecx                     ; ecx = [B3][G3][R3][A3]

        mov     cl,[esi+8]              ; ecx = [B3][G3][R3][B2]  -> dest dword 2
        add     esi,BYTE 4*4

        mov     [edi+8],ecx
        add     edi,BYTE 3*4

        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx,BYTE 11b            ; ECX = count mod 4 = leftover pixels
        jz      .done

.tail_loop:                             ; leftover pixels, one at a time
        mov     dl,[esi]
        mov     bl,[esi+1]
        mov     al,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp                     ; restore caller's EBP
        jmp     _x86return
 

	
		
;; 32 bit RGB 888 to 16 BIT RGB 565 

_ConvertX86p32_16RGB565:
        ; 32 bit -> 16 bit 5-6-5: source byte 2 (top 5 bits) lands in bits
        ; 15..11, byte 1 (top 6 bits) in bits 10..5, byte 0 (top 5 bits)
        ; in bits 4..0 of each output word.
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return

        cmp     ecx,BYTE 16             ; more than 16 pixels?
        ja      .align_head             ; yes: use the paired path

.short_loop:                            ; one pixel per iteration
        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx
        jnz     .short_loop

.short_done:
        jmp     _x86return

.align_head:                            ; if (EDI & 3) != 0, do one pixel first
        mov     ebx,edi
        and     ebx,BYTE 11b
        jz      .pairs

        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx

.pairs:
        push    ecx                     ; remember remaining count for the tail

        shr     ecx,1                   ; ECX = number of pixel pairs

        lea     esi,[esi+ecx*8]         ; point both arrays past the paired
        lea     edi,[edi+ecx*4]         ; region ...

        neg     ecx                     ; ... and index upward from -pairs to 0
        jmp     SHORT .pack_pair

.store_pair:                            ; store the pair packed last iteration
        mov     [edi+ecx*4-4],eax       ; (ECX was already incremented)
.pack_pair:
        mov     eax,[esi+ecx*8]         ; first pixel of the pair

        shr     ah,2
        mov     ebx,[esi+ecx*8+4]       ; second pixel of the pair

        shr     eax,3
        mov     edx,[esi+ecx*8+4]       ; second pixel again, for its red byte

        shr     bh,2
        mov     dl,[esi+ecx*8+2]        ; red byte of the first pixel

        shl     ebx,13
        and     eax,000007FFh           ; low word: green+blue of first pixel

        shl     edx,8
        and     ebx,07FF0000h           ; high word: green+blue of second pixel

        and     edx,0F800F800h          ; red fields of both pixels
        add     eax,ebx

        add     eax,edx
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4],eax       ; store the final pair (ECX == 0 here)

        pop     ecx
        test    cl,1                    ; odd pixel left over?
        jz      .done

        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2

.done:
        jmp     _x86return



	
;; 32 bit RGB 888 to 16 BIT BGR 565 

_ConvertX86p32_16BGR565:
        ; 32 bit -> 16 bit 5-6-5 with swapped outer channels: source byte 0
        ; (top 5 bits) lands in bits 15..11, byte 1 (top 6 bits) in bits
        ; 10..5, byte 2 (top 5 bits) in bits 4..0 of each output word.
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return

        cmp     ecx,BYTE 16             ; more than 16 pixels?
        ja      .align_head             ; yes: use the paired path

.short_loop:                            ; one pixel per iteration
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:                            ; if (EDI & 3) != 0, do one pixel first
        mov     ebx,edi
        and     ebx,BYTE 11b
        jz      .pairs
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx

.pairs:
        push    ecx                     ; remember remaining count for the tail

        shr     ecx,1                   ; ECX = number of pixel pairs

        lea     esi,[esi+ecx*8]         ; point both arrays past the paired
        lea     edi,[edi+ecx*4]         ; region ...

        neg     ecx                     ; ... and index upward from -pairs to 0
        jmp     SHORT .pack_pair

.store_pair:                            ; store the pair packed last iteration
        mov     [edi+ecx*4-4],eax       ; (ECX was already incremented)
.pack_pair:
        mov     edx,[esi+ecx*8+4]       ; second pixel (its red byte is kept)

        mov     bh,[esi+ecx*8+4]        ; blue of second pixel
        mov     ah,[esi+ecx*8]          ; blue of first pixel

        shr     bh,3
        mov     al,[esi+ecx*8+1]        ; green of first pixel

        shr     ah,3
        mov     bl,[esi+ecx*8+5]        ; green of second pixel

        shl     eax,3
        mov     dl,[esi+ecx*8+2]        ; red of first pixel

        shl     ebx,19
        and     eax,0000FFE0h           ; low word: blue+green of first pixel

        shr     edx,3
        and     ebx,0FFE00000h          ; high word: blue+green of second pixel

        and     edx,001F001Fh           ; red fields of both pixels
        add     eax,ebx

        add     eax,edx
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4],eax       ; store the final pair (ECX == 0 here)

        pop     ecx
        and     ecx,BYTE 1              ; odd pixel left over?
        jz      .done
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2

.done:
        jmp     _x86return


	
	
;; 32 BIT RGB TO 16 BIT RGB 555

_ConvertX86p32_16RGB555:
        ; 32 bit -> 16 bit 5-5-5: source byte 2 (top 5 bits) lands in bits
        ; 14..10, byte 1 (top 5 bits) in bits 9..5, byte 0 (top 5 bits)
        ; in bits 4..0 of each output word.
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return

        cmp     ecx,BYTE 16             ; more than 16 pixels?
        ja      .align_head             ; yes: use the paired path

.short_loop:                            ; one pixel per iteration
        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:                            ; if (EDI & 3) != 0, do one pixel first
        mov     ebx,edi
        and     ebx,BYTE 11b
        jz      .pairs
        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx

.pairs:
        push    ecx                     ; remember remaining count for the tail

        shr     ecx,1                   ; ECX = number of pixel pairs

        lea     esi,[esi+ecx*8]         ; point both arrays past the paired
        lea     edi,[edi+ecx*4]         ; region ...

        neg     ecx                     ; ... and index upward from -pairs to 0
        jmp     SHORT .pack_pair

.store_pair:                            ; store the pair packed last iteration
        mov     [edi+ecx*4-4],eax       ; (ECX was already incremented)
.pack_pair:
        mov     eax,[esi+ecx*8]         ; first pixel of the pair

        shr     ah,3
        mov     ebx,[esi+ecx*8+4]       ; second pixel of the pair

        shr     eax,3
        mov     edx,[esi+ecx*8+4]       ; second pixel again, for its red byte

        shr     bh,3
        mov     dl,[esi+ecx*8+2]        ; red byte of the first pixel

        shl     ebx,13
        and     eax,000007FFh           ; low word: green+blue of first pixel

        shl     edx,7
        and     ebx,07FF0000h           ; high word: green+blue of second pixel

        and     edx,07C007C00h          ; red fields of both pixels
        add     eax,ebx

        add     eax,edx
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4],eax       ; store the final pair (ECX == 0 here)

        pop     ecx
        and     ecx,BYTE 1              ; odd pixel left over?
        jz      .done
        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2

.done:
        jmp     _x86return




;; 32 BIT RGB TO 16 BIT BGR 555
	
_ConvertX86p32_16BGR555:
        ; 32 bit -> 16 bit 5-5-5 with swapped outer channels: source byte 0
        ; (top 5 bits) lands in bits 14..10, byte 1 (top 5 bits) in bits
        ; 9..5, byte 2 (top 5 bits) in bits 4..0 of each output word.
        ; In:   ESI = source, EDI = dest, ECX = pixel count (must not be 0)
        ; Exit: jumps to _x86return

        cmp     ecx,BYTE 16             ; more than 16 pixels?
        ja      .align_head             ; yes: use the paired path

.short_loop:                            ; one pixel per iteration
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_head:                            ; if (EDI & 3) != 0, do one pixel first
        mov     ebx,edi
        and     ebx,BYTE 11b
        jz      .pairs
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx

.pairs:
        push    ecx                     ; remember remaining count for the tail

        shr     ecx,1                   ; ECX = number of pixel pairs

        lea     esi,[esi+ecx*8]         ; point both arrays past the paired
        lea     edi,[edi+ecx*4]         ; region ...

        neg     ecx                     ; ... and index upward from -pairs to 0
        jmp     SHORT .pack_pair

.store_pair:                            ; store the pair packed last iteration
        mov     [edi+ecx*4-4],eax       ; (ECX was already incremented)
.pack_pair:
        mov     edx,[esi+ecx*8+4]       ; second pixel (its red byte is kept)

        mov     bh,[esi+ecx*8+4]        ; blue of second pixel
        mov     ah,[esi+ecx*8]          ; blue of first pixel

        shr     bh,3
        mov     al,[esi+ecx*8+1]        ; green of first pixel

        shr     ah,3
        mov     bl,[esi+ecx*8+5]        ; green of second pixel

        shl     eax,2
        mov     dl,[esi+ecx*8+2]        ; red of first pixel

        shl     ebx,18
        and     eax,00007FE0h           ; low word: blue+green of first pixel

        shr     edx,3
        and     ebx,07FE00000h          ; high word: blue+green of second pixel

        and     edx,001F001Fh           ; red fields of both pixels
        add     eax,ebx

        add     eax,edx
        inc     ecx

        jnz     .store_pair

        mov     [edi+ecx*4-4],eax       ; store the final pair (ECX == 0 here)

        pop     ecx
        and     ecx,BYTE 1              ; odd pixel left over?
        jz      .done
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2

.done:
        jmp     _x86return




	
;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
;; This routine writes FOUR pixels at once (one dword) and then, if any
;; remain, up to three trailing pixels one byte at a time.
;;
;; Each output byte is: top 3 bits of source byte 2 in bits 7..5,
;; top 3 bits of source byte 1 in bits 4..2, top 2 bits of source
;; byte 0 in bits 1..0.
;;
;; In:   ESI = source, EDI = dest, ECX = pixel count
;; Exit: jumps to _x86return
_ConvertX86p32_8RGB332:

        push    ecx                     ; remember full count for the tail

        shr     ecx,2                   ; ECX = number of 4-pixel groups
        jz      NEAR .tail              ; none: only trailing pixels to do
                                        ; (explicit NEAR: target is beyond
                                        ; short-jump range)

.quad_loop:
        mov     eax,[esi]               ; pixel 0
        mov     edx,[esi+4]             ; pixel 1

        shr     dl,6                    ; pixel 1: blue -> bits 1..0
        mov     ebx,eax

        shr     al,6                    ; pixel 0: blue -> bits 1..0
        and     ah,0e0h                 ; pixel 0: top 3 bits of green

        shr     ebx,16                  ; bl = pixel 0 red
        and     dh,0e0h                 ; pixel 1: top 3 bits of green

        shr     ah,3                    ; pixel 0: green -> bits 4..2
        and     bl,0e0h                 ; pixel 0: red stays in bits 7..5

        shr     dh,3                    ; pixel 1: green -> bits 4..2

        or      al,bl

        mov     ebx,edx
        or      al,ah                   ; al = packed pixel 0

        shr     ebx,16                  ; bl = pixel 1 red
        or      dl,dh

        and     bl,0e0h

        or      dl,bl                   ; dl = packed pixel 1

        mov     ah,dl                   ; ax = [pixel 1][pixel 0]



        mov     ebx,[esi+8]             ; pixel 2

        mov     edx,ebx
        and     bh,0e0h                 ; pixel 2: top 3 bits of green

        shr     bl,6                    ; pixel 2: blue -> bits 1..0
        and     edx,0e00000h            ; pixel 2: top 3 bits of red

        shr     edx,16                  ; ... moved down into dl bits 7..5

        shr     bh,3                    ; pixel 2: green -> bits 4..2

        ror     eax,16                  ; park pixels 0/1 in the high word
        or      bl,dl

        mov     edx,[esi+12]            ; pixel 3
        or      bl,bh                   ; bl = packed pixel 2

        mov     al,bl

        mov     ebx,edx
        and     dh,0e0h                 ; pixel 3: top 3 bits of green

        shr     dl,6                    ; pixel 3: blue -> bits 1..0
        and     ebx,0e00000h            ; pixel 3: top 3 bits of red

        shr     dh,3                    ; pixel 3: green -> bits 4..2
        mov     ah,dl

        shr     ebx,16                  ; red -> bl bits 7..5
        or      ah,dh

        or      ah,bl                   ; ah = packed pixel 3

        rol     eax,16                  ; eax = [pix 3][pix 2][pix 1][pix 0]
        add     esi,BYTE 16

        mov     [edi],eax
        add     edi,BYTE 4

        dec     ecx
        jnz     NEAR .quad_loop         ; one near branch instead of jz + jmp

.tail:
        pop     ecx
        and     ecx,BYTE 3              ; ECX = count mod 4 = trailing pixels

        jz      .done                   ; nothing left to do

.tail_loop:
        mov     eax,[esi]               ; single pixel conversion

        mov     ebx,eax

        shr     al,6                    ; blue -> bits 1..0
        and     ah,0e0h                 ; top 3 bits of green

        shr     ebx,16                  ; bl = red

        shr     ah,3                    ; green -> bits 4..2
        and     bl,0e0h                 ; red stays in bits 7..5

        or      al,ah
        or      al,bl

        mov     [edi],al

        inc     edi
        add     esi,BYTE 4

        dec     ecx
        jnz     .tail_loop

.done:
        jmp     _x86return

; Mark the object as not needing an executable stack when building ELF.
; The format name may be reported either as "elf" or as "elf32" depending
; on the NASM version / -f spelling, so accept both; otherwise the note is
; silently omitted and the linker may flag the stack as executable.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif