view src/hermes/x86p_32.asm @ 4139:568c9b3c0167 SDL-1.2

* Added configure option --enable-screensaver, to allow enabling the screensaver by default. * Use XResetScreenSaver() instead of disabling screensaver entirely. Full discussion summary from Erik on the SDL mailing list: Current behaviour ================= SDL changes the user's display power management settings without permission from the user and without telling the user. The interface that it uses to do so is DPMSDisable/DPMSEnable, which should only ever be used by configuration utilities like KControl, never by normal application programs, let alone by the libraries that they use. Using an interface that is not at all intended for what SDL tries to achieve means that it will not work as it should. Firstly, the power management is completely disabled during the whole lifetime of the SDL program, not only when it should be. Secondly, it makes SDL non-reentrant, meaning that things will break when multiple SDL programs are clients of the same X server simultaneously. Thirdly, no cleanup mechanism ensures that the setting is restored if the client does not do that (for example if it crashes). In addition to that, this interface is broken on xorg, [http://bugs.freedesktop.org/show_bug.cgi?id=13962], so what SDL tries to do does not work at all on that implementation of the X Window System. (The reason that the DPMSEnable works in KControl is that it calls DPMSSetTimeout immediately after, [http://websvn.kde.org/tags/KDE/3.5.9/kdebase/kcontrol/energy/energy.cpp?annotate=774532#l343]). The problems that the current behaviour causes ============================================== 1. Information leak. When the user is away, someone might see what the user has on the display when the user counts on the screensaver preventing this. This does not even require physical access to the workstation, it is enough to see it from a distance. 2. Draining battery. An SDL program that runs on a laptop will quickly drain the battery while the user is away. The system will soon shut down and require recharging before being usable again, while it should in fact have consumed very little energy if the user's settings would have been obeyed. 3. Wasting energy. Even if battery issues are not considered, energy as such is wasted. 4. Display wear. The display may be worn out. The problems that the current behaviour tries to solve ====================================================== 1. Preventing screensaver while playing movies. Many SDL applications are media players. They have reasons to prevent screensavers from being activated while a movie is being played. When a user clicks on the play button it can be interpreted as saying "play this movie, but do not turn off the display while playing it, because I will watch it even though I do not interact with the system". 2. Preventing screensaver when some input bypasses X. Sometimes SDL uses input from another source than the X server, so that the X server is bypassed. This obviously breaks the screensaver handling. SDL tries to work around that. 3. Preventing screensaver when all input bypasses X. There is something called Direct Graphics Access mode, where a program takes control of both the display and the input devices from the X server. This obviously means that the X server can not handle the screensaver alone, since screensaver handling depends on input handling. SDL does not do what it should to help the X server to handle the screensaver. Nor does SDL take care of screeensaver handling itself. SDL simply disables the screensaver completely. How the problems should be solved ================================= The correct way for an application program to prevent the screensaver under X is to call XResetScreenSaver. This was recently discovered and implemented by the mplayer developers, [http://svn.mplayerhq.hu/mplayer?view=rev&revision=25637]. SDL needs to wrap this in an API call (SDL_ResetScreenSaver) and implement it for the other video targets (if they do not have a corresponding call, SDL should do what it takes on that particular target, for example sending fake key events). 1. When a movie is played, the player should reset the screensaver when the animation is advanced to a new frame. The same applies to anything similar, like slideshows. 2. When the X server is handling input, it must handle all input (keyboards, mice, gamepads, ...). This is necessary, not only to be able to handle the screensaver, but also so that it can send the events to the correct (the currently active) client. If there is an input device that the X server can not handle for some reason (such as lack of Plug and Play capability), the program that handles the device as a workaround must simulate what would happen if the X server would have handled the device, by calling XResetScreenSaver when input is received from the device. 3. When the X server is not handling the input, it depends on the program that does to call XResetScreenSaver whenever an input event occurs. Alternatively the program must handle the screensaver countdown internally and call XActivateScreenSaver.
author Sam Lantinga <slouken@libsdl.org>
date Fri, 29 Feb 2008 13:55:44 +0000
parents 540466e900db
children
line wrap: on
line source

;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
; 
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions		
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
; 

BITS 32

%include "common.inc"

SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888	
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332

SECTION .text

;; _Convert_*
;; Paramters:	
;;   ESI = source 
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX


_ConvertX86p32_32BGR888:

    ; check short
    cmp ecx,BYTE 32
    ja .L3

.L1 ; short loop
    mov edx,[esi]
    bswap edx
    ror edx,8
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .L1
.L2
    retn

.L3 ; save ebp
    push ebp

    ; unroll four times
    mov ebp,ecx
    shr ebp,2
    
    ; save count
    push ecx

.L4     mov eax,[esi]
        mov ebx,[esi+4]

        bswap eax

        bswap ebx

        ror eax,8
        mov ecx,[esi+8]

        ror ebx,8
        mov edx,[esi+12]

        bswap ecx

        bswap edx

        ror ecx,8
        mov [edi+0],eax

        ror edx,8
        mov [edi+4],ebx

        mov [edi+8],ecx
        mov [edi+12],edx

        add esi,BYTE 16
        add edi,BYTE 16

        dec ebp
        jnz .L4                 

    ; check tail
    pop ecx
    and ecx,BYTE 11b
    jz .L6

.L5 ; tail loop
    mov edx,[esi]
    bswap edx
    ror edx,8
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .L5

.L6 pop ebp
    retn
	

	
		
_ConvertX86p32_32RGBA888:
	
    ; check short
    cmp ecx,BYTE 32
    ja .L3

.L1 ; short loop
    mov edx,[esi]
    rol edx,8
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .L1
.L2
    retn

.L3 ; save ebp
    push ebp

    ; unroll four times
    mov ebp,ecx
    shr ebp,2
    
    ; save count
    push ecx

.L4     mov eax,[esi]
        mov ebx,[esi+4]

        rol eax,8
        mov ecx,[esi+8]

        rol ebx,8
        mov edx,[esi+12]

        rol ecx,8
        mov [edi+0],eax

        rol edx,8
        mov [edi+4],ebx

        mov [edi+8],ecx
        mov [edi+12],edx

        add esi,BYTE 16
        add edi,BYTE 16

        dec ebp
        jnz .L4                 

    ; check tail
    pop ecx
    and ecx,BYTE 11b
    jz .L6

.L5 ; tail loop
    mov edx,[esi]
    rol edx,8
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .L5

.L6 pop ebp
    retn

	


_ConvertX86p32_32BGRA888:

    ; check short
    cmp ecx,BYTE 32
    ja .L3

.L1 ; short loop
    mov edx,[esi]
    bswap edx
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .L1
.L2
    retn

.L3 ; save ebp
    push ebp

    ; unroll four times
    mov ebp,ecx
    shr ebp,2
    
    ; save count
    push ecx

.L4     mov eax,[esi]
        mov ebx,[esi+4]

        mov ecx,[esi+8]
        mov edx,[esi+12]

        bswap eax

        bswap ebx

        bswap ecx

        bswap edx

        mov [edi+0],eax
        mov [edi+4],ebx

        mov [edi+8],ecx
        mov [edi+12],edx

        add esi,BYTE 16
        add edi,BYTE 16

        dec ebp
        jnz .L4                 

    ; check tail
    pop ecx
    and ecx,BYTE 11b
    jz .L6

.L5 ; tail loop
    mov edx,[esi]
    bswap edx
    mov [edi],edx
    add esi,BYTE 4
    add edi,BYTE 4
    dec ecx
    jnz .L5

.L6 pop ebp
    retn


	
	
;; 32 bit RGB 888 to 24 BIT RGB 888

_ConvertX86p32_24RGB888:

	; check short
	cmp ecx,BYTE 32
	ja .L3

.L1	; short loop
	mov al,[esi]
	mov bl,[esi+1]
	mov dl,[esi+2]
	mov [edi],al
	mov [edi+1],bl
	mov [edi+2],dl
	add esi,BYTE 4
	add edi,BYTE 3
	dec ecx
	jnz .L1
.L2 
	retn

.L3	;	 head
	mov edx,edi
	and edx,BYTE 11b
	jz .L4
	mov al,[esi]
	mov bl,[esi+1]
	mov dl,[esi+2]
	mov [edi],al
	mov [edi+1],bl
	mov [edi+2],dl
	add esi,BYTE 4
	add edi,BYTE 3
	dec ecx
	jmp SHORT .L3

.L4 ; unroll 4 times
	push ebp
	mov ebp,ecx
	shr ebp,2

    ; save count
	push ecx

.L5     mov eax,[esi]                   ; first dword            eax = [A][R][G][B]
        mov ebx,[esi+4]                 ; second dword           ebx = [a][r][g][b]

        shl eax,8                       ;                        eax = [R][G][B][.]
        mov ecx,[esi+12]                ; third dword            ecx = [a][r][g][b]

        shl ebx,8                       ;                        ebx = [r][g][b][.]
        mov al,[esi+4]                  ;                        eax = [R][G][B][b]

        ror eax,8                       ;                        eax = [b][R][G][B] (done)
        mov bh,[esi+8+1]                ;                        ebx = [r][g][G][.]

        mov [edi],eax
        add edi,BYTE 3*4

        shl ecx,8                       ;                        ecx = [r][g][b][.]
        mov bl,[esi+8+0]                ;                        ebx = [r][g][G][B]

        rol ebx,16                      ;                        ebx = [G][B][r][g] (done)
        mov cl,[esi+8+2]                ;                        ecx = [r][g][b][R] (done)

        mov [edi+4-3*4],ebx
        add esi,BYTE 4*4
        
        mov [edi+8-3*4],ecx
        dec ebp

        jnz .L5

    ; check tail
	pop ecx
	and ecx,BYTE 11b
	jz .L7

.L6 ; tail loop
	mov al,[esi]
	mov bl,[esi+1]
	mov dl,[esi+2]
	mov [edi],al
	mov [edi+1],bl
	mov [edi+2],dl
	add esi,BYTE 4
	add edi,BYTE 3
	dec ecx
	jnz .L6

.L7	pop ebp
	retn




;; 32 bit RGB 888 to 24 bit BGR 888

_ConvertX86p32_24BGR888:

	; check short
	cmp ecx,BYTE 32
	ja .L3

	
.L1	; short loop
	mov dl,[esi]
	mov bl,[esi+1]
	mov al,[esi+2]
	mov [edi],al
	mov [edi+1],bl
	mov [edi+2],dl
	add esi,BYTE 4
	add edi,BYTE 3
	dec ecx
	jnz .L1
.L2
	retn

.L3 ; head
	mov edx,edi
	and edx,BYTE 11b
	jz .L4
	mov dl,[esi]
	mov bl,[esi+1]
	mov al,[esi+2]
	mov [edi],al
	mov [edi+1],bl
	mov [edi+2],dl
	add esi,BYTE 4
	add edi,BYTE 3
	dec ecx
	jmp SHORT .L3

.L4	; unroll 4 times
	push ebp
	mov ebp,ecx
	shr ebp,2

	; save count
	push ecx

.L5     
	mov eax,[esi]                   ; first dword            eax = [A][R][G][B]
        mov ebx,[esi+4]                 ; second dword           ebx = [a][r][g][b]
        
        bswap eax                       ;                        eax = [B][G][R][A]

        bswap ebx                       ;                        ebx = [b][g][r][a]

        mov al,[esi+4+2]                ;                        eax = [B][G][R][r] 
        mov bh,[esi+4+4+1]              ;                        ebx = [b][g][G][a]

        ror eax,8                       ;                        eax = [r][B][G][R] (done)
        mov bl,[esi+4+4+2]              ;                        ebx = [b][g][G][R]

        ror ebx,16                      ;                        ebx = [G][R][b][g] (done)
        mov [edi],eax
    
        mov [edi+4],ebx
        mov ecx,[esi+12]                ; third dword            ecx = [a][r][g][b]
        
        bswap ecx                       ;                        ecx = [b][g][r][a]
        
        mov cl,[esi+8]                  ;                        ecx = [b][g][r][B] (done)
        add esi,BYTE 4*4

        mov [edi+8],ecx
        add edi,BYTE 3*4

        dec ebp
        jnz .L5

	; check tail
	pop ecx
	and ecx,BYTE 11b
	jz .L7

.L6	; tail loop
	mov dl,[esi]
	mov bl,[esi+1]
	mov al,[esi+2]
	mov [edi],al
	mov [edi+1],bl
	mov [edi+2],dl
	add esi,BYTE 4
	add edi,BYTE 3
	dec ecx
	jnz .L6

.L7 
	pop ebp
	retn
 

	
		
;; 32 bit RGB 888 to 16 BIT RGB 565 

_ConvertX86p32_16RGB565:
	; check short
	cmp ecx,BYTE 16
	ja .L3

.L1 ; short loop
	mov bl,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov ah,[esi+2]    ; red
	shr ah,3
        and al,11111100b
	shl eax,3
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2
	dec ecx
	jnz .L1

.L2:				; End of short loop
	retn

	
.L3	; head
	mov ebx,edi
	and ebx,BYTE 11b
	jz .L4
	
	mov bl,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov ah,[esi+2]    ; red
	shr ah,3
	and al,11111100b
	shl eax,3
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2
	dec ecx

.L4:	 
    ; save count
	push ecx

    ; unroll twice
	shr ecx,1
    
    ; point arrays to end
	lea esi,[esi+ecx*8]
	lea edi,[edi+ecx*4]

    ; negative counter 
	neg ecx
	jmp SHORT .L6

.L5:	    
	mov [edi+ecx*4-4],eax
.L6:	
	mov eax,[esi+ecx*8]

        shr ah,2
        mov ebx,[esi+ecx*8+4]

        shr eax,3
        mov edx,[esi+ecx*8+4]

        shr bh,2
        mov dl,[esi+ecx*8+2]

        shl ebx,13
        and eax,000007FFh
        
        shl edx,8
        and ebx,07FF0000h

        and edx,0F800F800h
        add eax,ebx

        add eax,edx
        inc ecx

        jnz .L5                 

	mov [edi+ecx*4-4],eax

    ; tail
	pop ecx
	test cl,1
	jz .L7
	
	mov bl,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov ah,[esi+2]    ; red
	shr ah,3
	and al,11111100b
	shl eax,3
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2

.L7:	
	retn



	
;; 32 bit RGB 888 to 16 BIT BGR 565 

_ConvertX86p32_16BGR565:
	
	; check short
	cmp ecx,BYTE 16
	ja .L3

.L1	; short loop
	mov ah,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov bl,[esi+2]    ; red
	shr ah,3
	and al,11111100b
	shl eax,3
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2
	dec ecx
	jnz .L1
.L2
	retn

.L3	; head
	mov ebx,edi
	and ebx,BYTE 11b
	jz .L4   
	mov ah,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov bl,[esi+2]    ; red
	shr ah,3
	and al,11111100b
	shl eax,3
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2
	dec ecx

.L4	; save count
	push ecx

	; unroll twice
	shr ecx,1
    
	; point arrays to end
	lea esi,[esi+ecx*8]
	lea edi,[edi+ecx*4]

	; negative count
	neg ecx
	jmp SHORT .L6

.L5     
	mov [edi+ecx*4-4],eax            
.L6     
	mov edx,[esi+ecx*8+4]

        mov bh,[esi+ecx*8+4]                       
        mov ah,[esi+ecx*8]                       

        shr bh,3
        mov al,[esi+ecx*8+1]             

        shr ah,3
        mov bl,[esi+ecx*8+5]           

        shl eax,3
        mov dl,[esi+ecx*8+2]

        shl ebx,19
        and eax,0000FFE0h              
                
        shr edx,3
        and ebx,0FFE00000h             
        
        and edx,001F001Fh               
        add eax,ebx

        add eax,edx
        inc ecx

        jnz .L5                 

	mov [edi+ecx*4-4],eax            

	; tail
	pop ecx
	and ecx,BYTE 1
	jz .L7
	mov ah,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov bl,[esi+2]    ; red
	shr ah,3
	and al,11111100b
	shl eax,3
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2

.L7 
	retn


	
	
;; 32 BIT RGB TO 16 BIT RGB 555

_ConvertX86p32_16RGB555:

	; check short
	cmp ecx,BYTE 16
	ja .L3

.L1	; short loop
	mov bl,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov ah,[esi+2]    ; red
	shr ah,3
	and al,11111000b
	shl eax,2
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2
	dec ecx
	jnz .L1
.L2
	retn

.L3	; head
	mov ebx,edi
        and ebx,BYTE 11b
	jz .L4   
	mov bl,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov ah,[esi+2]    ; red
	shr ah,3
	and al,11111000b
	shl eax,2
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2
	dec ecx

.L4	; save count
	push ecx

	; unroll twice
	shr ecx,1
    
	; point arrays to end
	lea esi,[esi+ecx*8]
	lea edi,[edi+ecx*4]

	; negative counter 
	neg ecx
	jmp SHORT .L6

.L5     
	mov [edi+ecx*4-4],eax
.L6     
	mov eax,[esi+ecx*8]

        shr ah,3
        mov ebx,[esi+ecx*8+4]

        shr eax,3
        mov edx,[esi+ecx*8+4]

        shr bh,3
        mov dl,[esi+ecx*8+2]

        shl ebx,13
        and eax,000007FFh
        
        shl edx,7
        and ebx,07FF0000h

        and edx,07C007C00h
        add eax,ebx

        add eax,edx
        inc ecx

        jnz .L5                 

	mov [edi+ecx*4-4],eax

	; tail
	pop ecx
	and ecx,BYTE 1
	jz .L7
	mov bl,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov ah,[esi+2]    ; red
	shr ah,3
	and al,11111000b
	shl eax,2
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2

.L7
	retn




;; 32 BIT RGB TO 16 BIT BGR 555
	
_ConvertX86p32_16BGR555:
	
	; check short
	cmp ecx,BYTE 16
	ja .L3


.L1	; short loop
	mov ah,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov bl,[esi+2]    ; red
	shr ah,3
	and al,11111000b
	shl eax,2
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2
	dec ecx
	jnz .L1
.L2 
	retn

.L3	; head
	mov ebx,edi
        and ebx,BYTE 11b
	jz .L4   
	mov ah,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov bl,[esi+2]    ; red
	shr ah,3
	and al,11111000b
	shl eax,2
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2
	dec ecx

.L4	; save count
	push ecx

	; unroll twice
	shr ecx,1
    
	; point arrays to end
	lea esi,[esi+ecx*8]
	lea edi,[edi+ecx*4]

	; negative counter 
	neg ecx
	jmp SHORT .L6

.L5     
	mov [edi+ecx*4-4],eax            
.L6     
	mov edx,[esi+ecx*8+4]

        mov bh,[esi+ecx*8+4]                       
        mov ah,[esi+ecx*8]                       

        shr bh,3
        mov al,[esi+ecx*8+1]             

        shr ah,3
        mov bl,[esi+ecx*8+5]           

        shl eax,2
        mov dl,[esi+ecx*8+2]

        shl ebx,18
        and eax,00007FE0h              
                
        shr edx,3
        and ebx,07FE00000h             
        
        and edx,001F001Fh               
        add eax,ebx

        add eax,edx
        inc ecx

        jnz .L5                 

	mov [edi+ecx*4-4],eax            

	; tail
	pop ecx
	and ecx,BYTE 1
	jz .L7
	mov ah,[esi+0]    ; blue
	mov al,[esi+1]    ; green
	mov bl,[esi+2]    ; red
	shr ah,3
	and al,11111000b
	shl eax,2
	shr bl,3
	add al,bl
	mov [edi+0],al
	mov [edi+1],ah
	add esi,BYTE 4
	add edi,BYTE 2

.L7
	retn




	
;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb)
;; This routine writes FOUR pixels at once (dword) and then, if they exist
;; the trailing three pixels
_ConvertX86p32_8RGB332:

	
.L_ALIGNED
	push ecx

	shr ecx,2		; We will draw 4 pixels at once
	jnz .L1
	
	jmp .L2			; short jump out of range :(
	
.L1:
	mov eax,[esi]		; first pair of pixels
	mov edx,[esi+4]

	shr dl,6
	mov ebx,eax

	shr al,6
	and ah,0e0h

	shr ebx,16
	and dh,0e0h
	
	shr ah,3
	and bl,0e0h

	shr dh,3
	
	or al,bl
	
	mov ebx,edx	
	or al,ah
	
	shr ebx,16
	or dl,dh

	and bl,0e0h
	
	or dl,bl

	mov ah,dl

	
		
	mov ebx,[esi+8]		; second pair of pixels

	mov edx,ebx
	and bh,0e0h

	shr bl,6
	and edx,0e00000h

	shr edx,16

	shr bh,3

	ror eax,16
	or bl,dl

	mov edx,[esi+12]
	or bl,bh
	
	mov al,bl

	mov ebx,edx
	and dh,0e0h

	shr dl,6
	and ebx,0e00000h
	
	shr dh,3
	mov ah,dl

	shr ebx,16
	or ah,dh

	or ah,bl

	rol eax,16
	add esi,BYTE 16
			
	mov [edi],eax	
	add edi,BYTE 4
	
	dec ecx
	jz .L2			; L1 out of range for short jump :(
	
	jmp .L1
.L2:
	
	pop ecx
	and ecx,BYTE 3		; mask out number of pixels to draw
	
	jz .L4			; Nothing to do anymore

.L3:
	mov eax,[esi]		; single pixel conversion for trailing pixels

        mov ebx,eax

        shr al,6
        and ah,0e0h

        shr ebx,16

        shr ah,3
        and bl,0e0h

        or al,ah
        or al,bl

        mov [edi],al

        inc edi
        add esi,BYTE 4

	dec ecx
	jnz .L3
	
.L4:	
	retn

%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif