Mercurial > sdl-ios-xcode
view src/hermes/mmxp2_32.asm @ 876:9e84d106ec19
(Said Max Horn on the SDL mailing list...)
Hi folks,
based on Eric Wing's patch, I created the attached patch which fixes
the OpenGL coordinate inversion bug in SDL. It works fine over here on
10.3 with Ryan's test program (which I also attached).
There is another change in it: I removed the "- 1" in the two lines
using CGDisplayPixelsHigh()... while I understand from a logical point
of view why they *should* be correct, I checked the actual values
computed that way, and they were off-by-one. After removing the " - 1",
the returned mouse coordinates are correct. I checked this by moving
the mouse to the screen top/bottom in fullscreen mode, BTW. With the
change, the proper values 0 and 479 are returned (in 640x480 mode).
Sam, you may still want to test on 10.1, it's very simple using Ryan's
minimal test code :-)
Cheers,
Max
(Here is the reproduction case for revision history's sake...)
/*
* To compile:
* gcc -o test test.c `sdl-config --cflags` `sdl-config --libs` -framework OpenGL
*
* --ryan.
*/
#include <stdio.h>
#include <string.h>
#include "SDL.h"
#include "SDL_opengl.h"
int main(int argc, char **argv)
{
Uint32 flags = SDL_OPENGL /* | SDL_FULLSCREEN */;
SDL_Surface *screen;
SDL_Event event;
int done = 0;
GLfloat ratio;
SDL_Init(SDL_INIT_VIDEO);
SDL_ShowCursor(0);
if ((argv[1]) && (strcmp(argv[1], "--grab") == 0))
SDL_WM_GrabInput(SDL_GRAB_ON);
screen = SDL_SetVideoMode(640, 480, 0, flags);
if (!screen)
return(42);
ratio = ((GLfloat) screen->w) / ((GLfloat) screen->h);
glClearColor( 0.0f, 0.0f, 0.0f, 0.0f );
glClearDepth( 1.0f );
glEnable( GL_DEPTH_TEST );
glDepthFunc( GL_LEQUAL );
glViewport( 0, 0, screen->w, screen->h);
glMatrixMode( GL_PROJECTION );
glLoadIdentity();
gluPerspective( 45.0f, ratio, 0.1f, 100.0f );
glMatrixMode( GL_MODELVIEW );
glLoadIdentity();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
SDL_GL_SwapBuffers();
// eh, close enough.
#define MAX_X 6.12
#define MAX_Y 4.50
while (!done)
{
int x, y;
GLfloat glx, gly;
if (!SDL_WaitEvent(&event))
break;
switch (event.type)
{
case SDL_KEYUP:
if (event.key.keysym.sym == SDLK_ESCAPE)
done = 1;
break;
}
SDL_GetMouseState(&x, &y);
glx = ((((GLfloat) x) / ((GLfloat) screen->w)) - 0.5f) * MAX_X;
gly = ((((GLfloat) y) / ((GLfloat) screen->h)) - 0.5f) * MAX_Y;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glLoadIdentity();
glTranslatef(glx,-gly,-6.0f);
glBegin(GL_TRIANGLES);
glColor3f(1,0,0); glVertex3f( 0.00f, 0.25f, 0.00f);
glColor3f(0,1,0); glVertex3f(-0.25f, -0.25f, 0.00f);
glColor3f(0,0,1); glVertex3f( 0.25f, -0.25f, 0.00f);
glEnd();
SDL_GL_SwapBuffers();
}
SDL_Quit();
return(0);
}
/* end of test.c ... */
author | Ryan C. Gordon <icculus@icculus.org> |
---|---|
date | Mon, 22 Mar 2004 09:38:20 +0000 |
parents | 77b6110c797d |
children | da33b7e6d181 |
line wrap: on
line source
;
; pII-optimised MMX format converters for HERMES
; Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
;   and (c) 1999 Jonathan Matthew (jmatthew@uq.net.au)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; COPYRIGHT NOTICE
;
; This file partly contains code that is (c) Intel Corporation, specifically
; the mode detection routine, and the converter to 15 bit (8 pixel
; conversion routine from the mmx programming tutorial pages).
;
;
; These routines aren't exactly pII optimised - it's just that as they
; are, they're terrible on p5 MMXs, but less so on pIIs.  Someone needs to
; optimise them for p5 MMXs..
;
; Register usage shared by every converter in this file (as read from the
; code below):
;       esi     source pointer, one 32-bit pixel (byte order b, g, r, a) each
;       edi     destination pointer
;       ecx     number of pixels to convert
; Each routine finishes by jumping to _mmxreturn, which is defined in
; another file.  NOTE(review): presumably that is the common epilogue that
; leaves MMX state and returns to the caller - confirm against its
; definition.

BITS 32


GLOBAL _ConvertMMXpII32_24RGB888
GLOBAL _ConvertMMXpII32_16RGB565
GLOBAL _ConvertMMXpII32_16BGR565
GLOBAL _ConvertMMXpII32_16RGB555
GLOBAL _ConvertMMXpII32_16BGR555

EXTERN _mmxreturn

SECTION .data

ALIGN 8

;; Constants for conversion routines

; keeps r, g, b of both pixels in a quadword and drops the top byte
mmx32_rgb888_mask dd 00ffffffh,00ffffffh

; top 5 bits of blue, top 6 bits of green, top 5 bits of red
mmx32_rgb565_b dd 000000f8h, 000000f8h
mmx32_rgb565_g dd 0000fc00h, 0000fc00h
mmx32_rgb565_r dd 00f80000h, 00f80000h

; top 5 bits of red and blue (one per 16-bit word), top 5 bits of green
mmx32_rgb555_rb dd 00f800f8h,00f800f8h
mmx32_rgb555_g dd 0000f800h,0000f800h
; pmaddwd multipliers: the low word scales the blue word, the high word
; scales the red word.  x0008h moves a field up 3 bits, x2000h up 13 bits.
mmx32_rgb555_mul dd 20000008h,20000008h
mmx32_bgr555_mul dd 00082000h,00082000h



SECTION .text

;----------------------------------------------------------------------------
; _ConvertMMXpII32_24RGB888
;
; Packs 32-bit pixels into 3 bytes each by dropping the top byte.  The MMX
; loop converts 4 pixels (16 source bytes -> 12 destination bytes) per
; iteration; the remaining (count & 3) pixels are copied byte by byte.
; Uses mm0-mm4, mm6, mm7 and eax, ebx, ecx, edx.
;----------------------------------------------------------------------------
_ConvertMMXpII32_24RGB888:

        ; set up mm6 as the mask, mm7 as zero
        movq mm6, qword [mmx32_rgb888_mask]
        pxor mm7, mm7

        mov edx, ecx                    ; save ecx
        and ecx, 0fffffffch             ; clear lower two bits
        jnz .L1
        jmp .L2

.L1:

        movq mm0, [esi]                 ; A R G B a r g b
        pand mm0, mm6                   ; 0 R G B 0 r g b
        movq mm1, [esi+8]               ; A R G B a r g b
        pand mm1, mm6                   ; 0 R G B 0 r g b

        movq mm2, mm0                   ; 0 R G B 0 r g b
        punpckhdq mm2, mm7              ; 0 0 0 0 0 R G B
        punpckldq mm0, mm7              ; 0 0 0 0 0 r g b
        psllq mm2, 24                   ; 0 0 R G B 0 0 0
        por mm0, mm2                    ; 0 0 R G B r g b

        movq mm3, mm1                   ; 0 R G B 0 r g b
        psllq mm3, 48                   ; g b 0 0 0 0 0 0
        por mm0, mm3                    ; g b R G B r g b

        movq mm4, mm1                   ; 0 R G B 0 r g b
        punpckhdq mm4, mm7              ; 0 0 0 0 0 R G B
        punpckldq mm1, mm7              ; 0 0 0 0 0 r g b
        psrlq mm1, 16                   ; 0 0 0 0 0 0 0 r
        psllq mm4, 8                    ; 0 0 0 0 R G B 0
        por mm1, mm4                    ; 0 0 0 0 R G B r

        movq [edi], mm0                 ; first 8 output bytes
        add esi, BYTE 16
        movd [edi+8], mm1               ; last 4 output bytes
        add edi, BYTE 12
        sub ecx, BYTE 4
        jnz .L1

.L2:
        mov ecx, edx
        and ecx, BYTE 3                 ; pixels left over after the MMX loop
        jz .L4
.L3:
        mov al, [esi]
        mov bl, [esi+1]
        mov dl, [esi+2]                 ; edx is free again: count is in ecx
        mov [edi], al
        mov [edi+1], bl
        mov [edi+2], dl
        add esi, BYTE 4
        add edi, BYTE 3
        dec ecx
        jnz .L3

.L4:
        jmp _mmxreturn



;----------------------------------------------------------------------------
; _ConvertMMXpII32_16RGB565
;
; Converts 32-bit pixels to 16-bit rrrrrggg gggbbbbb.  The MMX loop converts
; 4 pixels (16 source bytes -> 8 destination bytes) per iteration; the
; remaining (count & 3) pixels go through the scalar loop at .L3.
; Uses mm0-mm7 and eax, ebx, ecx, edx.
;----------------------------------------------------------------------------
_ConvertMMXpII32_16RGB565:

        ; set up masks
        movq mm5, [mmx32_rgb565_b]
        movq mm6, [mmx32_rgb565_g]
        movq mm7, [mmx32_rgb565_r]

        mov edx, ecx                    ; save the full pixel count
        shr ecx, 2                      ; number of 4-pixel groups
        jnz .L1
        jmp .L2         ; not necessary at the moment, but doesn't hurt (much)

.L1:
        movq mm0, [esi]         ; argb
        movq mm1, mm0           ; argb
        pand mm0, mm6           ; 00g0
        movq mm3, mm1           ; argb
        pand mm1, mm5           ; 000b
        pand mm3, mm7           ; 0r00
        pslld mm1, 2            ; 0 0 000000bb bbb00000
        por mm0, mm1            ; 0 0 ggggggbb bbb00000
        psrld mm0, 5            ; 0 0 00000ggg gggbbbbb

        movq mm4, [esi+8]       ; argb
        movq mm2, mm4           ; argb
        pand mm4, mm6           ; 00g0
        movq mm1, mm2           ; argb
        pand mm2, mm5           ; 000b
        pand mm1, mm7           ; 0r00
        pslld mm2, 2            ; 0 0 000000bb bbb00000
        por mm4, mm2            ; 0 0 ggggggbb bbb00000
        psrld mm4, 5            ; 0 0 00000ggg gggbbbbb

        ; each dword 00rr0000h packs down to the word rr00h, which puts
        ; the five red bits at the top of the 16-bit result
        packuswb mm3, mm1       ; R 0 r 0
        packssdw mm0, mm4       ; as above.. ish
        por mm0, mm3            ; done.
        movq [edi], mm0

        add esi, 16
        add edi, 8
        dec ecx
        jnz .L1

.L2:
        mov ecx, edx
        and ecx, BYTE 3                 ; pixels left over after the MMX loop
        jz .L4
.L3:
        mov al, [esi]                   ; blue
        mov bh, [esi+1]                 ; green
        mov ah, [esi+2]                 ; red
        shr al, 3
        and eax, 0F81Fh            ; BYTE?
        shr ebx, 5
        and ebx, 07E0h             ; BYTE?
        add eax, ebx
        mov [edi], al
        mov [edi+1], ah
        add esi, BYTE 4
        add edi, BYTE 2
        dec ecx
        jnz .L3

.L4:
        jmp _mmxreturn



;----------------------------------------------------------------------------
; _ConvertMMXpII32_16BGR565
;
; Converts 32-bit pixels to 16-bit bbbbbggg gggrrrrr.  Same structure as the
; RGB565 converter with the red and blue masks exchanged; the scalar tail
; counts down in edx instead of ecx.
; Uses mm0-mm7 and eax, ebx, ecx, edx.
;----------------------------------------------------------------------------
_ConvertMMXpII32_16BGR565:

        movq mm5, [mmx32_rgb565_r]
        movq mm6, [mmx32_rgb565_g]
        movq mm7, [mmx32_rgb565_b]

        mov edx, ecx                    ; save the full pixel count
        shr ecx, 2                      ; number of 4-pixel groups
        jnz .L1
        jmp .L2

.L1:
        movq mm0, [esi]                 ; a r g b
        movq mm1, mm0                   ; a r g b
        pand mm0, mm6                   ; 0 0 g 0
        movq mm3, mm1                   ; a r g b
        pand mm1, mm5                   ; 0 r 0 0
        pand mm3, mm7                   ; 0 0 0 b

        psllq mm3, 16                   ; 0 b 0 0
        psrld mm1, 14                   ; 0 0 000000rr rrr00000
        por mm0, mm1                    ; 0 0 ggggggrr rrr00000
        psrld mm0, 5                    ; 0 0 00000ggg gggrrrrr

        movq mm4, [esi+8]               ; a r g b
        movq mm2, mm4                   ; a r g b
        pand mm4, mm6                   ; 0 0 g 0
        movq mm1, mm2                   ; a r g b
        pand mm2, mm5                   ; 0 r 0 0
        pand mm1, mm7                   ; 0 0 0 b

        psllq mm1, 16                   ; 0 b 0 0
        psrld mm2, 14                   ; 0 0 000000rr rrr00000
        por mm4, mm2                    ; 0 0 ggggggrr rrr00000
        psrld mm4, 5                    ; 0 0 00000ggg gggrrrrr

        packuswb mm3, mm1               ; BBBBB000 00000000 bbbbb000 00000000
        packssdw mm0, mm4               ; 00000GGG GGGRRRRR 00000GGG GGGRRRRR
        por mm0, mm3                    ; BBBBBGGG GGGRRRRR bbbbbggg gggrrrrr
        movq [edi], mm0

        add esi, BYTE 16
        add edi, BYTE 8
        dec ecx
        jnz .L1

.L2:
        and edx, BYTE 3                 ; pixels left over after the MMX loop
        jz .L4
.L3:
        mov al, [esi+2]                 ; red
        mov bh, [esi+1]                 ; green
        mov ah, [esi]                   ; blue
        shr al, 3
        and eax, 0F81Fh                 ; BYTE ?
        shr ebx, 5
        and ebx, 07E0h                  ; BYTE ?
        add eax, ebx
        mov [edi], al
        mov [edi+1], ah
        add esi, BYTE 4
        add edi, BYTE 2
        dec edx
        jnz .L3

.L4:
        jmp _mmxreturn



;----------------------------------------------------------------------------
; _ConvertMMXpII32_16BGR555
;
; Converts 32-bit pixels to 16-bit 0bbbbbgg gggrrrrr.
;----------------------------------------------------------------------------
_ConvertMMXpII32_16BGR555:

        ; the 16BGR555 converter is identical to the RGB555 one,
        ; except it uses a different multiplier for the pmaddwd
        ; instruction.  cool huh.

        movq mm7, qword [mmx32_bgr555_mul]
        jmp _convert_bgr555_cheat

;----------------------------------------------------------------------------
; _ConvertMMXpII32_16RGB555
;
; Converts 32-bit pixels to 16-bit 0rrrrrgg gggbbbbb.
;
; Derived from the Intel version; 'mmx32_rgb555_add' was renamed to
; 'mmx32_rgb555_mul', which is (I think) a more accurate name..
;
; Per pixel (one dword):
;       rb  = pixel & 00f800f8h
;       rb  = pmaddwd(rb, mul)     ; moves red and blue into their slots
;       rb |= pixel & 0000f800h    ; green, bits 11-15
;       rb >>= 6                   ; 15-bit result in bits 0-14
;
; Two deliberate differences from the software-pipelined original:
;   * The main loop only loads the 32 bytes it converts.  The pipelined
;     loop fetched the next 16 source bytes at the end of every iteration,
;     including the last one, and so read past the converted pixels.
;   * The leftover (count & 7) pixels use the same mask/multiply sequence
;     with the multiplier held in mm7.  The old scalar tail hard-coded the
;     RGB555 bit positions, so the BGR555 entry point produced red/blue
;     swapped tail pixels.
;
; Uses mm0-mm3, mm5-mm7 and eax, ecx, edx.
;----------------------------------------------------------------------------
_ConvertMMXpII32_16RGB555:

        movq mm7,qword [mmx32_rgb555_mul]
_convert_bgr555_cheat:
        movq mm6,qword [mmx32_rgb555_g]
        movq mm5,qword [mmx32_rgb555_rb]

        mov edx,ecx                        ; Save ecx

        and ecx,BYTE 0fffffff8h            ; clear lower three bits
        jnz .L1
        jmp near .L2

.L1:
        ; pixels 0-1
        movq mm0,[esi]
        movq mm1,mm0
        pand mm1,mm5                       ; red and blue fields
        pand mm0,mm6                       ; green field
        pmaddwd mm1,mm7
        por mm1,mm0
        psrld mm1,6

        ; pixels 2-3
        movq mm2,[esi+8]
        movq mm3,mm2
        pand mm3,mm5
        pand mm2,mm6
        pmaddwd mm3,mm7
        por mm3,mm2
        psrld mm3,6

        packssdw mm1,mm3                   ; four 16-bit pixels
        movq [edi],mm1

        ; pixels 4-5
        movq mm0,[esi+16]
        movq mm1,mm0
        pand mm1,mm5
        pand mm0,mm6
        pmaddwd mm1,mm7
        por mm1,mm0
        psrld mm1,6

        ; pixels 6-7
        movq mm2,[esi+24]
        movq mm3,mm2
        pand mm3,mm5
        pand mm2,mm6
        pmaddwd mm3,mm7
        por mm3,mm2
        psrld mm3,6

        packssdw mm1,mm3                   ; four 16-bit pixels
        movq [edi+8],mm1

        add esi,BYTE 32
        add edi,BYTE 16

        sub ecx,BYTE 8
        jz .L2
        jmp .L1                            ; loop body may exceed short-jump range

.L2:
        mov ecx,edx

        and ecx,BYTE 7                     ; pixels left over after the MMX loop
        jz .L4

.L3:
        movd mm0,dword [esi]               ; one pixel, upper dword zeroed
        add esi,BYTE 4

        movq mm1,mm0
        pand mm1,mm5
        pand mm0,mm6
        pmaddwd mm1,mm7                    ; mm7 selects RGB or BGR order
        por mm1,mm0
        psrld mm1,6

        movd eax,mm1
        mov [edi],ax

        add edi,BYTE 2
        dec ecx
        jnz .L3

.L4:
        jmp _mmxreturn