annotate src/video/SDL_yuv_mmx.c @ 1118:65f4b2dd46b7

Date: Wed, 17 Aug 2005 11:23:40 -0400 From: Matt L <prometheus.uw@gmail.com> To: "Ryan C. Gordon" <icculus@clutteredmind.org> Subject: SDL Patch Hio, I saw your last call on the mailing list. Here's a patch which I submitted about two weeks ago which hasn't made it in. In the current sdl.m4, there's a bug where if your configure.ac, you have AC_LANG(C++) specified, it won't properly compile and link the SDL test program when you run the configure script. This is because only the default CFLAGS is overriden in sdl.m4, and in the patch below, I've fixed it so it overrides CXXFLAGS as well, allowing it to work with g++.
author Ryan C. Gordon <icculus@icculus.org>
date Thu, 18 Aug 2005 06:06:02 +0000
parents 29d7db09776e
children 63fb2da89a4b
rev   line source
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
1 /*
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
2 SDL - Simple DirectMedia Layer
769
b8d311d90021 Updated copyright information for 2004 (Happy New Year!)
Sam Lantinga <slouken@libsdl.org>
parents: 297
diff changeset
3 Copyright (C) 1997-2004 Sam Lantinga
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
4
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
5 This library is free software; you can redistribute it and/or
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
6 modify it under the terms of the GNU Library General Public
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
7 License as published by the Free Software Foundation; either
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
8 version 2 of the License, or (at your option) any later version.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
9
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
10 This library is distributed in the hope that it will be useful,
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
13 Library General Public License for more details.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
14
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
15 You should have received a copy of the GNU Library General Public
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
16 License along with this library; if not, write to the Free
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
18
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
19 Sam Lantinga
252
e8157fcb3114 Updated the source with the correct e-mail address
Sam Lantinga <slouken@libsdl.org>
parents: 0
diff changeset
20 slouken@libsdl.org
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
21 */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
22
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
23 #ifdef SAVE_RCSID /* optional: embed a revision-control identification string in the object file */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
24 static const char rcsid[] = /* must be an array: a plain `char` cannot be initialised from a string literal */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
25 "@(#) $Id$";
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
26 #endif
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
27
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
28
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
29 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
30
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
31 #include "SDL_types.h"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
32
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
33 #if __GNUC__ > 2 /* choose how the inline asm below refers to the static MMX tables */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
34 # undef GCC2_HACK /* GCC 3 and later: the asm uses named operands of the form %[_MMX_xxx] */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
35 #else
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
36 # define GCC2_HACK /* GCC 2.x: the asm names the symbols directly (e.g. _MMX_0080w) */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
37 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
38
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
39
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
40 #if defined(GCC2_HACK) && defined (__ELF__) /* ASM_VAR(X) yields the C identifier used to declare each MMX table */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
41 #define ASM_VAR(X) _##X /* prepend '_' so the C name equals the "_MMX_xxx" symbol spelled out in the GCC2_HACK asm strings; presumably needed because ELF toolchains add no underscore themselves - TODO confirm */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
42 #else
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
43 #define ASM_VAR(X) X /* otherwise the table keeps its plain name */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
44 #endif
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
45
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
46 static volatile unsigned int ASM_VAR(MMX_0080w)[] = {0x00800080, 0x00800080}; /* 0x0080 in every 16-bit word; the asm subtracts it (psubw) to remove the 128 bias from the unpacked chroma samples */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
47 static volatile unsigned int ASM_VAR(MMX_00FFw)[] = {0x00ff00ff, 0x00ff00ff}; /* pand mask keeping the low byte of each word (even-indexed luminance samples) */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
48 static volatile unsigned int ASM_VAR(MMX_FF00w)[] = {0xff00ff00, 0xff00ff00}; /* pand mask keeping the high byte of each word (odd-indexed luminance samples, shifted down by 8 afterwards) */
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
49
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
50 static volatile unsigned short ASM_VAR(MMX_Ycoeff)[] = {0x004a, 0x004a, 0x004a, 0x004a}; /* 74 in each word; not referenced in the visible part of this file - presumably a luminance scale factor, TODO confirm */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
51
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
52 static volatile unsigned short ASM_VAR(MMX_UbluRGB)[] = {0x0072, 0x0072, 0x0072, 0x0072}; /* 114: Cb contribution to blue, scaled by 64 (pmullw followed by psraw $6) */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
53 static volatile unsigned short ASM_VAR(MMX_VredRGB)[] = {0x0059, 0x0059, 0x0059, 0x0059}; /* 89: Cr contribution to red, scaled by 64 */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
54 static volatile unsigned short ASM_VAR(MMX_UgrnRGB)[] = {0xffea, 0xffea, 0xffea, 0xffea}; /* -22 as a signed 16-bit word: Cb contribution to green, scaled by 64. NOTE(review): the asm comment at the use site says "-109dec", which does not match this value */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
55 static volatile unsigned short ASM_VAR(MMX_VgrnRGB)[] = {0xffd2, 0xffd2, 0xffd2, 0xffd2}; /* -46 as a signed 16-bit word: Cr contribution to green, scaled by 64 */
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
56
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
57 static volatile unsigned short ASM_VAR(MMX_Ublu5x5)[] = {0x0081, 0x0081, 0x0081, 0x0081}; /* 129 in each word; this table and the ones below are not referenced in the visible part of the file - presumably used by a 16-bit (555/565) conversion routine, TODO confirm */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
58 static volatile unsigned short ASM_VAR(MMX_Vred5x5)[] = {0x0066, 0x0066, 0x0066, 0x0066}; /* 102 in each word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
59 static volatile unsigned short ASM_VAR(MMX_Ugrn555)[] = {0xffe7, 0xffe7, 0xffe7, 0xffe7}; /* -25 as a signed 16-bit word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
60 static volatile unsigned short ASM_VAR(MMX_Vgrn555)[] = {0xffcc, 0xffcc, 0xffcc, 0xffcc}; /* -52 as a signed 16-bit word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
61 static volatile unsigned short ASM_VAR(MMX_Ugrn565)[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; /* -24 as a signed 16-bit word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
62 static volatile unsigned short ASM_VAR(MMX_Vgrn565)[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; /* -51 as a signed 16-bit word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
63
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
64 static volatile unsigned short ASM_VAR(MMX_red555)[] = {0x7c00, 0x7c00, 0x7c00, 0x7c00}; /* bits 10-14 set in each word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
65 static volatile unsigned short ASM_VAR(MMX_red565)[] = {0xf800, 0xf800, 0xf800, 0xf800}; /* bits 11-15 set in each word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
66 static volatile unsigned short ASM_VAR(MMX_grn555)[] = {0x03e0, 0x03e0, 0x03e0, 0x03e0}; /* bits 5-9 set in each word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
67 static volatile unsigned short ASM_VAR(MMX_grn565)[] = {0x07e0, 0x07e0, 0x07e0, 0x07e0}; /* bits 5-10 set in each word */
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
68 static volatile unsigned short ASM_VAR(MMX_blu5x5)[] = {0x001f, 0x001f, 0x001f, 0x001f}; /* bits 0-4 set in each word */
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
69
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
70 /**
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
71 This MMX assembler is my first assembler/MMX program ever.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
72 Thus it may be buggy.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
73 Send patches to:
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
74 mvogt@rhrk.uni-kl.de
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
75
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
76 After it worked fine I have "obfuscated" the code a bit to have
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
77 more parallelism in the MMX units. This means I moved
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
78 initialisation around and delayed other instructions.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
79 Performance measurement did not show that this brought any advantage
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
80 but in theory it _should_ be faster this way.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
81
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
82 The overall performance gain over the C-based dither was 30%-40%.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
83 The MMX routine calculates 256bit=8RGB values in each cycle
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
84 (4 for row1 & 4 for row2)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
85
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
86 The red/green/blue.. coefficients are taken from the mpeg_play
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
87 player. They look nice, but I don't know if you can have
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
88 better values, to avoid integer rounding errors.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
89
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
90
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
91 IMPORTANT:
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
92 ==========
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
93
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
94 It is a requirement that the cr/cb/lum are 8 byte aligned and
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
95 the out are 16byte aligned or you will/may get segfaults
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
96
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
97 */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
98
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
99 void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
100 unsigned char *lum, unsigned char *cr,
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
101 unsigned char *cb, unsigned char *out,
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
102 int rows, int cols, int mod )
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
103 {
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
104 Uint32 *row1;
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
105 Uint32 *row2;
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
106
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
107 unsigned char* y = lum +cols*rows; // Pointer to the end
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
108 int x=0;
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
109 row1 = (Uint32 *)out; // 32 bit target
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
110 row2 = (Uint32 *)out+cols+mod; // start of second row
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
111 mod = (mod+cols+mod)*4; // increment for row1 in byte
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
112
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
113 __asm__ __volatile__ (
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
114 /* We don't really care about PIC - the code should be rewritten to use
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
115 relative addressing for the static tables, so right now we take the
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
116 COW hit on the pages this code resides. Big deal.
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
117 This spill is just to reduce register pressure in the PIC case. */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
118 "pushl %%ebx\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
119 "movl %0, %%ebx\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
120
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
121 ".align 8\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
122 "1:\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
123
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
124 // create Cr (result in mm1)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
125 "movd (%%ebx), %%mm1\n" // 0 0 0 0 v3 v2 v1 v0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
126 "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
127 "movd (%2), %%mm2\n" // 0 0 0 0 l3 l2 l1 l0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
128 "punpcklbw %%mm7,%%mm1\n" // 0 v3 0 v2 00 v1 00 v0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
129 "punpckldq %%mm1,%%mm1\n" // 00 v1 00 v0 00 v1 00 v0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
130 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
131 "psubw _MMX_0080w,%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
132 #else
887
b4b64bb88f2f Date: Mon, 10 May 2004 10:17:46 -0400
Sam Lantinga <slouken@libsdl.org>
parents: 769
diff changeset
133 "psubw %[_MMX_0080w],%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
134 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
135
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
136 // create Cr_g (result in mm0)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
137 "movq %%mm1,%%mm0\n" // r1 r1 r0 r0 r1 r1 r0 r0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
138 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
139 "pmullw _MMX_VgrnRGB,%%mm0\n"// red*-46dec=0.7136*64
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
140 "pmullw _MMX_VredRGB,%%mm1\n"// red*89dec=1.4013*64
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
141 #else
887
b4b64bb88f2f Date: Mon, 10 May 2004 10:17:46 -0400
Sam Lantinga <slouken@libsdl.org>
parents: 769
diff changeset
142 "pmullw %[_MMX_VgrnRGB],%%mm0\n"// red*-46dec=0.7136*64
b4b64bb88f2f Date: Mon, 10 May 2004 10:17:46 -0400
Sam Lantinga <slouken@libsdl.org>
parents: 769
diff changeset
143 "pmullw %[_MMX_VredRGB],%%mm1\n"// red*89dec=1.4013*64
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
144 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
145 "psraw $6, %%mm0\n" // red=red/64
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
146 "psraw $6, %%mm1\n" // red=red/64
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
147
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
148 // create L1 L2 (result in mm2,mm4)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
149 // L2=lum+cols
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
150 "movq (%2,%4),%%mm3\n" // 0 0 0 0 L3 L2 L1 L0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
151 "punpckldq %%mm3,%%mm2\n" // L3 L2 L1 L0 l3 l2 l1 l0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
152 "movq %%mm2,%%mm4\n" // L3 L2 L1 L0 l3 l2 l1 l0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
153 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
154 "pand _MMX_FF00w,%%mm2\n" // L3 0 L1 0 l3 0 l1 0
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
155 "pand _MMX_00FFw,%%mm4\n" // 0 L2 0 L0 0 l2 0 l0
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
156 #else
887
b4b64bb88f2f Date: Mon, 10 May 2004 10:17:46 -0400
Sam Lantinga <slouken@libsdl.org>
parents: 769
diff changeset
157 "pand %[_MMX_FF00w],%%mm2\n" // L3 0 L1 0 l3 0 l1 0
b4b64bb88f2f Date: Mon, 10 May 2004 10:17:46 -0400
Sam Lantinga <slouken@libsdl.org>
parents: 769
diff changeset
158 "pand %[_MMX_00FFw],%%mm4\n" // 0 L2 0 L0 0 l2 0 l0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
159 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
160 "psrlw $8,%%mm2\n" // 0 L3 0 L1 0 l3 0 l1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
161
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
162 // create R (result in mm6)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
163 "movq %%mm2,%%mm5\n" // 0 L3 0 L1 0 l3 0 l1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
164 "movq %%mm4,%%mm6\n" // 0 L2 0 L0 0 l2 0 l0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
165 "paddsw %%mm1, %%mm5\n" // lum1+red:x R3 x R1 x r3 x r1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
166 "paddsw %%mm1, %%mm6\n" // lum1+red:x R2 x R0 x r2 x r0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
167 "packuswb %%mm5,%%mm5\n" // R3 R1 r3 r1 R3 R1 r3 r1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
168 "packuswb %%mm6,%%mm6\n" // R2 R0 r2 r0 R2 R0 r2 r0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
169 "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
170 "punpcklbw %%mm5,%%mm6\n" // R3 R2 R1 R0 r3 r2 r1 r0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
171
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
172 // create Cb (result in mm1)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
173 "movd (%1), %%mm1\n" // 0 0 0 0 u3 u2 u1 u0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
174 "punpcklbw %%mm7,%%mm1\n" // 0 u3 0 u2 00 u1 00 u0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
175 "punpckldq %%mm1,%%mm1\n" // 00 u1 00 u0 00 u1 00 u0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
176 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
177 "psubw _MMX_0080w,%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
178 #else
887
b4b64bb88f2f Date: Mon, 10 May 2004 10:17:46 -0400
Sam Lantinga <slouken@libsdl.org>
parents: 769
diff changeset
179 "psubw %[_MMX_0080w],%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
180 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
181 // create Cb_g (result in mm5)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
182 "movq %%mm1,%%mm5\n" // u1 u1 u0 u0 u1 u1 u0 u0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
183 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
184 "pmullw _MMX_UgrnRGB,%%mm5\n" // blue*-109dec=1.7129*64
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
185 "pmullw _MMX_UbluRGB,%%mm1\n" // blue*114dec=1.78125*64
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
186 #else
887
b4b64bb88f2f Date: Mon, 10 May 2004 10:17:46 -0400
Sam Lantinga <slouken@libsdl.org>
parents: 769
diff changeset
187 "pmullw %[_MMX_UgrnRGB],%%mm5\n" // blue*-109dec=1.7129*64
b4b64bb88f2f Date: Mon, 10 May 2004 10:17:46 -0400
Sam Lantinga <slouken@libsdl.org>
parents: 769
diff changeset
188 "pmullw %[_MMX_UbluRGB],%%mm1\n" // blue*114dec=1.78125*64
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
189 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
190 "psraw $6, %%mm5\n" // blue=red/64
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
191 "psraw $6, %%mm1\n" // blue=blue/64
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
192
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
193 // create G (result in mm7)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
194 "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
195 "movq %%mm4,%%mm7\n" // 0 L2 0 L0 0 l2 0 l1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
196 "paddsw %%mm5, %%mm3\n" // lum1+Cb_g:x G3t x G1t x g3t x g1t
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
197 "paddsw %%mm5, %%mm7\n" // lum1+Cb_g:x G2t x G0t x g2t x g0t
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
198 "paddsw %%mm0, %%mm3\n" // lum1+Cr_g:x G3 x G1 x g3 x g1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
199 "paddsw %%mm0, %%mm7\n" // lum1+blue:x G2 x G0 x g2 x g0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
200 "packuswb %%mm3,%%mm3\n" // G3 G1 g3 g1 G3 G1 g3 g1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
201 "packuswb %%mm7,%%mm7\n" // G2 G0 g2 g0 G2 G0 g2 g0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
202 "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
203
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
204 // create B (result in mm5)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
205 "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
206 "movq %%mm4,%%mm5\n" // 0 L2 0 L0 0 l2 0 l1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
207 "paddsw %%mm1, %%mm3\n" // lum1+blue:x B3 x B1 x b3 x b1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
208 "paddsw %%mm1, %%mm5\n" // lum1+blue:x B2 x B0 x b2 x b0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
209 "packuswb %%mm3,%%mm3\n" // B3 B1 b3 b1 B3 B1 b3 b1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
210 "packuswb %%mm5,%%mm5\n" // B2 B0 b2 b0 B2 B0 b2 b0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
211 "punpcklbw %%mm3,%%mm5\n" // B3 B2 B1 B0 b3 b2 b1 b0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
212
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
213 // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
214
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
215 "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
216 "pxor %%mm4,%%mm4\n" // 0 0 0 0 0 0 0 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
217 "movq %%mm6,%%mm1\n" // R3 R2 R1 R0 r3 r2 r1 r0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
218 "movq %%mm5,%%mm3\n" // B3 B2 B1 B0 b3 b2 b1 b0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
219 // process lower lum
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
220 "punpcklbw %%mm4,%%mm1\n" // 0 r3 0 r2 0 r1 0 r0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
221 "punpcklbw %%mm4,%%mm3\n" // 0 b3 0 b2 0 b1 0 b0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
222 "movq %%mm1,%%mm2\n" // 0 r3 0 r2 0 r1 0 r0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
223 "movq %%mm3,%%mm0\n" // 0 b3 0 b2 0 b1 0 b0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
224 "punpcklwd %%mm1,%%mm3\n" // 0 r1 0 b1 0 r0 0 b0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
225 "punpckhwd %%mm2,%%mm0\n" // 0 r3 0 b3 0 r2 0 b2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
226
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
227 "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
228 "movq %%mm7,%%mm1\n" // G3 G2 G1 G0 g3 g2 g1 g0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
229 "punpcklbw %%mm1,%%mm2\n" // g3 0 g2 0 g1 0 g0 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
230 "punpcklwd %%mm4,%%mm2\n" // 0 0 g1 0 0 0 g0 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
231 "por %%mm3, %%mm2\n" // 0 r1 g1 b1 0 r0 g0 b0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
232 "movq %%mm2,(%3)\n" // wrote out ! row1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
233
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
234 "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
235 "punpcklbw %%mm1,%%mm4\n" // g3 0 g2 0 g1 0 g0 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
236 "punpckhwd %%mm2,%%mm4\n" // 0 0 g3 0 0 0 g2 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
237 "por %%mm0, %%mm4\n" // 0 r3 g3 b3 0 r2 g2 b2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
238 "movq %%mm4,8(%3)\n" // wrote out ! row1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
239
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
240 // fill destination row2 (inputs: mm6=Rr, mm7=Gg, mm5=Bb)
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
241 // this can be done destructively: mm5 and mm6 are overwritten in place
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
242 "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
243 "punpckhbw %%mm2,%%mm6\n" // 0 R3 0 R2 0 R1 0 R0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
244 "punpckhbw %%mm1,%%mm5\n" // G3 B3 G2 B2 G1 B1 G0 B0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
245 "movq %%mm5,%%mm1\n" // G3 B3 G2 B2 G1 B1 G0 B0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
246 "punpcklwd %%mm6,%%mm1\n" // 0 R1 G1 B1 0 R0 G0 B0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
247 "movq %%mm1,(%5)\n" // wrote out ! row2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
248 "punpckhwd %%mm6,%%mm5\n" // 0 R3 G3 B3 0 R2 G2 B2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
249 "movq %%mm5,8(%5)\n" // wrote out ! row2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
250
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
251 "addl $4,%2\n" // lum+4
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
252 "leal 16(%3),%3\n" // row1+16
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
253 "leal 16(%5),%5\n" // row2+16
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
254 "addl $2, %%ebx\n" // cr+2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
255 "addl $2, %1\n" // cb+2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
256
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
257 "addl $4,%6\n" // x+4
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
258 "cmpl %4,%6\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
259
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
260 "jl 1b\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
261 "addl %4, %2\n" // lum += cols
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
262 "addl %8, %3\n" // row1+= mod
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
263 "addl %8, %5\n" // row2+= mod
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
264 "movl $0, %6\n" // x=0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
265 "cmpl %7, %2\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
266 "jl 1b\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
267 "emms\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
268 "popl %%ebx\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
269 :
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
270 : "m" (cr), "r"(cb),"r"(lum),
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
271 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod)
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
272 #ifndef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
273 ,[_MMX_0080w] "m" (*MMX_0080w),
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
274 [_MMX_00FFw] "m" (*MMX_00FFw),
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
275 [_MMX_FF00w] "m" (*MMX_FF00w),
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
276 [_MMX_VgrnRGB] "m" (*MMX_VgrnRGB),
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
277 [_MMX_VredRGB] "m" (*MMX_VredRGB),
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
278 [_MMX_UgrnRGB] "m" (*MMX_UgrnRGB),
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
279 [_MMX_UbluRGB] "m" (*MMX_UbluRGB)
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
280 #endif
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
281 );
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
282 }
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
283
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
284 void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
285 unsigned char *lum, unsigned char *cr,
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
286 unsigned char *cb, unsigned char *out,
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
287 int rows, int cols, int mod )
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
288 {
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
289 Uint16 *row1;
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
290 Uint16 *row2;
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
291
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
292 unsigned char* y = lum +cols*rows; /* Pointer to the end */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
293 int x=0;
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
294 row1 = (Uint16 *)out; /* 16 bit target */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
295 row2 = (Uint16 *)out+cols+mod; /* start of second row */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
296 mod = (mod+cols+mod)*2; /* increment for row1 in byte */
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
297
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
298
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
299 __asm__ __volatile__(
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
300 "pushl %%ebx\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
301 "movl %0, %%ebx\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
302
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
303 ".align 8\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
304 "1:\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
305 "movd (%1), %%mm0\n" // 4 Cb 0 0 0 0 u3 u2 u1 u0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
306 "pxor %%mm7, %%mm7\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
307 "movd (%%ebx), %%mm1\n" // 4 Cr 0 0 0 0 v3 v2 v1 v0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
308 "punpcklbw %%mm7, %%mm0\n" // 4 W cb 0 u3 0 u2 0 u1 0 u0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
309 "punpcklbw %%mm7, %%mm1\n" // 4 W cr 0 v3 0 v2 0 v1 0 v0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
310 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
311 "psubw _MMX_0080w, %%mm0\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
312 "psubw _MMX_0080w, %%mm1\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
313 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
314 "psubw %[_MMX_0080w], %%mm0\n"
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
315 "psubw %[_MMX_0080w], %%mm1\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
316 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
317 "movq %%mm0, %%mm2\n" // Cb 0 u3 0 u2 0 u1 0 u0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
318 "movq %%mm1, %%mm3\n" // Cr
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
319 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
320 "pmullw _MMX_Ugrn565, %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
321 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
322 "pmullw %[_MMX_Ugrn565], %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
323 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
324 "movq (%2), %%mm6\n" // L1 l7 L6 L5 L4 L3 L2 L1 L0
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
325 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
326 "pmullw _MMX_Ublu5x5, %%mm0\n" // Cb2blue
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
327 "pand _MMX_00FFw, %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
328 "pmullw _MMX_Vgrn565, %%mm3\n" // Cr2green
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
329 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
330 "pmullw %[_MMX_Ublu5x5], %%mm0\n" // Cb2blue
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
331 "pand %[_MMX_00FFw], %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
332 "pmullw %[_MMX_Vgrn565], %%mm3\n" // Cr2green
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
333 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
334 "movq (%2), %%mm7\n" // L2
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
335 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
336 "pmullw _MMX_Vred5x5, %%mm1\n" // Cr2red
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
337 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
338 "pmullw %[_MMX_Vred5x5], %%mm1\n" // Cr2red
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
339 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
340 "psrlw $8, %%mm7\n" // L2 00 L7 00 L5 00 L3 00 L1
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
341 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
342 "pmullw _MMX_Ycoeff, %%mm6\n" // lum1
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
343 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
344 "pmullw %[_MMX_Ycoeff], %%mm6\n" // lum1
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
345 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
346 "paddw %%mm3, %%mm2\n" // Cb2green + Cr2green == green
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
347 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
348 "pmullw _MMX_Ycoeff, %%mm7\n" // lum2
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
349 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
350 "pmullw %[_MMX_Ycoeff], %%mm7\n" // lum2
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
351 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
352
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
353 "movq %%mm6, %%mm4\n" // lum1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
354 "paddw %%mm0, %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
355 "movq %%mm4, %%mm5\n" // lum1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
356 "paddw %%mm1, %%mm4\n" // lum1 +red 00 R6 00 R4 00 R2 00 R0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
357 "paddw %%mm2, %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
358 "psraw $6, %%mm4\n" // R1 0 .. 64
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
359 "movq %%mm7, %%mm3\n" // lum2 00 L7 00 L5 00 L3 00 L1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
360 "psraw $6, %%mm5\n" // G1 - .. +
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
361 "paddw %%mm0, %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
362 "psraw $6, %%mm6\n" // B1 0 .. 64
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
363 "packuswb %%mm4, %%mm4\n" // R1 R1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
364 "packuswb %%mm5, %%mm5\n" // G1 G1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
365 "packuswb %%mm6, %%mm6\n" // B1 B1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
366 "punpcklbw %%mm4, %%mm4\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
367 "punpcklbw %%mm5, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
368
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
369 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
370 "pand _MMX_red565, %%mm4\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
371 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
372 "pand %[_MMX_red565], %%mm4\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
373 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
374 "psllw $3, %%mm5\n" // GREEN 1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
375 "punpcklbw %%mm6, %%mm6\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
376 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
377 "pand _MMX_grn565, %%mm5\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
378 "pand _MMX_red565, %%mm6\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
379 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
380 "pand %[_MMX_grn565], %%mm5\n"
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
381 "pand %[_MMX_red565], %%mm6\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
382 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
383 "por %%mm5, %%mm4\n" //
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
384 "psrlw $11, %%mm6\n" // BLUE 1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
385 "movq %%mm3, %%mm5\n" // lum2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
386 "paddw %%mm1, %%mm3\n" // lum2 +red 00 R7 00 R5 00 R3 00 R1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
387 "paddw %%mm2, %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
388 "psraw $6, %%mm3\n" // R2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
389 "por %%mm6, %%mm4\n" // MM4
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
390 "psraw $6, %%mm5\n" // G2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
391 "movq (%2, %4), %%mm6\n" // L3 load lum2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
392 "psraw $6, %%mm7\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
393 "packuswb %%mm3, %%mm3\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
394 "packuswb %%mm5, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
395 "packuswb %%mm7, %%mm7\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
396 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
397 "pand _MMX_00FFw, %%mm6\n" // L3
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
398 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
399 "pand %[_MMX_00FFw], %%mm6\n" // L3
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
400 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
401 "punpcklbw %%mm3, %%mm3\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
402 "punpcklbw %%mm5, %%mm5\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
403 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
404 "pmullw _MMX_Ycoeff, %%mm6\n" // lum3
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
405 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
406 "pmullw %[_MMX_Ycoeff], %%mm6\n" // lum3
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
407 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
408 "punpcklbw %%mm7, %%mm7\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
409 "psllw $3, %%mm5\n" // GREEN 2
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
410 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
411 "pand _MMX_red565, %%mm7\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
412 "pand _MMX_red565, %%mm3\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
413 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
414 "pand %[_MMX_red565], %%mm7\n"
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
415 "pand %[_MMX_red565], %%mm3\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
416 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
417 "psrlw $11, %%mm7\n" // BLUE 2
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
418 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
419 "pand _MMX_grn565, %%mm5\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
420 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
421 "pand %[_MMX_grn565], %%mm5\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
422 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
423 "por %%mm7, %%mm3\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
424 "movq (%2,%4), %%mm7\n" // L4 load lum2
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
425 "por %%mm5, %%mm3\n" //
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
426 "psrlw $8, %%mm7\n" // L4
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
427 "movq %%mm4, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
428 "punpcklwd %%mm3, %%mm4\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
429 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
430 "pmullw _MMX_Ycoeff, %%mm7\n" // lum4
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
431 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
432 "pmullw %[_MMX_Ycoeff], %%mm7\n" // lum4
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
433 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
434 "punpckhwd %%mm3, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
435
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
436 "movq %%mm4, (%3)\n" // write row1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
437 "movq %%mm5, 8(%3)\n" // write row1
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
438
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
439 "movq %%mm6, %%mm4\n" // Lum3
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
440 "paddw %%mm0, %%mm6\n" // Lum3 +blue
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
441
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
442 "movq %%mm4, %%mm5\n" // Lum3
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
443 "paddw %%mm1, %%mm4\n" // Lum3 +red
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
444 "paddw %%mm2, %%mm5\n" // Lum3 +green
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
445 "psraw $6, %%mm4\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
446 "movq %%mm7, %%mm3\n" // Lum4
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
447 "psraw $6, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
448 "paddw %%mm0, %%mm7\n" // Lum4 +blue
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
449 "psraw $6, %%mm6\n" // Lum3 +blue
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
450 "movq %%mm3, %%mm0\n" // Lum4
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
451 "packuswb %%mm4, %%mm4\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
452 "paddw %%mm1, %%mm3\n" // Lum4 +red
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
453 "packuswb %%mm5, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
454 "paddw %%mm2, %%mm0\n" // Lum4 +green
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
455 "packuswb %%mm6, %%mm6\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
456 "punpcklbw %%mm4, %%mm4\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
457 "punpcklbw %%mm5, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
458 "punpcklbw %%mm6, %%mm6\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
459 "psllw $3, %%mm5\n" // GREEN 3
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
460 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
461 "pand _MMX_red565, %%mm4\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
462 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
463 "pand %[_MMX_red565], %%mm4\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
464 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
465 "psraw $6, %%mm3\n" // psr 6
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
466 "psraw $6, %%mm0\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
467 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
468 "pand _MMX_red565, %%mm6\n" // BLUE
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
469 "pand _MMX_grn565, %%mm5\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
470 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
471 "pand %[_MMX_red565], %%mm6\n" // BLUE
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
472 "pand %[_MMX_grn565], %%mm5\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
473 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
474 "psrlw $11, %%mm6\n" // BLUE 3
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
475 "por %%mm5, %%mm4\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
476 "psraw $6, %%mm7\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
477 "por %%mm6, %%mm4\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
478 "packuswb %%mm3, %%mm3\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
479 "packuswb %%mm0, %%mm0\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
480 "packuswb %%mm7, %%mm7\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
481 "punpcklbw %%mm3, %%mm3\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
482 "punpcklbw %%mm0, %%mm0\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
483 "punpcklbw %%mm7, %%mm7\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
484 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
485 "pand _MMX_red565, %%mm3\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
486 "pand _MMX_red565, %%mm7\n" // BLUE
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
487 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
488 "pand %[_MMX_red565], %%mm3\n"
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
489 "pand %[_MMX_red565], %%mm7\n" // BLUE
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
490 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
491 "psllw $3, %%mm0\n" // GREEN 4
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
492 "psrlw $11, %%mm7\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
493 #ifdef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
494 "pand _MMX_grn565, %%mm0\n"
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
495 #else
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
496 "pand %[_MMX_grn565], %%mm0\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
497 #endif
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
498 "por %%mm7, %%mm3\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
499 "por %%mm0, %%mm3\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
500
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
501 "movq %%mm4, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
502
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
503 "punpcklwd %%mm3, %%mm4\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
504 "punpckhwd %%mm3, %%mm5\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
505
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
506 "movq %%mm4, (%5)\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
507 "movq %%mm5, 8(%5)\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
508
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
509 "addl $8, %6\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
510 "addl $8, %2\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
511 "addl $4, %%ebx\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
512 "addl $4, %1\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
513 "cmpl %4, %6\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
514 "leal 16(%3), %3\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
515 "leal 16(%5),%5\n" // row2+16
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
516
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
517
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
518 "jl 1b\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
519 "addl %4, %2\n" // lum += cols
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
520 "addl %8, %3\n" // row1+= mod
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
521 "addl %8, %5\n" // row2+= mod
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
522 "movl $0, %6\n" // x=0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
523 "cmpl %7, %2\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
524 "jl 1b\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
525 "emms\n"
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
526 "popl %%ebx\n"
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
527 :
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
528 :"m" (cr), "r"(cb),"r"(lum),
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
529 "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod)
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
530 #ifndef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
531 ,[_MMX_0080w] "m" (*MMX_0080w),
949
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
532 [_MMX_Ugrn565] "m" (*MMX_Ugrn565),
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
533 [_MMX_Ublu5x5] "m" (*MMX_Ublu5x5),
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
534 [_MMX_00FFw] "m" (*MMX_00FFw),
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
535 [_MMX_Vgrn565] "m" (*MMX_Vgrn565),
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
536 [_MMX_Vred5x5] "m" (*MMX_Vred5x5),
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
537 [_MMX_Ycoeff] "m" (*MMX_Ycoeff),
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
538 [_MMX_red565] "m" (*MMX_red565),
e0d96eb0af19 I don't know how this got missed, but...
Sam Lantinga <slouken@libsdl.org>
parents: 946
diff changeset
539 [_MMX_grn565] "m" (*MMX_grn565)
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
540 #endif
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
541 );
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
542 }
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
543
1038
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
544 #undef GCC2_HACK
29d7db09776e Ugly hack to make this work with gcc 2.x and 3.x
Sam Lantinga <slouken@libsdl.org>
parents: 949
diff changeset
545
0
74212992fb08 Initial revision
Sam Lantinga <slouken@lokigames.com>
parents:
diff changeset
546 #endif /* GCC i386 inline assembly */