Mercurial > sdl-ios-xcode
view src/libm/e_rem_pio2.c @ 5053:b5b42be9333c
Fixed bug #1026
Vittorio Giovara 2010-07-16 19:09:28 PDT
i was reading SDL_renderer_gles and i noticed that every time we there
is some gl call the gl state is modified with a couple of
glEnableClientState()/glDisableClientState.
While this is completely fine for desktops systems, this is a major
performace kill on mobile devices, right where opengles is
implemented.
Normal practice in this case is to update the glstate once, keep it
always the same and disable/enable other states only in very special
occasions.
On the web there's plenty of documentation (on the top of my head
http://developer.apple.com/iphone/library/documentation/3DDrawing/Conceptual/OpenGLES_ProgrammingGuide/Performance/Performance.html#//apple_ref/doc/uid/TP40008793-CH105-SW5
) and i personally tried this.
I modified my code and got a 10 fps boost, then modified SDL_render_gles and
shifted from 40 fps to 50 fps alone -- considering that i started from ~30fps i
got an 80% performance increase with this technique.
I have attached a dif of my changes, hope that it will be included in
mainstream.
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Wed, 19 Jan 2011 23:56:16 -0800 |
parents | dc1eb82ffdaa |
children |
line wrap: on
line source
/* @(#)e_rem_pio2.c 5.1 93/09/24 */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ #if defined(LIBM_SCCS) && !defined(lint) static const char rcsid[] = "$NetBSD: e_rem_pio2.c,v 1.8 1995/05/10 20:46:02 jtc Exp $"; #endif /* __ieee754_rem_pio2(x,y) * * return the remainder of x rem pi/2 in y[0]+y[1] * use __kernel_rem_pio2() */ #include "math.h" #include "math_private.h" libm_hidden_proto(fabs) /* * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi */ #ifdef __STDC__ static const int32_t two_over_pi[] = { #else static int32_t two_over_pi[] = { #endif 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, }; #ifdef __STDC__ static const int32_t npio2_hw[] = { #else static int32_t npio2_hw[] = { #endif 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, 0x404858EB, 0x404921FB, }; /* * invpio2: 53 bits of 2/pi * pio2_1: first 33 bit of pi/2 * pio2_1t: pi/2 - pio2_1 * pio2_2: second 33 bit of pi/2 * pio2_2t: pi/2 - (pio2_1+pio2_2) * pio2_3: third 33 bit of pi/2 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) */ #ifdef __STDC__ static const double #else static double #endif zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ #ifdef __STDC__ int32_t attribute_hidden __ieee754_rem_pio2(double x, double *y) #else int32_t attribute_hidden __ieee754_rem_pio2(x, y) double x, y[]; #endif { double z = 0.0, w, t, r, fn; double tx[3]; int32_t e0, i, j, nx, n, ix, hx; u_int32_t low; GET_HIGH_WORD(hx, x); /* high word of x */ ix = hx & 0x7fffffff; if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */ y[0] = x; y[1] = 0; return 0; } if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ if (hx > 0) { z = x - pio2_1; if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */ y[0] = z - pio2_1t; y[1] = (z - y[0]) - pio2_1t; } else { /* near pi/2, use 33+33+53 bit pi */ z -= pio2_2; y[0] = z - pio2_2t; y[1] = (z - y[0]) - pio2_2t; } return 1; } else { /* negative x */ z = x + pio2_1; if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */ y[0] = z + pio2_1t; y[1] = (z - y[0]) + pio2_1t; } else { /* near pi/2, use 33+33+53 bit pi */ z += pio2_2; y[0] = z + pio2_2t; y[1] = (z - y[0]) + pio2_2t; } return -1; } } if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ t = fabs(x); n = (int32_t) (t * invpio2 + half); fn = (double) n; r = t - fn * pio2_1; w = fn * pio2_1t; /* 1st round good to 85 bit */ if (n < 32 && ix != npio2_hw[n - 1]) { y[0] = r - w; /* quick check no cancellation */ } else { u_int32_t high; j = ix >> 20; y[0] = r - w; GET_HIGH_WORD(high, y[0]); i = j - ((high >> 20) & 0x7ff); if (i > 16) { /* 2nd iteration needed, good to 118 */ t = r; w = fn * pio2_2; r = t - w; w = fn * pio2_2t - ((t - r) - w); y[0] = r - w; GET_HIGH_WORD(high, y[0]); i = j - ((high >> 20) & 0x7ff); if (i > 49) { /* 3rd iteration need, 151 bits acc */ t = r; /* will cover all possible cases */ w = fn * pio2_3; r = t - w; w = fn * pio2_3t - ((t - r) - w); y[0] = r - w; } } } y[1] = (r - y[0]) - w; if (hx < 0) { y[0] = -y[0]; y[1] = -y[1]; return -n; } else return n; } /* * all other (large) arguments */ if (ix >= 0x7ff00000) { /* x is inf or NaN */ y[0] = y[1] = x - x; return 0; } /* set z = scalbn(|x|,ilogb(x)-23) */ GET_LOW_WORD(low, x); SET_LOW_WORD(z, low); e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */ SET_HIGH_WORD(z, ix - ((int32_t) (e0 << 20))); for (i = 0; i < 2; i++) { tx[i] = (double) ((int32_t) (z)); z = (z - tx[i]) * two24; } tx[2] = z; nx = 3; while (tx[nx - 1] == zero) nx--; /* skip zero term */ n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi); if (hx < 0) { y[0] = -y[0]; y[1] = -y[1]; return -n; } return n; }