#include <math.h>
#include <stdint.h>
static const uint8_t gMaskShift[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9,10,11, 12,13,14,15,16,17,18,19,20,21,22,23, 24,25,26,27, 28,29,30,31,31,31,31,31,31,31,31,31, 31,31,31,31, 31,31,31,31,31,31,31,31,31,31,31,31, 31,31,31,31, 31,31,31,31,31,31,31,31,31,31,31,31, 31,31,31,31, 31,31,31,31,31,31,31,31,31,31,31,31, 31,31,31,31, 31,31,31,31,31,31,31,31,31,31,31,31, 31,31,31,31, 31,31,31,31,31,31,31,31,31,31,31,31, 31,31,31,31, 31,31,31,31,31,31,31,31,31,31,31,31, };
static const double powf_log_table[256] = {
0x1p+0, -0x0p+0, 0x1.fcp-1, 0x1.72c7ba20f7327p-7, 0x1.f80fe03f80fep-1, 0x1.715662c7f3dbcp-6, 0x1.f42f42f42f42fp-1, 0x1.13eea2b6545dfp-5, 0x1.f05dcd30dadecp-1, 0x1.6e7f0bd9710ddp-5, 0x1.ec9b26c9b26cap-1, 0x1.c85f25e12da51p-5, 0x1.e8e6fa39be8e7p-1, 0x1.10c8cd0c74414p-4, 0x1.e540f4898d5f8p-1, 0x1.3d0c813e48ep-4, 0x1.e1a8c536fe1a9p-1, 0x1.68fbf5169e028p-4, 0x1.de1e1e1e1e1e2p-1, 0x1.949866f0b017bp-4, 0x1.daa0b3630957dp-1, 0x1.bfe30e28821cp-4, 0x1.d7303b5cc0ed7p-1, 0x1.eadd1b4ef9a1fp-4, 0x1.d3cc6e80ebbdbp-1, 0x1.0ac3dc2e0ca0cp-3, 0x1.d075075075075p-1, 0x1.1ff2046fb7116p-3, 0x1.cd29c244fe2f3p-1, 0x1.34f99517622aep-3, 0x1.c9ea5dbf193d5p-1, 0x1.49db19c99a54dp-3, 0x1.c6b699f5423cep-1, 0x1.5e971b3a4ee8p-3, 0x1.c38e38e38e38ep-1, 0x1.732e1f41ccdbap-3, 0x1.c070fe3c070fep-1, 0x1.87a0a8f0ff9b2p-3, 0x1.bd5eaf57abd5fp-1, 0x1.9bef38a4ffae5p-3, 0x1.ba5713280dee9p-1, 0x1.b01a4c19f6811p-3, 0x1.b759f2298375ap-1, 0x1.c4225e7d5e3c6p-3, 0x1.b4671655e7f24p-1, 0x1.d807e87fa4521p-3, 0x1.b17e4b17e4b18p-1, 0x1.ebcb6065350a2p-3, 0x1.ae9f5d3eba7d7p-1, 0x1.ff6d3a16f617fp-3, 0x1.abca1af286bcap-1, 0x1.0976f3991af9ep-2, 0x1.a8fe53a8fe53bp-1, 0x1.1326eb8c0aba3p-2, 0x1.a63bd81a98ef6p-1, 0x1.1cc6bb7e3870fp-2, 0x1.a3827a3827a38p-1, 0x1.265698fa26c0ap-2, 0x1.a0d20d20d20d2p-1, 0x1.2fd6b881e82d3p-2, 0x1.9e2a65187566cp-1, 0x1.39474d95e1649p-2, 0x1.9b8b577e61371p-1, 0x1.42a88abb54986p-2, 0x1.98f4bac46d7cp-1, 0x1.4bfaa182b7fe3p-2, 0x1.9666666666666p-1, 0x1.553dc28dd9724p-2, 0x1.93e032e1c9f02p-1, 0x1.5e721d95d124dp-2, 0x1.9161f9add3c0dp-1, 0x1.6797e170c5221p-2, 0x1.8eeb9533d4065p-1, 0x1.70af3c177f74p-2, 0x1.8c7ce0c7ce0c8p-1, 0x1.79b85aaad8878p-2, 0x1.8a15b8a15b8a1p-1, 0x1.82b36978f76d5p-2, 0x1.87b5f9d4d1bc2p-1, 0x1.8ba09402697edp-2, 0x1.855d824ca58e9p-1, 0x1.948004ff12dbfp-2, 0x1.830c30c30c30cp-1, 0x1.9d51e662f92a2p-2, 0x1.80c1e4bbd595fp-1, 0x1.a6166162e9ec8p-2, 0x1.7e7e7e7e7e7e8p-1, 0x1.aecd9e78fdbeap-2, 0x1.7c41df1077c42p-1, 0x1.b777c568f9ae2p-2, 0x1.7a0be82fa0be8p-1, 0x1.c014fd448fe3ap-2, 0x1.77dc7c4cf2aeap-1, 0x1.c8a56c6f80bcap-2, 0x1.75b37e875b37fp-1, 0x1.d12938a39d6fp-2, 0x1.7390d2a6c405ep-1, 0x1.d9a086f4ad416p-2, 0x1.71745d1745d17p-1, 0x1.e20b7bd4365a8p-2, 0x1.6f5e02e4850ffp-1, 0x1.ea6a3b152b1e6p-2, 0x1.6d4da9b536a6dp-1, 0x1.f2bce7ef7d06bp-2, 0x1.6b4337c6cb157p-1, 0x1.fb03a50395dbap-2, 0x1.693e93e93e93fp-1, 0x1.019f4a2edc134p-1, 0x1.673fa57b0cbabp-1, 0x1.05b6ebbca3d9ap-1, 0x1.6546546546546p-1, 0x1.09c8c7a1fd74cp-1, 0x1.63528917c80b3p-1, 0x1.0dd4ee107ae0ap-1, 0x1.61642c8590b21p-1, 0x1.11db6ef5e7873p-1, 0x1.5f7b282135f7bp-1, 0x1.15dc59fdc06b7p-1, 0x1.5d9765d9765d9p-1, 0x1.19d7be92a231p-1, 0x1.5bb8d015e75bcp-1, 0x1.1dcdabdfad537p-1, 0x1.59df51b3bea36p-1, 0x1.21be30d1e0ddbp-1, 0x1.580ad602b580bp-1, 0x1.25a95c196bef3p-1, 0x1.563b48c20563bp-1, 0x1.298f3c2af6595p-1, 0x1.5470961d7ca63p-1, 0x1.2d6fdf40e09c5p-1, 0x1.52aaaaaaaaaabp-1, 0x1.314b535c7b89ep-1, 0x1.50e97366227cbp-1, 0x1.3521a64737cf3p-1, 0x1.4f2cddb0d3225p-1, 0x1.38f2e593cda73p-1, 0x1.4d74d74d74d75p-1, 0x1.3cbf1e9f5cf2fp-1, 0x1.4bc14e5e0a72fp-1, 0x1.40865e9285f33p-1, 0x1.4a1231617641p-1, 0x1.4448b2627ade3p-1, 0x1.48676f31219dcp-1, 0x1.480626d20a876p-1, 0x1.46c0f6feb6ac6p-1, 0x1.4bbec872a4505p-1, 0x1.451eb851eb852p-1, 0x1.4f72a3a555958p-1, 0x1.4380a3065e3fbp-1, 0x1.5321c49bc0c91p-1, 0x1.41e6a74981447p-1, 0x1.56cc37590e6c5p-1, 0x1.4050b59897548p-1, 0x1.5a7207b2d815ap-1, 0x1.3ebebebebebecp-1, 0x1.5e1341520dbp-1, 0x1.3d30b3d30b3d3p-1, 0x1.61afefb3d5201p-1, 0x1.3ba68636adfbp-1, 0x1.65481e2a6477bp-1, 0x1.3a2027932b48fp-1, 0x1.68dbd7ddd6e15p-1, 0x1.389d89d89d89ep-1, 0x1.6c6b27ccfc698p-1, 0x1.371e9f3c04e64p-1, 0x1.6ff618ce24cd7p-1, 0x1.35a35a35a35a3p-1, 0x1.737cb58fe5716p-1, 0x1.342bad7f64b39p-1, 0x1.76ff0899daa49p-1, 0x1.32b78c13521dp-1, 0x1.7a7d1c4d6452p-1, 0x1.3146e92a10d38p-1, 0x1.7df6fae65e424p-1, 0x1.2fd9b8396ba9ep-1, 0x1.816cae7bd40b1p-1, 0x1.2e6fecf2e6fedp-1, 0x1.84de4100b0ce2p-1, 0x1.2d097b425ed09p-1, 0x1.884bbc446ae3fp-1, 0x1.2ba6574cae996p-1, 0x1.8bb529f3ab8f3p-1, 0x1.2a46756e62a46p-1, 0x1.8f1a9398f2d58p-1, 0x1.28e9ca3a728eap-1, 0x1.927c029d3798ap-1, 0x1.27904a7904a79p-1, 0x1.95d980488409ap-1, 0x1.2639eb2639eb2p-1, 0x1.993315c28e8fbp-1, 0x1.24e6a171024e7p-1, 0x1.9c88cc134f3c3p-1, 0x1.239662b9f91cbp-1, 0x1.9fdaac2391e1cp-1, 0x1.2249249249249p-1, 0x1.a328bebd84e8p-1, 0x1.20fedcba98765p-1, 0x1.a6730c8d44efap-1, 0x1.1fb78121fb781p-1, 0x1.a9b99e21655ebp-1, 0x1.1e7307e4ef157p-1, 0x1.acfc7beb75e94p-1, 0x1.1d31674c59d31p-1, 0x1.b03bae40852ap-1, 0x1.1bf295cc93903p-1, 0x1.b3773d59a05ffp-1, 0x1.1ab68a0473c1bp-1, 0x1.b6af315450638p-1, 0x1.197d3abc65f4fp-1, 0x1.b9e3923313e58p-1, 0x1.18469ee58469fp-1, 0x1.bd1467ddd70a7p-1, 0x1.1712ad98b8957p-1, 0x1.c041ba2268731p-1, 0x1.15e15e15e15e1p-1, 0x1.c36b90b4ebc3ap-1, 0x1.14b2a7c2fee92p-1, 0x1.c691f33049bap-1, 0x1.1386822b63cbfp-1, 0x1.c9b4e9169de22p-1, 0x1.125ce4feeb7a1p-1, 0x1.ccd479d1a1f94p-1, 0x1.1135c81135c81p-1, 0x1.cff0acb3170e3p-1, 0x1.10112358e75d3p-1, 0x1.d30988f52c6d3p-1, 0x1.0eeeeeeeeeeefp-1, 0x1.d61f15bae4663p-1, 0x1.0dcf230dcf231p-1, 0x1.d9315a1076fa2p-1, 0x1.0cb1b810ecf57p-1, 0x1.dc405cebb27dcp-1, 0x1.0b96a673e2808p-1, 0x1.df4c252c5a3e1p-1, 0x1.0a7de6d1d6086p-1, 0x1.e254b99c83339p-1, 0x1.096771e4d528cp-1, 0x1.e55a20f0eecf9p-1, 0x1.0853408534085p-1, 0x1.e85c61c963f0dp-1, 0x1.07414ba8f0741p-1, 0x1.eb5b82b10609bp-1, 0x1.06318c6318c63p-1, 0x1.ee578a1eaa83fp-1, 0x1.0523fbe3367d7p-1, 0x1.f1507e752c6c8p-1, 0x1.04189374bc6a8p-1, 0x1.f4466603be71dp-1, 0x1.030f4c7e7859cp-1, 0x1.f73947063b3fdp-1, 0x1.0208208208208p-1, 0x1.fa2927a574422p-1, 0x1.0103091b51f5ep-1, 0x1.fd160df77ed7ap-1, 0x1p-1, 0x1p+0, };
float powf( float x, float y )
{
static const double recip_ln2 = 0x1.71547652b82fep0;
if( x == 1.0f || y == 1.0f)
return x;
union{ float f; uint32_t u; } ux, uy;
ux.f = x;
uy.f = y;
uint32_t absux = ux.u & 0x7fffffff;
uint32_t absuy = uy.u & 0x7fffffff;
if( (ux.u - 1U) >= 0x7f7fffff || (absuy - 1) >= 0x4affffff )
{
if( 0 == absuy )
return 1.0f;
if( x != x || y != y )
return x + y;
uint32_t fractMask = 0x3fffffffU >> gMaskShift[ absuy >> 23 ]; uint32_t onesMask = 0x40000000U >> gMaskShift[ absuy >> 23 ]; uint32_t fractionalBits = absuy & fractMask;
uint32_t onesBit = absuy & onesMask;
if( 0 == absux )
{
if( 0 == fractionalBits && 0 != onesBit )
{
if( y < 0.0f )
return 1.0f / x;
return x;
}
if( 0.0f < y )
return 0.0f;
return 1.0f / __builtin_fabsf(x);
}
if( 0x7f800000 == absuy )
{
if( -1.0f == x )
return 1.0f;
if( absux > 0x3f800000 ) { if( 0.0f < y )
return y;
else
return 0.0f;
}
else
{ if( 0.0f < y )
return 0.0f;
else
return __builtin_fabsf(y);
}
}
if( x == __builtin_inff() )
{
if( y < 0.0f )
return 0.0f;
else
return x;
}
if( x > -__builtin_inff() )
{
if( fractionalBits )
goto nan_sqrt;
goto ipowf;
}
if( 0 == fractionalBits && 0 != onesBit ) return 0.0f < y ? x : -0.0f;
return 0.0f < y ? -x : 0.0f;
}
if( 0x3f000000U == absuy )
goto nan_sqrt;
int32_t i = ((absux >> 23) & 0xff) - 127;
union
{
uint32_t u;
float f;
}m = { (absux & 0x007fffffU) | 0x3f800000U };
if( -127 == i )
{ m.f -= 1.0f; i = ((m.u >> 23) & 0xff) - (127+126);
m.u = (m.u & 0x807fffffU) | 0x3f800000U;
}
double log2x = i;
if( m.f != 1.0f )
{
int index = (m.u >> (23-7-4)) & 0x7f0; const double *tablep = (void*) powf_log_table + index;
double r = (double) m.f;
r *= tablep[0]; log2x += tablep[1]; r -= 1.0;
double rr = r*r;
double small = -0.5 + 0.3333333333333333333333*r;
double large = -0.25 + 0.2*r;
double rrrr = rr * rr;
small *= rr;
small += r;
large *= rrrr;
r = small + large;
log2x += r * recip_ln2;
}
double ylog2x = y * log2x;
if( ylog2x >= 128.0 )
return (float) (0x1.0p128 * ylog2x);
if( ylog2x <= -150.0 )
return (float) ( ylog2x * 0x1.0p-1022 );
int exp = (int) ylog2x;
double f = ylog2x - exp;
static const double c0 = 1.0 + 0.278626872016317130037181614004e-10;
static const double c1 = .693147176943623740308984004029708;
static const double c2 = .240226505817268621584559118975830;
static const double c3 = 0.555041568519883074165425891257052e-1;
static const double c4overc8 = 0.961813690023115610862381719985771e-2 / 0.134107709538786543922336536865157e-5;
static const double c5overc8 = 0.133318252930790403741964203236548e-2 / 0.134107709538786543922336536865157e-5;
static const double c6overc8 = 0.154016177542147239746127455226575e-3 / 0.134107709538786543922336536865157e-5;
static const double c7overc8 = 0.154832722143258821052933667742417e-4 / 0.134107709538786543922336536865157e-5;
static const double c8 = 0.134107709538786543922336536865157e-5;
double z = 1.0;
if( 0.0 != f )
{ double ff = f * f;
double s7 = c7overc8 * f; double s3 = c3 * f;
double s5 = c5overc8 * f; double s1 = c1 * f;
double ffff = ff * ff;
s7 += c6overc8; s3 += c2;
s5 += c4overc8; s1 += c0;
s7 *= ff; s3 *= ff;
s5 += ffff;
double c8ffff = ffff * c8;
s7 += s5; s3 += s1;
s7 *= c8ffff;
z = s3 + s7;
}
union{ uint64_t u; double d; } two_exp = { ((uint64_t) exp + 1023) << 52 };
return (float) (z * two_exp.d );
nan_sqrt:
if( x < 0.0f || y > 0.0f )
return sqrtf(x);
return (float) sqrt( 1.0 / (double) x );
ipowf:
y = y > -0x1.fffffep30f ? y : -0x1.fffffep30f;
y = y < 0x1.fffffep30f ? y : 0x1.fffffep30f;
i = (int) y;
double dx = (double) x;
double r = 1.0;
if( i < 0 )
{
i = -i;
dx = 1.0 / dx;
}
if( i & 1 )
r = dx;
do
{
i >>= 1;
if( 0 == i )
break;
dx *= dx;
if( i & 1 )
r *= dx;
}while(1);
return (float) r;
}