/* sincostan.c -- sin, cos, and tan for standard math library.
These are preliminary versions of sin, cos, and tan written to provide
argument reduction using the correct value of pi, rather than pi rounded to
66-bits as used by the Intel trigonometric instructions. Versions to
enhance accuracy and speed may follow.
*/
#include <math.h>
// Layout parameters of the destination floating-point type.
#define SignificandBits 53
#define ExponentBits 11

/*  Double overlays a double with bit-fields so that parts of its encoding
    can be read directly.

    d is the value viewed as a double.
    s.exponent is the raw exponent field.
    s.significand2 is the two least significant bits of the significand
    field.
    The other fields are unused.

    The field order is reversed between big-endian and other targets so that
    the same names refer to the same bits of the encoding.  This presumes the
    compiler allocates bit-fields in the order matching the target's byte
    order.
*/
typedef union
{
    double d;
    struct
    {
#if !defined __BIG_ENDIAN__
        // Not big-endian: declare from the low-order end of the encoding.
        unsigned int significand2 : 2;
        unsigned int significand1 : 30;
        unsigned int significand0 : SignificandBits-1-32;
        unsigned int exponent     : ExponentBits;
        unsigned int sign         : 1;
#else   // !defined __BIG_ENDIAN__
        // Big-endian: declare from the high-order end of the encoding.
        unsigned int sign         : 1;
        unsigned int exponent     : ExponentBits;
        unsigned int significand0 : SignificandBits-1-32;
        unsigned int significand1 : 30;
        unsigned int significand2 : 2;
#endif  // !defined __BIG_ENDIAN__
    } s;
} Double;
/* Each of the following numbers is a strict upper bound on some technique
for calculating sin, cos, or tan. That is, the technique is known to apply
when |x| < bound, where x is the function argument. (ReduceFull also
requires that |x| be greater than some amount around 0x1p-622, to avoid
underflow or loss of accuracy.)
If |x| < BoundDenormal, x is denormal or zero.
If |x| < BoundTrivialSin, x is the correctly rounded sine of x.
If |x| < BoundTrivialCos, 1 is the correctly rounded cosine of x.
If |x| < BoundTrivialTan, x is the correctly rounded tangent of x.
If |x| < BoundPolynomial, sinp(x) or cosp(x) may be used for sin(x) or
cos(x), with no argument reduction.
If |x| < BoundMedium, ReduceMedium may be used to reduce x.
If |x| < BoundFull, ReduceFull may be used to reduce x.
Otherwise, x is an infinity or a NaN.
*/
// The smallest normal double; any lesser magnitude is denormal or zero.
#define BoundDenormal 0x1p-1022
// Below these magnitudes the result is trivial: x for sin, 1 for cos, x for tan.
#define BoundTrivialSin 0x1.7137449123ef7p-26
#define BoundTrivialCos 0x1.6A09E667F3BCDp-27
#define BoundTrivialTan 0x1.250BFE1B082F5p-26
// About pi/4; below this, sinp and cosp are applied with no argument reduction.
#define BoundPolynomial 0x3.243f6a8885a31p-2
// One unit in the last place above 0x1.000013be57a3fp19, the largest |x| ReduceMedium accepts.
#define BoundMedium 0x1.000013be57a40p19
// Every finite magnitude is below this; only infinities and NaNs fail the test.
#define BoundFull INFINITY
/* Below, we define methods to test whether a function argument, x, is within
bounds suitable for a technique for implementing the function. Several
methods are defined, using natural floating-point comparisons or using
integer comparisons on 32 or 64 bits of the floating-point representation.
Implementations may choose whichever method is best for performance.
To use, first prepare the key used for checking bounds:
BoundKeyType xk = Key(x);
That is done once at the start of a routine. Then, as many times as
desired, bounds are checked:
if (Within(xk, bound))
...
The "bound" must be one of the names defined above, such as BoundMedium.
If Within(xk, bound) evaluates to true, then |x| < bound. The converse is
not necessarily true; if Within(xk, bound) is false, |x| might be less than
bound. This is because some implementations of Within do not check all
bits of xk and bound. They are intended only as fast screens that get the
majority of values. However, if floor(log[2](|x|)) < floor(log[2](bound)),
Within(xk, bound) is guaranteed to be true -- its evaluation includes at
least all of the floating-point exponent.
*/
#if defined __i386__ // (Add any other desired architectures.)
/*  This method uses integer comparisons with the most significant 32 bits
    of the IEEE-754 representation of the floating-point number.
*/
#include <stdint.h>

// A key is the most significant 32 bits of the representation of |x|.
typedef uint32_t BoundKeyType;

/*  Overlay a double with two 32-bit words.  s.key is the most significant
    word (sign, exponent, and leading significand bits); s.notkey is the
    least significant word, which the bound checks never examine.  The member
    order is swapped on big-endian targets so that key always names the most
    significant word.
*/
typedef union
{
    double d;
#if defined __BIG_ENDIAN__
    struct { uint32_t key, notkey; } s;
#else
    struct { uint32_t notkey, key; } s;
#endif
} DoubleForBound;

/*  Prepare constants containing the bounds in our desired form.  For a bound
    named B, this defines DoubleB, whose d member is initialized to the value
    of B so that DoubleB.s.key can be compared with a key.
*/
#define DefineBound(bound) \
    static const DoubleForBound Double##bound = { bound };
DefineBound(BoundDenormal)
DefineBound(BoundTrivialSin)
DefineBound(BoundTrivialCos)
DefineBound(BoundTrivialTan)
DefineBound(BoundPolynomial)
DefineBound(BoundMedium)
DefineBound(BoundFull)

/*  Get the key for |x|: the most significant word of x with the sign bit
    cleared.
*/
static BoundKeyType Key(double x)
{
    const DoubleForBound X = { x };
    BoundKeyType t = X.s.key;
    return t & 0x7fffffff;
}

/*  If this evaluates to true, |x| < bound.  Only the most significant 32
    bits are compared, so it may evaluate to false for some |x| slightly
    less than bound.
*/
#define Within(xk, bound) ((xk) < Double##bound.s.key)
#elif defined __x86_64__ // (Add any other desired architectures.)
/*  This method uses integer comparisons with all 64 bits of the IEEE-754
    representation of the floating-point number.
*/
#include <stdint.h>

// A key is the full 64-bit representation of |x|.
typedef uint64_t BoundKeyType;

// Overlay a double with a 64-bit integer holding the same bits.
typedef union
{
    double d;
    uint64_t key;
} DoubleForBound;

/*  Prepare constants containing the bounds in our desired form.  For a bound
    named B, this defines DoubleB, whose d member is initialized to the value
    of B so that DoubleB.key can be compared with a key.
*/
#define DefineBound(bound) \
    static const DoubleForBound Double##bound = { bound };
DefineBound(BoundDenormal)
DefineBound(BoundTrivialSin)
DefineBound(BoundTrivialCos)
DefineBound(BoundTrivialTan)
DefineBound(BoundPolynomial)
DefineBound(BoundMedium)
DefineBound(BoundFull)

// Get the key for |x|: the representation of x with the sign bit cleared.
static BoundKeyType Key(double x)
{
    const DoubleForBound X = { x };
    BoundKeyType t = X.key;
    return t & 0x7fffffffffffffff;
}

// If this evaluates to true, |x| < bound.  All 64 bits are compared.
#define Within(xk, bound) ((xk) < Double##bound.key)
#else
/*  Default method, used when no integer method was selected above: keys are
    ordinary doubles and bounds are checked with floating-point comparison.
*/
typedef double BoundKeyType;

// This method compares against the bound values directly, so no constants are defined.
#define DefineBound(bound)

// Get the key for |x|, which for this method is |x| itself.
static BoundKeyType Key(double x)
{
#if defined __i386__ || defined __x86_64__

    // On Intel, just clear the sign bit.
    double xk = -0.;
    __asm__("andnpd %[x], %[xk]" : [xk] "+x" (xk) : [x] "x" (x));
    return xk;

#elif 1

    // Negate only values that compare less than zero.
    if (x < 0)
        return -x;
    return +x;

#else

    return fabs(x);

#endif
}

// True exactly when the key compares less than the bound.
#define Within(xk, bound) ((xk) < bound)
#endif
/* Several argument-reduction routines follow. Each has this specification:
static void Reduce(double *xp, int *a, double x).
Input:
x is a number to be reduced modulo pi/2. Each routine has
qualifications on what values it supports for x. Generally, x must not
be a NaN or infinity.
Output:
*xp is set to a residue of x modulo pi/2. Mostly, -pi/4 <= *xp <= +pi/4,
but *xp may be outside this interval by as much as 3.24128e-11.
*a is set to the arc of the circle x is in. 0 <= *a < 4.
On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some
integer k. This holds even if *xp is outside [-pi/4, pi/4]; *a matches
*xp.
This means all the reduction routines are returning *xp in radians.
ReduceFull goes through steps in which it has period-fraction units (0 to 1
corresponding to 0 to pi/2 radians), so the restoration of radians is
forced and introduces additional rounding error and execution time. We
might want to do something about that.
Since *xp is a single double, it is insufficient to represent the residue
precisely enough to compute a faithfully rounded function. For that, we
will have to return extended precision, perhaps a long double or perhaps
two doubles.
*/
/* static void ReduceFull(double *xp, int *a, double x).
Reduce x modulo pi/2 by multiplying it by a window of bits of 2/pi taken
from a table, separating the integer part (which gives the arc) from the
fraction (which gives the residue), and converting the fraction back to
radians.
Input:
x is a number to be reduced modulo pi/2. Per the general contract of the
reduction routines, x must not be a NaN or infinity. This routine was
documented as requiring 1 <= |x| (with the actual limit thought to be
something smaller, perhaps around 0x1p-622).
NOTE(review): as written, the table column k is computed from ec + 26,
which is negative when the unbiased exponent of x is less than 7. k is
then the result of right-shifting a negative int, and if it is negative,
p0 points before the start of the table row. The callers visible in this
file use this routine only when Within(xk, BoundMedium) is false, so
|x| is at least about 0x1p19 and the problem does not arise. Confirm the
intended lower limit before calling this with smaller arguments.
Output:
*xp is set to a residue of x modulo pi/2, in radians.
*a is set to the arc of the circle x is in. 0 <= *a < 4.
On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some
integer k.
This routine was adapted from the _sin function in double_fns.h (which
implements vvsin).
*/
static void ReduceFull(double *xp, int *a, double x)
{
// The rows are 8-bit shifts of 27-bit windows on 2/pi * 0x1p400.
static const double TwoOverPiWithOffset[4][45] =
{
{
+0x0.0000000p+0, +0x1.45f3070p+399, -0x1.1b1bbe8p+372,
-0x1.6b01ec8p+345, +0x1.5f47d50p+318, -0x1.6447e48p+291,
-0x1.3ad4ce0p+263, +0x1.e21c820p+237, +0x1.fe51640p+208,
-0x1.5085110p+182, +0x1.586dca0p+154, -0x1.c8e2df0p+129,
+0x1.374b800p+102, +0x1.924bbb0p+74, -0x1.f62e6e0p+48,
+0x1.cfe1df0p+20, -0x1.38d3b58p-6, -0x1.63045e0p-34,
+0x1.1afa980p-63, -0x1.44bb7b0p-88, -0x1.6638fe0p-116,
+0x1.ad17e00p-142, -0x1.bec66e0p-168, -0x1.4e33e58p-195,
+0x1.9cfa4e0p-223, +0x1.08bf178p-249, -0x1.036be40p-279,
+0x1.8ffc4c0p-303, -0x1.0fd3000p-339, -0x1.fc04340p-358,
-0x1.dce94c0p-385, +0x1.4da3ee0p-413, -0x1.64c0980p-439,
-0x1.b069ec8p-465, -0x1.1617380p-493, -0x1.32c3400p-521,
-0x1.5d28ae0p-548, +0x1.eeb1fb0p-574, -0x1.a0e8500p-604,
+0x1.e839cf8p-627, +0x1.e294a48p-654, +0x1.d4d7f68p-681,
+0x1.fb11f90p-708, -0x1.517bd50p-735, +0x1.9823800p-767
},
{
+0x0.0000000p+0, +0x1.45f3000p+399, +0x1.b727240p+377,
-0x1.f56b020p+353, +0x1.3abe900p+325, -0x1.5964480p+299,
+0x1.b6c52b0p+271, +0x1.93c4390p+244, +0x1.07f9480p+214,
-0x1.38a8428p+191, -0x1.0ea7920p+162, -0x1.b7238c0p+135,
+0x1.09374b8p+110, +0x1.924c000p+74, -0x1.15f62e8p+56,
+0x1.21cfe20p+28, -0x1.0a71a70p+1, -0x1.acb1820p-25,
-0x1.77dca08p-52, -0x1.68a25d8p-79, -0x1.ec598e0p-106,
-0x1.fb29740p-133, -0x1.037d8d0p-161, +0x1.1d63980p-188,
+0x1.a99cfa0p-215, +0x1.3908bf0p-241, +0x1.77bf250p-269,
+0x1.d8ffc80p-299, -0x1.a000880p-322, +0x1.6603fc0p-350,
-0x1.a1dce90p-377, -0x1.2fac970p-403, -0x1.2593040p-433,
+0x1.9e4f960p-457, +0x1.36e9e90p-485, -0x1.c099620p-512,
+0x1.7fa8b60p-538, -0x1.5b08a70p-565, -0x1.41a0e80p-596,
-0x1.30be300p-622, -0x1.821d6b8p-646, +0x1.25d4d80p-673,
-0x1.2813b80p-702, -0x1.ca8be00p-730, +0x1.580cc10p-754
},
{
+0x0.0000000p+0, +0x1.4600000p+399, -0x1.9f246c0p+386,
-0x1.bbead60p+360, -0x1.ec54100p+329, -0x1.c159648p+307,
+0x1.c0db628p+280, +0x1.5993c40p+252, +0x1.c820ff0p+225,
+0x1.458eaf0p+198, +0x1.ebbc560p+172, +0x1.b7246e0p+144,
+0x1.d2126f0p+117, -0x1.a3ff370p+91, +0x1.2eea0a0p+64,
-0x1.736f180p+37, -0x1.e214e40p+8, +0x1.62534e8p-17,
-0x1.177dc80p-48, -0x1.0568a28p-71, +0x1.1213a68p-98,
-0x1.c7eca60p-127, +0x1.7df9040p-154, +0x1.cc8eb20p-179,
-0x1.9f2b318p-206, -0x1.6c6f780p-237, +0x1.f8bbdf8p-260,
+0x1.283b200p-288, -0x1.da00080p-318, -0x1.fa67f00p-344,
-0x1.0d0ee80p-372, +0x1.6b414e0p-397, -0x1.7049650p-423,
+0x1.fb3c9f0p-450, +0x1.6136ea0p-477, -0x1.7381320p-505,
-0x1.8680578p-530, +0x1.aea4f78p-557, -0x1.38141a0p-584,
-0x1.d098600p-613, +0x1.ce7de28p-638, +0x1.4a4baa0p-666,
-0x1.404a050p-692, +0x1.1f8d5d0p-720, +0x1.0ac0680p-749
},
{
+0x0.0000000p+0, +0x1.8000000p+399, -0x1.d067c90p+396,
-0x1.b1bbeb0p+368, +0x1.4fe13b0p+341, -0x1.05c1598p+315,
+0x1.bb81b70p+287, -0x1.d6a66c0p+260, -0x1.de37df0p+233,
-0x1.ae9c400p+200, -0x1.4214438p+180, -0x1.4f246e0p+153,
+0x1.b8e9090p+126, +0x1.ba5c010p+99, -0x1.b6d1160p+72,
+0x1.3a32440p+43, -0x1.80f10a0p+17, -0x1.c69dac8p-9,
-0x1.8c11780p-36, +0x1.1afa978p-63, -0x1.12edec8p-90,
+0x1.338e050p-117, -0x1.4ba0818p-144, -0x1.f633718p-171,
+0x1.8e60d50p-198, -0x1.8c16c70p-225, +0x1.17e2f00p-254,
-0x1.036be28p-279, +0x1.ff89800p-308, -0x1.0fd3400p-339,
+0x1.fde5e00p-365, +0x1.18b5a10p-388, -0x1.64b8248p-414,
-0x1.9302618p-441, -0x1.834f648p-468, -0x1.6173820p-497,
+0x1.9a797f8p-522, +0x1.45aea50p-549, -0x1.14e0500p-578,
-0x1.a0e84c0p-604, -0x1.7c63040p-631, -0x1.d6b5b40p-658,
-0x1.59404a0p-684, -0x1.3b81cc0p-714, +0x1.7421580p-738
}
};
// Overlay x with bit-fields so its raw exponent field can be read.
Double X = { x };
/* Set ec to the unbiased exponent minus 33. Why 33? I do not know.
This was in the vvsin code in double_fns.h.
*/
int ec = X.s.exponent - (1023+33);
/* Set k to ceiling(ec / 27) and m to residue. The multiplication by 607*4
followed by the shift by 16 stands in for division by 27 (2428/65536 is
slightly more than 1/27). With k = ceiling(ec / 27), 0 <= m < 27.
*/
int k = (ec + 26) * (607*4) >> 16;
int m = 27*k - ec;
/* offset is used to select a row in the reduction table. See below.
With 0 <= m < 27, offset is 0, 1, 2, or 3.
*/
int offset = m >> 3;
/* The reduction table, TwoOverPiWithOffset, contains bits of 2/pi.
First, all entries are scaled by 2**400 to avoid overflow/underflow
issues. Each entry contains 27 bits of 2/pi, except in the first two
columns. The first column contains zeroes because no reduction is done
for numbers that are already small. The entries in the second column
start before (at a bit with higher value) the leading bit of 2/pi, so
they contain some leading zeroes. The sign bits form part of the 27
bits represented.
Each row forms a contiguous string of bits of 2/pi. That is, adding
all the entries with sufficient precision yields a single bit string
representing 2/pi. The rows differ in their starting points; row 0
begins with the leading bit of 2/pi, and rows 1, 2, and 3 begin 8, 16,
and 24 bits before that.
*/
/* Scale x to avoid overflow in Dekker split. This is compensated for
in the entries in TwoOverPiWithOffset.
*/
x *= 0x1p-400;
/* Use Dekker's algorithm to split x into 26 bits and 27 bits. This
requires round-to-nearest mode.
*/
double xDekker = x * (0x1p27 + 1);
double x0 = xDekker - (xDekker - x);
double x1 = x - x0;
// Get address of starting point in table.
const double *p0 = &TwoOverPiWithOffset[offset][k];
// Get table entries.
const double fp0 = p0[0];
const double fp1 = p0[1];
const double fp2 = p0[2];
const double fp3 = p0[3];
// Get high bits of x * f, where f is the part of 2/pi we are using.
const double f0 = x1 * fp0 + fp1 * x0;
double f = x1 * fp1 + fp2 * x0;
// Combine to do integer work.
const double fi = f0 + f;
static const double IntegerBias = 0x1.8p52;
// Force the integer bits into a specific position.
Double Fi = { fi + IntegerBias };
/* |fi| is less than 0x1p36, so fi + IntegerBias is well within
[0x1p52, 0x1p53), so it has a known exponent, and the bits with
weight 2 and 1 are the least significant bits in its significand.
We know |fi| is less than 0x1p36 because it is the sum of x1 * fp0,
fp1 * x0, and something small in f. x0 has the same exponent as
x. Say 2**e <= |x| < 2**(e+1). (That is the original x, before we
scaled it by 0x1p-400.) The table entry we look up for fp0 has
magnitude less than 2**(400+27-27*ceiling((e-33)/27)), and fp1 is
less, bounded by 2**(400-27*ceiling((e-33)/27)). Including the
scaling in x, fp1 * x0 is less than 2**(e+1-400) *
2**(400-27*ceiling((e-33)/27)) <= 2**34. Similarly, fp0 * x1 is
less than 2**34, so their sum is less than 2**35.
*/
// Get the two least significant integer bits.
*a = Fi.s.significand2;
// Recover the integer to which fi was rounded by adding IntegerBias.
double fint = Fi.d - IntegerBias;
// Get further table entries for the lower-order terms.
const double fp4 = p0[4];
const double fp5 = p0[5];
const double fp6 = p0[6];
// Remove the integer part, leaving the high part of the fraction.
f = f0 - fint + f;
// Accumulate lower-order partial products into the fraction.
f += x1 * fp2 + fp3 * x0;
f += x1 * fp3 + fp4 * x0;
f += x1 * fp4 + fp5 * x0;
f += x1 * fp5 + fp6 * x0;
// Convert to radians by multiplying by pi/2.
*xp = f * 0x3.243F6A8885A3p-1;
}
/* static void ReduceMedium(double *xp, int *a, double x).
Reduce x modulo pi/2 by estimating the nearest integer multiple of pi/2
and subtracting that multiple of an extended-precision representation of
pi/2, piece by piece.
Input:
x is a number to be reduced modulo pi/2. This routine requires |x| <=
X. X is described below.
Output:
*xp is set to x modulo pi/2.
*a is set to the arc of the circle x is in. 0 <= *a < 4.
On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some
integer k.
Nomenclature:
p is the period, pi/2.
X is the maximum value x may have. X is 0x1.000013be57a3fp19.
InversePeriod is 1/p, rounded to the nearest double, ties to even.
n is x * InversePeriod, rounded to the nearest double, ties to even,
and then rounded to the nearest integer, ties to even.
N is the maximum value n may have. N is X * InversePeriod, rounded to
the nearest double, ties to even, and then rounded to the nearest
integer, ties to even. N is 333773.
Notes:
In comments in this routine, unquoted expressions are mathematical and
quoted expressions are floating-point. Thus, "a + b" refers to the
floating-point operation of addition, including rounding.
"n * Period[0]" is exact for all |n| <= N. It is inexact for n = N+1,
so this is the source of the bound on x. X is the greatest value for
which |n| <= N. (It is happenstance that Period[0] limits x; if the
period were different, its bits might result in Period[1] causing the
limit. n has some other roles in the errors in this routine that might
limit x when used with other periods or different precisions.)
The difference between InversePeriod and 1/p can cause n to differ from
the ideal value when x is near a multiple of p. The result is that
instead of producing a result inside the target interval [-p/2, p/2], a
result slightly outside the interval is produced. The result may be as
much as 3.24128e-11 outside the interval. The polynomial and the
polynomial evaluation must be satisfactory over this extended interval.
*/
static void ReduceMedium(double *xp, int *a, double x)
{
// The divisor is pi written out in hexadecimal, so this is 2/pi = 1/p.
static const double InversePeriod =
2 / 0x3.243f6a8885a308d313198a2e03707344ap0;
/* Period is an extended-precision representation of the reduction period.
Each element except the last has the property that multiplication by
any integer with magnitude at most N is exact.
*/
static const double Period[] = {
+0x1.921FB54440000p-0,
+0x1.68C234C4C0000p-39,
+0x1.98A2E03707345p-77
};
/* Estimate x / p and round to nearest integer. Adding and then
subtracting 0x1.8p52 performs the rounding to an integer; this relies
on round-to-nearest mode and on the sum being rounded to double.
*/
double n = x * InversePeriod + 0x1.8p52 - 0x1.8p52;
/* Record which arc of the circle x lies in. n is an integer with
magnitude at most N, so the conversion to int is exact. The mask
keeps the two low bits, which is n modulo 4 (for negative n as well,
presuming two's complement representation).
*/
*a = (int) n & 3;
// This is exact, per design of Period.
double np0 = n * Period[0];
// This is exact, per design of Period.
double np1 = n * Period[1];
/* The error here is at most 1/2 ULP(n * Period[2]), which is
1/2 ULP(n * +0x1.98A2E03707345p-77) = ULP(n * +0x1.98A2E03707345p-78).
n <= N = 333773, so an absolute bound on that is ULP(333773 *
+0x1.98A2E03707345p-78) = 0x1p-111, but we need a bound dependent on n
to partition a proof below.
This is called error (a).
*/
double np2 = n * Period[2];
/* Set x00 to x - n * Period[0]. np0 is very near x, so this is exact,
per David Goldberg, _What Every Computer Scientist Should Know About
Floating-Point Arithmetic_, Theorem 11.
The nomenclature used here is:
x0 is x - n * (Period[0]),
x1 is x - n * (Period[0] + Period[1]), and
x2 is x - n * (Period[0] + Period[1] + Period[2]).
x00 is the first and only "word" of an extended precision
representation of x0.
x10 is the first "word" of x1; it has the most significant bits.
x11 is the next "word" of x1; it has the following bits.
*/
double x00 = x - np0;
/* Subtract n * Period[1] from x0 to produce x1. These operations yield
an exact result, per Knuth, _The Art of Computer Programming_ 2, second
edition, page 221, section 4.2.2, Theorem C, in the sense that x10 +
x11 will exactly equal x0 - n * Period[1]. The first statement puts
the high bits in x10, and the second statement determines what was
rounded away in x10 and puts it in x11.
Knuth assumes |np1| <= |x00|. This is often the case, but perhaps
cancellation has reduced the magnitude of x00. However, if |x00| <
|np1|, this arithmetic is exact, because x00's least significant bit is
at least as large as the ULP of Period[1].
*/
double x10 = x00 - np1;
double x11 = x00 - x10 - np1;
/* This comment demonstrates the rounding error in the following
statement, "t = x11 - np2", is tiny relative to x10.
This is called error (b).
Let LSB(x) be the weight of the least significant bit set in a
floating-point number x. E.g., LSB(1.25) is 1/4, although ULP(1.25) is
2**-52.
Here, x10 and x11 are each multiples of LSB(Period[1]). If |x10| <
2**53 LSB(Period[1]), then x11 is zero, because all the bits of x10 +
x11 fit into x10. In that case, "x11 - np2" is exact.
Otherwise, 2**53 LSB(Period[1]) <= |x10|, so 2 LSB(Period[1]) <=
ULP(x10). LSB(Period[1]) = 0x1p-73, so 0x1p-72 <= ULP(x10).
By construction of x10 and x11, |x11| <= 1/2 ULP(x10).
|np2| <= |"n * Period[2]"| <= |n * Period[2] * (1+2**-53)| = 333773 *
0x1.98A2E03707345p-77 * (1+2**-53) < 0x1.0426p-58.
|"x11 - np2"| <= 2*max(|x11|, |np2|) <= 2*max(1/2 ULP(x10),
0x1.0426p-58) = max(ULP(x10), 0x1.0426p-57). The rounding error in
"x11 - np2" is at most 2**-53 times this, so it is at most max(2**-53
ULP(x10), 0x1p-110). Since ULP(x10) is at least 0x1p-72, the rounding
error is at most 0x1p-38 ULP(x10).
*/
double t = x11 - np2;
/* Add t to x1 to produce x2. As before, we could use Knuth's technique
to produce an exact sum, x20 + x21 = x10 + t. For now, we only return
x20. When we want extended precision, we could return x21 as well.
(If so, be careful to show that |t| <= |x10|, as required for Knuth's
technique, or otherwise show the arithmetic is sufficiently accurate.)
The difference between x20 and x10 + t is called error (c). It is at
most 1/2 ULP(x20).
*/
double x20 = x10 + t;
// double x21 = x10 - x20 + t;
*xp = x20;
/* Having subtracted n * (Period[0] + Period[1] + Period[2]) from x, we
have an error caused by the difference between p and Period[0] +
Period[1] + Period[2], which is less than 0x1.6fdb2p-131. So the error
is less than n * 0x1.6fdb2p-131.
n <= N = 333773, so an absolute bound on that is 333773 *
0x1.6fdb2p-131 < 0x1.d45fp-113, but we need a bound dependent on n to
partition a proof below.
This is called error (d).
Our reduction has four errors:
(a), which is at most ULP(n * +0x1.98A2E03707345p-78) <= 0x1p-111.
(b), which is at most 0x1p-38 ULP(x10).
(c), which is at most 1/2 ULP(x20).
(d), which is at most n * 0x1.6fdb2p-131 < 0x1.d45fp-113.
From the discussion of (b), if x11 is not zero, then 0x1p-72 <=
ULP(x10). Then x10 dominates the result, because np2 is so small that
subtracting it from x1 to produce x2 cannot reduce the exponent by more
than one. So the errors (a), (b), and (d) are tiny relative to x10,
and the total error is very nearly (c), 1/2 ULP(x20).
However, if x11 is zero, then (b) is zero, and we only need to consider
(a), (c), and (d). We partition this into two cases.
Case 0: |x| < 0x1p12.
The double-precision floating-point number closest to a multiple of
pi/2 in that interval is 0x1.6c6cbc45dc8dep6 (according to Maple
code from Muller, _Elementary Functions_), and it is about
0x1.6d61b58c99c43p-60 away from a multiple of pi/2.
n is at most 0x1p12 * InversePeriod, rounded to a double, so n < 2608.
Then:
(a) is at most ULP(2608 * +0x1.98A2E03707345p-78) = 0x1p-119.
(b) is zero.
(c) is at most 1/2 ULP(x20).
(d) is at most 2608 * 0x1.6fdb2p-131 < 0x1p-119.
The final result might be as low as about 0x1.6d6p-60. That has an
ULP of 0x1p-113, so (a) and (d) are small relative to it, and the
total error is nearly (c), 1/2 ULP(x20).
Case 1: 0x1p12 <= |x|.
We still have |x| <= 0x1.000013be57a3fp19. The double-precision
floating-point number closest to a multiple of pi/2 in that set is
0x1.6c6cbc45dc8dep11 (Muller again), and it is about
0x1.6d61b58c99c43p-55 away from a multiple of pi/2.
Then:
(a) is at most ULP(333773 * +0x1.98A2E03707345p-78) = 0x1p-111.
(b) is zero.
(c) is at most 1/2 ULP(x20).
(d) is at most 333773 * 0x1.6fdb2p-131 < 0x1.d45fp-113.
The final result might be as low as about 0x1.6d6p-55. That has an
ULP of 0x1p-108, so (a) and (d) are small relative to it, and the
total error is nearly (c), 1/2 ULP(x20).
Therefore, the total error is never much more than 1/2 ULP of the value
returned in *xp.
*/
}
/*  static double sinp(double r)

    Approximate sine(r) with a polynomial alone; no argument reduction is
    performed.

    Input:
        r, with |r| < pi/4 + 3.24128e-11.

    Output:
        Approximately sine(r) is returned.
*/
static double sinp(double r)
{
    /*  Coefficients of r**3 through r**13.  Derived from Cephes Math
        Library Release 2.8: June, 2000.  Maple's infnorm routine says this
        polynomial is within .0688 ULP of sine(r) for
        |r| < pi/4 + 3.24128e-11.
    */
    static const double S3  = -1.66666666666666307295E-1;
    static const double S5  = +8.33333333332211858878E-3;
    static const double S7  = -1.98412698295895385996E-4;
    static const double S9  = +2.75573136213857245213E-6;
    static const double S11 = -2.50507477628578072866E-8;
    static const double S13 = +1.58962301576546568060E-10;

    const double r2 = r * r;

    /*  Horner's rule in r**2 for the terms above the first, scaled by r**3
        and added to the leading term, r.  This stays a single expression so
        the arithmetic is performed in the original order.
    */
    return r + r * r2 *
        (((((S13 * r2 + S11) * r2 + S9) * r2 + S7) * r2 + S5) * r2 + S3);
}
/*  static double cosp(double r)

    Approximate cosine(r) with a polynomial alone; no argument reduction is
    performed.

    Input:
        r, with |r| < pi/4 + 3.24128e-11.

    Output:
        Approximately cosine(r) is returned.
*/
static double cosp(double r)
{
    /*  Coefficients of r**2 through r**14; the constant term is 1.  Derived
        from Cephes Math Library Release 2.8: June, 2000.  Maple's infnorm
        routine says this polynomial is within .00709 ULP of cosine(r) for
        |r| < pi/4 + 3.24128e-11.
    */
    static const double C2  = -.5;
    static const double C4  = +4.16666666666665929218E-2;
    static const double C6  = -1.38888888888730564116E-3;
    static const double C8  = +2.48015872888517045348E-5;
    static const double C10 = -2.75573141792967388112E-7;
    static const double C12 = +2.08757008419747316778E-9;
    static const double C14 = -1.13585365213876817300E-11;

    const double r2 = r * r;

    /*  Horner's rule in r**2.  This stays a single expression so the
        arithmetic is performed in the original order.
    */
    return ((((((C14 * r2 + C12) * r2 + C10) * r2 + C8) * r2 + C6) * r2
        + C4) * r2 + C2) * r2 + 1;
}
/* double sin(double x).
Notes:
Citations in parentheses below indicate the source of a requirement.
"C" stands for ISO/IEC 9899:TC2.
The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no
requirements since it defers to C and requires errno behavior only if
we choose to support it by arranging for "math_errhandling &
MATH_ERRNO" to be non-zero, which we do not.
Return value:
For +/- zero, return zero with same sign (C F.9 12 and F.9.1.6).
For +/- infinity, return a NaN (C F.9.1.6).
For a NaN, return the same NaN (C F.9 11 and 13).
Otherwise:
If the rounding mode is round-to-nearest, return sine(x) within a
few ULP. The maximum error of this routine is not precisely
known. The maximum error of the reduction might be around 3 ULP,
although this is partly a guess. The polynomials have small
errors. The polynomial evaluation might have an error under 1
ULP. So the worst error for this routine might be under 4 ULP.
Not currently implemented: In other rounding modes, return sine(x)
possibly with slightly worse error, not necessarily honoring the
rounding mode (Ali Sazegari narrowing C F.9 10).
All results are in [-1, 1].
Exceptions:
Raise underflow for a denormal result (C F.9 7 and Draft Standard for
Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the
smallest normal, underflow may or may not be raised. This is stricter
than the older 754 standard.
May or may not raise inexact, even if the result is exact (C F.9 8).
Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite
of C 4.2.1) or an infinity (C F.9.1.6) but not if the input is a quiet
NaN (C F.9 11).
May not raise exceptions otherwise (C F.9 9).
Properties:
Desired to be monotonic. Not yet proven!
*/
double sin(double x)
{
    // Put |x| in the form the bound checks expect.
    const BoundKeyType key = Key(x);

    /*  Denormal numbers and zero are handled first.  The multiplication
        generates underflow for denormal numbers and does not generate
        inexact for zero.
    */
    if (Within(key, BoundDenormal))
        return x * (1 - 0x1p-53);

    /*  For small numbers, x is the floating-point number nearest sine(x).
        Unfortunately, this check slows down the other cases, which may be
        more frequent.
    */
    if (Within(key, BoundTrivialSin))
    {
        /*  The constant is volatile so that the compiler must fetch it at
            run-time and cannot optimize away the multiplication below.
        */
        static volatile const double third = 1/3.;

        /*  Read it once.  Writing "third*third" would make the compiler
            load it twice, since it is volatile.
        */
        const double loaded = third;

        /*  Multiply and hand the product to an assembly construct, so the
            compiler cannot tell the result is unused and remove the
            multiplication.
        */
        __asm__("" : : "X" (loaded*loaded));

        return x;
    }

    double residue;
    int arc;

    /*  If |x| is small, no reduction is necessary.  (The interval for this
        could be enlarged by examining the sine polynomial and figuring out
        how big the argument can get before the error is too large.)
    */
    if (Within(key, BoundPolynomial))
    {
        residue = x;
        arc = 0;
    }
    // If |x| is medium, the fast reduction routine applies.
    else if (Within(key, BoundMedium))
        ReduceMedium(&residue, &arc, x);
    // Other finite values need the full, slow reduction routine.
    else if (Within(key, BoundFull))
        ReduceFull(&residue, &arc, x);
    // What remains is an infinity or a NaN; the subtraction yields a NaN.
    else
        return x - x;

    // Choose the function and sign according to the arc.
    switch (arc)
    {
        case 1:
            return +cosp(residue);
        case 2:
            return -sinp(residue);
        case 3:
            return -cosp(residue);
        case 0:
        default:
            return +sinp(residue);
    }
}
/* double cos(double x).
Notes:
Citations in parentheses below indicate the source of a requirement.
"C" stands for ISO/IEC 9899:TC2.
The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no
requirements since it defers to C and requires errno behavior only if
we choose to support it by arranging for "math_errhandling &
MATH_ERRNO" to be non-zero, which we do not.
Return value:
For +/- infinity, return a NaN (C F.9.1.6).
For a NaN, return the same NaN (C F.9 11 and 13).
Otherwise:
If the rounding mode is round-to-nearest, return cosine(x) within a
few ULP. The maximum error of this routine is not precisely
known. The maximum error of the reduction might be around 3 ULP,
although this is partly a guess. The polynomials have small
errors. The polynomial evaluation might have an error under 1
ULP. So the worst error for this routine might be under 4 ULP.
Not currently implemented: In other rounding modes, return
cosine(x) possibly with slightly worse error, not necessarily
honoring the rounding mode (Ali Sazegari narrowing C F.9 10).
All results are in [-1, 1].
Exceptions:
Raise underflow for a denormal result (C F.9 7 and Draft Standard for
Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the
smallest normal, underflow may or may not be raised. This is stricter
than the older 754 standard.
May or may not raise inexact, even if the result is exact (C F.9 8).
Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite
of C 4.2.1) or an infinity (C F.9.1.5) but not if the input is a quiet
NaN (C F.9 11).
May not raise exceptions otherwise (C F.9 9).
Properties:
Desired to be monotonic. Not yet proven!
*/
double cos(double x)
{
    // Put |x| in the form the bound checks expect.
    const BoundKeyType key = Key(x);

    // Zero gets an exact result, which avoids generating inexact.
    if (x == 0)
        return 1;

    /*  For small numbers, 1 is the floating-point number nearest cosine(x).
        Unfortunately, this check slows down the other cases, which may be
        more frequent.  It greatly speeds up cases where |x| < 0x1p-492, by
        avoiding arithmetic with denormals.
    */
    if (Within(key, BoundTrivialCos))
    {
        /*  The constant is volatile so that the compiler must fetch it at
            run-time and cannot optimize away the multiplication below.
        */
        static volatile const double third = 1/3.;

        /*  Read it once.  Writing "third*third" would make the compiler
            load it twice, since it is volatile.
        */
        const double loaded = third;

        /*  Multiply and hand the product to an assembly construct, so the
            compiler cannot tell the result is unused and remove the
            multiplication.
        */
        __asm__("" : : "X" (loaded*loaded));

        return 1;
    }

    double residue;
    int arc;

    /*  If |x| is small, no reduction is necessary.  (The interval for this
        could be enlarged by examining the cosine polynomial and figuring
        out how big the argument can get before the error is too large.)
    */
    if (Within(key, BoundPolynomial))
    {
        residue = x;
        arc = 0;
    }
    // If |x| is medium, the fast reduction routine applies.
    else if (Within(key, BoundMedium))
        ReduceMedium(&residue, &arc, x);
    // Other finite values need the full, slow reduction routine.
    else if (Within(key, BoundFull))
        ReduceFull(&residue, &arc, x);
    // What remains is an infinity or a NaN; the subtraction yields a NaN.
    else
        return x - x;

    // Choose the function and sign according to the arc.
    switch (arc)
    {
        case 1:
            return -sinp(residue);
        case 2:
            return -cosp(residue);
        case 3:
            return +sinp(residue);
        case 0:
        default:
            return +cosp(residue);
    }
}
/* double tan(double x).
Notes:
Citations in parentheses below indicate the source of a requirement.
"C" stands for ISO/IEC 9899:TC2.
The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no
requirements since it defers to C and requires errno behavior only if
we choose to support it by arranging for "math_errhandling &
MATH_ERRNO" to be non-zero, which we do not.
Return value:
For +/- zero, return zero with same sign (C F.9 12 and F.9.1.7).
For +/- infinity, return a NaN (C F.9.1.7).
For a NaN, return the same NaN (C F.9 11 and 13).
Otherwise:
If the rounding mode is round-to-nearest, return tangent(x) within
a few ULP. The maximum error of this routine is not precisely
known. The maximum error of the reduction might be around 3 ULP,
although this is partly a guess. The polynomials have small
errors. The polynomial evaluation might have an error under 1
ULP. How the final division affects error has not been considered
yet. 4.55 ULP has been observed.
Not currently implemented: In other rounding modes, return
tangent(x) possibly with slightly worse error, not necessarily
honoring the rounding mode (Ali Sazegari narrowing C F.9 10).
Exceptions:
Raise underflow for a denormal result (C F.9 7 and Draft Standard for
Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the
smallest normal, underflow may or may not be raised. This is stricter
than the older 754 standard.
May or may not raise inexact, even if the result is exact (C F.9 8).
Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite
of C 4.2.1) or an infinity (C F.9.1.7) but not if the input is a quiet
NaN (C F.9 11).
May not raise exceptions otherwise (C F.9 9).
Properties:
Desired to be monotonic. Not yet proven!
*/
double tan(double x)
{
    // Put |x| in the form the bound checks expect.
    const BoundKeyType key = Key(x);

    /*  Denormal numbers and zero are handled first.  The multiplication
        generates underflow for denormal numbers and does not generate
        inexact for zero.
    */
    if (Within(key, BoundDenormal))
        return x * (1 - 0x1p-53);

    /*  For small numbers, x is the floating-point number nearest
        tangent(x).  Unfortunately, this check slows down the other cases,
        which may be more frequent.
    */
    if (Within(key, BoundTrivialTan))
    {
        /*  The constant is volatile so that the compiler must fetch it at
            run-time and cannot optimize away the multiplication below.
        */
        static volatile const double third = 1/3.;

        /*  Read it once.  Writing "third*third" would make the compiler
            load it twice, since it is volatile.
        */
        const double loaded = third;

        /*  Multiply and hand the product to an assembly construct, so the
            compiler cannot tell the result is unused and remove the
            multiplication.
        */
        __asm__("" : : "X" (loaded*loaded));

        return x;
    }

    double residue;
    int arc;

    /*  If |x| is small, no reduction is necessary.  (The interval for this
        could be enlarged by examining the polynomials and figuring out how
        big the argument can get before the error is too large.)
    */
    if (Within(key, BoundPolynomial))
    {
        residue = x;
        arc = 0;
    }
    // If |x| is medium, the fast reduction routine applies.
    else if (Within(key, BoundMedium))
        ReduceMedium(&residue, &arc, x);
    // Other finite values need the full, slow reduction routine.
    else if (Within(key, BoundFull))
        ReduceFull(&residue, &arc, x);
    // What remains is an infinity or a NaN; the subtraction yields a NaN.
    else
        return x - x;

    /*  In odd arcs the argument is offset by an odd multiple of pi/2, so the
        negated cotangent of the residue is used; otherwise the tangent of
        the residue is used.
    */
    switch (arc)
    {
        case 1:
        case 3:
            return - cosp(residue) / sinp(residue);
        case 0:
        case 2:
        default:
            return + sinp(residue) / cosp(residue);
    }
}