-- ----------------------------------------------------------------------- -- Title: longest_float_elementary_functions -- Last Mod: Thu Apr 4 12:35:09 1991 -- Author: Vincent Broman -- Copyright 1990 Vincent Broman -- Permission granted to copy, modify, or compile this software for -- one's own use, provided that this copyright notice is preserved intact. -- Permission granted to distribute compiled binary copies of this -- software which are linked in with some other application. -- Permission granted to distribute other copies of this software, -- provided that (1) any copy which is not source code, i.e. not in the -- form in which the software is usually maintained, must be accompanied -- by a copy of the source code from which it was compiled, and (2) the -- one distributing it must refrain from imposing on the recipient -- further restrictions on the distribution of this software. -- -- Description: -- -- a better-than-double-precision implementation of -- the proposed standard generic package of elementary functions, -- from the ISO-IEC/JTC1/SC22/WG9 (Ada) -- Numerics Rapporteur Group proposal, Draft 1.1. -- -- This implementation is temporarily similar to that of double -- so that full accuracy will not be supported if -- longest_float'base'digits is more than 16. -- -- It is assumed that longest_float'machine_mantissa will lie -- between 64 and 113, if these functions are actually invoked. -- The exponent size is assumed to be no more than 15 bits. -- -- This will evolve to conform to later drafts. -- -- Exceptions: argument_error, numeric_error are raised. -- ----------------------------------------------------------------------- -- with math_constants, longest_float_primitive_functions, longest_float_algebraic_functions, longest_float_support_functions; use math_constants, longest_float_primitive_functions, longest_float_algebraic_functions, longest_float_support_functions; package body longest_float_elementary_functions is overflow_error: exception renames numeric_error; -- AI-00387 specifies constraint_error for floating overflows. fminexp: constant longest_float := longest_float( longest_float'machine_emin - 1); fmaxexp: constant longest_float := longest_float( longest_float'machine_emax); mant: constant := longest_float'machine_mantissa; recip_sqrt_epsilon: constant longest_float := 2.0 ** ((mant - 1) / 2); two_pi: constant := 2.0 * pi; half_pi: constant := pi / 2.0; subtype octant_number is integer range 0 .. 7; ---------------------------------------------------------------------- -- high precision argument reduction routines procedure octant_reduce( x: in longest_float; r: out longest_float; nmod8: out octant_number) is -- -- Octant_reduce reduces x to the interval [0, pi/4] with the high -- absolute accuracy needed by trigonometric functions computed in radians -- which are subject to stringent relative accuracy requirements. -- -- R is in 0.0 .. pi/4 and, to within the accuracy described below, -- either (abs( x) - r) is equal to pi/4 times an even integer -- or else (abs( x) - (pi/4 - r)) is equal to pi/4 times an odd integer. -- The angles in odd octants are complemented relative to pi/4 so that -- small sines and cosines may be computed from small angles, -- i.e. r is small anywhere near the x and y coordinate axes. -- Further, r=0 iff x=0, because of the way huge arguments get reduced. -- -- NMOD8 is the octant number of x, in 0 .. 7, i.e. -- for x >= 0 nmod8 is truncate( x / (pi/4)) mod 8, -- for x < 0 nmod8 is 7 - truncate( - x / (pi/4)) mod 8. -- -- The accuracy analyses falls into two cases, where the length of the mantissa -- is between 64 and 89 bits, or between 90 and 113 bits. -- The longer case of 90-113 bits is presented first. ------------- -- -- The relative accuracy of the reduced angle corresponding to (r,nmod8) -- is better than 2 ulp if abs( x) < toobig = pi/4*2**(m/2+1), -- where m = longest_float'machine_mantissa and division of integers truncates. -- -- The inexactness of (r,nmod8) is caused entirely by -- (1) inexact reduction of huge arguments into the -- interval [-toobig, toobig]. -- (2) the difference of 1.41e-89 between pi/4 and sum( pterm), -- (3) the two roundings in producing r itself. -- If abs(x) < toobig, the absolute error in r caused by (2) -- is less than (sum(pterm)-pi/4)*abs(x)/(pi/4) < 2.034e-72 . -- -- The relative error in the angle (r,nmod8) depends heavily on how closely -- floating point numbers can approximate the irrationals k*pi/2, -- for k a positive integer, because these will produce the smallest -- reduced angles and the smallest values for sin and cos. -- Not wishing to exhaustively compute errors for all values of k, -- we can get error bounds from continued fractions, instead. -- -- Let delta be the error in pi/4 under (2). Let a floating point argument -- x = p/q satisify 0 < p/q < pi/4*2**(m/2+1), where q is a power of 2, -- the mantissa p is an integer 2**(m-1) <= p < 2**m, -- and m=113, the worst-case mantissa length. -- Then, q > 2/pi*2**((m+1)/2 - 1) therefore q >= q0 = 2**((m+1)/2 - 1). -- -- Let R be the exact reduced angle. Then before r is rounded as in (3), -- abs(r-R)/R <= delta*(p/q)/(pi/4)/R -- <= delta*(p/q)/(pi/4)/abs( pi/2*k - p/q), -- for the integer k nearest 2/pi*p/q -- = delta*8/(pi**2)/abs( 2/pi - k*q/p) -- = delta*8/(pi**2)/abs( 2/pi - k*q/q0*q0/p) -- <= delta*8/(pi**2)/abs( 2/pi - k'*q0/p) -- for the integer k' nearest 2/pi*p/q0 -- = delta*8/(q0*pi**2)/abs( 2/(pi*q0) - k'/p) -- <= delta*8/(q0*pi**2)/abs( 2/(pi*q0) - a/b) -- where a/b is the first continued fraction convergent to 2/(pi*q0) -- with a denominator b >= 2**m. -- This convergent has an error of about 2.15e-69 , -- obtained from the first 35 terms of the continued fraction: -- 0+1/113187804032455044+1/2+1/2+1/2+1/1+1/6+1/2+1/1+1/5+1/1 -- +1/14+1/16+1/1+1/5+1/1+1/17+1/2+1/1+1/1+1/5+1/1+1/142+1/5 -- +1/1+1/1+1/1+1/6+1/1+1/3+1/2+1/49+1/1+1/1+1/2+1/2+1/1+1/200... -- This gives a relative reduction error bound of 7.372e-38 < 0.00077 ulp. -- -- Consequently, the relative error for non-huge arguments arises -- almost entirely from the <=2 roundings at the end of the reduction. -- -- For arguments abs( x) > toobig, x is reduced and rounded more than -- once, so an extra absolute error is introduced, bounded by: -- max( 2**(exponent(2pi)-1-91), abs(x)*2**(-56+1-91)), -- where the former bound applies if reducing by whole periods produces a -- remainder small than 2pi, the latter bound otherwise. -- -- Test your own system with 2541875874*pi/2 to see how well -- argument reduction is done. You should get -- sin(3992769286.0477674627482610331119516) -- =~ 3.817269481964032839793506508700225238e-29 . -------------- -- The same analysis gives analogous results for the 64-90 bit case -- with the following constants calculated. -- -- m = 90 bits in the worst case. -- toobig = pi/4*2**46 -- error of pi/4-sum(pterm) = 7.015e-72 -- absolute reduction error <= 4.94e-58 -- error of continued fraction of 25 terms = 3.26e-55 -- continued fraction is: -- 0+1/27633741218861+1/10+1/1+1/1+1/4+1/1+1/197 -- +1/10+1/9+1/1+1/1+1/2+1/2+1/2+1/63+1/1+1/58 -- +1/1+1/2+1/2+1/2+1/3+1/1+1/2+1/1+1/6... -- This gives a relative reduction error bound of 9.92e-31 < 0.0025 ulp. -- nbits: constant := longest_float'machine_mantissa/2 + 1; -- significant bits in n oneminus: constant := 1.0 - 3.0 * longest_float'epsilon; -- 3 minimal? toobig: constant := pi / 4.0 * (2.0 ** nbits) * oneminus; p: constant longest_float := pi / 4.0; ax: longest_float := abs( x); n: longest_float; odd_n: boolean; function truncate_nbits (x: longest_float) return longest_float is -- -- truncate to an integer with nbits or fewer significant bits -- begin -- truncate_nbits if exponent( x) < nbits then return truncate( x); else return leading_part( x, nbits); end if; end truncate_nbits; function reduce_by_n_113 (x: longest_float; n: longest_float) return longest_float is -- -- compute x - n * sum(pterm(k),k=1..8) -- erring by less than two roundoffs in the final result. -- x and n are assumed nonnegative. n has an nbit-length even integer value. -- long_float arithmetic is assumed to support model numbers of mantissa -- length between 91 and 113 bits. -- type softreal is array( 1 .. 8) of longest_float; -- -- The high precision eighth-period: -- -- sum(pterm(k),k=1..8) = pi/4 rounded to 292+54 bits, in 8 35-bit pieces. -- the extra accuracy in pterm8 is sometimes helpful, sometimes wasted. -- pterm: constant softreal := ( 16#0.c90fdaa220#e+0, 16#0.168c234c4c#e-9, 16#0.6628b80dc0#e-19, 16#0.1cd129024c#e-28, 16#0.2088a67cc0#e-37, 16#0.74020bbea0#e-46, 16#0.63b139b220#e-55, 16#0.514a08798e3404ddef9519c#e-64); -- -- each product of a 57 bit even integer and a 35 bit pterm(k) computed below -- except the smallest, lands exactly on a 91 bit model number, no roundoff. -- diff: longest_float := x; sumtail: longest_float := 0.0; prod, nextdiff: longest_float; begin -- reduce_by_n_113 for k in 1 .. softreal'last loop -- combine largest terms while the subtraction is still exact, -- then sum tail to combine with head all at once. prod := n * pterm( k); nextdiff := diff - prod; if nextdiff > prod or else - nextdiff > diff then -- nextdiff might be inexact for m in reverse k + 1 .. softreal'last loop sumtail := sumtail + n * pterm( m); end loop; -- total roundoff < 2 * the bound on the last rounding error if abs( diff) > prod then return diff - (prod + sumtail); -- two roundings else return (diff - sumtail) - prod; end if; end if; diff := nextdiff; -- difference is still exact end loop; return diff; end reduce_by_n_113; function reduce_by_n_90 (x: longest_float; n: longest_float) return longest_float is -- -- compute x - n * sum(pterm(k),k=1..12) -- erring by less than two roundoffs in the final result. -- x and n are assumed nonnegative. n has an nbit-length even integer value. -- long_float arithmetic is assumed to support model numbers of mantissa -- length between 64 and 90 bits. -- type softreal is array( 1 .. 12) of longest_float; -- -- The high precision eighth-period: -- -- sum(pterm(k),k=1..12) = pi/4 rounded to 235+73 bits, in 12 19-bit pieces. -- the extra accuracy in pterm12 is sometimes helpful, sometimes wasted. -- pterm: constant softreal := ( 16#0.c90fc0#e+0, 16#0.1aa220#e-4, 16#0.168c20#e-9, 16#0.34c4c0#e-14, 16#0.6628b0#e-19, 16#0.80dc00#e-24, 16#0.1cd128#e-28, 16#0.1024e0#e-33, 16#0.88a660#e-39, 16#0.1cc740#e-43, 16#0.20bbe8#e-49, 16#0.263b139b22514a08798e34#e-54); -- -- each product of a 46 bit even integer and a 19 bit pterm(k) computed below, -- except the smallest, lands exactly on a 64 bit model number, no roundoff. -- diff: longest_float := x; sumtail: longest_float := 0.0; prod, nextdiff: longest_float; begin -- reduce_by_n_90 for k in 1 .. softreal'last loop -- combine largest terms while the subtraction is still exact, -- then sum tail to combine with head all at once. prod := n * pterm( k); nextdiff := diff - prod; if nextdiff > prod or else - nextdiff > diff then -- nextdiff might be inexact for m in reverse k + 1 .. softreal'last loop sumtail := sumtail + n * pterm( m); end loop; -- total roundoff < 2 * the bound on the last rounding error if abs( diff) > prod then return diff - (prod + sumtail); -- two roundings else return (diff - sumtail) - prod; end if; end if; diff := nextdiff; -- difference is still exact end loop; return diff; end reduce_by_n_90; function reduce_by_n (x: longest_float; n: longest_float) return longest_float is begin if longest_float'machine_mantissa <= 90 then return reduce_by_n_90( x, n); else return reduce_by_n_113( x, n); end if; end reduce_by_n; function mod8 (x: longest_float) return octant_number is -- -- return x mod 8 for x having a nonnegative integer value. -- xo8: longest_float := x * (1.0 / 8.0); begin -- mod8 return octant_number( (xo8 - truncate( xo8)) * 8.0); end mod8; begin -- octant_reduce -- if ax too big to handle accurately then reduce by whole periods first while ax > toobig loop n := 8.0 * truncate_nbits( ax / (8.0 * p) * oneminus); -- n < ax / p ax := reduce_by_n( ax, n); end loop; -- -- now ax < pi/4*2**nbits -- n := truncate( ax / p); -- 29 or fewer bits odd_n := odd( n); if odd_n then -- odd octant, complement angle wrt pi/4 -- p - (ax - n*p) = -ax + (n+1)*p ax := - reduce_by_n( ax, n + 1.0); elsif n > 0.0 then -- for even nbr of octants, ax - n*p ax := reduce_by_n( ax, n); else -- n = 0 null; end if; if ax < 0.0 then -- initial n was off by 1 because of roundoff in ax/p r := - ax; if odd_n then n := n + 1.0; else n := n - 1.0; end if; elsif ax > p then -- initial n was off by 1 because of roundoff in ax/p -- want 2 * p - ax r := - reduce_by_n( ax, 2.0); if odd_n then n := n - 1.0; else n := n + 1.0; end if; else -- initial n correct, vast majority of cases r := ax; end if; if x >= 0.0 then nmod8 := mod8( n); else nmod8 := octant_number'last - mod8( n); end if; end octant_reduce; procedure period_reduce( x: in longest_float; p: in longest_float; r: out longest_float; n: out octant_number) is -- -- Period_reduce reduces x to the interval [0, p/8] with the high -- absolute accuracy needed by trigonometric functions -- which are subject to stringent relative accuracy requirements. -- -- X is the input angle, in units such that p is the cyclic period. -- Execution is erroneous if p is not positive. -- R is in 0.0 .. p/8 and, to within the accuracy described below, -- either (abs( x) - r) is equal to p/8 times an even integer -- or else (abs( x) - (p/8 - r)) is equal to p/8 times an odd integer. -- The angles in odd octants are complemented relative to p/8 so that -- small sines and cosines may be computed from small angles, -- i.e. r is small anywhere near the x and y coordinate axes. -- -- N is the octant number of x, in 0 .. 7, i.e. -- for x >= 0 n is truncate( x / (p/8)) mod 8, -- for x < 0 n is 7 - truncate( - x / (p/8)) mod 8. -- -- The reduced angle corresponding to (r,n) is rounded and -- correct up to the relative machine accuracy of r. -- All floating point arithmetic used is exact except possibly for -- (1) complementing r in an odd octant, computing (p/8 - r1), -- (2) the last reductions, if p/8 loses bits by denormalization. -- r1: longest_float := abs( x); exponent_diff: constant integer := exponent( r1) - exponent( p); p1: longest_float := scale( p, exponent_diff); -- p1 & r1 now have same exponent s: integer := exponent_diff + 3; octant: octant_number := 0; begin -- period reduce if s >= 0 then -- We repeatedly subtract (p times a power of 2) from r. loop if r1 >= p1 then -- It is assumed that the floating point subtraction of -- two positives (a-b) is exact if either -- (1) exponent(a) = exponent(b) or -- (2) exponent(a)-1 = exponent(b) >= exponent(a-b). -- This is the case for (r1-p1). r1 := r1 - p1; -- case s is when 0 => r1 := p1 - r1; -- odd octant, complement octant := octant + 1; when 1 => octant := octant + 2; when 2 => octant := octant + 4; when others => null; end case; end if; -- if r1 >= p1 exit when s = 0; s := s - 1; p1 := p1 * 0.5; -- could scale if that's faster end loop; -- down s r := r1; else r := r1; -- small angle, no reduction end if; -- if s nonnegative if x >= 0.0 then n := octant; else n := octant_number'last - octant; -- no effect on odd_octant end if; end period_reduce; procedure ln2_reduce( x: in longest_float; r: out longest_float; n: out integer) is -- -- Ln2_reduce reduces x to an interval of width approximately ln(2), -- with the high absolute accuracy needed by an exponential function which is -- subject to stringent relative accuracy requirements. -- -- This routine requires abs( x) < 11356.8 for spec accuracy. -- -- Upon return, x = r + n * ln(2), -- to within an absolute accuracy of epsilon (using the Ada model). -- Also, abs( r) <= nat_log_2 / 2 + 32768 * epsilon, -- and n is in the range -16384 .. 16384, -- -- The inexactness of the resultant (r,n) is caused entirely by -- (1) the negligible difference of about 2.6e-47 between ln(2) and ln2, -- (2) the two possible roundings in producing the result r itself. -- ln2_a: constant := 16#0.b17217f7d1cf4#e+0; ln2_b: constant := 16#0.39abc9e3b3980#e-12; ln2_c: constant := 16#0.3f2f6af40f343267298b62d8a0d18#e-25; -- ln2_a+ln2_b+ln2_c is ln(2) accurate to 153+62 bits, in 3 50-bit pieces. nrd: longest_float := floor( x / nat_log_2 + 0.5); begin -- ln2_reduce n := integer( nrd); r := ((x - ln2_a * nrd) - ln2_b * nrd) - ln2_c * nrd; -- The multiplications did happen exactly, using model numbers, -- except for the c term which is least significant. -- The a term subtracts exactly. The c term is so small -- that a bound on the absolute error can depend only on the size -- of the result of the second subtraction. So, two roundoffs -- make the absolute error < 1*epsilon, pessimistically. -- -- if we wanted to eliminate slop in the output interval, -- then just test r, adjust n, and repeat the reduction. end ln2_reduce; ---------------------------------------------------------------------- -- implementations of user-callable fns function sqrt( x : longest_float) return longest_float is -- -- Declaration: -- function SQRT (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- SQRT(X) ~ sqrt(X) -- Usage: -- Z := SQRT(X); -- Domain: -- X >= 0.0 -- Range: -- SQRT(X) >= 0.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- (b) SQRT(0.0) = 0.0 -- Notes: -- (a) The upper bound of the reachable range of SQRT is approximately given by -- SQRT(X)<=sqrt(FLOAT_TYPE'SAFE_LARGE) -- (b) Other standards might impose additional constraints on SQRT. For -- example, the IEEE standards for binary and radix-independent -- floating-point arithmetic require greater accuracy in the result of SQRT -- than this standard requires, and they require that SQRT(-0.0)=-0.0. -- An implementation of SQRT in GENERIC_ELEMENTARY_FUNCTIONS that -- conforms to this standard will conform to those other standards if it -- satisfies their additional constraints. -- begin if x > 0.0 then return square_root( x); elsif x = 0.0 then return x; else raise argument_error; end if; end sqrt; function log (x : longest_float) return longest_float is -- -- Declaration: -- function LOG (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- LOG(X) ~ ln(X) -- Usage: -- Z := LOG(X); -- natural logarithm -- Domain: -- X > 0.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) LOG(1.0) = 0.0 -- Notes: -- The reachable range of LOG is approximately given by -- ln(FLOAT_TYPE'SAFE_SMALL) <= LOG(X) <= ln(FLOAT_TYPE'SAFE_LARGE) -- begin if x > 0.0 then -- log_2( 1.0) = 0.0 return log_2( x) * nat_log_2; else raise argument_error; end if; end log; function log (x, base : longest_float) return longest_float is -- -- Declaration: -- function LOG (X, BASE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- LOG(X,BASE) ~ log to base BASE of X -- Usage: -- Z := LOG(X, 10.0); -- base 10 logarithm -- Z := LOG(X, 2.0); -- base 2 logarithm -- Z := LOG(X, BASE); -- base BASE logarithm -- Domain: -- (a) X > 0.0 -- (b) BASE > 0.0 -- (c) BASE /= 1.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) LOG(1.0,BASE) = 0.0 -- Notes: -- (a) When BASE > 1.0, the reachable range of LOG is approximately given by -- log to base BASE of FLOAT_TYPE'SAFE_SMALL <= LOG(X,BASE) <= -- log to base BASE of FLOAT_TYPE'SAFE_LARGE -- (b) When 0.0 < BASE < 1.0, the reachable range of LOG is approximately given by -- log to base BASE of FLOAT_TYPE'SAFE_LARGE <= LOG(X,BASE) <= -- log to base BASE of FLOAT_TYPE'SAFE_SMALL -- begin if x > 0.0 and base > 0.0 and base /= 1.0 then -- log_2( 1.0) = 0.0 return log_2( x) / log_2( base); else raise argument_error; end if; end log; function exp (x : longest_float) return longest_float is -- -- Declaration: -- function EXP (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- EXP(X) ~ e raised to the X power -- Usage: -- Z := EXP(X); -- e raised to the power X -- Domain: -- Mathematically unbounded -- Range: -- EXP(X) >= 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) EXP(0.0) = 1.0 -- Notes: -- The usable domain of EXP is approximately given by -- X <= ln(FLOAT_TYPE'SAFE_LARGE) -- minarg: constant longest_float := fminexp * nat_log_2; maxarg: constant longest_float := fmaxexp * nat_log_2; begin if x >= maxarg then raise overflow_error; elsif x < minarg then return 0.0; else declare r: longest_float; n: integer; begin ln2_reduce( x, r, n); return scale( reduced_two_to_the( r / nat_log_2), n); end; end if; end exp; function "**" (x, y : longest_float) return longest_float is -- -- Declaration: -- function "**" (X, Y : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- X ** Y ~ X raised to the power Y -- Usage: -- Z := X ** Y; -- X raised to the power Y -- Domain: -- (a) X >= 0.0 -- (b) Y > 0.0 when X = 0.0 -- Range: -- X ** Y >= 0.0 -- Accuracy: -- (a) Maximum relative error (when X > 0.0) = -- (4.0+|Y*ln(X)|/32.0) * FLOAT_TYPE'BASE'EPSILON -- (b) X ** 0.0 = 1.0 when X > 0.0 -- (c) 0.0 ** Y = 0.0 when Y > 0.0 -- (d) X ** 1.0 = X -- (e) 1.0 ** Y =1.0 -- Notes: -- The usable domain of "**", when X > 0.0, is approximately the set of -- values for X and Y satisfying -- Y*ln(X) <= ln(FLOAT_TYPE'SAFE_LARGE) -- This imposes a positive upper bound on Y (as a function of X) when -- X > 1.0, and a negative lower bound on Y (as a function of X) when -- 0.0 < X < 1.0. -- function half_precision (r: longest_float) return longest_float is -- truncate to half as many bits in the mantissa begin return leading_part( r, longest_float'machine_mantissa / 2); end half_precision; pragma inline( half_precision); begin -- function "**" if x = 1.0 then return 1.0; -- 1**y elsif x > 0.0 then if y = 0.0 then return 1.0; -- x**0 elsif y = 1.0 then return x; -- x**1 else declare -- general case of 2**( y*log_2( x)) sy: longest_float := y; fex, ma, logx: longest_float; begin log_2_parts( fex, ma, x); logx := fex + ma; -- If y*log_2( x) overflows positively, we don't care -- at what point the overflow exception happens. -- Large negative values of y*log_2( x) are detected by -- the following conservative test, which is equivalent to -- sy * logx < 2 * fminexp, but will not overflow, -- because abs( logx) < 2 * abs( fminexp). if (sy / (2.0 * fminexp)) * logx > 1.0 then return 0.0; -- underflow else declare -- y*log_2( x) must have 10 extra bits of accuracy -- to get full precision in the mantissa of the result. -- so we split up the factors in two parts y1: constant longest_float := half_precision( sy); y2: constant longest_float := sy - y1; -- sy = y1 + y2 exactly w1: constant longest_float := half_precision( logx); w2: constant longest_float := (fex - w1) + ma; -- fex + ma = w1 + w2 exactly -- -- what we need is (y1+y2)*(w1+w2) very accurately y1w1: constant longest_float := y1*w1; -- exactly n: constant longest_float := floor( y1w1 + 0.5); frac: constant longest_float := (y1w1 - n) + (sy * w2 + y2 * w1); -- rounded to within 2**-longest_float'mantissa, -- frac is nearly confined to [-.5,.5] . begin return scale( reduced_two_to_the( frac), integer( n)); end; end if; end; end if; elsif x = 0.0 and y > 0.0 then return 0.0; -- 0**y else raise argument_error; -- x < 0 or x = 0 and y <= 0 end if; end "**"; function sin (x : longest_float) return longest_float is -- -- Declaration: -- function SIN (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- SIN(X) ~ sin(X) -- Usage: -- Z := SIN(X); -- X in radians -- Domain: -- Mathematically unbounded -- Range: -- |SIN(X)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- when |X| is less than or equal to some documented implementation-dependent -- threshold, which must not be less than -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2) -- For larger values of |X|, degraded accuracy is allowed. An implementation -- must document its behavior for large |X|. -- (b) SIN(0.0) = 0.0 -- r: longest_float; oct: octant_number; begin octant_reduce( x, r, oct); case oct is when 0| 3 => return reduced_sin( r); when 1| 2 => return reduced_cos( r); when 4| 7 => return - reduced_sin( r); when 5| 6 => return - reduced_cos( r); end case; end sin; function sin (x, cycle : longest_float) return longest_float is -- -- Declaration: -- function SIN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- SIN(X,CYCLE) ~ sin(2Pi * X/CYCLE) -- Usage: -- Z := SIN(X, 360.0); -- X in degrees -- Z := SIN(X, 1.0); -- X in bams (binary angular measure) -- Z := SIN(X, CYCLE); -- X in units such that one complete -- -- cycle of rotation corresponds to -- -- X = CYCLE -- Domain: -- (a) X mathematically unbounded -- (b) CYCLE > 0.0 -- Range: -- |SIN(X,CYCLE)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- (b) For integer k, SIN(X,CYCLE)= 0.0 when X=k*CYCLE/2.0 -- 1.0 when X=(4k+1)*CYCLE/4.0 -- -1.0 when X=(4k+3)*CYCLE/4.0 -- r: longest_float; oct: octant_number; begin if cycle > 0.0 then period_reduce( x, cycle, r, oct); r := r / cycle * two_pi; -- underflow denormal allowed case oct is when 0| 3 => return reduced_sin( r); when 1| 2 => return reduced_cos( r); when 4| 7 => return - reduced_sin( r); when 5| 6 => return - reduced_cos( r); end case; else raise argument_error; end if; end sin; function cos (x : longest_float) return longest_float is -- -- Declaration: -- function COS (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COS(X) ~ cos(X) -- Usage: -- Z := COS(X); -- X in radians -- Domain: -- Mathematically unbounded -- Range: -- |COS(X)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- when |X| is less than or equal to some documented implementation-dependent -- threshold, which must not be less than -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2) -- For larger values of |X|, degraded accuracy is allowed. An implementation -- must document its behavior for large |X|. -- (b) COS(0.0) = 1.0 -- r: longest_float; oct: octant_number; begin octant_reduce( x, r, oct); case oct is when 0| 7 => return reduced_cos( r); when 1| 6 => return reduced_sin( r); when 2| 5 => return - reduced_sin( r); when 3| 4 => return - reduced_cos( r); end case; end cos; function cos (x, cycle : longest_float) return longest_float is -- -- Declaration: -- function COS (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COS(X,CYCLE) ~ cos(2Pi*X/CYCLE) -- Usage: -- Z := COS(X, 360.0); -- X in degrees -- Z := COS(X, 1.0); -- X in bams -- Z := COS(X, CYCLE); -- X in units such that one complete -- -- cycle of rotation corresponds to -- -- X = CYCLE -- Domain: -- (a) X mathematically unbounded -- (b) CYCLE > 0.0 -- Range: -- |COS(X,CYCLE)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- (b) For integer k, COS(X,CYCLE) = 1.0 when X=k*CYCLE -- 0.0 when X=(2k+1)*CYCLE/4.0 -- -1.0 when X=(2k+1)*CYCLE/2.0 -- r: longest_float; oct: octant_number; begin if cycle > 0.0 then period_reduce( x, cycle, r, oct); r := r / cycle * two_pi; -- underflow denormal allowed case oct is when 0| 7 => return reduced_cos( r); when 1| 6 => return reduced_sin( r); when 2| 5 => return - reduced_sin( r); when 3| 4 => return - reduced_cos( r); end case; else raise argument_error; end if; end cos; function tan (x : longest_float) return longest_float is -- -- Declaration: -- function TAN (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- TAN(X) ~ tan(X) -- Usage: -- Z := TAN(X); -- X in radians -- Domain: -- Mathematically unbounded -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- when |X| is less than or equal to some documented implementation-dependent -- threshold, which must not be less than -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2) -- For larger values of |X|, degraded accuracy is allowed. An implementation -- must document its behavior for large |X|. -- (b) TAN(0.0) = 0.0 -- r: longest_float; oct: octant_number; begin octant_reduce( x, r, oct); case oct is when 0| 4 => return reduced_sin( r) / reduced_cos( r); when 1| 5 => if r /= 0.0 then return reduced_cos( r) / reduced_sin( r); else -- x must have been huge return 0.0; end if; when 2| 6 => if r /= 0.0 then return - reduced_cos( r) / reduced_sin( r); else -- x must have been huge return 0.0; end if; when 3| 7 => return - reduced_sin( r) / reduced_cos( r); end case; end tan; function tan (x, cycle : longest_float) return longest_float is -- -- Declaration: -- function TAN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- TAN(X,CYCLE) ~ tan(2Pi*X/CYCLE) -- Usage: -- Z := TAN(X, 360.0); -- X in degrees -- Z := TAN(X, 1.0); -- X in bams -- Z := TAN(X, CYCLE); -- X in units such that one complete -- -- cycle of rotation corresponds to -- -- X = CYCLE -- Domain: -- (a) X /= (2k+1)*CYCLE/4.0, for integer k -- (b) CYCLE > 0.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) TAN(X,CYCLE) = 0.0 when X=k*CYCLE/2.0, for integer k -- r: longest_float; oct: octant_number; begin if cycle > 0.0 then period_reduce( x, cycle, r, oct); r := r / cycle * two_pi; -- underflow denormal allowed case oct is when 0| 4 => return reduced_sin( r) / reduced_cos( r); when 1| 5 => if r /= 0.0 then return reduced_cos( r) / reduced_sin( r); else raise argument_error; end if; when 2| 6 => if r /= 0.0 then return - reduced_cos( r) / reduced_sin( r); else raise argument_error; end if; when 3| 7 => return - reduced_sin( r) / reduced_cos( r); end case; else raise argument_error; end if; end tan; function cot (x : longest_float) return longest_float is -- -- Declaration: -- function COT (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COT(X) ~ cot(X) -- Usage: -- Z := COT(X); -- X in radians -- Domain: -- X /= 0.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- when |X| is less than or equal to some documented implementation-dependent -- threshold, which must not be less than -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2) -- For larger values of |X|, degraded accuracy is allowed. An implementation -- must document its behavior for large |X|. -- r: longest_float; oct: octant_number; begin if x /= 0.0 then octant_reduce( x, r, oct); case oct is when 0| 4 => if r /= 0.0 then return reduced_cos( r) / reduced_sin( r); else -- x must have been huge return 0.0; end if; when 1| 5 => return reduced_sin( r) / reduced_cos( r); when 2| 6 => return - reduced_sin( r) / reduced_cos( r); when 3| 7 => if r /= 0.0 then return - reduced_cos( r) / reduced_sin( r); else -- x must have been huge return 0.0; end if; end case; else raise argument_error; end if; end cot; function cot (x, cycle : longest_float) return longest_float is -- -- Declaration: -- function COT (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COT(X,CYCLE) ~ cot(2Pi*X/CYCLE) -- Usage: -- Z := COT(X, 360.0); -- X in degrees -- Z := COT(X, 1.0); -- X in bams -- Z := COT(X, CYCLE); -- X in units such that one complete -- -- cycle of rotation corresponds to -- -- X = CYCLE -- Domain: -- (a) X /= k*CYCLE/2.0, for integer k -- (b) CYCLE > 0.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) COT(X,CYCLE) = 0.0 when X=(2k+1)*CYCLE/4.0, for integer k -- r: longest_float; oct: octant_number; begin if cycle > 0.0 then period_reduce( x, cycle, r, oct); r := r / cycle * two_pi; -- underflow denormal allowed case oct is when 0| 4 => if r /= 0.0 then return reduced_cos( r) / reduced_sin( r); else raise argument_error; end if; when 1| 5 => return reduced_sin( r) / reduced_cos( r); when 2| 6 => return - reduced_sin( r) / reduced_cos( r); when 3| 7 => if r /= 0.0 then return - reduced_cos( r) / reduced_sin( r); else raise argument_error; end if; end case; else raise argument_error; end if; end cot; function arcsin (x : longest_float) return longest_float is -- -- Declaration: -- function ARCSIN (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCSIN(X) ~ arcsine(X) -- Usage: -- Z := ARCSIN(X); -- Z in radians -- Domain: -- |X| <= 1.0 -- Range: -- |ARCSIN(X)| <= Pi/2 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCSIN(0.0) = 0.0 -- (c) ARCSIN(1.0) = Pi/2 -- (d) ARCSIN(-1.0) = -Pi/2 -- Notes: -- - Pi/2 and Pi/2 are not safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if abs( x) <= 1.0 then return arctan( y => x, x => complement( x)); else raise argument_error; end if; end arcsin; function arcsin (x, cycle : longest_float) return longest_float is -- -- Declaration: -- function ARCSIN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCSIN(X,CYCLE) ~ arcsin(X)*CYCLE/2Pi -- Usage: -- Z := ARCSIN(X, 360.0); -- Z in degrees -- Z := ARCSIN(X, 1.0); -- Z in bams -- Z := ARCSIN(X, CYCLE); -- Z in units such that one complete -- -- cycle of rotation corresponds to -- -- Z = CYCLE -- Domain: -- (a) |X| <= 1.0 -- (b) CYCLE > 0.0 -- Range: -- |ARCSIN(X,CYCLE) <= CYCLE/4.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCSIN(0.0,CYCLE) = 0.0 -- (c) ARCSIN(1.0,CYCLE) = CYCLE/4.0 -- (d) ARCSIN(-1.0,CYCLE) = -CYCLE/4.0 -- Notes: -- - CYCLE/4.0 and CYCLE/4.0 -- might not be safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if abs( x) <= 1.0 and cycle > 0.0 then return arctan( y => x, x => complement( x), cycle => cycle); else raise argument_error; end if; end arcsin; function arccos (x : longest_float) return longest_float is -- -- Declaration: -- function ARCCOS (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCCOS(X) ~ arccos(X) -- Usage: -- Z := ARCCOS(X); -- Z in radians -- Domain: -- |X| <= 1.0 -- Range: -- 0.0 <= ARCCOS(X) <= Pi -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOS(1.0) = 0.0 -- (c) ARCCOS(0.0) = Pi/2 -- (d) ARCCOS(-1.0) = Pi -- Notes: -- Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if abs( x) <= 1.0 then return arctan( y => complement( x), x => x); else raise argument_error; end if; end arccos; function arccos (x, cycle : longest_float) return longest_float is -- -- Declaration: -- function ARCCOS (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCCOS(X,CYCLE) ~ arccos(X)*CYCLE/2Pi -- Usage: -- Z := ARCCOS(X, 360.0); -- Z in degrees -- Z := ARCCOS(X, 1.0); -- Z in bams -- Z := ARCCOS(X, CYCLE); -- Z in units such that one complete -- -- cycle of rotation corresponds to -- -- Z = CYCLE -- Domain: -- (a) |X| <= 1.0 -- (b) CYCLE > 0.0 -- Range: -- 0.0 <= ARCCOS(X,CYCLE) <= CYCLE/2.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOS(1.0,CYCLE) = 0.0 -- (c) ARCCOS(0.0,CYCLE) = CYCLE/4.0 -- (d) ARCCOS(-1.0,CYCLE) = CYCLE/2.0 -- Notes: -- CYCLE/4.0 and CYCLE/2.0 -- might not be safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if abs( x) <= 1.0 and cycle > 0.0 then return arctan( y => complement( x), x => x, cycle => cycle); else raise argument_error; end if; end arccos; function arctan (y : longest_float; x : longest_float := 1.0) return longest_float is -- -- Declaration: -- function ARCTAN (Y : FLOAT_TYPE; -- X : FLOAT_TYPE := 1.0) return FLOAT_TYPE; -- Definition: -- (a) ARCTAN(Y) ~ arctan(Y) -- (b) ARCTAN(Y,X) ~ arctan(Y/X) when X >= 0.0 -- arctan(Y/X)+Pi when X < 0.0 and Y >= 0.0 -- arctan(Y/X)-Pi when X < 0.0 and Y < 0.0 -- Usage: -- Z := ARCTAN(Y); -- Z, in radians, is the angle (in the -- -- quadrant containing the point (1.0,Y)) -- -- whose tangent is Y -- Z := ARCTAN(Y, X); -- Z, in radians, is the angle (in the -- -- quadrant containing the point (X,Y)) -- -- whose tangent is Y/X -- Domain: -- X /= 0.0 when Y = 0.0 -- Range: -- (a) |ARCTAN(Y)| <= Pi/2 -- (b) 0.0 < ARCTAN(Y,X) <= Pi when Y >= 0.0 -- (c) -Pi <= ARCTAN(Y,X) <= 0.0 when Y < 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCTAN(0.0) = 0.0 -- (c) ARCTAN((0.0,X) = 0.0 when X > 0.0 -- Pi when X < 0.0 -- (d) ARCTAN(Y,0.0) = Pi/2 when Y > 0.0 -- -Pi/2 when Y < 0.0 -- Notes: -- -Pi,-Pi/2,Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if x = 0.0 and y = 0.0 then raise argument_error; elsif x >= y then if x >= - y then return reduced_arctan( y / x); -- near +x axis else return - half_pi - reduced_arctan( x / y); -- near -y axis end if; elsif x >= - y then return half_pi - reduced_arctan( x / y); -- near +y axis elsif y >= 0.0 then return pi + reduced_arctan( y / x); -- just above -x axis else return - pi + reduced_arctan( y / x); -- just below -x axis end if; end arctan; function arctan (y : longest_float; x : longest_float := 1.0; cycle : longest_float) return longest_float is -- -- Declaration: -- function ARCTAN (Y : FLOAT_TYPE; -- X : FLOAT_TYPE := 1.0; -- CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- (a) ARCTAN(Y,CYCLE) ~ arctan(Y)*CYCLE/2Pi -- (b) ARCTAN(Y,X,CYCLE) ~ arctan(Y/X)*CYCLE/2Pi when X >= 0.0 -- (arctan(Y/X)+Pi)*CYCLE/2Pi when X < 0.0 and Y >= 0.0 -- (arctan(Y/X)-Pi)*CYCLE/2Pi when X < 0.0 and Y < 0.0 -- Usage: -- Z := ARCTAN(Y, CYCLE => 360.0); -- Z, in degrees, is the -- -- angle (in the quadrant -- -- containing the point -- -- (1.0,Y)) whose tangent is Y -- Z := ARCTAN(Y, CYCLE => 1.0); -- Z, in bams, is the -- -- angle (in the quadrant -- -- containing the point -- -- (1.0,Y)) whose tangent is Y -- Z := ARCTAN(Y, CYCLE => CYCLE); -- Z, in units such that one -- -- complete cycle of rotation -- -- corresponds to Z = CYCLE, -- -- is the angle (in the -- -- quadrant containing the -- -- point (1.0,Y)) whose -- -- tangent is Y -- Z := ARCTAN(Y, X, 360.0); -- Z, in degrees, is the -- -- angle (in the quadrant -- -- containing the point (X,Y)) -- -- whose tangent is Y/X -- Z := ARCTAN(Y, X, 1.0); -- Z, in bams, is the -- -- angle (in the quadrant -- -- containing the point (X,Y)) -- -- whose tangent is Y/X -- Z := ARCTAN(Y, X, CYCLE); -- Z, in units such that one -- -- complete cycle of rotation -- -- corresponds to Z = CYCLE, -- -- is the angle (in the -- -- quadrant containing the -- -- point (X,Y)) whose -- -- tangent is Y/X -- Domain: -- (a) X /= 0.0 when Y = 0.0 -- (b) CYCLE > 0.0 -- Range: -- (a) |ARCTAN(Y,CYCLE)| <= CYCLE/4.0 -- (b) 0.0 <= ARCTAN(Y,X,CYCLE) <= CYCLE/2.0 when Y >= 0.0 -- (c) -CYCLE/2.0 <= ARCTAN(Y,X,CYCLE) <= 0.0 when Y < 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCTAN(0.0,CYCLE) = 0.0 -- (c) ARCTAN(0.0,X,CYCLE) = 0.0 when X > 0.0 -- CYCLE/2.0 when X < 0.0 -- (d) ARCTAN(Y,0.0,CYCLE) = CYCLE/4.0 when Y > 0.0 -- -CYCLE/4.0 when Y < 0.0 -- Notes: -- -CYCLE/2.0, -CYCLE/4.0, CYCLE/4.0 and CYCLE/2.0 -- might not be safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- scale: constant longest_float := cycle / two_pi; begin -- what happens if scale underflows? cycle/4 underflows? if cycle <= 0.0 or else (x = 0.0 and y = 0.0) then raise argument_error; elsif x >= y then if x >= - y then -- near +x axis return scale * reduced_arctan( y / x); else -- near -y axis return - cycle / 4.0 - scale * reduced_arctan( x / y); end if; elsif x >= - y then -- near +y axis return cycle / 4.0 - scale * reduced_arctan( x / y); elsif y >= 0.0 then -- just above -x axis return cycle / 2.0 + scale * reduced_arctan( y / x); else -- just below -x axis return - cycle / 2.0 + scale * reduced_arctan( y / x); end if; end arctan; function arccot (x : longest_float; y : longest_float := 1.0) return longest_float is -- -- Declaration: -- function ARCCOT (X : FLOAT_TYPE; -- Y : FLOAT_TYPE := 1.0) return FLOAT_TYPE; -- Definition: -- (a) ARCCOT(X) ~ arccot(X) -- (b) ARCCOT(X,Y) ~ arccot(X/Y) when Y >= 0.0 -- arccot(X/Y)-Pi when Y < 0.0 -- Usage: -- Z := ARCCOT(X); -- Z, in radians, is the angle (in the -- -- quadrant containing the point (X,1.0) -- -- whose cotangent is X -- Z := ARCCOT(X, Y); -- Z, in radians, is the angle (in the -- -- quadrant containing the point (X,Y)) -- -- whose cotangent is X/Y -- Domain: -- Y /= 0.0 when X = 0.0 -- Range: -- (a) 0.0 <= ARCCOT(X) <= Pi -- (b) 0.0 <= ARCCOT(X,Y) <= Pi when Y >= 0.0 -- (c) -Pi <= ARCCOT(X,Y) <= 0.0 when Y < 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOT(0.0) = Pi/2 -- (c) ARCCOT(0.0,Y) = Pi/2 when Y > 0.0 -- -Pi/2 when Y < 0.0 -- (d) ARCCOT(X,0.0) = 0.0 when X > 0.0 -- Pi when X < 0.0 -- Notes: -- -Pi,-Pi/2,Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (b), (c), or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin return arctan( y => y, x => x); end arccot; function arccot (x : longest_float; y : longest_float := 1.0; cycle : longest_float) return longest_float is -- -- Declaration: -- function ARCCOT (X : FLOAT_TYPE; -- Y : FLOAT_TYPE := 1.0; -- CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- (a) ARCCOT(X,CYCLE) ~ arccot(X)*CYCLE/2Pi -- (b) ARCCOT(X,Y) ~ arccot(X/Y)*CYCLE/2Pi when Y >= 0.0 -- (arccot(X/Y)-Pi)*CYCLE/2Pi -- Usage: -- Z := ARCCOT(X, CYCLE => 360.0); -- Z, in degrees, is the -- -- angle (in the quadrant -- -- containing the point -- -- (X,1.0)) whose cotangent -- -- is X -- Z := ARCCOT(X, CYCLE => 1.0); -- Z, in bams, is the -- -- angle (in the quadrant -- -- containing the point -- -- (X,1.0)) whose cotangent -- -- is X -- Z := ARCCOT(X, CYCLE => CYCLE); -- Z, in units such that one -- -- complete cycle of rotation -- -- corresponds to Z = CYCLE, -- -- is the angle (in the -- -- quadrant containing the -- -- point (X,1.0)) whose -- -- cotangent is X -- Z := ARCCOT(X, Y, 360.0); -- Z, in degrees, is the -- -- angle (in the quadrant -- -- containing the point (X,Y)) -- -- whose cotangent is X/Y -- Z := ARCCOT(X, Y, 1.0); -- Z, in bams, is the -- -- angle (in the quadrant -- -- containing the point (X,Y) -- -- whose cotangent is X/Y -- Z := ARCCOT(X, Y, CYCLE); -- Z, in units such that one -- -- complete cycle of rotation -- -- corresponds to Z = CYCLE -- -- is the angle (in the -- -- quadrant containing the -- -- point (X,Y)) whose -- -- cotangent is X/Y -- Domain: -- (a) Y /= 0.0 when X = 0.0 -- (b) CYCLE > 0.0 -- Range: -- (a) 0.0 <= ARCCOT((X,CYCLE) <= CYCLE/2.0 -- (b) 0.0 <= ARCCOT(X,Y,CYCLE) <= CYCLE/2.0 when Y >= 0.0 -- (c) -CYCLE/2.0 <= ARCCOT(X,Y,CYCLE) <= 0.0 when Y < 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOT(0.0,CYCLE) = CYCLE/4.0 -- (c) ARCCOT(0.0,Y,CYCLE) = CYCLE/4.0 when Y > 0.0 -- -CYCLE/4.0 when Y < 0.0 -- (d) ARCCOT(X,0.0,CYCLE) = 0.0 when X > 0.0 -- CYCLE/2.0 when X < 0.0 -- Notes: -- - CYCLE/2.0, - CYCLE/4.0, CYCLE/4.0 and CYCLE/2.0 -- might not be safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (b), (c), or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin return arctan( y => y, x => x, cycle => cycle); end arccot; function sinh (x : longest_float) return longest_float is -- -- Declaration: -- function SINH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- SINH(X) ~ sinh X -- Usage: -- Z := SINH(X); -- Domain: -- Mathematically unbounded -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) SINH(0.0) = 0.0 -- Notes: -- The usable domain of SINH is approximately given by -- |X| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0) -- ax: constant longest_float := abs( x); meet_exp: constant := (mant + 1) * nat_log_2 / 2.0; begin if ax > meet_exp then -- essentially +-exp(ax)/2, but must round and overflow properly declare ex: constant longest_float := (exp_1 / 2.0) * exp( ax - 1.0); begin if x >= 0.0 then return ex; else return - ex; end if; end; elsif abs( x) <= 0.25 then -- small args need an economized taylor series, relerr<4.7e-21. declare a1: constant := 1.00000000000000000e+00; a3: constant := 1.66666666666666672e-01; a5: constant := 8.33333333333232737e-03; a7: constant := 1.98412698481368145e-04; a9: constant := 2.75572980364608572e-06; a11: constant := 2.50822360881915096e-08; x2: constant longest_float := x * x; begin return x * (a1 + x2 * (a3 + x2 * (a5 + x2 * (a7 + x2 * (a9 + x2 * a11))))); end; else -- 1/4 < abs x <= meet_exp declare ex: constant longest_float := exp( x); begin return 0.5 * (ex - 1.0 / ex); end; end if; end sinh; function cosh (x : longest_float) return longest_float is -- -- Declaration: -- function COSH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COSH(X) ~ cosh X -- Usage: -- Z := COSH(X); -- Domain: -- Mathematically unbounded -- Range: -- COSH(X) >= 1.0 -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) COSH(0.0) = 1.0 -- Notes: -- The usable domain of COSH is approximately given by -- |X| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0) -- ax: constant longest_float := abs( x); meet_exp: constant := (mant + 1) * nat_log_2 / 2.0; begin if ax > meet_exp then -- essentially exp(ax)/2, but must round and overflow properly return (exp_1 / 2.0) * exp( ax - 1.0); else declare ex: constant longest_float := exp( x); -- moderate size begin return 0.5 * (ex + 1.0 / ex); end; end if; end cosh; function tanh (x : longest_float) return longest_float is -- -- Declaration: -- function TANH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- TANH(X) ~ tanh X -- Usage: -- Z := TANH(X); -- Domain: -- Mathematically unbounded -- Range: -- |TANH(X)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) TANH(0.0) = 0.0 -- ax: constant longest_float := abs( x); meet_exp: constant := (mant + 1) * nat_log_2 / 2.0; begin -- check whether exp(-ax) negligible wrt exp(ax) if ax > meet_exp then if x >= 0.0 then return 1.0; else return -1.0; end if; elsif ax <= 0.25 then -- small args need an economized taylor series, relerr<4.2e-18. declare x2: constant longest_float := x * x; a0: constant := 1.00000000000000000e+00; a2: constant := -3.33333333333325599e-01; a4: constant := 1.33333333330730219e-01; a6: constant := -5.39682536340966024e-02; a8: constant := 2.18694674471973313e-02; a10: constant := -8.86251132346534132e-03; a12: constant := 3.57829492817945873e-03; a14: constant := -1.31685489581880250e-03; begin return x * (a0 + x2 * (a2 + x2 * (a4 + x2 * (a6 + x2 * (a8 + x2 * (a10 + x2 * (a12 + x2 * a14))))))); end; else -- 1/4 < abs x <= meet_exp declare ex: longest_float := exp( x); begin return (ex - 1.0 / ex) / (ex + 1.0 / ex); end; end if; end tanh; function coth (x : longest_float) return longest_float is -- -- Declaration: -- function COTH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COTH(X) ~ coth X -- Usage: -- Z := COTH(X); -- Domain: -- X /= 0.0 -- Range: -- |COTH(X)| >= 1.0 -- Accuracy: -- Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- begin if x /= 0.0 then return 1.0 / tanh( x); else raise argument_error; end if; end coth; function arcsinh (x : longest_float) return longest_float is -- -- Declaration: -- function ARCSINH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCSINH(X) ~ arcsinh X -- Usage: -- Z := ARCSINH(X); -- Domain: -- Mathematically unbounded -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCSINH(0.0) = 0.0 -- Notes: -- The reachable range of ARCSINH is approximately given by -- |ARCSINH(X)| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0) -- ax: longest_float := abs( x); as: longest_float; reduce_edge: constant := sqrt_2 / 4.0; -- see reduced_logdel_2 for its domain definition begin if ax >= recip_sqrt_epsilon then as := 1.0 + log_2( ax); elsif ax >= reduce_edge then as := log_2( ax + tan_to_sec( ax)); elsif ax > longest_float'base'epsilon then -- ax < sqrt(1/8) -- z = x+sqrt(1+x**2) -- (z-1)/(z+1) = x/(1+sqrt(1+x**2)) as := reduced_logdel_2( ax / (1.0 + tan_to_sec( ax))); else return x; -- sign already correct for small args end if; if x >= 0.0 then return nat_log_2 * as; else return - nat_log_2 * as; end if; end arcsinh; function arccosh (x : longest_float) return longest_float is -- -- Declaration: -- function ARCCOSH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCCOSH(X) ~ arccosh X -- Usage: -- Z := ARCCOSH(X); -- Domain: -- X >= 1.0 -- Range: -- ARCCOSH(X) >= 0.0 -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOSH(1.0) = 0.0 -- Notes: -- The upper bound of the reachable range of ARCCOSH is approximately given -- by ARCCOSH(X) <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0) -- reduce_edge: constant := 0.75 * sqrt_2; -- see reduced_logdel_2 for its domain definition begin if x >= recip_sqrt_epsilon then return nat_log_2 * (1.0 + log_2( x)); elsif x >= reduce_edge then return nat_log_2 * log_2( x + sec_to_tan( x)); elsif x >= 1.0 then -- z = x+sqrt(x**2-1) -- (z-1)/(z+1) = sqrt((x-1)/(x+1)) return nat_log_2 * reduced_logdel_2( sqrt( (x - 1.0) / (x + 1.0) )); else raise argument_error; end if; end arccosh; function arctanh (x : longest_float) return longest_float is -- -- Declaration: -- function ARCTANH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCTANH(X) ~ arctanh X -- Usage: -- Z := ARCTANH(X); -- Domain: -- |X| < 1.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCTANH(0.0) = 0.0 -- ax: constant longest_float := abs( x); half_ln_2: constant := nat_log_2 / 2.0; reduce_edge: constant := 1.0 / (3.0 + 2.0 * sqrt_2); -- see reduced_logdel_2 for its domain definition begin if ax >= 1.0 then raise argument_error; elsif ax < reduce_edge then return reduced_logdel_2( x) * half_ln_2; else return log_2( (1.0 + x) / (1.0 - x)) * half_ln_2; end if; end arctanh; function arccoth (x : longest_float) return longest_float is -- -- Declaration: -- function ARCCOTH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCCOTH(X) ~ arccoth X -- Usage: -- Z := ARCCOTH(X); -- Domain: -- |X| > 1.0 -- Range: -- Mathematically unbounded -- Accuracy: -- Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- ax: constant longest_float := abs( x); half_ln_2: constant := nat_log_2 / 2.0; reduce_edge: constant := 3.0 + 2.0 * sqrt_2; -- see reduced_logdel_2 for its domain definition begin if ax <= 1.0 then raise argument_error; elsif ax > reduce_edge then return reduced_logdel_2( 1.0 / x) * half_ln_2; else -- 1 < ax <= 3+2*sqrt2 return log_2( (x + 1.0) / (x - 1.0)) * half_ln_2; end if; end arccoth; end longest_float_elementary_functions; -- $Header: l_elementary_functions_b.a,v 3.24 91/04/04 13:28:58 broman Exp $