-- ----------------------------------------------------------------------- -- Title: longest_float_primitive_functions -- Last Mod: Fri Mar 29 14:18:21 1991 -- Author: Vincent Broman -- Copyright 1990 Vincent Broman -- Permission granted to copy, modify, or compile this software for -- one's own use, provided that this copyright notice is preserved intact. -- Permission granted to distribute compiled binary copies of this -- software which are linked in with some other application. -- Permission granted to distribute other copies of this software, -- provided that (1) any copy which is not source code, i.e. not in the -- form in which the software is usually maintained, must be accompanied -- by a copy of the source code from which it was compiled, and (2) the -- one distributing it must refrain from imposing on the recipient -- further restrictions on the distribution of this software. -- -- Visibility: -- Description: -- functions on floating point types whose implementation -- depends on access to machine/implementation aspects -- of the floating point numbers. this includes access to -- the mantissa/exponent parts, integer/fraction operations, -- the spacing of numbers, etc. -- -- This software will evolve in the direction of the -- ACM SIGAda Numeric Working Group package of the same name. -- -- This is a somewhat portable, slow implementation. -- -- Exceptions: numeric_error upon overflow of function scale. -- ----------------------------------------------------------------------- package body longest_float_primitive_functions is -- overflow_error: exception renames numeric_error; -- AI-00387 specifies constraint_error for floating overflows. -- values computed during package elaboration, constant thereafter machine_small: longest_float; machine_normal_small: longest_float; -- smallest nondenormal positive half_machine_epsilon: constant longest_float := 0.5 ** longest_float'machine_mantissa; subtype expbits is integer range 0 .. 6; -- 2**(2**7) might overflow float_exp: constant array( expbits) of integer := (1, 2, 4, 8, 16, 32, 64); longest_float_power: constant array( expbits) of longest_float := (2.0 ** 1, 2.0 ** 2, 2.0 ** 4, 2.0 ** 8, 2.0 ** 16, 2.0 ** 32, 2.0 ** 64); longest_float_neg_power: constant array( expbits) of longest_float := (0.5 ** 1, 0.5 ** 2, 0.5 ** 4, 0.5 ** 8, 0.5 ** 16, 0.5 ** 32, 0.5 ** 64); function exponent( x: longest_float) return integer is -- -- return the exponent k such that 1/2 <= x/(2**k) < 1, -- or zero for x = 0.0 . -- begin if x = 0.0 then return 0; end if; -- nonzero x -- essentially, just multiply by powers of two to get in range declare ax: longest_float := abs( x); exp: integer := 0; -- ax*2.0**exp is invariant begin if ax >= 1.0 then while ax >= longest_float_power( expbits'last) loop ax := ax * longest_float_neg_power( expbits'last); exp := exp + float_exp( expbits'last); end loop; -- ax < 2**64 for n in reverse expbits'first .. expbits'last - 1 loop if ax >= longest_float_power( n) then ax := ax * longest_float_neg_power( n); exp := exp + float_exp( n); end if; -- ax < longest_float_power( n) end loop; -- 1 <= ax < 2 return exp + 1; else -- ax < 1 while ax < longest_float_neg_power( expbits'last) loop ax := ax * longest_float_power( expbits'last); exp := exp - float_exp( expbits'last); end loop; -- 2**-64 <= ax < 1 for n in reverse expbits'first .. expbits'last - 1 loop if ax < longest_float_neg_power( n) then ax := ax * longest_float_power( n); exp := exp - float_exp( n); end if; -- longest_float_neg_power( n) <= ax < 1 end loop; -- 1/2 <= ax < 1 return exp; end if; end; end exponent; function mantissa (x: longest_float) return longest_float is -- -- return scale( x, - exponent( x)) if x is nonzero, 0.0 otherwise. -- begin if x = 0.0 then return 0.0; end if; -- nonzero x -- essentially, just multiply by powers of two to get in range declare ax: longest_float := abs( x); begin if ax >= 1.0 then while ax >= longest_float_power( expbits'last) loop ax := ax * longest_float_neg_power( expbits'last); end loop; for n in reverse expbits'first .. expbits'last - 1 loop if ax >= longest_float_power( n) then ax := ax * longest_float_neg_power( n); end if; end loop; if ax >= 1.0 then ax := ax * 0.5; -- 1 <= ax < 2 end if; else while ax < longest_float_neg_power( expbits'last) loop ax := ax * longest_float_power( expbits'last); end loop; for n in reverse expbits'first .. expbits'last - 1 loop if ax < longest_float_neg_power( n) then ax := ax * longest_float_power( n); end if; end loop; end if; if x > 0.0 then return ax; else return - ax; end if; end; end mantissa; function scale (x: longest_float; k: integer) return longest_float is -- -- return x * 2**k quickly, or quietly underflow to zero, -- or raise an exception on overflow. -- begin if x = 0.0 then return 0.0; end if; -- nonzero x. -- essentially, just multiply repeatedly by 2**(+-2**n). declare y: longest_float := x; exp: integer := k; -- y*2.0**exp is invariant begin if exp < 0 then while exp <= - float_exp( expbits'last) loop y := y * longest_float_neg_power( expbits'last); exp := exp + float_exp( expbits'last); end loop; for n in reverse expbits'first .. expbits'last - 1 loop if exp <= - float_exp( n) then y := y * longest_float_neg_power( n); exp := exp + float_exp( n); end if; end loop; else -- exp >= 0 while exp >= float_exp( expbits'last) loop y := y * longest_float_power( expbits'last); exp := exp - float_exp( expbits'last); end loop; for n in reverse expbits'first .. expbits'last - 1 loop if exp >= float_exp( n) then y := y * longest_float_power( n); exp := exp - float_exp( n); end if; end loop; end if; -- exp = 0 return y; end; end scale; -- -- the leading_part function reduces precision to a specified -- number of bits by truncation, not rounding. -- function leading_part( x: longest_float; k: integer) return longest_float is -- -- set all but the k most significant bits in the mantissa of x to zero, -- i.e. reduce the precision to k bits, truncating, not rounding. -- leading_part( x, k) = 0.0 if k < 1 and -- leading_part( x, k) = x if k >= float_type'machine_mantissa. -- kex: constant integer := k - exponent( x); begin -- leading_part -- the tests on k are needed only to avoid under/overflow if x = 0.0 or k < 1 then return 0.0; elsif k >= longest_float'machine_mantissa then return x; else return scale( truncate( scale( x, kex)), - kex); end if; end leading_part; -- -- oddness is checked by examining one bit of the base 2 -- representation of x. since leading zeroes, one leading one, -- and trailing zeroes of the number are not present explicitly -- in floating point, they are checked for with preliminary if tests. -- function odd( x: longest_float) return boolean is -- x2: constant longest_float := truncate( x) / 2.0; begin return x2 /= truncate( x2); end odd; -- -- truncation requires making all fractional bits zero in a signed -- magnitude representation of floating point numbers. -- for large numbers, this has no effect, as all large floats are integers. -- for abs( x) < 1.0 this produces zero. -- function truncate (x: longest_float) return longest_float is -- -- truncate x to the nearest integer value with absolute value -- not exceeding abs( x). No conversion to an integer type -- is expected, so truncate cannot overflow for large arguments. -- large: constant := 1073741824.0; large2: constant := large * large; large3: constant := large2 * large; large4: constant := large3 * large; type long is range - 1073741824 .. 1073741824; -- 2**120 is longer than a quadruple-precision mantissa ax: constant longest_float := abs( x); rd, frac: longest_float; begin if ax >= large4 then return x; end if; if ax <= large then rd := longest_float( long( x)); elsif ax <= large2 then -- large < abs x <= large2 frac := x - large * longest_float( long( x / large)); -- abs frac <= large / 2 frac := frac - longest_float( long( frac)); -- abs frac <= 0.5 rd := x - frac; elsif ax <= large3 then -- large2 < abs x <= large3 frac := x - large2 * longest_float( long( x / large2)); -- abs frac <= large2 / 2 frac := frac - large * longest_float( long( frac / large)); -- abs frac <= large / 2 frac := frac - longest_float( long( frac)); -- abs frac <= 0.5 rd := x - frac; else -- large3 < abs x < large4 frac := x - large3 * longest_float( long( x / large3)); -- abs frac <= large3 / 2 frac := frac - large2 * longest_float( long( frac / large2)); -- abs frac <= large2 / 2 frac := frac - large * longest_float( long( frac / large)); -- abs frac <= large / 2 frac := frac - longest_float( long( frac)); -- abs frac <= 0.5 rd := x - frac; end if; -- rd is x rounded to the nearest integer if x >= 0.0 then if rd <= x then return rd; else return rd - 1.0; end if; else if rd >= x then return rd; else return rd + 1.0; end if; end if; end truncate; -- -- all the other integer/fraction functions are based on truncate. -- function floor (x: longest_float) return longest_float is -- -- return as a longest_float the greatest integer value <= x. -- begin declare tx: constant longest_float := truncate( x); begin if x >= 0.0 or x = tx then return tx; else return tx - 1.0; -- exactly end if; end; end floor; function ceiling (x: longest_float) return longest_float is -- -- return as a longest_float the least integer value >= x. -- begin declare tx: constant longest_float := truncate( x); begin if x <= 0.0 or x = tx then return tx; else return tx + 1.0; -- exactly end if; end; end ceiling; function round (x: longest_float) return longest_float is -- -- return as a longest_float the integer value nearest x. -- in case of a tie, prefer the even value. -- begin declare tx: longest_float := truncate( x); diff: longest_float := x - tx; -- computed exactly begin if abs( diff) < 0.5 then return tx; elsif diff > 0.5 then return tx + 1.0; elsif diff < -0.5 then return tx - 1.0; elsif diff = 0.5 then if odd( tx) then return tx + 1.0; else return tx; end if; else -- diff = -0.5 if odd( tx) then return tx - 1.0; else return tx; end if; end if; end; end round; function "rem"( x, y: longest_float) return longest_float is -- -- returns the machine-representable value closest to -- x - y * round( x/ y), evaluated with exact arithmetic. -- The result is exact, except for y near zero, -- and only if the implementation does not handle denormalized numbers. -- numeric_error is raised if y = 0.0 . -- begin if y = 0.0 then raise numeric_error; elsif x = 0.0 then return x; else declare ax: longest_float := abs( x); ay: longest_float := abs( y); delexp: integer := exponent( ax) - exponent( ay); sy: longest_float := scale( ay, delexp); -- ax & sy now have same exponent odd_mult: boolean := false; begin -- We repeatedly subtract (ay times a power of 2) from ax. while delexp > 0 loop if ax >= sy then -- It is assumed that the floating point subtraction of -- two positives a and b is exact if 1/2 < a/b < 2. -- This condition holds for ax and sy. ax := ax - sy; end if; delexp := delexp - 1; sy := sy * 0.5; -- might use scale, for speed or exactness end loop; -- unroll last two iterations to handle the requirements -- dealing with evenness and size if delexp = 0 then -- sy = ay if ax >= sy then ax := ax - sy; odd_mult := true; end if; delexp := -1; sy := sy * 0.5; end if; if delexp = -1 then -- 0 <= ax < 2 * sy = ay if odd_mult and ax = sy then ax := - sy; elsif ax > sy then ax := ax - ay; end if; end if; if ax = 0.0 then return 0.0; elsif x > 0.0 then return ax; else return - ax; end if; end; end if; end "rem"; function adjacent( x: longest_float; towards: longest_float) return longest_float is -- -- returns x if towards = x, otherwise returns the float_point number -- nearest to x in the direction toward towards. -- begin if towards = x then return x; elsif towards > x then return successor( x); else return predecessor( x); end if; end adjacent; function successor( x: longest_float) return longest_float is -- -- return the next floating point number greater than x, -- or overflow if x = longest_float'base'last. -- begin if - machine_normal_small <= x and x < machine_normal_small then return x + machine_small; elsif x < 0.0 and mantissa( x) = 0.5 then return x + scale( half_machine_epsilon, exponent( x) - 1); elsif longest_float'machine_overflows or else x < longest_float'base'last then return x + scale( half_machine_epsilon, exponent( x)); else raise overflow_error; end if; end successor; function predecessor( x: longest_float) return longest_float is -- -- return the next floating point number less than x, -- or overflow if x = longest_float'base'first. -- begin if - machine_normal_small < x and x <= machine_normal_small then return x - machine_small; elsif x > 0.0 and mantissa( x) = 0.5 then return x - scale( half_machine_epsilon, exponent( x) - 1); elsif longest_float'machine_overflows or else x > longest_float'base'first then return x - scale( half_machine_epsilon, exponent( x)); else raise overflow_error; end if; end predecessor; function copy_sign( value: longest_float; sign: longest_float) return longest_float is -- -- returns abs( value) with the sign of sign, i.e. -- when sign > 0.0 or sign = +0.0 returns abs( value), -- when sign < 0.0 or sign = -0.0 returns - abs( value). -- begin if sign >= 0.0 then return abs( value); else return - abs( value); end if; end copy_sign; function multiply_sign( value: longest_float; sign: longest_float) return longest_float is -- -- returns value multiplied by the sign of sign, i.e. -- when sign > 0.0 or sign = +0.0 returns value, -- when sign < 0.0 or sign = -0.0 returns - value. -- begin if sign >= 0.0 then return value; else return - value; end if; end multiply_sign; begin -- longest_float_primitive_functions -- initialize constants requiring some computation -- depends on interpretation of 'machine_emin machine_normal_small := 2.0 ** (longest_float'machine_emin - 1); machine_small := machine_normal_small; end longest_float_primitive_functions; -- $Header: pl_primitive_functions_b.a,v 3.23 91/03/29 15:32:24 broman Stab $