Machine Precision: machar.c

Craig A. Mattocks morfz@mindspring.com
Sat Aug 19 00:52:41 2000


Hi,

I recently had to port some code over to Mac OS X and I needed some 
floating point precision information that was not contained in 
<limits.h>. I dug up the following widely published routine which may 
prove helpful to other developers in the same bind.

- Craig

/*************************************************************************
* This file combines the single and double precision versions of machar, *
* a routine for determining machine precision parameters.                *
* Compilation:                                                           *
*    Single precision: "cc -DSP -o machar machar.c"                      *
*    Double precision: "cc -DDP -o machar machar.c" (default)            *
* [This feature provided by D. G. Hough,  August 3, 1988.]               *
*************************************************************************/
#ifndef SP
#define DP 1
#endif

#ifdef SP
#define REAL float
#define ZERO 0.0
#define ONE 1.0
#define PREC "SINGLE "
#define REALSIZE 1
#endif

#ifdef DP
#define REAL double
#define ZERO 0.0e0
#define ONE 1.0e0
#define PREC "DOUBLE "
#define REALSIZE 2
#endif

#include <math.h>
#include <stdio.h>

#define ABS(xxx) ((xxx>ZERO)?(xxx):(-xxx))

main()
{
       /*************************************************************
       * This program prints hardware-determined double-precision   *
       * machine constants obtained from rmachar.  rmachar is a C   *
       * translation of the Fortran routine MACHAR from W. J. Cody, *
       * "MACHAR: A subroutine to dynamically determine machine     *
       * parameters".  TOMS (14), 1988.                             *
       * Descriptions of the machine constants are given in the     *
       * prologue comments in rmachar.                              *
       * Subprograms called:                                        *
       *    rmachar                                                 *
       * Original driver: Richard Bartels, October 16, 1985         *
       * Modified by: W. J. Cody July 26, 1988                      *
       *************************************************************/
       int ibeta,iexp,irnd,it,machep,maxexp,minexp,negep,ngrd;
       int i;
       REAL eps,epsneg, xmax,xmin;
       union wjc
       {
          long int jj[REALSIZE];
          REAL xbig;
       } uval;

       rmachar (&ibeta,&it,&irnd,&ngrd,&machep,&negep,
                &iexp,&minexp,&maxexp, &eps,&epsneg, &xmin,&xmax);

#define DISPLAY(s,x) { \
	uval.xbig = x ; \
	printf(s); \
	printf(" %24.16e ",(double) x) ; \
	for(i=0;i<REALSIZE;i++) printf(" %9X ",uval.jj[i]) ; \
	printf("\n"); \
	}

       printf ("---------------------------------------------\n");
       printf (PREC);
       printf ("precision machine precision parameters\n");
       printf ("---------------------------------------------\n");
       printf ("Base (radix) of floating-point representation : ibeta 
= %d\n", ibeta );
       printf ("Number of digits in floating-point mantissa   : it 
= %d\n", it    );
       printf ("Rounding/underflow indicator (IEEE = 2 or 5)  : irnd 
= %d\n", irnd  );
       printf ("Number of guard digits for mult truncation    : ngrd 
= %d\n", ngrd  );
       printf ("Largest neg exponent so 1+(ibeta)**machep != 1: machep 
= %d\n", machep);
       printf ("Largest neg exponent so 1-(ibeta)**negep  != 1: negep 
= %d\n", negep );
       printf ("Number of bits in the exponent                : iexp 
= %d\n", iexp  );
       printf ("Most neg power --> no lead zeros in mantissa  : minexp 
= %d\n", minexp);
       printf ("Smallest pos power of ibeta that overflows    : maxexp 
= %d\n", maxexp);
       DISPLAY("Floating-point precsion (ibeta)**machep       : eps 
="     , eps   );
       DISPLAY("Floating-point precsion (ibeta)**negep        : epsneg 
="     , epsneg);
       DISPLAY("Smallest useable floating-point number        : xmin 
="     , xmin  );
       DISPLAY("Largest  useable floating-point number        : xmax 
="     , xmax  );
}

rmachar (ibeta,it,irnd,ngrd,machep,negep,
          iexp,minexp,maxexp, eps,epsneg, xmin,xmax)
       /**********************************************************************
       * This subroutine is intended to determine the parameters of the      *
       * floating-point arithmetic system specified below.  The              *
       * determination of the first three uses an extension of an algorithm  *
       * due to M. Malcolm, CACM 15 (1972), pp. 949-951, incorporating some, *
       * but not all, of the improvements suggested by M. Gentleman and S.   *
       * Marovich, CACM 17 (1974), pp. 276-277.  An earlier version of this  *
       * program was published in the book Software Manual for the           *
       * Elementary Functions by W. J. Cody and W. Waite, Prentice-Hall,     *
       * Englewood Cliffs, NJ, 1980.  The present program is a translation   *
       * of the Fortran 77 program in W. J. Cody, "MACHAR: A subroutine to   *
       * dynamically determine machine parameters". TOMS (14), 1988.         *
       *                                                                     *
       * Parameter values reported are as follows:                           *
       *                                                                     *
       *    ibeta   - the radix for the floating-point representation        *
       *    it      - the number of base ibeta digits in the floating-point  *
       *              significand                                            *
       *    irnd    - 0 if floating-point addition chops                     *
       *              1 if floating-point addition rounds, but not in the    *
       *                IEEE style                                           *
       *              2 if floating-point addition rounds in the IEEE style  *
       *              3 if floating-point addition chops, and there is       *
       *                partial underflow                                    *
       *              4 if floating-point addition rounds, but not in the    *
       *                IEEE style, and there is partial underflow           *
       *              5 if floating-point addition rounds in the IEEE style, *
       *                and there is partial underflow                       *
       *    ngrd    - the number of guard digits for multiplication with     *
       *              truncating arithmetic.  It is                          *
       *              0 if floating-point arithmetic rounds, or if it        *
       *                truncates and only  it  base  ibeta digits           *
       *                participate in the post-normalization shift of the   *
       *                floating-point significand in multiplication;        *
       *              1 if floating-point arithmetic truncates and more      *
       *                than  it  base  ibeta  digits participate in the     *
       *                post-normalization shift of the floating-point       *
       *                significand in multiplication.                       *
       *    machep  - the largest negative integer such that                 *
       *              1.0+FLOAT(ibeta)**machep .NE. 1.0, except that         *
       *              machep is bounded below by  -(it+3)                    *
       *    negep   - the largest negative integer such that                 *
       *              1.0-FLOAT(ibeta)**negeps .NE. 1.0, except that         *
       *              negeps is bounded below by  -(it+3)                    *
       *    iexp    - the number of bits (decimal places if ibeta = 10)      *
       *              reserved for the representation of the exponent        *
       *              (including the bias or sign) of a floating-point       *
       *              number.                                                *
       *    minexp  - the largest in magnitude negative integer such that    *
       *              FLOAT(ibeta)**minexp is positive and normalized        *
       *    maxexp  - the smallest positive power of  BETA  that overflows   *
       *    eps     - the smallest positive floating-point number such       *
       *              that  1.0+eps .NE. 1.0. In particular, if either       *
       *              ibeta = 2  or  IRND = 0, eps = FLOAT(ibeta)**machep.   *
       *              Otherwise,  eps = (FLOAT(ibeta)**machep)/2             *
       *    epsneg  - A small positive floating-point number such that       *
       *              1.0-epsneg .NE. 1.0. In particular, if ibeta = 2       *
       *              or  IRND = 0, epsneg = FLOAT(ibeta)**negeps.           *
       *              Otherwise,  epsneg = (ibeta**negeps)/2.  Because       *
       *              negeps is bounded below by -(it+3), epsneg may not     *
       *              be the smallest number that can alter 1.0 by           *
       *              subtraction.                                           *
       *    xmin    - the smallest non-vanishing normalized floating-point   *
       *              power of the radix, i.e.,  xmin = FLOAT(ibeta)**minexp *
       *    xmax    - the largest finite floating-point number.  In          *
       *              particular, xmax = (1.0-epsneg)*FLOAT(ibeta)**maxexp   *
       *              Note - on some machines  xmax  will be only the        *
       *              second, or perhaps third, largest number, being        *
       *              too small by 1 or 2 units in the last digit of         *
       *              the significand.                                       *
       *                                                                     *
       * Latest revision - August 4, 1988                                    *
       * Author - W. J. Cody, Argonne National Laboratory                    *
       **********************************************************************/
       int *ibeta,*iexp,*irnd,*it,*machep,*maxexp,*minexp,*negep,*ngrd;
       REAL *eps,*epsneg, *xmax,*xmin;
{
       int i,iz,j,k;
       int mx,itmp,nxres;
       REAL a,b,beta,betain,one,y,z,zero;
       REAL betah,t,tmp,tmpa,tmp1,two;

       (*irnd) = 1;
       one = (REAL)(*irnd);
       two = one + one;
       a = two;
       b = a;
       zero = 0.0e0;

       /****************************************
       * Determine ibeta and beta a la Malcolm *
       ****************************************/
       tmp = ((a+one)-a)-one;
       while (tmp == zero)
       {
          a = a+a;
          tmp = a+one;
          tmp1 = tmp-a;
          tmp = tmp1-one;
       }
       tmp = a+b;
       itmp = (int)(tmp-a);
       while (itmp == 0)
       {
          b = b+b;
          tmp = a+b;
          itmp = (int)(tmp-a);
       }
       *ibeta = itmp;
       beta = (REAL)(*ibeta);

       /*********************
       * Determine irnd, it *
       *********************/
       (*it) = 0;
       b = one;
       tmp = ((b+one)-b)-one;
       while (tmp == zero)
       {
          *it = *it+1;
          b = b*beta;
          tmp = b+one;
          tmp1 = tmp-b;
          tmp = tmp1-one;
       }
       *irnd = 0;
       betah = beta/two;
       tmp = a+betah;
       tmp1 = tmp-a;
       if (tmp1 != zero) *irnd = 1;
       tmpa = a+beta;
       tmp = tmpa+betah;
       if ((*irnd == 0) && (tmp-tmpa != zero)) *irnd = 2;

       /**************************
       * Determine negep, epsneg *
       **************************/
       (*negep) = (*it) + 3;
       betain = one / beta;
       a = one;
       for (i = 1; i<=(*negep); i++)
       {
          a = a * betain;
       }
       b = a;
       tmp = (one-a);
       tmp = tmp-one;
       while (tmp == zero)
       {
          a = a*beta;
          *negep = *negep-1;
          tmp1 = one-a;
          tmp = tmp1-one;
       }
       (*negep) = -(*negep);
       (*epsneg) = a;

       /************************
       * Determine machep, eps *
       ************************/
       (*machep) = -(*it) - 3;
       a = b;
       tmp = one+a;
       while (tmp-one == zero)
       {
          a = a*beta;
          *machep = *machep+1;
          tmp = one+a;
       }
       *eps = a;

       /*****************
       * Determine ngrd *
       *****************/
       (*ngrd) = 0;
       tmp = one+*eps;
       tmp = tmp*one;
       if (((*irnd) == 0) && (tmp-one) != zero) (*ngrd) = 1;

       /**********************************************
       * Determine iexp, minexp, xmin.               *
       * Loop to determine largest i such that       *
       *    (1/beta) ** (2**(i))                     *
       * does not underflow.                         *
       * Exit from loop is signaled by an underflow. *
       **********************************************/
       i = 0;
       k = 1;
       z = betain;
       t = one+*eps;
       nxres = 0;
       for (;;)
       {
          y = z;
          z = y * y;
          /***********************
          * Check for underflow. *
          ***********************/
          a = z * one;
          tmp = z*t;
          if ((a+a == zero) || (ABS(z) > y)) break;
          tmp1 = tmp*betain;
          if (tmp1*beta == z) break;
          i = i + 1;
          k = k+k;
       }

       /********************************************************
       * Determine k such that (1/beta)**k does not underflow. *
       * First set  k = 2 ** i                                 *
       ********************************************************/
       (*iexp) = i + 1;
       mx = k + k;
       if (*ibeta == 10)
       {
         /*****************************
         * For decimal machines only: *
         *****************************/
         (*iexp) = 2;
          iz = *ibeta;
          while (k >= iz)
          {
             iz = iz * (*ibeta);
             (*iexp) = (*iexp) + 1;
          }
          mx = iz + iz - 1;
       }

       /**********************************************
       * Loop to determine minexp, xmin.             *
       * Exit from loop is signaled by an underflow. *
       **********************************************/
       for (;;)
       {
          (*xmin) = y;
          y = y * betain;
          a = y * one;
          tmp = y*t;
          tmp1 = a+a;
          if ((tmp1 == zero) || (ABS(y) >= (*xmin))) break;
          k = k + 1;
          tmp1 = tmp*betain;
          tmp1 = tmp1*beta;
          if ((tmp1 == y) && (tmp != y))
          {
             nxres = 3;
             *xmin = y;
             break;
          }
       }
       (*minexp) = -k;

       /**************************
       * Determine maxexp, xmax. *
       **************************/
       if ((mx <= k+k-3) && ((*ibeta) != 10))
       {
          mx = mx + mx;
          (*iexp) = (*iexp) + 1;
       }
       (*maxexp) = mx + (*minexp);

       /*********************************************
       * Adjust *irnd to reflect partial underflow. *
       *********************************************/
       (*irnd) = (*irnd) + nxres;

       /**********************************
       * Adjust for IEEE style machines. *
       **********************************/
       if ((*irnd) >= 2) (*maxexp) = (*maxexp)-2;

       /**********************************************************
       * Adjust for machines with implicit leading bit in binary *
       * significand and machines with radix point at extreme    *
       * right of significand.                                   *
       **********************************************************/
       i = (*maxexp) + (*minexp);
       if (((*ibeta) == 2) && (i == 0)) (*maxexp) = (*maxexp) - 1;
       if (i > 20) (*maxexp) = (*maxexp) - 1;
       if (a != y) (*maxexp) = (*maxexp) - 2;
       (*xmax) = one - (*epsneg);
       tmp = (*xmax)*one;
       if (tmp != (*xmax)) (*xmax) = one - beta * (*epsneg);
       (*xmax) = (*xmax) / (beta * beta * beta * (*xmin));
       i = (*maxexp) + (*minexp) + 3;
       if (i > 0)
       {
          for (j = 1; j<=i; j++ )
          {
              if ((*ibeta) == 2) (*xmax) = (*xmax) + (*xmax);
              if ((*ibeta) != 2) (*xmax) = (*xmax) * beta;
          }
       }
       return;
}