1  /******************************************************************************


2  Copyright (c) 2014 Ryan Juckett


3  http://www.ryanjuckett.com/


4 


5  This software is provided 'asis', without any express or implied


6  warranty. In no event will the authors be held liable for any damages


7  arising from the use of this software.


8 


9  Permission is granted to anyone to use this software for any purpose,


10  including commercial applications, and to alter it and redistribute it


11  freely, subject to the following restrictions:


12 


13  1. The origin of this software must not be misrepresented; you must not


14  claim that you wrote the original software. If you use this software


15  in a product, an acknowledgment in the product documentation would be


16  appreciated but is not required.


17 


18  2. Altered source versions must be plainly marked as such, and must not be


19  misrepresented as being the original software.


20 


21  3. This notice may not be removed or altered from any source


22  distribution.


23  ******************************************************************************/


24 


25  #include "Dragon4.h"


26  #include "MathDragon4.h"


27  #include <math.h>


28 


29  //******************************************************************************


30  // Maximum number of 32 bit blocks needed in high precision arithmetic


31  // to print out 64 bit IEEE floating point values.


32  //******************************************************************************


33  const tU32 c_BigInt_MaxBlocks = 35;


34 


35  //******************************************************************************


36  // This structure stores a high precision unsigned integer. It uses a buffer


37  // of 32 bit integer blocks along with a length. The lowest bits of the integer


38  // are stored at the start of the buffer and the length is set to the minimum


39  // value that contains the integer. Thus, there are never any zero blocks at the


40  // end of the buffer.


41  //******************************************************************************


42  struct tBigInt


43  {


44  // Copy integer


45  tBigInt & operator=(const tBigInt &rhs)


46  {


47  tU32 length = rhs.m_length;


48  tU32 * pLhsCur = m_blocks;


49  for (const tU32 *pRhsCur = rhs.m_blocks, *pRhsEnd = pRhsCur + length;


50  pRhsCur != pRhsEnd;


51  ++pLhsCur, ++pRhsCur)


52  {


53  *pLhsCur = *pRhsCur;


54  }


55  m_length = length;


56  return *this;


57  }


58 


59  // Data accessors


60  tU32 GetLength() const { return m_length; }


61  tU32 GetBlock(tU32 idx) const { return m_blocks[idx]; }


62 


63  // Zero helper functions


64  void SetZero() { m_length = 0; }


65  tB IsZero() const { return m_length == 0; }


66 


67  // Basic type accessors


68  void SetU64(tU64 val)


69  {


70  if (val > 0xFFFFFFFF)


71  {


72  m_blocks[0] = val & 0xFFFFFFFF;


73  m_blocks[1] = (val >> 32) & 0xFFFFFFFF;


74  m_length = 2;


75  }


76  else if (val != 0)


77  {


78  m_blocks[0] = val & 0xFFFFFFFF;


79  m_length = 1;


80  }


81  else


82  {


83  m_length = 0;


84  }


85  }


86 


87  void SetU32(tU32 val)


88  {


89  if (val != 0)


90  {


91  m_blocks[0] = val;


92  m_length = (val != 0);


93  }


94  else


95  {


96  m_length = 0;


97  }


98  }


99 


100  tU32 GetU32() const { return (m_length == 0) ? 0 : m_blocks[0]; }


101 


102  // Member data


103  tU32 m_length;


104  tU32 m_blocks[c_BigInt_MaxBlocks];


105  };


106 


107  //******************************************************************************


108  // Returns 0 if (lhs = rhs), negative if (lhs < rhs), positive if (lhs > rhs)


109  //******************************************************************************


110  static tS32 BigInt_Compare(const tBigInt & lhs, const tBigInt & rhs)


111  {


112  // A bigger length implies a bigger number.


113  tS32 lengthDiff = lhs.m_length  rhs.m_length;


114  if (lengthDiff != 0)


115  return lengthDiff;


116 


117  // Compare blocks one by one from high to low.


118  for (tS32 i = lhs.m_length  1; i >= 0; i)


119  {


120  if (lhs.m_blocks[i] == rhs.m_blocks[i])


121  continue;


122  else if (lhs.m_blocks[i] > rhs.m_blocks[i])


123  return 1;


124  else


125  return 1;


126  }


127 


128  // no blocks differed


129  return 0;


130  }


131 


132  //******************************************************************************


133  // result = lhs + rhs


134  //******************************************************************************


135  static void BigInt_Add(tBigInt * pResult, const tBigInt & lhs, const tBigInt & rhs)


136  {


137  // determine which operand has the smaller length


138  const tBigInt * pLarge;


139  const tBigInt * pSmall;


140  if (lhs.m_length < rhs.m_length)


141  {


142  pSmall = &lhs;


143  pLarge = &rhs;


144  }


145  else


146  {


147  pSmall = &rhs;


148  pLarge = &lhs;


149  }


150 


151  const tU32 largeLen = pLarge>m_length;


152  const tU32 smallLen = pSmall>m_length;


153 


154  // The output will be at least as long as the largest input


155  pResult>m_length = largeLen;


156 


157  // Add each block and add carry the overflow to the next block


158  tU64 carry = 0;


159  const tU32 * pLargeCur = pLarge>m_blocks;


160  const tU32 * pLargeEnd = pLargeCur + largeLen;


161  const tU32 * pSmallCur = pSmall>m_blocks;


162  const tU32 * pSmallEnd = pSmallCur + smallLen;


163  tU32 * pResultCur = pResult>m_blocks;


164  while (pSmallCur != pSmallEnd)


165  {


166  tU64 sum = carry + (tU64)(*pLargeCur) + (tU64)(*pSmallCur);


167  carry = sum >> 32;


168  (*pResultCur) = sum & 0xFFFFFFFF;


169  ++pLargeCur;


170  ++pSmallCur;


171  ++pResultCur;


172  }


173 


174  // Add the carry to any blocks that only exist in the large operand


175  while (pLargeCur != pLargeEnd)


176  {


177  tU64 sum = carry + (tU64)(*pLargeCur);


178  carry = sum >> 32;


179  (*pResultCur) = sum & 0xFFFFFFFF;


180  ++pLargeCur;


181  ++pResultCur;


182  }


183 


184  // If there's still a carry, append a new block


185  if (carry != 0)


186  {


187  RJ_ASSERT(carry == 1);


188  RJ_ASSERT((tU32)(pResultCur  pResult>m_blocks) == largeLen && (largeLen < c_BigInt_MaxBlocks));


189  *pResultCur = 1;


190  pResult>m_length = largeLen + 1;


191  }


192  else


193  {


194  pResult>m_length = largeLen;


195  }


196  }


197 


198  //******************************************************************************


199  // result = lhs * rhs


200  //******************************************************************************


201  static void BigInt_Multiply(tBigInt * pResult, const tBigInt &lhs, const tBigInt &rhs)


202  {


203  RJ_ASSERT( pResult != &lhs && pResult != &rhs );


204 


205  // determine which operand has the smaller length


206  const tBigInt * pLarge;


207  const tBigInt * pSmall;


208  if (lhs.m_length < rhs.m_length)


209  {


210  pSmall = &lhs;


211  pLarge = &rhs;


212  }


213  else


214  {


215  pSmall = &rhs;


216  pLarge = &lhs;


217  }


218 


219  // set the maximum possible result length


220  tU32 maxResultLen = pLarge>m_length + pSmall>m_length;


221  RJ_ASSERT( maxResultLen <= c_BigInt_MaxBlocks );


222 


223  // clear the result data


224  for(tU32 * pCur = pResult>m_blocks, *pEnd = pCur + maxResultLen; pCur != pEnd; ++pCur)


225  *pCur = 0;


226 


227  // perform standard long multiplication


228  const tU32 *pLargeBeg = pLarge>m_blocks;


229  const tU32 *pLargeEnd = pLargeBeg + pLarge>m_length;


230 


231  // for each small block


232  tU32 *pResultStart = pResult>m_blocks;


233  for(const tU32 *pSmallCur = pSmall>m_blocks, *pSmallEnd = pSmallCur + pSmall>m_length;


234  pSmallCur != pSmallEnd;


235  ++pSmallCur, ++pResultStart)


236  {


237  // if nonzero, multiply against all the large blocks and add into the result


238  const tU32 multiplier = *pSmallCur;


239  if (multiplier != 0)


240  {


241  const tU32 *pLargeCur = pLargeBeg;


242  tU32 *pResultCur = pResultStart;


243  tU64 carry = 0;


244  do


245  {


246  tU64 product = (*pResultCur) + (*pLargeCur)*(tU64)multiplier + carry;


247  carry = product >> 32;


248  *pResultCur = product & 0xFFFFFFFF;


249  ++pLargeCur;


250  ++pResultCur;


251  } while(pLargeCur != pLargeEnd);


252 


253  RJ_ASSERT(pResultCur < pResult>m_blocks + maxResultLen);


254  *pResultCur = (tU32)(carry & 0xFFFFFFFF);


255  }


256  }


257 


258  // check if the terminating block has no set bits


259  if (maxResultLen > 0 && pResult>m_blocks[maxResultLen  1] == 0)


260  pResult>m_length = maxResultLen1;


261  else


262  pResult>m_length = maxResultLen;


263  }


264 


265  //******************************************************************************


266  // result = lhs * rhs


267  //******************************************************************************


268  static void BigInt_Multiply(tBigInt * pResult, const tBigInt & lhs, tU32 rhs)


269  {


270  // perform long multiplication


271  tU32 carry = 0;


272  tU32 *pResultCur = pResult>m_blocks;


273  const tU32 *pLhsCur = lhs.m_blocks;


274  const tU32 *pLhsEnd = lhs.m_blocks + lhs.m_length;


275  for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++pResultCur )


276  {


277  tU64 product = (tU64)(*pLhsCur) * rhs + carry;


278  *pResultCur = (tU32)(product & 0xFFFFFFFF);


279  carry = product >> 32;


280  }


281 


282  // if there is a remaining carry, grow the array


283  if (carry != 0)


284  {


285  // grow the array


286  RJ_ASSERT(lhs.m_length + 1 <= c_BigInt_MaxBlocks);


287  *pResultCur = (tU32)carry;


288  pResult>m_length = lhs.m_length + 1;


289  }


290  else


291  {


292  pResult>m_length = lhs.m_length;


293  }


294  }


295 


296  //******************************************************************************


297  // result = in * 2


298  //******************************************************************************


299  static void BigInt_Multiply2(tBigInt * pResult, const tBigInt &in)


300  {


301  // shift all the blocks by one


302  tU32 carry = 0;


303 


304  tU32 *pResultCur = pResult>m_blocks;


305  const tU32 *pLhsCur = in.m_blocks;


306  const tU32 *pLhsEnd = in.m_blocks + in.m_length;


307  for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++pResultCur )


308  {


309  tU32 cur = *pLhsCur;


310  *pResultCur = (cur << 1)  carry;


311  carry = cur >> 31;


312  }


313 


314  if (carry != 0)


315  {


316  // grow the array


317  RJ_ASSERT(in.m_length + 1 <= c_BigInt_MaxBlocks);


318  *pResultCur = carry;


319  pResult>m_length = in.m_length + 1;


320  }


321  else


322  {


323  pResult>m_length = in.m_length;


324  }


325  }


326 


327  //******************************************************************************


328  // result = result * 2


329  //******************************************************************************


330  static void BigInt_Multiply2(tBigInt * pResult)


331  {


332  // shift all the blocks by one


333  tU32 carry = 0;


334 


335  tU32 *pCur = pResult>m_blocks;


336  tU32 *pEnd = pResult>m_blocks + pResult>m_length;


337  for ( ; pCur != pEnd; ++pCur )


338  {


339  tU32 cur = *pCur;


340  *pCur = (cur << 1)  carry;


341  carry = cur >> 31;


342  }


343 


344  if (carry != 0)


345  {


346  // grow the array


347  RJ_ASSERT(pResult>m_length + 1 <= c_BigInt_MaxBlocks);


348  *pCur = carry;


349  ++pResult>m_length;


350  }


351  }


352 


353  //******************************************************************************


354  // result = result * 10


355  //******************************************************************************


356  static void BigInt_Multiply10(tBigInt * pResult)


357  {


358  // multiply all the blocks


359  tU64 carry = 0;


360 


361  tU32 *pCur = pResult>m_blocks;


362  tU32 *pEnd = pResult>m_blocks + pResult>m_length;


363  for ( ; pCur != pEnd; ++pCur )


364  {


365  tU64 product = (tU64)(*pCur) * 10ull + carry;


366  (*pCur) = (tU32)(product & 0xFFFFFFFF);


367  carry = product >> 32;


368  }


369 


370  if (carry != 0)


371  {


372  // grow the array


373  RJ_ASSERT(pResult>m_length + 1 <= c_BigInt_MaxBlocks);


374  *pCur = (tU32)carry;


375  ++pResult>m_length;


376  }


377  }


378 


379  //******************************************************************************


380  //******************************************************************************


381  static tU32 g_PowerOf10_U32[] =


382  {


383  1, // 10 ^ 0


384  10, // 10 ^ 1


385  100, // 10 ^ 2


386  1000, // 10 ^ 3


387  10000, // 10 ^ 4


388  100000, // 10 ^ 5


389  1000000, // 10 ^ 6


390  10000000, // 10 ^ 7


391  };


392 


393  //******************************************************************************


394  // Note: This has a lot of wasted space in the big integer structures of the


395  // early table entries. It wouldn't be terribly hard to make the multiply


396  // function work on integer pointers with an array length instead of


397  // the tBigInt struct which would allow us to store a minimal amount of


398  // data here.


399  //******************************************************************************


400  static tBigInt g_PowerOf10_Big[] =


401  {


402  // 10 ^ 8


403  { 1, { 100000000 } },


404  // 10 ^ 16


405  { 2, { 0x6fc10000, 0x002386f2 } },


406  // 10 ^ 32


407  { 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } },


408  // 10 ^ 64


409  { 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed, 0xe93ff9f4, 0x00184f03, } },


410  // 10 ^ 128


411  { 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01, 0x03df9909, 0x0f1538fd,


412  0x2374e42f, 0xd3cff5ec, 0xc404dc08, 0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } },


413  // 10 ^ 256


414  { 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,


415  0x00000000, 0x982e7c01, 0xbed3875b, 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70,


416  0xd595d80f, 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, 0xcc5573c0,


417  0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, 0x5fdcefce, 0x000553f7, } }


418  };


419 


420  //******************************************************************************


421  // result = 10^exponent


422  //******************************************************************************


423  static void BigInt_Pow10(tBigInt * pResult, tU32 exponent)


424  {


425  // make sure the exponent is within the bounds of the lookup table data


426  RJ_ASSERT(exponent < 512);


427 


428  // create two temporary values to reduce large integer copy operations


429  tBigInt temp1;


430  tBigInt temp2;


431  tBigInt *pCurTemp = &temp1;


432  tBigInt *pNextTemp = &temp2;


433 


434  // initialize the result by looking up a 32bit power of 10 corresponding to the first 3 bits


435  tU32 smallExponent = exponent & 0x7;


436  pCurTemp>SetU32(g_PowerOf10_U32[smallExponent]);


437 


438  // remove the low bits that we used for the 32bit lookup table


439  exponent >>= 3;


440  tU32 tableIdx = 0;


441 


442  // while there are remaining bits in the exponent to be processed


443  while (exponent != 0)


444  {


445  // if the current bit is set, multiply it with the corresponding power of 10


446  if(exponent & 1)


447  {


448  // multiply into the next temporary


449  BigInt_Multiply( pNextTemp, *pCurTemp, g_PowerOf10_Big[tableIdx] );


450 


451  // swap to the next temporary


452  tBigInt * pSwap = pCurTemp;


453  pCurTemp = pNextTemp;


454  pNextTemp = pSwap;


455  }


456 


457  // advance to the next bit


458  ++tableIdx;


459  exponent >>= 1;


460  }


461 


462  // output the result


463  *pResult = *pCurTemp;


464  }


465 


466  //******************************************************************************


467  // result = in * 10^exponent


468  //******************************************************************************


469  static void BigInt_MultiplyPow10(tBigInt * pResult, const tBigInt & in, tU32 exponent)


470  {


471  // make sure the exponent is within the bounds of the lookup table data


472  RJ_ASSERT(exponent < 512);


473 


474  // create two temporary values to reduce large integer copy operations


475  tBigInt temp1;


476  tBigInt temp2;


477  tBigInt *pCurTemp = &temp1;


478  tBigInt *pNextTemp = &temp2;


479 


480  // initialize the result by looking up a 32bit power of 10 corresponding to the first 3 bits


481  tU32 smallExponent = exponent & 0x7;


482  if (smallExponent != 0)


483  {


484  BigInt_Multiply( pCurTemp, in, g_PowerOf10_U32[smallExponent] );


485  }


486  else


487  {


488  *pCurTemp = in;


489  }


490 


491  // remove the low bits that we used for the 32bit lookup table


492  exponent >>= 3;


493  tU32 tableIdx = 0;


494 


495  // while there are remaining bits in the exponent to be processed


496  while (exponent != 0)


497  {


498  // if the current bit is set, multiply it with the corresponding power of 10


499  if(exponent & 1)


500  {


501  // multiply into the next temporary


502  BigInt_Multiply( pNextTemp, *pCurTemp, g_PowerOf10_Big[tableIdx] );


503 


504  // swap to the next temporary


505  tBigInt * pSwap = pCurTemp;


506  pCurTemp = pNextTemp;


507  pNextTemp = pSwap;


508  }


509 


510  // advance to the next bit


511  ++tableIdx;


512  exponent >>= 1;


513  }


514 


515  // output the result


516  *pResult = *pCurTemp;


517  }


518 


519  //******************************************************************************


520  // result = 2^exponent


521  //******************************************************************************


522  static inline void BigInt_Pow2(tBigInt * pResult, tU32 exponent)


523  {


524  tU32 blockIdx = exponent / 32;


525  RJ_ASSERT( blockIdx < c_BigInt_MaxBlocks );


526 


527  for ( tU32 i = 0; i <= blockIdx; ++i)


528  pResult>m_blocks[i] = 0;


529 


530  pResult>m_length = blockIdx + 1;


531 


532  tU32 bitIdx = (exponent % 32);


533  pResult>m_blocks[blockIdx] = (1 << bitIdx);


534  }


535 


536  //******************************************************************************


537  // This function will divide two large numbers under the assumption that the


538  // result is within the range [0,10) and the input numbers have been shifted


539  // to satisfy:


540  //  The highest block of the divisor is greater than or equal to 8 such that


541  // there is enough precision to make an accurate first guess at the quotient.


542  //  The highest block of the divisor is less than the maximum value on an


543  // unsigned 32bit integer such that we can safely increment without overflow.


544  //  The dividend does not contain more blocks than the divisor such that we


545  // can estimate the quotient by dividing the equivalently placed high blocks.


546  //


547  // quotient = floor(dividend / divisor)


548  // remainder = dividend  quotient*divisor


549  //


550  // pDividend is updated to be the remainder and the quotient is returned.


551  //******************************************************************************


552  static tU32 BigInt_DivideWithRemainder_MaxQuotient9(tBigInt * pDividend, const tBigInt & divisor)


553  {


554  // Check that the divisor has been correctly shifted into range and that it is not


555  // smaller than the dividend in length.


556  RJ_ASSERT( !divisor.IsZero() &&


557  divisor.m_blocks[divisor.m_length1] >= 8 &&


558  divisor.m_blocks[divisor.m_length1] < 0xFFFFFFFF &&


559  pDividend>m_length <= divisor.m_length );


560 


561  // If the dividend is smaller than the divisor, the quotient is zero and the divisor is already


562  // the remainder.


563  tU32 length = divisor.m_length;


564  if (pDividend>m_length < divisor.m_length)


565  return 0;


566 


567  const tU32 * pFinalDivisorBlock = divisor.m_blocks + length  1;


568  tU32 * pFinalDividendBlock = pDividend>m_blocks + length  1;


569 


570  // Compute an estimated quotient based on the high block value. This will either match the actual quotient or


571  // undershoot by one.


572  tU32 quotient = *pFinalDividendBlock / (*pFinalDivisorBlock + 1);


573  RJ_ASSERT(quotient <= 9);


574 


575  // Divide out the estimated quotient


576  if (quotient != 0)


577  {


578  // dividend = dividend  divisor*quotient


579  const tU32 *pDivisorCur = divisor.m_blocks;


580  tU32 *pDividendCur = pDividend>m_blocks;


581 


582  tU64 borrow = 0;


583  tU64 carry = 0;


584  do


585  {


586  tU64 product = (tU64)*pDivisorCur * (tU64)quotient + carry;


587  carry = product >> 32;


588 


589  tU64 difference = (tU64)*pDividendCur  (product & 0xFFFFFFFF)  borrow;


590  borrow = (difference >> 32) & 1;


591 


592  *pDividendCur = difference & 0xFFFFFFFF;


593 


594  ++pDivisorCur;


595  ++pDividendCur;


596  } while(pDivisorCur <= pFinalDivisorBlock);


597 


598  // remove all leading zero blocks from dividend


599  while (length > 0 && pDividend>m_blocks[length  1] == 0)


600  length;


601 


602  pDividend>m_length = length;


603  }


604 


605  // If the dividend is still larger than the divisor, we overshot our estimate quotient. To correct,


606  // we increment the quotient and subtract one more divisor from the dividend.


607  if ( BigInt_Compare(*pDividend, divisor) >= 0 )


608  {


609  ++quotient;


610 


611  // dividend = dividend  divisor


612  const tU32 *pDivisorCur = divisor.m_blocks;


613  tU32 *pDividendCur = pDividend>m_blocks;


614 


615  tU64 borrow = 0;


616  do


617  {


618  tU64 difference = (tU64)*pDividendCur  (tU64)*pDivisorCur  borrow;


619  borrow = (difference >> 32) & 1;


620 


621  *pDividendCur = difference & 0xFFFFFFFF;


622 


623  ++pDivisorCur;


624  ++pDividendCur;


625  } while(pDivisorCur <= pFinalDivisorBlock);


626 


627  // remove all leading zero blocks from dividend


628  while (length > 0 && pDividend>m_blocks[length  1] == 0)


629  length;


630 


631  pDividend>m_length = length;


632  }


633 


634  return quotient;


635  }


636 


637  //******************************************************************************


638  // result = result << shift


639  //******************************************************************************


640  static void BigInt_ShiftLeft(tBigInt * pResult, tU32 shift)


641  {


642  RJ_ASSERT( shift != 0 );


643 


644  tU32 shiftBlocks = shift / 32;


645  tU32 shiftBits = shift % 32;


646 


647  // process blocks high to low so that we can safely process in place


648  const tU32 * pInBlocks = pResult>m_blocks;


649  tS32 inLength = pResult>m_length;


650  RJ_ASSERT( inLength + shiftBlocks <= c_BigInt_MaxBlocks );


651 


652  // check if the shift is block aligned


653  if (shiftBits == 0)


654  {


655  // copy blocks from high to low


656  for (tU32 * pInCur = pResult>m_blocks + inLength  1, *pOutCur = pInCur + shiftBlocks;


657  pInCur >= pInBlocks;


658  pInCur, pOutCur)


659  {


660  *pOutCur = *pInCur;


661  }


662 


663  // zero the remaining low blocks


664  for ( tU32 i = 0; i < shiftBlocks; ++i)


665  pResult>m_blocks[i] = 0;


666 


667  pResult>m_length += shiftBlocks;


668  }


669  // else we need to shift partial blocks


670  else


671  {


672  tS32 inBlockIdx = inLength  1;


673  tU32 outBlockIdx = inLength + shiftBlocks;


674 


675  // set the length to hold the shifted blocks


676  RJ_ASSERT( outBlockIdx < c_BigInt_MaxBlocks );


677  pResult>m_length = outBlockIdx + 1;


678 


679  // output the initial blocks


680  const tU32 lowBitsShift = (32  shiftBits);


681  tU32 highBits = 0;


682  tU32 block = pResult>m_blocks[inBlockIdx];


683  tU32 lowBits = block >> lowBitsShift;


684  while ( inBlockIdx > 0 )


685  {


686  pResult>m_blocks[outBlockIdx] = highBits  lowBits;


687  highBits = block << shiftBits;


688 


689  inBlockIdx;


690  outBlockIdx;


691 


692  block = pResult>m_blocks[inBlockIdx];


693  lowBits = block >> lowBitsShift;


694  }


695 


696  // output the final blocks


697  RJ_ASSERT( outBlockIdx == shiftBlocks + 1 );


698  pResult>m_blocks[outBlockIdx] = highBits  lowBits;


699  pResult>m_blocks[outBlockIdx1] = block << shiftBits;


700 


701  // zero the remaining low blocks


702  for ( tU32 i = 0; i < shiftBlocks; ++i)


703  pResult>m_blocks[i] = 0;


704 


705  // check if the terminating block has no set bits


706  if (pResult>m_blocks[pResult>m_length  1] == 0)


707  pResult>m_length;


708  }


709  }


710 


711  //******************************************************************************


712  // This is an implementation the Dragon4 algorithm to convert a binary number


713  // in floating point format to a decimal number in string format. The function


714  // returns the number of digits written to the output buffer and the output is


715  // not NUL terminated.


716  //


717  // The floating point input value is (mantissa * 2^exponent).


718  //


719  // See the following papers for more information on the algorithm:


720  // "How to Print FloatingPoint Numbers Accurately"


721  // Steele and White


722  // http://kurtstephens.com/files/p372steele.pdf


723  // "Printing FloatingPoint Numbers Quickly and Accurately"


724  // Burger and Dybvig


725  // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656&rep=rep1&type=pdf


726  //******************************************************************************


727  tU32 Dragon4


728  (


729  const tU64 mantissa, // value significand


730  const tS32 exponent, // value exponent in base 2


731  const tU32 mantissaHighBitIdx, // index of the highest set mantissa bit


732  const tB hasUnequalMargins, // is the high margin twice as large as the low margin


733  const tCutoffMode cutoffMode, // how to determine output length


734  tU32 cutoffNumber, // parameter to the selected cutoffMode


735  tC8 * pOutBuffer, // buffer to output into


736  tU32 bufferSize, // maximum characters that can be printed to pOutBuffer


737  tS32 * pOutExponent // the base 10 exponent of the first digit


738  )


739  {


740  tC8 * pCurDigit = pOutBuffer;


741 


742  RJ_ASSERT( bufferSize > 0 );


743 


744  // if the mantissa is zero, the value is zero regardless of the exponent


745  if (mantissa == 0)


746  {


747  *pCurDigit = '0';


748  *pOutExponent = 0;


749  return 1;


750  }


751 


752  // compute the initial state in integral form such that


753  // value = scaledValue / scale


754  // marginLow = scaledMarginLow / scale


755  tBigInt scale; // positive scale applied to value and margin such that they can be


756  // represented as whole numbers


757  tBigInt scaledValue; // scale * mantissa


758  tBigInt scaledMarginLow; // scale * 0.5 * (distance between this floatingpoint number and its


759  // immediate lower value)


760 


761  // For normalized IEEE floating point values, each time the exponent is incremented the margin also


762  // doubles. That creates a subset of transition numbers where the high margin is twice the size of


763  // the low margin.


764  tBigInt * pScaledMarginHigh;


765  tBigInt optionalMarginHigh;


766 


767  if ( hasUnequalMargins )


768  {


769  // if we have no fractional component


770  if (exponent > 0)


771  {


772  // 1) Expand the input value by multiplying out the mantissa and exponent. This represents


773  // the input value in its whole number representation.


774  // 2) Apply an additional scale of 2 such that later comparisons against the margin values


775  // are simplified.


776  // 3) Set the margin value to the lowest mantissa bit's scale.


777 


778  // scaledValue = 2 * 2 * mantissa*2^exponent


779  scaledValue.SetU64( 4 * mantissa );


780  BigInt_ShiftLeft( &scaledValue, exponent );


781 


782  // scale = 2 * 2 * 1


783  scale.SetU32( 4 );


784 


785  // scaledMarginLow = 2 * 2^(exponent1)


786  BigInt_Pow2( &scaledMarginLow, exponent );


787 


788  // scaledMarginHigh = 2 * 2 * 2^(exponent1)


789  BigInt_Pow2( &optionalMarginHigh, exponent + 1 );


790  }


791  // else we have a fractional exponent


792  else


793  {


794  // In order to track the mantissa data as an integer, we store it as is with a large scale


795 


796  // scaledValue = 2 * 2 * mantissa


797  scaledValue.SetU64( 4 * mantissa );


798 


799  // scale = 2 * 2 * 2^(exponent)


800  BigInt_Pow2(&scale, exponent + 2 );


801 


802  // scaledMarginLow = 2 * 2^(1)


803  scaledMarginLow.SetU32( 1 );


804 


805  // scaledMarginHigh = 2 * 2 * 2^(1)


806  optionalMarginHigh.SetU32( 2 );


807  }


808 


809  // the high and low margins are different


810  pScaledMarginHigh = &optionalMarginHigh;


811  }


812  else


813  {


814  // if we have no fractional component


815  if (exponent > 0)


816  {


817  // 1) Expand the input value by multiplying out the mantissa and exponent. This represents


818  // the input value in its whole number representation.


819  // 2) Apply an additional scale of 2 such that later comparisons against the margin values


820  // are simplified.


821  // 3) Set the margin value to the lowest mantissa bit's scale.


822 


823  // scaledValue = 2 * mantissa*2^exponent


824  scaledValue.SetU64( 2 * mantissa );


825  BigInt_ShiftLeft( &scaledValue, exponent );


826 


827  // scale = 2 * 1


828  scale.SetU32( 2 );


829 


830  // scaledMarginLow = 2 * 2^(exponent1)


831  BigInt_Pow2( &scaledMarginLow, exponent );


832  }


833  // else we have a fractional exponent


834  else


835  {


836  // In order to track the mantissa data as an integer, we store it as is with a large scale


837 


838  // scaledValue = 2 * mantissa


839  scaledValue.SetU64( 2 * mantissa );


840 


841  // scale = 2 * 2^(exponent)


842  BigInt_Pow2(&scale, exponent + 1 );


843 


844  // scaledMarginLow = 2 * 2^(1)


845  scaledMarginLow.SetU32( 1 );


846  }


847 


848  // the high and low margins are equal


849  pScaledMarginHigh = &scaledMarginLow;


850  }


851 


852  // Compute an estimate for digitExponent that will be correct or undershoot by one.


853  // This optimization is based on the paper "Printing FloatingPoint Numbers Quickly and Accurately"


854  // by Burger and Dybvig http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656&rep=rep1&type=pdf


855  // We perform an additional subtraction of 0.69 to increase the frequency of a failed estimate


856  // because that lets us take a faster branch in the code. 0.69 is chosen because 0.69 + log10(2) is


857  // less than one by a reasonable epsilon that will account for any floating point error.


858  //


859  // We want to set digitExponent to floor(log10(v)) + 1


860  // v = mantissa*2^exponent


861  // log2(v) = log2(mantissa) + exponent;


862  // log10(v) = log2(v) * log10(2)


863  // floor(log2(v)) = mantissaHighBitIdx + exponent;


864  // log10(v)  log10(2) < (mantissaHighBitIdx + exponent) * log10(2) <= log10(v)


865  // log10(v) < (mantissaHighBitIdx + exponent) * log10(2) + log10(2) <= log10(v) + log10(2)


866  // floor( log10(v) ) < ceil( (mantissaHighBitIdx + exponent) * log10(2) ) <= floor( log10(v) ) + 1


867  const tF64 log10_2 = 0.30102999566398119521373889472449;


868  tS32 digitExponent = (tS32)(ceil(tF64((tS32)mantissaHighBitIdx + exponent) * log10_2  0.69));


869 


870  // if the digit exponent is smaller than the smallest desired digit for fractional cutoff,


871  // pull the digit back into legal range at which point we will round to the appropriate value.


872  // Note that while our value for digitExponent is still an estimate, this is safe because it


873  // only increases the number. This will either correct digitExponent to an accurate value or it


874  // will clamp it above the accurate value.


875  if (cutoffMode == CutoffMode_FractionLength && digitExponent <= (tS32)cutoffNumber)


876  {


877  digitExponent = (tS32)cutoffNumber + 1;


878  }


879 


880  // Divide value by 10^digitExponent.


881  if (digitExponent > 0)


882  {


883  // The exponent is positive creating a division so we multiply up the scale.


884  tBigInt temp;


885  BigInt_MultiplyPow10( &temp, scale, digitExponent );


886  scale = temp;


887  }


888  else if (digitExponent < 0)


889  {


890  // The exponent is negative creating a multiplication so we multiply up the scaledValue,


891  // scaledMarginLow and scaledMarginHigh.


892  tBigInt pow10;


893  BigInt_Pow10( &pow10, digitExponent);


894 


895  tBigInt temp;


896  BigInt_Multiply( &temp, scaledValue, pow10);


897  scaledValue = temp;


898 


899  BigInt_Multiply( &temp, scaledMarginLow, pow10);


900  scaledMarginLow = temp;


901 


902  if (pScaledMarginHigh != &scaledMarginLow)


903  BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );


904  }


905 


906  // If (value >= 1), our estimate for digitExponent was too low


907  if( BigInt_Compare(scaledValue,scale) >= 0 )


908  {


909  // The exponent estimate was incorrect.


910  // Increment the exponent and don't perform the premultiply needed


911  // for the first loop iteration.


912  digitExponent = digitExponent + 1;


913  }


914  else


915  {


916  // The exponent estimate was correct.


917  // Multiply larger by the output base to prepare for the first loop iteration.


918  BigInt_Multiply10( &scaledValue );


919  BigInt_Multiply10( &scaledMarginLow );


920  if (pScaledMarginHigh != &scaledMarginLow)


921  BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );


922  }


923 


924  // Compute the cutoff exponent (the exponent of the final digit to print).


925  // Default to the maximum size of the output buffer.


926  tS32 cutoffExponent = digitExponent  bufferSize;


927  switch(cutoffMode)


928  {


929  // print digits until we pass the accuracy margin limits or buffer size


930  case CutoffMode_Unique:


931  break;


932 


933  // print cutoffNumber of digits or until we reach the buffer size


934  case CutoffMode_TotalLength:


935  {


936  tS32 desiredCutoffExponent = digitExponent  (tS32)cutoffNumber;


937  if (desiredCutoffExponent > cutoffExponent)


938  cutoffExponent = desiredCutoffExponent;


939  }


940  break;


941 


942  // print cutoffNumber digits past the decimal point or until we reach the buffer size


943  case CutoffMode_FractionLength:


944  {


945  tS32 desiredCutoffExponent = (tS32)cutoffNumber;


946  if (desiredCutoffExponent > cutoffExponent)


947  cutoffExponent = desiredCutoffExponent;


948  }


949  break;


950  }


951 


952  // Output the exponent of the first digit we will print


953  *pOutExponent = digitExponent1;


954 


955  // In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(),


956  // we need to scale up our values such that the highest block of the denominator


957  // is greater than or equal to 8. We also need to guarantee that the numerator


958  // can never have a length greater than the denominator after each loop iteration.


959  // This requires the highest block of the denominator to be less than or equal to


960  // 429496729 which is the highest number that can be multiplied by 10 without


961  // overflowing to a new block.


962  RJ_ASSERT( scale.GetLength() > 0 );


963  tU32 hiBlock = scale.GetBlock( scale.GetLength()  1 );


964  if (hiBlock < 8  hiBlock > 429496729)


965  {


966  // Perform a bit shift on all values to get the highest block of the denominator into


967  // the range [8,429496729]. We are more likely to make accurate quotient estimations


968  // in BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator values so


969  // we shift the denominator to place the highest bit at index 27 of the highest block.


970  // This is safe because (2^28  1) = 268435455 which is less than 429496729. This means


971  // that all values with a highest bit at index 27 are within range.


972  tU32 hiBlockLog2 = LogBase2(hiBlock);


973  RJ_ASSERT(hiBlockLog2 < 3  hiBlockLog2 > 27);


974  tU32 shift = (32 + 27  hiBlockLog2) % 32;


975 


976  BigInt_ShiftLeft( &scale, shift );


977  BigInt_ShiftLeft( &scaledValue, shift);


978  BigInt_ShiftLeft( &scaledMarginLow, shift);


979  if (pScaledMarginHigh != &scaledMarginLow)


980  BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );


981  }


982 


983  // These values are used to inspect why the print loop terminated so we can properly


984  // round the final digit.


985  tB low; // did the value get within marginLow distance from zero


986  tB high; // did the value get within marginHigh distance from one


987  tU32 outputDigit; // current digit being output


988 


989  if (cutoffMode == CutoffMode_Unique)


990  {


991  // For the unique cutoff mode, we will try to print until we have reached a level of


992  // precision that uniquely distinguishes this value from its neighbors. If we run


993  // out of space in the output buffer, we terminate early.


994  for (;;)


995  {


996  digitExponent = digitExponent1;


997 


998  // divide out the scale to extract the digit


999  outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, scale);


1000  RJ_ASSERT( outputDigit < 10 );


1001 


1002  // update the high end of the value


1003  tBigInt scaledValueHigh;


1004  BigInt_Add( &scaledValueHigh, scaledValue, *pScaledMarginHigh );


1005 


1006  // stop looping if we are far enough away from our neighboring values


1007  // or if we have reached the cutoff digit


1008  low = BigInt_Compare(scaledValue, scaledMarginLow) < 0;


1009  high = BigInt_Compare(scaledValueHigh, scale) > 0;


1010  if (low  high  (digitExponent == cutoffExponent))


1011  break;


1012 


1013  // store the output digit


1014  *pCurDigit = (tC8)('0' + outputDigit);


1015  ++pCurDigit;


1016 


1017  // multiply larger by the output base


1018  BigInt_Multiply10( &scaledValue );


1019  BigInt_Multiply10( &scaledMarginLow );


1020  if (pScaledMarginHigh != &scaledMarginLow)


1021  BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );


1022  }


1023  }


1024  else


1025  {


1026  // For length based cutoff modes, we will try to print until we


1027  // have exhausted all precision (i.e. all remaining digits are zeros) or


1028  // until we reach the desired cutoff digit.


1029  low = false;


1030  high = false;


1031 


1032  for (;;)


1033  {


1034  digitExponent = digitExponent1;


1035 


1036  // divide out the scale to extract the digit


1037  outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, scale);


1038  RJ_ASSERT( outputDigit < 10 );


1039 


1040  if ( scaledValue.IsZero()  (digitExponent == cutoffExponent) )


1041  break;


1042 


1043  // store the output digit


1044  *pCurDigit = (tC8)('0' + outputDigit);


1045  ++pCurDigit;


1046 


1047  // multiply larger by the output base


1048  BigInt_Multiply10(&scaledValue);


1049  }


1050  }


1051 


1052  // round off the final digit


1053  // default to rounding down if value got too close to 0


1054  tB roundDown = low;


1055 


1056  // if it is legal to round up and down


1057  if (low == high)


1058  {


1059  // round to the closest digit by comparing value with 0.5. To do this we need to convert


1060  // the inequality to large integer values.


1061  // compare( value, 0.5 )


1062  // compare( scale * value, scale * 0.5 )


1063  // compare( 2 * scale * value, scale )


1064  BigInt_Multiply2(&scaledValue);


1065  tS32 compare = BigInt_Compare(scaledValue, scale);


1066  roundDown = compare < 0;


1067 


1068  // if we are directly in the middle, round towards the even digit (i.e. IEEE rouding rules)


1069  if (compare == 0)


1070  roundDown = (outputDigit & 1) == 0;


1071  }


1072 


1073  // print the rounded digit


1074  if (roundDown)


1075  {


1076  *pCurDigit = (tC8)('0' + outputDigit);


1077  ++pCurDigit;


1078  }


1079  else


1080  {


1081  // handle rounding up


1082  if (outputDigit == 9)


1083  {


1084  // find the first nonnine prior digit


1085  for (;;)


1086  {


1087  // if we are at the first digit


1088  if (pCurDigit == pOutBuffer)


1089  {


1090  // output 1 at the next highest exponent


1091  *pCurDigit = '1';


1092  ++pCurDigit;


1093  *pOutExponent += 1;


1094  break;


1095  }


1096 


1097  pCurDigit;


1098  if (*pCurDigit != '9')


1099  {


1100  // increment the digit


1101  *pCurDigit += 1;


1102  ++pCurDigit;


1103  break;


1104  }


1105  }


1106  }


1107  else


1108  {


1109  // values in the range [0,8] can perform a simple round up


1110  *pCurDigit = (tC8)('0' + outputDigit + 1);


1111  ++pCurDigit;


1112  }


1113  }


1114 


1115  // return the number of digits output


1116  tU32 outputLen = (tU32)(pCurDigit  pOutBuffer);


1117  RJ_ASSERT(outputLen <= bufferSize);


1118  return outputLen;


1119  }

