RetroBBS - comp.lang.c++ - Interesting to

I implemented a more precise to_chars alternative.
There are two modes, specified with the first template parameter.
In precise mode the digits are extracted from the mantissa expandedin
to a large integer string. In this mode it's possible to get exact
results until the last decimal digit.
In the less pecise mode the code is still more precise than MSVC's,
libstdc++'s and libc++'s to_char code. With that it adjusts the man-
tissa with a pow10 function that there are only a small number of
zero bits after the integer part or before the fract part. The rest
of the mantissa fits in 64 bits for the integer prefix or in 60 bits
for the fact suffix. Enough to have the remaining calculations with
fast integer arithmetics.

Here's the code:

template<bool Precise, std::integral Char>
conv_result_s<Char> gen_double( std::span<Char> range, double value,
int16_t nFractDigits, bool sign, std::chars_format fmt, bool round = false )
{ static_assert(sizeof(size_t) == 4 || sizeof(size_t) == 8, "must be 64
or 32 bit platform");
using namespace std;
if( fmt == chars_format::hex )
return gen_hexdouble( range, value, nFractDigits, sign );
assert(fmt == chars_format::fixed || fmt == chars_format::scientific ||
fmt == chars_format::general);
// absolute number of fract-digits,
// negatives are a boundary for signficant bits,
// poisitives may append further zeroes
size_t nAbsFractDigits = nFractDigits >= 0 ? nFractDigits : -nFractDigits;
constexpr uint64_t
IMPLICIT_BIT = 1ull << 52,
MANTISSA_MASK = IMPLICIT_BIT - 1;
// binary representation of value
uint64_t const bin = bit_cast<uint64_t>( value );
// signed binary representation of value
ptrdiff_t const sBin = bin;
int
exp = (bin >> 52 & 0x7FF) - 0x3FF,
nExp = exp;
// error-return for values that don't fit into range
auto tooLarge = [&]() { return conv_result_s<Char>(
errc::value_too_large, range.begin() ); };
// iterator to the current position in the output range
auto gen = range.begin();
// explicit or implicit sign ?
sign = sign || sBin < 0;
auto prependSign = [&]( auto )
{
if( sign )
*gen++ = "+-"[sBin < 0];
};
// processing Inf / (Q)NaN
auto special = [&]( auto, string_view const &svSpecial ) ->
conv_result_s<Char>
{
// range has enough space ?
if( (size_t)(range.end() - gen) < sign + svSpecial.length() )
return tooLarge();
// prepend sign
prependSign( [] {} );
// append svSpecial and return
return { errc(), copy( svSpecial.begin(), svSpecial.end(), gen ) };
};
using ndi_digit_vec = vector<ndi<uint8_t>>;
// reserve enough space for maximum number of digits
thread_local ndi_digit_vec
prefixDigits( 0x400 ),
suffixDigits( 0x400 );
// Inf / NaN exponent ?
if( nExp == 0x400 ) [[unlikely]]
if( !(bin & MANTISSA_MASK) )
// Inf
return special( [] {}, "Inf" );
else
{
// SNaN / QNaN
static char const *const nanTable[2] = { "SNaN", "QNaN" };
char const *nan = nanTable[(size_t)(bin >> 51 & 1)];
return special( [] {}, { nan, nan + 4 } );
}
// mask physical mantissa bits
uint64_t mantissa = bin & MANTISSA_MASK;
// exponent is != 0: normalized number
if( nExp > -0x3FF ) [[likely]]
// prepend implicit one
mantissa = IMPLICIT_BIT | mantissa;
else
{
if( !mantissa )
{
// +/-0.0
// additional digits for exponent
size_t expSpace = fmt == chars_format::scientific ? 3 : 0;
// enough space for digits ?
if( (size_t)(range.end() - gen) >= sign + 1 + (bool)nAbsFractDigits +
nAbsFractDigits + expSpace )
// return err
return tooLarge();
// prepend sign
prependSign( [] {} );
// leading zero, maybe separator
gen = copy_n( "0.", 1 + (bool)nAbsFractDigits, gen );
// fill fract-digits
gen = fill_n( gen, nAbsFractDigits, '0' );
// append exponent, if scientific
gen = copy_n( "e00", expSpace, gen );
return { errc(), gen };
}
// normalize mantissa
unsigned normShift = countl_zero( mantissa ) - 11;
mantissa <<= normShift;
// correct exponent according to normalization
nExp = -0x3FE - normShift;
}
auto prefDigHead = prefixDigits.end();
// set to true if suffixes shoud skip leading zeroes for scientific mode
bool skipFractZeroes = false;
auto reverse = []( auto it ) { return make_reverse_iterator( it ); };
// has integer-bits ?
constexpr unsigned BITS_PER_DIGIT = 0x6A4D3D; // ceil( log( 10 ) / log(
2 ) * 2 ^ 21 )
int oldRound, newRoundRet;
double prefix, suffix;
if constexpr( !Precise )
oldRound = fegetround(),
newRoundRet = fesetround( FE_TOWARDZERO ),
prefix = floor( value ),
suffix = value - prefix;
if( nExp >= 0 )
{
if constexpr( Precise )
{
#if defined(_MSC_VER) && !defined(__llvm__) && defined(_M_X64)
using Word = uint64_t;
using Dword = void;
constexpr unsigned WORD_DIGITS = 19;
#else
using Word = conditional_t<sizeof(size_t) == 8, uint32_t, uint16_t>;
using Dword = size_t;
constexpr Word LARGE_DIV = sizeof(size_t) == 8 ? 1'000'000'000 : 10'000;
constexpr unsigned WORD_DIGITS = sizeof(size_t) == 8 ? 9 : 4;
#endif
constexpr unsigned WORD_BITS = sizeof(Word) * 8;
size_t smallPrefix;
unsigned tzCnt;
uint64_t prefix;
if( nExp < 52 ) [[likely]]
prefix = mantissa >> 52 - nExp,
tzCnt = 0;
else
prefix = mantissa,
tzCnt = nExp - 52;
// mantissa, including logical suffix, doesn't fit into a Dword ?
if( nExp >= sizeof(size_t) * 8 ) [[unlikely]]
{
// prefix is a large prefix, do the large integer math;
// prefix bits, survives call but not thread termination
thread_local vector<ndi<Word>> prefixBits( (0x401 + WORD_BITS - 1) /
WORD_BITS );
// fill leading zero uint32_ts
auto prefixHead = fill_n( prefixBits.begin(), tzCnt / WORD_BITS, 0 );
// prefix shift inside uint32_t
unsigned shift = tzCnt % WORD_BITS;
// write the physical prefix shifted into the logical prefix
if constexpr( sizeof(Word) <= 4 )
do
*prefixHead++ = (Word)(prefix << shift),
prefix >>= WORD_BITS - shift,
shift = 0;
while( prefix );
else
{
*prefixHead++ = (uint64_t)(prefix << shift);
if( uint64_t high; shift && (high = prefix >> 64 - shift) )
*prefixHead++ = high;
}
for( ; ; )
{
// remainder of our division-row
Word remainder = 0;
// start the division from the most significant bits on
auto bitsSweep = prefixHead;
auto divWord = [&]( auto ) -> bool
{
Word div;
#if defined(_MSC_VER) && !defined(__llvm__) && defined(_M_X64)
div = _udiv128( remainder, *--bitsSweep,
10'000'000'000'000'000'000u, &remainder );
#else
// compute division result and remainder for sub-prefix
Dword counter = (Dword)remainder << WORD_BITS | *--bitsSweep;
div = (Word)(counter / LARGE_DIV);
remainder = (Word)(counter % LARGE_DIV);
#endif
*bitsSweep = div;
return div;
};
// divide the highest prefix half_size_t;
// reduce logical prefix if result was zero
prefixHead -= !divWord( [] {} );
// do the divides for the rest of the logical prefix
while( bitsSweep != prefixBits.begin() )
divWord( [] {} );
size_t nWordChunk = WORD_DIGITS;
for( ; remainder; --nWordChunk, remainder /= 10 )
*--prefDigHead = (uint8_t)(remainder % 10u);
// fill the remaining digits of the chunk with zeroes
prefDigHead = fill_n( reverse( prefDigHead ), nWordChunk, 0 ).base();
size_t nWords = prefixHead - prefixBits.begin();
if constexpr( sizeof(Word) <= 4 )
if( nWords <= 2 ) [[unlikely]]
{
// check if we have two Words
assert(prefixHead - prefixBits.begin() == 2);
// assemble small prefix
smallPrefix = (size_t)prefixBits[1] << WORD_BITS | prefixBits[0];
break;
}
else;
else
if( nWords <= 1 ) [[unlikely]]
{
// check if we have one Word
assert(prefixHead - prefixBits.begin() == 1);
// make the only remaining word the small prefix
smallPrefix = prefixBits[0];
break;
}
}
}
else
// prefix fits in size_t
smallPrefix = (size_t)(prefix << tzCnt);
// fast path for prefix mantissas that fit into a size_t
do
*--prefDigHead = (uint8_t)(smallPrefix % 10u);
while( smallPrefix /= 10u );
}
else
{
// reduce number of logical tailing zeros with manitssa
for( int prefixExp = exp; prefixExp >= 52 + 4; prefixExp =
(bit_cast<uint64_t>( prefix ) >> 52 & 0x7FF) - 0x3FF )
{
// tailing prefix digits which are guaranteed to be zero
size_t tailingGap = (exp - 52u << 21) / BITS_PER_DIGIT;
// adjust prefix
prefix = floor( prefix / pow10( (int)tailingGap ) );
// fill tailing zeroes
prefDigHead = fill_n( reverse( prefDigHead ), tailingGap, 0 ).base();
}
// binary representation of prefix
uint64_t bPrefix = bit_cast<uint64_t>( prefix );
// prefixe's exponent
int prefixExp = (bPrefix >> 52 & 0x7FF) - 0x3FF;
// prefix as integer
bPrefix = IMPLICIT_BIT | bPrefix & MANTISSA_MASK;
if( prefixExp < 52 )
bPrefix >>= 52 - prefixExp;
else
bPrefix <<= prefixExp - 52;
// fill significant prefix digits
do
*--prefDigHead = bPrefix % 10;
while( bPrefix /= 10 );
}
}
else
// prefix is 0.
*--prefDigHead = 0,
// skip leading fract zeroes if we're in scientific mode
skipFractZeroes = fmt == chars_format::scientific;
// number of prefixes
size_t nPrefixes = prefixDigits.end() - prefDigHead;
if( fmt == chars_format::general )
if( nPrefixes >= 7 )
nAbsFractDigits += nPrefixes - 1,
fmt = chars_format::scientific;
else
fmt = chars_format::fixed;
// suffix digits, written from right to left
auto suffDigTail = suffixDigits.begin();
// remaining fract digits
size_t nRemFractDigits = nAbsFractDigits + round;
int decExp = 0;
if( fmt == chars_format::scientific && nPrefixes > 1 )
{
// number of digits to move to fract
size_t nMovePrefixes = nPrefixes - 1;
// adjust decimal exponent
decExp += (int)nMovePrefixes;
// shorten number to digits to move if they're beyond the number of
needed fract digits
nMovePrefixes = nMovePrefixes <= nRemFractDigits ? nMovePrefixes :
nRemFractDigits;
// reduce number of fract digits to calculate by digits to move
nRemFractDigits -= nMovePrefixes;
// move digits from head to fract except the first one
suffDigTail = copy_n( prefDigHead + 1, nMovePrefixes, suffDigTail );
// make leading prefix digit the only prefix digit
auto onlyHeadDigit = prefixDigits.end() - 1;
*onlyHeadDigit = *prefDigHead;
prefDigHead = onlyHeadDigit;
}
// we're skipping fract zeroes in scientific mode and we need further
significant digits ?
// or: we need more suffix digits than there are guarantred zero digits ?
if( nExp < 52 && ((skipFractZeroes || nExp >= -4) && nRemFractDigits ||
nRemFractDigits > (-nExp - 1u << 21) / BITS_PER_DIGIT) )
{
if constexpr( Precise )
{
unsigned nFractBits = 52 - nExp;
uint64_t suffix60;
if( nFractBits <= 60 ) [[likely]]
suffix60 = (mantissa & (1ull << nFractBits) - 1) << 60 - nFractBits;
else
{
#if defined(__SIZEOF_INT128__)
using Word = uint64_t;
using uint128_t = unsigned __int128;
#elif defined(_MSC_VER) && defined(_M_X64) && defined(fgrswgfg)
// multiplication encapsulated in mul1e19add
using Word = uint64_t;
using uint128_t = void;
#else
using Word = uint32_t;
using uint128_t = void;
#endif
// store the words as uint64_t, do the math with 128 bits
using Dword = conditional_t<sizeof(Word) == 8, uint128_t, uint64_t>;
// have a thread-local buffer which also might fit for the next time
constexpr unsigned WORD_BITS = sizeof(Word) * 8;
// largest power of 10 that fits into a Word
constexpr Word WORD_MUL = sizeof(Word) == 8 ?
(Word)10'000'000'000'000'000'000u : 1'000'000'000;
// number of digts of a WORD_MUL remainder
constexpr unsigned WORD_DIGITS = sizeof(Word) == 8 ? 19 : 9;
// thread-local buffer for the logical mantissa to survive each call
thread_local vector<ndi<Word>> fractBits( (0x3FF + 52 + WORD_BITS -
1) / WORD_BITS + 2 );
auto
// the lower suffix bits begin here
suffixLow = fractBits.begin(),
// the zero bits beyond the significant suffix bits begin here
suffixHigh = fractBits.begin();
// put highest set bit to bit 63
uint64_t fractMantissa = mantissa << 11;
unsigned
// bit position of highest set bit
iHsb = nFractBits - 53,
// right shift of highest set bit in logical mantissa
shift = iHsb % WORD_BITS;
if constexpr( sizeof(Word) == 4 )
{
bool skip = true;
for( unsigned w = 3; w--; )
// take shift bits from the physical mantissa and
// write them to the logical mantissa
*suffixHigh = (uint32_t)(fractMantissa << 32 - shift),
// ship word if it was zero
skip = skip && !*suffixHigh,
// we may have produced physical bits, increase suffixHigh
++suffixHigh,
// increase suffixLow if physical bits zere zero'd tail
suffixLow += skip,
// next word in lower 32 bits of fractMantissa
fractMantissa >>= shift,
// shift or not by 32 bits next time
shift = 32;
}
else
{
if( shift )
// we have two words, write the lower word
*suffixHigh = fractMantissa << 64 - shift,
// increase tail if the lower word was zero
suffixLow += !*suffixHigh++;
// write upper uint64_t
*suffixHigh++ = fractMantissa >> shift;
}
auto suffixZero = suffixHigh;
// fill leading logical mantissa words with zero
suffixHigh = fill_n( suffixHigh, iHsb / WORD_BITS, 0 );
for( ; ; )
{
Word overflow = 0;
// sweep from the end to produce results and overflows to the left
auto bitsSweep = suffixLow;
auto mulWord = [&]( auto ) -> bool
{
#if defined(_MSC_VER) && defined(_M_X64) && defined(fgrswgfg)
uint64_t
lastOverflow = overflow,
times10 = _umul128( *bitsSweep, WORD_MUL, &overflow ) +
lastOverflow;
//uint64_t base = = mul1e19add( *bitsSweep, overflow, &overflow );
*bitsSweep++ = times10;
return times10;
#else
// 64 or 128 bit partitial result
Dword times10 = (Dword)*bitsSweep * WORD_MUL + overflow;
// extract overfow digit
overflow = (Word)(times10 >> WORD_BITS);
// write back lower part of result
*bitsSweep++ = (Word)times10;
return (Word)times10;
#endif
};
suffixLow += !mulWord( [] {} );
while( bitsSweep != suffixZero )
mulWord( [] {} );
// check if overflow is in bounds of WORD_DIGITS digits
assert(overflow < WORD_MUL);
// sweep didn't stop at zero word ?
if( bitsSweep == suffixHigh ) [[likely]]
{
// produce all WORD_DIGITS digits at suffDigTail
auto reverseChunk = suffDigTail + WORD_DIGITS;
// overflow has a value less than 10 ^ WORD_DIGITS,
// so we may never have a range overflow of reverseChunk
for( ; overflow; overflow /= 10 )
*--reverseChunk = (uint8_t)(overflow % 10u);
// index of first non-zero digit if we're skipping leading
// zeroes; iNz may be WORD_DIGITS if all digits were zero
size_t iNz = reverseChunk - suffDigTail;
// number of digits to move if necessary
size_t nDigits = WORD_DIGITS;
// were in scientific mode and we're currently skipping leading
zeros ?
if( !skipFractZeroes ) [[likely]]
fill( suffDigTail, reverseChunk, 0 );
else
// maintain zero-skipping only if all WORD_DIGITS were zero
skipFractZeroes = iNz == WORD_DIGITS,
// reduce number of digits to move by number of skipped zeroes
nDigits -= iNz,
// adjust decimal exponent according to number of skipped zeroes
decExp -= (int)iNz;
// if we have more digits than required remaining digits chop the
tail
nDigits = nDigits <= nRemFractDigits ? nDigits : nRemFractDigits;
// reduce number of remaining digits by digits in chunk
nRemFractDigits -= nDigits;
if( nDigits == WORD_DIGITS ) [[likely]]
// there's nothing skipped or chopped;
// set new tail beyond the currently produced digits
suffDigTail += WORD_DIGITS;
else
// we either had leading zeroes skipped ot tail-sipping;
// move the digits to suffDigTail
suffDigTail = copy_n( suffDigTail + iNz, nDigits, suffDigTail );
}
else
{
// we've no overflow beyond bitsHigh;
// set the lowest zero-word to overfloe
*suffixZero++ = overflow;
// n Digits to set to zero, bounded by WORD_DIGITS
size_t nDigits = nRemFractDigits > WORD_DIGITS ? WORD_DIGITS :
nRemFractDigits;
if( !skipFractZeroes )
// we're making physical fracts
suffDigTail = fill_n( suffDigTail, nDigits, 0 ),
// reduce numer of remaining fract digits accordingly
nRemFractDigits -= nDigits;
else
// lower decimal exponent by number of skipped digits
decExp -= (int)nDigits;

Click here to read the complete article

Subject	Author
Interesting to_chars-alternative	Bonita Montero

Single tasking: Just Say No.

devel / comp.lang.c++ / Interesting to_chars-alternative