Why Is a Round-Trip Conversion via a String Not Safe for a Double

Why is a round-trip conversion via a string not safe for a double?

I found the bug.

.NET does the following in clr\src\vm\comnumber.cpp:

DoubleToNumber(value, DOUBLE_PRECISION, &number);

if (number.scale == (int) SCALE_NAN) {
gc.refRetVal = gc.numfmt->sNaN;
goto lExit;
}

if (number.scale == SCALE_INF) {
gc.refRetVal = (number.sign? gc.numfmt->sNegativeInfinity: gc.numfmt->sPositiveInfinity);
goto lExit;
}

NumberToDouble(&number, &dTest);

if (dTest == value) {
gc.refRetVal = NumberToString(&number, 'G', DOUBLE_PRECISION, gc.numfmt);
goto lExit;
}

DoubleToNumber(value, 17, &number);

DoubleToNumber is pretty simple -- it just calls _ecvt, which is in the C runtime:

void DoubleToNumber(double value, int precision, NUMBER* number)
{
WRAPPER_CONTRACT
_ASSERTE(number != NULL);

number->precision = precision;
if (((FPDOUBLE*)&value)->exp == 0x7FF) {
number->scale = (((FPDOUBLE*)&value)->mantLo || ((FPDOUBLE*)&value)->mantHi) ? SCALE_NAN: SCALE_INF;
number->sign = ((FPDOUBLE*)&value)->sign;
number->digits[0] = 0;
}
else {
char* src = _ecvt(value, precision, &number->scale, &number->sign);
wchar* dst = number->digits;
if (*src != '0') {
while (*src) *dst++ = *src++;
}
*dst = 0;
}
}

It turns out that _ecvt returns the string 845512408225570.

Notice the trailing zero? It turns out that makes all the difference!

When the zero is present, the result actually parses back to 0.84551240822557006, which is your original number -- so it compares equal, and hence only 15 digits are returned.

However, if I truncate the string at that zero to 84551240822557, then I get back 0.84551240822556994, which is not your original number, and hence it would return 17 digits.

Proof: run the following 64-bit code (most of which I extracted from the Microsoft Shared Source CLI 2.0) in your debugger and examine v at the end of main:

#include <stdlib.h>
#include <string.h>
#include <math.h>

#define min(a, b) (((a) < (b)) ? (a) : (b))

struct NUMBER {
int precision;
int scale;
int sign;
wchar_t digits[20 + 1];
NUMBER() : precision(0), scale(0), sign(0) {}
};

#define I64(x) x##LL
static const unsigned long long rgval64Power10[] = {
// powers of 10
/*1*/ I64(0xa000000000000000),
/*2*/ I64(0xc800000000000000),
/*3*/ I64(0xfa00000000000000),
/*4*/ I64(0x9c40000000000000),
/*5*/ I64(0xc350000000000000),
/*6*/ I64(0xf424000000000000),
/*7*/ I64(0x9896800000000000),
/*8*/ I64(0xbebc200000000000),
/*9*/ I64(0xee6b280000000000),
/*10*/ I64(0x9502f90000000000),
/*11*/ I64(0xba43b74000000000),
/*12*/ I64(0xe8d4a51000000000),
/*13*/ I64(0x9184e72a00000000),
/*14*/ I64(0xb5e620f480000000),
/*15*/ I64(0xe35fa931a0000000),

// powers of 0.1
/*1*/ I64(0xcccccccccccccccd),
/*2*/ I64(0xa3d70a3d70a3d70b),
/*3*/ I64(0x83126e978d4fdf3c),
/*4*/ I64(0xd1b71758e219652e),
/*5*/ I64(0xa7c5ac471b478425),
/*6*/ I64(0x8637bd05af6c69b7),
/*7*/ I64(0xd6bf94d5e57a42be),
/*8*/ I64(0xabcc77118461ceff),
/*9*/ I64(0x89705f4136b4a599),
/*10*/ I64(0xdbe6fecebdedd5c2),
/*11*/ I64(0xafebff0bcb24ab02),
/*12*/ I64(0x8cbccc096f5088cf),
/*13*/ I64(0xe12e13424bb40e18),
/*14*/ I64(0xb424dc35095cd813),
/*15*/ I64(0x901d7cf73ab0acdc),
};

static const signed char rgexp64Power10[] = {
// exponents for both powers of 10 and 0.1
/*1*/ 4,
/*2*/ 7,
/*3*/ 10,
/*4*/ 14,
/*5*/ 17,
/*6*/ 20,
/*7*/ 24,
/*8*/ 27,
/*9*/ 30,
/*10*/ 34,
/*11*/ 37,
/*12*/ 40,
/*13*/ 44,
/*14*/ 47,
/*15*/ 50,
};

static const unsigned long long rgval64Power10By16[] = {
// powers of 10^16
/*1*/ I64(0x8e1bc9bf04000000),
/*2*/ I64(0x9dc5ada82b70b59e),
/*3*/ I64(0xaf298d050e4395d6),
/*4*/ I64(0xc2781f49ffcfa6d4),
/*5*/ I64(0xd7e77a8f87daf7fa),
/*6*/ I64(0xefb3ab16c59b14a0),
/*7*/ I64(0x850fadc09923329c),
/*8*/ I64(0x93ba47c980e98cde),
/*9*/ I64(0xa402b9c5a8d3a6e6),
/*10*/ I64(0xb616a12b7fe617a8),
/*11*/ I64(0xca28a291859bbf90),
/*12*/ I64(0xe070f78d39275566),
/*13*/ I64(0xf92e0c3537826140),
/*14*/ I64(0x8a5296ffe33cc92c),
/*15*/ I64(0x9991a6f3d6bf1762),
/*16*/ I64(0xaa7eebfb9df9de8a),
/*17*/ I64(0xbd49d14aa79dbc7e),
/*18*/ I64(0xd226fc195c6a2f88),
/*19*/ I64(0xe950df20247c83f8),
/*20*/ I64(0x81842f29f2cce373),
/*21*/ I64(0x8fcac257558ee4e2),

// powers of 0.1^16
/*1*/ I64(0xe69594bec44de160),
/*2*/ I64(0xcfb11ead453994c3),
/*3*/ I64(0xbb127c53b17ec165),
/*4*/ I64(0xa87fea27a539e9b3),
/*5*/ I64(0x97c560ba6b0919b5),
/*6*/ I64(0x88b402f7fd7553ab),
/*7*/ I64(0xf64335bcf065d3a0),
/*8*/ I64(0xddd0467c64bce4c4),
/*9*/ I64(0xc7caba6e7c5382ed),
/*10*/ I64(0xb3f4e093db73a0b7),
/*11*/ I64(0xa21727db38cb0053),
/*12*/ I64(0x91ff83775423cc29),
/*13*/ I64(0x8380dea93da4bc82),
/*14*/ I64(0xece53cec4a314f00),
/*15*/ I64(0xd5605fcdcf32e217),
/*16*/ I64(0xc0314325637a1978),
/*17*/ I64(0xad1c8eab5ee43ba2),
/*18*/ I64(0x9becce62836ac5b0),
/*19*/ I64(0x8c71dcd9ba0b495c),
/*20*/ I64(0xfd00b89747823938),
/*21*/ I64(0xe3e27a444d8d991a),
};

static const signed short rgexp64Power10By16[] = {
// exponents for both powers of 10^16 and 0.1^16
/*1*/ 54,
/*2*/ 107,
/*3*/ 160,
/*4*/ 213,
/*5*/ 266,
/*6*/ 319,
/*7*/ 373,
/*8*/ 426,
/*9*/ 479,
/*10*/ 532,
/*11*/ 585,
/*12*/ 638,
/*13*/ 691,
/*14*/ 745,
/*15*/ 798,
/*16*/ 851,
/*17*/ 904,
/*18*/ 957,
/*19*/ 1010,
/*20*/ 1064,
/*21*/ 1117,
};

static unsigned DigitsToInt(wchar_t* p, int count)
{
wchar_t* end = p + count;
unsigned res = *p - '0';
for ( p = p + 1; p < end; p++) {
res = 10 * res + *p - '0';
}
return res;
}
#define Mul32x32To64(a, b) ((unsigned long long)((unsigned long)(a)) * (unsigned long long)((unsigned long)(b)))

static unsigned long long Mul64Lossy(unsigned long long a, unsigned long long b, int* pexp)
{
// it's ok to losse some precision here - Mul64 will be called
// at most twice during the conversion, so the error won't propagate
// to any of the 53 significant bits of the result
unsigned long long val = Mul32x32To64(a >> 32, b >> 32) +
(Mul32x32To64(a >> 32, b) >> 32) +
(Mul32x32To64(a, b >> 32) >> 32);

// normalize
if ((val & I64(0x8000000000000000)) == 0) { val <<= 1; *pexp -= 1; }

return val;
}

void NumberToDouble(NUMBER* number, double* value)
{
unsigned long long val;
int exp;
wchar_t* src = number->digits;
int remaining;
int total;
int count;
int scale;
int absscale;
int index;

total = (int)wcslen(src);
remaining = total;

// skip the leading zeros
while (*src == '0') {
remaining--;
src++;
}

if (remaining == 0) {
*value = 0;
goto done;
}

count = min(remaining, 9);
remaining -= count;
val = DigitsToInt(src, count);

if (remaining > 0) {
count = min(remaining, 9);
remaining -= count;

// get the denormalized power of 10
unsigned long mult = (unsigned long)(rgval64Power10[count-1] >> (64 - rgexp64Power10[count-1]));
val = Mul32x32To64(val, mult) + DigitsToInt(src+9, count);
}

scale = number->scale - (total - remaining);
absscale = abs(scale);
if (absscale >= 22 * 16) {
// overflow / underflow
*(unsigned long long*)value = (scale > 0) ? I64(0x7FF0000000000000) : 0;
goto done;
}

exp = 64;

// normalize the mantisa
if ((val & I64(0xFFFFFFFF00000000)) == 0) { val <<= 32; exp -= 32; }
if ((val & I64(0xFFFF000000000000)) == 0) { val <<= 16; exp -= 16; }
if ((val & I64(0xFF00000000000000)) == 0) { val <<= 8; exp -= 8; }
if ((val & I64(0xF000000000000000)) == 0) { val <<= 4; exp -= 4; }
if ((val & I64(0xC000000000000000)) == 0) { val <<= 2; exp -= 2; }
if ((val & I64(0x8000000000000000)) == 0) { val <<= 1; exp -= 1; }

index = absscale & 15;
if (index) {
int multexp = rgexp64Power10[index-1];
// the exponents are shared between the inverted and regular table
exp += (scale < 0) ? (-multexp + 1) : multexp;

unsigned long long multval = rgval64Power10[index + ((scale < 0) ? 15 : 0) - 1];
val = Mul64Lossy(val, multval, &exp);
}

index = absscale >> 4;
if (index) {
int multexp = rgexp64Power10By16[index-1];
// the exponents are shared between the inverted and regular table
exp += (scale < 0) ? (-multexp + 1) : multexp;

unsigned long long multval = rgval64Power10By16[index + ((scale < 0) ? 21 : 0) - 1];
val = Mul64Lossy(val, multval, &exp);
}

// round & scale down
if ((unsigned long)val & (1 << 10))
{
// IEEE round to even
unsigned long long tmp = val + ((1 << 10) - 1) + (((unsigned long)val >> 11) & 1);
if (tmp < val) {
// overflow
tmp = (tmp >> 1) | I64(0x8000000000000000);
exp += 1;
}
val = tmp;
}
val >>= 11;

exp += 0x3FE;

if (exp <= 0) {
if (exp <= -52) {
// underflow
val = 0;
}
else {
// denormalized
val >>= (-exp+1);
}
}
else
if (exp >= 0x7FF) {
// overflow
val = I64(0x7FF0000000000000);
}
else {
val = ((unsigned long long)exp << 52) + (val & I64(0x000FFFFFFFFFFFFF));
}

*(unsigned long long*)value = val;

done:
if (number->sign) *(unsigned long long*)value |= I64(0x8000000000000000);
}

int main()
{
NUMBER number;
number.precision = 15;
double v = 0.84551240822557006;
char *src = _ecvt(v, number.precision, &number.scale, &number.sign);
int truncate = 0; // change to 1 if you want to truncate
if (truncate)
{
while (*src && src[strlen(src) - 1] == '0')
{
src[strlen(src) - 1] = 0;
}
}
wchar_t* dst = number.digits;
if (*src != '0') {
while (*src) *dst++ = *src++;
}
*dst++ = 0;
NumberToDouble(&number, &v);
return 0;
}

How to know if a double string is round-trip safe?

Use the R format specifier to convert the double to a string:

myDouble.ToString("R")

See The Round-trip ("R") Format Specifier on MSDN.

The round-trip ("R") format specifier guarantees that a numeric value that is converted to a string will be parsed back into the same numeric value. This format is supported only for the Single, Double, and BigInteger types.

(emphasis mine)

Produce a round-trip string for a decimal type

The default output format for decimal round-trips, so you don't have to do anything special. It is just like int in that sense.

64 vs 32 bit double parsing issue with the round-trip format specifier R

I found this interesting point from Microsoft which might explain the issue

In some cases, Double values formatted with the "R" standard numeric
format string do not successfully round-trip if compiled using the
/platform:x64 or /platform:anycpu switches and run on 64-bit systems.

To work around this problem, you can format Double values by using the
"G17" standard numeric format string. The following example uses the
"R" format string with a Double value that does not round-trip
successfully, and also uses the "G17" format string to successfully
round-trip the original value.

The comment and an example can be found here: https://msdn.microsoft.com/en-us/library/kfsatb94(v=vs.110).aspx

Why Is ToString() Rounding My Double Value?

By default the .ToString() method of Double returns 15 digits of precision. If you want the full 17 digits that the double value holds internally, you need to pass the "G17" format specifier to the method.

String s = value.ToString("G17");

Sourced from the MSDN docs:

By default, the return value only
contains 15 digits of precision
although a maximum of 17 digits is
maintained internally. If the value of
this instance has greater than 15
digits, ToString returns
PositiveInfinitySymbol or
NegativeInfinitySymbol instead of the
expected number. If you require more
precision, specify format with the
"G17" format specification, which
always returns 17 digits of precision,
or "R", which returns 15 digits if the
number can be represented with that
precision or 17 digits if the number
can only be represented with maximum
precision.



Related Topics



Leave a reply



Submit