fix: prettier floating point numbers

Add dragonbox to compute the required precision to print floating point
numbers. This avoids the uglification of floating point numbers that
happens by default via std::stringstream.

Numbers like 34.34 used to be converted to '34.340000000000003' as strings.
With this version they will be converted to the string '34.34'.

This fixes issue https://github.com/jbeder/yaml-cpp/issues/1289
This commit is contained in:
Simon Gene Gottlieb
2024-07-06 08:00:10 +02:00
committed by Jesse Beder
parent 3d2888cc8a
commit bd070a7b76
7 changed files with 4656 additions and 4 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -21,6 +21,7 @@
#include "yaml-cpp/emittermanip.h"
#include "yaml-cpp/null.h"
#include "yaml-cpp/ostream_wrapper.h"
#include "yaml-cpp/fp_to_string.h"
namespace YAML {
class Binary;
@@ -180,7 +181,7 @@ inline Emitter& Emitter::WriteStreamable(T value) {
}
if (!special) {
stream << value;
stream << fp_to_string(value, stream.precision());
}
m_stream << stream.str();

View File

@@ -0,0 +1,207 @@
// SPDX-FileCopyrightText: 2024 Simon Gene Gottlieb
// SPDX-License-Identifier: MIT
#ifndef YAML_H_FP_TO_STRING
#define YAML_H_FP_TO_STRING
#include "contrib/dragonbox.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <locale>
#include <sstream>
#include <string>
#include <tuple>
namespace YAML {
namespace detail {
namespace fp_formatting {
/**
 * Converts an unsigned integer into its ASCII digits.
 *
 * The digits are produced back to front at the end of the buffer and then
 * moved to its beginning. The result is NOT null-terminated.
 *
 * @param begin/end - a buffer, must be at least 20 bytes long
 * @param value     - input value. Taken as a 64-bit integer so that the
 *                    significand of a double is not truncated on platforms
 *                    where size_t is only 32 bits wide.
 * @param width     - minimum number of digits, filled with '0' to the left.
 *                    Must be equal or smaller than the buffer size.
 * @return          - number of digits written to the front of the buffer.
 *
 * Example:
 *   std::array<char, 20> buffer;
 *   auto ct = convertToChars(buffer.data(), buffer.data() + buffer.size(), 23, 3);
 *   assert(ct == 3);
 *   assert(buffer[0] == '0');
 *   assert(buffer[1] == '2');
 *   assert(buffer[2] == '3');
 */
inline auto convertToChars(char* begin, char* end, std::uint64_t value, int width=1) -> int {
  assert(width >= 1);
  assert(end >= begin);       // end must be after begin
  assert(end-begin >= width); // Buffer must be large enough
  assert(end-begin >= 20);    // 2^64 has 20 digits, so at least 20 digits must be available

  // emit the digits right-aligned, least significant digit first
  int digits_ct{};
  while (value > 0) {
    char const c = static_cast<char>('0' + value % 10);
    value = value / 10;
    digits_ct += 1;
    *(end-digits_ct) = c;
  }

  // left-pad with '0' until the requested minimum width is reached
  // (this also yields "0" for value == 0, since width is at least 1)
  while (digits_ct < width) {
    assert(digits_ct < 64);
    digits_ct += 1;
    *(end-digits_ct) = '0';
  }

  // move data to the front of the array; source and destination may overlap
  std::memmove(begin, end-digits_ct, static_cast<std::size_t>(digits_ct));
  return digits_ct;
}
/**
 * Converts a float or double to a string.
 *
 * Uses dragonbox to obtain the shortest decimal digits of 'v', limits them to
 * 'precision' significant digits (rounding half up on the decimal digits) and
 * prints them
 *   - in plain notation while -5 < decimal exponent < precision,
 *   - in scientific notation ("d.ddde+XX", at least two exponent digits)
 *     otherwise.
 * Trailing zeros are never printed. The layout is intended to match what a
 * std::stringstream prints with the same precision.
 *
 * @param v         - value to convert. 0, inf and NaN are not handled by
 *                    dragonbox and are formatted through a std::stringstream.
 * @param precision - maximum number of significant digits; values <= 0 select
 *                    the default of 6.
 * @return          - the textual representation of v
 */
template <typename T>
auto fp_to_string(T v, int precision = 0) -> std::string {
  // hardcoded constant, at which exponent should switch to a scientific notation
  int const lowerExponentThreshold = -5;

  // A non-positive precision selects the default. Without this guard a
  // negative precision would become a negative digit count below and index
  // outside of the digit buffer.
  if (precision <= 0) {
    precision = 6;
  }
  int const upperExponentThreshold = precision;

  // dragonbox/to_decimal does not handle value 0, inf, NaN
  if (v == 0 || std::isinf(v) || std::isnan(v)) {
    std::stringstream ss;
    ss << v;
    return ss.str();
  }

  auto r = jkj::dragonbox::to_decimal(v);

  auto digits = std::array<char, 20>{}; // max digits of a 64-bit integer is 20.
  auto digits_ct = convertToChars(digits.data(), digits.data() + digits.size(), r.significand);

  // check if requested precision is lower than
  // required digits for exact representation
  if (digits_ct > precision) {
    auto diff = digits_ct - precision;
    r.exponent += diff;
    digits_ct = precision;

    // round numbers if required
    if (digits[digits_ct] >= '5') {
      int i{digits_ct-1};
      digits[i] += 1;
      // propagate the carry to the left; every digit that overflows becomes a
      // trailing zero and is dropped by raising the exponent instead
      while (digits[i] == '9'+1) {
        digits_ct -= 1;
        r.exponent += 1;
        if (i > 0) {
          digits[i-1] += 1;
          i -= 1;
        } else {
          // all digits were 9: the result is a single leading 1
          digits_ct = 1;
          digits[0] = '1';
          break;
        }
      }
    }

    // Cutting digits can expose zeros at the end (e.g. 1.0004 cut to three
    // digits is "100"). Fold them into the exponent so that "1" is printed
    // instead of "1.00".
    while (digits_ct > 1 && digits[digits_ct-1] == '0') {
      digits_ct -= 1;
      r.exponent += 1;
    }
  }

  // The output grows with the requested precision in plain notation (up to
  // 'precision' digits before the decimal point), so it is collected in a
  // string rather than in a fixed size buffer.
  std::string output;
  output.reserve(32);

  // print '-' symbol for negative numbers
  if (r.is_negative) {
    output.push_back('-');
  }

  // exponent if only a single non-zero digit is before the decimal point
  int const exponent = r.exponent + digits_ct - 1;

  // case 1: scientific notation
  if (exponent >= upperExponentThreshold || exponent <= lowerExponentThreshold) {
    // print first digit
    output.push_back(digits[0]);

    // print significant digits after decimal point
    if (digits_ct > 1) {
      output.push_back('.');
      output.append(digits.data() + 1, static_cast<std::size_t>(digits_ct - 1));
    }

    output.push_back('e');
    output.push_back((exponent >= 0) ? '+' : '-');

    // exponent is printed with at least two digits
    auto exp_digits = std::array<char, 20>{};
    auto exp_digits_ct = convertToChars(exp_digits.data(), exp_digits.data() + exp_digits.size(),
                                        static_cast<std::size_t>(std::abs(exponent)), /*.width=*/ 2);
    output.append(exp_digits.data(), static_cast<std::size_t>(exp_digits_ct));

  // case 2: default notation
  } else {
    // number of digits in front of the decimal point; zero or negative if the
    // value is smaller than 1
    int const before_decimal_digits = digits_ct + r.exponent;
    int consumed{0};

    if (before_decimal_digits > 0) {
      // print digits before point
      consumed = std::min(before_decimal_digits, digits_ct);
      output.append(digits.data(), static_cast<std::size_t>(consumed));

      // print trailing zeros before point
      if (before_decimal_digits > digits_ct) {
        output.append(static_cast<std::size_t>(before_decimal_digits - digits_ct), '0');
      }
    } else {
      // print 0 before point if no digit was printed before
      output.push_back('0');
    }

    if (consumed < digits_ct) {
      output.push_back('.');

      // print 0 after decimal point, to fill until first digit
      if (before_decimal_digits < 0) {
        output.append(static_cast<std::size_t>(-before_decimal_digits), '0');
      }

      // print significant digits after decimal point
      output.append(digits.data() + consumed, static_cast<std::size_t>(digits_ct - consumed));
    }
  }
  return output;
}
}
}
/**
 * Converts a float to a string using at most 'precision' significant digits.
 *
 * @param v         - value to convert
 * @param precision - maximum number of significant digits, 0 selects the
 *                    default of the underlying implementation
 * @return          - the textual representation of v
 */
inline auto fp_to_string(float v, size_t precision = 0) -> std::string {
  // The implementation takes the precision as int. Clamp explicitly so that an
  // oversized value cannot wrap around into a negative precision.
  size_t const max_precision = static_cast<size_t>(std::numeric_limits<int>::max());
  return detail::fp_formatting::fp_to_string(v, static_cast<int>(std::min(precision, max_precision)));
}
/**
 * Converts a double to a string using at most 'precision' significant digits.
 *
 * @param v         - value to convert
 * @param precision - maximum number of significant digits, 0 selects the
 *                    default of the underlying implementation
 * @return          - the textual representation of v
 */
inline auto fp_to_string(double v, size_t precision = 0) -> std::string {
  // The implementation takes the precision as int. Clamp explicitly so that an
  // oversized value cannot wrap around into a negative precision.
  size_t const max_precision = static_cast<size_t>(std::numeric_limits<int>::max());
  return detail::fp_formatting::fp_to_string(v, static_cast<int>(std::min(precision, max_precision)));
}
/**
 * Converts a long double to a string.
 *
 * dragonbox only works for floats/doubles not long double, so this overload
 * formats through a std::stringstream that is pinned to the classic "C"
 * locale.
 *
 * @param v         - value to convert
 * @param precision - stream precision to use, defaults to the number of digits
 *                    needed to round-trip a long double
 * @return          - the text the stream produced for v
 */
inline auto fp_to_string(long double v, size_t precision = std::numeric_limits<long double>::max_digits10) -> std::string {
  std::stringstream ss;
  // classic() is the "C" locale; unlike std::locale("C") it does not have to
  // look the locale up by name on every call
  ss.imbue(std::locale::classic());
  ss.precision(static_cast<std::streamsize>(precision));
  ss << v;
  return ss.str();
}
}
#endif

View File

@@ -28,6 +28,7 @@
#include "yaml-cpp/node/node.h"
#include "yaml-cpp/node/type.h"
#include "yaml-cpp/null.h"
#include "yaml-cpp/fp_to_string.h"
namespace YAML {
@@ -129,7 +130,7 @@ inner_encode(const T& rhs, std::stringstream& stream){
stream << ".inf";
}
} else {
stream << rhs;
stream << fp_to_string(rhs, stream.precision());
}
}

242
test/fp_to_string_test.cpp Normal file
View File

@@ -0,0 +1,242 @@
#include "yaml-cpp/fp_to_string.h"
#include "gtest/gtest.h"
namespace YAML {
namespace {
/**
 * Helper function that converts a value to a string the way a
 * default-constructed std::stringstream does. Used as the reference result
 * in the tests below.
 *
 * @param v         - value to format
 * @param precision - stream precision to apply; 0 keeps the stream's default
 * @return          - the text the stream produced for v
 */
template <typename T>
static std::string convert_with_stringstream(T v, size_t precision = 0) {
  std::stringstream ss;
  if (precision > 0) {
    // set the precision on the stream directly; same effect as
    // std::setprecision but without depending on <iomanip>
    ss.precision(static_cast<std::streamsize>(precision));
  }
  ss << v;
  return ss.str();
}
// Caution: Tests involving 'convert_with_stringstream' rely on std::stringstream
// having a certain printing behavior. If that behavior changes, the unit tests
// might fail. This is not a fault of fp_to_string, just a weakness of the way
// these tests are constructed.

// Checks fp_to_string for double arguments: against literal expected strings
// and against the output of a std::stringstream.
TEST(FpToStringTest, conversion_double) {
// Issue motivating fp_to_string function,
// https://github.com/jbeder/yaml-cpp/issues/1289
// Original problem at hand:
EXPECT_EQ("34.34", fp_to_string(34.34));
EXPECT_EQ("56.56", fp_to_string(56.56));
EXPECT_EQ("12.12", fp_to_string(12.12));
EXPECT_EQ("78.78", fp_to_string(78.78));
// Special challenge with rounding
// https://github.com/jbeder/yaml-cpp/issues/1289#issuecomment-2211705536
// Note: the literals below carry an 'f' suffix, so these calls go through the
// float overload.
EXPECT_EQ("1.54743e+26", fp_to_string(1.5474250491e+26f));
EXPECT_EQ(convert_with_stringstream(1.5474250491e+26f), fp_to_string(1.5474250491e+26f));
EXPECT_EQ("1.5474251e+26", fp_to_string(1.5474250491e+26f, 8));
// powers of ten >= 1 print the same way as std::stringstream
EXPECT_EQ(convert_with_stringstream(1.), fp_to_string(1.));
EXPECT_EQ(convert_with_stringstream(1e0), fp_to_string(1e0));
EXPECT_EQ(convert_with_stringstream(1e1), fp_to_string(1e1));
EXPECT_EQ(convert_with_stringstream(1e2), fp_to_string(1e2));
EXPECT_EQ(convert_with_stringstream(1e3), fp_to_string(1e3));
EXPECT_EQ(convert_with_stringstream(1e4), fp_to_string(1e4));
EXPECT_EQ(convert_with_stringstream(1e5), fp_to_string(1e5));
EXPECT_EQ(convert_with_stringstream(1e6), fp_to_string(1e6));
EXPECT_EQ(convert_with_stringstream(1e7), fp_to_string(1e7));
EXPECT_EQ(convert_with_stringstream(1e8), fp_to_string(1e8));
EXPECT_EQ(convert_with_stringstream(1e9), fp_to_string(1e9));
// By default, values below 1e6 are printed without scientific notation
EXPECT_EQ("1", fp_to_string(1.));
EXPECT_EQ("1", fp_to_string(1e0));
EXPECT_EQ("10", fp_to_string(1e1));
EXPECT_EQ("100", fp_to_string(1e2));
EXPECT_EQ("1000", fp_to_string(1e3));
EXPECT_EQ("10000", fp_to_string(1e4));
EXPECT_EQ("100000", fp_to_string(1e5));
EXPECT_EQ("1e+06", fp_to_string(1e6));
EXPECT_EQ("1e+07", fp_to_string(1e7));
EXPECT_EQ("1e+08", fp_to_string(1e8));
EXPECT_EQ("1e+09", fp_to_string(1e9));
// powers of ten <= 1 print the same way as std::stringstream
EXPECT_EQ(convert_with_stringstream(1.), fp_to_string(1.));
EXPECT_EQ(convert_with_stringstream(1e-0), fp_to_string(1e-0));
EXPECT_EQ(convert_with_stringstream(1e-1), fp_to_string(1e-1));
EXPECT_EQ(convert_with_stringstream(1e-2), fp_to_string(1e-2));
EXPECT_EQ(convert_with_stringstream(1e-3), fp_to_string(1e-3));
EXPECT_EQ(convert_with_stringstream(1e-4), fp_to_string(1e-4));
EXPECT_EQ(convert_with_stringstream(1e-5), fp_to_string(1e-5));
EXPECT_EQ(convert_with_stringstream(1e-6), fp_to_string(1e-6));
EXPECT_EQ(convert_with_stringstream(1e-7), fp_to_string(1e-7));
EXPECT_EQ(convert_with_stringstream(1e-8), fp_to_string(1e-8));
EXPECT_EQ(convert_with_stringstream(1e-9), fp_to_string(1e-9));
// By default, values above 1e-5 are printed without scientific notation
EXPECT_EQ("1", fp_to_string(1.));
EXPECT_EQ("1", fp_to_string(1e-0));
EXPECT_EQ("0.1", fp_to_string(1e-1));
EXPECT_EQ("0.01", fp_to_string(1e-2));
EXPECT_EQ("0.001", fp_to_string(1e-3));
EXPECT_EQ("0.0001", fp_to_string(1e-4));
EXPECT_EQ("1e-05", fp_to_string(1e-5));
EXPECT_EQ("1e-06", fp_to_string(1e-6));
EXPECT_EQ("1e-07", fp_to_string(1e-7));
EXPECT_EQ("1e-08", fp_to_string(1e-8));
EXPECT_EQ("1e-09", fp_to_string(1e-9));
// changing precision has the same effect as std::stringstream
EXPECT_EQ(convert_with_stringstream(123., 1), fp_to_string(123., 1));
EXPECT_EQ(convert_with_stringstream(1234567., 7), fp_to_string(1234567., 7));
EXPECT_EQ(convert_with_stringstream(12345.67, 7), fp_to_string(12345.67, 7));
EXPECT_EQ(convert_with_stringstream(1234567e-9, 7), fp_to_string(1234567e-9, 7));
EXPECT_EQ(convert_with_stringstream(1234567e-9, 1), fp_to_string(1234567e-9, 1));
// known example that is difficult to round: the carry runs through every digit
EXPECT_EQ("1", fp_to_string(0.9999, 2));
EXPECT_EQ("-1", fp_to_string(-0.9999, 2));
// some more random tests, positive values
EXPECT_EQ("1.25", fp_to_string(1.25));
EXPECT_EQ("34.34", fp_to_string(34.34));
EXPECT_EQ("1e+20", fp_to_string(1e+20));
EXPECT_EQ("1.1e+20", fp_to_string(1.1e+20));
EXPECT_EQ("1e-20", fp_to_string(1e-20));
EXPECT_EQ("1.1e-20", fp_to_string(1.1e-20));
EXPECT_EQ("1e-20", fp_to_string(0.1e-19));
EXPECT_EQ("1.1e-20", fp_to_string(0.11e-19));
EXPECT_EQ("19", fp_to_string(18.9, 2));
EXPECT_EQ("20", fp_to_string(19.9, 2));
EXPECT_EQ("2e+01", fp_to_string(19.9, 1));
EXPECT_EQ("1.2e-05", fp_to_string(1.234e-5, 2));
EXPECT_EQ("1.3e-05", fp_to_string(1.299e-5, 2));
// the same values negated
EXPECT_EQ("-1", fp_to_string(-1.));
EXPECT_EQ("-1.25", fp_to_string(-1.25));
EXPECT_EQ("-34.34", fp_to_string(-34.34));
EXPECT_EQ("-1e+20", fp_to_string(-1e+20));
EXPECT_EQ("-1.1e+20", fp_to_string(-1.1e+20));
EXPECT_EQ("-1e-20", fp_to_string(-1e-20));
EXPECT_EQ("-1.1e-20", fp_to_string(-1.1e-20));
EXPECT_EQ("-1e-20", fp_to_string(-0.1e-19));
EXPECT_EQ("-1.1e-20", fp_to_string(-0.11e-19));
EXPECT_EQ("-19", fp_to_string(-18.9, 2));
EXPECT_EQ("-20", fp_to_string(-19.9, 2));
EXPECT_EQ("-2e+01", fp_to_string(-19.9, 1));
EXPECT_EQ("-1.2e-05", fp_to_string(-1.234e-5, 2));
EXPECT_EQ("-1.3e-05", fp_to_string(-1.299e-5, 2));
}
// Checks fp_to_string for float arguments: against literal expected strings
// and against the output of a std::stringstream.
TEST(FpToStringTest, conversion_float) {
// Issue motivating fp_to_string function,
// https://github.com/jbeder/yaml-cpp/issues/1289
// Original problem at hand:
EXPECT_EQ("34.34", fp_to_string(34.34f));
EXPECT_EQ("56.56", fp_to_string(56.56f));
EXPECT_EQ("12.12", fp_to_string(12.12f));
EXPECT_EQ("78.78", fp_to_string(78.78f));
// powers of ten >= 1 print the same way as std::stringstream
EXPECT_EQ(convert_with_stringstream(1.f), fp_to_string(1.f));
EXPECT_EQ(convert_with_stringstream(1e0f), fp_to_string(1e0f));
EXPECT_EQ(convert_with_stringstream(1e1f), fp_to_string(1e1f));
EXPECT_EQ(convert_with_stringstream(1e2f), fp_to_string(1e2f));
EXPECT_EQ(convert_with_stringstream(1e3f), fp_to_string(1e3f));
EXPECT_EQ(convert_with_stringstream(1e4f), fp_to_string(1e4f));
EXPECT_EQ(convert_with_stringstream(1e5f), fp_to_string(1e5f));
EXPECT_EQ(convert_with_stringstream(1e6f), fp_to_string(1e6f));
EXPECT_EQ(convert_with_stringstream(1e7f), fp_to_string(1e7f));
EXPECT_EQ(convert_with_stringstream(1e8f), fp_to_string(1e8f));
EXPECT_EQ(convert_with_stringstream(1e9f), fp_to_string(1e9f));
// By default, values below 1e6 are printed without scientific notation
EXPECT_EQ("1", fp_to_string(1.f));
EXPECT_EQ("1", fp_to_string(1e0f));
EXPECT_EQ("10", fp_to_string(1e1f));
EXPECT_EQ("100", fp_to_string(1e2f));
EXPECT_EQ("1000", fp_to_string(1e3f));
EXPECT_EQ("10000", fp_to_string(1e4f));
EXPECT_EQ("100000", fp_to_string(1e5f));
EXPECT_EQ("1e+06", fp_to_string(1e6f));
EXPECT_EQ("1e+07", fp_to_string(1e7f));
EXPECT_EQ("1e+08", fp_to_string(1e8f));
EXPECT_EQ("1e+09", fp_to_string(1e9f));
// powers of ten <= 1 print the same way as std::stringstream
EXPECT_EQ(convert_with_stringstream(1.f), fp_to_string(1.f));
EXPECT_EQ(convert_with_stringstream(1e-0f), fp_to_string(1e-0f));
EXPECT_EQ(convert_with_stringstream(1e-1f), fp_to_string(1e-1f));
EXPECT_EQ(convert_with_stringstream(1e-2f), fp_to_string(1e-2f));
EXPECT_EQ(convert_with_stringstream(1e-3f), fp_to_string(1e-3f));
EXPECT_EQ(convert_with_stringstream(1e-4f), fp_to_string(1e-4f));
EXPECT_EQ(convert_with_stringstream(1e-5f), fp_to_string(1e-5f));
EXPECT_EQ(convert_with_stringstream(1e-6f), fp_to_string(1e-6f));
EXPECT_EQ(convert_with_stringstream(1e-7f), fp_to_string(1e-7f));
EXPECT_EQ(convert_with_stringstream(1e-8f), fp_to_string(1e-8f));
EXPECT_EQ(convert_with_stringstream(1e-9f), fp_to_string(1e-9f));
// By default, values above 1e-5 are printed without scientific notation
EXPECT_EQ("1", fp_to_string(1.f));
EXPECT_EQ("1", fp_to_string(1e-0f));
EXPECT_EQ("0.1", fp_to_string(1e-1f));
EXPECT_EQ("0.01", fp_to_string(1e-2f));
EXPECT_EQ("0.001", fp_to_string(1e-3f));
EXPECT_EQ("0.0001", fp_to_string(1e-4f));
EXPECT_EQ("1e-05", fp_to_string(1e-5f));
EXPECT_EQ("1e-06", fp_to_string(1e-6f));
EXPECT_EQ("1e-07", fp_to_string(1e-7f));
EXPECT_EQ("1e-08", fp_to_string(1e-8f));
EXPECT_EQ("1e-09", fp_to_string(1e-9f));
// changing precision has the same effect as std::stringstream
EXPECT_EQ(convert_with_stringstream(123.f, 1), fp_to_string(123.f, 1));
EXPECT_EQ(convert_with_stringstream(1234567.f, 7), fp_to_string(1234567.f, 7));
EXPECT_EQ(convert_with_stringstream(12345.67f, 7), fp_to_string(12345.67f, 7));
EXPECT_EQ(convert_with_stringstream(1234567e-9f, 7), fp_to_string(1234567e-9f, 7));
EXPECT_EQ(convert_with_stringstream(1234567e-9f, 1), fp_to_string(1234567e-9f, 1));
// known example that is difficult to round: the carry runs through every digit
EXPECT_EQ("1", fp_to_string(0.9999f, 2));
EXPECT_EQ("-1", fp_to_string(-0.9999f, 2));
// some more random tests, positive values
EXPECT_EQ("1.25", fp_to_string(1.25f));
EXPECT_EQ("34.34", fp_to_string(34.34f));
EXPECT_EQ("1e+20", fp_to_string(1e+20f));
EXPECT_EQ("1.1e+20", fp_to_string(1.1e+20f));
EXPECT_EQ("1e-20", fp_to_string(1e-20f));
EXPECT_EQ("1.1e-20", fp_to_string(1.1e-20f));
EXPECT_EQ("1e-20", fp_to_string(0.1e-19f));
EXPECT_EQ("1.1e-20", fp_to_string(0.11e-19f));
EXPECT_EQ("19", fp_to_string(18.9f, 2));
EXPECT_EQ("20", fp_to_string(19.9f, 2));
EXPECT_EQ("2e+01", fp_to_string(19.9f, 1));
EXPECT_EQ("1.2e-05", fp_to_string(1.234e-5f, 2));
EXPECT_EQ("1.3e-05", fp_to_string(1.299e-5f, 2));
// the same values negated
EXPECT_EQ("-1", fp_to_string(-1.f));
EXPECT_EQ("-1.25", fp_to_string(-1.25f));
EXPECT_EQ("-34.34", fp_to_string(-34.34f));
EXPECT_EQ("-1e+20", fp_to_string(-1e+20f));
EXPECT_EQ("-1.1e+20", fp_to_string(-1.1e+20f));
EXPECT_EQ("-1e-20", fp_to_string(-1e-20f));
EXPECT_EQ("-1.1e-20", fp_to_string(-1.1e-20f));
EXPECT_EQ("-1e-20", fp_to_string(-0.1e-19f));
EXPECT_EQ("-1.1e-20", fp_to_string(-0.11e-19f));
EXPECT_EQ("-19", fp_to_string(-18.9f, 2));
EXPECT_EQ("-20", fp_to_string(-19.9f, 2));
EXPECT_EQ("-2e+01", fp_to_string(-19.9f, 1));
EXPECT_EQ("-1.2e-05", fp_to_string(-1.234e-5f, 2));
EXPECT_EQ("-1.3e-05", fp_to_string(-1.299e-5f, 2));
}
} // namespace
} // namespace YAML

View File

@@ -104,9 +104,11 @@ TEST_F(EmitterTest, NumberPrecision) {
out << 3.1425926f;
out << 53.5893;
out << 2384626.4338;
out << 1999926.4338;
out << 9999926.4338;
out << EndSeq;
ExpectEmit("- 3.14\n- 54\n- 2.4e+06");
ExpectEmit("- 3.14\n- 54\n- 2.4e+06\n- 2e+06\n- 1e+07");
}
TEST_F(EmitterTest, SimpleSeq) {

View File

@@ -757,8 +757,15 @@ TEST_F(NodeEmitterTest, SimpleFlowSeqNode) {
node.push_back(1.5);
node.push_back(2.25);
node.push_back(3.125);
node.push_back(34.34);
node.push_back(56.56);
node.push_back(12.12);
node.push_back(78.78);
node.push_back(0.0003);
node.push_back(4000.);
node.push_back(1.5474251e+26f);
ExpectOutput("[1.5, 2.25, 3.125]", node);
ExpectOutput("[1.5, 2.25, 3.125, 34.34, 56.56, 12.12, 78.78, 0.0003, 4000, 1.5474251e+26]", node);
}
TEST_F(NodeEmitterTest, NestFlowSeqNode) {