From 396f41271f5b42dd7bc33bae563f632f7191df22 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 13 Sep 2023 19:17:45 -0400 Subject: [PATCH 1/6] Implement JSON rules --- include/fast_float/ascii_number.h | 24 +++++++++++++++++++++--- include/fast_float/float_common.h | 11 +++++++++-- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 9afcdc4e..eba8b9ad 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -273,6 +273,7 @@ template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t parse_number_string(UC const *p, UC const * pend, parse_options_t options) noexcept { chars_format const fmt = options.format; + parse_rules const rules = options.rules; UC const decimal_point = options.decimal_point; parsed_number_string_t answer; @@ -288,8 +289,15 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par if (p == pend) { return answer; } - if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot - return answer; + if (rules == parse_rules::json) { + if (!is_integer(*p)) { // a sign must be followed by an integer + return answer; + } + } else { + FASTFLOAT_DEBUG_ASSERT(rules == parse_rules::std); + if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot + return answer; + } } } UC const * const start_digits = p; @@ -306,8 +314,14 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par UC const * const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); answer.integer = span(start_digits, size_t(digit_count)); + // disallow leading zeros + if (rules == parse_rules::json && start_digits[0] == UC('0') && digit_count > 1) { + return answer; + } + int64_t exponent = 0; - if ((p != pend) && (*p == decimal_point)) { + const bool has_decimal_point = (p != pend) && (*p == decimal_point); + if (has_decimal_point) { ++p; UC const * before = p; // can occur at most twice without overflowing, but let it occur more, since @@ -327,6 +341,10 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par if (digit_count == 0) { return answer; } + // or at least two if a decimal point exists, with json rules + else if (rules == parse_rules::json && has_decimal_point && digit_count == 1) { + return answer; + } int64_t exp_number = 0; // explicit exponential part if ((fmt & chars_format::scientific) && (p != pend) && ((UC('e') == *p) || (UC('E') == *p))) { UC const * location_of_e = p; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 4a290f48..466d0942 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -19,6 +19,11 @@ enum chars_format { general = fixed | scientific }; +enum class parse_rules { + std, + json +}; + template struct from_chars_result_t { UC const* ptr; @@ -29,13 +34,15 @@ using from_chars_result = from_chars_result_t; template struct parse_options_t { constexpr explicit parse_options_t(chars_format fmt = chars_format::general, - UC dot = UC('.')) - : format(fmt), decimal_point(dot) {} + UC dot = UC('.'), parse_rules prules = parse_rules::std) + : format(fmt), decimal_point(dot), rules(prules) {} /** Which number formats are accepted */ chars_format format; /** The character used as decimal point */ UC decimal_point; + /** Rules to use for parsing */ + parse_rules rules; }; using parse_options = parse_options_t; From 3f250c5a987e695d0c4e99dda9d766374ba31ef6 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 13 Sep 2023 20:03:10 -0400 Subject: [PATCH 2/6] Use chars_format instead of parse_rules for parsing as JSON --- include/fast_float/ascii_number.h | 8 +++----- include/fast_float/float_common.h | 12 +++--------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index eba8b9ad..417b88be 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -273,7 +273,6 @@ template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t parse_number_string(UC const *p, UC const * pend, parse_options_t options) noexcept { chars_format const fmt = options.format; - parse_rules const rules = options.rules; UC const decimal_point = options.decimal_point; parsed_number_string_t answer; @@ -289,12 +288,11 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par if (p == pend) { return answer; } - if (rules == parse_rules::json) { + if (fmt == chars_format::json) { if (!is_integer(*p)) { // a sign must be followed by an integer return answer; } } else { - FASTFLOAT_DEBUG_ASSERT(rules == parse_rules::std); if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot return answer; } @@ -315,7 +313,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par int64_t digit_count = int64_t(end_of_integer_part - start_digits); answer.integer = span(start_digits, size_t(digit_count)); // disallow leading zeros - if (rules == parse_rules::json && start_digits[0] == UC('0') && digit_count > 1) { + if (fmt == chars_format::json && start_digits[0] == UC('0') && digit_count > 1) { return answer; } @@ -342,7 +340,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par return answer; } // or at least two if a decimal point exists, with json rules - else if (rules == parse_rules::json && has_decimal_point && digit_count == 1) { + else if (fmt == chars_format::json && has_decimal_point && digit_count == 1) { return answer; } int64_t exp_number = 0; // explicit exponential part diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 466d0942..70500730 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -16,14 +16,10 @@ enum chars_format { scientific = 1 << 0, fixed = 1 << 2, hex = 1 << 3, + json = 1 << 4 | fixed | scientific, general = fixed | scientific }; -enum class parse_rules { - std, - json -}; - template struct from_chars_result_t { UC const* ptr; @@ -34,15 +30,13 @@ using from_chars_result = from_chars_result_t; template struct parse_options_t { constexpr explicit parse_options_t(chars_format fmt = chars_format::general, - UC dot = UC('.'), parse_rules prules = parse_rules::std) - : format(fmt), decimal_point(dot), rules(prules) {} + UC dot = UC('.')) + : format(fmt), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; /** The character used as decimal point */ UC decimal_point; - /** Rules to use for parsing */ - parse_rules rules; }; using parse_options = parse_options_t; From 4de8d715e6241a379ce51cb55e5d5264c5616b9c Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 13 Sep 2023 21:07:40 -0400 Subject: [PATCH 3/6] Add json fmt test --- include/fast_float/ascii_number.h | 21 +++++++++-------- tests/CMakeLists.txt | 2 ++ tests/json_fmt.cpp | 38 +++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 tests/json_fmt.cpp diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 417b88be..f9680ec7 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -312,9 +312,11 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par UC const * const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); answer.integer = span(start_digits, size_t(digit_count)); - // disallow leading zeros - if (fmt == chars_format::json && start_digits[0] == UC('0') && digit_count > 1) { - return answer; + if (fmt == chars_format::json) { + // at least 1 digit in integer part, without leading zeros + if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) { + return answer; + } } int64_t exponent = 0; @@ -335,12 +337,13 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } - // we must have encountered at least one integer! - if (digit_count == 0) { - return answer; - } - // or at least two if a decimal point exists, with json rules - else if (fmt == chars_format::json && has_decimal_point && digit_count == 1) { + if (fmt == chars_format::json) { + // at least 1 digit in fractional part + if (has_decimal_point && exponent == 0) { + return answer; + } + } + else if (digit_count == 0) { // we must have encountered at least one integer! return answer; } int64_t exp_number = 0; // explicit exponential part diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 37f6c7f8..e58bea8e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -72,6 +72,8 @@ fast_float_add_cpp_test(long_test) fast_float_add_cpp_test(powersoffive_hardround) fast_float_add_cpp_test(string_test) +fast_float_add_cpp_test(json_fmt) + option(FASTFLOAT_EXHAUSTIVE "Exhaustive tests" OFF) diff --git a/tests/json_fmt.cpp b/tests/json_fmt.cpp new file mode 100644 index 00000000..0c1c365e --- /dev/null +++ b/tests/json_fmt.cpp @@ -0,0 +1,38 @@ + +#include +#include +#include + +#include "fast_float/fast_float.h" + +int main() +{ + const std::vector expected{ -0.2, 0.02, 0.002, 1., 0. }; + const std::vector accept{ "-0.2", "0.02", "0.002", "1e+0000", "0e-2" }; + const std::vector reject{ "-.2", "00.02", "0.e+1", "00.e+1", ".25"}; + const auto fmt = fast_float::chars_format::json; + + for (std::size_t i = 0; i < accept.size(); ++i) + { + const auto& f = accept[i]; + double result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result, fmt); + if (answer.ec != std::errc() || result != expected[i]) { + std::cerr << "json fmt rejected valid json " << f << std::endl; + return EXIT_FAILURE; + } + } + + for (std::size_t i = 0; i < reject.size(); ++i) + { + const auto& f = reject[i]; + double result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result, fmt); + if (answer.ec == std::errc()) { + std::cerr << "json fmt accepted invalid json " << f << std::endl; + return EXIT_FAILURE; + } + } + + return EXIT_SUCCESS; +} \ No newline at end of file From 2395482ad5334e5d4478afa7fbf92113594f7398 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Thu, 14 Sep 2023 19:50:21 -0400 Subject: [PATCH 4/6] Ignore FASTFLOAT_ALLOWS_LEADING_PLUS for JSON format --- include/fast_float/ascii_number.h | 2 +- tests/json_fmt.cpp | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index f9680ec7..21fbac45 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -280,7 +280,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par answer.too_many_digits = false; answer.negative = (*p == UC('-')); #ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == UC('-')) || (*p == UC('+'))) { + if ((*p == UC('-')) || (fmt != chars_format::json && *p == UC('+'))) { #else if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here #endif diff --git a/tests/json_fmt.cpp b/tests/json_fmt.cpp index 0c1c365e..b1c39774 100644 --- a/tests/json_fmt.cpp +++ b/tests/json_fmt.cpp @@ -3,13 +3,16 @@ #include #include +// test that this option is ignored +#define FASTFLOAT_ALLOWS_LEADING_PLUS + #include "fast_float/fast_float.h" int main() { const std::vector expected{ -0.2, 0.02, 0.002, 1., 0. }; const std::vector accept{ "-0.2", "0.02", "0.002", "1e+0000", "0e-2" }; - const std::vector reject{ "-.2", "00.02", "0.e+1", "00.e+1", ".25"}; + const std::vector reject{ "-.2", "00.02", "0.e+1", "00.e+1", ".25", "+0.25"}; const auto fmt = fast_float::chars_format::json; for (std::size_t i = 0; i < accept.size(); ++i) From ce562d9c65fee59e71dadff9a29ba86a7fba3b68 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Thu, 14 Sep 2023 20:51:26 -0400 Subject: [PATCH 5/6] Disallow inf/nan in json mode --- include/fast_float/ascii_number.h | 8 ++++---- include/fast_float/float_common.h | 5 ++++- include/fast_float/parse_number.h | 8 +++++++- tests/json_fmt.cpp | 2 +- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 21fbac45..9653889a 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -280,7 +280,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par answer.too_many_digits = false; answer.negative = (*p == UC('-')); #ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == UC('-')) || (fmt != chars_format::json && *p == UC('+'))) { + if ((*p == UC('-')) || (!(fmt & FASTFLOAT_JSONFMT) && *p == UC('+'))) { #else if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here #endif @@ -288,7 +288,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par if (p == pend) { return answer; } - if (fmt == chars_format::json) { + if (fmt & FASTFLOAT_JSONFMT) { if (!is_integer(*p)) { // a sign must be followed by an integer return answer; } @@ -312,7 +312,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par UC const * const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); answer.integer = span(start_digits, size_t(digit_count)); - if (fmt == chars_format::json) { + if (fmt & FASTFLOAT_JSONFMT) { // at least 1 digit in integer part, without leading zeros if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) { return answer; @@ -337,7 +337,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } - if (fmt == chars_format::json) { + if (fmt & FASTFLOAT_JSONFMT) { // at least 1 digit in fractional part if (has_decimal_point && exponent == 0) { return answer; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 70500730..c1016738 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -12,11 +12,14 @@ namespace fast_float { +#define FASTFLOAT_JSONFMT (1 << 5) + enum chars_format { scientific = 1 << 0, fixed = 1 << 2, hex = 1 << 3, - json = 1 << 4 | fixed | scientific, + no_infnan = 1 << 4, + json = FASTFLOAT_JSONFMT | fixed | scientific | no_infnan, general = fixed | scientific }; diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index e077b9d0..a011a8cb 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -164,7 +164,13 @@ from_chars_result_t from_chars_advanced(UC const * first, UC const * last, } parsed_number_string_t pns = parse_number_string(first, last, options); if (!pns.valid) { - return detail::parse_infnan(first, last, value); + if (options.format & chars_format::no_infnan) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } else { + return detail::parse_infnan(first, last, value); + } } answer.ec = std::errc(); // be optimistic diff --git a/tests/json_fmt.cpp b/tests/json_fmt.cpp index b1c39774..1fdd636d 100644 --- a/tests/json_fmt.cpp +++ b/tests/json_fmt.cpp @@ -12,7 +12,7 @@ int main() { const std::vector expected{ -0.2, 0.02, 0.002, 1., 0. }; const std::vector accept{ "-0.2", "0.02", "0.002", "1e+0000", "0e-2" }; - const std::vector reject{ "-.2", "00.02", "0.e+1", "00.e+1", ".25", "+0.25"}; + const std::vector reject{ "-.2", "00.02", "0.e+1", "00.e+1", ".25", "+0.25", "inf", "nan(snan)"}; const auto fmt = fast_float::chars_format::json; for (std::size_t i = 0; i < accept.size(); ++i) From 7b1fc2f95d000317496107b736037311cb67ba4b Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Thu, 14 Sep 2023 21:07:22 -0400 Subject: [PATCH 6/6] Add an option to allow inf/nan even in json mode - Most JSON parsers offer this option too --- include/fast_float/float_common.h | 1 + tests/json_fmt.cpp | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index c1016738..2ad9e8c2 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -20,6 +20,7 @@ enum chars_format { hex = 1 << 3, no_infnan = 1 << 4, json = FASTFLOAT_JSONFMT | fixed | scientific | no_infnan, + json_or_infnan = FASTFLOAT_JSONFMT | fixed | scientific, general = fixed | scientific }; diff --git a/tests/json_fmt.cpp b/tests/json_fmt.cpp index 1fdd636d..bdd32d94 100644 --- a/tests/json_fmt.cpp +++ b/tests/json_fmt.cpp @@ -10,16 +10,15 @@ int main() { - const std::vector expected{ -0.2, 0.02, 0.002, 1., 0. }; - const std::vector accept{ "-0.2", "0.02", "0.002", "1e+0000", "0e-2" }; - const std::vector reject{ "-.2", "00.02", "0.e+1", "00.e+1", ".25", "+0.25", "inf", "nan(snan)"}; - const auto fmt = fast_float::chars_format::json; + const std::vector expected{ -0.2, 0.02, 0.002, 1., 0., std::numeric_limits::infinity() }; + const std::vector accept{ "-0.2", "0.02", "0.002", "1e+0000", "0e-2", "inf" }; + const std::vector reject{ "-.2", "00.02", "0.e+1", "00.e+1", ".25", "+0.25", "inf", "nan(snan)" }; for (std::size_t i = 0; i < accept.size(); ++i) { const auto& f = accept[i]; double result; - auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result, fmt); + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result, fast_float::chars_format::json_or_infnan); if (answer.ec != std::errc() || result != expected[i]) { std::cerr << "json fmt rejected valid json " << f << std::endl; return EXIT_FAILURE; @@ -30,7 +29,7 @@ int main() { const auto& f = reject[i]; double result; - auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result, fmt); + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result, fast_float::chars_format::json); if (answer.ec == std::errc()) { std::cerr << "json fmt accepted invalid json " << f << std::endl; return EXIT_FAILURE;