LCOV - code coverage report
Current view: top level - json/detail/charconv/detail/fast_float - ascii_number.hpp Coverage Total Hit
Test: coverage_remapped.info Lines: 88.0 % 108 95
Test Date: 2026-02-13 18:42:28 Functions: - 0 0

            Line data    Source code
       1              : // Copyright 2020-2023 Daniel Lemire
       2              : // Copyright 2023 Matt Borland
       3              : // Distributed under the Boost Software License, Version 1.0.
       4              : // https://www.boost.org/LICENSE_1_0.txt
       5              : //
       6              : // Derivative of: https://github.com/fastfloat/fast_float
       7              : 
       8              : #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_ASCII_NUMBER_HPP
       9              : #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_ASCII_NUMBER_HPP
      10              : 
      11              : #include <boost/endian/conversion.hpp>
      12              : #include <boost/json/detail/charconv/detail/fast_float/float_common.hpp>
      13              : #include <cctype>
      14              : #include <cstdint>
      15              : #include <cstring>
      16              : #include <iterator>
      17              : 
      18              : namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float {
      19              : 
      20              : // Next function can be micro-optimized, but compilers are entirely
      21              : // able to optimize it well.
      22              : template <typename UC>
      23              : BOOST_FORCEINLINE constexpr bool is_integer(UC c) noexcept {
      24     29053321 :   return !(c > UC('9') || c < UC('0'));
      25              : }
      26              : 
      27              : BOOST_FORCEINLINE constexpr uint64_t byteswap(uint64_t val) {
      28              :   return (val & 0xFF00000000000000) >> 56
      29              :     | (val & 0x00FF000000000000) >> 40
      30              :     | (val & 0x0000FF0000000000) >> 24
      31              :     | (val & 0x000000FF00000000) >> 8
      32              :     | (val & 0x00000000FF000000) << 8
      33              :     | (val & 0x0000000000FF0000) << 24
      34              :     | (val & 0x000000000000FF00) << 40
      35              :     | (val & 0x00000000000000FF) << 56;
      36              : }
      37              : 
      38              : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
      39              : uint64_t read_u64(const char *chars) {
      40      4835760 :   if (cpp20_and_in_constexpr()) {
      41            0 :     uint64_t val = 0;
      42            0 :     for(int i = 0; i < 8; ++i) {
      43            0 :       val |= uint64_t(*chars) << (i*8);
      44            0 :       ++chars;
      45              :     }
      46            0 :     return val;
      47              :   }
      48              :   uint64_t val;
      49      4835760 :   ::memcpy(&val, chars, sizeof(uint64_t));
      50      4835760 :   endian::little_to_native_inplace(val);
      51      4835760 :   return val;
      52              : }
      53              : 
      54              : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
      55              : void write_u64(uint8_t *chars, uint64_t val) {
      56              :   if (cpp20_and_in_constexpr()) {
      57              :     for(int i = 0; i < 8; ++i) {
      58              :       *chars = uint8_t(val);
      59              :       val >>= 8;
      60              :       ++chars;
      61              :     }
      62              :     return;
      63              :   }
      64              :   endian::native_to_little_inplace(val);
      65              :   ::memcpy(chars, &val, sizeof(uint64_t));
      66              : }
      67              : 
      68              : // credit  @aqrit
      69              : BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
      70              : uint32_t parse_eight_digits_unrolled(uint64_t val) {
      71      2151895 :   constexpr uint64_t mask = 0x000000FF000000FF;
      72      2151895 :   constexpr uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
      73      2151895 :   constexpr uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
      74      2151895 :   val -= 0x3030303030303030;
      75      2151895 :   val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
      76      2151895 :   val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
      77      2151895 :   return uint32_t(val);
      78              : }
      79              : 
      80              : BOOST_FORCEINLINE constexpr
      81              : uint32_t parse_eight_digits_unrolled(const char16_t *)  noexcept  {
      82              :   return 0;
      83              : }
      84              : 
      85              : BOOST_FORCEINLINE constexpr
      86              : uint32_t parse_eight_digits_unrolled(const char32_t *)  noexcept  {
      87              :   return 0;
      88              : }
      89              : 
      90              : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
      91              : uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
      92      4303790 :   return parse_eight_digits_unrolled(read_u64(chars));
      93              : }
      94              : 
      95              : // credit @aqrit
      96              : BOOST_FORCEINLINE constexpr bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
      97      2683865 :   return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080));
      98              : }
      99              : 
     100              : BOOST_FORCEINLINE constexpr
     101              : bool is_made_of_eight_digits_fast(const char16_t *)  noexcept  {
     102              :   return false;
     103              : }
     104              : 
     105              : BOOST_FORCEINLINE constexpr
     106              : bool is_made_of_eight_digits_fast(const char32_t *)  noexcept  {
     107              :   return false;
     108              : }
     109              : 
     110              : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
     111              : bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
     112      5367730 :   return is_made_of_eight_digits_fast(read_u64(chars));
     113              : }
     114              : 
     115              : template <typename UC>
     116              : struct parsed_number_string_t {
     117              :   int64_t exponent{0};
     118              :   uint64_t mantissa{0};
     119              :   UC const * lastmatch{nullptr};
     120              :   bool negative{false};
     121              :   bool valid{false};
     122              :   bool too_many_digits{false};
     123              :   // contains the range of the significant digits
     124              :   span<const UC> integer{};  // non-nullable
     125              :   span<const UC> fraction{}; // nullable
     126              : };
     127              : using byte_span = span<char>;
     128              : using parsed_number_string = parsed_number_string_t<char>;
     129              : // Assuming that you use no more than 19 digits, this will
     130              : // parse an ASCII string.
     131              : template <typename UC>
     132              : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
     133              : parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, parse_options_t<UC> options) noexcept {
     134      1009310 :   chars_format const fmt = options.format;
     135      1009310 :   UC const decimal_point = options.decimal_point;
     136              : 
     137      1009310 :   parsed_number_string_t<UC> answer;
     138      1009310 :   answer.valid = false;
     139      1009310 :   answer.too_many_digits = false;
     140      1009310 :   answer.negative = (*p == UC('-'));
     141      1009310 :   if (*p == UC('-')) // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
     142              :   {
     143         3902 :     ++p;
     144         3902 :     if (p == pend) {
     145            0 :       return answer;
     146              :     }
     147         7804 :     if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
     148            0 :       return answer;
     149              :     }
     150              :   }
     151      1009310 :   UC const * const start_digits = p;
     152              : 
     153      1009310 :   uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
     154              : 
     155     40852898 :   while ((p != pend) && is_integer(*p)) {
     156              :     // a multiplication by 10 is cheaper than an arbitrary integer
     157              :     // multiplication
     158     19417139 :     i = 10 * i +
     159     19417139 :         uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later
     160     19417139 :     ++p;
     161              :   }
     162      1009310 :   UC const * const end_of_integer_part = p;
     163      1009310 :   int64_t digit_count = int64_t(end_of_integer_part - start_digits);
     164      1009310 :   answer.integer = span<const UC>(start_digits, size_t(digit_count));
     165      1009310 :   int64_t exponent = 0;
     166      1009310 :   if ((p != pend) && (*p == decimal_point)) {
     167      1006820 :     ++p;
     168      1006820 :     UC const * before = p;
     169              :     // can occur at most twice without overflowing, but let it occur more, since
     170              :     // for integers with many digits, digit parsing is the primary bottleneck.
     171              :     if (std::is_same<UC,char>::value) {
     172      6837468 :       while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
     173      2139963 :         i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
     174      2139963 :         p += 8;
     175              :       }
     176              :     }
     177      8855670 :     while ((p != pend) && is_integer(*p)) {
     178      3423102 :       uint8_t digit = uint8_t(*p - UC('0'));
     179      3423102 :       ++p;
     180      3423102 :       i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
     181              :     }
     182      1006820 :     exponent = before - p;
     183      1006820 :     answer.fraction = span<const UC>(before, size_t(p - before));
     184      1006820 :     digit_count -= exponent;
     185              :   }
     186              :   // we must have encountered at least one integer!
     187      1009310 :   if (digit_count == 0) {
     188            0 :     return answer;
     189              :   }
     190      1009310 :   int64_t exp_number = 0;            // explicit exponential part
     191      1009310 :   if (((unsigned)fmt & (unsigned)chars_format::scientific) && (p != pend) && ((UC('e') == *p) || (UC('E') == *p))) {
     192      1005136 :     UC const * location_of_e = p;
     193      1005136 :     ++p;
     194      1005136 :     bool neg_exp = false;
     195      1005136 :     if ((p != pend) && (UC('-') == *p)) {
     196       499687 :       neg_exp = true;
     197       499687 :       ++p;
     198       505449 :     } else if ((p != pend) && (UC('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
     199            0 :       ++p;
     200              :     }
     201      2010272 :     if ((p == pend) || !is_integer(*p)) {
     202            0 :       if(!((unsigned)fmt & (unsigned)chars_format::fixed)) {
     203              :         // We are in error.
     204            0 :         return answer;
     205              :       }
     206              :       // Otherwise, we will be ignoring the 'e'.
     207            0 :       p = location_of_e;
     208              :     } else {
     209      7389308 :       while ((p != pend) && is_integer(*p)) {
     210      3192086 :         uint8_t digit = uint8_t(*p - UC('0'));
     211      3192086 :         if (exp_number < 0x10000000) {
     212      3192086 :           exp_number = 10 * exp_number + digit;
     213              :         }
     214      3192086 :         ++p;
     215              :       }
     216      1005136 :       if(neg_exp) { exp_number = - exp_number; }
     217      1005136 :       exponent += exp_number;
     218              :     }
     219      1005136 :   } else {
     220              :     // If it scientific and not fixed, we have to bail out.
     221         4174 :     if(((unsigned)fmt & (unsigned)chars_format::scientific) && !((unsigned)fmt & (unsigned)chars_format::fixed))
     222              :     {
     223            0 :         return answer;
     224              :     }
     225              :   }
     226      1009310 :   answer.lastmatch = p;
     227      1009310 :   answer.valid = true;
     228              : 
     229              :   // If we frequently had to deal with long strings of digits,
     230              :   // we could extend our code by using a 128-bit integer instead
     231              :   // of a 64-bit integer. However, this is uncommon.
     232              :   //
     233              :   // We can deal with up to 19 digits.
     234      1009310 :   if (digit_count > 19) { // this is uncommon
     235              :     // It is possible that the integer had an overflow.
     236              :     // We have to handle the case where we have 0.0000somenumber.
     237              :     // We need to be mindful of the case where we only have zeroes...
     238              :     // E.g., 0.000000000...000.
     239      1003241 :     UC const * start = start_digits;
     240      2111117 :     while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
     241      1107876 :       if(*start == UC('0')) { digit_count --; }
     242      1107876 :       start++;
     243              :     }
     244      1003241 :     if (digit_count > 19) {
     245      1000712 :       answer.too_many_digits = true;
     246              :       // Let us start again, this time, avoiding overflows.
     247              :       // We don't need to check if is_integer, since we use the
     248              :       // pre-tokenized spans from above.
     249      1000712 :       i = 0;
     250      1000712 :       p = answer.integer.ptr;
     251      1000712 :       UC const * int_end = p + answer.integer.len();
     252      1000712 :       constexpr uint64_t minimal_nineteen_digit_integer{1000000000000000000};
     253     19947772 :       while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
     254     18947060 :         i = i * 10 + uint64_t(*p - UC('0'));
     255     18947060 :         ++p;
     256              :       }
     257      1000712 :       if (i >= minimal_nineteen_digit_integer) { // We have a big integers
     258       946260 :         exponent = end_of_integer_part - p + exp_number;
     259              :       } else { // We have a value with a fractional component.
     260        54452 :           p = answer.fraction.ptr;
     261        54452 :           UC const * frac_end = p + answer.fraction.len();
     262       121280 :           while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
     263        66828 :             i = i * 10 + uint64_t(*p - UC('0'));
     264        66828 :             ++p;
     265              :           }
     266        54452 :           exponent = answer.fraction.ptr - p + exp_number;
     267              :       }
     268              :       // We have now corrected both exponent and i, to a truncated value
     269              :     }
     270              :   }
     271      1009310 :   answer.exponent = exponent;
     272      1009310 :   answer.mantissa = i;
     273      1009310 :   return answer;
     274              : }
     275              : 
     276              : }}}}}} // namespace s
     277              : 
     278              : #endif
        

Generated by: LCOV version 2.3