Line data Source code
1 : // Copyright 2020-2023 Daniel Lemire
2 : // Copyright 2023 Matt Borland
3 : // Distributed under the Boost Software License, Version 1.0.
4 : // https://www.boost.org/LICENSE_1_0.txt
5 : //
6 : // Derivative of: https://github.com/fastfloat/fast_float
7 :
8 : #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_ASCII_NUMBER_HPP
9 : #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_ASCII_NUMBER_HPP
10 :
11 : #include <boost/endian/conversion.hpp>
12 : #include <boost/json/detail/charconv/detail/fast_float/float_common.hpp>
13 : #include <cctype>
14 : #include <cstdint>
15 : #include <cstring>
16 : #include <iterator>
17 :
18 : namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float {
19 :
20 : // Next function can be micro-optimized, but compilers are entirely
21 : // able to optimize it well.
22 : template <typename UC>
23 : BOOST_FORCEINLINE constexpr bool is_integer(UC c) noexcept {
24 29053321 : return !(c > UC('9') || c < UC('0'));
25 : }
26 :
27 : BOOST_FORCEINLINE constexpr uint64_t byteswap(uint64_t val) {
28 : return (val & 0xFF00000000000000) >> 56
29 : | (val & 0x00FF000000000000) >> 40
30 : | (val & 0x0000FF0000000000) >> 24
31 : | (val & 0x000000FF00000000) >> 8
32 : | (val & 0x00000000FF000000) << 8
33 : | (val & 0x0000000000FF0000) << 24
34 : | (val & 0x000000000000FF00) << 40
35 : | (val & 0x00000000000000FF) << 56;
36 : }
37 :
38 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
39 : uint64_t read_u64(const char *chars) {
40 4835760 : if (cpp20_and_in_constexpr()) {
41 0 : uint64_t val = 0;
42 0 : for(int i = 0; i < 8; ++i) {
43 0 : val |= uint64_t(*chars) << (i*8);
44 0 : ++chars;
45 : }
46 0 : return val;
47 : }
48 : uint64_t val;
49 4835760 : ::memcpy(&val, chars, sizeof(uint64_t));
50 4835760 : endian::little_to_native_inplace(val);
51 4835760 : return val;
52 : }
53 :
54 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
55 : void write_u64(uint8_t *chars, uint64_t val) {
56 : if (cpp20_and_in_constexpr()) {
57 : for(int i = 0; i < 8; ++i) {
58 : *chars = uint8_t(val);
59 : val >>= 8;
60 : ++chars;
61 : }
62 : return;
63 : }
64 : endian::native_to_little_inplace(val);
65 : ::memcpy(chars, &val, sizeof(uint64_t));
66 : }
67 :
68 : // credit @aqrit
69 : BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
70 : uint32_t parse_eight_digits_unrolled(uint64_t val) {
71 2151895 : constexpr uint64_t mask = 0x000000FF000000FF;
72 2151895 : constexpr uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
73 2151895 : constexpr uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
74 2151895 : val -= 0x3030303030303030;
75 2151895 : val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
76 2151895 : val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
77 2151895 : return uint32_t(val);
78 : }
79 :
80 : BOOST_FORCEINLINE constexpr
81 : uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept {
82 : return 0;
83 : }
84 :
85 : BOOST_FORCEINLINE constexpr
86 : uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept {
87 : return 0;
88 : }
89 :
90 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
91 : uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
92 4303790 : return parse_eight_digits_unrolled(read_u64(chars));
93 : }
94 :
95 : // credit @aqrit
96 : BOOST_FORCEINLINE constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
97 2683865 : return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080));
98 : }
99 :
100 : BOOST_FORCEINLINE constexpr
101 : bool is_made_of_eight_digits_fast(const char16_t *) noexcept {
102 : return false;
103 : }
104 :
105 : BOOST_FORCEINLINE constexpr
106 : bool is_made_of_eight_digits_fast(const char32_t *) noexcept {
107 : return false;
108 : }
109 :
110 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
111 : bool is_made_of_eight_digits_fast(const char *chars) noexcept {
112 5367730 : return is_made_of_eight_digits_fast(read_u64(chars));
113 : }
114 :
115 : template <typename UC>
116 : struct parsed_number_string_t {
117 : int64_t exponent{0};
118 : uint64_t mantissa{0};
119 : UC const * lastmatch{nullptr};
120 : bool negative{false};
121 : bool valid{false};
122 : bool too_many_digits{false};
123 : // contains the range of the significant digits
124 : span<const UC> integer{}; // non-nullable
125 : span<const UC> fraction{}; // nullable
126 : };
127 : using byte_span = span<char>;
128 : using parsed_number_string = parsed_number_string_t<char>;
129 : // Assuming that you use no more than 19 digits, this will
130 : // parse an ASCII string.
131 : template <typename UC>
132 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
133 : parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, parse_options_t<UC> options) noexcept {
134 1009310 : chars_format const fmt = options.format;
135 1009310 : UC const decimal_point = options.decimal_point;
136 :
137 1009310 : parsed_number_string_t<UC> answer;
138 1009310 : answer.valid = false;
139 1009310 : answer.too_many_digits = false;
140 1009310 : answer.negative = (*p == UC('-'));
141 1009310 : if (*p == UC('-')) // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
142 : {
143 3902 : ++p;
144 3902 : if (p == pend) {
145 0 : return answer;
146 : }
147 7804 : if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
148 0 : return answer;
149 : }
150 : }
151 1009310 : UC const * const start_digits = p;
152 :
153 1009310 : uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
154 :
155 40852898 : while ((p != pend) && is_integer(*p)) {
156 : // a multiplication by 10 is cheaper than an arbitrary integer
157 : // multiplication
158 19417139 : i = 10 * i +
159 19417139 : uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later
160 19417139 : ++p;
161 : }
162 1009310 : UC const * const end_of_integer_part = p;
163 1009310 : int64_t digit_count = int64_t(end_of_integer_part - start_digits);
164 1009310 : answer.integer = span<const UC>(start_digits, size_t(digit_count));
165 1009310 : int64_t exponent = 0;
166 1009310 : if ((p != pend) && (*p == decimal_point)) {
167 1006820 : ++p;
168 1006820 : UC const * before = p;
169 : // can occur at most twice without overflowing, but let it occur more, since
170 : // for integers with many digits, digit parsing is the primary bottleneck.
171 : if (std::is_same<UC,char>::value) {
172 6837468 : while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
173 2139963 : i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
174 2139963 : p += 8;
175 : }
176 : }
177 8855670 : while ((p != pend) && is_integer(*p)) {
178 3423102 : uint8_t digit = uint8_t(*p - UC('0'));
179 3423102 : ++p;
180 3423102 : i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
181 : }
182 1006820 : exponent = before - p;
183 1006820 : answer.fraction = span<const UC>(before, size_t(p - before));
184 1006820 : digit_count -= exponent;
185 : }
186 : // we must have encountered at least one integer!
187 1009310 : if (digit_count == 0) {
188 0 : return answer;
189 : }
190 1009310 : int64_t exp_number = 0; // explicit exponential part
191 1009310 : if (((unsigned)fmt & (unsigned)chars_format::scientific) && (p != pend) && ((UC('e') == *p) || (UC('E') == *p))) {
192 1005136 : UC const * location_of_e = p;
193 1005136 : ++p;
194 1005136 : bool neg_exp = false;
195 1005136 : if ((p != pend) && (UC('-') == *p)) {
196 499687 : neg_exp = true;
197 499687 : ++p;
198 505449 : } else if ((p != pend) && (UC('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
199 0 : ++p;
200 : }
201 2010272 : if ((p == pend) || !is_integer(*p)) {
202 0 : if(!((unsigned)fmt & (unsigned)chars_format::fixed)) {
203 : // We are in error.
204 0 : return answer;
205 : }
206 : // Otherwise, we will be ignoring the 'e'.
207 0 : p = location_of_e;
208 : } else {
209 7389308 : while ((p != pend) && is_integer(*p)) {
210 3192086 : uint8_t digit = uint8_t(*p - UC('0'));
211 3192086 : if (exp_number < 0x10000000) {
212 3192086 : exp_number = 10 * exp_number + digit;
213 : }
214 3192086 : ++p;
215 : }
216 1005136 : if(neg_exp) { exp_number = - exp_number; }
217 1005136 : exponent += exp_number;
218 : }
219 1005136 : } else {
220 : // If it scientific and not fixed, we have to bail out.
221 4174 : if(((unsigned)fmt & (unsigned)chars_format::scientific) && !((unsigned)fmt & (unsigned)chars_format::fixed))
222 : {
223 0 : return answer;
224 : }
225 : }
226 1009310 : answer.lastmatch = p;
227 1009310 : answer.valid = true;
228 :
229 : // If we frequently had to deal with long strings of digits,
230 : // we could extend our code by using a 128-bit integer instead
231 : // of a 64-bit integer. However, this is uncommon.
232 : //
233 : // We can deal with up to 19 digits.
234 1009310 : if (digit_count > 19) { // this is uncommon
235 : // It is possible that the integer had an overflow.
236 : // We have to handle the case where we have 0.0000somenumber.
237 : // We need to be mindful of the case where we only have zeroes...
238 : // E.g., 0.000000000...000.
239 1003241 : UC const * start = start_digits;
240 2111117 : while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
241 1107876 : if(*start == UC('0')) { digit_count --; }
242 1107876 : start++;
243 : }
244 1003241 : if (digit_count > 19) {
245 1000712 : answer.too_many_digits = true;
246 : // Let us start again, this time, avoiding overflows.
247 : // We don't need to check if is_integer, since we use the
248 : // pre-tokenized spans from above.
249 1000712 : i = 0;
250 1000712 : p = answer.integer.ptr;
251 1000712 : UC const * int_end = p + answer.integer.len();
252 1000712 : constexpr uint64_t minimal_nineteen_digit_integer{1000000000000000000};
253 19947772 : while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
254 18947060 : i = i * 10 + uint64_t(*p - UC('0'));
255 18947060 : ++p;
256 : }
257 1000712 : if (i >= minimal_nineteen_digit_integer) { // We have a big integers
258 946260 : exponent = end_of_integer_part - p + exp_number;
259 : } else { // We have a value with a fractional component.
260 54452 : p = answer.fraction.ptr;
261 54452 : UC const * frac_end = p + answer.fraction.len();
262 121280 : while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
263 66828 : i = i * 10 + uint64_t(*p - UC('0'));
264 66828 : ++p;
265 : }
266 54452 : exponent = answer.fraction.ptr - p + exp_number;
267 : }
268 : // We have now corrected both exponent and i, to a truncated value
269 : }
270 : }
271 1009310 : answer.exponent = exponent;
272 1009310 : answer.mantissa = i;
273 1009310 : return answer;
274 : }
275 :
276 : }}}}}} // namespace s
277 :
278 : #endif
|