Line data Source code
1 : // Copyright 2020-2023 Daniel Lemire
2 : // Copyright 2023 Matt Borland
3 : // Distributed under the Boost Software License, Version 1.0.
4 : // https://www.boost.org/LICENSE_1_0.txt
5 : //
6 : // Derivative of: https://github.com/fastfloat/fast_float
7 :
8 : #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_DIGIT_COMPARISON_HPP
9 : #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_FASTFLOAT_DIGIT_COMPARISON_HPP
10 :
11 : #include <boost/json/detail/charconv/detail/fast_float/float_common.hpp>
12 : #include <boost/json/detail/charconv/detail/fast_float/bigint.hpp>
13 : #include <boost/json/detail/charconv/detail/fast_float/ascii_number.hpp>
14 : #include <algorithm>
15 : #include <cstdint>
16 : #include <cstring>
17 : #include <iterator>
18 :
19 : namespace boost { namespace json { namespace detail { namespace charconv { namespace detail { namespace fast_float {
20 :
21 : // 1e0 to 1e19
22 : constexpr static uint64_t powers_of_ten_uint64[] = {
23 : 1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL,
24 : 1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL,
25 : 100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL,
26 : 1000000000000000000UL, 10000000000000000000UL};
27 :
28 : // calculate the exponent, in scientific notation, of the number.
29 : // this algorithm is not even close to optimized, but it has no practical
30 : // effect on performance: in order to have a faster algorithm, we'd need
31 : // to slow down performance for faster algorithms, and this is still fast.
32 : template <typename UC>
33 : BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
34 : int32_t scientific_exponent(parsed_number_string_t<UC> & num) noexcept {
35 2986 : uint64_t mantissa = num.mantissa;
36 2986 : int32_t exponent = int32_t(num.exponent);
37 14930 : while (mantissa >= 10000) {
38 11944 : mantissa /= 10000;
39 11944 : exponent += 4;
40 : }
41 5972 : while (mantissa >= 100) {
42 2986 : mantissa /= 100;
43 2986 : exponent += 2;
44 : }
45 2986 : while (mantissa >= 10) {
46 0 : mantissa /= 10;
47 0 : exponent += 1;
48 : }
49 2986 : return exponent;
50 : }
51 :
52 : // this converts a native floating-point number to an extended-precision float.
53 : template <typename T>
54 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
55 : adjusted_mantissa to_extended(T value) noexcept {
56 : using equiv_uint = typename binary_format<T>::equiv_uint;
57 1611 : constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
58 1611 : constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
59 1611 : constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
60 :
61 1611 : adjusted_mantissa am;
62 1611 : int32_t bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
63 : equiv_uint bits;
64 : #ifdef BOOST_JSON_HAS_BIT_CAST
65 : bits = std::bit_cast<equiv_uint>(value);
66 : #else
67 1611 : ::memcpy(&bits, &value, sizeof(T));
68 : #endif
69 1611 : if ((bits & exponent_mask) == 0) {
70 : // denormal
71 0 : am.power2 = 1 - bias;
72 0 : am.mantissa = bits & mantissa_mask;
73 : } else {
74 : // normal
75 1611 : am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
76 1611 : am.power2 -= bias;
77 1611 : am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
78 : }
79 :
80 1611 : return am;
81 : }
82 :
83 : // get the extended precision value of the halfway point between b and b+u.
84 : // we are given a native float that represents b, so we need to adjust it
85 : // halfway between b and b+u.
86 : template <typename T>
87 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
88 : adjusted_mantissa to_extended_halfway(T value) noexcept {
89 1611 : adjusted_mantissa am = to_extended(value);
90 1611 : am.mantissa <<= 1;
91 1611 : am.mantissa += 1;
92 1611 : am.power2 -= 1;
93 1611 : return am;
94 : }
95 :
96 : // round an extended-precision float to the nearest machine float.
97 : template <typename T, typename callback>
98 : BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
99 : void round(adjusted_mantissa& am, callback cb) noexcept {
100 4597 : int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
101 4597 : if (-am.power2 >= mantissa_shift) {
102 : // have a denormal float
103 0 : int32_t shift = -am.power2 + 1;
104 0 : cb(am, std::min<int32_t>(shift, 64));
105 : // check for round-up: if rounding-nearest carried us to the hidden bit.
106 0 : am.power2 = (am.mantissa < (uint64_t(1) << binary_format<T>::mantissa_explicit_bits())) ? 0 : 1;
107 0 : return;
108 : }
109 :
110 : // have a normal float, use the default shift.
111 4597 : cb(am, mantissa_shift);
112 :
113 : // check for carry
114 4597 : if (am.mantissa >= (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
115 0 : am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
116 0 : am.power2++;
117 : }
118 :
119 : // check for infinite: we could have carried to an infinite power
120 4597 : am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
121 4597 : if (am.power2 >= binary_format<T>::infinite_power()) {
122 0 : am.power2 = binary_format<T>::infinite_power();
123 0 : am.mantissa = 0;
124 : }
125 : }
126 :
127 : template <typename callback>
128 : BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
129 : void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept {
130 2986 : const uint64_t mask
131 : = (shift == 64)
132 0 : ? UINT64_MAX
133 2986 : : (uint64_t(1) << shift) - 1;
134 2986 : const uint64_t halfway
135 : = (shift == 0)
136 2986 : ? 0
137 2986 : : uint64_t(1) << (shift - 1);
138 2986 : uint64_t truncated_bits = am.mantissa & mask;
139 2986 : bool is_above = truncated_bits > halfway;
140 2986 : bool is_halfway = truncated_bits == halfway;
141 :
142 : // shift digits into position
143 2986 : if (shift == 64) {
144 0 : am.mantissa = 0;
145 : } else {
146 2986 : am.mantissa >>= shift;
147 : }
148 2986 : am.power2 += shift;
149 :
150 2986 : bool is_odd = (am.mantissa & 1) == 1;
151 2986 : am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
152 2986 : }
153 :
154 : BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
155 : void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
156 1611 : if (shift == 64) {
157 0 : am.mantissa = 0;
158 : } else {
159 1611 : am.mantissa >>= shift;
160 : }
161 1611 : am.power2 += shift;
162 1611 : }
163 : template <typename UC>
164 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
165 : void skip_zeros(UC const * & first, UC const * last) noexcept {
166 : uint64_t val;
167 5972 : while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
168 2986 : ::memcpy(&val, first, sizeof(uint64_t));
169 2986 : if (val != int_cmp_zeros<UC>()) {
170 2986 : break;
171 : }
172 0 : first += int_cmp_len<UC>();
173 : }
174 2986 : while (first != last) {
175 2986 : if (*first != UC('0')) {
176 2986 : break;
177 : }
178 0 : first++;
179 : }
180 2986 : }
181 :
182 : // determine if any non-zero digits were truncated.
183 : // all characters must be valid digits.
184 : template <typename UC>
185 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
186 : bool is_truncated(UC const * first, UC const * last) noexcept {
187 : // do 8-bit optimizations, can just compare to 8 literal 0s.
188 : uint64_t val;
189 0 : while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
190 0 : ::memcpy(&val, first, sizeof(uint64_t));
191 0 : if (val != int_cmp_zeros<UC>()) {
192 0 : return true;
193 : }
194 0 : first += int_cmp_len<UC>();
195 : }
196 0 : while (first != last) {
197 0 : if (*first != UC('0')) {
198 0 : return true;
199 : }
200 0 : ++first;
201 : }
202 0 : return false;
203 : }
204 : template <typename UC>
205 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
206 : bool is_truncated(span<const UC> s) noexcept {
207 0 : return is_truncated(s.ptr, s.ptr + s.len());
208 : }
209 :
210 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
211 : void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept {
212 : // currently unused
213 : }
214 :
215 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
216 : void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept {
217 : // currently unused
218 : }
219 :
220 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
221 : void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
222 11932 : value = value * 100000000 + parse_eight_digits_unrolled(p);
223 11932 : p += 8;
224 11932 : counter += 8;
225 11932 : count += 8;
226 11932 : }
227 :
228 : template <typename UC>
229 : BOOST_FORCEINLINE BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
230 : void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept {
231 21125 : value = value * 10 + limb(*p - UC('0'));
232 21125 : p++;
233 21125 : counter++;
234 21125 : count++;
235 21125 : }
236 :
237 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
238 : void add_native(bigint& big, limb power, limb value) noexcept {
239 0 : big.mul(power);
240 9437 : big.add(value);
241 9437 : }
242 :
243 : BOOST_FORCEINLINE BOOST_JSON_FASTFLOAT_CONSTEXPR20
244 : void round_up_bigint(bigint& big, size_t& count) noexcept {
245 : // need to round-up the digits, but need to avoid rounding
246 : // ....9999 to ...10000, which could cause a false halfway point.
247 : add_native(big, 10, 1);
248 0 : count++;
249 0 : }
250 :
251 : // parse the significant digits into a big integer
252 : template <typename UC>
253 : inline BOOST_JSON_FASTFLOAT_CONSTEXPR20
254 2986 : void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_digits, size_t& digits) noexcept {
255 : // try to minimize the number of big integer and scalar multiplication.
256 : // therefore, try to parse 8 digits at a time, and multiply by the largest
257 : // scalar value (9 or 19 digits) for each step.
258 2986 : size_t counter = 0;
259 2986 : digits = 0;
260 2986 : limb value = 0;
261 : #ifdef BOOST_JSON_FASTFLOAT_64BIT_LIMB
262 2986 : constexpr size_t step = 19;
263 : #else
264 : constexpr size_t step = 9;
265 : #endif
266 :
267 : // process all integer digits.
268 2986 : UC const * p = num.integer.ptr;
269 2986 : UC const * pend = p + num.integer.len();
270 : skip_zeros(p, pend);
271 : // process all digits, in increments of step per loop
272 8073 : while (p != pend) {
273 : if (std::is_same<UC,char>::value) {
274 22118 : while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
275 : parse_eight_digits(p, value, counter, digits);
276 : }
277 : }
278 16053 : while (counter < step && p != pend && digits < max_digits) {
279 : parse_one_digit(p, value, counter, digits);
280 : }
281 5087 : if (digits == max_digits) {
282 : // add the temporary value, then check if we've truncated any digits
283 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
284 0 : bool truncated = is_truncated(p, pend);
285 0 : if (num.fraction.ptr != nullptr) {
286 0 : truncated |= is_truncated(num.fraction);
287 : }
288 0 : if (truncated) {
289 : round_up_bigint(result, digits);
290 : }
291 0 : return;
292 : } else {
293 5087 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
294 5087 : counter = 0;
295 5087 : value = 0;
296 : }
297 : }
298 :
299 : // add our fraction digits, if they're available.
300 2986 : if (num.fraction.ptr != nullptr) {
301 2980 : p = num.fraction.ptr;
302 2980 : pend = p + num.fraction.len();
303 2980 : if (digits == 0) {
304 : skip_zeros(p, pend);
305 : }
306 : // process all digits, in increments of step per loop
307 7330 : while (p != pend) {
308 : if (std::is_same<UC,char>::value) {
309 20620 : while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
310 : parse_eight_digits(p, value, counter, digits);
311 : }
312 : }
313 14509 : while (counter < step && p != pend && digits < max_digits) {
314 : parse_one_digit(p, value, counter, digits);
315 : }
316 4350 : if (digits == max_digits) {
317 : // add the temporary value, then check if we've truncated any digits
318 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
319 0 : bool truncated = is_truncated(p, pend);
320 0 : if (truncated) {
321 : round_up_bigint(result, digits);
322 : }
323 0 : return;
324 : } else {
325 4350 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
326 4350 : counter = 0;
327 4350 : value = 0;
328 : }
329 : }
330 : }
331 :
332 2986 : if (counter != 0) {
333 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
334 : }
335 : }
336 :
337 : template <typename T>
338 : inline BOOST_JSON_FASTFLOAT_CONSTEXPR20
339 1375 : adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept {
340 1375 : bigmant.pow10(uint32_t(exponent));
341 1375 : adjusted_mantissa answer;
342 : bool truncated;
343 1375 : answer.mantissa = bigmant.hi64(truncated);
344 1375 : int bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
345 1375 : answer.power2 = bigmant.bit_length() - 64 + bias;
346 :
347 1375 : round<T>(answer, [truncated](adjusted_mantissa& a, int32_t shift) {
348 1375 : round_nearest_tie_even(a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
349 1375 : return is_above || (is_halfway && truncated) || (is_odd && is_halfway);
350 : });
351 : });
352 :
353 1375 : return answer;
354 : }
355 :
356 : // the scaling here is quite simple: we have, for the real digits `m * 10^e`,
357 : // and for the theoretical digits `n * 2^f`. Since `e` is always negative,
358 : // to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`.
359 : // we then need to scale by `2^(f- e)`, and then the two significant digits
360 : // are of the same magnitude.
361 : template <typename T>
362 : inline BOOST_JSON_FASTFLOAT_CONSTEXPR20
363 1611 : adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
364 1611 : bigint& real_digits = bigmant;
365 1611 : int32_t real_exp = exponent;
366 :
367 : // get the value of `b`, rounded down, and get a bigint representation of b+h
368 1611 : adjusted_mantissa am_b = am;
369 : // gcc7 buf: use a lambda to remove the noexcept qualifier bug with -Wnoexcept-type.
370 3222 : round<T>(am_b, [](adjusted_mantissa&a, int32_t shift) { round_down(a, shift); });
371 : T b;
372 1611 : to_float(false, am_b, b);
373 1611 : adjusted_mantissa theor = to_extended_halfway(b);
374 1611 : bigint theor_digits(theor.mantissa);
375 1611 : int32_t theor_exp = theor.power2;
376 :
377 : // scale real digits and theor digits to be same power.
378 1611 : int32_t pow2_exp = theor_exp - real_exp;
379 1611 : uint32_t pow5_exp = uint32_t(-real_exp);
380 1611 : if (pow5_exp != 0) {
381 1611 : theor_digits.pow5(pow5_exp);
382 : }
383 1611 : if (pow2_exp > 0) {
384 140 : theor_digits.pow2(uint32_t(pow2_exp));
385 1471 : } else if (pow2_exp < 0) {
386 1469 : real_digits.pow2(uint32_t(-pow2_exp));
387 : }
388 :
389 : // compare digits, and use it to director rounding
390 1611 : int ord = real_digits.compare(theor_digits);
391 1611 : adjusted_mantissa answer = am;
392 1611 : round<T>(answer, [ord](adjusted_mantissa& a, int32_t shift) {
393 1611 : round_nearest_tie_even(a, shift, [ord](bool is_odd, bool, bool) -> bool {
394 1611 : if (ord > 0) {
395 767 : return true;
396 844 : } else if (ord < 0) {
397 844 : return false;
398 : } else {
399 0 : return is_odd;
400 : }
401 : });
402 : });
403 :
404 1611 : return answer;
405 : }
406 :
407 : // parse the significant digits as a big integer to unambiguously round
408 : // the significant digits. here, we are trying to determine how to round
409 : // an extended float representation close to `b+h`, halfway between `b`
410 : // (the float rounded-down) and `b+u`, the next positive float. this
411 : // algorithm is always correct, and uses one of two approaches. when
412 : // the exponent is positive relative to the significant digits (such as
413 : // 1234), we create a big-integer representation, get the high 64-bits,
414 : // determine if any lower bits are truncated, and use that to direct
415 : // rounding. in case of a negative exponent relative to the significant
416 : // digits (such as 1.2345), we create a theoretical representation of
417 : // `b` as a big-integer type, scaled to the same binary exponent as
418 : // the actual digits. we then compare the big integer representations
419 : // of both, and use that to direct rounding.
420 : template <typename T, typename UC>
421 : inline BOOST_JSON_FASTFLOAT_CONSTEXPR20
422 2986 : adjusted_mantissa digit_comp(parsed_number_string_t<UC>& num, adjusted_mantissa am) noexcept {
423 : // remove the invalid exponent bias
424 2986 : am.power2 -= invalid_am_bias;
425 :
426 2986 : int32_t sci_exp = scientific_exponent(num);
427 2986 : size_t max_digits = binary_format<T>::max_digits();
428 2986 : size_t digits = 0;
429 2986 : bigint bigmant;
430 2986 : parse_mantissa(bigmant, num, max_digits, digits);
431 : // can't underflow, since digits is at most max_digits.
432 2986 : int32_t exponent = sci_exp + 1 - int32_t(digits);
433 2986 : if (exponent >= 0) {
434 1375 : return positive_digit_comp<T>(bigmant, exponent);
435 : } else {
436 1611 : return negative_digit_comp<T>(bigmant, am, exponent);
437 : }
438 : }
439 :
440 : }}}}}} // namespace fast_float
441 :
442 : #endif
|