diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/corelib/global/qnumeric_p.h | 49 |
1 files changed, 48 insertions, 1 deletions
diff --git a/src/corelib/global/qnumeric_p.h b/src/corelib/global/qnumeric_p.h index bdace21000f..64fb69c8f3a 100644 --- a/src/corelib/global/qnumeric_p.h +++ b/src/corelib/global/qnumeric_p.h @@ -23,6 +23,10 @@ #include <limits> #include <type_traits> +#ifndef __has_extension +# define __has_extension(X) 0 +#endif + #if !defined(Q_CC_MSVC) && defined(Q_OS_QNX) # include <math.h> # ifdef isnan @@ -281,7 +285,7 @@ QT_WARNING_DISABLE_FLOAT_COMPARE QT_WARNING_POP } -template <typename T> static inline +template <typename T> static std::enable_if_t<std::is_floating_point_v<T> || std::is_same_v<T, qfloat16>, bool> convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true) { @@ -294,6 +298,49 @@ convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true) *value = T(v); return true; } + +#if defined(__SSE2__) && (defined(Q_CC_GNU) || __has_extension(gnu_asm)) + // The x86 CVTSD2SH instruction from SSE2 does what we want: + // - converts out-of-range doubles to ±infinity and sets #O + // - converts underflows to zero and sets #U + // We need to clear any previously-stored exceptions from it before the + // operation (3-cycle cost) and obtain the new state afterwards (1 cycle). + + unsigned csr = _MM_MASK_MASK; // clear stored exception indicators + auto sse_check_result = [&](auto result) { + if ((csr & (_MM_EXCEPT_UNDERFLOW | _MM_EXCEPT_OVERFLOW)) == 0) + return true; + if (csr & _MM_EXCEPT_OVERFLOW) + return false; + + // According to IEEE 754[1], #U is also set when the result is tiny and + // inexact, but still non-zero, so detect that (this won't generate + // good code for types without hardware support). + // [1] https://en.wikipedia.org/wiki/Floating-point_arithmetic#Exception_handling + return result != 0; + }; + + // Written directly in assembly because both Clang and GCC have been + // observed to reorder the STMXCSR instruction above the conversion + // operation. MSVC generates horrid code when using the intrinsics anyway, + // so it's not a loss. + // See https://github.com/llvm/llvm-project/issues/83661. + if constexpr (std::is_same_v<T, float>) { +# ifdef __AVX__ + asm ("vldmxcsr %[csr]\n\t" + "vcvtsd2ss %[in], %[in], %[out]\n\t" + "vstmxcsr %[csr]" + : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v)); +# else + asm ("ldmxcsr %[csr]\n\t" + "cvtsd2ss %[in], %[out]\n\t" + "stmxcsr %[csr]" + : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v)); +# endif + return sse_check_result(*value); + } +#endif // __SSE2__ && inline assembly + if (!qt_is_finite(v) && std::numeric_limits<T>::has_infinity) { // infinity (or NaN) *value = T(v); |
