summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/corelib/global/qnumeric_p.h49
1 files changed, 48 insertions, 1 deletions
diff --git a/src/corelib/global/qnumeric_p.h b/src/corelib/global/qnumeric_p.h
index bdace21000f..64fb69c8f3a 100644
--- a/src/corelib/global/qnumeric_p.h
+++ b/src/corelib/global/qnumeric_p.h
@@ -23,6 +23,10 @@
#include <limits>
#include <type_traits>
+#ifndef __has_extension
+# define __has_extension(X) 0
+#endif
+
#if !defined(Q_CC_MSVC) && defined(Q_OS_QNX)
# include <math.h>
# ifdef isnan
@@ -281,7 +285,7 @@ QT_WARNING_DISABLE_FLOAT_COMPARE
QT_WARNING_POP
}
-template <typename T> static inline
+template <typename T> static
std::enable_if_t<std::is_floating_point_v<T> || std::is_same_v<T, qfloat16>, bool>
convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true)
{
@@ -294,6 +298,49 @@ convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true)
*value = T(v);
return true;
}
+
+#if defined(__SSE2__) && (defined(Q_CC_GNU) || __has_extension(gnu_asm))
+ // The x86 CVTSD2SH instruction from SSE2 does what we want:
+ // - converts out-of-range doubles to ±infinity and sets #O
+ // - converts underflows to zero and sets #U
+ // We need to clear any previously-stored exceptions from it before the
+ // operation (3-cycle cost) and obtain the new state afterwards (1 cycle).
+
+ unsigned csr = _MM_MASK_MASK; // clear stored exception indicators
+ auto sse_check_result = [&](auto result) {
+ if ((csr & (_MM_EXCEPT_UNDERFLOW | _MM_EXCEPT_OVERFLOW)) == 0)
+ return true;
+ if (csr & _MM_EXCEPT_OVERFLOW)
+ return false;
+
+ // According to IEEE 754[1], #U is also set when the result is tiny and
+ // inexact, but still non-zero, so detect that (this won't generate
+ // good code for types without hardware support).
+ // [1] https://en.wikipedia.org/wiki/Floating-point_arithmetic#Exception_handling
+ return result != 0;
+ };
+
+ // Written directly in assembly because both Clang and GCC have been
+ // observed to reorder the STMXCSR instruction above the conversion
+ // operation. MSVC generates horrid code when using the intrinsics anyway,
+ // so it's not a loss.
+ // See https://github.com/llvm/llvm-project/issues/83661.
+ if constexpr (std::is_same_v<T, float>) {
+# ifdef __AVX__
+ asm ("vldmxcsr %[csr]\n\t"
+ "vcvtsd2ss %[in], %[in], %[out]\n\t"
+ "vstmxcsr %[csr]"
+ : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v));
+# else
+ asm ("ldmxcsr %[csr]\n\t"
+ "cvtsd2ss %[in], %[out]\n\t"
+ "stmxcsr %[csr]"
+ : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v));
+# endif
+ return sse_check_result(*value);
+ }
+#endif // __SSE2__ && inline assembly
+
if (!qt_is_finite(v) && std::numeric_limits<T>::has_infinity) {
// infinity (or NaN)
*value = T(v);