// Copyright (C) 2025 Intel Corporation. // Copyright (C) 2020 The Qt Company Ltd. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only // Qt-Security score:significant reason:default #ifndef Q20BIT_H #define Q20BIT_H #include #if defined(__cpp_lib_bitops) || defined(__cpp_lib_int_pow2) # include #else # include # include # ifdef Q_CC_MSVC // avoiding qsimd.h -> immintrin.h unless necessary, because it increases // compilation time # include # include # endif #endif // // W A R N I N G // ------------- // // This file is not part of the Qt API. Types and functions defined in this // file can reliably be replaced by their std counterparts, once available. // You may use these definitions in your own code, but be aware that we // will remove them once Qt depends on the C++ version that supports // them in namespace std. There will be NO deprecation warning, the // definitions will JUST go away. // // If you can't agree to these terms, don't use these definitions! // // We mean it. // QT_BEGIN_NAMESPACE namespace q20 { #if defined(__cpp_lib_bitops) using std::countl_zero; using std::countr_zero; using std::popcount; using std::rotl; using std::rotr; #else namespace detail { template /*non-constexpr*/ inline auto hw_popcount(T v) noexcept { #if defined(Q_CC_MSVC) && defined(Q_PROCESSOR_X86) && defined(__POPCNT__) // Note: __POPCNT__ comes from qsimd.h, not the compiler. # ifdef Q_PROCESSOR_X86_64 if constexpr (sizeof(T) == sizeof(quint64)) return int(__popcnt64(v)); # endif if constexpr (sizeof(T) == sizeof(quint64)) return int(__popcnt(quint32(v)) + __popcnt(quint32(v >> 32))); if constexpr (sizeof(T) == sizeof(quint32)) return int(__popcnt(v)); return int(__popcnt16(v)); #else Q_UNUSED(v); #endif } template /*non-constexpr*/ inline auto hw_countl_zero(T v) noexcept { #if defined(Q_CC_MSVC) && defined(Q_PROCESSOR_X86) && defined(__LZCNT__) // Note: __LZCNT__ comes from qsimd.h, not the compiler # if defined(Q_PROCESSOR_X86_64) if constexpr (sizeof(T) == sizeof(quint64)) return int(__lzcnt64(v)); # endif if constexpr (sizeof(T) == sizeof(quint32)) return int(__lzcnt(v)); if constexpr (sizeof(T) == sizeof(quint16)) return int(__lzcnt16(v)); if constexpr (sizeof(T) == sizeof(quint8)) return int(__lzcnt(v)) - 24; #endif #if defined(Q_CC_MSVC) && defined(Q_PROCESSOR_X86) constexpr int Digits = std::numeric_limits::digits; unsigned long result; if constexpr (sizeof(T) == sizeof(quint64)) { # ifdef Q_PROCESSOR_X86_64 if (_BitScanReverse64(&result, v) == 0) return Digits; # else if (quint32 h = quint32(v >> 32)) return hw_countl_zero(h); return hw_countl_zero(quint32(v)) + 32; # endif } else { if (_BitScanReverse(&result, v) == 0) return Digits; } // Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31 // and the lsb index is 0. The result for the index when counting up: msb index is 0 (because it // starts there), and the lsb index is 31. result ^= sizeof(T) * 8 - 1; return int(result); #else Q_UNUSED(v); #endif } template /*non-constexpr*/ inline auto hw_countr_zero(T v) noexcept { #if defined(Q_CC_MSVC) && defined(Q_PROCESSOR_X86) && defined(__BMI__) // Note: __BMI__ comes from qsimd.h, not the compiler # if defined(Q_PROCESSOR_X86_64) if constexpr (sizeof(T) == sizeof(quint64)) return int(_tzcnt_u64(v)); # endif if constexpr (sizeof(T) == sizeof(quint32)) return int(_tzcnt_u32(v)); // No _tzcnt_u16 or u8 intrinsics #endif #if defined(Q_CC_MSVC) && defined(Q_PROCESSOR_X86) constexpr int Digits = std::numeric_limits::digits; unsigned long result; if constexpr (sizeof(T) <= sizeof(quint32)) return _BitScanForward(&result, v) ? int(result) : Digits; # ifdef Q_PROCESSOR_X86_64 return _BitScanForward64(&result, v) ? int(result) : Digits; # endif #else Q_UNUSED(v); #endif } } // namespace q20::detail template constexpr std::enable_if_t, int> popcount(T v) noexcept { #if __has_builtin(__builtin_popcountg) return __builtin_popcountg(v); #endif if constexpr (sizeof(T) > sizeof(quint64)) { static_assert(sizeof(T) == 16, "Unsupported integer size"); return popcount(quint64(v)) + popcount(quint64(v >> 64)); } # if __has_builtin(__builtin_popcount) // These GCC/Clang intrinsics are constexpr and use the HW instructions // where available. Note: no runtime detection. if constexpr (sizeof(T) > sizeof(quint32)) return __builtin_popcountll(v); return __builtin_popcount(v); # endif # ifdef QT_SUPPORTS_IS_CONSTANT_EVALUATED // Try hardware functions if not constexpr. Note: no runtime detection. if (!is_constant_evaluated()) { if constexpr (std::is_integral_v) return detail::hw_popcount(v); } # endif constexpr int Digits = std::numeric_limits::digits; int r = (((v ) & 0xfff) * Q_UINT64_C(0x1001001001001) & Q_UINT64_C(0x84210842108421)) % 0x1f; if constexpr (Digits > 12) r += (((v >> 12) & 0xfff) * Q_UINT64_C(0x1001001001001) & Q_UINT64_C(0x84210842108421)) % 0x1f; if constexpr (Digits > 24) r += (((v >> 24) & 0xfff) * Q_UINT64_C(0x1001001001001) & Q_UINT64_C(0x84210842108421)) % 0x1f; if constexpr (Digits > 36) { r += (((v >> 36) & 0xfff) * Q_UINT64_C(0x1001001001001) & Q_UINT64_C(0x84210842108421)) % 0x1f + (((v >> 48) & 0xfff) * Q_UINT64_C(0x1001001001001) & Q_UINT64_C(0x84210842108421)) % 0x1f + (((v >> 60) & 0xfff) * Q_UINT64_C(0x1001001001001) & Q_UINT64_C(0x84210842108421)) % 0x1f; } return r; } template constexpr std::enable_if_t, int> countl_zero(T v) noexcept { #if __has_builtin(__builtin_clzg) // "If two arguments are specified, and first argument is 0, the result is // the second argument." return __builtin_clzg(v, std::numeric_limits::digits); #endif if constexpr (sizeof(T) > sizeof(quint64)) { static_assert(sizeof(T) == 16, "Unsupported integer size"); if (quint64 h = quint64(v >> 64)) return countl_zero(h); return countl_zero(quint64(v)) + 64; } #if __has_builtin(__builtin_clz) // These GCC/Clang intrinsics are constexpr and use the HW instructions // where available. if (!v) return std::numeric_limits::digits; if constexpr (sizeof(T) == sizeof(quint64)) return __builtin_clzll(v); # if __has_builtin(__builtin_clzs) if constexpr (sizeof(T) == sizeof(quint16)) return __builtin_clzs(v); # endif return __builtin_clz(v) - (32 - std::numeric_limits::digits); #endif #ifdef QT_SUPPORTS_IS_CONSTANT_EVALUATED // Try hardware functions if not constexpr. Note: no runtime detection. if (!is_constant_evaluated()) { if constexpr (std::is_integral_v) return detail::hw_countl_zero(v); } #endif // Hacker's Delight, 2nd ed. Fig 5-16, p. 102 v = v | (v >> 1); v = v | (v >> 2); v = v | (v >> 4); if constexpr (sizeof(T) > sizeof(quint8)) v = v | (v >> 8); if constexpr (sizeof(T) > sizeof(quint16)) v = v | (v >> 16); if constexpr (sizeof(T) > sizeof(quint32)) v = v | (v >> 32); return popcount(T(~v)); } template constexpr std::enable_if_t, int> countr_zero(T v) noexcept { #if __has_builtin(__builtin_ctzg) // "If two arguments are specified, and first argument is 0, the result is // the second argument." return __builtin_ctzg(v, std::numeric_limits::digits); #endif if constexpr (sizeof(T) > sizeof(quint64)) { static_assert(sizeof(T) == 16, "Unsupported integer size"); quint64 l = quint64(v); return l ? countr_zero(l) : 64 + countr_zero(quint64(v >> 64)); } #if __has_builtin(__builtin_ctz) // These GCC/Clang intrinsics are constexpr and use the HW instructions // where available. if (!v) return std::numeric_limits::digits; if constexpr (sizeof(T) == sizeof(quint64)) return __builtin_ctzll(v); # if __has_builtin(__builtin_ctzs) if constexpr (sizeof(T) == sizeof(quint16)) return __builtin_ctzs(v); # endif return __builtin_ctz(v); #endif #ifdef QT_SUPPORTS_IS_CONSTANT_EVALUATED // Try hardware functions if not constexpr. Note: no runtime detection. if (!is_constant_evaluated()) { if constexpr (std::is_integral_v) return detail::hw_countr_zero(v); } #endif if constexpr (sizeof(T) > sizeof(quint32)) { quint32 l = quint32(v); return l ? countr_zero(l) : 32 + countr_zero(quint32(v >> 32)); } // see http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel int c = std::numeric_limits::digits; // c will be the number of zero bits on the right QT_WARNING_PUSH QT_WARNING_DISABLE_MSVC(4146) // unary minus operator applied to unsigned type, result still unsigned v &= T(-v); QT_WARNING_POP if (v) c--; if constexpr (sizeof(T) == sizeof(quint32)) { if (v & 0x0000FFFF) c -= 16; if (v & 0x00FF00FF) c -= 8; if (v & 0x0F0F0F0F) c -= 4; if (v & 0x33333333) c -= 2; if (v & 0x55555555) c -= 1; } else if constexpr (sizeof(T) == sizeof(quint16)) { if (v & 0x000000FF) c -= 8; if (v & 0x00000F0F) c -= 4; if (v & 0x00003333) c -= 2; if (v & 0x00005555) c -= 1; } else /*if constexpr (sizeof(T) == sizeof(quint8))*/ { if (v & 0x0000000F) c -= 4; if (v & 0x00000033) c -= 2; if (v & 0x00000055) c -= 1; } return c; } template constexpr std::enable_if_t, T> rotl(T v, int s) noexcept { constexpr int Digits = std::numeric_limits::digits; unsigned n = unsigned(s) % Digits; return (v << n) | (v >> (Digits - n)); } template constexpr std::enable_if_t, T> rotr(T v, int s) noexcept { constexpr int Digits = std::numeric_limits::digits; unsigned n = unsigned(s) % Digits; return (v >> n) | (v << (Digits - n)); } #endif // __cpp_lib_bitops #if defined(__cpp_lib_int_pow2) using std::bit_ceil; using std::bit_floor; using std::bit_width; #else template constexpr std::enable_if_t, T> bit_ceil(T v) noexcept { // Difference from standard: we do not enforce UB constexpr int Digits = std::numeric_limits::digits; if (v <= 1) return 1; return T(1) << (Digits - countl_zero(T(v - 1))); } template constexpr std::enable_if_t, T> bit_width(T v) noexcept { return std::numeric_limits::digits - countl_zero(v); } template constexpr std::enable_if_t, T> bit_floor(T v) noexcept { return v ? T(1) << (bit_width(v) - 1) : 0; } #endif // __cpp_lib_int_pow2 } // namespace q20 QT_END_NAMESPACE #endif // Q20BIT_H