Add check for NaN rounding

thecoop · thecoop · commit 4477bf7fbd49 · 2025-11-28T12:01:09.000Z
diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BFloat16.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/BFloat16.java
@@ -23,8 +23,10 @@ public static short floatToBFloat16(float f) {
         // denormal - zero exp, non-zero fraction
         // infinity - all-1 exp, zero fraction
         // NaN - all-1 exp, non-zero fraction
-        // the Float.NaN constant is 0x7fc0_0000, so this won't turn the most common NaN values into
-        // infinities
+
+        // note that floatToIntBits, unlike floatToRawIntBits, does not preserve the NaN bit pattern:
+        // every NaN is collapsed to the canonical value 0x7fc0_0000, whose low 16 bits are all zero.
+        // this means that a NaN is not expected to be turned into infinity by rounding
 
         int bits = Float.floatToIntBits(f);
         int bfloat16 = bits >>> 16;
diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/BFloat16Tests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/BFloat16Tests.java
@@ -52,7 +52,12 @@ public void testRoundToEven() {
         assertRounding(construct(0b000000000, 0b0000000_10000000_00000000), 0f);
 
         // rounding the standard NaN value should be unchanged
-        assertThat(Float.floatToIntBits(BFloat16.truncateToBFloat16(Float.NaN)), equalTo(Float.floatToIntBits(Float.NaN)));
+        assertThat(Float.floatToRawIntBits(BFloat16.truncateToBFloat16(Float.NaN)), equalTo(Float.floatToRawIntBits(Float.NaN)));
+
+        // the only set mantissa bit here is in the low 16 bits that bfloat16 discards, so you would expect
+        // this NaN to be turned into infinity; instead it stays a NaN (with a different bit pattern)
+        // because floatToBFloat16 uses floatToIntBits rather than floatToRawIntBits
+        assertTrue(Float.isNaN(BFloat16.truncateToBFloat16(construct(0b011111111, 0b0000000_10000000_00000000))));
     }
 
     private static float construct(int exp, int mantissa) {
@@ -71,8 +76,11 @@ private static void assertRounding(float value, float expectedRounded) {
         float rounded = BFloat16.truncateToBFloat16(value);
 
         // System.out.println(value + " rounds to " + rounded);
-        assertEquals(value + " rounded to " + rounded + ", not " + expectedRounded,
-            Float.floatToIntBits(expectedRounded), Float.floatToIntBits(rounded));
+        assertEquals(
+            value + " rounded to " + rounded + ", not " + expectedRounded,
+            Float.floatToIntBits(expectedRounded),
+            Float.floatToIntBits(rounded)
+        );
 
         // there should not be a closer bfloat16 value (comparing using FP math) than the expected rounded value
         float delta = Math.abs(value - rounded);