[Variant] Allow lossless casting from integer to floating point (#8357)

scovich · web-flow · commit d6f40ce62b82 · 2025-09-17T11:24:52.000-04:00
# Which issue does this PR close?

We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax.

- Closes #NNN.

# Rationale for this change

Historically, the `Variant::as_fXX` methods have not even attempted to cast
integer values to floating point, which is counter-intuitive.

# What changes are included in this PR?

Allow lossless casting of variant integer values to variant floating
point values, by a naive determination of precision:
* Every floating point type has a fixed number of bits of significand precision:
   * 53 (double)
   * 24 (single)
   * 11 (half)
* Any integer that fits entirely inside the target floating point type's
precision can be converted losslessly
* This produces an intuitive result: "too big" numbers fail to convert,
while "small enough" numbers do convert.
   * This is a sufficient but _not_ a necessary condition.
* Technically, wider integers can be represented losslessly as well, as
long as they have enough trailing zeros
* It's unclear whether allowing those wider values to cast is actually
helpful in practice, because only 1 in 2**k of them can be cast (where k
is the number of bits of excess precision); it would certainly make input
testing more expensive.

# Are these changes tested?

New unit tests and doc tests.

# Are there any user-facing changes?

Yes. Values that failed to cast before now succeed.
diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs
@@ -144,3 +144,20 @@ pub(crate) const fn expect_size_of<T>(expected: usize) {
         let _ = [""; 0][size];
     }
 }
+
+pub(crate) fn fits_precision<const N: u32>(n: impl Into<i64>) -> bool {
+    n.into().unsigned_abs().leading_zeros() >= (i64::BITS - N)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_fits_precision() {
+        assert!(fits_precision::<10>(1023));
+        assert!(!fits_precision::<10>(1024));
+        assert!(fits_precision::<10>(-1023));
+        assert!(!fits_precision::<10>(-1024));
+    }
+}
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
@@ -28,7 +28,7 @@ use crate::decoder::{
     self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
 };
 use crate::path::{VariantPath, VariantPathElement};
-use crate::utils::{first_byte_from_slice, slice_from_slice};
+use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice};
 use std::ops::Deref;
 
 use arrow_schema::ArrowError;
@@ -1082,8 +1082,8 @@ impl<'m, 'v> Variant<'m, 'v> {
 
     /// Converts this variant to an `f16` if possible.
     ///
-    /// Returns `Some(f16)` for float and double variants,
-    /// `None` for non-floating-point variants.
+    /// Returns `Some(f16)` for floating point values, and integers with up to 11 bits of
+    /// precision. `None` otherwise.
     ///
     /// # Example
     ///
@@ -1099,21 +1099,29 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v2 = Variant::from(std::f64::consts::PI);
     /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
     ///
+    /// // and from integers with no more than 11 bits of precision
+    /// let v3 = Variant::from(2047);
+    /// assert_eq!(v3.as_f16(), Some(f16::from_f32(2047.0)));
+    ///
     /// // but not from other variants
-    /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_f16(), None);
+    /// let v4 = Variant::from("hello!");
+    /// assert_eq!(v4.as_f16(), None);
     pub fn as_f16(&self) -> Option<f16> {
         match *self {
             Variant::Float(i) => Some(f16::from_f32(i)),
             Variant::Double(i) => Some(f16::from_f64(i)),
+            Variant::Int8(i) => Some(i.into()),
+            Variant::Int16(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
+            Variant::Int32(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
+            Variant::Int64(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
             _ => None,
         }
     }
 
     /// Converts this variant to an `f32` if possible.
     ///
-    /// Returns `Some(f32)` for float and double variants,
-    /// `None` for non-floating-point variants.
+    /// Returns `Some(f32)` for floating point values, and integer values with up to 24 bits of
+    /// precision.  `None` otherwise.
     ///
     /// # Examples
     ///
@@ -1128,23 +1136,31 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v2 = Variant::from(std::f64::consts::PI);
     /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
     ///
+    /// // and from integers with no more than 24 bits of precision
+    /// let v3 = Variant::from(16777215i64);
+    /// assert_eq!(v3.as_f32(), Some(16777215.0));
+    ///
     /// // but not from other variants
-    /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_f32(), None);
+    /// let v4 = Variant::from("hello!");
+    /// assert_eq!(v4.as_f32(), None);
     /// ```
     #[allow(clippy::cast_possible_truncation)]
     pub fn as_f32(&self) -> Option<f32> {
         match *self {
             Variant::Float(i) => Some(i),
             Variant::Double(i) => Some(i as f32),
+            Variant::Int8(i) => Some(i.into()),
+            Variant::Int16(i) => Some(i.into()),
+            Variant::Int32(i) if fits_precision::<24>(i) => Some(i as _),
+            Variant::Int64(i) if fits_precision::<24>(i) => Some(i as _),
             _ => None,
         }
     }
 
     /// Converts this variant to an `f64` if possible.
     ///
-    /// Returns `Some(f64)` for float and double variants,
-    /// `None` for non-floating-point variants.
+    /// Returns `Some(f64)` for floating point values, and integer values with up to 53 bits of
+    /// precision.  `None` otherwise.
     ///
     /// # Examples
     ///
@@ -1159,14 +1175,22 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v2 = Variant::from(std::f64::consts::PI);
     /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
     ///
+    /// // and from integers with no more than 53 bits of precision
+    /// let v3 = Variant::from(9007199254740991i64);
+    /// assert_eq!(v3.as_f64(), Some(9007199254740991.0));
+    ///
     /// // but not from other variants
-    /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_f64(), None);
+    /// let v4 = Variant::from("hello!");
+    /// assert_eq!(v4.as_f64(), None);
     /// ```
     pub fn as_f64(&self) -> Option<f64> {
         match *self {
             Variant::Float(i) => Some(i.into()),
             Variant::Double(i) => Some(i),
+            Variant::Int8(i) => Some(i.into()),
+            Variant::Int16(i) => Some(i.into()),
+            Variant::Int32(i) => Some(i.into()),
+            Variant::Int64(i) if fits_precision::<53>(i) => Some(i as _),
             _ => None,
         }
     }