Skip to content

Commit d6f40ce

Browse files
authored
[Variant] Allow lossless casting from integer to floating point (#8357)
# Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Closes #NNN. # Rationale for this change Historically, `Variant::as_fXX` methods don't even try to cast int values as floating point, which is counter-intuitive. # What changes are included in this PR? Allow lossless casting of variant integer values to variant floating point values, by a naive determination of precision: * Every floating point type has some number of bits of significand precision * 53 (double) * 24 (single) * 11 (half) * Any integer that fits entirely inside the target floating point type's precision can be converted losslessly * This produces an intuitive result: "too big" numbers fail to convert, while "small enough" numbers do convert. * This is a sufficient but _not_ a necessary condition. * Technically, wider integers can be represented losslessly as well, as long as they have enough trailing zeros * It's unclear whether allowing those wider values to cast is actually helpful in practice, because only 1 in 2**k values can cast (where k is the number of bits of excess precision); it would certainly make input testing more expensive. # Are these changes tested? New unit tests and doc tests. # Are there any user-facing changes? Yes. Values that failed to cast before now succeed.
1 parent d74d9ba commit d6f40ce

File tree

2 files changed

+54
-13
lines changed

2 files changed

+54
-13
lines changed

parquet-variant/src/utils.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,20 @@ pub(crate) const fn expect_size_of<T>(expected: usize) {
144144
let _ = [""; 0][size];
145145
}
146146
}
147+
148+
/// Returns `true` when the magnitude of `n` can be written in at most `N` bits.
///
/// `n` is widened to `i64` and its absolute value is inspected, so the sign does not
/// count towards the `N` bits. An integer that passes this check converts exactly to a
/// floating point type whose significand carries `N` bits of precision (callers in this
/// crate use 11, 24 and 53). The check is sufficient but not necessary: wider integers
/// with enough trailing zero bits are also exactly representable, yet are rejected here.
///
/// Any `N >= 64` accepts every `i64`, including `i64::MIN`.
pub(crate) fn fits_precision<const N: u32>(n: impl Into<i64>) -> bool {
    // `unsigned_abs` is used instead of `abs` so that `i64::MIN` does not overflow.
    let magnitude = n.into().unsigned_abs();
    // A value needs `64 - leading_zeros` bits, so it fits when at least `64 - N` of the
    // high bits are zero. `saturating_sub` keeps the bound at 0 for `N > 64` instead of
    // underflowing.
    magnitude.leading_zeros() >= i64::BITS.saturating_sub(N)
}
151+
152+
#[cfg(test)]
mod test {
    use super::*;

    /// Exercises `fits_precision` at the bit-width boundary for both signs, at the
    /// widths used for `f16`/`f32`/`f64` casts, and at the extremes of `i64`.
    #[test]
    fn test_fits_precision() {
        // Artificial 10-bit width: 2^10 - 1 fits, 2^10 does not, regardless of sign.
        assert!(fits_precision::<10>(1023));
        assert!(!fits_precision::<10>(1024));
        assert!(fits_precision::<10>(-1023));
        assert!(!fits_precision::<10>(-1024));

        // Zero has no significant bits and fits any non-trivial width.
        assert!(fits_precision::<1>(0));

        // 11 bits (half precision), fed from an i16.
        assert!(fits_precision::<11>(2047i16));
        assert!(!fits_precision::<11>(2048i16));

        // 24 bits (single precision), fed from an i32.
        assert!(fits_precision::<24>(16_777_215i32));
        assert!(!fits_precision::<24>(-16_777_216i32));

        // 53 bits (double precision), fed from an i64.
        assert!(fits_precision::<53>(9_007_199_254_740_991i64));
        assert!(!fits_precision::<53>(9_007_199_254_740_992i64));

        // i64::MIN has magnitude 2^63 and needs all 64 bits; i64::MAX needs 63.
        assert!(!fits_precision::<53>(i64::MIN));
        assert!(!fits_precision::<63>(i64::MIN));
        assert!(fits_precision::<63>(i64::MAX));
    }
}

parquet-variant/src/variant.rs

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use crate::decoder::{
2828
self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
2929
};
3030
use crate::path::{VariantPath, VariantPathElement};
31-
use crate::utils::{first_byte_from_slice, slice_from_slice};
31+
use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice};
3232
use std::ops::Deref;
3333

3434
use arrow_schema::ArrowError;
@@ -1082,8 +1082,8 @@ impl<'m, 'v> Variant<'m, 'v> {
10821082

10831083
/// Converts this variant to an `f16` if possible.
10841084
///
1085-
/// Returns `Some(f16)` for float and double variants,
1086-
/// `None` for non-floating-point variants.
1085+
/// Returns `Some(f16)` for floating point values, and integers with up to 11 bits of
1086+
/// precision. `None` otherwise.
10871087
///
10881088
/// # Example
10891089
///
@@ -1099,21 +1099,29 @@ impl<'m, 'v> Variant<'m, 'v> {
10991099
/// let v2 = Variant::from(std::f64::consts::PI);
11001100
/// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
11011101
///
1102+
/// // and from integers with no more than 11 bits of precision
1103+
/// let v3 = Variant::from(2047);
1104+
/// assert_eq!(v3.as_f16(), Some(f16::from_f32(2047.0)));
1105+
///
11021106
/// // but not from other variants
1103-
/// let v3 = Variant::from("hello!");
1104-
/// assert_eq!(v3.as_f16(), None);
1107+
/// let v4 = Variant::from("hello!");
1108+
/// assert_eq!(v4.as_f16(), None);
/// ```
11051109
pub fn as_f16(&self) -> Option<f16> {
11061110
match *self {
11071111
Variant::Float(i) => Some(f16::from_f32(i)),
11081112
Variant::Double(i) => Some(f16::from_f64(i)),
1113+
Variant::Int8(i) => Some(i.into()),
1114+
Variant::Int16(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
1115+
Variant::Int32(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
1116+
Variant::Int64(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
11091117
_ => None,
11101118
}
11111119
}
11121120

11131121
/// Converts this variant to an `f32` if possible.
11141122
///
1115-
/// Returns `Some(f32)` for float and double variants,
1116-
/// `None` for non-floating-point variants.
1123+
/// Returns `Some(f32)` for floating point values, and integer values with up to 24 bits of
1124+
/// precision. `None` otherwise.
11171125
///
11181126
/// # Examples
11191127
///
@@ -1128,23 +1136,31 @@ impl<'m, 'v> Variant<'m, 'v> {
11281136
/// let v2 = Variant::from(std::f64::consts::PI);
11291137
/// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
11301138
///
1139+
/// // and from integers with no more than 24 bits of precision
1140+
/// let v3 = Variant::from(16777215i64);
1141+
/// assert_eq!(v3.as_f32(), Some(16777215.0));
1142+
///
11311143
/// // but not from other variants
1132-
/// let v3 = Variant::from("hello!");
1133-
/// assert_eq!(v3.as_f32(), None);
1144+
/// let v4 = Variant::from("hello!");
1145+
/// assert_eq!(v4.as_f32(), None);
11341146
/// ```
11351147
#[allow(clippy::cast_possible_truncation)]
11361148
pub fn as_f32(&self) -> Option<f32> {
11371149
match *self {
11381150
Variant::Float(i) => Some(i),
11391151
Variant::Double(i) => Some(i as f32),
1152+
Variant::Int8(i) => Some(i.into()),
1153+
Variant::Int16(i) => Some(i.into()),
1154+
Variant::Int32(i) if fits_precision::<24>(i) => Some(i as _),
1155+
Variant::Int64(i) if fits_precision::<24>(i) => Some(i as _),
11401156
_ => None,
11411157
}
11421158
}
11431159

11441160
/// Converts this variant to an `f64` if possible.
11451161
///
1146-
/// Returns `Some(f64)` for float and double variants,
1147-
/// `None` for non-floating-point variants.
1162+
/// Returns `Some(f64)` for floating point values, and integer values with up to 53 bits of
1163+
/// precision. `None` otherwise.
11481164
///
11491165
/// # Examples
11501166
///
@@ -1159,14 +1175,22 @@ impl<'m, 'v> Variant<'m, 'v> {
11591175
/// let v2 = Variant::from(std::f64::consts::PI);
11601176
/// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
11611177
///
1178+
/// // and from integers with no more than 53 bits of precision
1179+
/// let v3 = Variant::from(9007199254740991i64);
1180+
/// assert_eq!(v3.as_f64(), Some(9007199254740991.0));
1181+
///
11621182
/// // but not from other variants
1163-
/// let v3 = Variant::from("hello!");
1164-
/// assert_eq!(v3.as_f64(), None);
1183+
/// let v4 = Variant::from("hello!");
1184+
/// assert_eq!(v4.as_f64(), None);
11651185
/// ```
11661186
pub fn as_f64(&self) -> Option<f64> {
11671187
match *self {
11681188
Variant::Float(i) => Some(i.into()),
11691189
Variant::Double(i) => Some(i),
1190+
Variant::Int8(i) => Some(i.into()),
1191+
Variant::Int16(i) => Some(i.into()),
1192+
Variant::Int32(i) => Some(i.into()),
1193+
Variant::Int64(i) if fits_precision::<53>(i) => Some(i as _),
11701194
_ => None,
11711195
}
11721196
}

0 commit comments

Comments
 (0)