From f0192396575e836f8bb273b3878410afb3b593f8 Mon Sep 17 00:00:00 2001 From: Jagdish Parihar Date: Tue, 15 Jul 2025 23:55:09 +0530 Subject: [PATCH 1/4] casting f16 to f32 while producing the substrait plan --- datafusion/substrait/src/logical_plan/producer/types.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs index d819c2042c08..93fd0fa3b964 100644 --- a/datafusion/substrait/src/logical_plan/producer/types.rs +++ b/datafusion/substrait/src/logical_plan/producer/types.rs @@ -96,7 +96,13 @@ pub(crate) fn to_substrait_type( nullability, })), }), - // Float16 is not supported in Substrait + // Float16 is not supported in Substrait, cast to Float32 + DataType::Float16 => Ok(substrait::proto::Type { + kind: Some(r#type::Kind::Fp32(r#type::Fp32 { + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, + nullability, + })), + }), DataType::Float32 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Fp32(r#type::Fp32 { type_variation_reference: DEFAULT_TYPE_VARIATION_REF, From a1f60bcac0ec38db14450ecd33130b9afe30c2da Mon Sep 17 00:00:00 2001 From: Jagdish Parihar Date: Tue, 15 Jul 2025 23:59:18 +0530 Subject: [PATCH 2/4] Add support for Float16 type variation in Substrait plan --- .../substrait/src/logical_plan/consumer/types.rs | 10 ++++++++-- .../substrait/src/logical_plan/producer/types.rs | 7 ++++--- datafusion/substrait/src/variation_const.rs | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs index 80300af24ac4..47ee843e3139 100644 --- a/datafusion/substrait/src/logical_plan/consumer/types.rs +++ b/datafusion/substrait/src/logical_plan/consumer/types.rs @@ -24,7 +24,7 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, + FLOAT16_TYPE_VARIATION_REF, INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, @@ -85,7 +85,13 @@ pub fn from_substrait_type( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, - r#type::Kind::Fp32(_) => Ok(DataType::Float32), + r#type::Kind::Fp32(fp32) => match fp32.type_variation_reference { + DEFAULT_TYPE_VARIATION_REF => Ok(DataType::Float32), + FLOAT16_TYPE_VARIATION_REF => Ok(DataType::Float16), + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {s_kind:?}" + ), + }, r#type::Kind::Fp64(_) => Ok(DataType::Float64), r#type::Kind::Timestamp(ts) => { // Kept for backwards compatibility, new plans should use PrecisionTimestamp(Tz) instead diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs index 93fd0fa3b964..19e17d4335db 100644 --- a/datafusion/substrait/src/logical_plan/producer/types.rs +++ b/datafusion/substrait/src/logical_plan/producer/types.rs @@ -23,7 +23,7 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, + FLOAT16_TYPE_VARIATION_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, }; @@ -96,10 +96,10 @@ pub(crate) fn to_substrait_type( nullability, })), }), - // Float16 is not supported in Substrait, cast to Float32 + // Float16 is not supported in Substrait, cast to Float32 with special variation DataType::Float16 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Fp32(r#type::Fp32 { - type_variation_reference: DEFAULT_TYPE_VARIATION_REF, + type_variation_reference: FLOAT16_TYPE_VARIATION_REF, nullability, })), }), @@ -381,6 +381,7 @@ mod tests { round_trip_type(DataType::UInt32)?; round_trip_type(DataType::Int64)?; round_trip_type(DataType::UInt64)?; + round_trip_type(DataType::Float16)?; round_trip_type(DataType::Float32)?; round_trip_type(DataType::Float64)?; diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index a967e7d5ae48..5b156a23f4fa 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -38,6 +38,7 @@ /// The "system-preferred" variation (i.e., no variation). pub const DEFAULT_TYPE_VARIATION_REF: u32 = 0; pub const UNSIGNED_INTEGER_TYPE_VARIATION_REF: u32 = 1; +pub const FLOAT16_TYPE_VARIATION_REF: u32 = 2; #[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_SECOND_TYPE_VARIATION_REF: u32 = 0; From dc6075ae201ec726a9ae2fc3cd76317acbecaf7f Mon Sep 17 00:00:00 2001 From: Jagdish Parihar Date: Wed, 16 Jul 2025 00:03:07 +0530 Subject: [PATCH 3/4] fmt fix --- .../substrait/src/logical_plan/consumer/types.rs | 14 +++++++------- .../substrait/src/logical_plan/producer/types.rs | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs index 47ee843e3139..c694c5269190 100644 --- a/datafusion/substrait/src/logical_plan/consumer/types.rs +++ b/datafusion/substrait/src/logical_plan/consumer/types.rs @@ -24,13 +24,13 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - FLOAT16_TYPE_VARIATION_REF, INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, - INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, - LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, - TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, - TIMESTAMP_SECOND_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, - TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, - VIEW_CONTAINER_TYPE_VARIATION_REF, + FLOAT16_TYPE_VARIATION_REF, INTERVAL_DAY_TIME_TYPE_REF, + INTERVAL_MONTH_DAY_NANO_TYPE_NAME, INTERVAL_MONTH_DAY_NANO_TYPE_REF, + INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, + TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, + TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, + TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, + UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, }; use datafusion::arrow::datatypes::{ DataType, Field, Fields, IntervalUnit, Schema, TimeUnit, diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs index 19e17d4335db..48f2b838332b 100644 --- a/datafusion/substrait/src/logical_plan/producer/types.rs +++ b/datafusion/substrait/src/logical_plan/producer/types.rs @@ -23,9 +23,9 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - FLOAT16_TYPE_VARIATION_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, - TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, - VIEW_CONTAINER_TYPE_VARIATION_REF, + FLOAT16_TYPE_VARIATION_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, + TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, + UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, }; use datafusion::arrow::datatypes::{DataType, IntervalUnit}; use datafusion::common::{internal_err, not_impl_err, plan_err, DFSchemaRef}; From 7b113e65019b7b46776acad629261855e4036332 Mon Sep 17 00:00:00 2001 From: Jagdish Parihar Date: Wed, 16 Jul 2025 21:56:00 +0530 Subject: [PATCH 4/4] Refactor Float16 handling in Substrait: use UserDefined type instead of casting to Float32 --- .../src/logical_plan/consumer/types.rs | 24 ++++++++----------- .../src/logical_plan/producer/types.rs | 14 ++++++----- datafusion/substrait/src/variation_const.rs | 6 ++++- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs index c694c5269190..1e6474c75aff 100644 --- a/datafusion/substrait/src/logical_plan/consumer/types.rs +++ b/datafusion/substrait/src/logical_plan/consumer/types.rs @@ -24,13 +24,13 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - FLOAT16_TYPE_VARIATION_REF, INTERVAL_DAY_TIME_TYPE_REF, - INTERVAL_MONTH_DAY_NANO_TYPE_NAME, INTERVAL_MONTH_DAY_NANO_TYPE_REF, - INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, - TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, - TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, - TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, - UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, + FLOAT16_TYPE_REF, INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, + INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, + LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, + TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, + TIMESTAMP_SECOND_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, + TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, + VIEW_CONTAINER_TYPE_VARIATION_REF, }; use datafusion::arrow::datatypes::{ DataType, Field, Fields, IntervalUnit, Schema, TimeUnit, @@ -85,13 +85,7 @@ pub fn from_substrait_type( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, - r#type::Kind::Fp32(fp32) => match fp32.type_variation_reference { - DEFAULT_TYPE_VARIATION_REF => Ok(DataType::Float32), - FLOAT16_TYPE_VARIATION_REF => Ok(DataType::Float16), - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {s_kind:?}" - ), - }, + r#type::Kind::Fp32(_) => Ok(DataType::Float32), r#type::Kind::Fp64(_) => Ok(DataType::Float64), r#type::Kind::Timestamp(ts) => { // Kept for backwards compatibility, new plans should use PrecisionTimestamp(Tz) instead @@ -266,6 +260,8 @@ pub fn from_substrait_type( } else { #[allow(deprecated)] match u.type_reference { + // Float16 support via UserDefined type + FLOAT16_TYPE_REF => Ok(DataType::Float16), // Kept for backwards compatibility, producers should use IntervalYear instead INTERVAL_YEAR_MONTH_TYPE_REF => { Ok(DataType::Interval(IntervalUnit::YearMonth)) diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs index 48f2b838332b..963681fc86f5 100644 --- a/datafusion/substrait/src/logical_plan/producer/types.rs +++ b/datafusion/substrait/src/logical_plan/producer/types.rs @@ -23,9 +23,9 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - FLOAT16_TYPE_VARIATION_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, - TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, - UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, + FLOAT16_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, + TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, + VIEW_CONTAINER_TYPE_VARIATION_REF, }; use datafusion::arrow::datatypes::{DataType, IntervalUnit}; use datafusion::common::{internal_err, not_impl_err, plan_err, DFSchemaRef}; @@ -96,11 +96,13 @@ pub(crate) fn to_substrait_type( nullability, })), }), - // Float16 is not supported in Substrait, cast to Float32 with special variation + // Float16 is not supported in Substrait, use UserDefined type DataType::Float16 => Ok(substrait::proto::Type { - kind: Some(r#type::Kind::Fp32(r#type::Fp32 { - type_variation_reference: FLOAT16_TYPE_VARIATION_REF, + kind: Some(r#type::Kind::UserDefined(r#type::UserDefined { + type_reference: FLOAT16_TYPE_REF, + type_variation_reference: 0, nullability, + type_parameters: vec![], })), }), DataType::Float32 => Ok(substrait::proto::Type { diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index 5b156a23f4fa..b9abf67d9f7b 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -38,7 +38,6 @@ /// The "system-preferred" variation (i.e., no variation). pub const DEFAULT_TYPE_VARIATION_REF: u32 = 0; pub const UNSIGNED_INTEGER_TYPE_VARIATION_REF: u32 = 1; -pub const FLOAT16_TYPE_VARIATION_REF: u32 = 2; #[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_SECOND_TYPE_VARIATION_REF: u32 = 0; @@ -123,3 +122,8 @@ pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3; note = "Use Substrait `IntervalCompund` type instead" )] pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano"; + +/// For [`DataType::Float16`]. +/// +/// [`DataType::Float16`]: datafusion::arrow::datatypes::DataType::Float16 +pub const FLOAT16_TYPE_REF: u32 = 4;