Skip to content

Commit 720d4f1

Browse files
fix: null cast not valid in substrait round trip (#18414)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #16272. ## Rationale for this change - Null with null type were throwing invalid casts in substrait round trips <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? - Added a null variation const that allows NULL with NULL types to be casted properly. - Made this a UserDefined type from the substrait side of things <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? - Yes added unit test in producer/types.rs - Previously failing tests pass <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. -->
1 parent b1cdfc6 commit 720d4f1

File tree

3 files changed

+30
-7
lines changed

3 files changed

+30
-7
lines changed

datafusion/substrait/src/logical_plan/consumer/types.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
use super::utils::{from_substrait_precision, next_struct_field_name, DEFAULT_TIMEZONE};
1919
use super::SubstraitConsumer;
20-
use crate::variation_const::FLOAT_16_TYPE_NAME;
2120
#[allow(deprecated)]
2221
use crate::variation_const::{
2322
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
@@ -33,6 +32,7 @@ use crate::variation_const::{
3332
TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
3433
VIEW_CONTAINER_TYPE_VARIATION_REF,
3534
};
35+
use crate::variation_const::{FLOAT_16_TYPE_NAME, NULL_TYPE_NAME};
3636
use datafusion::arrow::datatypes::{
3737
DataType, Field, Fields, IntervalUnit, Schema, TimeUnit,
3838
};
@@ -253,6 +253,7 @@ pub fn from_substrait_type(
253253
// Kept for backwards compatibility, producers should use IntervalCompound instead
254254
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
255255
FLOAT_16_TYPE_NAME => Ok(DataType::Float16),
256+
NULL_TYPE_NAME => Ok(DataType::Null),
256257
_ => not_impl_err!(
257258
"Unsupported Substrait user defined type with ref {} and variation {}",
258259
u.type_reference,

datafusion/substrait/src/logical_plan/producer/types.rs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ use crate::variation_const::{
2323
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
2424
DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
2525
DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
26-
FLOAT_16_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF,
27-
TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
28-
VIEW_CONTAINER_TYPE_VARIATION_REF,
26+
FLOAT_16_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, NULL_TYPE_NAME,
27+
TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF,
28+
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
2929
};
3030
use datafusion::arrow::datatypes::{DataType, IntervalUnit};
31-
use datafusion::common::{internal_err, not_impl_err, plan_err, DFSchemaRef};
31+
use datafusion::common::{not_impl_err, plan_err, DFSchemaRef};
3232
use substrait::proto::{r#type, NamedStruct};
3333

3434
pub(crate) fn to_substrait_type(
@@ -42,7 +42,17 @@ pub(crate) fn to_substrait_type(
4242
r#type::Nullability::Required as i32
4343
};
4444
match dt {
45-
DataType::Null => internal_err!("Null cast is not valid"),
45+
DataType::Null => {
46+
let type_anchor = producer.register_type(NULL_TYPE_NAME.to_string());
47+
Ok(substrait::proto::Type {
48+
kind: Some(r#type::Kind::UserDefined(r#type::UserDefined {
49+
type_reference: type_anchor,
50+
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
51+
nullability,
52+
type_parameters: vec![],
53+
})),
54+
})
55+
}
4656
DataType::Boolean => Ok(substrait::proto::Type {
4757
kind: Some(r#type::Kind::Bool(r#type::Boolean {
4858
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
@@ -377,6 +387,7 @@ mod tests {
377387
use crate::logical_plan::consumer::tests::test_consumer;
378388
use crate::logical_plan::consumer::{
379389
from_substrait_named_struct, from_substrait_type_without_names,
390+
DefaultSubstraitConsumer,
380391
};
381392
use crate::logical_plan::producer::DefaultSubstraitProducer;
382393
use datafusion::arrow::datatypes::{Field, Fields, Schema, TimeUnit};
@@ -386,6 +397,7 @@ mod tests {
386397

387398
#[test]
388399
fn round_trip_types() -> Result<()> {
400+
round_trip_type(DataType::Null)?;
389401
round_trip_type(DataType::Boolean)?;
390402
round_trip_type(DataType::Int8)?;
391403
round_trip_type(DataType::UInt8)?;
@@ -474,7 +486,12 @@ mod tests {
474486
// As DataFusion doesn't consider nullability as a property of the type, but field,
475487
// it doesn't matter if we set nullability to true or false here.
476488
let substrait = to_substrait_type(&mut producer, &dt, true)?;
477-
let consumer = test_consumer();
489+
490+
// Get the extensions from the producer so the consumer can look up
491+
// any registered user-defined types (like "null" or "f16")
492+
let extensions = producer.get_extensions();
493+
let consumer = DefaultSubstraitConsumer::new(&extensions, &state);
494+
478495
let roundtrip_dt = from_substrait_type_without_names(&consumer, &substrait)?;
479496
assert_eq!(dt, roundtrip_dt);
480497
Ok(())

datafusion/substrait/src/variation_const.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,8 @@ pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano";
125125

126126
/// Defined in <https://github.com/apache/arrow/blame/main/format/substrait/extension_types.yaml>
127127
pub const FLOAT_16_TYPE_NAME: &str = "fp16";
128+
129+
/// For [`DataType::Null`]
130+
///
131+
/// [`DataType::Null`]: datafusion::arrow::datatypes::DataType::Null
132+
pub const NULL_TYPE_NAME: &str = "null";

0 commit comments

Comments
 (0)