From 26b6f24326d82852e2a52d94d765fc6b8b72841d Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Mon, 14 Jul 2025 21:01:38 -0700 Subject: [PATCH 1/9] Implement next_day --- Cargo.lock | 1 + datafusion/spark/Cargo.toml | 1 + datafusion/spark/src/function/datetime/mod.rs | 17 +- .../spark/src/function/datetime/next_day.rs | 243 ++++++++++++++++++ .../test_files/spark/datetime/next_day.slt | 16 +- 5 files changed, 274 insertions(+), 4 deletions(-) create mode 100644 datafusion/spark/src/function/datetime/next_day.rs diff --git a/Cargo.lock b/Cargo.lock index cb13a55b56da..54cd95f9d641 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2606,6 +2606,7 @@ name = "datafusion-spark" version = "48.0.0" dependencies = [ "arrow", + "chrono", "datafusion-catalog", "datafusion-common", "datafusion-execution", diff --git a/datafusion/spark/Cargo.toml b/datafusion/spark/Cargo.toml index 2c46cac6b7b0..c3b6c95283c0 100644 --- a/datafusion/spark/Cargo.toml +++ b/datafusion/spark/Cargo.toml @@ -37,6 +37,7 @@ name = "datafusion_spark" [dependencies] arrow = { workspace = true } +chrono.workspace = true datafusion-catalog = { workspace = true } datafusion-common = { workspace = true } datafusion-execution = { workspace = true } diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs index a87df9a2c87a..1a9d63750113 100644 --- a/datafusion/spark/src/function/datetime/mod.rs +++ b/datafusion/spark/src/function/datetime/mod.rs @@ -15,11 +15,24 @@ // specific language governing permissions and limitations // under the License. +pub mod next_day; + use datafusion_expr::ScalarUDF; +use datafusion_functions::make_udf_function; use std::sync::Arc; -pub mod expr_fn {} +make_udf_function!(next_day::SparkNextDay, next_day); + +pub mod expr_fn { + use datafusion_functions::export_functions; + + export_functions!(( + next_day, + "Returns the first date which is later than start_date and named as indicated. 
The function returns NULL if at least one of the input parameters is NULL. When both of the input parameters are not NULL and day_of_week is an invalid input, the function throws SparkIllegalArgumentException if spark.sql.ansi.enabled is set to true, otherwise NULL.", + arg1 arg2 + )); +} pub fn functions() -> Vec> { - vec![] + vec![next_day()] } diff --git a/datafusion/spark/src/function/datetime/next_day.rs b/datafusion/spark/src/function/datetime/next_day.rs new file mode 100644 index 000000000000..fd1356a03eb7 --- /dev/null +++ b/datafusion/spark/src/function/datetime/next_day.rs @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::any::Any; +use std::sync::Arc; + +use chrono::{Datelike, Duration, Weekday}; +use arrow::array::{new_null_array, ArrayRef, AsArray, Date32Array, StringArrayType}; +use arrow::datatypes::{DataType, Date32Type}; +use datafusion_common::types::NativeType; +use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; + +/// +#[derive(Debug)] +pub struct SparkNextDay { + signature: Signature, +} + +impl Default for SparkNextDay { + fn default() -> Self { + Self::new() + } +} + +impl SparkNextDay { + pub fn new() -> Self { + Self { + signature: Signature::user_defined(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for SparkNextDay { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "next_day" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Date32) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let ScalarFunctionArgs { args, .. } = args; + let [date, day_of_week] = args.as_slice() else { + return exec_err!( + "Spark `next_day` function requires 2 arguments, got {}", + args.len() + ); + }; + + match (date, day_of_week) { + (ColumnarValue::Scalar(date), ColumnarValue::Scalar(day_of_week)) => { + match (date, day_of_week) { + (ScalarValue::Date32(days), ScalarValue::Utf8(day_of_week) | ScalarValue::LargeUtf8(day_of_week) | ScalarValue::Utf8View(day_of_week)) => { + if let Some(days) = days { + if let Some(day_of_week) = day_of_week { + Ok(ColumnarValue::Scalar(ScalarValue::Date32( + spark_next_day(*days, day_of_week.as_str()), + ))) + } else { + // TODO: if spark.sql.ansi.enabled is false, + // returns NULL instead of an error for a malformed dayOfWeek. 
+ Ok(ColumnarValue::Scalar(ScalarValue::Date32(None))) + } + } else { + Ok(ColumnarValue::Scalar(ScalarValue::Date32(None))) + } + } + _ => exec_err!("Spark `next_day` function: first arg must be date, second arg must be string. Got {args:?}"), + } + } + (ColumnarValue::Array(date_array), ColumnarValue::Scalar(day_of_week)) => { + match (date_array.data_type(), day_of_week) { + (DataType::Date32, ScalarValue::Utf8(day_of_week) | ScalarValue::LargeUtf8(day_of_week) | ScalarValue::Utf8View(day_of_week)) => { + if let Some(day_of_week) = day_of_week { + let result: Date32Array = date_array + .as_primitive::() + .unary_opt(|days| spark_next_day(days, day_of_week.as_str())) + .with_data_type(DataType::Date32); + Ok(ColumnarValue::Array(Arc::new(result) as ArrayRef)) + } else { + // TODO: if spark.sql.ansi.enabled is false, + // returns NULL instead of an error for a malformed dayOfWeek. + Ok(ColumnarValue::Array(Arc::new(new_null_array(&DataType::Date32, date_array.len())))) + } + } + _ => exec_err!("Spark `next_day` function: first arg must be date, second arg must be string. Got {args:?}"), + } + } + (ColumnarValue::Array(date_array), ColumnarValue::Array(day_of_week_array)) => { + let result = match (date_array.data_type(), day_of_week_array.data_type()) { + ( + DataType::Date32, + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View, + ) => { + let date_array: &Date32Array = date_array.as_primitive::(); + match day_of_week_array.data_type() { + DataType::Utf8 => { + let day_of_week_array = day_of_week_array.as_string::(); + process_next_day_arrays(date_array, day_of_week_array) + } + DataType::LargeUtf8 => { + let day_of_week_array = day_of_week_array.as_string::(); + process_next_day_arrays(date_array, day_of_week_array) + } + DataType::Utf8View => { + let day_of_week_array = day_of_week_array.as_string_view(); + process_next_day_arrays(date_array, day_of_week_array) + } + other => { + exec_err!("Spark `next_day` function: second arg must be string. 
Got {other:?}") + } + } + } + (left, right) => { + exec_err!( + "Spark `next_day` function: first arg must be date, second arg must be string. Got {left:?}, {right:?}" + ) + } + }?; + Ok(ColumnarValue::Array(result)) + } + _ => exec_err!("Unsupported args {args:?} for Spark function `next_day`"), + } + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 2 { + return exec_err!( + "Spark `next_day` function requires 2 arguments, got {}", + arg_types.len() + ); + } + + let current_native_type: NativeType = (&arg_types[0]).into(); + if matches!(current_native_type, NativeType::Date) + || matches!(current_native_type, NativeType::String) + || matches!(current_native_type, NativeType::Null) + { + if matches!(&arg_types[1], DataType::Utf8) + || matches!(&arg_types[1], DataType::LargeUtf8) + || matches!(&arg_types[1], DataType::Utf8View) + { + Ok(vec![DataType::Date32, arg_types[1].clone()]) + } else { + plan_err!( + "The second argument of the Spark `next_day` function must be a string, but got {}", + &arg_types[1] + ) + } + } else { + plan_err!( + "The first argument of the Spark `next_day` function can only be a date or string, but got {}", &arg_types[0] + ) + } + } +} + +fn process_next_day_arrays<'a, S>( + date_array: &Date32Array, + day_of_week_array: &'a S, +) -> Result +where + &'a S: StringArrayType<'a>, +{ + let result = date_array + .iter() + .zip(day_of_week_array.iter()) + .map(|(days, day_of_week)| { + if let Some(days) = days { + if let Some(day_of_week) = day_of_week { + spark_next_day(days, day_of_week) + } else { + // TODO: if spark.sql.ansi.enabled is false, + // returns NULL instead of an error for a malformed dayOfWeek. 
+ None + } + } else { + None + } + }) + .collect::(); + Ok(Arc::new(result) as ArrayRef) +} + +fn spark_next_day(days: i32, day_of_week: &str) -> Option { + let date = Date32Type::to_naive_date(days); + + let day_of_week = day_of_week.trim().to_uppercase(); + let day_of_week = match day_of_week.as_str() { + "MO" | "MON" | "MONDAY" => Some("MONDAY"), + "TU" | "TUE" | "TUESDAY" => Some("TUESDAY"), + "WE" | "WED" | "WEDNESDAY" => Some("WEDNESDAY"), + "TH" | "THU" | "THURSDAY" => Some("THURSDAY"), + "FR" | "FRI" | "FRIDAY" => Some("FRIDAY"), + "SA" | "SAT" | "SATURDAY" => Some("SATURDAY"), + "SU" | "SUN" | "SUNDAY" => Some("SUNDAY"), + _ => { + // TODO: if spark.sql.ansi.enabled is false, + // returns NULL instead of an error for a malformed dayOfWeek. + None + } + }; + + if let Some(day_of_week) = day_of_week { + let day_of_week = day_of_week.parse::(); + match day_of_week { + Ok(day_of_week) => Some(Date32Type::from_naive_date( + date + Duration::days((7 - date.weekday().days_since(day_of_week)) as i64), + )), + Err(_) => { + // TODO: if spark.sql.ansi.enabled is false, + // returns NULL instead of an error for a malformed dayOfWeek. 
+ None + } + } + } else { + None + } +} \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt b/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt index ffc7040f47bd..7bb178d5d54b 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt @@ -23,5 +23,17 @@ ## Original Query: SELECT next_day('2015-01-14', 'TU'); ## PySpark 3.5.5 Result: {'next_day(2015-01-14, TU)': datetime.date(2015, 1, 20), 'typeof(next_day(2015-01-14, TU))': 'date', 'typeof(2015-01-14)': 'string', 'typeof(TU)': 'string'} -#query -#SELECT next_day('2015-01-14'::string, 'TU'::string); +query D +SELECT next_day('2015-01-14'::string, 'TU'::string); +---- +2015-01-20 + +query D +SELECT next_day('2015-07-27'::string, 'Sun'::string); +---- +2015-08-02 + +query D +SELECT next_day('2015-07-27'::string, 'Sat'::string); +---- +2015-08-01 From 1314e81b5e00bad0fd0150ff473d582186ac55bd Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Mon, 14 Jul 2025 21:07:35 -0700 Subject: [PATCH 2/9] cargo fmt --- .../spark/src/function/datetime/next_day.rs | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/datafusion/spark/src/function/datetime/next_day.rs b/datafusion/spark/src/function/datetime/next_day.rs index fd1356a03eb7..a2416291247d 100644 --- a/datafusion/spark/src/function/datetime/next_day.rs +++ b/datafusion/spark/src/function/datetime/next_day.rs @@ -18,12 +18,14 @@ use std::any::Any; use std::sync::Arc; -use chrono::{Datelike, Duration, Weekday}; use arrow::array::{new_null_array, ArrayRef, AsArray, Date32Array, StringArrayType}; use arrow::datatypes::{DataType, Date32Type}; +use chrono::{Datelike, Duration, Weekday}; use datafusion_common::types::NativeType; use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; +use 
datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +}; /// #[derive(Debug)] @@ -110,24 +112,32 @@ impl ScalarUDFImpl for SparkNextDay { _ => exec_err!("Spark `next_day` function: first arg must be date, second arg must be string. Got {args:?}"), } } - (ColumnarValue::Array(date_array), ColumnarValue::Array(day_of_week_array)) => { - let result = match (date_array.data_type(), day_of_week_array.data_type()) { + ( + ColumnarValue::Array(date_array), + ColumnarValue::Array(day_of_week_array), + ) => { + let result = match (date_array.data_type(), day_of_week_array.data_type()) + { ( DataType::Date32, DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View, ) => { - let date_array: &Date32Array = date_array.as_primitive::(); + let date_array: &Date32Array = + date_array.as_primitive::(); match day_of_week_array.data_type() { DataType::Utf8 => { - let day_of_week_array = day_of_week_array.as_string::(); + let day_of_week_array = + day_of_week_array.as_string::(); process_next_day_arrays(date_array, day_of_week_array) } DataType::LargeUtf8 => { - let day_of_week_array = day_of_week_array.as_string::(); + let day_of_week_array = + day_of_week_array.as_string::(); process_next_day_arrays(date_array, day_of_week_array) } DataType::Utf8View => { - let day_of_week_array = day_of_week_array.as_string_view(); + let day_of_week_array = + day_of_week_array.as_string_view(); process_next_day_arrays(date_array, day_of_week_array) } other => { @@ -229,7 +239,9 @@ fn spark_next_day(days: i32, day_of_week: &str) -> Option { let day_of_week = day_of_week.parse::(); match day_of_week { Ok(day_of_week) => Some(Date32Type::from_naive_date( - date + Duration::days((7 - date.weekday().days_since(day_of_week)) as i64), + date + Duration::days( + (7 - date.weekday().days_since(day_of_week)) as i64, + ), )), Err(_) => { // TODO: if spark.sql.ansi.enabled is false, @@ -240,4 +252,4 @@ fn spark_next_day(days: i32, day_of_week: &str) -> Option { } else 
{ None } -} \ No newline at end of file +} From 0c2b126b261eef7dbc1a41eebc2038d7de51d7f0 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 15 Jul 2025 22:09:45 -0700 Subject: [PATCH 3/9] Update datafusion/spark/Cargo.toml Co-authored-by: Bruce Ritchie --- datafusion/spark/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/spark/Cargo.toml b/datafusion/spark/Cargo.toml index c3b6c95283c0..a9f0a020c102 100644 --- a/datafusion/spark/Cargo.toml +++ b/datafusion/spark/Cargo.toml @@ -37,7 +37,7 @@ name = "datafusion_spark" [dependencies] arrow = { workspace = true } -chrono.workspace = true +chrono = { workspace = true } datafusion-catalog = { workspace = true } datafusion-common = { workspace = true } datafusion-execution = { workspace = true } From 2ab891f5e422ee0f91f307dfbbd01afa9bde8c48 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Thu, 24 Jul 2025 22:10:41 -0700 Subject: [PATCH 4/9] PR feedback --- .../spark/src/function/datetime/next_day.rs | 40 ++++--------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/datafusion/spark/src/function/datetime/next_day.rs b/datafusion/spark/src/function/datetime/next_day.rs index a2416291247d..1bef03440ae8 100644 --- a/datafusion/spark/src/function/datetime/next_day.rs +++ b/datafusion/spark/src/function/datetime/next_day.rs @@ -22,9 +22,11 @@ use arrow::array::{new_null_array, ArrayRef, AsArray, Date32Array, StringArrayTy use arrow::datatypes::{DataType, Date32Type}; use chrono::{Datelike, Duration, Weekday}; use datafusion_common::types::NativeType; +// use datafusion_expr_common::signature::TypeSignature; use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; use datafusion_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, + Volatility, }; /// @@ -42,7 +44,10 @@ impl Default for SparkNextDay { impl SparkNextDay { pub fn new() -> Self { Self { 
- signature: Signature::user_defined(Volatility::Immutable), + signature: Signature::exact( + vec![DataType::Date32, DataType::Utf8], + Volatility::Immutable, + ), } } } @@ -156,37 +161,6 @@ impl ScalarUDFImpl for SparkNextDay { _ => exec_err!("Unsupported args {args:?} for Spark function `next_day`"), } } - - fn coerce_types(&self, arg_types: &[DataType]) -> Result> { - if arg_types.len() != 2 { - return exec_err!( - "Spark `next_day` function requires 2 arguments, got {}", - arg_types.len() - ); - } - - let current_native_type: NativeType = (&arg_types[0]).into(); - if matches!(current_native_type, NativeType::Date) - || matches!(current_native_type, NativeType::String) - || matches!(current_native_type, NativeType::Null) - { - if matches!(&arg_types[1], DataType::Utf8) - || matches!(&arg_types[1], DataType::LargeUtf8) - || matches!(&arg_types[1], DataType::Utf8View) - { - Ok(vec![DataType::Date32, arg_types[1].clone()]) - } else { - plan_err!( - "The second argument of the Spark `next_day` function must be a string, but got {}", - &arg_types[1] - ) - } - } else { - plan_err!( - "The first argument of the Spark `next_day` function can only be a date or string, but got {}", &arg_types[0] - ) - } - } } fn process_next_day_arrays<'a, S>( From 9d5f3e89311b123b89cb150ea16797cda6cd28d5 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Thu, 24 Jul 2025 22:10:57 -0700 Subject: [PATCH 5/9] Add more tests --- .../test_files/spark/datetime/next_day.slt | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt b/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt index 7bb178d5d54b..5c756a3de650 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt @@ -24,16 +24,61 @@ ## Original Query: SELECT next_day('2015-01-14', 'TU'); ## PySpark 3.5.5 Result: {'next_day(2015-01-14, TU)': 
datetime.date(2015, 1, 20), 'typeof(next_day(2015-01-14, TU))': 'date', 'typeof(2015-01-14)': 'string', 'typeof(TU)': 'string'} query D -SELECT next_day('2015-01-14'::string, 'TU'::string); +SELECT next_day('2015-01-14'::DATE, 'TU'::string); ---- 2015-01-20 query D -SELECT next_day('2015-07-27'::string, 'Sun'::string); +SELECT next_day('2015-07-27'::DATE, 'Sun'::string); ---- 2015-08-02 query D -SELECT next_day('2015-07-27'::string, 'Sat'::string); +SELECT next_day('2015-07-27'::DATE, 'Sat'::string); ---- 2015-08-01 + +query error Failed to coerce arguments to satisfy a call to 'next_day' function +SELECT next_day('2015-07-27'::DATE); + +query error Failed to coerce arguments to satisfy a call to 'next_day' function +SELECT next_day('Sun'::string); + +query error 'next_day' does not support zero arguments +SELECT next_day(); + +query error Failed to coerce arguments to satisfy a call to 'next_day' function +SELECT next_day(1::int, 'Sun'::string); + +query error Failed to coerce arguments to satisfy a call to 'next_day' function +SELECT next_day('2015-07-27'::DATE, 'Sat'::string, 'Sun'::string); + +query error Failed to coerce arguments to satisfy a call to 'next_day' function +SELECT next_day('invalid_date'::string, 'Mon'::string); + +query D +SELECT next_day('2000-01-01'::DATE, 2.0::float); +---- +NULL + +query D +SELECT next_day('2020-01-01'::DATE, 'invalid_day'::string); +---- +NULL + +query D +SELECT next_day(a, b) +FROM VALUES + ('2015-01-14'::DATE, 'TU'::string), + ('2015-07-27'::DATE, 'Sun'::string), + ('2000-01-01'::DATE, 'Mon'::string), + (NULL::DATE, NULL::string), + (NULL::DATE, 'Mon'::string), + ('2015-01-14'::DATE, NULL::string) as t(a, b); +---- +2015-01-20 +2015-08-02 +2000-01-03 +NULL +NULL +NULL From e205d7c721b4ef35c323cced08211f522af7056f Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Thu, 24 Jul 2025 22:33:24 -0700 Subject: [PATCH 6/9] Move ansi mode part of doc to a comment and cleanup --- datafusion/spark/src/function/datetime/mod.rs | 4 
+++- datafusion/spark/src/function/datetime/next_day.rs | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs index 1a9d63750113..639e12c8f775 100644 --- a/datafusion/spark/src/function/datetime/mod.rs +++ b/datafusion/spark/src/function/datetime/mod.rs @@ -26,9 +26,11 @@ make_udf_function!(next_day::SparkNextDay, next_day); pub mod expr_fn { use datafusion_functions::export_functions; + // TODO: add once ANSI support is added: + // "When both of the input parameters are not NULL and day_of_week is an invalid input, the function throws SparkIllegalArgumentException if spark.sql.ansi.enabled is set to true, otherwise NULL." export_functions!(( next_day, - "Returns the first date which is later than start_date and named as indicated. The function returns NULL if at least one of the input parameters is NULL. When both of the input parameters are not NULL and day_of_week is an invalid input, the function throws SparkIllegalArgumentException if spark.sql.ansi.enabled is set to true, otherwise NULL.", + "Returns the first date which is later than start_date and named as indicated. 
The function returns NULL if at least one of the input parameters is NULL.", arg1 arg2 )); } diff --git a/datafusion/spark/src/function/datetime/next_day.rs b/datafusion/spark/src/function/datetime/next_day.rs index 1bef03440ae8..a5d253c2737c 100644 --- a/datafusion/spark/src/function/datetime/next_day.rs +++ b/datafusion/spark/src/function/datetime/next_day.rs @@ -21,11 +21,9 @@ use std::sync::Arc; use arrow::array::{new_null_array, ArrayRef, AsArray, Date32Array, StringArrayType}; use arrow::datatypes::{DataType, Date32Type}; use chrono::{Datelike, Duration, Weekday}; -use datafusion_common::types::NativeType; -// use datafusion_expr_common::signature::TypeSignature; -use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; +use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; From 3640d95500743721515d381fb7cb2180d67eaa81 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Fri, 25 Jul 2025 07:42:21 -0700 Subject: [PATCH 7/9] cargo fmt --- datafusion/spark/src/function/datetime/next_day.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/spark/src/function/datetime/next_day.rs b/datafusion/spark/src/function/datetime/next_day.rs index a5d253c2737c..6d7f62ea3aa4 100644 --- a/datafusion/spark/src/function/datetime/next_day.rs +++ b/datafusion/spark/src/function/datetime/next_day.rs @@ -23,8 +23,7 @@ use arrow::datatypes::{DataType, Date32Type}; use chrono::{Datelike, Duration, Weekday}; use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; /// From 5c172d485f7b0b1429f22b2e1648df0137071158 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Fri, 25 Jul 2025 08:12:34 -0700 
Subject: [PATCH 8/9] Add test --- datafusion/sqllogictest/test_files/spark/datetime/next_day.slt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt b/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt index 5c756a3de650..cac05c3f8231 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt @@ -66,6 +66,9 @@ SELECT next_day('2020-01-01'::DATE, 'invalid_day'::string); ---- NULL +query error Cast error: Cannot cast string '2015-13-32' to value of Date32 type +SELECT next_day('2015-13-32'::DATE, 'Sun'::string); + query D SELECT next_day(a, b) FROM VALUES From e83d75231d00d2f9bf2c4a20c2bf0e11c388bea5 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 27 Jul 2025 08:05:48 -0700 Subject: [PATCH 9/9] Remove commented tests and duplicate tests --- .../sqllogictest/test_files/spark/datetime/next_day.slt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt b/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt index cac05c3f8231..872d1f2b58eb 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/next_day.slt @@ -21,8 +21,6 @@ # For more information, please see: # https://github.com/apache/datafusion/issues/15914 -## Original Query: SELECT next_day('2015-01-14', 'TU'); -## PySpark 3.5.5 Result: {'next_day(2015-01-14, TU)': datetime.date(2015, 1, 20), 'typeof(next_day(2015-01-14, TU))': 'date', 'typeof(2015-01-14)': 'string', 'typeof(TU)': 'string'} query D SELECT next_day('2015-01-14'::DATE, 'TU'::string); ---- @@ -72,15 +70,11 @@ SELECT next_day('2015-13-32'::DATE, 'Sun'::string); query D SELECT next_day(a, b) FROM VALUES - ('2015-01-14'::DATE, 'TU'::string), - ('2015-07-27'::DATE, 'Sun'::string), ('2000-01-01'::DATE, 'Mon'::string), (NULL::DATE,
NULL::string), (NULL::DATE, 'Mon'::string), ('2015-01-14'::DATE, NULL::string) as t(a, b); ---- -2015-01-20 -2015-08-02 2000-01-03 NULL NULL