From d9b39a3e7e7306a22bbaa204bb3cd0579b26023f Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 18:55:01 +0000 Subject: [PATCH 01/15] Allow to run datafusion-examples relation_planner without parameters --- datafusion-examples/examples/relation_planner/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/relation_planner/main.rs b/datafusion-examples/examples/relation_planner/main.rs index d2ba2202d1787..a9276e3c51291 100644 --- a/datafusion-examples/examples/relation_planner/main.rs +++ b/datafusion-examples/examples/relation_planner/main.rs @@ -90,7 +90,7 @@ async fn main() -> Result<()> { let example: ExampleKind = std::env::args() .nth(1) - .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? + .unwrap_or(ExampleKind::All.to_string()) .parse() .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; From 34211872d0c599261333092b1f4b1cbf4bd1155a Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 15:30:26 +0000 Subject: [PATCH 02/15] datafusion-sql: expose relation module --- datafusion/sql/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index 7fef670933f9a..3eaba01e97eb4 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -47,7 +47,7 @@ mod expr; pub mod parser; pub mod planner; mod query; -mod relation; +pub mod relation; pub mod resolve; mod select; mod set_expr; From 690e1677395bbb35ab7c04ba500c75dc20bd3e6b Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 15:27:47 +0000 Subject: [PATCH 03/15] Add SqlToRelRelationContext constructor --- datafusion/sql/src/relation/mod.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 3115d8dfffbd2..133a1ff4901c1 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -33,11 +33,20 @@ use sqlparser::ast::{FunctionArg, FunctionArgExpr, Spanned, TableFactor}; mod join; -struct SqlToRelRelationContext<'a, 'b, S: ContextProvider> { +pub struct SqlToRelRelationContext<'a, 'b, S: ContextProvider> { planner: &'a SqlToRel<'b, S>, planner_context: &'a mut PlannerContext, } +impl<'a, 'b, S: ContextProvider> SqlToRelRelationContext<'a, 'b, S> { + pub fn new(planner: &'a SqlToRel<'b, S>, planner_context: &'a mut PlannerContext) -> Self { + Self { + planner, + planner_context, + } + } +} + // Implement RelationPlannerContext impl<'a, 'b, S: ContextProvider> RelationPlannerContext for SqlToRelRelationContext<'a, 'b, S> @@ -117,11 +126,7 @@ impl SqlToRel<'_, S> { let mut current_relation = relation; for planner in planners.iter() { - let mut context = SqlToRelRelationContext { - planner: self, - planner_context, - }; - + let mut context = SqlToRelRelationContext::new(self, planner_context); match planner.plan_relation(current_relation, &mut context)? { RelationPlanning::Planned(planned) => { return Ok(RelationPlanning::Planned(planned)); From 5b5a83803b547395194f27b3a614f28161b1d8cc Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 15:27:24 +0000 Subject: [PATCH 04/15] Add parse_sql_literal module --- .../optimizer/src/simplify_expressions/mod.rs | 1 + .../simplify_sql_literal.rs | 253 ++++++++++++++++++ 2 files changed, 254 insertions(+) create mode 100644 datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs b/datafusion/optimizer/src/simplify_expressions/mod.rs index e238fca32689d..1b738d6c0c600 100644 --- a/datafusion/optimizer/src/simplify_expressions/mod.rs +++ b/datafusion/optimizer/src/simplify_expressions/mod.rs @@ -23,6 +23,7 @@ mod inlist_simplifier; mod regex; pub mod simplify_exprs; mod simplify_predicates; +pub mod simplify_sql_literal; mod unwrap_cast; mod utils; diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs new file mode 100644 index 0000000000000..cf90fc9906668 --- /dev/null +++ b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs @@ -0,0 +1,253 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Parses and simplifies a SQL expression to a literal of a given type. +//! +//! This module provides functionality to parse and simplify static SQL expressions +//! used in SQL constructs like `FROM TABLE SAMPLE (10 + 50 * 2)`. If they are required +//! in a planning (not an execution) phase, they need to be reduced to literals of a given type. + +use crate::simplify_expressions::ExprSimplifier; +use arrow::datatypes::DataType; +use datafusion_common::{ + plan_datafusion_err, plan_err, DFSchemaRef, DataFusionError, Result, ScalarValue, +}; +use datafusion_expr::execution_props::ExecutionProps; +use datafusion_expr::planner::RelationPlannerContext; +use datafusion_expr::simplify::SimplifyContext; +use datafusion_expr::sqlparser::ast; +use datafusion_expr::Expr; +use std::sync::Arc; + +/// Parse and simplifies a SQL expression to a numeric literal of a given type `T`. +/// +/// This function simplifies and coerces the expression, then extracts the underlying +/// native type using `TryFrom`. +/// +/// # Arguments +/// * `expr` - A logical AST expression +/// * `target_type` - Arrow type to cast the literal to +/// * `schema` - Schema reference for expression planning +/// * `context` - `RelationPlannerContext` context +/// +/// # Returns +/// A `Result` containing a literal type +/// +/// # Example +/// ``` +/// let value: f64 = parse_sql_literal(&expr, &DataType::Float64, &schema, &mut context)?; +/// ``` +pub fn parse_sql_literal( + expr: &ast::Expr, + target_type: &DataType, + schema: &DFSchemaRef, + context: &mut dyn RelationPlannerContext, +) -> Result +where + T: TryFrom, +{ + match context.sql_to_expr(expr.clone(), &Arc::clone(schema)) { + Ok(logical_expr) => { + log::debug!("Parsing expr {logical_expr:?} to type {target_type}"); + + let execution_props = ExecutionProps::new(); + let simplifier = ExprSimplifier::new( + SimplifyContext::new(&execution_props).with_schema(Arc::clone(schema)), + ); + + // Simplify and coerce expression in case of constant arithmetic operations (e.g., 10 + 5) + let simplified_expr: Expr = simplifier + .simplify(logical_expr.clone()) + .map_err(|err| plan_datafusion_err!("Cannot simplify {expr:?}: {err}"))?; + let coerced_expr: Expr = simplifier.coerce(simplified_expr, schema)?; + log::debug!("Coerced expression: {:?}", &coerced_expr); + + match coerced_expr { + Expr::Literal(scalar_value, _) => { + // It is a literal - proceed to the underlying value + // Cast to the target type if needed + let casted_scalar = scalar_value.cast_to(target_type)?; + + // Extract the native type + T::try_from(casted_scalar).map_err(|err| { + plan_datafusion_err!( + "Cannot extract {} from scalar value: {err}", + std::any::type_name::() + ) + }) + } + actual => { + plan_err!( + "Cannot extract literal from coerced {actual:?} expression given {expr:?} expression" + ) + } + } + } + Err(err) => { + plan_err!("Cannot construct logical expression from {expr:?}: {err}") + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::datatypes::DataType; + use datafusion_common::config::ConfigOptions; + use datafusion_common::{not_impl_err, DFSchema, TableReference}; + use datafusion_expr::planner::ContextProvider; + use datafusion_expr::sqlparser::parser::Parser; + use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; + use datafusion_sql::planner::{PlannerContext, SqlToRel}; + use datafusion_sql::relation::SqlToRelRelationContext; + use datafusion_sql::sqlparser::dialect::GenericDialect; + use std::sync::Arc; + + // Simple mock context provider for testing + struct MockContextProvider { + options: ConfigOptions, + } + + impl ContextProvider for MockContextProvider { + fn get_table_source(&self, _: TableReference) -> Result> { + not_impl_err!("mock") + } + + fn get_function_meta(&self, _name: &str) -> Option> { + None + } + + fn get_aggregate_meta(&self, _name: &str) -> Option> { + None + } + + fn get_variable_type(&self, _variable_names: &[String]) -> Option { + None + } + + fn get_window_meta(&self, _name: &str) -> Option> { + None + } + + fn options(&self) -> &ConfigOptions { + &self.options + } + + fn udf_names(&self) -> Vec { + vec![] + } + + fn udaf_names(&self) -> Vec { + vec![] + } + + fn udwf_names(&self) -> Vec { + vec![] + } + } + + #[test] + fn test_parse_sql_float_literal() { + let test_cases = vec![ + ("0.0", 0.0), + ("1.0", 1.0), + ("0", 0.0), + ("1", 1.0), + ("0.5", 0.5), + ("100.0", 100.0), + ("0.001", 0.001), + ("999.999", 999.999), + ("1.0 + 2.0", 3.0), + ("10.0 * 0.5", 5.0), + ("100.0 / 4.0", 25.0), + ("(80.0 + 2.0*10.0) / 4.0", 25.0), + ("50.0 - 10.0", 40.0), + ("1e2", 100.0), + ("1.5e1", 15.0), + ("2.5e-1", 0.25), + ]; + + let schema = DFSchemaRef::new(DFSchema::empty()); + let context = MockContextProvider { + options: ConfigOptions::default(), + }; + let sql_to_rel = SqlToRel::new(&context); + let mut planner_context = PlannerContext::new(); + let mut sqltorel_context = + SqlToRelRelationContext::new(&sql_to_rel, &mut planner_context); + let dialect = GenericDialect {}; + + for (sql_expr, expected) in test_cases { + let ast_expr = Parser::new(&dialect) + .try_with_sql(sql_expr) + .unwrap() + .parse_expr() + .unwrap(); + + let result: Result = parse_sql_literal( + &ast_expr, + &DataType::Float64, + &schema, + &mut sqltorel_context, + ); + + match result { + Ok(value) => { + assert!( + (value - expected).abs() < 1e-10, + "For expression '{sql_expr}': expected {expected}, got {value}", + ); + } + Err(e) => panic!("Failed to parse expression '{sql_expr}': {e}"), + } + } + } + + #[test] + fn test_parse_sql_integer_literal() { + let schema = DFSchemaRef::new(DFSchema::empty()); + let context = MockContextProvider { + options: ConfigOptions::default(), + }; + let sql_to_rel = SqlToRel::new(&context); + let mut planner_context = PlannerContext::new(); + let mut sqltorel_context = + SqlToRelRelationContext::new(&sql_to_rel, &mut planner_context); + let dialect = GenericDialect {}; + + // Integer + let ast_expr = Parser::new(&dialect) + .try_with_sql("2 + 4") + .unwrap() + .parse_expr() + .unwrap(); + + let result: Result = parse_sql_literal( + &ast_expr, + &DataType::Int64, + &schema, + &mut sqltorel_context, + ); + + match result { + Ok(value) => { + assert_eq!(6, value); + } + Err(e) => panic!("Failed to parse expression: {e}"), + } + } +} From b2bc6cfa129018545e69a8e73688d5cb012eaf7c Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 13:22:31 +0000 Subject: [PATCH 05/15] Switch table sample example to lib sql parsing --- .../examples/relation_planner/table_sample.rs | 62 +++++++------------ 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/datafusion-examples/examples/relation_planner/table_sample.rs b/datafusion-examples/examples/relation_planner/table_sample.rs index a3ae5396eda1f..533a75a4a3828 100644 --- a/datafusion-examples/examples/relation_planner/table_sample.rs +++ b/datafusion-examples/examples/relation_planner/table_sample.rs @@ -83,9 +83,7 @@ use std::{ any::Any, fmt::{self, Debug, Formatter}, hash::{Hash, Hasher}, - ops::{Add, Div, Mul, Sub}, pin::Pin, - str::FromStr, sync::Arc, task::{Context, Poll}, }; @@ -94,7 +92,7 @@ use arrow::{ array::{ArrayRef, Int32Array, RecordBatch, StringArray, UInt32Array}, compute, }; -use arrow_schema::SchemaRef; +use arrow_schema::{DataType, SchemaRef}; use futures::{ ready, stream::{Stream, StreamExt}, @@ -102,6 +100,7 @@ use futures::{ use rand::{rngs::StdRng, Rng, SeedableRng}; use tonic::async_trait; +use datafusion::optimizer::simplify_expressions::simplify_sql_literal::parse_sql_literal; use datafusion::{ execution::{ context::QueryPlanner, RecordBatchStream, SendableRecordBatchStream, @@ -384,6 +383,8 @@ impl RelationPlanner for TableSamplePlanner { }; let input = context.plan(base_relation)?; + let schema = input.schema(); + // Handle bucket sampling (Hive-style: TABLESAMPLE(BUCKET x OUT OF y)) if let Some(bucket) = sample.bucket { if bucket.on.is_some() { @@ -415,7 +416,12 @@ impl RelationPlanner for TableSamplePlanner { match quantity.unit { // TABLESAMPLE (N ROWS) - exact row limit Some(TableSampleUnit::Rows) => { - let rows = parse_quantity::(&quantity.value)?; + let rows = parse_sql_literal::( + &quantity.value, + &DataType::Int64, + schema, + context, + )?; if rows < 0 { return plan_err!("row count must be non-negative, got {}", rows); } @@ -427,7 +433,12 @@ impl RelationPlanner for TableSamplePlanner { // TABLESAMPLE (N PERCENT) - percentage sampling Some(TableSampleUnit::Percent) => { - let percent = parse_quantity::(&quantity.value)?; + let percent = parse_sql_literal::( + &quantity.value, + &DataType::Float64, + schema, + context, + )?; let fraction = percent / 100.0; let plan = TableSamplePlanNode::new(input, fraction, seed).into_plan(); Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) @@ -435,7 +446,12 @@ impl RelationPlanner for TableSamplePlanner { // TABLESAMPLE (N) - fraction if <1.0, row limit if >=1.0 None => { - let value = parse_quantity::(&quantity.value)?; + let value = parse_sql_literal::( + &quantity.value, + &DataType::Float64, + schema, + context, + )?; if value < 0.0 { return plan_err!("sample value must be non-negative, got {}", value); } @@ -454,40 +470,6 @@ impl RelationPlanner for TableSamplePlanner { } } -/// Parse a SQL expression as a numeric value (supports basic arithmetic). -fn parse_quantity(expr: &ast::Expr) -> Result -where - T: FromStr + Add + Sub + Mul + Div, -{ - eval_numeric_expr(expr) - .ok_or_else(|| plan_datafusion_err!("invalid numeric expression: {:?}", expr)) -} - -/// Recursively evaluate numeric SQL expressions. -fn eval_numeric_expr(expr: &ast::Expr) -> Option -where - T: FromStr + Add + Sub + Mul + Div, -{ - match expr { - ast::Expr::Value(v) => match &v.value { - ast::Value::Number(n, _) => n.to_string().parse().ok(), - _ => None, - }, - ast::Expr::BinaryOp { left, op, right } => { - let l = eval_numeric_expr::(left)?; - let r = eval_numeric_expr::(right)?; - match op { - ast::BinaryOperator::Plus => Some(l + r), - ast::BinaryOperator::Minus => Some(l - r), - ast::BinaryOperator::Multiply => Some(l * r), - ast::BinaryOperator::Divide => Some(l / r), - _ => None, - } - } - _ => None, - } -} - /// Custom logical plan node representing a TABLESAMPLE operation. /// /// Stores sampling parameters (bounds, seed) and wraps the input plan. From a46e040333e74f86594c6b3c2835db78750da9cc Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 19:25:19 +0000 Subject: [PATCH 06/15] Reformat --- .../src/simplify_expressions/simplify_sql_literal.rs | 6 +++--- datafusion/sql/src/relation/mod.rs | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs index cf90fc9906668..efaf5f4783e24 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs @@ -24,13 +24,13 @@ use crate::simplify_expressions::ExprSimplifier; use arrow::datatypes::DataType; use datafusion_common::{ - plan_datafusion_err, plan_err, DFSchemaRef, DataFusionError, Result, ScalarValue, + DFSchemaRef, DataFusionError, Result, ScalarValue, plan_datafusion_err, plan_err, }; +use datafusion_expr::Expr; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::planner::RelationPlannerContext; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::sqlparser::ast; -use datafusion_expr::Expr; use std::sync::Arc; /// Parse and simplifies a SQL expression to a numeric literal of a given type `T`. @@ -108,7 +108,7 @@ mod tests { use super::*; use arrow::datatypes::DataType; use datafusion_common::config::ConfigOptions; - use datafusion_common::{not_impl_err, DFSchema, TableReference}; + use datafusion_common::{DFSchema, TableReference, not_impl_err}; use datafusion_expr::planner::ContextProvider; use datafusion_expr::sqlparser::parser::Parser; use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 133a1ff4901c1..5f96779d1a1f6 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -39,7 +39,10 @@ pub struct SqlToRelRelationContext<'a, 'b, S: ContextProvider> { } impl<'a, 'b, S: ContextProvider> SqlToRelRelationContext<'a, 'b, S> { - pub fn new(planner: &'a SqlToRel<'b, S>, planner_context: &'a mut PlannerContext) -> Self { + pub fn new( + planner: &'a SqlToRel<'b, S>, + planner_context: &'a mut PlannerContext, + ) -> Self { Self { planner, planner_context, From 1cb87c1f6b4fd9addda977cbadaafb18054f0548 Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 19:29:06 +0000 Subject: [PATCH 07/15] Use unwrap_or_else --- datafusion-examples/examples/relation_planner/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/relation_planner/main.rs b/datafusion-examples/examples/relation_planner/main.rs index a9276e3c51291..15079f644612d 100644 --- a/datafusion-examples/examples/relation_planner/main.rs +++ b/datafusion-examples/examples/relation_planner/main.rs @@ -90,7 +90,7 @@ async fn main() -> Result<()> { let example: ExampleKind = std::env::args() .nth(1) - .unwrap_or(ExampleKind::All.to_string()) + .unwrap_or_else(|| ExampleKind::All.to_string()) .parse() .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; From e5a95d4c5aa7ea8e29b075cff7fb14492fb678e3 Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 19:36:26 +0000 Subject: [PATCH 08/15] Ignore doctest --- .../src/simplify_expressions/simplify_sql_literal.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs index efaf5f4783e24..f866993efab37 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs @@ -48,8 +48,8 @@ use std::sync::Arc; /// A `Result` containing a literal type /// /// # Example -/// ``` -/// let value: f64 = parse_sql_literal(&expr, &DataType::Float64, &schema, &mut context)?; +/// ```ignore +/// let value: f64 = parse_sql_literal(&expr, &DataType::Float64, &schema, &mut relPlannerContext)?; /// ``` pub fn parse_sql_literal( expr: &ast::Expr, From 93ba06e7a5bdfa44ed06f0d8270848184ec8bb17 Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 20:00:19 +0000 Subject: [PATCH 09/15] Gate parse_sql_literal with sql feature --- datafusion/optimizer/src/simplify_expressions/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs b/datafusion/optimizer/src/simplify_expressions/mod.rs index 1b738d6c0c600..a7410c476da5d 100644 --- a/datafusion/optimizer/src/simplify_expressions/mod.rs +++ b/datafusion/optimizer/src/simplify_expressions/mod.rs @@ -23,6 +23,7 @@ mod inlist_simplifier; mod regex; pub mod simplify_exprs; mod simplify_predicates; +#[cfg(feature = "sql")] pub mod simplify_sql_literal; mod unwrap_cast; mod utils; From 90f7a4b4fcb651663404efbdfaf35e2ca054adc2 Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 20:00:32 +0000 Subject: [PATCH 10/15] Enable optimizer feature sql --- datafusion/optimizer/Cargo.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 0fb08684cd145..221c4a2da2295 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -41,7 +41,9 @@ workspace = true name = "datafusion_optimizer" [features] +default = ["sql"] recursive_protection = ["dep:recursive"] +sql = ["datafusion-expr/sql"] [dependencies] arrow = { workspace = true } From 964a326a6fc5b9210e0d0002857faff37a84469a Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 22:32:41 +0000 Subject: [PATCH 11/15] Enable sql feature for datafusion in examples --- datafusion-examples/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index b0190dadf3c3f..d4efee6c1f0cb 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -45,7 +45,7 @@ bytes = { workspace = true } dashmap = { workspace = true } # note only use main datafusion crate for examples base64 = "0.22.1" -datafusion = { workspace = true, default-features = true, features = ["parquet_encryption"] } +datafusion = { workspace = true, default-features = true, features = ["parquet_encryption", "sql"] } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-expr-adapter = { workspace = true } From df70b2416fc38f7f46b081b2b846f9a89e7fd305 Mon Sep 17 00:00:00 2001 From: theirix Date: Sat, 13 Dec 2025 23:15:04 +0000 Subject: [PATCH 12/15] Set sql feature for optimizer crate --- datafusion/core/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index d2ecd34886def..ced6c21277dea 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -98,6 +98,7 @@ serde = [ ] sql = [ "datafusion-common/sql", + "datafusion-optimizer/sql", "datafusion-functions-nested?/sql", "datafusion-sql", "sqlparser", From 78e2dc1d337fd33823b0e3f236028e6352657188 Mon Sep 17 00:00:00 2001 From: theirix Date: Wed, 24 Dec 2025 15:28:05 +0000 Subject: [PATCH 13/15] Omit example launcher change --- datafusion-examples/examples/relation_planner/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/examples/relation_planner/main.rs b/datafusion-examples/examples/relation_planner/main.rs index 15079f644612d..d2ba2202d1787 100644 --- a/datafusion-examples/examples/relation_planner/main.rs +++ b/datafusion-examples/examples/relation_planner/main.rs @@ -90,7 +90,7 @@ async fn main() -> Result<()> { let example: ExampleKind = std::env::args() .nth(1) - .unwrap_or_else(|| ExampleKind::All.to_string()) + .ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))? .parse() .map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?; From 7aa15c86c16ca6e97c631aeaec95384700c4d060 Mon Sep 17 00:00:00 2001 From: theirix Date: Wed, 24 Dec 2025 16:28:40 +0000 Subject: [PATCH 14/15] Simplify signature by deriving a primitive type from ArrowPrimitiveType --- .../examples/relation_planner/table_sample.rs | 27 ++++-------- .../simplify_sql_literal.rs | 42 ++++++++----------- 2 files changed, 25 insertions(+), 44 deletions(-) diff --git a/datafusion-examples/examples/relation_planner/table_sample.rs b/datafusion-examples/examples/relation_planner/table_sample.rs index 533a75a4a3828..9f8d8f818ab18 100644 --- a/datafusion-examples/examples/relation_planner/table_sample.rs +++ b/datafusion-examples/examples/relation_planner/table_sample.rs @@ -88,11 +88,12 @@ use std::{ task::{Context, Poll}, }; +use arrow::datatypes::{Float64Type, Int64Type}; use arrow::{ array::{ArrayRef, Int32Array, RecordBatch, StringArray, UInt32Array}, compute, }; -use arrow_schema::{DataType, SchemaRef}; +use arrow_schema::SchemaRef; use futures::{ ready, stream::{Stream, StreamExt}, @@ -416,12 +417,8 @@ impl RelationPlanner for TableSamplePlanner { match quantity.unit { // TABLESAMPLE (N ROWS) - exact row limit Some(TableSampleUnit::Rows) => { - let rows = parse_sql_literal::( - &quantity.value, - &DataType::Int64, - schema, - context, - )?; + let rows: i64 = + parse_sql_literal::(&quantity.value, schema, context)?; if rows < 0 { return plan_err!("row count must be non-negative, got {}", rows); } @@ -433,12 +430,8 @@ impl RelationPlanner for TableSamplePlanner { // TABLESAMPLE (N PERCENT) - percentage sampling Some(TableSampleUnit::Percent) => { - let percent = parse_sql_literal::( - &quantity.value, - &DataType::Float64, - schema, - context, - )?; + let percent: f64 = + parse_sql_literal::(&quantity.value, schema, context)?; let fraction = percent / 100.0; let plan = TableSamplePlanNode::new(input, fraction, seed).into_plan(); Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) @@ -446,12 +439,8 @@ impl RelationPlanner for TableSamplePlanner { // TABLESAMPLE (N) - fraction if <1.0, row limit if >=1.0 None => { - let value = parse_sql_literal::( - &quantity.value, - &DataType::Float64, - schema, - context, - )?; + let value = + parse_sql_literal::(&quantity.value, schema, context)?; if value < 0.0 { return plan_err!("sample value must be non-negative, got {}", value); } diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs index f866993efab37..95a16702feb83 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs @@ -22,7 +22,7 @@ //! in a planning (not an execution) phase, they need to be reduced to literals of a given type. use crate::simplify_expressions::ExprSimplifier; -use arrow::datatypes::DataType; +use arrow::datatypes::ArrowPrimitiveType; use datafusion_common::{ DFSchemaRef, DataFusionError, Result, ScalarValue, plan_datafusion_err, plan_err, }; @@ -33,14 +33,14 @@ use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::sqlparser::ast; use std::sync::Arc; -/// Parse and simplifies a SQL expression to a numeric literal of a given type `T`. +/// Parse and simplifies a SQL expression to a numeric literal, +/// corresponding to an arrow primitive type `T` (for example, Float64Type). /// /// This function simplifies and coerces the expression, then extracts the underlying /// native type using `TryFrom`. /// /// # Arguments /// * `expr` - A logical AST expression -/// * `target_type` - Arrow type to cast the literal to /// * `schema` - Schema reference for expression planning /// * `context` - `RelationPlannerContext` context /// @@ -49,20 +49,20 @@ use std::sync::Arc; /// /// # Example /// ```ignore -/// let value: f64 = parse_sql_literal(&expr, &DataType::Float64, &schema, &mut relPlannerContext)?; +/// let value: f64 = parse_sql_literal::(&expr, &schema, &mut relPlannerContext)?; /// ``` pub fn parse_sql_literal( expr: &ast::Expr, - target_type: &DataType, schema: &DFSchemaRef, context: &mut dyn RelationPlannerContext, -) -> Result +) -> Result where - T: TryFrom, + T: ArrowPrimitiveType, + ::Native: TryFrom, { match context.sql_to_expr(expr.clone(), &Arc::clone(schema)) { Ok(logical_expr) => { - log::debug!("Parsing expr {logical_expr:?} to type {target_type}"); + log::debug!("Parsing expr {:?} to type {}", logical_expr, T::DATA_TYPE); let execution_props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( @@ -80,10 +80,10 @@ where Expr::Literal(scalar_value, _) => { // It is a literal - proceed to the underlying value // Cast to the target type if needed - let casted_scalar = scalar_value.cast_to(target_type)?; + let casted_scalar = scalar_value.cast_to(&T::DATA_TYPE)?; // Extract the native type - T::try_from(casted_scalar).map_err(|err| { + T::Native::try_from(casted_scalar).map_err(|err| { plan_datafusion_err!( "Cannot extract {} from scalar value: {err}", std::any::type_name::() @@ -106,7 +106,7 @@ where #[cfg(test)] mod tests { use super::*; - use arrow::datatypes::DataType; + use arrow::datatypes::{DataType, Float64Type, Int64Type}; use datafusion_common::config::ConfigOptions; use datafusion_common::{DFSchema, TableReference, not_impl_err}; use datafusion_expr::planner::ContextProvider; @@ -187,7 +187,7 @@ mod tests { }; let sql_to_rel = SqlToRel::new(&context); let mut planner_context = PlannerContext::new(); - let mut sqltorel_context = + let mut sql_context = SqlToRelRelationContext::new(&sql_to_rel, &mut planner_context); let dialect = GenericDialect {}; @@ -198,12 +198,8 @@ mod tests { .parse_expr() .unwrap(); - let result: Result = parse_sql_literal( - &ast_expr, - &DataType::Float64, - &schema, - &mut sqltorel_context, - ); + let result: Result = + parse_sql_literal::(&ast_expr, &schema, &mut sql_context); match result { Ok(value) => { @@ -225,7 +221,7 @@ mod tests { }; let sql_to_rel = SqlToRel::new(&context); let mut planner_context = PlannerContext::new(); - let mut sqltorel_context = + let mut sql_context = SqlToRelRelationContext::new(&sql_to_rel, &mut planner_context); let dialect = GenericDialect {}; @@ -236,12 +232,8 @@ mod tests { .parse_expr() .unwrap(); - let result: Result = parse_sql_literal( - &ast_expr, - &DataType::Int64, - &schema, - &mut sqltorel_context, - ); + let result: Result = + parse_sql_literal::(&ast_expr, &schema, &mut sql_context); match result { Ok(value) => { From ff8f3ccb40b6ef3d48a734e2e56af8c521998cba Mon Sep 17 00:00:00 2001 From: theirix Date: Thu, 25 Dec 2025 10:48:16 +0000 Subject: [PATCH 15/15] Use an empty schema to avoid passing it around --- .../examples/relation_planner/table_sample.rs | 10 +++------ .../simplify_sql_literal.rs | 22 ++++++++++--------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/datafusion-examples/examples/relation_planner/table_sample.rs b/datafusion-examples/examples/relation_planner/table_sample.rs index 9f8d8f818ab18..7a9704688a158 100644 --- a/datafusion-examples/examples/relation_planner/table_sample.rs +++ b/datafusion-examples/examples/relation_planner/table_sample.rs @@ -384,8 +384,6 @@ impl RelationPlanner for TableSamplePlanner { }; let input = context.plan(base_relation)?; - let schema = input.schema(); - // Handle bucket sampling (Hive-style: TABLESAMPLE(BUCKET x OUT OF y)) if let Some(bucket) = sample.bucket { if bucket.on.is_some() { @@ -417,8 +415,7 @@ impl RelationPlanner for TableSamplePlanner { match quantity.unit { // TABLESAMPLE (N ROWS) - exact row limit Some(TableSampleUnit::Rows) => { - let rows: i64 = - parse_sql_literal::(&quantity.value, schema, context)?; + let rows: i64 = parse_sql_literal::(&quantity.value, context)?; if rows < 0 { return plan_err!("row count must be non-negative, got {}", rows); } @@ -431,7 +428,7 @@ impl RelationPlanner for TableSamplePlanner { // TABLESAMPLE (N PERCENT) - percentage sampling Some(TableSampleUnit::Percent) => { let percent: f64 = - parse_sql_literal::(&quantity.value, schema, context)?; + parse_sql_literal::(&quantity.value, context)?; let fraction = percent / 100.0; let plan = TableSamplePlanNode::new(input, fraction, seed).into_plan(); Ok(RelationPlanning::Planned(PlannedRelation::new(plan, alias))) @@ -439,8 +436,7 @@ impl RelationPlanner for TableSamplePlanner { // TABLESAMPLE (N) - fraction if <1.0, row limit if >=1.0 None => { - let value = - parse_sql_literal::(&quantity.value, schema, context)?; + let value = parse_sql_literal::(&quantity.value, context)?; if value < 0.0 { return plan_err!("sample value must be non-negative, got {}", value); } diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs index 95a16702feb83..34c9806073961 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_sql_literal.rs @@ -24,7 +24,8 @@ use crate::simplify_expressions::ExprSimplifier; use arrow::datatypes::ArrowPrimitiveType; use datafusion_common::{ - DFSchemaRef, DataFusionError, Result, ScalarValue, plan_datafusion_err, plan_err, + DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, plan_datafusion_err, + plan_err, }; use datafusion_expr::Expr; use datafusion_expr::execution_props::ExecutionProps; @@ -53,27 +54,30 @@ use std::sync::Arc; /// ``` pub fn parse_sql_literal( expr: &ast::Expr, - schema: &DFSchemaRef, context: &mut dyn RelationPlannerContext, ) -> Result where T: ArrowPrimitiveType, ::Native: TryFrom, { - match context.sql_to_expr(expr.clone(), &Arc::clone(schema)) { + // Empty schema is sufficient because it parses only literal expressions + let schema = DFSchemaRef::new(DFSchema::empty()); + + match context.sql_to_expr(expr.clone(), &schema) { Ok(logical_expr) => { log::debug!("Parsing expr {:?} to type {}", logical_expr, T::DATA_TYPE); let execution_props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(Arc::clone(schema)), + SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)), ); // Simplify and coerce expression in case of constant arithmetic operations (e.g., 10 + 5) let simplified_expr: Expr = simplifier .simplify(logical_expr.clone()) .map_err(|err| plan_datafusion_err!("Cannot simplify {expr:?}: {err}"))?; - let coerced_expr: Expr = simplifier.coerce(simplified_expr, schema)?; + let coerced_expr: Expr = + simplifier.coerce(simplified_expr, schema.as_ref())?; log::debug!("Coerced expression: {:?}", &coerced_expr); match coerced_expr { @@ -108,7 +112,7 @@ mod tests { use super::*; use arrow::datatypes::{DataType, Float64Type, Int64Type}; use datafusion_common::config::ConfigOptions; - use datafusion_common::{DFSchema, TableReference, not_impl_err}; + use datafusion_common::{TableReference, not_impl_err}; use datafusion_expr::planner::ContextProvider; use datafusion_expr::sqlparser::parser::Parser; use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; @@ -181,7 +185,6 @@ mod tests { ("2.5e-1", 0.25), ]; - let schema = DFSchemaRef::new(DFSchema::empty()); let context = MockContextProvider { options: ConfigOptions::default(), }; @@ -199,7 +202,7 @@ mod tests { .unwrap(); let result: Result = - parse_sql_literal::(&ast_expr, &schema, &mut sql_context); + parse_sql_literal::(&ast_expr, &mut sql_context); match result { Ok(value) => { @@ -215,7 +218,6 @@ mod tests { #[test] fn test_parse_sql_integer_literal() { - let schema = DFSchemaRef::new(DFSchema::empty()); let context = MockContextProvider { options: ConfigOptions::default(), }; @@ -233,7 +235,7 @@ mod tests { .unwrap(); let result: Result = - parse_sql_literal::(&ast_expr, &schema, &mut sql_context); + parse_sql_literal::(&ast_expr, &mut sql_context); match result { Ok(value) => {