diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index d2ecd34886de..b63a3f9ccfa3 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -22,7 +22,7 @@ keywords = ["arrow", "query", "sql"] include = ["benches/*.rs", "src/**/*.rs", "Cargo.toml", "LICENSE.txt", "NOTICE.txt"] readme = "../../README.md" version = { workspace = true } -edition = { workspace = true } +edition = "2024" homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git a/datafusion/core/benches/data_utils/mod.rs b/datafusion/core/benches/data_utils/mod.rs index ff2a4e247494..630bc056600b 100644 --- a/datafusion/core/benches/data_utils/mod.rs +++ b/datafusion/core/benches/data_utils/mod.rs @@ -18,9 +18,9 @@ //! This module provides the in-memory table for more realistic benchmarking. use arrow::array::{ - builder::{Int64Builder, StringBuilder}, ArrayRef, Float32Array, Float64Array, RecordBatch, StringArray, StringViewBuilder, UInt64Array, + builder::{Int64Builder, StringBuilder}, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::datasource::MemTable; diff --git a/datafusion/core/benches/distinct_query_sql.rs b/datafusion/core/benches/distinct_query_sql.rs index 1eb1524df8b6..0e638e293d8c 100644 --- a/datafusion/core/benches/distinct_query_sql.rs +++ b/datafusion/core/benches/distinct_query_sql.rs @@ -24,10 +24,10 @@ mod data_utils; use crate::criterion::Criterion; use data_utils::{create_table_provider, make_data}; use datafusion::execution::context::SessionContext; -use datafusion::physical_plan::{collect, ExecutionPlan}; +use datafusion::physical_plan::{ExecutionPlan, collect}; use datafusion::{datasource::MemTable, error::Result}; -use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; +use datafusion_execution::config::SessionConfig; use parking_lot::Mutex; use std::hint::black_box; diff --git a/datafusion/core/benches/filter_query_sql.rs b/datafusion/core/benches/filter_query_sql.rs index 16905e0f9660..3b80518d32dc 100644 --- a/datafusion/core/benches/filter_query_sql.rs +++ b/datafusion/core/benches/filter_query_sql.rs @@ -20,7 +20,7 @@ use arrow::{ datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; use futures::executor::block_on; diff --git a/datafusion/core/benches/map_query_sql.rs b/datafusion/core/benches/map_query_sql.rs index 9e3d7a271721..67904197bc25 100644 --- a/datafusion/core/benches/map_query_sql.rs +++ b/datafusion/core/benches/map_query_sql.rs @@ -20,10 +20,10 @@ use std::hint::black_box; use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array, RecordBatch}; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use parking_lot::Mutex; -use rand::prelude::ThreadRng; use rand::Rng; +use rand::prelude::ThreadRng; use tokio::runtime::Runtime; use datafusion::prelude::SessionContext; diff --git a/datafusion/core/benches/parquet_query_sql.rs b/datafusion/core/benches/parquet_query_sql.rs index e2b381048013..e44524127bf1 100644 --- a/datafusion/core/benches/parquet_query_sql.rs +++ b/datafusion/core/benches/parquet_query_sql.rs @@ -23,14 +23,14 @@ use arrow::datatypes::{ SchemaRef, }; use arrow::record_batch::RecordBatch; -use criterion::{criterion_group, 
criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::instant::Instant; use futures::stream::StreamExt; use parquet::arrow::ArrowWriter; use parquet::file::properties::{WriterProperties, WriterVersion}; -use rand::distr::uniform::SampleUniform; use rand::distr::Alphanumeric; +use rand::distr::uniform::SampleUniform; use rand::prelude::*; use rand::rng; use std::fs::File; diff --git a/datafusion/core/benches/physical_plan.rs b/datafusion/core/benches/physical_plan.rs index 782c29a8096f..e6763b4761c2 100644 --- a/datafusion/core/benches/physical_plan.rs +++ b/datafusion/core/benches/physical_plan.rs @@ -32,7 +32,7 @@ use tokio::runtime::Runtime; use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion::physical_plan::{ collect, - expressions::{col, PhysicalSortExpr}, + expressions::{PhysicalSortExpr, col}, }; use datafusion::prelude::SessionContext; use datafusion_datasource::memory::MemorySourceConfig; diff --git a/datafusion/core/benches/preserve_file_partitioning.rs b/datafusion/core/benches/preserve_file_partitioning.rs index 3a218ea55ddf..17ebca52cd1d 100644 --- a/datafusion/core/benches/preserve_file_partitioning.rs +++ b/datafusion/core/benches/preserve_file_partitioning.rs @@ -38,8 +38,8 @@ use arrow::array::{ArrayRef, Float64Array, StringArray, TimestampMillisecondArra use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; -use criterion::{criterion_group, criterion_main, Criterion}; -use datafusion::prelude::{col, ParquetReadOptions, SessionConfig, SessionContext}; +use criterion::{Criterion, criterion_group, criterion_main}; +use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext, col}; use datafusion_expr::SortExpr; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; diff --git a/datafusion/core/benches/push_down_filter.rs b/datafusion/core/benches/push_down_filter.rs index 139fb12c3094..3c2199c708de 100644 --- a/datafusion/core/benches/push_down_filter.rs +++ b/datafusion/core/benches/push_down_filter.rs @@ -18,16 +18,16 @@ use arrow::array::RecordBatch; use arrow::datatypes::{DataType, Field, Schema}; use bytes::{BufMut, BytesMut}; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::config::ConfigOptions; use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_plan::ExecutionPlan; +use object_store::ObjectStore; use object_store::memory::InMemory; use object_store::path::Path; -use object_store::ObjectStore; use parquet::arrow::ArrowWriter; use std::sync::Arc; diff --git a/datafusion/core/benches/scalar.rs b/datafusion/core/benches/scalar.rs index 540f7212e96e..d06ed3f28b74 100644 --- a/datafusion/core/benches/scalar.rs +++ b/datafusion/core/benches/scalar.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::scalar::ScalarValue; fn criterion_benchmark(c: &mut Criterion) { diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs index 276151e253f7..4ba57a1530e8 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -78,18 +78,18 @@ use datafusion::physical_plan::sorts::sort::SortExec; use datafusion::{ execution::context::TaskContext, physical_plan::{ + ExecutionPlan, ExecutionPlanProperties, coalesce_partitions::CoalescePartitionsExec, - sorts::sort_preserving_merge::SortPreservingMergeExec, ExecutionPlan, - ExecutionPlanProperties, + sorts::sort_preserving_merge::SortPreservingMergeExec, }, prelude::SessionContext, }; use datafusion_datasource::memory::MemorySourceConfig; -use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; +use datafusion_physical_expr::{PhysicalSortExpr, expressions::col}; use datafusion_physical_expr_common::sort_expr::LexOrdering; /// Benchmarks for SortPreservingMerge stream -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use futures::StreamExt; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -355,14 +355,14 @@ fn utf8_high_cardinality_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_low, utf8_low, utf8_high) fn utf8_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); + let mut data_gen = DataGenerator::new(); // need to sort by the combined key, so combine them together - let mut tuples: Vec<_> = gen + let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_high_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_high_cardinality_values()) .collect(); if sorted { @@ -388,14 +388,14 @@ fn utf8_tuple_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_view_low, utf8_view_low, utf8_view_high) fn utf8_view_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); + let mut data_gen = DataGenerator::new(); // need to sort by the combined key, so combine them together - let mut tuples: Vec<_> = gen + let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_high_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_high_cardinality_values()) .collect(); if sorted { @@ -421,15 +421,15 @@ fn utf8_view_tuple_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (f64, utf8_low, utf8_low, i64) fn mixed_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); + let mut data_gen = DataGenerator::new(); // need to sort by the combined key, so combine them together - let mut tuples: Vec<_> = gen + let mut tuples: Vec<_> = data_gen .i64_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.i64_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.i64_values()) .collect(); if sorted { @@ -459,15 +459,15 @@ fn mixed_tuple_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (f64, utf8_view_low, utf8_view_low, i64) fn mixed_tuple_with_utf8_view_streams(sorted: bool) -> 
PartitionedBatches { - let mut gen = DataGenerator::new(); + let mut data_gen = DataGenerator::new(); // need to sort by the combined key, so combine them together - let mut tuples: Vec<_> = gen + let mut tuples: Vec<_> = data_gen .i64_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.i64_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.i64_values()) .collect(); if sorted { @@ -497,8 +497,8 @@ fn mixed_tuple_with_utf8_view_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_dict) fn dictionary_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); - let mut values = gen.utf8_low_cardinality_values(); + let mut data_gen = DataGenerator::new(); + let mut values = data_gen.utf8_low_cardinality_values(); if sorted { values.sort_unstable(); } @@ -512,12 +512,12 @@ fn dictionary_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_dict, utf8_dict, utf8_dict) fn dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); - let mut tuples: Vec<_> = gen + let mut data_gen = DataGenerator::new(); + let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) .collect(); if sorted { @@ -543,13 +543,13 @@ fn dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { /// Create a batch of (utf8_dict, utf8_dict, utf8_dict, i64) fn mixed_dictionary_tuple_streams(sorted: bool) -> PartitionedBatches { - let mut gen = DataGenerator::new(); - let mut tuples: Vec<_> = gen + let mut data_gen = DataGenerator::new(); + let mut tuples: Vec<_> = data_gen .utf8_low_cardinality_values() .into_iter() - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.utf8_low_cardinality_values()) - .zip(gen.i64_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.utf8_low_cardinality_values()) + .zip(data_gen.i64_values()) .collect(); if sorted { diff --git a/datafusion/core/benches/sort_limit_query_sql.rs b/datafusion/core/benches/sort_limit_query_sql.rs index 7c8e5d730d99..c18070fb7725 100644 --- a/datafusion/core/benches/sort_limit_query_sql.rs +++ b/datafusion/core/benches/sort_limit_query_sql.rs @@ -98,8 +98,7 @@ fn create_context() -> Arc<Mutex<SessionContext>> { ctx_holder.lock().push(Arc::new(Mutex::new(ctx))) }); - let ctx = ctx_holder.lock().first().unwrap().clone(); - ctx + ctx_holder.lock().first().unwrap().clone() } fn criterion_benchmark(c: &mut Criterion) { diff --git a/datafusion/core/benches/spm.rs b/datafusion/core/benches/spm.rs index ecc3f908d4b1..9db1306d2bd1 100644 --- a/datafusion/core/benches/spm.rs +++ b/datafusion/core/benches/spm.rs @@ -20,13 +20,13 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array, Int64Array, RecordBatch, StringArray}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr::expressions::col; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion_physical_plan::{collect, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, collect}; use criterion::async_executor::FuturesExecutor; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, 
criterion_group, criterion_main}; use datafusion_datasource::memory::MemorySourceConfig; fn generate_spm_for_round_robin_tie_breaker( diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 027e925d4f4a..7cce7e0bd7db 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -31,15 +31,15 @@ use arrow::datatypes::{DataType, Field, Fields, Schema}; use criterion::Bencher; use datafusion::datasource::MemTable; use datafusion::execution::context::SessionContext; -use datafusion_common::{config::Dialect, ScalarValue}; +use datafusion_common::{ScalarValue, config::Dialect}; use datafusion_expr::col; use rand_distr::num_traits::NumCast; use std::hint::black_box; use std::path::PathBuf; use std::sync::Arc; +use test_utils::TableDef; use test_utils::tpcds::tpcds_schemas; use test_utils::tpch::tpch_schemas; -use test_utils::TableDef; use tokio::runtime::Runtime; const BENCHMARKS_PATH_1: &str = "../../benchmarks/"; @@ -242,8 +242,10 @@ fn criterion_benchmark(c: &mut Criterion) { if !PathBuf::from(format!("{BENCHMARKS_PATH_1}{CLICKBENCH_DATA_PATH}")).exists() && !PathBuf::from(format!("{BENCHMARKS_PATH_2}{CLICKBENCH_DATA_PATH}")).exists() { - panic!("benchmarks/data/hits_partitioned/ could not be loaded. Please run \ - 'benchmarks/bench.sh data clickbench_partitioned' prior to running this benchmark") + panic!( + "benchmarks/data/hits_partitioned/ could not be loaded. Please run \ + 'benchmarks/bench.sh data clickbench_partitioned' prior to running this benchmark" + ) } let ctx = create_context(); diff --git a/datafusion/core/benches/sql_planner_extended.rs b/datafusion/core/benches/sql_planner_extended.rs index aff7cb4d101d..adaf3e5911e9 100644 --- a/datafusion/core/benches/sql_planner_extended.rs +++ b/datafusion/core/benches/sql_planner_extended.rs @@ -18,7 +18,7 @@ use arrow::array::{ArrayRef, RecordBatch}; use arrow_schema::DataType; use arrow_schema::TimeUnit::Nanosecond; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::prelude::{DataFrame, SessionContext}; use datafusion_catalog::MemTable; use datafusion_common::ScalarValue; diff --git a/datafusion/core/benches/sql_query_with_io.rs b/datafusion/core/benches/sql_query_with_io.rs index 58797dfed6b6..0c188f7ba104 100644 --- a/datafusion/core/benches/sql_query_with_io.rs +++ b/datafusion/core/benches/sql_query_with_io.rs @@ -20,7 +20,7 @@ use std::{fmt::Write, sync::Arc, time::Duration}; use arrow::array::{Int64Builder, RecordBatch, UInt64Builder}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use bytes::Bytes; -use criterion::{criterion_group, criterion_main, Criterion, SamplingMode}; +use criterion::{Criterion, SamplingMode, criterion_group, criterion_main}; use datafusion::{ datasource::{ file_format::parquet::ParquetFormat, @@ -31,13 +31,13 @@ use datafusion::{ use datafusion_execution::runtime_env::RuntimeEnv; use itertools::Itertools; use object_store::{ + ObjectStore, memory::InMemory, path::Path, throttle::{ThrottleConfig, ThrottledStore}, - ObjectStore, }; use parquet::arrow::ArrowWriter; -use rand::{rngs::StdRng, Rng, SeedableRng}; +use rand::{Rng, SeedableRng, rngs::StdRng}; use tokio::runtime::Runtime; use url::Url; diff --git a/datafusion/core/benches/struct_query_sql.rs b/datafusion/core/benches/struct_query_sql.rs index 5c7b42731082..96434fc379ea 100644 --- a/datafusion/core/benches/struct_query_sql.rs +++ 
b/datafusion/core/benches/struct_query_sql.rs @@ -20,7 +20,7 @@ use arrow::{ datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; use futures::executor::block_on; diff --git a/datafusion/core/benches/topk_aggregate.rs b/datafusion/core/benches/topk_aggregate.rs index 16e044416761..a4ae479de4d2 100644 --- a/datafusion/core/benches/topk_aggregate.rs +++ b/datafusion/core/benches/topk_aggregate.rs @@ -18,13 +18,13 @@ mod data_utils; use arrow::util::pretty::pretty_format_batches; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use data_utils::make_data; -use datafusion::physical_plan::{collect, displayable, ExecutionPlan}; +use datafusion::physical_plan::{ExecutionPlan, collect, displayable}; use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; -use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; +use datafusion_execution::config::SessionConfig; use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; @@ -46,7 +46,9 @@ async fn create_context( opts.optimizer.enable_topk_aggregation = use_topk; let ctx = SessionContext::new_with_config(cfg); let _ = ctx.register_table("traces", mem_table)?; - let sql = format!("select max(timestamp_ms) from traces group by trace_id order by max(timestamp_ms) desc limit {limit};"); + let sql = format!( + "select max(timestamp_ms) from traces group by trace_id order by max(timestamp_ms) desc limit {limit};" + ); let df = ctx.sql(sql.as_str()).await?; let physical_plan = df.create_physical_plan().await?; let actual_phys_plan = displayable(physical_plan.as_ref()).indent(true).to_string(); diff --git a/datafusion/core/src/bin/print_functions_docs.rs b/datafusion/core/src/bin/print_functions_docs.rs index a9e6d4a30e0e..74a10bf079e6 100644 --- a/datafusion/core/src/bin/print_functions_docs.rs +++ b/datafusion/core/src/bin/print_functions_docs.rs @@ -16,10 +16,10 @@ // under the License. use datafusion::execution::SessionStateDefaults; -use datafusion_common::{not_impl_err, HashSet, Result}; +use datafusion_common::{HashSet, Result, not_impl_err}; use datafusion_expr::{ - aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF, - DocSection, Documentation, ScalarUDF, WindowUDF, + AggregateUDF, DocSection, Documentation, ScalarUDF, WindowUDF, + aggregate_doc_sections, scalar_doc_sections, window_doc_sections, }; use itertools::Itertools; use std::env::args; @@ -255,7 +255,9 @@ fn print_docs( for f in &providers_with_no_docs { eprintln!(" - {f}"); } - not_impl_err!("Some functions do not have documentation. Please implement `documentation` for: {providers_with_no_docs:?}") + not_impl_err!( + "Some functions do not have documentation. 
Please implement `documentation` for: {providers_with_no_docs:?}" + ) } else { Ok(docs) } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 66dbd89f8647..0d060db3bf14 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -26,19 +26,19 @@ use crate::datasource::file_format::csv::CsvFormatFactory; use crate::datasource::file_format::format_as_file_type; use crate::datasource::file_format::json::JsonFormatFactory; use crate::datasource::{ - provider_as_source, DefaultTableSource, MemTable, TableProvider, + DefaultTableSource, MemTable, TableProvider, provider_as_source, }; use crate::error::Result; -use crate::execution::context::{SessionState, TaskContext}; use crate::execution::FunctionRegistry; +use crate::execution::context::{SessionState, TaskContext}; use crate::logical_expr::utils::find_window_exprs; use crate::logical_expr::{ - col, ident, Expr, JoinType, LogicalPlan, LogicalPlanBuilder, - LogicalPlanBuilderOptions, Partitioning, TableType, + Expr, JoinType, LogicalPlan, LogicalPlanBuilder, LogicalPlanBuilderOptions, + Partitioning, TableType, col, ident, }; use crate::physical_plan::{ - collect, collect_partitioned, execute_stream, execute_stream_partitioned, - ExecutionPlan, SendableRecordBatchStream, + ExecutionPlan, SendableRecordBatchStream, collect, collect_partitioned, + execute_stream, execute_stream_partitioned, }; use crate::prelude::SessionContext; use std::any::Any; @@ -52,18 +52,17 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow_schema::FieldRef; use datafusion_common::config::{CsvOptions, JsonOptions}; use datafusion_common::{ - exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, - unqualified_field_not_found, Column, DFSchema, DataFusionError, ParamValues, - ScalarValue, SchemaError, TableReference, UnnestOptions, + Column, DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaError, + TableReference, UnnestOptions, exec_err, internal_datafusion_err, not_impl_err, + plan_datafusion_err, plan_err, unqualified_field_not_found, }; use datafusion_expr::select_expr::SelectExpr; use datafusion_expr::{ - case, + ExplainOption, SortExpr, TableProviderFilterPushDown, UNNAMED_TABLE, case, dml::InsertOp, expr::{Alias, ScalarFunction}, is_null, lit, utils::COUNT_STAR_EXPANSION, - ExplainOption, SortExpr, TableProviderFilterPushDown, UNNAMED_TABLE, }; use datafusion_functions::core::coalesce; use datafusion_functions_aggregate::expr_fn::{ diff --git a/datafusion/core/src/datasource/dynamic_file.rs b/datafusion/core/src/datasource/dynamic_file.rs index 256a11ba693b..50ee96da3dff 100644 --- a/datafusion/core/src/datasource/dynamic_file.rs +++ b/datafusion/core/src/datasource/dynamic_file.rs @@ -20,9 +20,9 @@ use std::sync::Arc; +use crate::datasource::TableProvider; use crate::datasource::listing::ListingTableConfigExt; use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl}; -use crate::datasource::TableProvider; use crate::error::Result; use crate::execution::context::SessionState; diff --git a/datafusion/core/src/datasource/empty.rs b/datafusion/core/src/datasource/empty.rs index 77686c5eb7c2..5aeca92b1626 100644 --- a/datafusion/core/src/datasource/empty.rs +++ b/datafusion/core/src/datasource/empty.rs @@ -28,8 +28,8 @@ use datafusion_common::project_schema; use crate::datasource::{TableProvider, TableType}; use crate::error::Result; use crate::logical_expr::Expr; -use 
datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::empty::EmptyExec; /// An empty plan that is useful for testing and generating plans /// without mapping them to actual data. diff --git a/datafusion/core/src/datasource/file_format/avro.rs b/datafusion/core/src/datasource/file_format/avro.rs index 3428d08a6ae5..b287b1ef3a4e 100644 --- a/datafusion/core/src/datasource/file_format/avro.rs +++ b/datafusion/core/src/datasource/file_format/avro.rs @@ -26,20 +26,21 @@ mod tests { use crate::{ datasource::file_format::test_util::scan_format, prelude::SessionContext, }; - use arrow::array::{as_string_array, Array}; + use arrow::array::{Array, as_string_array}; use datafusion_catalog::Session; use datafusion_common::test_util::batches_to_string; use datafusion_common::{ + Result, cast::{ as_binary_array, as_boolean_array, as_float32_array, as_float64_array, as_int32_array, as_timestamp_microsecond_array, }, - test_util, Result, + test_util, }; use datafusion_datasource_avro::AvroFormat; use datafusion_execution::config::SessionConfig; - use datafusion_physical_plan::{collect, ExecutionPlan}; + use datafusion_physical_plan::{ExecutionPlan, collect}; use futures::StreamExt; use insta::assert_snapshot; @@ -245,7 +246,10 @@ mod tests { values.push(array.value(i)); } - assert_eq!("[1235865600000000, 1235865660000000, 1238544000000000, 1238544060000000, 1233446400000000, 1233446460000000, 1230768000000000, 1230768060000000]", format!("{values:?}")); + assert_eq!( + "[1235865600000000, 1235865660000000, 1238544000000000, 1238544060000000, 1233446400000000, 1233446460000000, 1230768000000000, 1230768060000000]", + format!("{values:?}") + ); Ok(()) } diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 52fb8ae904eb..4b855ba58265 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -32,12 +32,12 @@ mod tests { use crate::prelude::{CsvReadOptions, SessionConfig, SessionContext}; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion_catalog::Session; + use datafusion_common::Result; use datafusion_common::cast::as_string_array; use datafusion_common::config::CsvOptions; use datafusion_common::internal_err; use datafusion_common::stats::Precision; use datafusion_common::test_util::{arrow_test_data, batches_to_string}; - use datafusion_common::Result; use datafusion_datasource::decoder::{ BatchDeserializer, DecoderDeserializer, DeserializerOutput, }; @@ -45,7 +45,7 @@ mod tests { use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::write::BatchSerializer; use datafusion_expr::{col, lit}; - use datafusion_physical_plan::{collect, ExecutionPlan}; + use datafusion_physical_plan::{ExecutionPlan, collect}; use arrow::array::{ Array, BooleanArray, Float64Array, Int32Array, RecordBatch, StringArray, @@ -57,8 +57,8 @@ mod tests { use bytes::Bytes; use chrono::DateTime; use datafusion_common::parsers::CompressionTypeVariant; - use futures::stream::BoxStream; use futures::StreamExt; + use futures::stream::BoxStream; use insta::assert_snapshot; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; @@ -706,11 +706,11 @@ mod tests { let re = Regex::new(r"DataSourceExec: file_groups=\{(\d+) group").unwrap(); - if let Some(captures) = re.captures(&plan) { - if let Some(match_) = captures.get(1) { - let n_partitions = match_.as_str().parse::<usize>().unwrap(); - 
return Ok(n_partitions); - } + if let Some(captures) = re.captures(&plan) + && let Some(match_) = captures.get(1) + { + let n_partitions = match_.as_str().parse::<usize>().unwrap(); + return Ok(n_partitions); } internal_err!("query contains no DataSourceExec") @@ -944,17 +944,19 @@ mod tests { let files: Vec<_> = std::fs::read_dir(&path).unwrap().collect(); assert_eq!(files.len(), 1); - assert!(files - .last() - .unwrap() - .as_ref() - .unwrap() - .path() - .file_name() - .unwrap() - .to_str() - .unwrap() - .ends_with(".csv.gz")); + assert!( + files + .last() + .unwrap() + .as_ref() + .unwrap() + .path() + .file_name() + .unwrap() + .to_str() + .unwrap() + .ends_with(".csv.gz") + ); Ok(()) } @@ -983,17 +985,19 @@ mod tests { let files: Vec<_> = std::fs::read_dir(&path).unwrap().collect(); assert_eq!(files.len(), 1); - assert!(files - .last() - .unwrap() - .as_ref() - .unwrap() - .path() - .file_name() - .unwrap() - .to_str() - .unwrap() - .ends_with(".csv")); + assert!( + files + .last() + .unwrap() + .as_ref() + .unwrap() + .path() + .file_name() + .unwrap() + .to_str() + .unwrap() + .ends_with(".csv") + ); Ok(()) } @@ -1191,7 +1195,9 @@ mod tests { ) -> Result<()> { let schema = csv_schema(); let generator = CsvBatchGenerator::new(batch_size, line_count); - let mut deserializer = csv_deserializer(batch_size, &schema); + + let schema_clone = Arc::clone(&schema); + let mut deserializer = csv_deserializer(batch_size, &schema_clone); for data in generator { deserializer.digest(data); @@ -1230,7 +1236,8 @@ mod tests { ) -> Result<()> { let schema = csv_schema(); let generator = CsvBatchGenerator::new(batch_size, line_count); - let mut deserializer = csv_deserializer(batch_size, &schema); + let schema_clone = Arc::clone(&schema); + let mut deserializer = csv_deserializer(batch_size, &schema_clone); for data in generator { deserializer.digest(data); @@ -1499,7 +1506,7 @@ mod tests { // Create a temp file with a .csv suffix so the reader accepts it let mut tmp = tempfile::Builder::new().suffix(".csv").tempfile()?; // ensures path ends with .csv - // CSV has header "a,b,c". First data row is truncated (only "1,2"), second row is complete. + // CSV has header "a,b,c". First data row is truncated (only "1,2"), second row is complete. 
write!(tmp, "a,b,c\n1,2\n3,4,5\n")?; let path = tmp.path().to_str().unwrap().to_string(); diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 34d3d64f07fb..1d20d458ede0 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -36,7 +36,7 @@ mod tests { BatchDeserializer, DecoderDeserializer, DeserializerOutput, }; use datafusion_datasource::file_format::FileFormat; - use datafusion_physical_plan::{collect, ExecutionPlan}; + use datafusion_physical_plan::{ExecutionPlan, collect}; use arrow::compute::concat_batches; use arrow::datatypes::{DataType, Field}; @@ -187,11 +187,11 @@ mod tests { let re = Regex::new(r"file_groups=\{(\d+) group").unwrap(); - if let Some(captures) = re.captures(&plan) { - if let Some(match_) = captures.get(1) { - let count = match_.as_str().parse::<usize>().unwrap(); - return Ok(count); - } + if let Some(captures) = re.captures(&plan) + && let Some(match_) = captures.get(1) + { + let count = match_.as_str().parse::<usize>().unwrap(); + return Ok(count); } internal_err!("Query contains no Exec: file_groups") diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 2756ea21cd00..6bbb63f6a17a 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -39,9 +39,9 @@ pub(crate) mod test_util { use arrow_schema::SchemaRef; use datafusion_catalog::Session; use datafusion_common::Result; - use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::TableSchema; - use datafusion_datasource::{file_format::FileFormat, PartitionedFile}; + use datafusion_datasource::file_scan_config::FileScanConfigBuilder; + use datafusion_datasource::{PartitionedFile, file_format::FileFormat}; use datafusion_execution::object_store::ObjectStoreUrl; use std::sync::Arc; @@ -73,15 +73,17 @@ pub(crate) mod test_util { .infer_stats(state, &store, file_schema.clone(), &meta) .await?; - let file_groups = vec![vec![PartitionedFile { - object_meta: meta, - partition_values: vec![], - range: None, - statistics: None, - extensions: None, - metadata_size_hint: None, - }] - .into()]; + let file_groups = vec![ + vec![PartitionedFile { + object_meta: meta, + partition_values: vec![], + range: None, + statistics: None, + extensions: None, + metadata_size_hint: None, + }] + .into(), + ]; let exec = format .create_physical_plan( @@ -133,7 +135,10 @@ mod tests { .write_parquet(out_dir_url, DataFrameWriteOptions::new(), None) .await .expect_err("should fail because input file does not match inferred schema"); - assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'"); + assert_eq!( + e.strip_backtrace(), + "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. 
Row data: '[d,4]'" + ); Ok(()) } } diff --git a/datafusion/core/src/datasource/file_format/options.rs b/datafusion/core/src/datasource/file_format/options.rs index 6cca0d503376..146c5f6f5fd0 100644 --- a/datafusion/core/src/datasource/file_format/options.rs +++ b/datafusion/core/src/datasource/file_format/options.rs @@ -25,9 +25,9 @@ use crate::datasource::file_format::avro::AvroFormat; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; +use crate::datasource::file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD; use crate::datasource::file_format::arrow::ArrowFormat; use crate::datasource::file_format::file_compression_type::FileCompressionType; -use crate::datasource::file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD; use crate::datasource::listing::ListingTableUrl; use crate::datasource::{file_format::csv::CsvFormat, listing::ListingOptions}; use crate::error::Result; diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 87131e082434..050212bc6b84 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -107,8 +107,8 @@ pub(crate) mod test_util { mod tests { use std::fmt::{self, Display, Formatter}; - use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; use crate::datasource::file_format::parquet::test_util::store_parquet; @@ -120,6 +120,7 @@ mod tests { use arrow::array::RecordBatch; use arrow_schema::Schema; use datafusion_catalog::Session; + use datafusion_common::ScalarValue::Utf8; use datafusion_common::cast::{ as_binary_array, as_binary_view_array, as_boolean_array, as_float32_array, as_float64_array, as_int32_array, as_timestamp_nanosecond_array, @@ -127,7 +128,6 @@ mod tests { use datafusion_common::config::{ParquetOptions, TableParquetOptions}; use datafusion_common::stats::Precision; use datafusion_common::test_util::batches_to_string; - use datafusion_common::ScalarValue::Utf8; use datafusion_common::{Result, ScalarValue}; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::file_sink_config::{FileSink, FileSinkConfig}; @@ -135,33 +135,33 @@ mod tests { use datafusion_datasource_parquet::{ ParquetFormat, ParquetFormatFactory, ParquetSink, }; + use datafusion_execution::TaskContext; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::runtime_env::RuntimeEnv; - use datafusion_execution::TaskContext; use datafusion_expr::dml::InsertOp; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - use datafusion_physical_plan::{collect, ExecutionPlan}; + use datafusion_physical_plan::{ExecutionPlan, collect}; use crate::test_util::bounded_stream; use arrow::array::{ - types::Int32Type, Array, ArrayRef, DictionaryArray, Int32Array, Int64Array, - StringArray, + Array, ArrayRef, DictionaryArray, Int32Array, Int64Array, StringArray, + types::Int32Type, }; use arrow::datatypes::{DataType, Field}; use async_trait::async_trait; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource_parquet::metadata::DFParquetMetadata; - use futures::stream::BoxStream; use futures::StreamExt; + use futures::stream::BoxStream; use insta::assert_snapshot; - use object_store::local::LocalFileSystem; use object_store::ObjectMeta; + use object_store::local::LocalFileSystem; use object_store::{ - path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectStore, 
- PutMultipartOptions, PutOptions, PutPayload, PutResult, + GetOptions, GetResult, ListResult, MultipartUpload, ObjectStore, + PutMultipartOptions, PutOptions, PutPayload, PutResult, path::Path, }; - use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::ParquetRecordBatchStreamBuilder; + use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::file::metadata::{ KeyValue, ParquetColumnIndex, ParquetMetaData, ParquetOffsetIndex, }; @@ -930,7 +930,10 @@ mod tests { values.push(array.value(i)); } - assert_eq!("[1235865600000000000, 1235865660000000000, 1238544000000000000, 1238544060000000000, 1233446400000000000, 1233446460000000000, 1230768000000000000, 1230768060000000000]", format!("{values:?}")); + assert_eq!( + "[1235865600000000000, 1235865660000000000, 1238544000000000000, 1238544060000000000, 1233446400000000000, 1233446460000000000, 1230768000000000000, 1230768060000000000]", + format!("{values:?}") + ); Ok(()) } diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 7db79485d184..7f957108cf78 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -113,8 +113,8 @@ mod tests { use crate::prelude::*; use crate::{ datasource::{ - file_format::csv::CsvFormat, file_format::json::JsonFormat, - provider_as_source, DefaultTableSource, MemTable, + DefaultTableSource, MemTable, file_format::csv::CsvFormat, + file_format::json::JsonFormat, provider_as_source, }, execution::options::ArrowReadOptions, test::{ @@ -129,21 +129,20 @@ mod tests { ListingOptions, ListingTable, ListingTableConfig, SchemaSource, }; use datafusion_common::{ - assert_contains, + DataFusionError, Result, ScalarValue, assert_contains, stats::Precision, test_util::{batches_to_string, datafusion_test_data}, - DataFusionError, Result, ScalarValue, }; + use datafusion_datasource::ListingTableUrl; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::FileFormat; - use datafusion_datasource::ListingTableUrl; use datafusion_expr::dml::InsertOp; use datafusion_expr::{BinaryExpr, LogicalPlanBuilder, Operator}; - use datafusion_physical_expr::expressions::binary; use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_expr::expressions::binary; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::empty::EmptyExec; - use datafusion_physical_plan::{collect, ExecutionPlanProperties}; + use datafusion_physical_plan::{ExecutionPlanProperties, collect}; use std::collections::HashMap; use std::io::Write; use std::sync::Arc; @@ -283,32 +282,36 @@ mod tests { // sort expr, but non column ( vec![vec![col("int_col").add(lit(1)).sort(true, true)]], - Ok(vec![[PhysicalSortExpr { - expr: binary( - physical_col("int_col", &schema).unwrap(), - Operator::Plus, - physical_lit(1), - &schema, - ) - .unwrap(), - options: SortOptions { - descending: false, - nulls_first: true, - }, - }] - .into()]), + Ok(vec![ + [PhysicalSortExpr { + expr: binary( + physical_col("int_col", &schema).unwrap(), + Operator::Plus, + physical_lit(1), + &schema, + ) + .unwrap(), + options: SortOptions { + descending: false, + nulls_first: true, + }, + }] + .into(), + ]), ), // ok with one column ( vec![vec![col("string_col").sort(true, false)]], - Ok(vec![[PhysicalSortExpr { - expr: physical_col("string_col", &schema).unwrap(), - options: SortOptions { - descending: false, - nulls_first: false, - }, - }] - 
.into()]), ), // ok with two columns, different options ( vec![vec![ col("string_col").sort(true, false), col("int_col").sort(false, true), ]], - Ok(vec![[ - PhysicalSortExpr::new_default( - physical_col("string_col", &schema).unwrap(), - ) - .asc() - .nulls_last(), - PhysicalSortExpr::new_default( - physical_col("int_col", &schema).unwrap(), - ) - .desc() - .nulls_first(), - ] - .into()]), + Ok(vec![ + [ + PhysicalSortExpr::new_default( + physical_col("string_col", &schema).unwrap(), + ) + .asc() + .nulls_last(), + PhysicalSortExpr::new_default( + physical_col("int_col", &schema).unwrap(), + ) + .desc() + .nulls_first(), + ] + .into(), + ]), ), ]; @@ -725,8 +730,8 @@ mod tests { } #[tokio::test] - async fn test_insert_into_append_new_parquet_files_invalid_session_fails( ) -> Result<()> { + async fn test_insert_into_append_new_parquet_files_invalid_session_fails() -> Result<()> { let mut config_map: HashMap<String, String> = HashMap::new(); config_map.insert( "datafusion.execution.parquet.compression".into(), "zstd".into(), ); @@ -740,7 +745,10 @@ ) .await .expect_err("Example should fail!"); - assert_eq!(e.strip_backtrace(), "Invalid or Unsupported Configuration: zstd compression requires specifying a level such as zstd(4)"); + assert_eq!( + e.strip_backtrace(), + "Invalid or Unsupported Configuration: zstd compression requires specifying a level such as zstd(4)" + ); Ok(()) } @@ -1410,7 +1418,9 @@ mod tests { ]; for (format, batch_size, soft_max_rows, expected_files) in test_cases { - println!("Testing insert with format: {format}, batch_size: {batch_size}, expected files: {expected_files}"); + println!( + "Testing insert with format: {format}, batch_size: {batch_size}, expected files: {expected_files}" + ); let mut config_map = HashMap::new(); config_map.insert( diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 9c5e41ace949..3ca388af0c4c 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -28,8 +28,8 @@ use crate::datasource::listing::{ use crate::execution::context::SessionState; use arrow::datatypes::DataType; -use datafusion_common::{arrow_datafusion_err, plan_err, ToDFSchema}; -use datafusion_common::{config_datafusion_err, Result}; +use datafusion_common::{Result, config_datafusion_err}; +use datafusion_common::{ToDFSchema, arrow_datafusion_err, plan_err}; use datafusion_expr::CreateExternalTable; use async_trait::async_trait; @@ -220,9 +220,9 @@ mod tests { datasource::file_format::csv::CsvFormat, execution::context::SessionContext, test_util::parquet_test_data, }; + use datafusion_execution::cache::CacheAccessor; use datafusion_execution::cache::cache_manager::CacheManagerConfig; use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; - use datafusion_execution::cache::CacheAccessor; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use glob::Pattern; diff --git a/datafusion/core/src/datasource/memory_test.rs b/datafusion/core/src/datasource/memory_test.rs index c16837c73b4f..c7721cafb02e 100644 --- a/datafusion/core/src/datasource/memory_test.rs +++ b/datafusion/core/src/datasource/memory_test.rs @@ -19,7 +19,7 @@ mod tests { use crate::datasource::MemTable; - 
use crate::datasource::{provider_as_source, DefaultTableSource}; + use crate::datasource::{DefaultTableSource, provider_as_source}; use crate::physical_plan::collect; use crate::prelude::SessionContext; use arrow::array::{AsArray, Int32Array}; @@ -29,8 +29,8 @@ mod tests { use arrow_schema::SchemaRef; use datafusion_catalog::TableProvider; use datafusion_common::{DataFusionError, Result}; - use datafusion_expr::dml::InsertOp; use datafusion_expr::LogicalPlanBuilder; + use datafusion_expr::dml::InsertOp; use futures::StreamExt; use std::collections::HashMap; use std::sync::Arc; @@ -329,12 +329,11 @@ mod tests { ); let col = batch.column(0).as_primitive::<Int32Type>(); assert_eq!(col.len(), 1, "expected 1 row, got {}", col.len()); - let val = col - .iter() + + col.iter() .next() .expect("had value") - .expect("expected non null"); - val + .expect("expected non null") } // Test inserting a single batch of data into a single partition diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 52564663b92f..7d37ef8cf24a 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -31,7 +31,7 @@ mod view_test; // backwards compatibility pub use self::default_table_source::{ - provider_as_source, source_as_provider, DefaultTableSource, + DefaultTableSource, provider_as_source, source_as_provider, }; pub use self::memory::MemTable; pub use self::view::ViewTable; @@ -53,22 +53,20 @@ pub use datafusion_physical_expr::create_ordering; mod tests { use crate::prelude::SessionContext; - use ::object_store::{path::Path, ObjectMeta}; + use ::object_store::{ObjectMeta, path::Path}; use arrow::{ array::Int32Array, datatypes::{DataType, Field, Schema, SchemaRef}, record_batch::RecordBatch, }; use datafusion_common::{ - record_batch, + Result, ScalarValue, record_batch, test_util::batches_to_sort_string, tree_node::{Transformed, TransformedResult, TreeNode}, - Result, ScalarValue, }; use datafusion_datasource::{ - file_scan_config::FileScanConfigBuilder, + PartitionedFile, file_scan_config::FileScanConfigBuilder, schema_adapter::DefaultSchemaAdapterFactory, source::DataSourceExec, - PartitionedFile, }; use datafusion_datasource_parquet::source::ParquetSource; use datafusion_physical_expr::expressions::{Column, Literal}; diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index b97ab0e9cacf..4f8fc6d0b971 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -31,21 +31,21 @@ mod tests { use crate::test::object_store::local_unpartitioned_file; use arrow::datatypes::{DataType, Field, SchemaBuilder}; use datafusion_common::test_util::batches_to_string; - use datafusion_common::{test_util, Result, ScalarValue}; + use datafusion_common::{Result, ScalarValue, test_util}; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::{PartitionedFile, TableSchema}; - use datafusion_datasource_avro::source::AvroSource; use datafusion_datasource_avro::AvroFormat; + use datafusion_datasource_avro::source::AvroSource; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_physical_plan::ExecutionPlan; use datafusion_datasource::source::DataSourceExec; use futures::StreamExt; use insta::assert_snapshot; + use object_store::ObjectStore; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; - 
use object_store::ObjectStore; use rstest::*; use url::Url; diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 660be4faffbc..892ae5b58635 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -40,10 +40,10 @@ mod tests { use datafusion_common::config::CsvOptions; use datafusion_common::test_util::arrow_test_data; use datafusion_common::test_util::batches_to_string; - use datafusion_common::{assert_batches_eq, Result}; + use datafusion_common::{Result, assert_batches_eq}; use datafusion_execution::config::SessionConfig; - use datafusion_physical_plan::metrics::MetricsSet; use datafusion_physical_plan::ExecutionPlan; + use datafusion_physical_plan::metrics::MetricsSet; #[cfg(feature = "compression")] use datafusion_datasource::file_compression_type::FileCompressionType; @@ -621,7 +621,10 @@ mod tests { .collect() .await .unwrap_err(); - assert_eq!(e.strip_backtrace(), "Arrow error: Csv error: incorrect number of fields for line 1, expected 2 got more than 2") + assert_eq!( + e.strip_backtrace(), + "Arrow error: Csv error: incorrect number of fields for line 1, expected 2 got more than 2" + ) } #[tokio::test] @@ -656,8 +659,8 @@ mod tests { } #[tokio::test] - async fn test_create_external_table_with_terminator_with_newlines_in_values( - ) -> Result<()> { + async fn test_create_external_table_with_terminator_with_newlines_in_values() + -> Result<()> { let ctx = SessionContext::new(); ctx.sql(r#" CREATE EXTERNAL TABLE t1 ( @@ -707,7 +710,10 @@ mod tests { ) .await .expect_err("should fail because input file does not match inferred schema"); - assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'"); + assert_eq!( + e.strip_backtrace(), + "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'" + ); Ok(()) } diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 3efea0330258..922d13fa1759 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -34,9 +34,9 @@ mod tests { use crate::execution::SessionState; use crate::prelude::{CsvReadOptions, NdJsonReadOptions, SessionContext}; use crate::test::partitioned_file_groups; + use datafusion_common::Result; use datafusion_common::cast::{as_int32_array, as_int64_array, as_string_array}; use datafusion_common::test_util::batches_to_string; - use datafusion_common::Result; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource_json::JsonFormat; @@ -51,9 +51,9 @@ mod tests { use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; use insta::assert_snapshot; + use object_store::ObjectStore; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; - use object_store::ObjectStore; use rstest::*; use tempfile::TempDir; use url::Url; @@ -499,7 +499,10 @@ mod tests { .write_json(out_dir_url, DataFrameWriteOptions::new(), None) .await .expect_err("should fail because input file does not match inferred schema"); - assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. 
Row data: '[d,4]'"); + assert_eq!( + e.strip_backtrace(), + "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'" + ); Ok(()) } diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 1ac292e260fd..c57b08545b75 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -43,8 +43,8 @@ pub use datafusion_datasource::file::FileSource; pub use datafusion_datasource::file_groups::FileGroup; pub use datafusion_datasource::file_groups::FileGroupPartitioner; pub use datafusion_datasource::file_scan_config::{ - wrap_partition_type_in_dict, wrap_partition_value_in_dict, FileScanConfig, - FileScanConfigBuilder, + FileScanConfig, FileScanConfigBuilder, wrap_partition_type_in_dict, + wrap_partition_value_in_dict, }; pub use datafusion_datasource::file_sink_config::*; @@ -57,10 +57,10 @@ mod tests { use std::sync::Arc; use arrow::array::{ - cast::AsArray, - types::{Float32Type, Float64Type, UInt32Type}, BinaryArray, BooleanArray, Float32Array, Int32Array, Int64Array, RecordBatch, StringArray, UInt64Array, + cast::AsArray, + types::{Float32Type, Float64Type, UInt32Type}, }; use arrow::datatypes::{DataType, Field, Schema}; use arrow_schema::SchemaRef; diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index 6ed01cde14a3..4613561c666e 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -38,7 +38,7 @@ mod tests { use crate::prelude::{ParquetReadOptions, SessionConfig, SessionContext}; use crate::test::object_store::local_unpartitioned_file; use arrow::array::{ - ArrayRef, AsArray, Date64Array, Int32Array, Int64Array, Int8Array, StringArray, + ArrayRef, AsArray, Date64Array, Int8Array, Int32Array, Int64Array, StringArray, StringViewArray, StructArray, TimestampNanosecondArray, }; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaBuilder}; @@ -48,7 +48,7 @@ mod tests { use bytes::{BufMut, BytesMut}; use datafusion_common::config::TableParquetOptions; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; - use datafusion_common::{assert_contains, Result, ScalarValue}; + use datafusion_common::{Result, ScalarValue, assert_contains}; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; @@ -60,7 +60,7 @@ mod tests { DefaultParquetFileReaderFactory, ParquetFileReaderFactory, ParquetFormat, }; use datafusion_execution::object_store::ObjectStoreUrl; - use datafusion_expr::{col, lit, when, Expr}; + use datafusion_expr::{Expr, col, lit, when}; use datafusion_physical_expr::planner::logical2physical; use datafusion_physical_plan::analyze::AnalyzeExec; use datafusion_physical_plan::collect; @@ -1271,8 +1271,10 @@ mod tests { .with_table_schema(Arc::new(table_schema)) .round_trip_to_batches(vec![batch1, batch2]) .await; - assert_contains!(read.unwrap_err().to_string(), - "Cannot cast column 'c3' from 'Date64' (physical data type) to 'Int8' (logical data type)"); + assert_contains!( + read.unwrap_err().to_string(), + "Cannot cast column 'c3' from 'Date64' (physical data type) to 'Int8' (logical data type)" + ); } #[tokio::test] diff --git a/datafusion/core/src/execution/context/mod.rs 
b/datafusion/core/src/execution/context/mod.rs index 5bba78e44d27..a769bb01b435 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -34,12 +34,12 @@ use crate::{ datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }, - datasource::{provider_as_source, MemTable, ViewTable}, + datasource::{MemTable, ViewTable, provider_as_source}, error::Result, execution::{ + FunctionRegistry, options::ArrowReadOptions, runtime_env::{RuntimeEnv, RuntimeEnvBuilder}, - FunctionRegistry, }, logical_expr::AggregateUDF, logical_expr::ScalarUDF, @@ -59,43 +59,43 @@ pub use crate::execution::session_state::SessionState; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion_catalog::memory::MemorySchemaProvider; use datafusion_catalog::MemoryCatalogProvider; +use datafusion_catalog::memory::MemorySchemaProvider; use datafusion_catalog::{ DynamicFileCatalog, TableFunction, TableFunctionImpl, UrlTableFactory, }; use datafusion_common::config::{ConfigField, ConfigOptions}; use datafusion_common::metadata::ScalarAndMetadata; use datafusion_common::{ + DFSchema, DataFusionError, ParamValues, SchemaReference, TableReference, config::{ConfigExtension, TableOptions}, exec_datafusion_err, exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, tree_node::{TreeNodeRecursion, TreeNodeVisitor}, - DFSchema, DataFusionError, ParamValues, SchemaReference, TableReference, }; +pub use datafusion_execution::TaskContext; use datafusion_execution::cache::cache_manager::{ DEFAULT_LIST_FILES_CACHE_MEMORY_LIMIT, DEFAULT_LIST_FILES_CACHE_TTL, DEFAULT_METADATA_CACHE_LIMIT, }; pub use datafusion_execution::config::SessionConfig; use datafusion_execution::disk_manager::{ - DiskManagerBuilder, DEFAULT_MAX_TEMP_DIRECTORY_SIZE, + DEFAULT_MAX_TEMP_DIRECTORY_SIZE, DiskManagerBuilder, }; use datafusion_execution::registry::SerializerRegistry; -pub use datafusion_execution::TaskContext; pub use datafusion_expr::execution_props::ExecutionProps; #[cfg(feature = "sql")] use datafusion_expr::planner::RelationPlanner; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::{ + Expr, UserDefinedLogicalNode, WindowUDF, expr_rewriter::FunctionRewrite, logical_plan::{DdlStatement, Statement}, planner::ExprPlanner, - Expr, UserDefinedLogicalNode, WindowUDF, }; +use datafusion_optimizer::Analyzer; use datafusion_optimizer::analyzer::type_coercion::TypeCoercion; use datafusion_optimizer::simplify_expressions::ExprSimplifier; -use datafusion_optimizer::Analyzer; use datafusion_optimizer::{AnalyzerRule, OptimizerRule}; use datafusion_session::SessionStore; @@ -695,7 +695,7 @@ impl SessionContext { match ddl { DdlStatement::CreateExternalTable(cmd) => { (Box::pin(async move { self.create_external_table(&cmd).await }) - as std::pin::Pin<Box<dyn futures::Future<Output = Result<DataFrame>> + Send>>) + as std::pin::Pin<Box<dyn futures::Future<Output = Result<DataFrame>> + Send>>) .await } DdlStatement::CreateMemoryTable(cmd) => { @@ -1272,7 +1272,9 @@ impl SessionContext { match unit { "m" if minutes.is_none() && seconds.is_none() => minutes = Some(number), "s" if seconds.is_none() => seconds = Some(number), - _ => plan_err!("Invalid duration, unit must be either 'm' (minutes), or 's' (seconds), and be in the correct order")?, + _ => plan_err!( + "Invalid duration, unit must be either 'm' (minutes), or 's' (seconds), and be in the correct order" + )?, } } @@ -1320,13 +1322,12 @@ impl SessionContext { .and_then(|c| c.schema(&resolved.schema)) }; - if let Some(schema) = maybe_schema { - if let 
Some(table_provider) = schema.table(&table).await? { - if table_provider.table_type() == table_type { - schema.deregister_table(&table)?; - return Ok(true); - } - } + if let Some(schema) = maybe_schema + && let Some(table_provider) = schema.table(&table).await? + && table_provider.table_type() == table_type + { + schema.deregister_table(&table)?; + return Ok(true); } Ok(false) @@ -1342,7 +1343,7 @@ impl SessionContext { _ => { return Err(DataFusionError::Configuration( "Function factory has not been configured".to_string(), - )) + )); } } }; diff --git a/datafusion/core/src/execution/context/parquet.rs b/datafusion/core/src/execution/context/parquet.rs index 731f7e59ecfa..823dc946ea73 100644 --- a/datafusion/core/src/execution/context/parquet.rs +++ b/datafusion/core/src/execution/context/parquet.rs @@ -113,7 +113,7 @@ mod tests { }; use datafusion_execution::config::SessionConfig; - use tempfile::{tempdir, TempDir}; + use tempfile::{TempDir, tempdir}; #[tokio::test] async fn read_with_glob_path() -> Result<()> { @@ -355,7 +355,9 @@ mod tests { let expected_path = binding[0].as_str(); assert_eq!( read_df.unwrap_err().strip_backtrace(), - format!("Execution error: File path '{expected_path}' does not match the expected extension '.parquet'") + format!( + "Execution error: File path '{expected_path}' does not match the expected extension '.parquet'" + ) ); // Read the dataframe from 'output3.parquet.snappy.parquet' with the correct file extension. diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 2466f9544bd1..6a9ebcdf5125 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -27,14 +27,14 @@ use crate::catalog::{CatalogProviderList, SchemaProvider, TableProviderFactory}; use crate::datasource::file_format::FileFormatFactory; #[cfg(feature = "sql")] use crate::datasource::provider_as_source; -use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::execution::SessionStateDefaults; +use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; use arrow_schema::{DataType, FieldRef}; +use datafusion_catalog::MemoryCatalogProviderList; use datafusion_catalog::information_schema::{ - InformationSchemaProvider, INFORMATION_SCHEMA, + INFORMATION_SCHEMA, InformationSchemaProvider, }; -use datafusion_catalog::MemoryCatalogProviderList; use datafusion_catalog::{TableFunction, TableFunctionImpl}; use datafusion_common::alias::AliasGenerator; #[cfg(feature = "sql")] @@ -43,12 +43,14 @@ use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions}; use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; use datafusion_common::tree_node::TreeNode; use datafusion_common::{ - config_err, exec_err, plan_datafusion_err, DFSchema, DataFusionError, - ResolvedTableReference, TableReference, + DFSchema, DataFusionError, ResolvedTableReference, TableReference, config_err, + exec_err, plan_datafusion_err, }; +use datafusion_execution::TaskContext; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; -use datafusion_execution::TaskContext; +#[cfg(feature = "sql")] +use datafusion_expr::TableSource; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr_rewriter::FunctionRewrite; use datafusion_expr::planner::ExprPlanner; @@ -56,8 +58,6 @@ 
use datafusion_expr::planner::ExprPlanner; use datafusion_expr::planner::{RelationPlanner, TypePlanner}; use datafusion_expr::registry::{FunctionRegistry, SerializerRegistry}; use datafusion_expr::simplify::SimplifyInfo; -#[cfg(feature = "sql")] -use datafusion_expr::TableSource; use datafusion_expr::{ AggregateUDF, Explain, Expr, ExprSchemable, LogicalPlan, ScalarUDF, WindowUDF, }; @@ -67,8 +67,8 @@ use datafusion_optimizer::{ }; use datafusion_physical_expr::create_physical_expr; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_optimizer::optimizer::PhysicalOptimizer; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::optimizer::PhysicalOptimizer; use datafusion_physical_plan::ExecutionPlan; use datafusion_session::Session; #[cfg(feature = "sql")] @@ -504,10 +504,10 @@ impl SessionState { let resolved = self.resolve_table_ref(reference); if let Entry::Vacant(v) = provider.tables.entry(resolved) { let resolved = v.key(); - if let Ok(schema) = self.schema_for_ref(resolved.clone()) { - if let Some(table) = schema.table(&resolved.table).await? { - v.insert(provider_as_source(table)); - } + if let Ok(schema) = self.schema_for_ref(resolved.clone()) + && let Some(table) = schema.table(&resolved.table).await? + { + v.insert(provider_as_source(table)); } } } @@ -840,10 +840,18 @@ impl SessionState { overwrite: bool, ) -> Result<(), DataFusionError> { let ext = file_format.get_ext().to_lowercase(); - match (self.file_formats.entry(ext.clone()), overwrite){ - (Entry::Vacant(e), _) => {e.insert(file_format);}, - (Entry::Occupied(mut e), true) => {e.insert(file_format);}, - (Entry::Occupied(_), false) => return config_err!("File type already registered for extension {ext}. Set overwrite to true to replace this extension."), + match (self.file_formats.entry(ext.clone()), overwrite) { + (Entry::Vacant(e), _) => { + e.insert(file_format); + } + (Entry::Occupied(mut e), true) => { + e.insert(file_format); + } + (Entry::Occupied(_), false) => { + return config_err!( + "File type already registered for extension {ext}. Set overwrite to true to replace this extension." 
+ ); + } }; Ok(()) } @@ -1865,7 +1873,7 @@ impl ContextProvider for SessionContextProvider<'_> { } fn get_variable_type(&self, variable_names: &[String]) -> Option { - use datafusion_expr::var_provider::{is_system_variables, VarType}; + use datafusion_expr::var_provider::{VarType, is_system_variables}; if variable_names.is_empty() { return None; @@ -2162,9 +2170,9 @@ mod tests { use super::{SessionContextProvider, SessionStateBuilder}; use crate::common::assert_contains; use crate::config::ConfigOptions; + use crate::datasource::MemTable; use crate::datasource::empty::EmptyTable; use crate::datasource::provider_as_source; - use crate::datasource::MemTable; use crate::execution::context::SessionState; use crate::logical_expr::planner::ExprPlanner; use crate::logical_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; @@ -2174,13 +2182,13 @@ mod tests { use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_catalog::MemoryCatalogProviderList; - use datafusion_common::config::Dialect; use datafusion_common::DFSchema; use datafusion_common::Result; + use datafusion_common::config::Dialect; use datafusion_execution::config::SessionConfig; use datafusion_expr::Expr; - use datafusion_optimizer::optimizer::OptimizerRule; use datafusion_optimizer::Optimizer; + use datafusion_optimizer::optimizer::OptimizerRule; use datafusion_physical_plan::display::DisplayableExecutionPlan; use datafusion_sql::planner::{PlannerContext, SqlToRel}; use std::collections::HashMap; @@ -2287,13 +2295,15 @@ mod tests { .table_exist("employee"); assert!(is_exist); let new_state = SessionStateBuilder::new_from_existing(session_state).build(); - assert!(new_state - .catalog_list() - .catalog(default_catalog.as_str()) - .unwrap() - .schema(default_schema.as_str()) - .unwrap() - .table_exist("employee")); + assert!( + new_state + .catalog_list() + .catalog(default_catalog.as_str()) + .unwrap() + .schema(default_schema.as_str()) + .unwrap() + .table_exist("employee") + ); // if `with_create_default_catalog_and_schema` is disabled, the new one shouldn't create default catalog and schema let disable_create_default = @@ -2301,10 +2311,12 @@ mod tests { let without_default_state = SessionStateBuilder::new() .with_config(disable_create_default) .build(); - assert!(without_default_state - .catalog_list() - .catalog(&default_catalog) - .is_none()); + assert!( + without_default_state + .catalog_list() + .catalog(&default_catalog) + .is_none() + ); let new_state = SessionStateBuilder::new_from_existing(without_default_state).build(); assert!(new_state.catalog_list().catalog(&default_catalog).is_none()); diff --git a/datafusion/core/src/execution/session_state_defaults.rs b/datafusion/core/src/execution/session_state_defaults.rs index bc7fd6bb17cc..721710d4e057 100644 --- a/datafusion/core/src/execution/session_state_defaults.rs +++ b/datafusion/core/src/execution/session_state_defaults.rs @@ -17,6 +17,7 @@ use crate::catalog::listing_schema::ListingSchemaProvider; use crate::catalog::{CatalogProvider, TableProviderFactory}; +use crate::datasource::file_format::FileFormatFactory; use crate::datasource::file_format::arrow::ArrowFormatFactory; #[cfg(feature = "avro")] use crate::datasource::file_format::avro::AvroFormatFactory; @@ -24,7 +25,6 @@ use crate::datasource::file_format::csv::CsvFormatFactory; use crate::datasource::file_format::json::JsonFormatFactory; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormatFactory; 
-use crate::datasource::file_format::FileFormatFactory; use crate::datasource::provider::DefaultTableFactory; use crate::execution::context::SessionState; #[cfg(feature = "nested_expressions")] diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 3e216c619bba..9eaf1403e575 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use crate::datasource::file_format::file_type_to_format; use crate::datasource::listing::ListingTableUrl; use crate::datasource::physical_plan::FileSinkConfig; -use crate::datasource::{source_as_provider, DefaultTableSource}; +use crate::datasource::{DefaultTableSource, source_as_provider}; use crate::error::{DataFusionError, Result}; use crate::execution::context::{ExecutionProps, SessionState}; use crate::logical_expr::utils::generate_sort_key; @@ -52,12 +52,12 @@ use crate::physical_plan::union::UnionExec; use crate::physical_plan::unnest::UnnestExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use crate::physical_plan::{ - displayable, windows, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, - Partitioning, PhysicalExpr, WindowExpr, + ExecutionPlan, ExecutionPlanProperties, InputOrderMode, Partitioning, PhysicalExpr, + WindowExpr, displayable, windows, }; use crate::schema_equivalence::schema_satisfied_by; -use arrow::array::{builder::StringBuilder, RecordBatch}; +use arrow::array::{RecordBatch, builder::StringBuilder}; use arrow::compute::SortOptions; use arrow::datatypes::Schema; use arrow_schema::Field; @@ -66,18 +66,18 @@ use datafusion_common::display::ToStringifiedPlan; use datafusion_common::format::ExplainAnalyzeLevel; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::{ - assert_eq_or_internal_err, assert_or_internal_err, TableReference, + DFSchema, ScalarValue, exec_err, internal_datafusion_err, internal_err, not_impl_err, + plan_err, }; use datafusion_common::{ - exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, - ScalarValue, + TableReference, assert_eq_or_internal_err, assert_or_internal_err, }; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::memory::MemorySourceConfig; use datafusion_expr::dml::{CopyTo, InsertOp}; use datafusion_expr::expr::{ - physical_name, AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, - NullTreatment, WindowFunction, WindowFunctionParams, + AggregateFunction, AggregateFunctionParams, Alias, GroupingSet, NullTreatment, + WindowFunction, WindowFunctionParams, physical_name, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; @@ -90,7 +90,7 @@ use datafusion_expr::{ use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::{ - create_physical_sort_exprs, LexOrdering, PhysicalSortExpr, + LexOrdering, PhysicalSortExpr, create_physical_sort_exprs, }; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::empty::EmptyExec; @@ -104,7 +104,7 @@ use datafusion_physical_plan::unnest::ListUnnest; use async_trait::async_trait; use datafusion_physical_plan::async_func::{AsyncFuncExec, AsyncMapper}; use futures::{StreamExt, TryStreamExt}; -use itertools::{multiunzip, Itertools}; +use itertools::{Itertools, multiunzip}; 
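Note: the reshuffled `use` lists throughout the hunks above follow the 2024 style edition's import ordering, which (as I understand it) version-sorts paths by code point, so uppercase type names now sort before lowercase function and module names within braces. A minimal illustrative sketch, using the `itertools` items from the hunk just above; the `main` body is a hypothetical usage example, not code from this PR:

    // 2021 style (case-insensitive):  use itertools::{multiunzip, Itertools};
    // 2024 style (uppercase sorts first):
    use itertools::{Itertools, multiunzip};

    fn main() {
        // Use both imported items so the example is self-contained.
        let (a, b): (Vec<i32>, Vec<i32>) = multiunzip(vec![(1, 2), (3, 4)]);
        let joined = a.iter().chain(b.iter()).join(",");
        assert_eq!(joined, "1,3,2,4");
    }

Because the ordering is purely mechanical, these hunks change no behavior; `cargo fmt` regenerates them deterministically.
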
use log::debug; use tokio::sync::Mutex; @@ -528,12 +528,22 @@ impl DefaultPhysicalPlanner { let keep_partition_by_columns = match source_option_tuples .get("execution.keep_partition_by_columns") - .map(|v| v.trim()) { - None => session_state.config().options().execution.keep_partition_by_columns, + .map(|v| v.trim()) + { + None => { + session_state + .config() + .options() + .execution + .keep_partition_by_columns + } Some("true") => true, Some("false") => false, - Some(value) => - return Err(DataFusionError::Configuration(format!("provided value for 'execution.keep_partition_by_columns' was not recognized: \"{value}\""))), + Some(value) => { + return Err(DataFusionError::Configuration(format!( + "provided value for 'execution.keep_partition_by_columns' was not recognized: \"{value}\"" + ))); + } }; let sink_format = file_type_to_format(file_type)? @@ -601,8 +611,8 @@ impl DefaultPhysicalPlanner { let get_sort_keys = |expr: &Expr| match expr { Expr::WindowFunction(window_fun) => { let WindowFunctionParams { - ref partition_by, - ref order_by, + partition_by, + order_by, .. } = &window_fun.as_ref().params; generate_sort_key(partition_by, order_by) @@ -612,8 +622,8 @@ impl DefaultPhysicalPlanner { match &**expr { Expr::WindowFunction(window_fun) => { let WindowFunctionParams { - ref partition_by, - ref order_by, + partition_by, + order_by, .. } = &window_fun.as_ref().params; generate_sort_key(partition_by, order_by) @@ -626,11 +636,11 @@ impl DefaultPhysicalPlanner { let sort_keys = get_sort_keys(&window_expr[0])?; if window_expr.len() > 1 { debug_assert!( - window_expr[1..] - .iter() - .all(|expr| get_sort_keys(expr).unwrap() == sort_keys), - "all window expressions shall have the same sort keys, as guaranteed by logical planning" - ); + window_expr[1..] + .iter() + .all(|expr| get_sort_keys(expr).unwrap() == sort_keys), + "all window expressions shall have the same sort keys, as guaranteed by logical planning" + ); } let logical_schema = node.schema(); @@ -737,10 +747,10 @@ impl DefaultPhysicalPlanner { )); } } - return internal_err!("Physical input schema should be the same as the one converted from logical input schema. Differences: {}", differences - .iter() - .map(|s| format!("\n\t- {s}")) - .join("")); + return internal_err!( + "Physical input schema should be the same as the one converted from logical input schema. Differences: {}", + differences.iter().map(|s| format!("\n\t- {s}")).join("") + ); } let groups = self.create_grouping_physical_expr( @@ -800,7 +810,7 @@ impl DefaultPhysicalPlanner { _ => { return internal_err!( "Unexpected result from try_plan_async_exprs" - ) + ); } } } @@ -898,7 +908,7 @@ impl DefaultPhysicalPlanner { _ => { return internal_err!( "Unexpected result from try_plan_async_exprs" - ) + ); } }; @@ -1489,19 +1499,24 @@ impl DefaultPhysicalPlanner { } let plan = match maybe_plan { - Some(v) => Ok(v), - _ => plan_err!("No installed planner was able to convert the custom node to an execution plan: {:?}", node) - }?; + Some(v) => Ok(v), + _ => plan_err!( + "No installed planner was able to convert the custom node to an execution plan: {:?}", + node + ), + }?; // Ensure the ExecutionPlan's schema matches the // declared logical schema to catch and warn about // logic errors when creating user defined plans. if !node.schema().matches_arrow_schema(&plan.schema()) { return plan_err!( - "Extension planner for {:?} created an ExecutionPlan with mismatched schema. \ + "Extension planner for {:?} created an ExecutionPlan with mismatched schema. 
\ LogicalPlan schema: {:?}, ExecutionPlan schema: {:?}", - node, node.schema(), plan.schema() - ); + node, + node.schema(), + plan.schema() + ); } else { plan } @@ -1528,17 +1543,17 @@ impl DefaultPhysicalPlanner { LogicalPlan::Explain(_) => { return internal_err!( "Unsupported logical plan: Explain must be root of the plan" - ) + ); } LogicalPlan::Distinct(_) => { return internal_err!( "Unsupported logical plan: Distinct should be replaced to Aggregate" - ) + ); } LogicalPlan::Analyze(_) => { return internal_err!( "Unsupported logical plan: Analyze must be root of the plan" - ) + ); } }; Ok(exec_node) @@ -1884,9 +1899,10 @@ pub fn create_window_expr_with_name( if !is_window_frame_bound_valid(window_frame) { return plan_err!( - "Invalid window frame: start bound ({}) cannot be larger than end bound ({})", - window_frame.start_bound, window_frame.end_bound - ); + "Invalid window frame: start bound ({}) cannot be larger than end bound ({})", + window_frame.start_bound, + window_frame.end_bound + ); } let window_frame = Arc::new(window_frame.clone()); @@ -2546,7 +2562,8 @@ impl<'a> OptimizationInvariantChecker<'a> { if self.rule.schema_check() && !is_allowed_schema_change(previous_schema.as_ref(), plan.schema().as_ref()) { - internal_err!("PhysicalOptimizer rule '{}' failed. Schema mismatch. Expected original schema: {:?}, got new schema: {:?}", + internal_err!( + "PhysicalOptimizer rule '{}' failed. Schema mismatch. Expected original schema: {:?}, got new schema: {:?}", self.rule.name(), previous_schema, plan.schema() @@ -2641,11 +2658,11 @@ mod tests { use std::ops::{BitAnd, Not}; use super::*; - use crate::datasource::file_format::options::CsvReadOptions; use crate::datasource::MemTable; + use crate::datasource::file_format::options::CsvReadOptions; use crate::physical_plan::{ - expressions, DisplayAs, DisplayFormatType, PlanProperties, - SendableRecordBatchStream, + DisplayAs, DisplayFormatType, PlanProperties, SendableRecordBatchStream, + expressions, }; use crate::prelude::{SessionConfig, SessionContext}; use crate::test_util::{scan_empty, scan_empty_with_partitions}; @@ -2656,12 +2673,12 @@ mod tests { use arrow_schema::SchemaRef; use datafusion_common::config::ConfigOptions; use datafusion_common::{ - assert_contains, DFSchemaRef, TableReference, ToDFSchema as _, + DFSchemaRef, TableReference, ToDFSchema as _, assert_contains, }; - use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; + use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::builder::subquery_alias; - use datafusion_expr::{col, lit, LogicalPlanBuilder, UserDefinedLogicalNodeCore}; + use datafusion_expr::{LogicalPlanBuilder, UserDefinedLogicalNodeCore, col, lit}; use datafusion_functions_aggregate::count::count_all; use datafusion_functions_aggregate::expr_fn::sum; use datafusion_physical_expr::EquivalenceProperties; @@ -3063,8 +3080,7 @@ mod tests { .create_physical_plan(&logical_plan, &session_state) .await; - let expected_error = - "No installed planner was able to convert the custom node to an execution plan: NoOp"; + let expected_error = "No installed planner was able to convert the custom node to an execution plan: NoOp"; match plan { Ok(_) => panic!("Expected planning failure"), Err(e) => assert!( @@ -3321,18 +3337,27 @@ mod tests { if let Some(plan) = plan.as_any().downcast_ref::() { let stringified_plans = plan.stringified_plans(); assert!(stringified_plans.len() >= 4); - assert!(stringified_plans - .iter() - .any(|p| matches!(p.plan_type, 
PlanType::FinalLogicalPlan))); - assert!(stringified_plans - .iter() - .any(|p| matches!(p.plan_type, PlanType::InitialPhysicalPlan))); - assert!(stringified_plans - .iter() - .any(|p| matches!(p.plan_type, PlanType::OptimizedPhysicalPlan { .. }))); - assert!(stringified_plans - .iter() - .any(|p| matches!(p.plan_type, PlanType::FinalPhysicalPlan))); + assert!( + stringified_plans + .iter() + .any(|p| matches!(p.plan_type, PlanType::FinalLogicalPlan)) + ); + assert!( + stringified_plans + .iter() + .any(|p| matches!(p.plan_type, PlanType::InitialPhysicalPlan)) + ); + assert!( + stringified_plans.iter().any(|p| matches!( + p.plan_type, + PlanType::OptimizedPhysicalPlan { .. } + )) + ); + assert!( + stringified_plans + .iter() + .any(|p| matches!(p.plan_type, PlanType::FinalPhysicalPlan)) + ); } else { panic!( "Plan was not an explain plan: {}", @@ -3699,8 +3724,12 @@ digraph { } fn check_invariants(&self, check: InvariantLevel) -> Result<()> { match check { - InvariantLevel::Always => plan_err!("extension node failed it's user-defined always-invariant check"), - InvariantLevel::Executable => panic!("the OptimizationInvariantChecker should not be checking for executableness"), + InvariantLevel::Always => plan_err!( + "extension node failed it's user-defined always-invariant check" + ), + InvariantLevel::Executable => panic!( + "the OptimizationInvariantChecker should not be checking for executableness" + ), } } fn schema(&self) -> SchemaRef { @@ -3784,9 +3813,11 @@ digraph { let expected_err = OptimizationInvariantChecker::new(&rule) .check(&failing_node, &ok_plan.schema()) .unwrap_err(); - assert!(expected_err - .to_string() - .contains("extension node failed it's user-defined always-invariant check")); + assert!( + expected_err.to_string().contains( + "extension node failed it's user-defined always-invariant check" + ) + ); // Test: should fail when descendent extension node fails let failing_node: Arc = Arc::new(InvariantFailsExtensionNode); @@ -3797,9 +3828,11 @@ digraph { let expected_err = OptimizationInvariantChecker::new(&rule) .check(&invalid_plan, &ok_plan.schema()) .unwrap_err(); - assert!(expected_err - .to_string() - .contains("extension node failed it's user-defined always-invariant check")); + assert!( + expected_err.to_string().contains( + "extension node failed it's user-defined always-invariant check" + ) + ); Ok(()) } @@ -4118,9 +4151,10 @@ digraph { async fn test_aggregate_schema_mismatch_field_metadata() { let logical_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)])); - let physical_schema = - Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false) - .with_metadata(HashMap::from([("key".into(), "value".into())]))])); + let physical_schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, false) + .with_metadata(HashMap::from([("key".into(), "value".into())])), + ])); let err = plan_with_schemas( logical_schema, diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs index d723620d3232..50e4a2649c92 100644 --- a/datafusion/core/src/prelude.rs +++ b/datafusion/core/src/prelude.rs @@ -34,10 +34,10 @@ pub use crate::execution::options::{ pub use datafusion_common::Column; pub use datafusion_expr::{ + Expr, expr_fn::*, lit, lit_timestamp_nano, logical_plan::{JoinType, Partitioning}, - Expr, }; pub use datafusion_functions::expr_fn::*; #[cfg(feature = "nested_expressions")] diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index b0ff3eb3ae41..717182f1d3d5 100644 --- 
a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -25,9 +25,9 @@ use std::io::{BufReader, BufWriter}; use std::path::Path; use std::sync::Arc; +use crate::datasource::file_format::FileFormat; use crate::datasource::file_format::csv::CsvFormat; use crate::datasource::file_format::file_compression_type::FileCompressionType; -use crate::datasource::file_format::FileFormat; use crate::datasource::physical_plan::CsvSource; use crate::datasource::{MemTable, TableProvider}; @@ -42,21 +42,21 @@ use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; #[cfg(feature = "compression")] use datafusion_common::DataFusionError; -use datafusion_datasource::source::DataSourceExec; use datafusion_datasource::TableSchema; +use datafusion_datasource::source::DataSourceExec; -#[cfg(feature = "compression")] -use bzip2::write::BzEncoder; #[cfg(feature = "compression")] use bzip2::Compression as BzCompression; +#[cfg(feature = "compression")] +use bzip2::write::BzEncoder; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource_csv::partitioned_csv_config; #[cfg(feature = "compression")] -use flate2::write::GzEncoder; -#[cfg(feature = "compression")] use flate2::Compression as GzCompression; #[cfg(feature = "compression")] +use flate2::write::GzEncoder; +#[cfg(feature = "compression")] use liblzma::write::XzEncoder; use object_store::local_unpartitioned_file; #[cfg(feature = "compression")] diff --git a/datafusion/core/src/test/object_store.rs b/datafusion/core/src/test/object_store.rs index d31c2719973e..a0438e3d74ab 100644 --- a/datafusion/core/src/test/object_store.rs +++ b/datafusion/core/src/test/object_store.rs @@ -20,20 +20,20 @@ use crate::{ execution::{context::SessionState, session_state::SessionStateBuilder}, object_store::{ - memory::InMemory, path::Path, Error, GetOptions, GetResult, ListResult, - MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, - PutPayload, PutResult, + Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, + memory::InMemory, path::Path, }, prelude::SessionContext, }; -use futures::{stream::BoxStream, FutureExt}; +use futures::{FutureExt, stream::BoxStream}; use std::{ fmt::{Debug, Display, Formatter}, sync::Arc, }; use tokio::{ sync::Barrier, - time::{timeout, Duration}, + time::{Duration, timeout}, }; use url::Url; diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index b5213cee3f2d..44e884c23a68 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -32,15 +32,15 @@ use crate::logical_expr::execution_props::ExecutionProps; use crate::logical_expr::simplify::SimplifyContext; use crate::optimizer::simplify_expressions::ExprSimplifier; use crate::physical_expr::create_physical_expr; +use crate::physical_plan::ExecutionPlan; use crate::physical_plan::filter::FilterExec; use crate::physical_plan::metrics::MetricsSet; -use crate::physical_plan::ExecutionPlan; use crate::prelude::{Expr, SessionConfig, SessionContext}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; -use object_store::path::Path; use object_store::ObjectMeta; +use object_store::path::Path; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; @@ -203,13 +203,12 @@ impl 
TestParquetFile { /// Recursively searches for DataSourceExec and returns the metrics /// on the first one it finds pub fn parquet_metrics(plan: &Arc) -> Option { - if let Some(data_source_exec) = plan.as_any().downcast_ref::() { - if data_source_exec + if let Some(data_source_exec) = plan.as_any().downcast_ref::() + && data_source_exec .downcast_to_file_source::() .is_some() - { - return data_source_exec.metrics(); - } + { + return data_source_exec.metrics(); } for child in plan.children() { diff --git a/datafusion/core/tests/catalog/memory.rs b/datafusion/core/tests/catalog/memory.rs index 06ed141b2e8b..5258f3bf9757 100644 --- a/datafusion/core/tests/catalog/memory.rs +++ b/datafusion/core/tests/catalog/memory.rs @@ -116,10 +116,12 @@ async fn test_mem_provider() { assert!(provider.deregister_table(table_name).unwrap().is_none()); let test_table = EmptyTable::new(Arc::new(Schema::empty())); // register table successfully - assert!(provider - .register_table(table_name.to_string(), Arc::new(test_table)) - .unwrap() - .is_none()); + assert!( + provider + .register_table(table_name.to_string(), Arc::new(test_table)) + .unwrap() + .is_none() + ); assert!(provider.table_exist(table_name)); let other_table = EmptyTable::new(Arc::new(Schema::empty())); let result = provider.register_table(table_name.to_string(), Arc::new(other_table)); diff --git a/datafusion/core/tests/catalog_listing/pruned_partition_list.rs b/datafusion/core/tests/catalog_listing/pruned_partition_list.rs index 3cdaa3bb9b34..f4782ee13c24 100644 --- a/datafusion/core/tests/catalog_listing/pruned_partition_list.rs +++ b/datafusion/core/tests/catalog_listing/pruned_partition_list.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use arrow_schema::DataType; use futures::{FutureExt, StreamExt as _, TryStreamExt as _}; -use object_store::{memory::InMemory, path::Path, ObjectStore as _}; +use object_store::{ObjectStore as _, memory::InMemory, path::Path}; use datafusion::execution::SessionStateBuilder; use datafusion_catalog_listing::helpers::{ @@ -27,7 +27,7 @@ use datafusion_catalog_listing::helpers::{ }; use datafusion_common::ScalarValue; use datafusion_datasource::ListingTableUrl; -use datafusion_expr::{col, lit, Expr}; +use datafusion_expr::{Expr, col, lit}; use datafusion_session::Session; #[tokio::test] diff --git a/datafusion/core/tests/config_from_env.rs b/datafusion/core/tests/config_from_env.rs index 976597c8a9ac..6375d4e25d8e 100644 --- a/datafusion/core/tests/config_from_env.rs +++ b/datafusion/core/tests/config_from_env.rs @@ -20,35 +20,43 @@ use std::env; #[test] fn from_env() { - // Note: these must be a single test to avoid interference from concurrent execution - let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS"; - // valid testing in different cases - for bool_option in ["true", "TRUE", "True", "tRUe"] { - env::set_var(env_key, bool_option); - let config = ConfigOptions::from_env().unwrap(); - env::remove_var(env_key); - assert!(config.optimizer.filter_null_join_keys); - } + unsafe { + // Note: these must be a single test to avoid interference from concurrent execution + let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS"; + // valid testing in different cases + for bool_option in ["true", "TRUE", "True", "tRUe"] { + env::set_var(env_key, bool_option); + let config = ConfigOptions::from_env().unwrap(); + env::remove_var(env_key); + assert!(config.optimizer.filter_null_join_keys); + } - // invalid testing - env::set_var(env_key, "ttruee"); - let err = 
ConfigOptions::from_env().unwrap_err().strip_backtrace(); - assert_eq!(err, "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`"); - env::remove_var(env_key); + // invalid testing + env::set_var(env_key, "ttruee"); + let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); + assert_eq!( + err, + "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`" + ); + env::remove_var(env_key); - let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE"; + let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE"; - // for valid testing - env::set_var(env_key, "4096"); - let config = ConfigOptions::from_env().unwrap(); - assert_eq!(config.execution.batch_size, 4096); + // for valid testing + env::set_var(env_key, "4096"); + let config = ConfigOptions::from_env().unwrap(); + assert_eq!(config.execution.batch_size, 4096); - // for invalid testing - env::set_var(env_key, "abc"); - let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); - assert_eq!(err, "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string"); + // for invalid testing + env::set_var(env_key, "abc"); + let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); + assert_eq!( + err, + "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string" + ); - env::remove_var(env_key); - let config = ConfigOptions::from_env().unwrap(); - assert_eq!(config.execution.batch_size, 8192); // set to its default value + env::remove_var(env_key); + let config = ConfigOptions::from_env().unwrap(); + assert_eq!(config.execution.batch_size, 8192); // set to its default value + } } diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index 44da7d4e62e5..7b6a3c5fbed7 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -28,11 +28,11 @@ use datafusion::datasource::{TableProvider, TableType}; use datafusion::error::Result; use datafusion::execution::context::{SessionContext, TaskContext}; use datafusion::logical_expr::{ - col, Expr, LogicalPlan, LogicalPlanBuilder, TableScan, UNNAMED_TABLE, + Expr, LogicalPlan, LogicalPlanBuilder, TableScan, UNNAMED_TABLE, col, }; use datafusion::physical_plan::{ - collect, ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - RecordBatchStream, SendableRecordBatchStream, Statistics, + ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, + RecordBatchStream, SendableRecordBatchStream, Statistics, collect, }; use datafusion::scalar::ScalarValue; use datafusion_catalog::Session; @@ -40,9 +40,9 @@ use datafusion_common::cast::as_primitive_array; use datafusion_common::project_schema; use datafusion_common::stats::Precision; use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::PlanProperties; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; -use datafusion_physical_plan::PlanProperties; use async_trait::async_trait; use futures::stream::Stream; diff --git a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs index f4e905e1eda0..ca1eaa1f958e 100644 --- a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs +++ 
b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs @@ -35,7 +35,7 @@ use datafusion::prelude::*; use datafusion::scalar::ScalarValue; use datafusion_catalog::Session; use datafusion_common::cast::as_primitive_array; -use datafusion_common::{internal_err, not_impl_err, DataFusionError}; +use datafusion_common::{DataFusionError, internal_err, not_impl_err}; use datafusion_expr::expr::{BinaryExpr, Cast}; use datafusion_functions_aggregate::expr_fn::count; use datafusion_physical_expr::EquivalenceProperties; diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index 56cdd78d7051..014f356cd64c 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{types::Int32Type, ListArray}; +use arrow::array::{ListArray, types::Int32Type}; use arrow::datatypes::SchemaRef; use arrow::datatypes::{DataType, Field, Schema}; use arrow::{ @@ -31,7 +31,7 @@ use datafusion::prelude::*; use datafusion_common::test_util::batches_to_string; use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::expr::Alias; -use datafusion_expr::{table_scan, ExprSchemable, LogicalPlanBuilder}; +use datafusion_expr::{ExprSchemable, LogicalPlanBuilder, table_scan}; use datafusion_functions_aggregate::expr_fn::{approx_median, approx_percentile_cont}; use datafusion_functions_nested::map::map; use insta::assert_snapshot; diff --git a/datafusion/core/tests/dataframe/describe.rs b/datafusion/core/tests/dataframe/describe.rs index 9bd69dfa72b4..c61fe4fed161 100644 --- a/datafusion/core/tests/dataframe/describe.rs +++ b/datafusion/core/tests/dataframe/describe.rs @@ -17,7 +17,7 @@ use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion_common::test_util::batches_to_string; -use datafusion_common::{test_util::parquet_test_data, Result}; +use datafusion_common::{Result, test_util::parquet_test_data}; use insta::assert_snapshot; #[tokio::test] diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 6eb9d9743d31..c87e9fa7ab3b 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -20,10 +20,10 @@ mod dataframe_functions; mod describe; use arrow::array::{ - record_batch, Array, ArrayRef, BooleanArray, DictionaryArray, FixedSizeListArray, - FixedSizeListBuilder, Float32Array, Float64Array, Int32Array, Int32Builder, - Int8Array, LargeListArray, ListArray, ListBuilder, RecordBatch, StringArray, - StringBuilder, StructBuilder, UInt32Array, UInt32Builder, UnionArray, + Array, ArrayRef, BooleanArray, DictionaryArray, FixedSizeListArray, + FixedSizeListBuilder, Float32Array, Float64Array, Int8Array, Int32Array, + Int32Builder, LargeListArray, ListArray, ListBuilder, RecordBatch, StringArray, + StringBuilder, StructBuilder, UInt32Array, UInt32Builder, UnionArray, record_batch, }; use arrow::buffer::ScalarBuffer; use arrow::datatypes::{ @@ -66,8 +66,8 @@ use datafusion::test_util::{ use datafusion_catalog::TableProvider; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; use datafusion_common::{ - assert_contains, internal_datafusion_err, Constraint, Constraints, DFSchema, - DataFusionError, ScalarValue, SchemaError, TableReference, UnnestOptions, + Constraint, Constraints, DFSchema, DataFusionError, ScalarValue, SchemaError, 
+ TableReference, UnnestOptions, assert_contains, internal_datafusion_err, }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::file_format::format_as_file_type; @@ -76,21 +76,21 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::expr::{GroupingSet, NullTreatment, Sort, WindowFunction}; use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ - cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder, - scalar_subquery, when, wildcard, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, - LogicalPlanBuilder, ScalarFunctionImplementation, SortExpr, TableType, WindowFrame, - WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, LogicalPlanBuilder, + ScalarFunctionImplementation, SortExpr, TableType, WindowFrame, WindowFrameBound, + WindowFrameUnits, WindowFunctionDefinition, cast, col, create_udf, exists, + in_subquery, lit, out_ref_col, placeholder, scalar_subquery, when, wildcard, }; +use datafusion_physical_expr::Partitioning; use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::Partitioning; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; use datafusion_physical_plan::empty::EmptyExec; -use datafusion_physical_plan::{displayable, ExecutionPlan, ExecutionPlanProperties}; +use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties, displayable}; use datafusion::error::Result as DataFusionResult; use datafusion_functions_window::expr_fn::lag; @@ -2234,12 +2234,14 @@ async fn row_writer_resize_test() -> Result<()> { let data = RecordBatch::try_new( schema, - vec![ - Arc::new(StringArray::from(vec![ - Some("2a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"), - Some("3a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800"), - ])) - ], + vec![Arc::new(StringArray::from(vec![ + Some( + "2a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + ), + Some( + "3a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000800", + ), + ]))], )?; let ctx = SessionContext::new(); @@ -3114,15 +3116,17 @@ async fn test_count_wildcard_on_window() -> Result<()> { let df_results = ctx .table("t1") .await? - .select(vec![count_all_window() - .order_by(vec![Sort::new(col("a"), false, true)]) - .window_frame(WindowFrame::new_bounds( - WindowFrameUnits::Range, - WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), - WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), - )) - .build() - .unwrap()])? + .select(vec![ + count_all_window() + .order_by(vec![Sort::new(col("a"), false, true)]) + .window_frame(WindowFrame::new_bounds( + WindowFrameUnits::Range, + WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), + WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), + )) + .build() + .unwrap(), + ])? .explain(false, false)? 
.collect() .await?; @@ -3150,8 +3154,8 @@ async fn test_count_wildcard_on_window() -> Result<()> { #[tokio::test] // Test with `repartition_sorts` disabled, causing a full resort of the data -async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false( -) -> Result<()> { +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false() +-> Result<()> { assert_snapshot!( union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?, @r" @@ -3168,8 +3172,8 @@ async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_reparti #[tokio::test] // Test with `repartition_sorts` enabled to preserve pre-sorted partitions and avoid resorting -async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true( -) -> Result<()> { +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true() +-> Result<()> { assert_snapshot!( union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?, @r#" @@ -6306,7 +6310,10 @@ async fn test_insert_into_checking() -> Result<()> { .await .unwrap_err(); - assert_contains!(e.to_string(), "Inserting query schema mismatch: Expected table field 'a' with type Int64, but got 'column1' with type Utf8"); + assert_contains!( + e.to_string(), + "Inserting query schema mismatch: Expected table field 'a' with type Int64, but got 'column1' with type Utf8" + ); Ok(()) } @@ -6438,7 +6445,10 @@ async fn test_insert_into_casting_support() -> Result<()> { .await .unwrap_err(); - assert_contains!(e.to_string(), "Inserting query schema mismatch: Expected table field 'a' with type Float16, but got 'a' with type Utf8."); + assert_contains!( + e.to_string(), + "Inserting query schema mismatch: Expected table field 'a' with type Float16, but got 'a' with type Utf8." + ); // Testing case2: // Inserting query schema mismatch: Expected table field 'a' with type Utf8View, but got 'a' with type Utf8. 
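Note: several hunks above (`SessionContext` deregistration, `SessionState` table resolution, `TestParquetFile::parquet_metrics`) flatten nested `if let` blocks into let-chains, which only compile on the 2024 edition; the same migration is why `config_from_env.rs` now wraps `env::set_var`/`remove_var` in `unsafe`, since those became unsafe fns in that edition. A minimal sketch of the let-chain pattern, with hypothetical stand-in types and names rather than DataFusion's:

    // 2021 edition required nesting:
    //     if let Some(inner) = outer {
    //         if let Some(v) = inner { ... }
    //     }
    // 2024 edition allows chaining the bindings and a guard with `&&`:
    fn lookup(outer: Option<Option<i32>>) -> Option<i32> {
        if let Some(inner) = outer
            && let Some(v) = inner
            && v > 0
        {
            return Some(v);
        }
        None
    }

    fn main() {
        assert_eq!(lookup(Some(Some(3))), Some(3));
        assert_eq!(lookup(Some(None)), None);
    }

The chained form short-circuits exactly like the nested form, so the rewrites above are behavior-preserving.
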
diff --git a/datafusion/core/tests/execution/coop.rs b/datafusion/core/tests/execution/coop.rs index fb9495da506a..2de0b95d0c4b 100644 --- a/datafusion/core/tests/execution/coop.rs +++ b/datafusion/core/tests/execution/coop.rs @@ -22,25 +22,25 @@ use datafusion::common::NullEquality; use datafusion::functions_aggregate::sum; use datafusion::physical_expr::aggregate::AggregateExprBuilder; use datafusion::physical_plan; +use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; use datafusion::physical_plan::execution_plan::Boundedness; -use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; -use datafusion_common::{exec_datafusion_err, DataFusionError, JoinType, ScalarValue}; +use datafusion_common::{DataFusionError, JoinType, ScalarValue, exec_datafusion_err}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr_common::operator::Operator; use datafusion_expr_common::operator::Operator::{Divide, Eq, Gt, Modulo}; use datafusion_functions_aggregate::min_max; +use datafusion_physical_expr::Partitioning; use datafusion_physical_expr::expressions::{ - binary, col, lit, BinaryExpr, Column, Literal, + BinaryExpr, Column, Literal, binary, col, lit, }; -use datafusion_physical_expr::Partitioning; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_optimizer::ensure_coop::EnsureCooperative; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::ensure_coop::EnsureCooperative; use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_physical_plan::coop::make_cooperative; use datafusion_physical_plan::filter::FilterExec; @@ -136,7 +136,7 @@ fn make_lazy_exec_with_range( }; // Instantiate the generator with the batch and limit - let gen = RangeBatchGenerator { + let batch_gen = RangeBatchGenerator { schema: Arc::clone(&schema), boundedness, value_range: range, @@ -145,7 +145,7 @@ fn make_lazy_exec_with_range( }; // Wrap the generator in a trait object behind Arc> - let generator: Arc> = Arc::new(RwLock::new(gen)); + let generator: Arc> = Arc::new(RwLock::new(batch_gen)); // Create a LazyMemoryExec with one partition using our generator let mut exec = LazyMemoryExec::try_new(schema, vec![generator]).unwrap(); diff --git a/datafusion/core/tests/execution/datasource_split.rs b/datafusion/core/tests/execution/datasource_split.rs index 0b90c6f32616..370249cd8044 100644 --- a/datafusion/core/tests/execution/datasource_split.rs +++ b/datafusion/core/tests/execution/datasource_split.rs @@ -22,7 +22,7 @@ use arrow::{ }; use datafusion_datasource::memory::MemorySourceConfig; use datafusion_execution::TaskContext; -use datafusion_physical_plan::{common::collect, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, common::collect}; use std::sync::Arc; /// Helper function to create a memory source with the given batch size and collect all batches diff --git a/datafusion/core/tests/execution/logical_plan.rs b/datafusion/core/tests/execution/logical_plan.rs index ef2e263f2c46..3eaa3fb2ed5e 100644 --- a/datafusion/core/tests/execution/logical_plan.rs +++ b/datafusion/core/tests/execution/logical_plan.rs @@ -20,7 +20,7 @@ use arrow::array::Int64Array; use arrow::datatypes::{DataType, Field, Schema}; -use datafusion::datasource::{provider_as_source, ViewTable}; +use 
datafusion::datasource::{ViewTable, provider_as_source}; use datafusion::execution::session_state::SessionStateBuilder; use datafusion_common::{Column, DFSchema, DFSchemaRef, Result, ScalarValue, Spans}; use datafusion_execution::TaskContext; diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index ad2b86684459..90c1b96749b3 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -16,17 +16,17 @@ // under the License. use arrow::array::{ - builder::{ListBuilder, StringBuilder}, ArrayRef, Int64Array, RecordBatch, StringArray, StructArray, + builder::{ListBuilder, StringBuilder}, }; use arrow::datatypes::{DataType, Field}; use arrow::util::pretty::{pretty_format_batches, pretty_format_columns}; use datafusion::prelude::*; use datafusion_common::{DFSchema, ScalarValue}; +use datafusion_expr::ExprFunctionExt; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr::NullTreatment; use datafusion_expr::simplify::SimplifyContext; -use datafusion_expr::ExprFunctionExt; use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_functions_aggregate::first_last::first_value_udaf; use datafusion_functions_aggregate::sum::sum_udaf; diff --git a/datafusion/core/tests/expr_api/parse_sql_expr.rs b/datafusion/core/tests/expr_api/parse_sql_expr.rs index 92c18204324f..b0d8b3a349ae 100644 --- a/datafusion/core/tests/expr_api/parse_sql_expr.rs +++ b/datafusion/core/tests/expr_api/parse_sql_expr.rs @@ -19,9 +19,9 @@ use arrow::datatypes::{DataType, Field, Schema}; use datafusion::prelude::{CsvReadOptions, SessionContext}; use datafusion_common::DFSchema; use datafusion_common::{DFSchemaRef, Result, ToDFSchema}; +use datafusion_expr::Expr; use datafusion_expr::col; use datafusion_expr::lit; -use datafusion_expr::Expr; use datafusion_sql::unparser::Unparser; /// A schema like: /// diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 46c36c6abdac..470e10b59261 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -24,15 +24,15 @@ use arrow::array::{ArrayRef, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use chrono::{DateTime, TimeZone, Utc}; use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*}; -use datafusion_common::cast::as_int32_array; use datafusion_common::ScalarValue; +use datafusion_common::cast::as_int32_array; use datafusion_common::{DFSchemaRef, ToDFSchema}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::logical_plan::builder::table_scan_with_filters; use datafusion_expr::simplify::SimplifyInfo; use datafusion_expr::{ - table_scan, Cast, ColumnarValue, ExprSchemable, LogicalPlan, LogicalPlanBuilder, - ScalarUDF, Volatility, + Cast, ColumnarValue, ExprSchemable, LogicalPlan, LogicalPlanBuilder, ScalarUDF, + Volatility, table_scan, }; use datafusion_functions::math; use datafusion_optimizer::optimizer::Optimizer; diff --git a/datafusion/core/tests/fifo/mod.rs b/datafusion/core/tests/fifo/mod.rs index 141a3f3b7558..36cc769417db 100644 --- a/datafusion/core/tests/fifo/mod.rs +++ b/datafusion/core/tests/fifo/mod.rs @@ -22,21 +22,21 @@ mod unix_test { use std::fs::File; use std::path::PathBuf; - use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; + use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; use arrow::array::Array; use arrow::csv::ReaderBuilder; use 
arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::datasource::TableProvider; + use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::{ prelude::{CsvReadOptions, SessionConfig, SessionContext}, test_util::{aggr_test_schema, arrow_test_data}, }; use datafusion_common::instant::Instant; - use datafusion_common::{exec_err, Result}; + use datafusion_common::{Result, exec_err}; use datafusion_expr::SortExpr; use futures::StreamExt; @@ -44,7 +44,7 @@ mod unix_test { use nix::unistd; use tempfile::TempDir; use tokio::io::AsyncWriteExt; - use tokio::task::{spawn_blocking, JoinHandle}; + use tokio::task::{JoinHandle, spawn_blocking}; /// Makes a TableProvider for a fifo file fn fifo_table( diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 4e04da26f70b..97d1db5728cf 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -24,37 +24,37 @@ use crate::fuzz_cases::aggregation_fuzzer::{ }; use arrow::array::{ - types::Int64Type, Array, ArrayRef, AsArray, Int32Array, Int64Array, RecordBatch, - StringArray, + Array, ArrayRef, AsArray, Int32Array, Int64Array, RecordBatch, StringArray, + types::Int64Type, }; use arrow::compute::concat_batches; use arrow::datatypes::DataType; use arrow::util::pretty::pretty_format_batches; use arrow_schema::{Field, Schema, SchemaRef}; +use datafusion::datasource::MemTable; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::source::DataSourceExec; -use datafusion::datasource::MemTable; use datafusion::prelude::{DataFrame, SessionConfig, SessionContext}; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::{HashMap, Result}; use datafusion_common_runtime::JoinSet; use datafusion_functions_aggregate::sum::sum_udaf; -use datafusion_physical_expr::expressions::{col, lit, Column}; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr::expressions::{Column, col, lit}; use datafusion_physical_plan::InputOrderMode; -use test_utils::{add_empty_batches, StringBatchGenerator}; +use test_utils::{StringBatchGenerator, add_empty_batches}; +use datafusion_execution::TaskContext; use datafusion_execution::memory_pool::FairSpillPool; use datafusion_execution::runtime_env::RuntimeEnvBuilder; -use datafusion_execution::TaskContext; use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; use datafusion_physical_plan::metrics::MetricValue; -use datafusion_physical_plan::{collect, displayable, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, collect, displayable}; use rand::rngs::StdRng; -use rand::{random, rng, Rng, SeedableRng}; +use rand::{Rng, SeedableRng, random, rng}; // ======================================================================== // The new aggregation fuzz tests based on [`AggregationFuzzer`] @@ -326,15 +326,14 @@ async fn run_aggregate_test(input1: Vec, group_by_columns: Vec<&str .unwrap(), ); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(sum_udaf(), vec![col("d", &schema).unwrap()]) - .schema(Arc::clone(&schema)) - .alias("sum1") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(sum_udaf(), 
vec![col("d", &schema).unwrap()]) + .schema(Arc::clone(&schema)) + .alias("sum1") + .build() + .map(Arc::new) + .unwrap(), + ]; let expr = group_by_columns .iter() .map(|elem| (col(elem, &schema).unwrap(), (*elem).to_string())) @@ -650,7 +649,9 @@ pub(crate) fn assert_spill_count_metric( if expect_spill && spill_count == 0 { panic!("Expected spill but SpillCount metric not found or SpillCount was 0."); } else if !expect_spill && spill_count > 0 { - panic!("Expected no spill but found SpillCount metric with value greater than 0."); + panic!( + "Expected no spill but found SpillCount metric with value greater than 0." + ); } spill_count diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs index fa8ea0b31c02..bf71053d6c85 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs @@ -25,7 +25,7 @@ use datafusion_catalog::TableProvider; use datafusion_common::ScalarValue; use datafusion_common::{error::Result, utils::get_available_parallelism}; use datafusion_expr::col; -use rand::{rng, Rng}; +use rand::{Rng, rng}; use crate::fuzz_cases::aggregation_fuzzer::data_generator::Dataset; diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs index aaf2d1b9bad4..e49cffa89b04 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs @@ -18,7 +18,7 @@ use arrow::array::RecordBatch; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; +use datafusion_physical_expr::{PhysicalSortExpr, expressions::col}; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::sorts::sort::sort_batch; use test_utils::stagger_batch; @@ -209,8 +209,8 @@ mod test { sort_keys_set: vec![vec!["b".to_string()]], }; - let mut gen = DatasetGenerator::new(config); - let datasets = gen.generate().unwrap(); + let mut data_gen = DatasetGenerator::new(config); + let datasets = data_gen.generate().unwrap(); // Should Generate 2 datasets assert_eq!(datasets.len(), 2); diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index 1a8ef278cc29..430762b1c28d 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -19,9 +19,9 @@ use std::sync::Arc; use arrow::array::RecordBatch; use arrow::util::pretty::pretty_format_batches; -use datafusion_common::{internal_datafusion_err, Result}; +use datafusion_common::{Result, internal_datafusion_err}; use datafusion_common_runtime::JoinSet; -use rand::{rng, Rng}; +use rand::{Rng, rng}; use crate::fuzz_cases::aggregation_fuzzer::query_builder::QueryBuilder; use crate::fuzz_cases::aggregation_fuzzer::{ diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs index 766e2bedd74c..0d04e98536f2 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs @@ -17,7 +17,7 @@ use std::{collections::HashSet, str::FromStr}; -use 
rand::{rng, seq::SliceRandom, Rng}; +use rand::{Rng, rng, seq::SliceRandom}; /// Random aggregate query builder /// diff --git a/datafusion/core/tests/fuzz_cases/distinct_count_string_fuzz.rs b/datafusion/core/tests/fuzz_cases/distinct_count_string_fuzz.rs index 3049631d4b3f..92adda200d1a 100644 --- a/datafusion/core/tests/fuzz_cases/distinct_count_string_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/distinct_count_string_fuzz.rs @@ -19,7 +19,7 @@ use std::sync::Arc; -use arrow::array::{cast::AsArray, Array, OffsetSizeTrait, RecordBatch}; +use arrow::array::{Array, OffsetSizeTrait, RecordBatch, cast::AsArray}; use datafusion::datasource::MemTable; use datafusion_common_runtime::JoinSet; diff --git a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs index 171839b390ff..a57095066ee1 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs @@ -16,19 +16,19 @@ // under the License. use crate::fuzz_cases::equivalence::utils::{ - create_random_schema, create_test_params, create_test_schema_2, + TestScalarUDF, create_random_schema, create_test_params, create_test_schema_2, generate_table_for_eq_properties, generate_table_for_orderings, - is_table_same_after_sort, TestScalarUDF, + is_table_same_after_sort, }; use arrow::compute::SortOptions; -use datafusion_common::config::ConfigOptions; use datafusion_common::Result; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{Operator, ScalarUDF}; +use datafusion_physical_expr::ScalarFunctionExpr; use datafusion_physical_expr::equivalence::{ convert_to_orderings, convert_to_sort_exprs, }; -use datafusion_physical_expr::expressions::{col, BinaryExpr}; -use datafusion_physical_expr::ScalarFunctionExpr; +use datafusion_physical_expr::expressions::{BinaryExpr, col}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use itertools::Itertools; diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs index a72a1558b2e4..2f67e211ce91 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs @@ -16,15 +16,15 @@ // under the License. 
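[Editor's note] Most hunks above and below are mechanical fallout from the switch to the 2024 style edition: rustfmt now sorts use-list items with a version sort, in which ASCII uppercase letters order before lowercase ones and embedded digit runs compare numerically. A minimal sketch of the effect, using paths that already appear in this diff:

// Older style editions sorted use items case-insensitively, producing
//     use datafusion::physical_plan::{collect, ExecutionPlan};
// The 2024 style edition version-sorts, so uppercase names lead:
use datafusion::physical_plan::{ExecutionPlan, collect};
// ...and digit runs compare as numbers, so 32 < 64 < 128 < 256:
use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type};

The reorderings in the import hunks of this patch follow from that one rule.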
use crate::fuzz_cases::equivalence::utils::{ - apply_projection, create_random_schema, generate_table_for_eq_properties, - is_table_same_after_sort, TestScalarUDF, + TestScalarUDF, apply_projection, create_random_schema, + generate_table_for_eq_properties, is_table_same_after_sort, }; use arrow::compute::SortOptions; -use datafusion_common::config::ConfigOptions; use datafusion_common::Result; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::equivalence::ProjectionMapping; -use datafusion_physical_expr::expressions::{col, BinaryExpr}; +use datafusion_physical_expr::expressions::{BinaryExpr, col}; use datafusion_physical_expr::{PhysicalExprRef, ScalarFunctionExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; diff --git a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs index 382c4da94321..1490eb08a029 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs @@ -18,13 +18,13 @@ use std::sync::Arc; use crate::fuzz_cases::equivalence::utils::{ - create_random_schema, generate_table_for_eq_properties, is_table_same_after_sort, - TestScalarUDF, + TestScalarUDF, create_random_schema, generate_table_for_eq_properties, + is_table_same_after_sort, }; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; -use datafusion_physical_expr::expressions::{col, BinaryExpr}; +use datafusion_physical_expr::expressions::{BinaryExpr, col}; use datafusion_physical_expr::{LexOrdering, ScalarFunctionExpr}; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; diff --git a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs index be35ddca8f02..580a22672108 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/utils.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/utils.rs @@ -20,21 +20,21 @@ use std::cmp::Ordering; use std::sync::Arc; use arrow::array::{ArrayRef, Float32Array, Float64Array, RecordBatch, UInt32Array}; -use arrow::compute::{lexsort_to_indices, take_record_batch, SortColumn, SortOptions}; +use arrow::compute::{SortColumn, SortOptions, lexsort_to_indices, take_record_batch}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::utils::{compare_rows, get_row_at_idx}; -use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result}; +use datafusion_common::{Result, exec_err, internal_datafusion_err, plan_err}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; use datafusion_physical_expr::equivalence::{ - convert_to_orderings, EquivalenceClass, ProjectionMapping, + EquivalenceClass, ProjectionMapping, convert_to_orderings, }; use datafusion_physical_expr::{ConstExpr, EquivalenceProperties}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_plan::expressions::{col, Column}; +use datafusion_physical_plan::expressions::{Column, col}; use itertools::izip; use rand::prelude::*; diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index 
81fb3b3b0894..ce422494db10 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -38,8 +38,8 @@ use datafusion::physical_plan::joins::{ }; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::{NullEquality, ScalarValue}; -use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::PhysicalExprRef; +use datafusion_physical_expr::expressions::Literal; use itertools::Itertools; use rand::Rng; @@ -921,7 +921,9 @@ impl JoinFuzzTestCase { std::fs::remove_dir_all(fuzz_debug).unwrap_or(()); std::fs::create_dir_all(fuzz_debug).unwrap(); let out_dir_name = &format!("{fuzz_debug}/batch_size_{batch_size}"); - println!("Test result data mismatch found. HJ rows {hj_rows}, SMJ rows {smj_rows}, NLJ rows {nlj_rows}"); + println!( + "Test result data mismatch found. HJ rows {hj_rows}, SMJ rows {smj_rows}, NLJ rows {nlj_rows}" + ); println!("The debug is ON. Input data will be saved to {out_dir_name}"); Self::save_partitioned_batches_as_parquet( @@ -972,14 +974,18 @@ impl JoinFuzzTestCase { } if join_tests.contains(&NljHj) { - let err_msg_rowcnt = format!("NestedLoopJoinExec and HashJoinExec produced different row counts, batch_size: {batch_size}"); + let err_msg_rowcnt = format!( + "NestedLoopJoinExec and HashJoinExec produced different row counts, batch_size: {batch_size}" + ); assert_eq!(nlj_rows, hj_rows, "{}", err_msg_rowcnt.as_str()); if nlj_rows == 0 && hj_rows == 0 { // both joins returned no rows, skip content comparison continue; } - let err_msg_contents = format!("NestedLoopJoinExec and HashJoinExec produced different results, batch_size: {batch_size}"); + let err_msg_contents = format!( + "NestedLoopJoinExec and HashJoinExec produced different results, batch_size: {batch_size}" + ); // row level compare if any of joins returns the result // the reason is different formatting when there is no rows for (i, (nlj_line, hj_line)) in nlj_formatted_sorted @@ -997,10 +1003,16 @@ impl JoinFuzzTestCase { } if join_tests.contains(&HjSmj) { - let err_msg_row_cnt = format!("HashJoinExec and SortMergeJoinExec produced different row counts, batch_size: {}", &batch_size); + let err_msg_row_cnt = format!( + "HashJoinExec and SortMergeJoinExec produced different row counts, batch_size: {}", + &batch_size + ); assert_eq!(hj_rows, smj_rows, "{}", err_msg_row_cnt.as_str()); - let err_msg_contents = format!("SortMergeJoinExec and HashJoinExec produced different results, batch_size: {}", &batch_size); + let err_msg_contents = format!( + "SortMergeJoinExec and HashJoinExec produced different results, batch_size: {}", + &batch_size + ); // row level compare if any of joins returns the result // the reason is different formatting when there is no rows if smj_rows > 0 || hj_rows > 0 { diff --git a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs index 4c5ebf040241..1c5741e7a21b 100644 --- a/datafusion/core/tests/fuzz_cases/limit_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/limit_fuzz.rs @@ -24,7 +24,7 @@ use arrow::util::pretty::pretty_format_batches; use datafusion::datasource::MemTable; use datafusion::prelude::SessionContext; use datafusion_common::assert_contains; -use rand::{rng, Rng}; +use rand::{Rng, rng}; use std::sync::Arc; use test_utils::stagger_batch; diff --git a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs index b92dec64e3f1..59430a98cc4b 100644 --- a/datafusion/core/tests/fuzz_cases/merge_fuzz.rs +++ 
b/datafusion/core/tests/fuzz_cases/merge_fuzz.rs @@ -27,7 +27,7 @@ use arrow::{ use datafusion::datasource::memory::MemorySourceConfig; use datafusion::physical_plan::{ collect, - expressions::{col, PhysicalSortExpr}, + expressions::{PhysicalSortExpr, col}, sorts::sort_preserving_merge::SortPreservingMergeExec, }; use datafusion::prelude::{SessionConfig, SessionContext}; diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs index 51ec8f03e5d2..8a84e4c5d181 100644 --- a/datafusion/core/tests/fuzz_cases/pruning.rs +++ b/datafusion/core/tests/fuzz_cases/pruning.rs @@ -29,9 +29,9 @@ use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_physical_expr::PhysicalExpr; -use datafusion_physical_plan::{collect, filter::FilterExec, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, collect, filter::FilterExec}; use itertools::Itertools; -use object_store::{memory::InMemory, path::Path, ObjectStore, PutPayload}; +use object_store::{ObjectStore, PutPayload, memory::InMemory, path::Path}; use parquet::{ arrow::ArrowWriter, file::properties::{EnabledStatistics, WriterProperties}, diff --git a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs index 2e6bd107b3dd..22b145f5095a 100644 --- a/datafusion/core/tests/fuzz_cases/record_batch_generator.rs +++ b/datafusion/core/tests/fuzz_cases/record_batch_generator.rs @@ -19,23 +19,23 @@ use std::sync::Arc; use arrow::array::{ArrayRef, DictionaryArray, PrimitiveArray, RecordBatch}; use arrow::datatypes::{ - ArrowPrimitiveType, BooleanType, DataType, Date32Type, Date64Type, Decimal128Type, - Decimal256Type, Decimal32Type, Decimal64Type, DurationMicrosecondType, + ArrowPrimitiveType, BooleanType, DataType, Date32Type, Date64Type, Decimal32Type, + Decimal64Type, Decimal128Type, Decimal256Type, DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType, Field, - Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, + Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, Schema, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, + TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, + UInt64Type, }; use arrow_schema::{ - DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, - DECIMAL256_MAX_SCALE, DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE, - DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE, + DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE, DECIMAL64_MAX_PRECISION, + DECIMAL64_MAX_SCALE, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, }; -use datafusion_common::{arrow_datafusion_err, Result}; -use rand::{rng, rngs::StdRng, Rng, SeedableRng}; +use datafusion_common::{Result, arrow_datafusion_err}; +use rand::{Rng, SeedableRng, rng, rngs::StdRng}; use test_utils::array_gen::{ BinaryArrayGenerator, BooleanArrayGenerator, DecimalArrayGenerator, PrimitiveArrayGenerator, StringArrayGenerator, diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs 
b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index 28d28a6622a7..0d8a066d432d 100644 --- a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use arrow::{ - array::{as_string_array, ArrayRef, Int32Array, StringArray}, + array::{ArrayRef, Int32Array, StringArray, as_string_array}, compute::SortOptions, record_batch::RecordBatch, }; @@ -28,7 +28,7 @@ use datafusion::datasource::memory::MemorySourceConfig; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::sorts::sort::SortExec; -use datafusion::physical_plan::{collect, ExecutionPlan}; +use datafusion::physical_plan::{ExecutionPlan, collect}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::cast::as_int32_array; use datafusion_execution::memory_pool::GreedyMemoryPool; diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs index 99b20790fc46..c424a314270c 100644 --- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs @@ -20,34 +20,33 @@ mod sp_repartition_fuzz_tests { use std::sync::Arc; use arrow::array::{ArrayRef, Int64Array, RecordBatch, UInt64Array}; - use arrow::compute::{concat_batches, lexsort, SortColumn, SortOptions}; + use arrow::compute::{SortColumn, SortOptions, concat_batches, lexsort}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::source::DataSourceExec; use datafusion::physical_plan::{ - collect, + ExecutionPlan, Partitioning, collect, metrics::{BaselineMetrics, ExecutionPlanMetricsSet}, repartition::RepartitionExec, sorts::sort_preserving_merge::SortPreservingMergeExec, sorts::streaming_merge::StreamingMergeBuilder, stream::RecordBatchStreamAdapter, - ExecutionPlan, Partitioning, }; use datafusion::prelude::SessionContext; use datafusion_common::Result; use datafusion_execution::{config::SessionConfig, memory_pool::MemoryConsumer}; + use datafusion_physical_expr::ConstExpr; use datafusion_physical_expr::equivalence::{ EquivalenceClass, EquivalenceProperties, }; - use datafusion_physical_expr::expressions::{col, Column}; - use datafusion_physical_expr::ConstExpr; + use datafusion_physical_expr::expressions::{Column, col}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use test_utils::add_empty_batches; use itertools::izip; - use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng}; + use rand::{Rng, SeedableRng, rngs::StdRng, seq::SliceRandom}; // Generate a schema which consists of 6 columns (a, b, c, d, e, f) fn create_test_schema() -> Result { diff --git a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs index 9c7e4f1b8450..376306f3e065 100644 --- a/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs @@ -24,7 +24,7 @@ use arrow::array::RecordBatch; use arrow_schema::SchemaRef; use datafusion::datasource::MemTable; use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion_common::{human_readable_size, instant::Instant, Result}; +use datafusion_common::{Result, human_readable_size, 
instant::Instant}; use datafusion_execution::disk_manager::DiskManagerBuilder; use datafusion_execution::memory_pool::{MemoryPool, UnboundedMemoryPool}; use datafusion_expr::display_schema; @@ -32,14 +32,14 @@ use datafusion_physical_plan::spill::get_record_batch_memory_size; use std::time::Duration; use datafusion_execution::{memory_pool::FairSpillPool, runtime_env::RuntimeEnvBuilder}; -use rand::prelude::IndexedRandom; use rand::Rng; -use rand::{rngs::StdRng, SeedableRng}; +use rand::prelude::IndexedRandom; +use rand::{SeedableRng, rngs::StdRng}; use crate::fuzz_cases::aggregation_fuzzer::check_equality_of_batches; use super::aggregation_fuzzer::ColumnDescr; -use super::record_batch_generator::{get_supported_types_columns, RecordBatchGenerator}; +use super::record_batch_generator::{RecordBatchGenerator, get_supported_types_columns}; /// Entry point for executing the sort query fuzzer. /// @@ -175,16 +175,16 @@ impl SortQueryFuzzer { n_round: usize, n_query: usize, ) -> bool { - if let Some(time_limit) = self.time_limit { - if Instant::now().duration_since(start_time) > time_limit { - println!( - "[SortQueryFuzzer] Time limit reached: {} queries ({} random configs each) in {} rounds", - n_round * self.queries_per_round + n_query, - self.config_variations_per_query, - n_round - ); - return true; - } + if let Some(time_limit) = self.time_limit + && Instant::now().duration_since(start_time) > time_limit + { + println!( + "[SortQueryFuzzer] Time limit reached: {} queries ({} random configs each) in {} rounds", + n_round * self.queries_per_round + n_query, + self.config_variations_per_query, + n_round + ); + return true; } false } diff --git a/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs b/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs index 6ea0182c25f2..16481516e0be 100644 --- a/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs +++ b/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs @@ -27,9 +27,9 @@ use arrow::{array::StringArray, compute::SortOptions, record_batch::RecordBatch} use arrow_schema::{DataType, Field, Schema}; use datafusion::common::Result; use datafusion::execution::runtime_env::RuntimeEnvBuilder; +use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::sorts::sort::SortExec; -use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionConfig; use datafusion_common::units::{KB, MB}; use datafusion_execution::memory_pool::{ @@ -38,7 +38,7 @@ use datafusion_execution::memory_pool::{ use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_physical_expr::aggregate::AggregateExprBuilder; -use datafusion_physical_expr::expressions::{col, Column}; +use datafusion_physical_expr::expressions::{Column, col}; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, @@ -80,9 +80,9 @@ async fn test_sort_with_limited_memory() -> Result<()> { let total_spill_files_size = spill_count * record_batch_size; assert!( - total_spill_files_size > pool_size, - "Total spill files size {total_spill_files_size} should be greater than pool size {pool_size}", - ); + total_spill_files_size > pool_size, + "Total spill files size {total_spill_files_size} should be greater than pool size {pool_size}", + ); Ok(()) } @@ 
-126,8 +126,8 @@ async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch() -> } #[tokio::test] -async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_changing_memory_reservation( -) -> Result<()> { +async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_changing_memory_reservation() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -164,8 +164,8 @@ async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_c } #[tokio::test] -async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_take_all_memory( -) -> Result<()> { +async fn test_sort_with_limited_memory_and_different_sizes_of_record_batch_and_take_all_memory() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -356,16 +356,16 @@ async fn test_aggregate_with_high_cardinality_with_limited_memory() -> Result<() let total_spill_files_size = spill_count * record_batch_size; assert!( - total_spill_files_size > pool_size, - "Total spill files size {total_spill_files_size} should be greater than pool size {pool_size}", - ); + total_spill_files_size > pool_size, + "Total spill files size {total_spill_files_size} should be greater than pool size {pool_size}", + ); Ok(()) } #[tokio::test] -async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch( -) -> Result<()> { +async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -398,8 +398,8 @@ async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_ } #[tokio::test] -async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch_and_changing_memory_reservation( -) -> Result<()> { +async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch_and_changing_memory_reservation() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -432,8 +432,8 @@ async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_ } #[tokio::test] -async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch_and_take_all_memory( -) -> Result<()> { +async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_sizes_of_record_batch_and_take_all_memory() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { @@ -466,8 +466,8 @@ async fn test_aggregate_with_high_cardinality_with_limited_memory_and_different_ } #[tokio::test] -async fn test_aggregate_with_high_cardinality_with_limited_memory_and_large_record_batch( -) -> Result<()> { +async fn test_aggregate_with_high_cardinality_with_limited_memory_and_large_record_batch() +-> Result<()> { let record_batch_size = 8192; let pool_size = 2 * MB as usize; let task_ctx = { diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 65a41d39d3c5..2ecfcd84aba9 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -18,19 +18,19 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array, StringArray}; -use arrow::compute::{concat_batches, SortOptions}; +use arrow::compute::{SortOptions, concat_batches}; use 
arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::source::DataSourceExec; use datafusion::functions_window::row_number::row_number_udwf; +use datafusion::physical_plan::InputOrderMode::{Linear, PartiallySorted, Sorted}; use datafusion::physical_plan::sorts::sort::SortExec; use datafusion::physical_plan::windows::{ - create_window_expr, schema_add_window_field, BoundedWindowAggExec, WindowAggExec, + BoundedWindowAggExec, WindowAggExec, create_window_expr, schema_add_window_field, }; -use datafusion::physical_plan::InputOrderMode::{Linear, PartiallySorted, Sorted}; -use datafusion::physical_plan::{collect, InputOrderMode}; +use datafusion::physical_plan::{InputOrderMode, collect}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::HashMap; use datafusion_common::{Result, ScalarValue}; @@ -445,14 +445,14 @@ fn get_random_function( let fn_name = window_fn_map.keys().collect::>()[rand_fn_idx]; let (window_fn, args) = window_fn_map.values().collect::>()[rand_fn_idx]; let mut args = args.clone(); - if let WindowFunctionDefinition::AggregateUDF(udf) = window_fn { - if !args.is_empty() { - // Do type coercion first argument - let a = args[0].clone(); - let dt = a.return_field(schema.as_ref()).unwrap(); - let coerced = fields_with_aggregate_udf(&[dt], udf).unwrap(); - args[0] = cast(a, schema, coerced[0].data_type().clone()).unwrap(); - } + if let WindowFunctionDefinition::AggregateUDF(udf) = window_fn + && !args.is_empty() + { + // Do type coercion first argument + let a = args[0].clone(); + let dt = a.return_field(schema.as_ref()).unwrap(); + let coerced = fields_with_aggregate_udf(&[dt], udf).unwrap(); + args[0] = cast(a, schema, coerced[0].data_type().clone()).unwrap(); } (window_fn.clone(), args, (*fn_name).to_string()) @@ -569,10 +569,11 @@ fn convert_bound_to_current_row_if_applicable( ) { match bound { WindowFrameBound::Preceding(value) | WindowFrameBound::Following(value) => { - if let Ok(zero) = ScalarValue::new_zero(&value.data_type()) { - if value == &zero && rng.random_range(0..2) == 0 { - *bound = WindowFrameBound::CurrentRow; - } + if let Ok(zero) = ScalarValue::new_zero(&value.data_type()) + && value == &zero + && rng.random_range(0..2) == 0 + { + *bound = WindowFrameBound::CurrentRow; } } _ => {} @@ -644,10 +645,8 @@ async fn run_window_test( ) as _; // Table is ordered according to ORDER BY a, b, c In linear test we use PARTITION BY b, ORDER BY a // For WindowAggExec to produce correct result it need table to be ordered by b,a. Hence add a sort. - if is_linear { - if let Some(ordering) = LexOrdering::new(sort_keys) { - exec1 = Arc::new(SortExec::new(ordering, exec1)) as _; - } + if is_linear && let Some(ordering) = LexOrdering::new(sort_keys) { + exec1 = Arc::new(SortExec::new(ordering, exec1)) as _; } let extended_schema = schema_add_window_field(&args, &schema, &window_fn, &fn_name)?; @@ -699,7 +698,9 @@ async fn run_window_test( // BoundedWindowAggExec should produce more chunk than the usual WindowAggExec. // Otherwise it means that we cannot generate result in running mode. 
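[Editor's note] Not every rewrite here is an import shuffle: the window_fuzz.rs hunks above collapse nested if-let blocks into let-chains, which become available with the 2024 edition. A minimal sketch of the pattern, with a hypothetical function name (past_deadline is not from this codebase):

use std::time::Duration;

// Edition 2024 lets a `let` binding chain with further conditions in a
// single `if`, replacing the nested `if let ... { if ... }` shape.
fn past_deadline(time_limit: Option<Duration>, elapsed: Duration) -> bool {
    if let Some(limit) = time_limit && elapsed > limit {
        return true;
    }
    false
}

The same transformation drives the convert_bound_to_current_row_if_applicable and is_linear hunks above.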
- let err_msg = format!("Inconsistent result for window_frame: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, random_seed: {random_seed:?}, search_mode: {search_mode:?}, partition_by_columns:{partition_by_columns:?}, orderby_columns: {orderby_columns:?}"); + let err_msg = format!( + "Inconsistent result for window_frame: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, random_seed: {random_seed:?}, search_mode: {search_mode:?}, partition_by_columns:{partition_by_columns:?}, orderby_columns: {orderby_columns:?}" + ); // Below check makes sure that, streaming execution generates more chunks than the bulk execution. // Since algorithms and operators works on sliding windows in the streaming execution. // However, in the current test setup for some random generated window frame clauses: It is not guaranteed @@ -731,8 +732,12 @@ async fn run_window_test( .enumerate() { if !usual_line.eq(running_line) { - println!("Inconsistent result for window_frame at line:{i:?}: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, pb_cols:{partition_by_columns:?}, ob_cols:{orderby_columns:?}, search_mode:{search_mode:?}"); - println!("--------usual_formatted_sorted----------------running_formatted_sorted--------"); + println!( + "Inconsistent result for window_frame at line:{i:?}: {window_frame:?}, window_fn: {window_fn:?}, args:{args:?}, pb_cols:{partition_by_columns:?}, ob_cols:{orderby_columns:?}, search_mode:{search_mode:?}" + ); + println!( + "--------usual_formatted_sorted----------------running_formatted_sorted--------" + ); for (line1, line2) in usual_formatted_sorted.iter().zip(running_formatted_sorted) { diff --git a/datafusion/core/tests/memory_limit/memory_limit_validation/utils.rs b/datafusion/core/tests/memory_limit/memory_limit_validation/utils.rs index 899c9f88d8a2..2c9fae20c860 100644 --- a/datafusion/core/tests/memory_limit/memory_limit_validation/utils.rs +++ b/datafusion/core/tests/memory_limit/memory_limit_validation/utils.rs @@ -16,10 +16,10 @@ // under the License. 
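[Editor's note] The remaining churn above is line-width driven: under the 2024 style, a format! (or panic!/println!) whose string literal would overflow the maximum width moves the literal onto its own indented line, and an overlong function signature breaks before the return type instead of after the opening parenthesis. A small sketch combining both shapes, assuming an invented function name:

fn an_extremely_long_test_name_that_forces_the_signature_to_wrap_under_rustfmt()
-> Result<(), String> {
    let batch_size = 8192;
    // The long literal gets its own line rather than trailing `format!(`.
    Err(format!(
        "HashJoinExec and SortMergeJoinExec produced different row counts, batch_size: {batch_size}"
    ))
}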
use datafusion_common_runtime::SpawnedTask; -use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, System}; -use tokio::time::{interval, Duration}; +use tokio::time::{Duration, interval}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::human_readable_size; @@ -38,7 +38,7 @@ use datafusion_execution::{memory_pool::FairSpillPool, runtime_env::RuntimeEnvBu async fn measure_max_rss(f: F) -> (T, usize) where F: FnOnce() -> Fut, - Fut: std::future::Future, + Fut: Future, { // Initialize system information let mut system = System::new_all(); diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index 5d8a1d24181c..c28d23ba0602 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -39,19 +39,19 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::streaming::PartitionStream; use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion_catalog::streaming::StreamingTable; use datafusion_catalog::Session; -use datafusion_common::{assert_contains, Result}; +use datafusion_catalog::streaming::StreamingTable; +use datafusion_common::{Result, assert_contains}; +use datafusion_execution::TaskContext; use datafusion_execution::disk_manager::{DiskManagerBuilder, DiskManagerMode}; use datafusion_execution::memory_pool::{ FairSpillPool, GreedyMemoryPool, MemoryPool, TrackConsumersPool, }; use datafusion_execution::runtime_env::RuntimeEnv; -use datafusion_execution::TaskContext; use datafusion_expr::{Expr, TableType}; use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_optimizer::join_selection::JoinSelection; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::join_selection::JoinSelection; use datafusion_physical_plan::collect as collect_batches; use datafusion_physical_plan::common::collect; use datafusion_physical_plan::spill::get_record_batch_memory_size; @@ -604,8 +604,8 @@ async fn test_disk_spill_limit_reached() -> Result<()> { let err = df.collect().await.unwrap_err(); assert_contains!( - err.to_string(), - "The used disk space during the spilling process has exceeded the allowable limit" + err.to_string(), + "The used disk space during the spilling process has exceeded the allowable limit" ); Ok(()) @@ -977,11 +977,13 @@ impl Scenario { descending: false, nulls_first: false, }; - let sort_information = vec![[ - PhysicalSortExpr::new(col("a", &schema).unwrap(), options), - PhysicalSortExpr::new(col("b", &schema).unwrap(), options), - ] - .into()]; + let sort_information = vec![ + [ + PhysicalSortExpr::new(col("a", &schema).unwrap(), options), + PhysicalSortExpr::new(col("b", &schema).unwrap(), options), + ] + .into(), + ]; let table = SortedTableProvider::new(batches, sort_information); Arc::new(table) @@ -1057,7 +1059,7 @@ fn make_dict_batches() -> Vec { let batch_size = 50; let mut i = 0; - let gen = std::iter::from_fn(move || { + let batch_gen = std::iter::from_fn(move || { // create values like // 0000000001 // 0000000002 @@ -1080,7 +1082,7 @@ fn make_dict_batches() -> Vec { let num_batches = 5; - let batches: Vec<_> = gen.take(num_batches).collect(); + let batches: Vec<_> = batch_gen.take(num_batches).collect(); 
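[Editor's note] The gen-to-batch_gen rename just above (like data_gen earlier in data_generator.rs) is required rather than cosmetic: gen is a reserved keyword in the 2024 edition, held back in anticipation of gen blocks. A minimal sketch of the two ways to keep such code compiling:

fn demo() {
    // `let gen = ...;` no longer parses under edition 2024, so rename:
    let batch_gen = std::iter::from_fn(|| Some(1)).take(3);
    assert_eq!(batch_gen.collect::<Vec<i32>>(), vec![1, 1, 1]);
    // ...or keep the old name behind a raw identifier:
    let r#gen = 42;
    let _ = r#gen;
}

This patch renames rather than reaching for r#gen, which keeps call sites readable.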
batches.iter().enumerate().for_each(|(i, batch)| { println!("Dict batch[{i}] size is: {}", batch.get_array_memory_size()); diff --git a/datafusion/core/tests/memory_limit/repartition_mem_limit.rs b/datafusion/core/tests/memory_limit/repartition_mem_limit.rs index a7af2f01d1cc..b21bffebaf95 100644 --- a/datafusion/core/tests/memory_limit/repartition_mem_limit.rs +++ b/datafusion/core/tests/memory_limit/repartition_mem_limit.rs @@ -25,7 +25,7 @@ use datafusion::{ use datafusion_catalog::MemTable; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_execution::runtime_env::RuntimeEnvBuilder; -use datafusion_physical_plan::{repartition::RepartitionExec, ExecutionPlanProperties}; +use datafusion_physical_plan::{ExecutionPlanProperties, repartition::RepartitionExec}; use futures::TryStreamExt; use itertools::Itertools; @@ -45,11 +45,14 @@ async fn test_repartition_memory_limit() { .with_batch_size(32) .with_target_partitions(2); let ctx = SessionContext::new_with_config_rt(config, Arc::new(runtime)); - let batches = vec![RecordBatch::try_from_iter(vec![( - "c1", - Arc::new(Int32Array::from_iter_values((0..10).cycle().take(100_000))) as ArrayRef, - )]) - .unwrap()]; + let batches = vec![ + RecordBatch::try_from_iter(vec![( + "c1", + Arc::new(Int32Array::from_iter_values((0..10).cycle().take(100_000))) + as ArrayRef, + )]) + .unwrap(), + ]; let table = Arc::new(MemTable::try_new(batches[0].schema(), vec![batches]).unwrap()); ctx.register_table("t", table).unwrap(); let plan = ctx diff --git a/datafusion/core/tests/optimizer/mod.rs b/datafusion/core/tests/optimizer/mod.rs index b288706a54c9..6466e9ad96d1 100644 --- a/datafusion/core/tests/optimizer/mod.rs +++ b/datafusion/core/tests/optimizer/mod.rs @@ -28,11 +28,11 @@ use arrow::datatypes::{ }; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::TransformedResult; -use datafusion_common::{plan_err, DFSchema, Result, ScalarValue, TableReference}; +use datafusion_common::{DFSchema, Result, ScalarValue, TableReference, plan_err}; use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; use datafusion_expr::{ - col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator, - ScalarUDF, TableSource, WindowUDF, + AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator, ScalarUDF, + TableSource, WindowUDF, col, lit, }; use datafusion_functions::core::expr_ext::FieldAccessor; use datafusion_optimizer::analyzer::Analyzer; diff --git a/datafusion/core/tests/parquet/custom_reader.rs b/datafusion/core/tests/parquet/custom_reader.rs index 0a147d15a6fd..31ec6efd1951 100644 --- a/datafusion/core/tests/parquet/custom_reader.rs +++ b/datafusion/core/tests/parquet/custom_reader.rs @@ -20,7 +20,7 @@ use std::ops::Range; use std::sync::Arc; use std::time::SystemTime; -use arrow::array::{ArrayRef, Int64Array, Int8Array, StringArray}; +use arrow::array::{ArrayRef, Int8Array, Int64Array, StringArray}; use arrow::datatypes::{Field, Schema, SchemaBuilder}; use arrow::record_batch::RecordBatch; use datafusion::datasource::listing::PartitionedFile; @@ -31,8 +31,8 @@ use datafusion::datasource::physical_plan::{ use datafusion::physical_plan::collect; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::prelude::SessionContext; -use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::Result; +use datafusion_common::test_util::batches_to_sort_string; use bytes::Bytes; use 
datafusion_datasource::file_scan_config::FileScanConfigBuilder; @@ -44,9 +44,9 @@ use insta::assert_snapshot; use object_store::memory::InMemory; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; +use parquet::arrow::ArrowWriter; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::async_reader::AsyncFileReader; -use parquet::arrow::ArrowWriter; use parquet::errors::ParquetError; use parquet::file::metadata::ParquetMetaData; diff --git a/datafusion/core/tests/parquet/encryption.rs b/datafusion/core/tests/parquet/encryption.rs index 82d8e61d9a2e..8b3170e36745 100644 --- a/datafusion/core/tests/parquet/encryption.rs +++ b/datafusion/core/tests/parquet/encryption.rs @@ -25,11 +25,11 @@ use datafusion::dataframe::DataFrameWriteOptions; use datafusion::datasource::listing::ListingOptions; use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion_common::config::{EncryptionFactoryOptions, TableParquetOptions}; -use datafusion_common::{assert_batches_sorted_eq, exec_datafusion_err, DataFusionError}; +use datafusion_common::{DataFusionError, assert_batches_sorted_eq, exec_datafusion_err}; use datafusion_datasource_parquet::ParquetFormat; use datafusion_execution::parquet_encryption::EncryptionFactory; -use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::ArrowWriter; +use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::encryption::decrypt::FileDecryptionProperties; use parquet::encryption::encrypt::FileEncryptionProperties; use parquet::file::column_crypto_metadata::ColumnCryptoMetaData; diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index 9022b253564a..0c02c8fe523d 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -21,7 +21,7 @@ use std::path::Path; use std::sync::Arc; use crate::parquet::utils::MetricsFinder; -use crate::parquet::{create_data_batch, Scenario}; +use crate::parquet::{Scenario, create_data_batch}; use arrow::datatypes::SchemaRef; use arrow::util::pretty::pretty_format_batches; @@ -29,17 +29,17 @@ use datafusion::common::Result; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::ParquetSource; use datafusion::prelude::SessionContext; -use datafusion_common::{assert_contains, DFSchema}; +use datafusion_common::{DFSchema, assert_contains}; use datafusion_datasource_parquet::{ParquetAccessPlan, RowGroupAccess}; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_expr::{col, lit, Expr}; -use datafusion_physical_plan::metrics::{MetricValue, MetricsSet}; +use datafusion_expr::{Expr, col, lit}; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::metrics::{MetricValue, MetricsSet}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; -use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::arrow::ArrowWriter; +use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::file::properties::WriterProperties; use tempfile::NamedTempFile; diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 942c1e29d43a..fdefdafa00aa 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -18,30 
+18,30 @@ use std::fs; use std::sync::Arc; +use datafusion::datasource::TableProvider; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; use datafusion::datasource::source::DataSourceExec; -use datafusion::datasource::TableProvider; use datafusion::execution::context::SessionState; use datafusion::execution::session_state::SessionStateBuilder; use datafusion::prelude::SessionContext; -use datafusion_common::stats::Precision; use datafusion_common::DFSchema; +use datafusion_common::stats::Precision; +use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::cache::cache_manager::CacheManagerConfig; use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; -use datafusion_execution::cache::DefaultListFilesCache; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; -use datafusion_expr::{col, lit, Expr}; +use datafusion_expr::{Expr, col, lit}; use datafusion::datasource::physical_plan::FileScanConfig; use datafusion_common::config::ConfigOptions; -use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_optimizer::PhysicalOptimizerRule; -use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::filter::FilterExec; use tempfile::tempdir; #[tokio::test] diff --git a/datafusion/core/tests/parquet/filter_pushdown.rs b/datafusion/core/tests/parquet/filter_pushdown.rs index 1d64669fadd9..e3a191ee9ade 100644 --- a/datafusion/core/tests/parquet/filter_pushdown.rs +++ b/datafusion/core/tests/parquet/filter_pushdown.rs @@ -31,7 +31,7 @@ use arrow::record_batch::RecordBatch; use datafusion::physical_plan::collect; use datafusion::physical_plan::metrics::{MetricValue, MetricsSet}; use datafusion::prelude::{ - col, lit, lit_timestamp_nano, Expr, ParquetReadOptions, SessionContext, + Expr, ParquetReadOptions, SessionContext, col, lit, lit_timestamp_nano, }; use datafusion::test_util::parquet::{ParquetScanOptions, TestParquetFile}; use datafusion_expr::utils::{conjunction, disjunction, split_conjunction}; @@ -645,8 +645,8 @@ async fn predicate_cache_pushdown_default() -> datafusion_common::Result<()> { } #[tokio::test] -async fn predicate_cache_pushdown_default_selections_only( -) -> datafusion_common::Result<()> { +async fn predicate_cache_pushdown_default_selections_only() +-> datafusion_common::Result<()> { let mut config = SessionConfig::new(); config.options_mut().execution.parquet.pushdown_filters = true; // forcing filter selections minimizes the number of rows read from the cache diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index aa2a687d2980..0a0478dd03c2 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -19,12 +19,12 @@ use crate::parquet::utils::MetricsFinder; use arrow::{ array::{ - make_array, Array, ArrayRef, BinaryArray, Date32Array, Date64Array, - Decimal128Array, DictionaryArray, FixedSizeBinaryArray, Float64Array, Int16Array, - Int32Array, Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, - StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, + Array, ArrayRef, BinaryArray, Date32Array, 
Date64Array, Decimal128Array, + DictionaryArray, FixedSizeBinaryArray, Float64Array, Int8Array, Int16Array, + Int32Array, Int64Array, LargeBinaryArray, LargeStringArray, StringArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, + make_array, }, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, @@ -32,7 +32,7 @@ use arrow::{ }; use chrono::{Datelike, Duration, TimeDelta}; use datafusion::{ - datasource::{provider_as_source, TableProvider}, + datasource::{TableProvider, provider_as_source}, physical_plan::metrics::MetricsSet, prelude::{ParquetReadOptions, SessionConfig, SessionContext}, }; @@ -147,15 +147,14 @@ impl TestOutput { for metric in self.parquet_metrics.iter() { let metric = metric.as_ref(); - if metric.value().name() == metric_name { - if let MetricValue::PruningMetrics { + if metric.value().name() == metric_name + && let MetricValue::PruningMetrics { pruning_metrics, .. } = metric.value() - { - total_pruned += pruning_metrics.pruned(); - total_matched += pruning_metrics.matched(); - found = true; - } + { + total_pruned += pruning_metrics.pruned(); + total_matched += pruning_metrics.matched(); + found = true; } } diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index fb2a196b0aa6..17392974b63a 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -21,25 +21,25 @@ use crate::parquet::Unit::Page; use crate::parquet::{ContextWithParquet, Scenario}; use arrow::array::RecordBatch; -use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::ParquetSource; use datafusion::datasource::source::DataSourceExec; use datafusion::execution::context::SessionState; -use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::metrics::MetricValue; use datafusion::prelude::SessionContext; use datafusion_common::{ScalarValue, ToDFSchema}; use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::{col, lit, Expr}; +use datafusion_expr::{Expr, col, lit}; use datafusion_physical_expr::create_physical_expr; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use futures::StreamExt; -use object_store::path::Path; use object_store::ObjectMeta; +use object_store::path::Path; async fn get_parquet_exec( state: &SessionState, diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 3c12862f38a0..7e2240cf6b79 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use arrow::array::{record_batch, RecordBatch}; +use arrow::array::{RecordBatch, record_batch}; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use bytes::{BufMut, BytesMut}; use datafusion::assert_batches_eq; @@ -26,18 +26,18 @@ use datafusion::datasource::listing::{ ListingTable, ListingTableConfig, ListingTableConfigExt, }; use datafusion::prelude::{SessionConfig, SessionContext}; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use 
datafusion_common::DataFusionError; use datafusion_common::ScalarValue; +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_datasource::ListingTableUrl; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_physical_expr::expressions::{self, Column}; use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_expr::expressions::{self, Column}; use datafusion_physical_expr_adapter::{ DefaultPhysicalExprAdapter, DefaultPhysicalExprAdapterFactory, PhysicalExprAdapter, PhysicalExprAdapterFactory, }; -use object_store::{memory::InMemory, path::Path, ObjectStore}; +use object_store::{ObjectStore, memory::InMemory, path::Path}; use parquet::arrow::ArrowWriter; async fn write_parquet(batch: RecordBatch, store: Arc, path: &str) { diff --git a/datafusion/core/tests/parquet/schema_coercion.rs b/datafusion/core/tests/parquet/schema_coercion.rs index e8666f07595e..6f7e2e328d0c 100644 --- a/datafusion/core/tests/parquet/schema_coercion.rs +++ b/datafusion/core/tests/parquet/schema_coercion.rs @@ -18,16 +18,16 @@ use std::sync::Arc; use arrow::array::{ - types::Int32Type, ArrayRef, DictionaryArray, Float32Array, Int64Array, RecordBatch, - StringArray, + ArrayRef, DictionaryArray, Float32Array, Int64Array, RecordBatch, StringArray, + types::Int32Type, }; use arrow::datatypes::{DataType, Field, Schema}; use datafusion::datasource::physical_plan::ParquetSource; use datafusion::physical_plan::collect; use datafusion::prelude::SessionContext; use datafusion::test::object_store::local_unpartitioned_file; -use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::Result; +use datafusion_common::test_util::batches_to_sort_string; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; diff --git a/datafusion/core/tests/parquet/utils.rs b/datafusion/core/tests/parquet/utils.rs index 24b6cadc148f..e5e0026ec1f1 100644 --- a/datafusion/core/tests/parquet/utils.rs +++ b/datafusion/core/tests/parquet/utils.rs @@ -20,7 +20,7 @@ use datafusion::datasource::physical_plan::ParquetSource; use datafusion::datasource::source::DataSourceExec; use datafusion_physical_plan::metrics::MetricsSet; -use datafusion_physical_plan::{accept, ExecutionPlan, ExecutionPlanVisitor}; +use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanVisitor, accept}; /// Find the metrics from the first DataSourceExec encountered in the plan #[derive(Debug)] @@ -47,13 +47,12 @@ impl MetricsFinder { impl ExecutionPlanVisitor for MetricsFinder { type Error = std::convert::Infallible; fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result { - if let Some(data_source_exec) = plan.as_any().downcast_ref::() { - if data_source_exec + if let Some(data_source_exec) = plan.as_any().downcast_ref::() + && data_source_exec .downcast_to_file_source::() .is_some() - { - self.metrics = data_source_exec.metrics(); - } + { + self.metrics = data_source_exec.metrics(); } // stop searching once we have found the metrics Ok(self.metrics.is_none()) diff --git a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs index a79d743cb253..1fdc0ae6c7f6 100644 --- a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs @@ -24,14 +24,15 @@ use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use 
datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::source::DataSourceExec; +use datafusion_common::Result; use datafusion_common::cast::as_int64_array; use datafusion_common::config::ConfigOptions; -use datafusion_common::Result; use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{self, cast}; -use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::aggregates::AggregateMode; use datafusion_physical_plan::aggregates::PhysicalGroupBy; @@ -39,7 +40,6 @@ use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::common; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::ExecutionPlan; /// Mock data using a MemorySourceConfig which has an exact count statistic fn mock_data() -> Result> { diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs index 9c76f6ab6f58..e73f5318079e 100644 --- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs @@ -29,18 +29,18 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::sum::sum_udaf; +use datafusion_physical_expr::Partitioning; use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::expressions::{col, lit}; -use datafusion_physical_expr::Partitioning; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; +use datafusion_physical_plan::ExecutionPlan; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; use datafusion_physical_plan::displayable; use datafusion_physical_plan::repartition::RepartitionExec; -use datafusion_physical_plan::ExecutionPlan; /// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected macro_rules! 
assert_optimized { diff --git a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs index a3d9a1e407c7..04dffced31b0 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs @@ -26,33 +26,33 @@ use crate::physical_optimizer::test_utils::{ sort_preserving_merge_exec, union_exec, }; -use arrow::array::{RecordBatch, UInt64Array, UInt8Array}; +use arrow::array::{RecordBatch, UInt8Array, UInt64Array}; use arrow::compute::SortOptions; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion::config::ConfigOptions; +use datafusion::datasource::MemTable; use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::{CsvSource, ParquetSource}; use datafusion::datasource::source::DataSourceExec; -use datafusion::datasource::MemTable; use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_common::ScalarValue; use datafusion_common::config::CsvOptions; use datafusion_common::error::Result; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::ScalarValue; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_expr::{JoinType, Operator}; -use datafusion_physical_expr::expressions::{binary, lit, BinaryExpr, Column, Literal}; +use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal, binary, lit}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{ LexOrdering, OrderingRequirements, PhysicalSortExpr, }; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_optimizer::enforce_distribution::*; use datafusion_physical_optimizer::enforce_sorting::EnforceSorting; use datafusion_physical_optimizer::output_requirements::OutputRequirements; -use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; @@ -67,8 +67,8 @@ use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr}; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::union::UnionExec; use datafusion_physical_plan::{ - displayable, DisplayAs, DisplayFormatType, ExecutionPlanProperties, PlanProperties, - Statistics, + DisplayAs, DisplayFormatType, ExecutionPlanProperties, PlanProperties, Statistics, + displayable, }; use insta::Settings; diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index 3f7fa62d6815..ca99b17be0df 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -19,14 +19,15 @@ use std::sync::Arc; use crate::memory_limit::DummyStreamPartition; use crate::physical_optimizer::test_utils::{ - aggregate_exec, bounded_window_exec, bounded_window_exec_with_partition, - check_integrity, coalesce_batches_exec, coalesce_partitions_exec, create_test_schema, - create_test_schema2, create_test_schema3, filter_exec, global_limit_exec, - hash_join_exec, local_limit_exec, memory_exec, parquet_exec, 
parquet_exec_with_sort, - projection_exec, repartition_exec, sort_exec, sort_exec_with_fetch, sort_expr, - sort_expr_options, sort_merge_join_exec, sort_preserving_merge_exec, + RequirementsTestExec, aggregate_exec, bounded_window_exec, + bounded_window_exec_with_partition, check_integrity, coalesce_batches_exec, + coalesce_partitions_exec, create_test_schema, create_test_schema2, + create_test_schema3, filter_exec, global_limit_exec, hash_join_exec, + local_limit_exec, memory_exec, parquet_exec, parquet_exec_with_sort, projection_exec, + repartition_exec, sort_exec, sort_exec_with_fetch, sort_expr, sort_expr_options, + sort_merge_join_exec, sort_preserving_merge_exec, sort_preserving_merge_exec_with_fetch, spr_repartition_exec, stream_exec_ordered, - union_exec, RequirementsTestExec, + union_exec, }; use arrow::compute::SortOptions; @@ -65,7 +66,7 @@ use datafusion_execution::TaskContext; use datafusion_catalog::streaming::StreamingTable; use futures::StreamExt; -use insta::{assert_snapshot, Settings}; +use insta::{Settings, assert_snapshot}; /// Create a sorted Csv exec fn csv_exec_sorted( @@ -366,8 +367,8 @@ async fn test_union_inputs_different_sorted2() -> Result<()> { #[tokio::test] // Test with `repartition_sorts` enabled to preserve pre-sorted partitions and avoid resorting -async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true( -) -> Result<()> { +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true() +-> Result<()> { assert_snapshot!( union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?, @r" @@ -392,8 +393,8 @@ async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_reparti #[tokio::test] // Test with `repartition_sorts` disabled, causing a full resort of the data -async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false( -) -> Result<()> { +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false() +-> Result<()> { assert_snapshot!( union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?, @r" @@ -770,8 +771,8 @@ async fn test_soft_hard_requirements_remove_soft_requirement() -> Result<()> { } #[tokio::test] -async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( -) -> Result<()> { +async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns() +-> Result<()> { let schema = create_test_schema()?; let source = parquet_exec(schema.clone()); let ordering = [sort_expr_options( @@ -1069,8 +1070,8 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { } #[tokio::test] -async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_requirement( -) -> Result<()> { +async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_requirement() +-> Result<()> { let schema = create_test_schema()?; let source = parquet_exec(schema.clone()); let ordering = [sort_expr_options( diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs index ef233e222912..de7611ff211a 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs @@ -31,7 +31,7 @@ use datafusion_physical_expr::expressions::col; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use 
datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::windows::{ - create_window_expr, BoundedWindowAggExec, WindowAggExec, + BoundedWindowAggExec, WindowAggExec, create_window_expr, }; use datafusion_physical_plan::{ExecutionPlan, InputOrderMode}; use insta::assert_snapshot; diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index 0956974cfcf1..f480de71d628 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs @@ -18,7 +18,7 @@ use std::sync::{Arc, LazyLock}; use arrow::{ - array::{record_batch, Float64Array, Int32Array, RecordBatch, StringArray}, + array::{Float64Array, Int32Array, RecordBatch, StringArray, record_batch}, datatypes::{DataType, Field, Schema, SchemaRef}, util::pretty::pretty_format_batches, }; @@ -27,8 +27,8 @@ use datafusion::{ assert_batches_eq, logical_expr::Operator, physical_plan::{ - expressions::{BinaryExpr, Column, Literal}, PhysicalExpr, + expressions::{BinaryExpr, Column, Literal}, }, prelude::{ParquetReadOptions, SessionConfig, SessionContext}, scalar::ScalarValue, @@ -36,7 +36,7 @@ use datafusion::{ use datafusion_catalog::memory::DataSourceExec; use datafusion_common::config::ConfigOptions; use datafusion_datasource::{ - file_groups::FileGroup, file_scan_config::FileScanConfigBuilder, PartitionedFile, + PartitionedFile, file_groups::FileGroup, file_scan_config::FileScanConfigBuilder, }; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::ScalarUDF; @@ -45,15 +45,16 @@ use datafusion_functions_aggregate::{ count::count_udaf, min_max::{max_udaf, min_udaf}, }; +use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr, expressions::col}; use datafusion_physical_expr::{ - aggregate::{AggregateExprBuilder, AggregateFunctionExpr}, Partitioning, ScalarFunctionExpr, + aggregate::{AggregateExprBuilder, AggregateFunctionExpr}, }; -use datafusion_physical_expr::{expressions::col, LexOrdering, PhysicalSortExpr}; use datafusion_physical_optimizer::{ - filter_pushdown::FilterPushdown, PhysicalOptimizerRule, + PhysicalOptimizerRule, filter_pushdown::FilterPushdown, }; use datafusion_physical_plan::{ + ExecutionPlan, aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}, coalesce_batches::CoalesceBatchesExec, coalesce_partitions::CoalescePartitionsExec, @@ -61,14 +62,13 @@ use datafusion_physical_plan::{ filter::FilterExec, repartition::RepartitionExec, sorts::sort::SortExec, - ExecutionPlan, }; use datafusion_physical_plan::union::UnionExec; use futures::StreamExt; -use object_store::{memory::InMemory, ObjectStore}; +use object_store::{ObjectStore, memory::InMemory}; use regex::Regex; -use util::{format_plan_for_test, OptimizationTest, TestNode, TestScanBuilder}; +use util::{OptimizationTest, TestNode, TestScanBuilder, format_plan_for_test}; use crate::physical_optimizer::filter_pushdown::util::TestSource; @@ -182,12 +182,14 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8View, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8View, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) + ) + .unwrap(), + ]; let 
build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8View, false), @@ -199,12 +201,14 @@ async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("d", Utf8, ["aa", "ab", "ac", "ad"]), - ("e", Utf8View, ["ba", "bb", "bc", "bd"]), - ("f", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("d", Utf8, ["aa", "ab", "ac", "ad"]), + ("e", Utf8View, ["ba", "bb", "bc", "bd"]), + ("f", Float64, [1.0, 2.0, 3.0, 4.0]) + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("d", DataType::Utf8, false), Field::new("e", DataType::Utf8View, false), @@ -299,12 +303,14 @@ async fn test_static_filter_pushdown_through_hash_join() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8View, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8View, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8View, false), @@ -316,12 +322,14 @@ async fn test_static_filter_pushdown_through_hash_join() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("d", Utf8, ["aa", "ab", "ac", "ad"]), - ("e", Utf8View, ["ba", "bb", "bc", "bd"]), - ("f", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("d", Utf8, ["aa", "ab", "ac", "ad"]), + ("e", Utf8View, ["ba", "bb", "bc", "bd"]), + ("f", Float64, [1.0, 2.0, 3.0, 4.0]) + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("d", DataType::Utf8, false), Field::new("e", DataType::Utf8View, false), @@ -562,15 +570,14 @@ fn test_pushdown_through_aggregates_on_grouping_columns() { FilterExec::try_new(col_lit_predicate("a", "foo", &schema()), coalesce).unwrap(), ); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; let group_by = PhysicalGroupBy::new_single(vec![ (col("a", &schema()).unwrap(), "a".to_string()), (col("b", &schema()).unwrap(), "b".to_string()), @@ -975,12 +982,14 @@ async fn test_hashjoin_dynamic_filter_pushdown() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -992,12 +1001,14 @@ async fn test_hashjoin_dynamic_filter_pushdown() { .build(); // Create probe side with more values 
- let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1143,12 +1154,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { // +---------------+------------------------------------------------------------+ // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1160,12 +1173,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1363,12 +1378,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { use datafusion_common::JoinType; use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1380,12 +1397,14 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1542,10 +1561,9 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { // Create test data for three tables: t1, t2, t3 // t1: small table with limited values (will be build side 
of outer join) - let t1_batches = - vec![ - record_batch!(("a", Utf8, ["aa", "ab"]), ("x", Float64, [1.0, 2.0])).unwrap(), - ]; + let t1_batches = vec![ + record_batch!(("a", Utf8, ["aa", "ab"]), ("x", Float64, [1.0, 2.0])).unwrap(), + ]; let t1_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("x", DataType::Float64, false), @@ -1556,12 +1574,14 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { .build(); // t2: larger table (will be probe side of inner join, build side of outer join) - let t2_batches = vec![record_batch!( - ("b", Utf8, ["aa", "ab", "ac", "ad", "ae"]), - ("c", Utf8, ["ca", "cb", "cc", "cd", "ce"]), - ("y", Float64, [1.0, 2.0, 3.0, 4.0, 5.0]) - ) - .unwrap()]; + let t2_batches = vec![ + record_batch!( + ("b", Utf8, ["aa", "ab", "ac", "ad", "ae"]), + ("c", Utf8, ["ca", "cb", "cc", "cd", "ce"]), + ("y", Float64, [1.0, 2.0, 3.0, 4.0, 5.0]) + ) + .unwrap(), + ]; let t2_schema = Arc::new(Schema::new(vec![ Field::new("b", DataType::Utf8, false), Field::new("c", DataType::Utf8, false), @@ -1573,11 +1593,13 @@ async fn test_nested_hashjoin_dynamic_filter_pushdown() { .build(); // t3: largest table (will be probe side of inner join) - let t3_batches = vec![record_batch!( - ("d", Utf8, ["ca", "cb", "cc", "cd", "ce", "cf", "cg", "ch"]), - ("z", Float64, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) - ) - .unwrap()]; + let t3_batches = vec![ + record_batch!( + ("d", Utf8, ["ca", "cb", "cc", "cd", "ce", "cf", "cg", "ch"]), + ("z", Float64, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]) + ) + .unwrap(), + ]; let t3_schema = Arc::new(Schema::new(vec![ Field::new("d", DataType::Utf8, false), Field::new("z", DataType::Float64, false), @@ -1689,12 +1711,14 @@ async fn test_hashjoin_parent_filter_pushdown() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -1706,12 +1730,14 @@ async fn test_hashjoin_parent_filter_pushdown() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("d", Utf8, ["aa", "ab", "ac", "ad"]), - ("e", Utf8, ["ba", "bb", "bc", "bd"]), - ("f", Float64, [1.0, 2.0, 3.0, 4.0]) - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("d", Utf8, ["aa", "ab", "ac", "ad"]), + ("e", Utf8, ["ba", "bb", "bc", "bd"]), + ("f", Float64, [1.0, 2.0, 3.0, 4.0]) + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("d", DataType::Utf8, false), Field::new("e", DataType::Utf8, false), @@ -2112,11 +2138,9 @@ async fn test_aggregate_dynamic_filter_min_max_different_columns() { Field::new("a", DataType::Int32, true), Field::new("b", DataType::Int32, true), ])); - let batches = - vec![ - record_batch!(("a", Int32, [5, 1, 3, 8]), ("b", Int32, [7, 2, 4, 9])) - .unwrap(), - ]; + let batches = vec![ + record_batch!(("a", Int32, [5, 1, 3, 8]), ("b", Int32, [7, 2, 4, 9])).unwrap(), + ]; let min_expr = AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) @@ -2151,12 +2175,14 @@ async fn test_aggregate_dynamic_filter_multiple_mixed_expressions() { Field::new("b", DataType::Int32, 
true), Field::new("c", DataType::Int32, true), ])); - let batches = vec![record_batch!( - ("a", Int32, [5, 1, 3, 8]), - ("b", Int32, [10, 4, 6, 12]), - ("c", Int32, [100, 70, 90, 110]) - ) - .unwrap()]; + let batches = vec![ + record_batch!( + ("a", Int32, [5, 1, 3, 8]), + ("b", Int32, [10, 4, 6, 12]), + ("c", Int32, [100, 70, 90, 110]) + ) + .unwrap(), + ]; let min_a = AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()]) .schema(Arc::clone(&schema)) @@ -2328,10 +2354,9 @@ async fn test_aggregate_filter_pushdown() { // when the filter references grouping columns // Simulates: SELECT a, COUNT(b) FROM table WHERE a = 'x' GROUP BY a - let batches = - vec![ - record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), - ]; + let batches = vec![ + record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), + ]; let scan = TestScanBuilder::new(schema()) .with_support(true) @@ -2392,10 +2417,9 @@ async fn test_no_pushdown_filter_on_aggregate_result() { // SELECT a, COUNT(b) as cnt FROM table GROUP BY a HAVING cnt > 5 // The filter on 'cnt' cannot be pushed down because it's an aggregate result - let batches = - vec![ - record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), - ]; + let batches = vec![ + record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), + ]; let scan = TestScanBuilder::new(schema()) .with_support(true) @@ -2464,15 +2488,14 @@ fn test_pushdown_filter_on_non_first_grouping_column() { // The filter is on 'b' (second grouping column), should push down let scan = TestScanBuilder::new(schema()).with_support(true).build(); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; let group_by = PhysicalGroupBy::new_single(vec![ (col("a", &schema()).unwrap(), "a".to_string()), @@ -2515,15 +2538,14 @@ fn test_no_pushdown_grouping_sets_filter_on_missing_column() { // Test that filters on columns missing from some grouping sets are NOT pushed through let scan = TestScanBuilder::new(schema()).with_support(true).build(); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; // Create GROUPING SETS with (a, b) and (b) let group_by = PhysicalGroupBy::new( @@ -2586,15 +2608,14 @@ fn test_pushdown_grouping_sets_filter_on_common_column() { // Test that filters on columns present in ALL grouping sets ARE pushed through let scan = TestScanBuilder::new(schema()).with_support(true).build(); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; // Create GROUPING SETS with (a, b) and (b) let group_by = PhysicalGroupBy::new( @@ -2658,15 +2679,14 
+2679,14
@@ fn test_pushdown_with_empty_group_by() { // There are no grouping columns, so the filter should still push down let scan = TestScanBuilder::new(schema()).with_support(true).build(); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; // Empty GROUP BY - no grouping columns let group_by = PhysicalGroupBy::new_single(vec![]); @@ -2718,15 +2738,14 @@ fn test_pushdown_with_computed_grouping_key() { )) as Arc<dyn PhysicalExpr>; let filter = Arc::new(FilterExec::try_new(predicate, scan).unwrap()); - let aggregate_expr = - vec![ - AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) - .schema(schema()) - .alias("cnt") - .build() - .map(Arc::new) - .unwrap(), - ]; + let aggregate_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; let c_plus_one = Arc::new(BinaryExpr::new( col("c", &schema()).unwrap(), @@ -2786,11 +2805,13 @@ async fn test_hashjoin_dynamic_filter_all_partitions_empty() { .build(); // Create probe side with some data - let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac"]), - ("b", Utf8, ["ba", "bb", "bc"]) - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac"]), + ("b", Utf8, ["ba", "bb", "bc"]) + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -3055,12 +3076,14 @@ async fn test_hashjoin_hash_table_pushdown_partitioned() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with limited values - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -3072,12 +3095,14 @@ async fn test_hashjoin_hash_table_pushdown_partitioned() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -3220,12 +3245,14 @@ async fn test_hashjoin_hash_table_pushdown_collect_left() { use datafusion_common::JoinType; use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; - let build_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab"]), - ("b", Utf8, ["ba", "bb"]), - ("c", Float64, [1.0, 2.0]) // Extra column not used in join - ) - .unwrap()]; + let 
build_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab"]), + ("b", Utf8, ["ba", "bb"]), + ("c", Float64, [1.0, 2.0]) // Extra column not used in join + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -3237,12 +3264,14 @@ async fn test_hashjoin_hash_table_pushdown_collect_left() { .build(); // Create probe side with more values - let probe_batches = vec![record_batch!( - ("a", Utf8, ["aa", "ab", "ac", "ad"]), - ("b", Utf8, ["ba", "bb", "bc", "bd"]), - ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("a", Utf8, ["aa", "ab", "ac", "ad"]), + ("b", Utf8, ["ba", "bb", "bc", "bd"]), + ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Utf8, false), Field::new("b", DataType::Utf8, false), @@ -3369,12 +3398,14 @@ async fn test_hashjoin_hash_table_pushdown_integer_keys() { use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; // Create build side with integer keys - let build_batches = vec![record_batch!( - ("id1", Int32, [1, 2]), - ("id2", Int32, [10, 20]), - ("value", Float64, [100.0, 200.0]) - ) - .unwrap()]; + let build_batches = vec![ + record_batch!( + ("id1", Int32, [1, 2]), + ("id2", Int32, [10, 20]), + ("value", Float64, [100.0, 200.0]) + ) + .unwrap(), + ]; let build_side_schema = Arc::new(Schema::new(vec![ Field::new("id1", DataType::Int32, false), Field::new("id2", DataType::Int32, false), @@ -3386,12 +3417,14 @@ async fn test_hashjoin_hash_table_pushdown_integer_keys() { .build(); // Create probe side with more integer rows - let probe_batches = vec![record_batch!( - ("id1", Int32, [1, 2, 3, 4]), - ("id2", Int32, [10, 20, 30, 40]), - ("data", Utf8, ["a", "b", "c", "d"]) - ) - .unwrap()]; + let probe_batches = vec![ + record_batch!( + ("id1", Int32, [1, 2, 3, 4]), + ("id2", Int32, [10, 20, 30, 40]), + ("data", Utf8, ["a", "b", "c", "d"]) + ) + .unwrap(), + ]; let probe_side_schema = Arc::new(Schema::new(vec![ Field::new("id1", DataType::Int32, false), Field::new("id2", DataType::Int32, false), diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs index c32f7b2d0ba9..93e928b51a18 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs @@ -18,26 +18,25 @@ use arrow::datatypes::SchemaRef; use arrow::{array::RecordBatch, compute::concat_batches}; use datafusion::{datasource::object_store::ObjectStoreUrl, physical_plan::PhysicalExpr}; -use datafusion_common::{config::ConfigOptions, internal_err, Result}; +use datafusion_common::{Result, config::ConfigOptions, internal_err}; use datafusion_datasource::{ - file::FileSource, file_scan_config::FileScanConfig, + PartitionedFile, file::FileSource, file_scan_config::FileScanConfig, file_scan_config::FileScanConfigBuilder, file_stream::FileOpenFuture, file_stream::FileOpener, schema_adapter::DefaultSchemaAdapterFactory, - schema_adapter::SchemaAdapterFactory, source::DataSourceExec, PartitionedFile, + schema_adapter::SchemaAdapterFactory, source::DataSourceExec, }; use datafusion_physical_expr_common::physical_expr::fmt_sql; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::filter::batch_filter; use 
datafusion_physical_plan::filter_pushdown::{FilterPushdownPhase, PushedDown}; use datafusion_physical_plan::{ - displayable, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, displayable, filter::FilterExec, filter_pushdown::{ ChildFilterDescription, ChildPushdownResult, FilterDescription, FilterPushdownPropagation, }, metrics::ExecutionPlanMetricsSet, - DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, }; use futures::StreamExt; use futures::{FutureExt, Stream}; diff --git a/datafusion/core/tests/physical_optimizer/join_selection.rs b/datafusion/core/tests/physical_optimizer/join_selection.rs index f9d3a045469e..37bcefd418bd 100644 --- a/datafusion/core/tests/physical_optimizer/join_selection.rs +++ b/datafusion/core/tests/physical_optimizer/join_selection.rs @@ -26,27 +26,27 @@ use std::{ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::config::ConfigOptions; -use datafusion_common::{stats::Precision, ColumnStatistics, JoinType, ScalarValue}; +use datafusion_common::{ColumnStatistics, JoinType, ScalarValue, stats::Precision}; use datafusion_common::{JoinSide, NullEquality}; use datafusion_common::{Result, Statistics}; use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; use datafusion_expr::Operator; +use datafusion_physical_expr::PhysicalExprRef; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::expressions::{BinaryExpr, Column, NegativeExpr}; use datafusion_physical_expr::intervals::utils::check_support; -use datafusion_physical_expr::PhysicalExprRef; use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalExpr}; -use datafusion_physical_optimizer::join_selection::JoinSelection; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::join_selection::JoinSelection; +use datafusion_physical_plan::ExecutionPlanProperties; use datafusion_physical_plan::displayable; use datafusion_physical_plan::joins::utils::ColumnIndex; use datafusion_physical_plan::joins::utils::JoinFilter; use datafusion_physical_plan::joins::{HashJoinExec, NestedLoopJoinExec, PartitionMode}; use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::ExecutionPlanProperties; use datafusion_physical_plan::{ - execution_plan::{Boundedness, EmissionType}, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + execution_plan::{Boundedness, EmissionType}, }; use futures::Stream; @@ -949,10 +949,10 @@ impl Stream for UnboundedStream { mut self: Pin<&mut Self>, _cx: &mut Context<'_>, ) -> Poll<Option<Self::Item>> { - if let Some(val) = self.batch_produce { - if val <= self.count { - return Poll::Ready(None); - } + if let Some(val) = self.batch_produce + && val <= self.count + { + return Poll::Ready(None); } self.count += 1; Poll::Ready(Some(Ok(self.batch.clone()))) @@ -1088,9 +1088,10 @@ pub struct StatisticsExec { impl StatisticsExec { pub fn new(stats: Statistics, schema: Schema) -> Self { assert_eq!( - stats.column_statistics.len(), schema.fields().len(), - "if defined, the column statistics vector length should be the number of fields" - ); + stats.column_statistics.len(), + schema.fields().len(), + "if defined, the column statistics vector length should be the number of fields" + ); let cache = Self::compute_properties(Arc::new(schema.clone())); Self { stats, diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs 
index 9d172db246ad..b32a9bbd2543 100644 --- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -27,16 +27,16 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_common::error::Result; use datafusion_expr::Operator; -use datafusion_physical_expr::expressions::{col, lit, BinaryExpr}; use datafusion_physical_expr::Partitioning; +use datafusion_physical_expr::expressions::{BinaryExpr, col, lit}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_optimizer::limit_pushdown::LimitPushdown; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::limit_pushdown::LimitPushdown; use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::repartition::RepartitionExec; -use datafusion_physical_plan::{get_plan_string, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, get_plan_string}; fn create_schema() -> SchemaRef { Arc::new(Schema::new(vec![ @@ -96,51 +96,51 @@ fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero() -> let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" - ]; + "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) } #[test] -fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( -) -> Result<()> { +fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero() +-> Result<()> { let schema = create_schema(); let streaming_table = stream_exec(&schema); let global_limit = global_limit_exec(streaming_table, 2, Some(5)); let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" - ]; + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) } #[test] -fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( -) -> Result<()> { +fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit() +-> Result<()> { let schema = 
create_schema(); let streaming_table = stream_exec(&schema); let repartition = repartition_exec(streaming_table)?; @@ -152,14 +152,14 @@ fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limi let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = @@ -170,8 +170,8 @@ fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limi " CoalesceBatchesExec: target_batch_size=8192, fetch=5", " FilterExec: c3@2 > 0", " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) @@ -187,29 +187,29 @@ fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " FilterExec: c3@2 > 0, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) } #[test] -fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( -) -> Result<()> { +fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version() +-> Result<()> { let schema = create_schema(); let streaming_table = stream_exec(&schema); let coalesce_batches = coalesce_batches_exec(streaming_table, 8192); @@ -218,11 +218,11 @@ fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batc let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, 
projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); @@ -230,10 +230,10 @@ fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batc LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) @@ -257,14 +257,14 @@ fn pushes_global_limit_into_multiple_fetch_plans() -> Result<()> { let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " SortPreservingMergeExec: [c1@0 ASC]", - " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " SortPreservingMergeExec: [c1@0 ASC]", + " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); @@ -272,13 +272,13 @@ fn pushes_global_limit_into_multiple_fetch_plans() -> Result<()> { LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "SortPreservingMergeExec: [c1@0 ASC], fetch=5", - " SortExec: TopK(fetch=5), expr=[c1@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "SortPreservingMergeExec: [c1@0 ASC], fetch=5", + " SortExec: TopK(fetch=5), expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) @@ -296,23 +296,23 @@ fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions() -> R let initial = get_plan_string(&global_limit); let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], 
infinite_source=true" - ]; + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(initial, expected_initial); let after_optimize = LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; let expected = [ - "CoalescePartitionsExec: fetch=5", - " FilterExec: c3@2 > 0, fetch=5", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; + "CoalescePartitionsExec: fetch=5", + " FilterExec: c3@2 > 0, fetch=5", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true", + ]; assert_eq!(get_plan_string(&after_optimize), expected); Ok(()) diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index ad15d6803413..c523b4a752a8 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -21,8 +21,8 @@ use insta::assert_snapshot; use std::sync::Arc; use crate::physical_optimizer::test_utils::{ - build_group_by, get_optimized_plan, mock_data, parquet_exec_with_sort, schema, - TestAggregate, + TestAggregate, build_group_by, get_optimized_plan, mock_data, parquet_exec_with_sort, + schema, }; use arrow::datatypes::DataType; @@ -34,10 +34,10 @@ use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{self, cast, col}; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use datafusion_physical_plan::{ + ExecutionPlan, aggregates::{AggregateExec, AggregateMode}, collect, limit::{GlobalLimitExec, LocalLimitExec}, - ExecutionPlan, }; async fn run_plan_and_format(plan: Arc<dyn ExecutionPlan>) -> Result<String> { diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index 1846473e109a..468d25e0e57d 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -25,16 +25,16 @@ mod test { use datafusion::datasource::listing::ListingTable; use datafusion::prelude::SessionContext; use datafusion_catalog::TableProvider; - use datafusion_common::stats::Precision; use datafusion_common::Result; + use datafusion_common::stats::Precision; use datafusion_common::{ColumnStatistics, ScalarValue, Statistics}; - use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; + use datafusion_execution::config::SessionConfig; use datafusion_expr_common::operator::Operator; use datafusion_functions_aggregate::count::count_udaf; - use datafusion_physical_expr::aggregate::AggregateExprBuilder; - use datafusion_physical_expr::expressions::{binary, col, lit, Column}; use datafusion_physical_expr::Partitioning; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; + use datafusion_physical_expr::expressions::{Column, binary, col, lit}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use datafusion_physical_plan::aggregates::{ @@ -53,8 +53,8 @@ mod test { use 
datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::union::{InterleaveExec, UnionExec}; use datafusion_physical_plan::{ - execute_stream_partitioned, get_plan_string, ExecutionPlan, - ExecutionPlanProperties, + ExecutionPlan, ExecutionPlanProperties, execute_stream_partitioned, + get_plan_string, }; use futures::TryStreamExt; @@ -766,11 +766,13 @@ mod test { ), ]); - let aggr_expr = vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) - .schema(Arc::clone(&scan_schema)) - .alias(String::from("COUNT(c)")) - .build() - .map(Arc::new)?]; + let aggr_expr = vec![ + AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) + .schema(Arc::clone(&scan_schema)) + .alias(String::from("COUNT(c)")) + .build() + .map(Arc::new)?, + ]; let aggregate_exec_partial: Arc<dyn ExecutionPlan> = Arc::new(AggregateExec::try_new( @@ -1067,9 +1069,11 @@ mod test { let result = repartition.partition_statistics(Some(2)); assert!(result.is_err()); let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("RepartitionExec invalid partition 2 (expected less than 2)")); + assert!( + error + .to_string() + .contains("RepartitionExec invalid partition 2 (expected less than 2)") + ); let partitions = execute_stream_partitioned( repartition.clone(), diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs index 80f4fbc3051c..89564cb9ed9c 100644 --- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs @@ -26,8 +26,8 @@ use datafusion::datasource::physical_plan::CsvSource; use datafusion::datasource::source::DataSourceExec; use datafusion_common::config::{ConfigOptions, CsvOptions}; use datafusion_common::{JoinSide, JoinType, NullEquality, Result, ScalarValue}; -use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::TableSchema; +use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{ @@ -35,16 +35,16 @@ use datafusion_expr::{ }; use datafusion_expr_common::columnar_value::ColumnarValue; use datafusion_physical_expr::expressions::{ - binary, cast, col, BinaryExpr, CaseExpr, CastExpr, Column, Literal, NegativeExpr, + BinaryExpr, CaseExpr, CastExpr, Column, Literal, NegativeExpr, binary, cast, col, }; use datafusion_physical_expr::{Distribution, Partitioning, ScalarFunctionExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{ OrderingRequirements, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; use datafusion_physical_optimizer::projection_pushdown::ProjectionPushdown; -use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::utils::{ColumnIndex, JoinFilter}; @@ -52,13 +52,13 @@ use datafusion_physical_plan::joins::{ HashJoinExec, NestedLoopJoinExec, PartitionMode, StreamJoinPartitionMode, SymmetricHashJoinExec, }; -use datafusion_physical_plan::projection::{update_expr, ProjectionExec, ProjectionExpr}; +use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr, 
update_expr}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_plan::{displayable, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, displayable}; use insta::assert_snapshot; use itertools::Itertools; @@ -230,9 +230,11 @@ fn test_update_matching_exprs() -> Result<()> { .map(|(expr, alias)| ProjectionExpr::new(expr.clone(), alias.clone())) .collect(); for (expr, expected_expr) in exprs.into_iter().zip(expected_exprs.into_iter()) { - assert!(update_expr(&expr, &child_exprs, true)? - .unwrap() - .eq(&expected_expr)); + assert!( + update_expr(&expr, &child_exprs, true)? + .unwrap() + .eq(&expected_expr) + ); } Ok(()) @@ -369,9 +371,11 @@ fn test_update_projected_exprs() -> Result<()> { .map(|(expr, alias)| ProjectionExpr::new(expr.clone(), alias.clone())) .collect(); for (expr, expected_expr) in exprs.into_iter().zip(expected_exprs.into_iter()) { - assert!(update_expr(&expr, &proj_exprs, false)? - .unwrap() - .eq(&expected_expr)); + assert!( + update_expr(&expr, &proj_exprs, false)? + .unwrap() + .eq(&expected_expr) + ); } Ok(()) @@ -814,10 +818,11 @@ fn test_output_req_after_projection() -> Result<()> { .required_input_distribution()[0] .clone() { - assert!(vec - .iter() - .zip(expected_distribution) - .all(|(actual, expected)| actual.eq(&expected))); + assert!( + vec.iter() + .zip(expected_distribution) + .all(|(actual, expected)| actual.eq(&expected)) + ); } else { panic!("Expected HashPartitioned distribution!"); }; diff --git a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs index 8fe35fc307df..d93081f5ceb8 100644 --- a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs @@ -50,8 +50,8 @@ use datafusion_physical_plan::{ collect, displayable, ExecutionPlan, Partitioning, }; -use object_store::memory::InMemory; use object_store::ObjectStore; +use object_store::memory::InMemory; use rstest::rstest; use url::Url; @@ -138,7 +138,8 @@ impl ReplaceTest { assert!( res.is_ok(), "Some errors occurred while executing the optimized physical plan: {:?}\nPlan: {}", - res.unwrap_err(), optimized_plan_string + res.unwrap_err(), + optimized_plan_string ); } @@ -1248,7 +1249,10 @@ fn test_plan_with_order_preserving_variants_preserves_fetch() -> Result<()> { )], ); let res = plan_with_order_preserving_variants(requirements, false, true, Some(15)); - assert_contains!(res.unwrap_err().to_string(), "CoalescePartitionsExec fetch [10] should be greater than or equal to SortExec fetch [15]"); + assert_contains!( + res.unwrap_err().to_string(), + "CoalescePartitionsExec fetch [10] should be greater than or equal to SortExec fetch [15]" + ); // Test sort is without fetch, expected to get the fetch value from the coalesced let requirements = OrderPreservationContext::new( diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs index f46147de1bfd..5aa3925e2728 100644 --- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs @@ 
-30,13 +30,13 @@ use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTab use datafusion::prelude::{CsvReadOptions, SessionContext}; use datafusion_common::config::ConfigOptions; use datafusion_common::{JoinType, Result, ScalarValue}; -use datafusion_physical_expr::expressions::{col, Literal}; use datafusion_physical_expr::Partitioning; +use datafusion_physical_expr::expressions::{Literal, col}; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_optimizer::sanity_checker::SanityCheckPlan; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::sanity_checker::SanityCheckPlan; use datafusion_physical_plan::repartition::RepartitionExec; -use datafusion_physical_plan::{displayable, ExecutionPlan}; +use datafusion_physical_plan::{ExecutionPlan, displayable}; use async_trait::async_trait; diff --git a/datafusion/core/tests/physical_optimizer/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs index ef3f23b7cb76..1561ddf4407d 100644 --- a/datafusion/core/tests/physical_optimizer/test_utils.rs +++ b/datafusion/core/tests/physical_optimizer/test_utils.rs @@ -45,8 +45,8 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{ LexOrdering, OrderingRequirements, PhysicalSortExpr, }; -use datafusion_physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; @@ -63,10 +63,10 @@ use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeE use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::tree_node::PlanContext; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_plan::windows::{create_window_expr, BoundedWindowAggExec}; +use datafusion_physical_plan::windows::{BoundedWindowAggExec, create_window_expr}; use datafusion_physical_plan::{ - displayable, DisplayAs, DisplayFormatType, ExecutionPlan, InputOrderMode, - Partitioning, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, InputOrderMode, Partitioning, + PlanProperties, displayable, }; /// Create a non sorted parquet exec @@ -462,10 +462,11 @@ impl ExecutionPlan for RequirementsTestExec { } fn required_input_ordering(&self) -> Vec<Option<OrderingRequirements>> { - vec![self - .required_input_ordering - .as_ref() - .map(|ordering| OrderingRequirements::from(ordering.clone()))] + vec![ + self.required_input_ordering + .as_ref() + .map(|ordering| OrderingRequirements::from(ordering.clone())), + ] } fn maintains_input_order(&self) -> Vec<bool> { diff --git a/datafusion/core/tests/physical_optimizer/window_optimize.rs b/datafusion/core/tests/physical_optimizer/window_optimize.rs index fc1e6444d756..796f6b625971 100644 --- a/datafusion/core/tests/physical_optimizer/window_optimize.rs +++ b/datafusion/core/tests/physical_optimizer/window_optimize.rs @@ -26,10 +26,10 @@ mod test { use datafusion_expr::WindowFrame; use datafusion_functions_aggregate::count::count_udaf; use datafusion_physical_expr::aggregate::AggregateExprBuilder; - use datafusion_physical_expr::expressions::{col, Column}; + use datafusion_physical_expr::expressions::{Column, col}; use datafusion_physical_expr::window::PlainAggregateWindowExpr; use 
datafusion_physical_plan::windows::BoundedWindowAggExec; - use datafusion_physical_plan::{common, ExecutionPlan, InputOrderMode}; + use datafusion_physical_plan::{ExecutionPlan, InputOrderMode, common}; use std::sync::Arc; /// Test case for diff --git a/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs index 2a0c133312cc..01242ff41fb9 100644 --- a/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs +++ b/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs @@ -32,18 +32,18 @@ use datafusion::datasource::physical_plan::{ use datafusion::logical_expr::{col, lit}; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; +use datafusion_common::ColumnStatistics; use datafusion_common::config::CsvOptions; use datafusion_common::record_batch; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::ColumnStatistics; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::schema_adapter::{ SchemaAdapter, SchemaAdapterFactory, SchemaMapper, }; use datafusion::assert_batches_eq; -use datafusion_datasource::source::DataSourceExec; use datafusion_datasource::TableSchema; +use datafusion_datasource::source::DataSourceExec; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::Expr; use datafusion_physical_expr::expressions::Column; @@ -51,7 +51,7 @@ use datafusion_physical_expr::planner::logical2physical; use datafusion_physical_expr::projection::ProjectionExprs; use datafusion_physical_expr_adapter::{PhysicalExprAdapter, PhysicalExprAdapterFactory}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use object_store::{memory::InMemory, path::Path, ObjectStore}; +use object_store::{ObjectStore, memory::InMemory, path::Path}; use parquet::arrow::ArrowWriter; async fn write_parquet(batch: RecordBatch, store: Arc<dyn ObjectStore>, path: &str) { diff --git a/datafusion/core/tests/sql/aggregates/mod.rs b/datafusion/core/tests/sql/aggregates/mod.rs index 321c158628e4..ede40d5c4cec 100644 --- a/datafusion/core/tests/sql/aggregates/mod.rs +++ b/datafusion/core/tests/sql/aggregates/mod.rs @@ -20,15 +20,15 @@ use super::*; use arrow::{ array::{ - types::UInt32Type, Decimal128Array, DictionaryArray, DurationNanosecondArray, - Int32Array, LargeBinaryArray, StringArray, TimestampMicrosecondArray, - UInt16Array, UInt32Array, UInt64Array, UInt8Array, + Decimal128Array, DictionaryArray, DurationNanosecondArray, Int32Array, + LargeBinaryArray, StringArray, TimestampMicrosecondArray, UInt8Array, + UInt16Array, UInt32Array, UInt64Array, types::UInt32Type, }, datatypes::{DataType, Field, Schema, TimeUnit}, record_batch::RecordBatch, }; use datafusion::{ - common::{test_util::batches_to_string, Result}, + common::{Result, test_util::batches_to_string}, execution::{config::SessionConfig, context::SessionContext}, }; use datafusion_catalog::MemTable; @@ -959,8 +959,8 @@ impl FuzzTimestampTestData { } /// Sets up test contexts for fuzz table with timestamps and both single and multiple partitions -pub async fn setup_fuzz_timestamp_test_contexts( -) -> Result<(SessionContext, SessionContext)> { +pub async fn setup_fuzz_timestamp_test_contexts() +-> Result<(SessionContext, SessionContext)> { let test_data = FuzzTimestampTestData::new(); // Single partition context diff --git a/datafusion/core/tests/sql/explain_analyze.rs 
b/datafusion/core/tests/sql/explain_analyze.rs index b0a8aa08f271..201bcf178b0e 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -847,8 +847,7 @@ async fn csv_explain_analyze_order_by() { // Ensure that the ordering is not optimized away from the plan // https://github.com/apache/datafusion/issues/6379 - let needle = - "SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[false], metrics=[output_rows=100, elapsed_compute"; + let needle = "SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[false], metrics=[output_rows=100, elapsed_compute"; assert_contains!(&formatted, needle); } @@ -888,7 +887,7 @@ async fn parquet_explain_analyze() { (i_file < i_rowgroup_stat) && (i_rowgroup_stat < i_rowgroup_bloomfilter) && (i_rowgroup_bloomfilter < i_page), - "The parquet pruning metrics should be displayed in an order of: file range -> row group statistics -> row group bloom filter -> page index." + "The parquet pruning metrics should be displayed in an order of: file range -> row group statistics -> row group bloom filter -> page index." ); } diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index 5ce3865d652d..c7f02e486897 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -38,14 +38,16 @@ async fn join_change_in_planner() -> Result<()> { Field::new("a2", DataType::UInt32, false), ])); // Specify the ordering: - let file_sort_order = vec![[col("a1")] - .into_iter() - .map(|e| { - let ascending = true; - let nulls_first = false; - e.sort(ascending, nulls_first) - }) - .collect::<Vec<_>>()]; + let file_sort_order = vec![ + [col("a1")] + .into_iter() + .map(|e| { + let ascending = true; + let nulls_first = false; + e.sort(ascending, nulls_first) + }) + .collect::<Vec<_>>(), + ]; register_unbounded_file_with_ordering( &ctx, schema.clone(), @@ -95,14 +97,16 @@ async fn join_no_order_on_filter() -> Result<()> { Field::new("a3", DataType::UInt32, false), ])); // Specify the ordering: - let file_sort_order = vec![[col("a1")] - .into_iter() - .map(|e| { - let ascending = true; - let nulls_first = false; - e.sort(ascending, nulls_first) - }) - .collect::<Vec<_>>()]; + let file_sort_order = vec![ + [col("a1")] + .into_iter() + .map(|e| { + let ascending = true; + let nulls_first = false; + e.sort(ascending, nulls_first) + }) + .collect::<Vec<_>>(), + ]; register_unbounded_file_with_ordering( &ctx, schema.clone(), @@ -202,7 +206,10 @@ async fn join_change_in_planner_without_sort_not_allowed() -> Result<()> { match df.create_physical_plan().await { Ok(_) => panic!("Expecting error."), Err(e) => { - assert_eq!(e.strip_backtrace(), "SanityCheckPlan\ncaused by\nError during planning: Join operation cannot operate on a non-prunable stream without enabling the 'allow_symmetric_joins_without_pruning' configuration flag") + assert_eq!( + e.strip_backtrace(), + "SanityCheckPlan\ncaused by\nError during planning: Join operation cannot operate on a non-prunable stream without enabling the 'allow_symmetric_joins_without_pruning' configuration flag" + ) } } Ok(()) diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 426ec213b324..3052ccf2b68f 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -24,10 +24,10 @@ use arrow::{ use datafusion::error::Result; use datafusion::logical_expr::{Aggregate, LogicalPlan, TableScan}; -use datafusion::physical_plan::collect; -use datafusion::physical_plan::metrics::MetricValue; use 
datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::ExecutionPlanVisitor; +use datafusion::physical_plan::collect; +use datafusion::physical_plan::metrics::MetricValue; use datafusion::prelude::*; use datafusion::test_util; use datafusion::{execution::context::SessionContext, physical_plan::displayable}; @@ -335,8 +335,7 @@ async fn nyc() -> Result<()> { match &optimized_plan { LogicalPlan::Aggregate(Aggregate { input, .. }) => match input.as_ref() { LogicalPlan::TableScan(TableScan { - ref projected_schema, - .. + projected_schema, .. }) => { assert_eq!(2, projected_schema.fields().len()); assert_eq!(projected_schema.field(0).name(), "passenger_count"); diff --git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index 3ee4e37589c1..c6f920584dc2 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -35,9 +35,9 @@ use datafusion::{ test_util::{self, arrow_test_data, parquet_test_data}, }; use datafusion_catalog::TableProvider; +use datafusion_common::ScalarValue; use datafusion_common::stats::Precision; use datafusion_common::test_util::batches_to_sort_string; -use datafusion_common::ScalarValue; use datafusion_execution::config::SessionConfig; use async_trait::async_trait; @@ -45,11 +45,11 @@ use bytes::Bytes; use chrono::{TimeZone, Utc}; use futures::stream::{self, BoxStream}; use insta::assert_snapshot; +use object_store::{Attributes, MultipartUpload, PutMultipartOptions, PutPayload}; use object_store::{ - path::Path, GetOptions, GetResult, GetResultPayload, ListResult, ObjectMeta, - ObjectStore, PutOptions, PutResult, + GetOptions, GetResult, GetResultPayload, ListResult, ObjectMeta, ObjectStore, + PutOptions, PutResult, path::Path, }; -use object_store::{Attributes, MultipartUpload, PutMultipartOptions, PutPayload}; use url::Url; #[tokio::test] diff --git a/datafusion/core/tests/sql/runtime_config.rs b/datafusion/core/tests/sql/runtime_config.rs index d6dc6983998d..d85892c25457 100644 --- a/datafusion/core/tests/sql/runtime_config.rs +++ b/datafusion/core/tests/sql/runtime_config.rs @@ -23,8 +23,8 @@ use std::time::Duration; use datafusion::execution::context::SessionContext; use datafusion::execution::context::TaskContext; use datafusion::prelude::SessionConfig; -use datafusion_execution::cache::cache_manager::CacheManagerConfig; use datafusion_execution::cache::DefaultListFilesCache; +use datafusion_execution::cache::cache_manager::CacheManagerConfig; use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_physical_plan::common::collect; diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 5a51451461ed..6126793145ef 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -18,7 +18,7 @@ use std::collections::HashMap; use super::*; -use datafusion_common::{metadata::ScalarAndMetadata, ParamValues, ScalarValue}; +use datafusion_common::{ParamValues, ScalarValue, metadata::ScalarAndMetadata}; use insta::assert_snapshot; #[tokio::test] diff --git a/datafusion/core/tests/tracing/asserting_tracer.rs b/datafusion/core/tests/tracing/asserting_tracer.rs index 292e066e5f12..700f9f330846 100644 --- a/datafusion/core/tests/tracing/asserting_tracer.rs +++ b/datafusion/core/tests/tracing/asserting_tracer.rs @@ -21,7 +21,7 @@ use std::ops::Deref; use std::sync::{Arc, LazyLock}; use datafusion_common::{HashMap, HashSet}; -use datafusion_common_runtime::{set_join_set_tracer, JoinSetTracer}; 
+use datafusion_common_runtime::{JoinSetTracer, set_join_set_tracer};
 use futures::future::BoxFuture;
 use tokio::sync::{Mutex, MutexGuard};
 
diff --git a/datafusion/core/tests/tracing/traceable_object_store.rs b/datafusion/core/tests/tracing/traceable_object_store.rs
index 60ef1cc5d6b6..00aa4ea3f36d 100644
--- a/datafusion/core/tests/tracing/traceable_object_store.rs
+++ b/datafusion/core/tests/tracing/traceable_object_store.rs
@@ -20,8 +20,8 @@
 use crate::tracing::asserting_tracer::assert_traceability;
 use futures::stream::BoxStream;
 use object_store::{
-    path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta,
-    ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult,
+    GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
+    PutMultipartOptions, PutOptions, PutPayload, PutResult, path::Path,
 };
 use std::fmt::{Debug, Display, Formatter};
 use std::sync::Arc;
diff --git a/datafusion/core/tests/user_defined/expr_planner.rs b/datafusion/core/tests/user_defined/expr_planner.rs
index 07d289cab06c..c2558fbba24d 100644
--- a/datafusion/core/tests/user_defined/expr_planner.rs
+++ b/datafusion/core/tests/user_defined/expr_planner.rs
@@ -26,9 +26,9 @@ use datafusion::logical_expr::Operator;
 use datafusion::prelude::*;
 use datafusion::sql::sqlparser::ast::BinaryOperator;
 use datafusion_common::ScalarValue;
+use datafusion_expr::BinaryExpr;
 use datafusion_expr::expr::Alias;
 use datafusion_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr};
-use datafusion_expr::BinaryExpr;
 
 #[derive(Debug)]
 struct MyCustomPlanner;
diff --git a/datafusion/core/tests/user_defined/insert_operation.rs b/datafusion/core/tests/user_defined/insert_operation.rs
index e0a3e98604ae..7ad00dece1b2 100644
--- a/datafusion/core/tests/user_defined/insert_operation.rs
+++ b/datafusion/core/tests/user_defined/insert_operation.rs
@@ -25,12 +25,12 @@ use datafusion::{
 };
 use datafusion_catalog::{Session, TableProvider};
 use datafusion_common::config::Dialect;
-use datafusion_expr::{dml::InsertOp, Expr, TableType};
+use datafusion_expr::{Expr, TableType, dml::InsertOp};
 use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
 use datafusion_physical_plan::execution_plan::SchedulingType;
 use datafusion_physical_plan::{
-    execution_plan::{Boundedness, EmissionType},
     DisplayAs, ExecutionPlan, PlanProperties,
+    execution_plan::{Boundedness, EmissionType},
 };
 
 #[tokio::test]
diff --git a/datafusion/core/tests/user_defined/relation_planner.rs b/datafusion/core/tests/user_defined/relation_planner.rs
index dc06c783db7c..e1c7a2618415 100644
--- a/datafusion/core/tests/user_defined/relation_planner.rs
+++ b/datafusion/core/tests/user_defined/relation_planner.rs
@@ -25,11 +25,11 @@ use datafusion::catalog::memory::MemTable;
 use datafusion::common::test_util::batches_to_string;
 use datafusion::prelude::*;
 use datafusion_common::{Result, ScalarValue};
+use datafusion_expr::Expr;
 use datafusion_expr::logical_plan::builder::LogicalPlanBuilder;
 use datafusion_expr::planner::{
     PlannedRelation, RelationPlanner, RelationPlannerContext, RelationPlanning,
 };
-use datafusion_expr::Expr;
 use datafusion_sql::sqlparser::ast::TableFactor;
 use insta::assert_snapshot;
 
diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs
index 28a78feed3af..ca13b104a56c 100644
--- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs
+++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs
@@ -23,13 +23,13 @@ use std::collections::HashMap;
 use std::hash::{Hash, Hasher};
 use std::mem::{size_of, size_of_val};
 use std::sync::{
-    atomic::{AtomicBool, Ordering},
     Arc,
+    atomic::{AtomicBool, Ordering},
 };
 
 use arrow::array::{
-    record_batch, types::UInt64Type, Array, AsArray, Int32Array, PrimitiveArray,
-    StringArray, StructArray, UInt64Array,
+    Array, AsArray, Int32Array, PrimitiveArray, StringArray, StructArray, UInt64Array,
+    record_batch, types::UInt64Type,
 };
 use arrow::datatypes::{Fields, Schema};
 use arrow_schema::FieldRef;
@@ -56,8 +56,8 @@ use datafusion_common::{cast::as_primitive_array, exec_err};
 
 use datafusion_expr::expr::WindowFunction;
 use datafusion_expr::{
-    col, create_udaf, function::AccumulatorArgs, AggregateUDFImpl, Expr,
-    GroupsAccumulator, LogicalPlanBuilder, SimpleAggregateUDF, WindowFunctionDefinition,
+    AggregateUDFImpl, Expr, GroupsAccumulator, LogicalPlanBuilder, SimpleAggregateUDF,
+    WindowFunctionDefinition, col, create_udaf, function::AccumulatorArgs,
 };
 use datafusion_functions_aggregate::average::AvgAccumulator;
 
@@ -164,7 +164,10 @@ async fn test_udaf_as_window_with_frame_without_retract_batch() {
     let sql = "SELECT time_sum(time) OVER(ORDER BY time ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as time_sum from t";
     // Note if this query ever does start working
     let err = execute(&ctx, sql).await.unwrap_err();
-    assert_contains!(err.to_string(), "This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented: time_sum(t.time) ORDER BY [t.time ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING");
+    assert_contains!(
+        err.to_string(),
+        "This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented: time_sum(t.time) ORDER BY [t.time ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING"
+    );
 }
 
 /// Basic query for with a udaf returning a structure
@@ -329,9 +332,10 @@ async fn case_sensitive_identifiers_user_defined_aggregates() -> Result<()> {
 
     // doesn't work as it was registered as non lowercase
     let err = ctx.sql("SELECT MY_AVG(i) FROM t").await.unwrap_err();
-    assert!(err
-        .to_string()
-        .contains("Error during planning: Invalid function \'my_avg\'"));
+    assert!(
+        err.to_string()
+            .contains("Error during planning: Invalid function \'my_avg\'")
+    );
 
     // Can call it if you put quotes
     let result = ctx
@@ -761,11 +765,11 @@ impl Accumulator for FirstSelector {
 
         // Update the actual values
         for (value, time) in v.iter().zip(t.iter()) {
-            if let (Some(time), Some(value)) = (time, value) {
-                if time < self.time {
-                    self.value = value;
-                    self.time = time;
-                }
+            if let (Some(time), Some(value)) = (time, value)
+                && time < self.time
+            {
+                self.value = value;
+                self.time = time;
             }
         }
 
diff --git a/datafusion/core/tests/user_defined/user_defined_async_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_async_scalar_functions.rs
index 5b9585170a44..168d81fc6b44 100644
--- a/datafusion/core/tests/user_defined/user_defined_async_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_async_scalar_functions.rs
@@ -21,7 +21,7 @@ use arrow::array::{Int32Array, RecordBatch, StringArray};
 use arrow::datatypes::{DataType, Field, Schema};
 use async_trait::async_trait;
 use datafusion::prelude::*;
-use datafusion_common::{assert_batches_eq, Result};
+use datafusion_common::{Result, assert_batches_eq};
 use datafusion_expr::async_udf::{AsyncScalarUDF, AsyncScalarUDFImpl};
 use datafusion_expr::{
     ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs
index 2d0e02719c21..6d98d6823c45 100644
--- a/datafusion/core/tests/user_defined/user_defined_plan.rs
+++ b/datafusion/core/tests/user_defined/user_defined_plan.rs
@@ -70,7 +70,7 @@ use arrow::{
 use datafusion::execution::session_state::SessionStateBuilder;
 use datafusion::{
     common::cast::as_int64_array,
-    common::{arrow_datafusion_err, DFSchemaRef},
+    common::{DFSchemaRef, arrow_datafusion_err},
     error::{DataFusionError, Result},
     execution::{
         context::{QueryPlanner, SessionState, TaskContext},
@@ -91,10 +91,10 @@ use datafusion::{
 };
 use datafusion_common::config::ConfigOptions;
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
-use datafusion_common::{assert_eq_or_internal_err, assert_or_internal_err, ScalarValue};
+use datafusion_common::{ScalarValue, assert_eq_or_internal_err, assert_or_internal_err};
 use datafusion_expr::{FetchType, InvariantLevel, Projection, SortExpr};
-use datafusion_optimizer::optimizer::ApplyOrder;
 use datafusion_optimizer::AnalyzerRule;
+use datafusion_optimizer::optimizer::ApplyOrder;
 use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
 
 use async_trait::async_trait;
@@ -433,21 +433,21 @@ impl OptimizerRule for OptimizerMakeExtensionNodeInvalid {
         plan: LogicalPlan,
         _config: &dyn OptimizerConfig,
     ) -> Result<Transformed<LogicalPlan>, DataFusionError> {
-        if let LogicalPlan::Extension(Extension { node }) = &plan {
-            if let Some(prev) = node.as_any().downcast_ref::<TopKPlanNode>() {
-                return Ok(Transformed::yes(LogicalPlan::Extension(Extension {
-                    node: Arc::new(TopKPlanNode {
-                        k: prev.k,
-                        input: prev.input.clone(),
-                        expr: prev.expr.clone(),
-                        // In a real use case, this rewriter could have change the number of inputs, etc
-                        invariant_mock: Some(InvariantMock {
-                            should_fail_invariant: true,
-                            kind: InvariantLevel::Always,
-                        }),
+        if let LogicalPlan::Extension(Extension { node }) = &plan
+            && let Some(prev) = node.as_any().downcast_ref::<TopKPlanNode>()
+        {
+            return Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+                node: Arc::new(TopKPlanNode {
+                    k: prev.k,
+                    input: prev.input.clone(),
+                    expr: prev.expr.clone(),
+                    // In a real use case, this rewriter could have changed the number of inputs, etc
+                    invariant_mock: Some(InvariantMock {
+                        should_fail_invariant: true,
+                        kind: InvariantLevel::Always,
                     }),
-                })));
-            }
+                }),
+            })));
         };
 
         Ok(Transformed::no(plan))
@@ -515,23 +515,18 @@ impl OptimizerRule for TopKOptimizerRule {
             return Ok(Transformed::no(plan));
         };
 
-        if let LogicalPlan::Sort(Sort {
-            ref expr,
-            ref input,
-            ..
-        }) = limit.input.as_ref()
+        if let LogicalPlan::Sort(Sort { expr, input, .. }) = limit.input.as_ref()
+            && expr.len() == 1
         {
-            if expr.len() == 1 {
-                // we found a sort with a single sort expr, replace with a a TopK
-                return Ok(Transformed::yes(LogicalPlan::Extension(Extension {
-                    node: Arc::new(TopKPlanNode {
-                        k: fetch,
-                        input: input.as_ref().clone(),
-                        expr: expr[0].clone(),
-                        invariant_mock: self.invariant_mock.clone(),
-                    }),
-                })));
-            }
+            // we found a sort with a single sort expr, replace with a TopK
+            return Ok(Transformed::yes(LogicalPlan::Extension(Extension {
+                node: Arc::new(TopKPlanNode {
+                    k: fetch,
+                    input: input.as_ref().clone(),
+                    expr: expr[0].clone(),
+                    invariant_mock: self.invariant_mock.clone(),
+                }),
+            })));
         }
 
         Ok(Transformed::no(plan))
diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index 1361091a4cb5..d0be7243260e 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -20,11 +20,11 @@ use std::collections::HashMap;
 use std::hash::{Hash, Hasher};
 use std::sync::Arc;
 
-use arrow::array::{as_string_array, create_array, record_batch, Int8Array, UInt64Array};
 use arrow::array::{
-    builder::BooleanBuilder, cast::AsArray, Array, ArrayRef, Float32Array, Float64Array,
-    Int32Array, RecordBatch, StringArray,
+    Array, ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch, StringArray,
+    builder::BooleanBuilder, cast::AsArray,
 };
+use arrow::array::{Int8Array, UInt64Array, as_string_array, create_array, record_batch};
 use arrow::compute::kernels::numeric::add;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow_schema::extension::{Bool8, CanonicalExtensionType, ExtensionType};
@@ -38,14 +38,15 @@ use datafusion_common::metadata::FieldMetadata;
 use datafusion_common::tree_node::{Transformed, TreeNode};
 use datafusion_common::utils::take_function_args;
 use datafusion_common::{
-    assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_datafusion_err,
-    exec_err, not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue,
+    DFSchema, DataFusionError, Result, ScalarValue, assert_batches_eq,
+    assert_batches_sorted_eq, assert_contains, exec_datafusion_err, exec_err,
+    not_impl_err, plan_err,
 };
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
 use datafusion_expr::{
-    lit_with_metadata, Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody,
-    LogicalPlanBuilder, OperateFunctionArg, ReturnFieldArgs, ScalarFunctionArgs,
-    ScalarUDF, ScalarUDFImpl, Signature, Volatility,
+    Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, LogicalPlanBuilder,
+    OperateFunctionArg, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl,
+    Signature, Volatility, lit_with_metadata,
 };
 use datafusion_expr_common::signature::TypeSignature;
 use datafusion_functions_nested::range::range_udf;
@@ -426,9 +427,10 @@ async fn case_sensitive_identifiers_user_defined_functions() -> Result<()> {
     let err = plan_and_collect(&ctx, "SELECT MY_FUNC(i) FROM t")
         .await
         .unwrap_err();
-    assert!(err
-        .to_string()
-        .contains("Error during planning: Invalid function \'my_func\'"));
+    assert!(
+        err.to_string()
+            .contains("Error during planning: Invalid function \'my_func\'")
+    );
 
     // Can call it if you put quotes
     let result = plan_and_collect(&ctx, "SELECT \"MY_FUNC\"(i) FROM t").await?;
@@ -1094,10 +1096,11 @@ async fn create_scalar_function_from_sql_statement() -> Result<()> {
     // Create the `better_add` function dynamically via CREATE FUNCTION statement
     assert!(ctx.sql(sql).await.is_ok());
 
     // try to `drop function` when sql options have allow ddl disabled
-    assert!(ctx
-        .sql_with_options("drop function better_add", options)
-        .await
-        .is_err());
+    assert!(
+        ctx.sql_with_options("drop function better_add", options)
+            .await
+            .is_err()
+    );
 
     let result = ctx
         .sql("select better_add(2.0, 2.0)")
diff --git a/datafusion/core/tests/user_defined/user_defined_table_functions.rs b/datafusion/core/tests/user_defined/user_defined_table_functions.rs
index 2c6611f382ce..bc3a47207810 100644
--- a/datafusion/core/tests/user_defined/user_defined_table_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_table_functions.rs
@@ -21,17 +21,17 @@ use std::path::Path;
 use std::sync::Arc;
 
 use arrow::array::Int64Array;
-use arrow::csv::reader::Format;
 use arrow::csv::ReaderBuilder;
+use arrow::csv::reader::Format;
 use datafusion::arrow::datatypes::SchemaRef;
 use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::common::test_util::batches_to_string;
-use datafusion::datasource::memory::MemorySourceConfig;
 use datafusion::datasource::TableProvider;
+use datafusion::datasource::memory::MemorySourceConfig;
 use datafusion::error::Result;
 use datafusion::execution::TaskContext;
-use datafusion::physical_plan::{collect, ExecutionPlan};
+use datafusion::physical_plan::{ExecutionPlan, collect};
 use datafusion::prelude::SessionContext;
 use datafusion_catalog::Session;
 use datafusion_catalog::TableFunctionImpl;
 
@@ -205,7 +205,7 @@ impl TableFunctionImpl for SimpleCsvTableFunc {
         let mut filepath = String::new();
         for expr in exprs {
             match expr {
-                Expr::Literal(ScalarValue::Utf8(Some(ref path)), _) => {
+                Expr::Literal(ScalarValue::Utf8(Some(path)), _) => {
                     filepath.clone_from(path);
                 }
                 expr => new_exprs.push(expr.clone()),
diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs
index 33607ebc0d2c..f62cab523f38 100644
--- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs
@@ -19,8 +19,8 @@
 //! user defined window functions
 
 use arrow::array::{
-    record_batch, Array, ArrayRef, AsArray, Int64Array, RecordBatch, StringArray,
-    UInt64Array,
+    Array, ArrayRef, AsArray, Int64Array, RecordBatch, StringArray, UInt64Array,
+    record_batch,
 };
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow_schema::FieldRef;
@@ -38,8 +38,8 @@ use datafusion_functions_window_common::{
     expr::ExpressionArgs, field::WindowUDFFieldArgs,
 };
 use datafusion_physical_expr::{
-    expressions::{col, lit},
     PhysicalExpr,
+    expressions::{col, lit},
 };
 use std::collections::HashMap;
 use std::hash::{Hash, Hasher};
@@ -47,8 +47,8 @@ use std::{
     any::Any,
     ops::Range,
     sync::{
-        atomic::{AtomicUsize, Ordering},
         Arc,
+        atomic::{AtomicUsize, Ordering},
     },
 };
 
@@ -62,8 +62,7 @@ const UNBOUNDED_WINDOW_QUERY_WITH_ALIAS: &str = "SELECT x, y, val, \
     from t ORDER BY x, y";
 
 /// A query with a window function evaluated over a moving window
-const BOUNDED_WINDOW_QUERY: &str =
-    "SELECT x, y, val, \
+const BOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \
     odd_counter(val) OVER (PARTITION BY x ORDER BY y ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) \
     from t ORDER BY x, y";
 
@@ -616,7 +615,9 @@ impl PartitionEvaluator for OddCounter {
         ranks_in_partition: &[Range<usize>],
     ) -> Result<ArrayRef> {
         self.test_state.inc_evaluate_all_with_rank_called();
-        println!("evaluate_all_with_rank, values: {num_rows:#?}, ranks_in_partitions: {ranks_in_partition:?}");
+        println!(
+            "evaluate_all_with_rank, values: {num_rows:#?}, ranks_in_partitions: {ranks_in_partition:?}"
+        );
         // when evaluating with ranks, just return the inverse rank instead
         let array: Int64Array = ranks_in_partition
             .iter()