@@ -20,16 +20,17 @@ use std::sync::Arc;
20
20
21
21
use crate :: physical_optimizer:: test_utils:: {
22
22
bounded_window_exec, global_limit_exec, local_limit_exec, memory_exec,
23
- repartition_exec, sort_exec, sort_expr_options, sort_merge_join_exec,
23
+ projection_exec, repartition_exec, sort_exec, sort_expr, sort_expr_options,
24
+ sort_merge_join_exec, sort_preserving_merge_exec, union_exec,
24
25
} ;
25
26
26
27
use arrow:: compute:: SortOptions ;
27
28
use arrow:: datatypes:: { DataType , Field , Schema , SchemaRef } ;
28
29
use datafusion:: datasource:: stream:: { FileStreamProvider , StreamConfig , StreamTable } ;
29
30
use datafusion:: prelude:: { CsvReadOptions , SessionContext } ;
30
31
use datafusion_common:: config:: ConfigOptions ;
31
- use datafusion_common:: { JoinType , Result } ;
32
- use datafusion_physical_expr:: expressions:: col;
32
+ use datafusion_common:: { JoinType , Result , ScalarValue } ;
33
+ use datafusion_physical_expr:: expressions:: { col, Literal } ;
33
34
use datafusion_physical_expr:: Partitioning ;
34
35
use datafusion_physical_expr_common:: sort_expr:: LexOrdering ;
35
36
use datafusion_physical_optimizer:: sanity_checker:: SanityCheckPlan ;
@@ -665,3 +666,77 @@ async fn test_sort_merge_join_dist_missing() -> Result<()> {
665
666
assert_sanity_check ( & smj, false ) ;
666
667
Ok ( ( ) )
667
668
}
669
+
670
+ /// A particular edge case.
671
+ ///
672
+ /// See <https://github.com/apache/datafusion/issues/17372>.
673
+ #[ tokio:: test]
674
+ async fn test_union_with_sorts_and_constants ( ) -> Result < ( ) > {
675
+ let schema_in = create_test_schema2 ( ) ;
676
+
677
+ let proj_exprs_1 = vec ! [
678
+ (
679
+ Arc :: new( Literal :: new( ScalarValue :: Utf8 ( Some ( "foo" . to_owned( ) ) ) ) ) as _,
680
+ "const_1" . to_owned( ) ,
681
+ ) ,
682
+ (
683
+ Arc :: new( Literal :: new( ScalarValue :: Utf8 ( Some ( "foo" . to_owned( ) ) ) ) ) as _,
684
+ "const_2" . to_owned( ) ,
685
+ ) ,
686
+ ( col( "a" , & schema_in) . unwrap( ) , "a" . to_owned( ) ) ,
687
+ ] ;
688
+ let proj_exprs_2 = vec ! [
689
+ (
690
+ Arc :: new( Literal :: new( ScalarValue :: Utf8 ( Some ( "foo" . to_owned( ) ) ) ) ) as _,
691
+ "const_1" . to_owned( ) ,
692
+ ) ,
693
+ (
694
+ Arc :: new( Literal :: new( ScalarValue :: Utf8 ( Some ( "bar" . to_owned( ) ) ) ) ) as _,
695
+ "const_2" . to_owned( ) ,
696
+ ) ,
697
+ ( col( "a" , & schema_in) . unwrap( ) , "a" . to_owned( ) ) ,
698
+ ] ;
699
+
700
+ let source_1 = memory_exec ( & schema_in) ;
701
+ let source_1 = projection_exec ( proj_exprs_1. clone ( ) , source_1) . unwrap ( ) ;
702
+ let schema_sources = source_1. schema ( ) ;
703
+ let ordering_sources: LexOrdering =
704
+ [ sort_expr ( "a" , & schema_sources) . nulls_last ( ) ] . into ( ) ;
705
+ let source_1 = sort_exec ( ordering_sources. clone ( ) , source_1) ;
706
+
707
+ let source_2 = memory_exec ( & schema_in) ;
708
+ let source_2 = projection_exec ( proj_exprs_2, source_2) . unwrap ( ) ;
709
+ let source_2 = sort_exec ( ordering_sources. clone ( ) , source_2) ;
710
+
711
+ let plan = union_exec ( vec ! [ source_1, source_2] ) ;
712
+
713
+ let schema_out = plan. schema ( ) ;
714
+ let ordering_out: LexOrdering = [
715
+ sort_expr ( "const_1" , & schema_out) . nulls_last ( ) ,
716
+ sort_expr ( "const_2" , & schema_out) . nulls_last ( ) ,
717
+ sort_expr ( "a" , & schema_out) . nulls_last ( ) ,
718
+ ]
719
+ . into ( ) ;
720
+
721
+ let plan = sort_preserving_merge_exec ( ordering_out, plan) ;
722
+
723
+ let plan_str = displayable ( plan. as_ref ( ) ) . indent ( true ) . to_string ( ) ;
724
+ let plan_str = plan_str. trim ( ) ;
725
+ assert_snapshot ! (
726
+ plan_str,
727
+ @r"
728
+ SortPreservingMergeExec: [const_1@0 ASC NULLS LAST, const_2@1 ASC NULLS LAST, a@2 ASC NULLS LAST]
729
+ UnionExec
730
+ SortExec: expr=[a@2 ASC NULLS LAST], preserve_partitioning=[false]
731
+ ProjectionExec: expr=[foo as const_1, foo as const_2, a@0 as a]
732
+ DataSourceExec: partitions=1, partition_sizes=[0]
733
+ SortExec: expr=[a@2 ASC NULLS LAST], preserve_partitioning=[false]
734
+ ProjectionExec: expr=[foo as const_1, bar as const_2, a@0 as a]
735
+ DataSourceExec: partitions=1, partition_sizes=[0]
736
+ "
737
+ ) ;
738
+
739
+ assert_sanity_check ( & plan, true ) ;
740
+
741
+ Ok ( ( ) )
742
+ }
0 commit comments