Skip to content

Commit bb2d90e

Browse files
2010YOUY01 and alamb authored
Test: Add checks to sqllogictest temporary file creations (#17017)
* add checks to sqllogictest temporary file creation * clippy * review * clean up * clippy * update cargo.lock * update cargo.lock --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 048374c commit bb2d90e

File tree

8 files changed

+251
-161
lines changed

8 files changed

+251
-161
lines changed

Cargo.lock

Lines changed: 155 additions & 145 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-testing

Submodule datafusion-testing updated 241 files

datafusion/sqllogictest/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ postgres = [
8181

8282
[dev-dependencies]
8383
env_logger = { workspace = true }
84+
regex = { workspace = true }
8485
tokio = { workspace = true, features = ["rt-multi-thread"] }
8586

8687
[[test]]

datafusion/sqllogictest/bin/sqllogictests.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ use crate::postgres_container::{
4242
};
4343
use datafusion::common::runtime::SpawnedTask;
4444
use std::ffi::OsStr;
45+
use std::fs;
4546
use std::path::{Path, PathBuf};
4647

4748
#[cfg(feature = "postgres")]
@@ -124,6 +125,20 @@ async fn run_tests() -> Result<()> {
124125
let start = Instant::now();
125126

126127
let test_files = read_test_files(&options)?;
128+
129+
// Perform scratch file sanity check
130+
let scratch_errors = scratch_file_check(&test_files)?;
131+
if !scratch_errors.is_empty() {
132+
eprintln!("Scratch file sanity check failed:");
133+
for error in &scratch_errors {
134+
eprintln!(" {error}");
135+
}
136+
137+
eprintln!("\nTemporary file check failed. Please ensure that within each test file, any scratch file created is placed under a folder with the same name as the test file (without extension).\nExample: inside `join.slt`, temporary files must be created under `.../scratch/join/`\n");
138+
139+
return exec_err!("sqllogictests scratch file check failed");
140+
}
141+
127142
let num_tests = test_files.len();
128143
let errors: Vec<_> = futures::stream::iter(test_files)
129144
.map(|test_file| {
@@ -738,3 +753,67 @@ impl Options {
738753
}
739754
}
740755
}
756+
757+
/// Performs scratch file check for all test files.
758+
///
759+
/// Scratch file rule: In each .slt test file, the temporary file created must
760+
/// be under a folder that has the same name as the test file.
761+
/// e.g. In `join.slt`, temporary files must be created under `.../scratch/join/`
762+
///
763+
/// See: <https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest#running-tests-scratchdir>
764+
///
765+
/// This function searches for `scratch/[target]/...` patterns and verifies
766+
/// that the target matches the file name.
767+
///
768+
/// Returns a vector of error strings for incorrectly created scratch files.
769+
fn scratch_file_check(test_files: &[TestFile]) -> Result<Vec<String>> {
770+
let mut errors = Vec::new();
771+
772+
// Search for any scratch/[target]/... patterns and check if they match the file name
773+
let scratch_pattern = regex::Regex::new(r"scratch/([^/]+)/").unwrap();
774+
775+
for test_file in test_files {
776+
// Get the file content
777+
let content = match fs::read_to_string(&test_file.path) {
778+
Ok(content) => content,
779+
Err(e) => {
780+
errors.push(format!(
781+
"Failed to read file {}: {}",
782+
test_file.path.display(),
783+
e
784+
));
785+
continue;
786+
}
787+
};
788+
789+
// Get the expected target name (file name without extension)
790+
let expected_target = match test_file.path.file_stem() {
791+
Some(stem) => stem.to_string_lossy().to_string(),
792+
None => {
793+
errors.push(format!("File {} has no stem", test_file.path.display()));
794+
continue;
795+
}
796+
};
797+
798+
let lines: Vec<&str> = content.lines().collect();
799+
800+
for (line_num, line) in lines.iter().enumerate() {
801+
if let Some(captures) = scratch_pattern.captures(line) {
802+
if let Some(found_target) = captures.get(1) {
803+
let found_target = found_target.as_str();
804+
if found_target != expected_target {
805+
errors.push(format!(
806+
"File {}:{}: scratch target '{}' does not match file name '{}'",
807+
test_file.path.display(),
808+
line_num + 1,
809+
found_target,
810+
expected_target
811+
));
812+
}
813+
}
814+
}
815+
}
816+
}
817+
818+
Ok(errors)
819+
}

datafusion/sqllogictest/test_files/dictionary.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,4 +456,4 @@ statement ok
456456
CREATE TABLE test0 AS VALUES ('foo',1), ('bar',2), ('foo',3);
457457

458458
statement ok
459-
COPY (SELECT arrow_cast(column1, 'Dictionary(Int32, Utf8)') AS column1, column2 FROM test0) TO 'test_files/scratch/copy/part_dict_test' STORED AS PARQUET PARTITIONED BY (column1);
459+
COPY (SELECT arrow_cast(column1, 'Dictionary(Int32, Utf8)') AS column1, column2 FROM test0) TO 'test_files/scratch/dictionary/part_dict_test' STORED AS PARQUET PARTITIONED BY (column1);

datafusion/sqllogictest/test_files/limit.slt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -799,23 +799,23 @@ CREATE TABLE src_table (
799799
# File 1:
800800
query I
801801
COPY (SELECT * FROM src_table where part_key = 1)
802-
TO 'test_files/scratch/parquet/test_limit_with_partitions/part-0.parquet'
802+
TO 'test_files/scratch/limit/test_limit_with_partitions/part-0.parquet'
803803
STORED AS PARQUET;
804804
----
805805
3
806806

807807
# File 2:
808808
query I
809809
COPY (SELECT * FROM src_table where part_key = 2)
810-
TO 'test_files/scratch/parquet/test_limit_with_partitions/part-1.parquet'
810+
TO 'test_files/scratch/limit/test_limit_with_partitions/part-1.parquet'
811811
STORED AS PARQUET;
812812
----
813813
4
814814

815815
# File 3:
816816
query I
817817
COPY (SELECT * FROM src_table where part_key = 3)
818-
TO 'test_files/scratch/parquet/test_limit_with_partitions/part-2.parquet'
818+
TO 'test_files/scratch/limit/test_limit_with_partitions/part-2.parquet'
819819
STORED AS PARQUET;
820820
----
821821
3
@@ -827,7 +827,7 @@ CREATE EXTERNAL TABLE test_limit_with_partitions
827827
value INT
828828
)
829829
STORED AS PARQUET
830-
LOCATION 'test_files/scratch/parquet/test_limit_with_partitions/';
830+
LOCATION 'test_files/scratch/limit/test_limit_with_partitions/';
831831

832832
query TT
833833
explain
@@ -853,7 +853,7 @@ physical_plan
853853
01)ProjectionExec: expr=[1 as foo]
854854
02)--SortPreservingMergeExec: [part_key@0 ASC NULLS LAST], fetch=1
855855
03)----SortExec: TopK(fetch=1), expr=[part_key@0 ASC NULLS LAST], preserve_partitioning=[true]
856-
04)------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-2.parquet]]}, projection=[part_key], file_type=parquet, predicate=DynamicFilterPhysicalExpr [ true ]
856+
04)------DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/test_limit_with_partitions/part-0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/test_limit_with_partitions/part-1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/limit/test_limit_with_partitions/part-2.parquet]]}, projection=[part_key], file_type=parquet, predicate=DynamicFilterPhysicalExpr [ true ]
857857

858858
query I
859859
with selection as (

datafusion/sqllogictest/test_files/listing_table_statistics.slt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717

1818
# Test file with different schema order but generating correct statistics for table
1919
statement ok
20-
COPY (SELECT * FROM values (1, 'a'), (2, 'b') t(int_col, str_col)) to 'test_files/scratch/table/1.parquet';
20+
COPY (SELECT * FROM values (1, 'a'), (2, 'b') t(int_col, str_col)) to 'test_files/scratch/listing_table_statistics/1.parquet';
2121

2222
statement ok
23-
COPY (SELECT * FROM values ('c', 3), ('d', -1) t(str_col, int_col)) to 'test_files/scratch/table/2.parquet';
23+
COPY (SELECT * FROM values ('c', 3), ('d', -1) t(str_col, int_col)) to 'test_files/scratch/listing_table_statistics/2.parquet';
2424

2525
statement ok
2626
set datafusion.execution.collect_statistics = true;
@@ -29,13 +29,13 @@ statement ok
2929
set datafusion.explain.show_statistics = true;
3030

3131
statement ok
32-
create external table t stored as parquet location 'test_files/scratch/table';
32+
create external table t stored as parquet location 'test_files/scratch/listing_table_statistics';
3333

3434
query TT
3535
explain format indent select * from t;
3636
----
3737
logical_plan TableScan: t projection=[int_col, str_col]
38-
physical_plan DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/table/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/table/2.parquet]]}, projection=[int_col, str_col], file_type=parquet, statistics=[Rows=Exact(4), Bytes=Exact(212), [(Col[0]: Min=Exact(Int64(-1)) Max=Exact(Int64(3)) Null=Exact(0)),(Col[1]: Min=Exact(Utf8View("a")) Max=Exact(Utf8View("d")) Null=Exact(0))]]
38+
physical_plan DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/listing_table_statistics/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/listing_table_statistics/2.parquet]]}, projection=[int_col, str_col], file_type=parquet, statistics=[Rows=Exact(4), Bytes=Exact(212), [(Col[0]: Min=Exact(Int64(-1)) Max=Exact(Int64(3)) Null=Exact(0)),(Col[1]: Min=Exact(Utf8View("a")) Max=Exact(Utf8View("d")) Null=Exact(0))]]
3939

4040
statement ok
4141
drop table t;

datafusion/sqllogictest/test_files/push_down_filter.slt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -175,23 +175,23 @@ CREATE TABLE src_table (
175175
# File 1:
176176
query I
177177
COPY (SELECT * FROM src_table where part_key = 1)
178-
TO 'test_files/scratch/parquet/test_filter_with_limit/part-0.parquet'
178+
TO 'test_files/scratch/push_down_filter/test_filter_with_limit/part-0.parquet'
179179
STORED AS PARQUET;
180180
----
181181
3
182182

183183
# File 2:
184184
query I
185185
COPY (SELECT * FROM src_table where part_key = 2)
186-
TO 'test_files/scratch/parquet/test_filter_with_limit/part-1.parquet'
186+
TO 'test_files/scratch/push_down_filter/test_filter_with_limit/part-1.parquet'
187187
STORED AS PARQUET;
188188
----
189189
4
190190

191191
# File 3:
192192
query I
193193
COPY (SELECT * FROM src_table where part_key = 3)
194-
TO 'test_files/scratch/parquet/test_filter_with_limit/part-2.parquet'
194+
TO 'test_files/scratch/push_down_filter/test_filter_with_limit/part-2.parquet'
195195
STORED AS PARQUET;
196196
----
197197
3
@@ -203,14 +203,14 @@ CREATE EXTERNAL TABLE test_filter_with_limit
203203
value INT
204204
)
205205
STORED AS PARQUET
206-
LOCATION 'test_files/scratch/parquet/test_filter_with_limit/';
206+
LOCATION 'test_files/scratch/push_down_filter/test_filter_with_limit/';
207207

208208
query TT
209209
explain select * from test_filter_with_limit where value = 2 limit 1;
210210
----
211211
physical_plan
212212
01)CoalescePartitionsExec: fetch=1
213-
02)--DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_filter_with_limit/part-0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_filter_with_limit/part-1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_filter_with_limit/part-2.parquet]]}, projection=[part_key, value], limit=1, file_type=parquet, predicate=value@1 = 2, pruning_predicate=value_null_count@2 != row_count@3 AND value_min@0 <= 2 AND 2 <= value_max@1, required_guarantees=[value in (2)]
213+
02)--DataSourceExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter/test_filter_with_limit/part-0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter/test_filter_with_limit/part-1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter/test_filter_with_limit/part-2.parquet]]}, projection=[part_key, value], limit=1, file_type=parquet, predicate=value@1 = 2, pruning_predicate=value_null_count@2 != row_count@3 AND value_min@0 <= 2 AND 2 <= value_max@1, required_guarantees=[value in (2)]
214214

215215
query II
216216
select * from test_filter_with_limit where value = 2 limit 1;

0 commit comments

Comments
 (0)